WvStreams
wvstrutils.h
Go to the documentation of this file.
1 /* -*- Mode: C++ -*-
2  * Worldvisions Weaver Software:
3  * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
4  *
5  * Various little string functions...
6  *
7  * FIXME: and some other assorted crap that belongs anywhere but here.
8  */
9 #ifndef __WVSTRUTILS_H
10 #define __WVSTRUTILS_H
11 
12 #include <sys/types.h> // for off_t
13 #include <sys/stat.h>
14 #include <unistd.h>
15 #include <time.h>
16 #include <ctype.h>
17 #include "wvstring.h"
18 #include "wvstringlist.h"
19 #include "wvhex.h"
20 #ifndef _WIN32
21 #include "wvregex.h"
22 #endif
23 
36 char *terminate_string(char *string, char c);
37 
46 char *trim_string(char *string);
47 
52 char *trim_string(char *string, char c);
53 
67 WvString spacecat(WvStringParm a, WvStringParm b, char sep = ' ',
68  bool onesep = false);
69 
70 
75 char *non_breaking(const char *string);
76 
81 void replace_char(void *string, char c1, char c2, int length);
82 
86 char *snip_string(char *haystack, char *needle);
87 
88 #ifndef _WIN32
89 
93 char *strlwr(char *string);
94 
99 char *strupr(char *string);
100 
101 #endif
102 
104 bool is_word(const char *string);
105 
114 WvString hexdump_buffer(const void *buf, size_t len, bool charRep = true);
115 
120 bool isnewline(char c);
121 
129 WvString url_decode(WvStringParm str, bool no_space = false);
130 
131 
140 WvString url_encode(WvStringParm str, WvStringParm unsafe = "");
141 
142 
146 WvString diff_dates(time_t t1, time_t t2);
147 
148 
153 WvString rfc822_date(time_t _when = -1);
154 
156 WvString rfc1123_date(time_t _when);
157 
159 WvString local_date(time_t _when = -1);
160 
162 WvString intl_time(time_t _when = -1);
163 
165 WvString intl_date(time_t _when = -1);
166 
168 WvString intl_datetime(time_t _when = -1);
169 
170 time_t intl_gmtoff(time_t t);
171 
172 #ifndef _WIN32
173 
178 WvString passwd_crypt(const char *str);
179 
180 #endif
181 
186 WvString passwd_md5(const char *str);
187 
192 WvString backslash_escape(WvStringParm s1);
193 
195 int strcount(WvStringParm s, const char c);
196 
202 
209 WvString nice_hostname(WvStringParm name);
210 
216 WvString getfilename(WvStringParm fullname);
217 WvString getdirname(WvStringParm fullname);
218 
/*
 * Rounding strategies that can be passed to the size-formatting helpers
 * declared in this header (sizetoa(), sizektoa(), sizeitoa(), sizekitoa()).
 * The numeric values are spelled out; they are the same ones the compiler
 * would assign implicitly.
 */
enum RoundingMethod
{
    ROUND_DOWN               = 0,
    ROUND_DOWN_AT_POINT_FIVE = 1,
    ROUND_UP_AT_POINT_FIVE   = 2,
    ROUND_UP                 = 3
};
229 
235 WvString sizetoa(unsigned long long blocks, unsigned long blocksize = 1,
236  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
237 
242 WvString sizektoa(unsigned long long kbytes,
243  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
244 
250 WvString sizeitoa(unsigned long long blocks, unsigned long blocksize = 1,
251  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
252 
257 WvString sizekitoa(unsigned long long kbytes,
258  RoundingMethod rounding_method = ROUND_UP_AT_POINT_FIVE);
259 
263 WvString secondstoa(unsigned int total_seconds);
264 
269 int lookup(const char *str, const char * const *table,
270  bool case_sensitive = false);
271 
279 template<class StringCollection>
280 void strcoll_split(StringCollection &coll, WvStringParm _s,
281  const char *splitchars = " \t", int limit = 0)
282 {
283  WvString s(_s);
284  char *sptr = s.edit(), *eptr, oldc;
285 
286  // Simple if statement to catch (and add) empty (but not NULL) strings.
287  if (sptr && !*sptr )
288  {
289  WvString *emptyString = new WvString("");
290  coll.add(emptyString, true);
291  }
292 
293  // Needed to catch delimeters at the beginning of the string.
294  bool firstrun = true;
295 
296  while (sptr && *sptr)
297  {
298  --limit;
299 
300  if (firstrun)
301  {
302  firstrun = false;
303  }
304  else
305  {
306  sptr += strspn(sptr, splitchars);
307  }
308 
309  if (limit)
310  {
311  eptr = sptr + strcspn(sptr, splitchars);
312  }
313  else
314  {
315  eptr = sptr + strlen(sptr);
316  }
317 
318  oldc = *eptr;
319  *eptr = 0;
320 
321  WvString *newstr = new WvString(sptr);
322  coll.add(newstr, true);
323 
324  *eptr = oldc;
325  sptr = eptr;
326  }
327 }
328 
329 
343 template<class StringCollection>
344 void strcoll_splitstrict(StringCollection &coll, WvStringParm _s,
345  const char *splitchars = " \t", int limit = 0)
346 {
347  WvString s(_s);
348  char *cur = s.edit();
349 
350  if (!cur) return;
351 
352  for (;;)
353  {
354  --limit;
355  if (!limit)
356  {
357  coll.add(new WvString(cur), true);
358  break;
359  }
360 
361  int len = strcspn(cur, splitchars);
362 
363  char tmp = cur[len];
364  cur[len] = 0;
365  coll.add(new WvString(cur), true);
366  cur[len] = tmp;
367 
368  if (!cur[len]) break;
369  cur += len + 1;
370  }
371 }
372 
373 
374 #ifndef _WIN32 // don't have regex on win32
375 
382 template<class StringCollection>
383 void strcoll_split(StringCollection &coll, WvStringParm s,
384  const WvRegex &regex, int limit = 0)
385 {
386  int start = 0;
387  int match_start, match_end;
388  int count = 0;
389 
390  while ((limit == 0 || count < limit)
391  && regex.continuable_match(&s[start], match_start, match_end)
392  && match_end > 0)
393  {
394  WvString *substr = new WvString;
395  int len = match_start;
396  substr->setsize(len+1);
397  memcpy(substr->edit(), &s[start], len);
398  substr->edit()[len] = '\0';
399  coll.add(substr, true);
400  start += match_end;
401  ++count;
402  }
403 
404  if (limit == 0 || count < limit)
405  {
406  WvString *last = new WvString(&s[start]);
407  last->unique();
408  coll.add(last, true);
409  }
410 }
411 #endif
412 
413 
419 template<class StringCollection>
420 WvString strcoll_join(const StringCollection &coll,
421  const char *joinchars = " \t")
422 {
423  size_t joinlen = strlen(joinchars);
424  size_t totlen = 1;
425  typename StringCollection::Iter s(
426  const_cast<StringCollection&>(coll));
427  for (s.rewind(); s.next(); )
428  {
429  if (s->cstr())
430  totlen += strlen(s->cstr());
431  totlen += joinlen;
432  }
433  totlen -= joinlen; // no join chars at tail
434 
435  WvString total;
436  total.setsize(totlen);
437 
438  char *te = total.edit();
439  te[0] = 0;
440  bool first = true;
441  for (s.rewind(); s.next(); )
442  {
443  if (first)
444  first = false;
445  else
446  strcat(te, joinchars);
447  if (s->cstr())
448  strcat(te, s->cstr());
449  }
450  return total;
451 }
452 
457 WvString strreplace(WvStringParm s, WvStringParm a, WvStringParm b);
458 
460 WvString undupe(WvStringParm s, char c);
461 
464 
467 
470 
475 WvString metriculate(const off_t i);
476 
481 WvString afterstr(WvStringParm line, WvStringParm a);
482 
487 WvString beforestr(WvStringParm line, WvStringParm a);
488 
495 WvString substr(WvString line, unsigned int pos, unsigned int len);
496 
501 WvString depunctuate(WvStringParm line);
502 
503 // Converts a string in decimal to an arbitrary numeric type
504 template<class T>
505 bool wvstring_to_num(WvStringParm str, T &n)
506 {
507  bool neg = false;
508  n = 0;
509 
510  for (const char *p = str; *p; ++p)
511  {
512  if (isdigit(*p))
513  {
514  n = n * T(10) + T(*p - '0');
515  }
516  else if ((const char *)str == p
517  && *p == '-')
518  {
519  neg = true;
520  }
521  else return false;
522  }
523 
524  if (neg)
525  n = -n;
526 
527  return true;
528 }
529 
530 /*
531  * Before using the C-style string escaping functions below, please consider
532  * using the functions in wvtclstring.h instead; they usually lead to much more
533  * human readable and manageable results, and allow representation of
534  * lists of strings.
535  */
536 
/**
 * One entry of the extra_escapes table accepted by cstr_escape() and
 * cstr_unescape().
 *
 * NOTE(review): presumably 'ch' is the raw character and 'esc' the text it
 * is escaped as -- confirm against the implementation in strutils.cc.
 */
struct CStrExtraEscape
{
    char ch;
    const char *esc;
};
542 extern const CStrExtraEscape CSTR_TCLSTR_ESCAPES[];
543 
545 //
546 // If data is NULL, returns WvString::null; otherwise, returns an allocated
547 // WvString containing the C-style string constant that represents the data.
548 //
549 // All printable characters including space except " and \ are represented
550 // without escaping.
551 //
552 // The usual C escapes are performed, such as \n, \r, \", \\ and \0.
553 //
554 // All other characters are escaped in uppercase hex form, eg. \x9E
555 //
556 // The extra_escapes parameter allows for additional characters beyond
557 // the usual ones escaped in C; setting it to CSTR_TCLSTR_ESCAPES will
558 // escape { and } as < and >, which allows the resulting strings to be
559 // TCL-string coded without ridiculous double-escaping.
560 //
561 WvString cstr_escape(const void *data, size_t size,
562  const CStrExtraEscape extra_escapes[] = NULL);
563 
565 //
566 // This function does *not* include the trailing null that a C compiler would --
567 // if you want this null, put \0 at the end of the C-style string
568 //
569 // If cstr is correctly formatted and max_size is large enough for the
570 // resulting data, returns true and size will equal the size of the
571 // resulting data. If data is not NULL it will contain this data.
572 //
573 // If cstr is correctly formatted but max_size is too small for the resulting
574 // data, returns false and size will equal the minimum value of max_size
575 // for this function to have returned true. If data is non-NULL it will
576 // contain the first max_size bytes of resulting data.
577 //
578 // If cstr is incorrectly formatted, returns false and size will equal 0.
579 //
580 // This function works just as well on multiple, whitespace-separated
581 // C-style strings. This allows you to concatenate strings produced
582 // by cstr_escape, and the result of cstr_unescape will be the data blocks
583 // concatenated together. This implies that the empty string corresponds
584 // to a valid data block of length zero; however, a null string still returns
585 // an error.
586 //
587 // The extra_escapes parameter must match that used in the call to
588 // cstr_escape used to produce the escaped strings.
589 //
590 bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size,
591  const CStrExtraEscape extra_escapes[] = NULL);
592 
/**
 * Returns true if str is a decimal integer: an optional leading '-'
 * followed by at least one digit and nothing else.
 *
 * A null pointer, an empty string and a lone "-" are all rejected; a
 * leading '+' or any whitespace is not accepted either.
 *
 * @param str  NUL-terminated text to check (may be null)
 */
static inline bool is_int(const char *str)
{
    if (!str)
        return false;

    if (*str == '-')
        ++str;

    // There must be at least one digit after the optional sign.
    if (!*str)
        return false;

    for (; *str; ++str)
    {
        // isdigit() is only defined for values representable as unsigned
        // char (or EOF), so plain char must be converted first.
        if (!isdigit(static_cast<unsigned char>(*str)))
            return false;
    }

    return true;
}
610 
613 WvString ptr2str(void* ptr);
614 
615 #endif // __WVSTRUTILS_H
WvString::edit
char * edit()
make the string editable, and return a non-const (char*)
Definition: wvstring.h:397
intl_datetime
WvString intl_datetime(time_t _when=-1)
Return the local date and time (in format of ISO 8601) out of _when.
Definition: strutils.cc:1274
wvhex.h
intl_gmtoff
time_t intl_gmtoff(time_t t)
Return the number of seconds by which localtime (at the given timestamp) is offset from GMT.
Definition: strutils.cc:1294
intl_time
WvString intl_time(time_t _when=-1)
Return the local time (in format of ISO 8601) out of _when.
Definition: strutils.cc:1246
lookup
int lookup(const char *str, const char *const *table, bool case_sensitive=false)
Finds a string in an array and returns its index.
Definition: strutils.cc:850
fqdomainname
WvString fqdomainname()
Get the fqdn of the local host, using gethostbyname() and gethostname()
Definition: strutils.cc:893
getfilename
WvString getfilename(WvStringParm fullname)
Takes a full path/file name and splits it up into its respective pathname and filename.
Definition: strutils.cc:506
strcoll_join
WvString strcoll_join(const StringCollection &coll, const char *joinchars=" \t")
Concatenates all strings in a collection and returns the result.
Definition: wvstrutils.h:420
url_decode
WvString url_decode(WvStringParm str, bool no_space=false)
Converts escaped characters (things like %20 etc.) from web URLS into their normal ASCII representati...
Definition: strutils.cc:311
passwd_md5
WvString passwd_md5(const char *str)
Similar to crypt(), but this randomly selects its own salt.
Definition: strcrypt.cc:38
sizetoa
WvString sizetoa(unsigned long long blocks, unsigned long blocksize=1, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a number of blocks and a blocksize (default==1 byte), return a WvString containing a human-read...
Definition: strutils.cc:708
isnewline
bool isnewline(char c)
Returns true if 'c' is a newline or carriage return character.
Definition: strutils.cc:304
CStrExtraEscape
Definition: wvstrutils.h:537
rfc1123_date
WvString rfc1123_date(time_t _when)
Returns an RFC1123-compatible date made out of _when.
Definition: strutils.cc:838
undupe
WvString undupe(WvStringParm s, char c)
Replace any consecutive instances of character c with a single one.
Definition: strutils.cc:814
diff_dates
WvString diff_dates(time_t t1, time_t t2)
Returns the difference between two dates in a human-readable format.
Definition: strutils.cc:376
trim_string
char * trim_string(char *string)
Trims whitespace from the beginning and end of the character string, including carriage return / line...
Definition: strutils.cc:59
passwd_crypt
WvString passwd_crypt(const char *str)
Similar to crypt(), but this randomly selects its own salt.
Definition: strcrypt.cc:14
WvRegex::continuable_match
bool continuable_match(WvStringParm string, int &match_start, int &match_end, WVREGEX_REGS_DECL) const
Match a given string against the compiled regular expression, capturing the start and end positions o...
Definition: wvregex.h:230
spacecat
WvString spacecat(WvStringParm a, WvStringParm b, char sep=' ', bool onesep=false)
return the string formed by concatenating string 'a' and string 'b' with the 'sep' character between ...
Definition: strutils.cc:114
local_date
WvString local_date(time_t _when=-1)
Return the local date (TZ applied) out of _when.
Definition: strutils.cc:1232
encode_hostname_as_DN
WvString encode_hostname_as_DN(WvStringParm hostname)
Example: encode_hostname_as_DN("www.fizzle.com") will result in dc=www,dc=fizzle,dc=com,...
Definition: strutils.cc:444
replace_char
void replace_char(void *string, char c1, char c2, int length)
Replace all instances of c1 with c2 for the first 'length' characters in 'string'.
Definition: strutils.cc:178
WvString
WvString is an implementation of a simple and efficient printable-string class.
Definition: wvstring.h:329
afterstr
WvString afterstr(WvStringParm line, WvStringParm a)
Returns everything in line (exclusively) after a.
Definition: strutils.cc:965
url_encode
WvString url_encode(WvStringParm str, WvStringParm unsafe="")
Converts all those pesky spaces, colons, and other nasties into nice unreadable Quasi-Unicode codes.
Definition: strutils.cc:351
depunctuate
WvString depunctuate(WvStringParm line)
Removes any trailing punctuation ('.
Definition: strutils.cc:1306
sizekitoa
WvString sizekitoa(unsigned long long kbytes, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a size in kilobytes, return a human readable size.
Definition: strutils.cc:742
is_word
bool is_word(const char *string)
Returns true if all characters in 'string' are isalnum() (alphanumeric).
Definition: strutils.cc:228
non_breaking
char * non_breaking(const char *string)
Replaces all whitespace characters in the string with non-breaking spaces (&#160;) for use with web stuff.
Definition: strutils.cc:154
nice_hostname
WvString nice_hostname(WvStringParm name)
Given a hostname, turn it into a "nice" one.
Definition: strutils.cc:460
WvRegex
WvRegex – Unified support for regular expressions.
Definition: wvregex.h:47
substr
WvString substr(WvString line, unsigned int pos, unsigned int len)
Returns the string of length len starting at pos in line.
Definition: strutils.cc:998
strcount
int strcount(WvStringParm s, const char c)
How many times does 'c' occur in "s"?
Definition: strutils.cc:433
beforestr
WvString beforestr(WvStringParm line, WvStringParm a)
Returns everything in line (exclusively) before 'a'.
Definition: strutils.cc:981
snip_string
char * snip_string(char *haystack, char *needle)
Snip off the first part of 'haystack' if it consists of 'needle'.
Definition: strutils.cc:187
rfc822_date
WvString rfc822_date(time_t _when=-1)
Returns an RFC822-compatible date made out of _when, or, if _when < 0, out of the current time.
Definition: strutils.cc:395
sizektoa
WvString sizektoa(unsigned long long kbytes, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a size in kilobytes, return a human readable size.
Definition: strutils.cc:721
intl_date
WvString intl_date(time_t _when=-1)
Return the local date (in format of ISO 8601) out of _when.
Definition: strutils.cc:1260
strlwr
char * strlwr(char *string)
In-place modify a character string so that all contained letters are in lower case.
Definition: strutils.cc:201
WvString::unique
WvString & unique()
make the buf and str pointers owned only by this WvString.
Definition: wvstring.cc:306
strcoll_splitstrict
void strcoll_splitstrict(StringCollection &coll, WvStringParm _s, const char *splitchars=" \t", int limit=0)
Splits a string and adds each substring to a collection.
Definition: wvstrutils.h:344
strcoll_split
void strcoll_split(StringCollection &coll, WvStringParm _s, const char *splitchars=" \t", int limit=0)
Splits a string and adds each substring to a collection.
Definition: wvstrutils.h:280
metriculate
WvString metriculate(const off_t i)
Inserts SI-style spacing into a number (eg passing 9876543210 returns "9 876 543 210")
Definition: strutils.cc:926
strupr
char * strupr(char *string)
In-place modify a character string so that all contained letters are in upper case.
Definition: strutils.cc:214
hexdump_buffer
WvString hexdump_buffer(const void *buf, size_t len, bool charRep=true)
Produce a hexadecimal dump of the data buffer in 'buf' of length 'len'.
Definition: strutils.cc:245
sizeitoa
WvString sizeitoa(unsigned long long blocks, unsigned long blocksize=1, RoundingMethod rounding_method=ROUND_UP_AT_POINT_FIVE)
Given a number of blocks and a blocksize (default==1 byte), return a WvString containing a human-read...
Definition: strutils.cc:729
secondstoa
WvString secondstoa(unsigned int total_seconds)
Given a number of seconds, returns a formatted human-readable string saying how long the period is.
Definition: strutils.cc:750
cstr_escape
WvString cstr_escape(const void *data, size_t size, const CStrExtraEscape extra_escapes[]=NULL)
Converts data into a C-style string constant.
Definition: strutils.cc:1143
wvgetcwd
WvString wvgetcwd()
Get the current working directory without a fixed-length buffer.
Definition: strutils.cc:905
hostname
WvString hostname()
Do gethostname() without a fixed-length buffer.
Definition: strutils.cc:870
terminate_string
char * terminate_string(char *string, char c)
Add character c to the end of a string after removing terminating carriage returns/linefeeds if any.
Definition: strutils.cc:32
backslash_escape
WvString backslash_escape(WvStringParm s1)
Returns a string with a backslash in front of every non alphanumeric character in s1.
Definition: strutils.cc:410
cstr_unescape
bool cstr_unescape(WvStringParm cstr, void *data, size_t max_size, size_t &size, const CStrExtraEscape extra_escapes[]=NULL)
Converts a C-style string constant into data.
Definition: strutils.cc:1182
strreplace
WvString strreplace(WvStringParm s, WvStringParm a, WvStringParm b)
Replace any instances of "a" with "b" in "s".
Definition: strutils.cc:797
ptr2str
WvString ptr2str(void *ptr)
Converts a pointer into a string, like glibc's p formatter would do.
Definition: strutils.cc:1318