WvStreams
wvtclstring.cc
1 /*
2  * Worldvisions Weaver Software:
3  * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
4  */
5 #include "wvbackslash.h"
6 #include "wvbuf.h"
7 #include "wvstream.h"
8 #include "wvstring.h"
9 #include "wvstringmask.h"
10 #include "wvtclstring.h"
11 #include <climits>
12 
// File-level character masks, each built from the *_STR string macro of the
// same name.  The nasty masks select extra characters that wvtcl_escape()
// treats as needing escaping; WVTCL_SPLITCHARS selects word separators.
13 const WvStringMask WVTCL_NASTY_SPACES(WVTCL_NASTY_SPACES_STR);
14 const WvStringMask WVTCL_NASTY_NEWLINES(WVTCL_NASTY_NEWLINES_STR);
15 const WvStringMask WVTCL_SPLITCHARS(WVTCL_SPLITCHARS_STR);
16 
17 static size_t wvtcl_escape(char *dst, const char *s, size_t s_len,
18  const WvStringMask &nasties, bool *verbatim = NULL)
19 {
20  if (verbatim) *verbatim = false;
21 
22  // NULL strings remain such
23  if (s == NULL)
24  return 0;
25  // empty strings are just {}
26  if (s_len == 0)
27  {
28  if (dst)
29  {
30  dst[0] = '{';
31  dst[1] = '}';
32  }
33  return 2;
34  }
35 
36  bool backslashify = false, inescape = false;
37  int len = 0, unprintables = 0, bracecount = 0;
38  const char *cptr, *cptr_end = s + s_len;
39 
40  // figure out which method we need to use: backslashify or embrace.
41  // also count the number of unprintable characters we'll need to
42  // backslashify, if it turns out that's necessary.
43  for (cptr = s; cptr != cptr_end; cptr++)
44  {
45  // Assume we do nothing
46  if (dst) dst[len] = *cptr;
47  ++len;
48 
49  if (!inescape && *cptr == '{')
50  bracecount++;
51  else if (!inescape && *cptr == '}')
52  bracecount--;
53  if (bracecount < 0)
54  backslashify = true;
55 
56  bool doit = false;
57  switch (*cptr)
58  {
59  case WVTCL_ALWAYS_NASTY_CASE:
60  doit = true;
61  break;
62  default:
63  if (nasties[*cptr])
64  doit = true;
65  }
66  if (doit)
67  unprintables++;
68 
69  if (*cptr == '\\')
70  inescape = !inescape;
71  else
72  inescape = false;
73  }
74 
75  // if the braces aren't balanced, backslashify
76  if (bracecount != 0 || inescape)
77  backslashify = true;
78 
79  if (!backslashify && !unprintables)
80  {
81  if (verbatim) *verbatim = true;
82  return len; // no work needed!
83  }
84 
85  if (backslashify)
86  {
87  if (dst)
88  {
89  len = 0;
90  for (cptr = s; cptr != cptr_end; ++cptr)
91  {
92  bool doit = false;
93  switch (*cptr)
94  {
95  case WVTCL_ALWAYS_NASTY_CASE:
96  doit = true;
97  break;
98  default:
99  if (nasties[*cptr])
100  doit = true;
101  }
102  if (doit)
103  dst[len++] = '\\';
104 
105  dst[len++] = *cptr;
106  }
107  return len;
108  }
109  else return len+unprintables;
110  }
111  else
112  {
113  // the embrace method: just take the string and put braces around it
114  if (dst)
115  {
116  len = 0;
117  dst[len++] = '{';
118  for (cptr = s; cptr != cptr_end; ++cptr)
119  dst[len++] = *cptr;
120  dst[len++] = '}';
121  return len;
122  }
123  else return len+2;
124  }
125 }
126 
127 
128 WvString wvtcl_escape(WvStringParm s, const WvStringMask &nasties)
129 {
130  size_t s_len = s.len();
131 
132  bool verbatim;
133  size_t len = wvtcl_escape(NULL, s, s_len, nasties, &verbatim);
134  if (verbatim) return s;
135 
136  WvString result;
137  result.setsize(len);
138  char *e = result.edit();
139  e += wvtcl_escape(e, s, s_len, nasties);
140  *e = '\0';
141  return result;
142 }
143 
144 
// Tcl-unescape the s_len bytes at s.
//
// dst:      output buffer, or NULL to only compute the output length.  The
//           output is never NUL-terminated here.
// verbatim: if non-NULL, set to true only when s is NULL (the caller can
//           then hand back its input unchanged), false otherwise.
//
// An input wrapped in {braces} has the braces removed and is otherwise
// copied untouched.  An input wrapped in "quotes" has the quotes removed
// and is then unbackslashified; any other input is just unbackslashified.
//
// Returns the number of bytes the unescaped form occupies.
static size_t wvtcl_unescape(char *dst, const char *s, size_t s_len,
                             bool *verbatim = NULL)
{
    // NULL strings remain themselves
    if (!s)
    {
        if (verbatim) *verbatim = true;
        return 0;
    }

    if (verbatim) *verbatim = false;

    // A wrapper consists of a distinct opening and closing character, so
    // the input must be at least two bytes long.  Without this check a
    // lone '"' would count as both quotes at once and leave start beyond
    // end below, and an empty input would have s[0] inspected.
    const bool can_be_wrapped = s_len >= 2;

    // deal with embraced strings by simply removing the braces
    if (can_be_wrapped && s[0] == '{' && s[s_len-1] == '}')
    {
        if (dst) memcpy(dst, &s[1], s_len-2);
        return s_len - 2;
    }

    // deal with quoted strings by ignoring the quotes _and_
    // unbackslashifying what's between them
    const char *start = s, *end = s + s_len;
    if (can_be_wrapped && s[0] == '"' && s[s_len-1] == '"')
    {
        ++start;
        --end;
    }

    // unbackslashify: a backslash is dropped and the character after it is
    // kept literally (so a doubled backslash yields one backslash); a
    // trailing lone backslash is simply dropped
    size_t len = 0;
    bool inescape = false;
    for (; start != end; ++start)
    {
        if (*start == '\\' && !inescape)
        {
            inescape = true;
            continue;
        }

        inescape = false;
        if (dst) dst[len] = *start;
        len++;
    }
    return len;
}
202 
203 
204 WvString wvtcl_unescape(WvStringParm s)
205 {
206  size_t s_len = s.len();
207 
208  bool verbatim;
209  size_t len = wvtcl_unescape(NULL, s, s_len, &verbatim);
210  if (verbatim) return s;
211 
212  WvString result;
213  result.setsize(len+1);
214  char *e = result.edit();
215  e += wvtcl_unescape(e, s, s_len);
216  *e = '\0';
217  return result;
218 }
219 
220 
222  const WvStringMask &splitchars)
223 {
224  int size = 0;
225 
227  int count = 0;
228  for (i.rewind(); i.next(); )
229  {
230  size += wvtcl_escape(NULL, *i, i->len(), nasties);
231  ++count;
232  }
233 
234  WvString result;
235  result.setsize(size+(count-1)+1);
236 
237  char *p = result.edit();
238  int j;
239  for (i.rewind(), j=0; i.next(); ++j)
240  {
241  p += wvtcl_escape(p, *i, i->len(), nasties);
242  if (j < count - 1)
243  *p++ = splitchars.first();
244  }
245  *p = '\0';
246 
247  return result;
248 }
249 
// Sentinel length returned by the static wvtcl_getword() helper when no
// complete word is available; its callers compare the result against it.
250 const size_t WVTCL_GETWORD_NONE (UINT_MAX);
251 
252 static size_t wvtcl_getword(char *dst, const char *s, size_t s_len,
253  const WvStringMask &splitchars,
254  bool do_unescape, size_t *end = NULL)
255 {
256  //printf(" used=%d\n", origsize);
257  if (!s_len) return WVTCL_GETWORD_NONE;
258 
259  bool inescape = false, inquote = false, incontinuation = false;
260  int bracecount = 0;
261  const char *origend = s + s_len;
262  const char *sptr, *eptr;
263 
264  // skip leading separators
265  for (sptr = s; sptr != origend; sptr++)
266  {
267  if (!splitchars[*sptr])
268  break;
269  }
270 
271  if (sptr == origend) // nothing left
272  return WVTCL_GETWORD_NONE;
273 
274  // detect initial quote
275  if (*sptr == '"')
276  {
277  inquote = true;
278  eptr = sptr+1;
279  }
280  else
281  eptr = sptr;
282 
283  // loop over string until something satisfactory is found
284  for (; eptr != origend; eptr++)
285  {
286  char ch = *eptr;
287 
288  incontinuation = false;
289 
290  if (inescape)
291  {
292  if (ch == '\n')
293  {
294  // technically we've finished the line-continuation
295  // sequence, but we require at least one more character
296  // in order to prove that there's a next line somewhere
297  // in the buffer. Otherwise we might stop parsing before
298  // we're "really" done if we're given input line-by-line.
299  //
300  // A better way to do this would be for getword() to *never*
301  // return a string unless it contains a separator character;
302  // then we wouldn't need this weird special case. But it
303  // don't work like that; we'll return the last word in the
304  // buffer even if it *doesn't* end in a separator character.
305  incontinuation = true;
306  }
307  inescape = false;
308  }
309  else if (ch == '\\')
310  {
311  inescape = true;
312  // now we need a character to complete the escape
313  }
314  else // not an escape sequence
315  {
316  // detect end of a quoted/unquoted string
317  if (bracecount == 0)
318  {
319  if (inquote)
320  {
321  if (ch == '"')
322  {
323  eptr++;
324  break;
325  }
326  }
327  else if (splitchars[ch])
328  break;
329  }
330 
331  // match braces
332  if (!inquote)
333  {
334  if (ch == '{')
335  bracecount++;
336  else if (bracecount > 0 && ch == '}')
337  bracecount--;
338  }
339  }
340  }
341 
342  if (bracecount || sptr==eptr || inquote || inescape || incontinuation)
343  // not there yet...
344  return WVTCL_GETWORD_NONE;
345 
346  //printf("len=%d, unget=%d\n", eptr - sptr, origend - eptr);
347  if (end) *end = eptr - s;
348 
349  if (do_unescape)
350  return wvtcl_unescape(dst, sptr, eptr-sptr);
351  else
352  {
353  if (dst) memcpy(dst, sptr, eptr-sptr);
354  return eptr - sptr;
355  }
356 }
357 
358 
359 WvString wvtcl_getword(WvBuf &buf, const WvStringMask &splitchars,
360  bool do_unescape)
361 {
362  int origsize = buf.used();
363  const char *origptr = (const char *)buf.get(origsize);
364 
365  size_t end;
366  size_t len = wvtcl_getword(NULL, origptr, origsize,
367  splitchars, do_unescape, &end);
368  if (len == WVTCL_GETWORD_NONE)
369  {
370  buf.unget(origsize);
371  return WvString::null;
372  }
373 
374  WvString result;
375  result.setsize(len+1);
376  char *e = result.edit();
377  e += wvtcl_getword(e, origptr, origsize, splitchars, do_unescape);
378  *e = '\0';
379 
380  buf.unget(origsize - end);
381 
382  return result;
383 }
384 
385 
386 void wvtcl_decode(WvList<WvString> &l, WvStringParm _s,
387  const WvStringMask &splitchars, bool do_unescape)
388 {
389  const char *s = _s;
390  size_t s_len = _s.len();
391  for (;;)
392  {
393  size_t end;
394  size_t len = wvtcl_getword(NULL, s, s_len,
395  splitchars, do_unescape, &end);
396  if (len == WVTCL_GETWORD_NONE)
397  break;
398 
399  WvString *word = new WvString();
400  word->setsize(len+1);
401 
402  char *e = word->edit();
403  e += wvtcl_getword(e, s, s_len, splitchars, do_unescape);
404  *e = '\0';
405  l.append(word, true);
406 
407  s += end;
408  s_len -= end;
409  }
410 }
WvString::edit
char * edit()
make the string editable, and return a non-const (char*)
Definition: wvstring.h:397
WvList::append
void append(T *data, bool autofree, const char *id=NULL)
Appends the element to the end of the list.
Definition: wvlinklist.h:276
WvStringMask
A class used to provide a masked lookup for characters in a string.
Definition: wvstringmask.h:18
wvtcl_getword
WvString wvtcl_getword(WvBuf &buf, const WvStringMask &splitchars=WVTCL_SPLITCHARS, bool do_unescape=true)
Get a single tcl word from an input buffer, and return the rest of the buffer untouched.
Definition: wvtclstring.cc:359
WvBufBaseCommonImpl::get
const T * get(size_t count)
Reads exactly the specified number of elements and returns a pointer to a storage location owned by the buffer.
Definition: wvbufbase.h:114
WvBufBaseCommonImpl::unget
void unget(size_t count)
Ungets exactly the specified number of elements by returning them to the buffer for subsequent reads.
Definition: wvbufbase.h:177
wvtclstring.h
wvtcl_unescape
WvString wvtcl_unescape(WvStringParm s)
tcl-unescape a string.
Definition: wvtclstring.cc:204
WvStringMask::first
const char first() const
Get the first character set into the mask.
Definition: wvstringmask.cc:30
WvList::Iter
The iterator type for linked lists.
Definition: wvlinklist.h:350
WvString
WvString is an implementation of a simple and efficient printable-string class.
Definition: wvstring.h:329
wvtcl_escape
WvString wvtcl_escape(WvStringParm s, const WvStringMask &nasties=WVTCL_NASTY_SPACES)
tcl-escape a string.
Definition: wvtclstring.cc:128
wvtcl_encode
WvString wvtcl_encode(WvList< WvString > &l, const WvStringMask &nasties=WVTCL_NASTY_SPACES, const WvStringMask &splitchars=WVTCL_SPLITCHARS)
encode a tcl-style list.
Definition: wvtclstring.cc:221
WvBufBase< unsigned char >
Specialization of WvBufBase for unsigned char type buffers intended for use with raw memory buffers.
Definition: wvbuf.h:22
WvListBase::IterBase::rewind
void rewind()
Rewinds the iterator to make it point to an imaginary element preceding the first element of the list.
Definition: wvlinklist.h:90
WvBufBaseCommonImpl::used
size_t used() const
Returns the number of elements in the buffer currently available for reading.
Definition: wvbufbase.h:92
wvtcl_decode
void wvtcl_decode(WvList< WvString > &l, WvStringParm _s, const WvStringMask &splitchars=WVTCL_SPLITCHARS, bool do_unescape=true)
split a tcl-style list.
Definition: wvtclstring.cc:386
WvList
A linked list container class.
Definition: wvlinklist.h:197
WvListBase::IterBase::next
WvLink * next()
Moves the iterator along the list to point to the next element.
Definition: wvlinklist.h:103