WvStreams
wvbackslash.cc
1 /*
2  * Worldvisions Weaver Software:
3  * Copyright (C) 2002 Net Integration Technologies, Inc.
4  *
5  * Performs C-style backslash escaping and unescaping of strings.
6  */
7 #include <ctype.h>
8 #include "wvbackslash.h"
9 
// Parallel lookup tables: the control character at index k of escapein is
// written (and read back) as a backslash followed by escapeout[k].
static const char *escapein  = "\a\b\f\n\r\t\v";
static const char *escapeout = "abfnrtv";
12 
// Converts a value in the range 0..15 to its hexadecimal digit character.
// alphabase is the character that corresponds to the value 0 on the
// alphabetic scale, i.e. ('a' - 10) for lowercase or ('A' - 10) for
// uppercase output.
static inline char tohex(int digit, char alphabase = ('a' - 10))
{
    const char base = (digit < 10) ? '0' : alphabase;
    return base + digit;
}
17 
// Converts a hexadecimal digit character (either case) to its value 0..15.
// Returns -1 if the character is not a hexadecimal digit.
//
// The decimal range is tested explicitly instead of calling isdigit():
// passing a plain char that happens to be negative (any byte >= 0x80 on
// platforms where char is signed) to isdigit() is undefined behaviour.
static inline int fromhex(char digit)
{
    if (digit >= '0' && digit <= '9')
        return digit - '0';
    if (digit >= 'A' && digit <= 'F')
        return digit - 'A' + 10;
    if (digit >= 'a' && digit <= 'f')
        return digit - 'a' + 10;
    return -1;
}
28 
// Converts an octal digit character ('0'..'7') to its value 0..7.
// Returns -1 for any other character.
static inline int fromoctal(char digit)
{
    const bool isoctal = ('0' <= digit) && (digit <= '7');
    return isoctal ? digit - '0' : -1;
}
35 
36 
37 /***** WvBackslashEncoder *****/
38 
40  nasties(_nasties)
41 {
42 }
43 
44 
46  bool flush)
47 {
48  size_t avail = outbuf.free();
49  size_t len;
50  while ((len = inbuf.optgettable()) != 0)
51  {
52  const unsigned char *datain = inbuf.get(len);
53  for (size_t i = 0; i < len; ++i)
54  {
55  int c = datain[i];
56 
57  // handle 1 character escape sequences
58  if (avail < 1)
59  { outbuf.unget(len - i); return ! flush; }
60  const char *foundnasty = NULL;
61  const char *foundspecial = NULL;
62  if (c != '\0')
63  {
64  foundnasty = strchr(nasties.cstr(), c);
65  if (! foundnasty)
66  {
67  foundspecial = strchr(escapein, c);
68  if (! foundspecial && isprint(c))
69  {
70  outbuf.putch(c);
71  avail -= 1;
72  continue;
73  }
74  }
75  }
76 
77  // handle 2 character escape sequences
78  if (avail < 2)
79  { outbuf.unget(len - i); return ! flush; }
80  if (foundnasty != NULL)
81  {
82  outbuf.putch('\\');
83  outbuf.putch(c);
84  avail -= 2;
85  continue;
86  }
87  if (foundspecial != NULL)
88  {
89  outbuf.putch('\\');
90  outbuf.putch(escapeout[foundspecial - escapein]);
91  avail -= 2;
92  continue;
93  }
94 
95  // handle 4 character escape sequences
96  if (avail < 4)
97  { outbuf.unget(len - i); return ! flush; }
98  outbuf.put("\\x", 2);
99  outbuf.putch(tohex(c >> 4));
100  outbuf.putch(tohex(c & 15));
101  avail -= 4;
102  }
103  }
104  return true;
105 }
106 
107 
109 {
110  return true;
111 }
112 
113 
114 /***** WvBackslashDecoder *****/
115 
117 {
118  _reset();
119 }
120 
121 
123  bool flush)
124 {
125  if (outbuf.free() == 0)
126  return inbuf.used() == 0;
127  if (! flushtmpbuf(outbuf))
128  return false;
129 
130  size_t len;
131  while ((len = inbuf.optgettable()) != 0)
132  {
133  const unsigned char *datain = inbuf.get(len);
134  for (size_t i = 0; i < len; ++i)
135  {
136  int c = datain[i];
137 
138  switch (state)
139  {
140  case Initial:
141  if (c == '\\')
142  state = Escape;
143  tmpbuf.putch(c);
144  break;
145 
146  case Escape:
147  if (c >= '0' && c <= '3')
148  {
149  tmpbuf.unalloc(1);
150  value = c - '0';
151  state = Octal1;
152  }
153  else if (c == 'x')
154  {
155  tmpbuf.putch(c);
156  state = Hex1;
157  }
158  else if (c == '\n')
159  {
160  // line continuation sequence
161  tmpbuf.unalloc(1);
162  tmpbuf.putch('\n');
163  state = Initial;
164  }
165  else
166  {
167  const char *found = strchr(escapeout, c);
168  tmpbuf.unalloc(1);
169  if (found != NULL)
170  c = escapein[found - escapeout];
171  // else we just drop the backslash
172  tmpbuf.putch(c);
173  state = Initial;
174  }
175  break;
176 
177  case Hex2:
178  case Hex1: {
179  int digit = fromhex(c);
180  if (digit >= 0)
181  {
182  if (state == Hex1)
183  {
184  tmpbuf.unalloc(2);
185  value = digit;
186  state = Hex2;
187  }
188  else
189  {
190  value = (value << 4) | digit;
191  state = Initial;
192  }
193  }
194  else
195  {
196  i -= 1;
197  state = Initial;
198  }
199  break;
200  }
201 
202  case Octal3:
203  case Octal2:
204  case Octal1: {
205  int digit = fromoctal(c);
206  if (digit >= 0)
207  {
208  value = (value << 3) | digit;
209  if (state != Octal3)
210  state = State(state + 1);
211  else
212  state = Initial;
213  }
214  else
215  {
216  i -= 1;
217  state = Initial;
218  }
219  break;
220  }
221  }
222 
223  flushtmpbuf(outbuf);
224  if (outbuf.free() == 0)
225  {
226  inbuf.unget(len - i);
227  break;
228  }
229  }
230  }
231  if (flush)
232  {
233  if (inbuf.used() != 0)
234  return false;
235  state = Initial;
236  return flushtmpbuf(outbuf);
237  }
238  return true;
239 
240 }
241 
242 
244 {
245  state = Initial;
246  value = -1;
247  tmpbuf.zap();
248  return true;
249 }
250 
251 
252 bool WvBackslashDecoder::flushtmpbuf(WvBuf &outbuf)
253 {
254  if (state != Initial)
255  return true;
256 
257  if (value != -1)
258  {
259  tmpbuf.putch(value);
260  value = -1;
261  }
262 
263  size_t len = tmpbuf.used();
264  if (len == 0)
265  return true;
266  size_t avail = outbuf.free();
267  if (avail > len)
268  avail = len;
269  outbuf.merge(tmpbuf, avail);
270  len -= avail;
271  if (len == 0)
272  {
273  tmpbuf.zap();
274  return true;
275  }
276  return false;
277 }
WvBackslashEncoder::_reset
virtual bool _reset()
Template method implementation of reset().
Definition: wvbackslash.cc:108
WvBackslashDecoder::WvBackslashDecoder
WvBackslashDecoder()
Creates a C-style backslash decoder.
Definition: wvbackslash.cc:116
WvBufBaseCommonImpl::get
const T * get(size_t count)
Reads exactly the specified number of elements and returns a pointer to a storage location owned by t...
Definition: wvbufbase.h:114
WvBufBaseCommonImpl::unget
void unget(size_t count)
Ungets exactly the specified number of elements by returning them to the buffer for subsequent reads.
Definition: wvbufbase.h:177
WvBufBaseCommonImpl::unalloc
void unalloc(size_t count)
Unallocates exactly the specified number of elements by removing them from the buffer and releasing t...
Definition: wvbufbase.h:421
WvBufBaseCommonImpl::free
size_t free() const
Returns the number of elements that the buffer can currently accept for writing.
Definition: wvbufbase.h:353
WvBufBaseCommonImpl::optgettable
size_t optgettable() const
Returns the optimal maximum number of elements in the buffer currently available for reading without ...
Definition: wvbufbase.h:154
WvBufBase< unsigned char >::putch
void putch(int ch)
Puts a single character into the buffer as an int.
Definition: wvbuf.h:76
WvFastString::cstr
const char * cstr() const
return a (const char *) for this string.
Definition: wvstring.h:267
WvBackslashDecoder::_encode
virtual bool _encode(WvBuf &inbuf, WvBuf &outbuf, bool flush)
Template method implementation of encode().
Definition: wvbackslash.cc:122
WvBufBase< unsigned char >
Specialization of WvBufBase for unsigned char type buffers intended for use with raw memory buffers.
Definition: wvbuf.h:22
WvBufBaseCommonImpl::zap
void zap()
Clears the buffer.
Definition: wvbufbase.h:257
WvBackslashEncoder::_encode
virtual bool _encode(WvBuf &inbuf, WvBuf &outbuf, bool flush)
Template method implementation of encode().
Definition: wvbackslash.cc:45
WvEncoder::flush
bool flush(WvBuf &inbuf, WvBuf &outbuf, bool finish=false)
Flushes the encoder and optionally finishes it.
Definition: wvencoder.h:163
WvBufBaseCommonImpl::used
size_t used() const
Returns the number of elements in the buffer currently available for reading.
Definition: wvbufbase.h:92
WvBackslashEncoder::WvBackslashEncoder
WvBackslashEncoder(WvStringParm _nasties="\\\"")
Creates a C-style backslash encoder.
Definition: wvbackslash.cc:39
WvBufBaseCommonImpl::merge
void merge(Buffer &inbuf, size_t count)
Efficiently moves count bytes from the specified buffer into this one.
Definition: wvbufbase.h:558
WvBackslashDecoder::_reset
virtual bool _reset()
Template method implementation of reset().
Definition: wvbackslash.cc:243