WvStreams
ftpparse.cc
1 /* ftpparse.c, ftpparse.h: library for parsing FTP LIST responses
2 20001223
3 D. J. Bernstein, djb@cr.yp.to
4 http://cr.yp.to/ftpparse.html
5 
6 Commercial use is fine, if you let me know what programs you're using this in.
7 
8 Currently covered formats:
9 EPLF.
10 UNIX ls, with or without gid.
11 Microsoft FTP Service.
12 Windows NT FTP Server.
13 VMS.
14 WFTPD.
15 NetPresenz (Mac).
16 NetWare.
17 MSDOS.
18 
19 Definitely not covered:
20 Long VMS filenames, with information split across two lines.
21 NCSA Telnet FTP server. Has LIST = NLST (and bad NLST for directories).
22 */
23 
24 #include <time.h>
25 #include "ftpparse.h"
26 
/*
 * Convert a calendar date into a second count relative to 1970-01-01,
 * treating every day as exactly 86400 seconds.
 *
 * year  - full year (e.g. 1996)
 * month - zero-based month, 0 = January ... 11 = December
 * mday  - one-based day of the month
 *
 * Returns the number of seconds; 1970-01-01 maps to 0 and earlier dates
 * come out negative.
 */
static long totai(long year,long month,long mday)
{
  long days;

  /* Renumber months so that March is month 0; January and February are
     then months 10 and 11 of the previous year. */
  if (month < 2) {
    month += 10;
    --year;
  } else {
    month -= 2;
  }

  /* Day offset inside the March-based year. */
  days = ((mday - 1) * 10 + 5 + 306 * month) / 10;

  /* Fold in the position inside the 4-year (1461-day) cycle.  An offset
     of exactly 365 is the extra day at the very end of the cycle. */
  if (days == 365) {
    year -= 3;
    days = 1460;
  } else {
    days += 365 * (year % 4);
  }
  year /= 4;

  /* 25 four-year cycles per century. */
  days += 1461 * (year % 25);
  year /= 25;

  /* Position inside the 400-year (146097-day) cycle; an offset of exactly
     36524 is the last day of that cycle. */
  if (days == 36524) {
    year -= 3;
    days = 146096;
  } else {
    days += 36524 * (year % 4);
  }
  year /= 4;

  /* Whole 400-year cycles, then shift so that 1970-01-01 is day 0. */
  days += 146097 * (year - 5);
  days += 11017;

  return days * 86400;
}
46 
/* Cached epoch offset: computed on first use, while flagneedbase is set. */
static time_t base; /* time() value on this OS at the beginning of 1970 TAI */
static int flagneedbase = 1;

/* Current time and a cached estimate of the current year; the year is
   computed on first use, while flagneedcurrentyear is set. */
static long now; /* current time */
static long currentyear; /* approximation to current year */
static int flagneedcurrentyear = 1;
52 
53 static void initbase(void)
54 {
55  struct tm *t;
56  if (!flagneedbase) return;
57 
58  base = 0;
59  t = gmtime(&base);
60  base = -(totai(t->tm_year + 1900,t->tm_mon,t->tm_mday) + t->tm_hour * 3600 + t->tm_min * 60 + t->tm_sec);
61  /* assumes the right time_t, counting seconds. */
62  /* base may be slightly off if time_t counts non-leap seconds. */
63  flagneedbase = 0;
64 }
65 
66 static void initnow(void)
67 {
68  long day;
69  long year;
70 
71  initbase();
72  now = time((time_t *) 0) - base;
73 
74  if (flagneedcurrentyear) {
75  day = now / 86400;
76  if ((now % 86400) < 0) --day;
77  day -= 11017;
78  year = 5 + day / 146097;
79  day = day % 146097;
80  if (day < 0) { day += 146097; --year; }
81  year *= 4;
82  if (day == 146096) { year += 3; day = 36524; }
83  else { year += day / 36524; day %= 36524; }
84  year *= 25;
85  year += day / 1461;
86  day %= 1461;
87  year *= 4;
88  if (day == 1460) { year += 3; day = 365; }
89  else { year += day / 365; day %= 365; }
90  day *= 10;
91  if ((day + 5) / 306 >= 10) ++year;
92  currentyear = year;
93  flagneedcurrentyear = 0;
94  }
95 }
96 
97 /* UNIX ls does not show the year for dates in the last six months. */
98 /* So we have to guess the year. */
99 /* Apparently NetWare uses ``twelve months'' instead of ``six months''; ugh. */
100 /* Some versions of ls also fail to show the year for future dates. */
101 static long guesstai(long month,long mday)
102 {
103  long year;
104  long t;
105 
106  initnow();
107 
108  for (year = currentyear - 1;year < currentyear + 100;++year) {
109  t = totai(year,month,mday);
110  if (now - t < 350 * 86400)
111  return t;
112  }
113  return 0; /* shouldn't happen, but this gets rid of compiler warnings */
114 }
115 
/*
 * Compare the first three characters of buf against a three-letter month
 * name.  Each character of buf matches if it equals the corresponding
 * character of monthname or that character minus 32 (the uppercase form
 * of a lowercase ASCII letter).
 *
 * Returns 1 when all three characters match, 0 otherwise.
 */
static int check(char *buf, const char *monthname)
{
  int k;

  for (k = 0;k < 3;++k) {
    if (buf[k] == monthname[k]) continue;
    if (buf[k] == monthname[k] - 32) continue;
    return 0;
  }
  return 1;
}
123 
/* Lowercase three-letter month names; the array index is the zero-based
   month number. */
static const char *months[12] = {
  "jan", "feb", "mar",
  "apr", "may", "jun",
  "jul", "aug", "sep",
  "oct", "nov", "dec"
};
127 
128 static int getmonth(char *buf, int len)
129 {
130  int i;
131  if (len == 3)
132  for (i = 0;i < 12;++i)
133  if (check(buf,months[i])) return i;
134  return -1;
135 }
136 
/*
 * Convert the first len characters of buf to a number, treating each one
 * as a decimal digit (value = character - '0').
 *
 * No validation is performed: callers also pass tokens that are not
 * numeric (e.g. a user name as a "tentative size") and discard the result.
 * Because such tokens come from the remote server and can be arbitrarily
 * long, the accumulation is done in unsigned arithmetic, where wrap-around
 * is well defined, instead of signed arithmetic, where overflow is
 * undefined behaviour.  For every input on which the signed computation
 * did not overflow, the result is unchanged.
 *
 * Returns 0 when len <= 0.
 */
static long getlong(char *buf,int len)
{
  unsigned long acc = 0;
  int k;

  for (k = 0;k < len;++k)
    acc = acc * 10u + (unsigned long) (long) (buf[k] - '0');

  return (long) acc;
}
144 
145 int ftpparse(struct ftpparse *fp,char *buf,int len)
146 {
147  int i;
148  int j;
149  int state;
150  long size = 0;
151  long year;
152  long month = 0;
153  long mday = 0;
154  long hour;
155  long minute;
156 
157  fp->name = 0;
158  fp->namelen = 0;
159  fp->flagtrycwd = 0;
160  fp->flagtryretr = 0;
161  fp->sizetype = FTPPARSE_SIZE_UNKNOWN;
162  fp->size = 0;
163  fp->mtimetype = FTPPARSE_MTIME_UNKNOWN;
164  fp->mtime = 0;
165  fp->idtype = FTPPARSE_ID_UNKNOWN;
166  fp->id = 0;
167  fp->idlen = 0;
168 
169  if (len < 2) /* an empty name in EPLF, with no info, could be 2 chars */
170  return 0;
171 
172  switch(*buf) {
173  /* see http://pobox.com/~djb/proto/eplf.txt */
174  /* "+i8388621.29609,m824255902,/,\tdev" */
175  /* "+i8388621.44468,m839956783,r,s10376,\tRFCEPLF" */
176  case '+':
177  i = 1;
178  for (j = 1;j < len;++j) {
179  if (buf[j] == 9) {
180  fp->name = buf + j + 1;
181  fp->namelen = len - j - 1;
182  return 1;
183  }
184  if (buf[j] == ',') {
185  switch(buf[i]) {
186  case '/':
187  fp->flagtrycwd = 1;
188  break;
189  case 'r':
190  fp->flagtryretr = 1;
191  break;
192  case 's':
193  fp->sizetype = FTPPARSE_SIZE_BINARY;
194  fp->size = getlong(buf + i + 1,j - i - 1);
195  break;
196  case 'm':
197  fp->mtimetype = FTPPARSE_MTIME_LOCAL;
198  initbase();
199  fp->mtime = base + getlong(buf + i + 1,j - i - 1);
200  break;
201  case 'i':
202  fp->idtype = FTPPARSE_ID_FULL;
203  fp->id = buf + i + 1;
204  fp->idlen = j - i - 1;
205  }
206  i = j + 1;
207  }
208  }
209  return 0;
210 
211  /* UNIX-style listing, without inum and without blocks */
212  /* "-rw-r--r-- 1 root other 531 Jan 29 03:26 README" */
213  /* "dr-xr-xr-x 2 root other 512 Apr 8 1994 etc" */
214  /* "dr-xr-xr-x 2 root 512 Apr 8 1994 etc" */
215  /* "lrwxrwxrwx 1 root other 7 Jan 25 00:17 bin -> usr/bin" */
216  /* Also produced by Microsoft's FTP servers for Windows: */
217  /* "---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z" */
218  /* "d--------- 1 owner group 0 May 9 19:45 Softlib" */
219  /* Also WFTPD for MSDOS: */
220  /* "-rwxrwxrwx 1 noone nogroup 322 Aug 19 1996 message.ftp" */
221  /* Also NetWare: */
222  /* "d [R----F--] supervisor 512 Jan 16 18:53 login" */
223  /* "- [R----F--] rhesus 214059 Oct 20 15:27 cx.exe" */
224  /* Also NetPresenz for the Mac: */
225  /* "-------r-- 326 1391972 1392298 Nov 22 1995 MegaPhone.sit" */
226  /* "drwxrwxr-x folder 2 May 10 1996 network" */
227  case 'b':
228  case 'c':
229  case 'd':
230  case 'l':
231  case 'p':
232  case 's':
233  case '-':
234 
235  if (*buf == 'd') fp->flagtrycwd = 1;
236  if (*buf == '-') fp->flagtryretr = 1;
237  if (*buf == 'l') fp->flagtrycwd = fp->flagtryretr = 1;
238 
239  state = 1;
240  i = 0;
241  for (j = 1;j < len;++j)
242  if ((buf[j] == ' ') && (buf[j - 1] != ' ')) {
243  switch(state) {
244  case 1: /* skipping perm */
245  state = 2;
246  break;
247  case 2: /* skipping nlink */
248  state = 3;
249  if ((j - i == 6) && (buf[i] == 'f')) /* for NetPresenz */
250  state = 4;
251  break;
252  case 3: /* skipping uid */
253  state = 4;
254  break;
255  case 4: /* getting tentative size */
256  size = getlong(buf + i,j - i);
257  state = 5;
258  break;
259  case 5: /* searching for month, otherwise getting tentative size */
260  month = getmonth(buf + i,j - i);
261  if (month >= 0)
262  state = 6;
263  else
264  size = getlong(buf + i,j - i);
265  break;
266  case 6: /* have size and month */
267  mday = getlong(buf + i,j - i);
268  state = 7;
269  break;
270  case 7: /* have size, month, mday */
271  if ((j - i == 4) && (buf[i + 1] == ':')) {
272  hour = getlong(buf + i,1);
273  minute = getlong(buf + i + 2,2);
274  fp->mtimetype = FTPPARSE_MTIME_REMOTEMINUTE;
275  initbase();
276  fp->mtime = base + guesstai(month,mday) + hour * 3600 + minute * 60;
277  } else if ((j - i == 5) && (buf[i + 2] == ':')) {
278  hour = getlong(buf + i,2);
279  minute = getlong(buf + i + 3,2);
280  fp->mtimetype = FTPPARSE_MTIME_REMOTEMINUTE;
281  initbase();
282  fp->mtime = base + guesstai(month,mday) + hour * 3600 + minute * 60;
283  }
284  else if (j - i >= 4) {
285  year = getlong(buf + i,j - i);
286  fp->mtimetype = FTPPARSE_MTIME_REMOTEDAY;
287  initbase();
288  fp->mtime = base + totai(year,month,mday);
289  }
290  else
291  return 0;
292  fp->name = buf + j + 1;
293  fp->namelen = len - j - 1;
294  state = 8;
295  break;
296  case 8: /* twiddling thumbs */
297  break;
298  }
299  i = j + 1;
300  while ((i < len) && (buf[i] == ' ')) ++i;
301  }
302 
303  if (state != 8)
304  return 0;
305 
306  fp->size = size;
307  fp->sizetype = FTPPARSE_SIZE_BINARY;
308 
309  if (*buf == 'l')
310  for (i = 0;i + 3 < fp->namelen;++i)
311  if (fp->name[i] == ' ')
312  if (fp->name[i + 1] == '-')
313  if (fp->name[i + 2] == '>')
314  if (fp->name[i + 3] == ' ') {
315  fp->namelen = i;
316  break;
317  }
318 
319  /* eliminate extra NetWare spaces */
320  if ((buf[1] == ' ') || (buf[1] == '['))
321  if (fp->namelen > 3)
322  if (fp->name[0] == ' ')
323  if (fp->name[1] == ' ')
324  if (fp->name[2] == ' ') {
325  fp->name += 3;
326  fp->namelen -= 3;
327  }
328 
329  return 1;
330  }
331 
332  /* MultiNet (some spaces removed from examples) */
333  /* "00README.TXT;1 2 30-DEC-1996 17:44 [SYSTEM] (RWED,RWED,RE,RE)" */
334  /* "CORE.DIR;1 1 8-SEP-1996 16:09 [SYSTEM] (RWE,RWE,RE,RE)" */
335  /* and non-MutliNet VMS: */
336  /* "CII-MANUAL.TEX;1 213/216 29-JAN-1996 03:33:12 [ANONYMOU,ANONYMOUS] (RWED,RWED,,)" */
337  for (i = 0;i < len;++i)
338  if (buf[i] == ';')
339  break;
340  if (i < len) {
341  fp->name = buf;
342  fp->namelen = i;
343  if (i > 4)
344  if (buf[i - 4] == '.')
345  if (buf[i - 3] == 'D')
346  if (buf[i - 2] == 'I')
347  if (buf[i - 1] == 'R') {
348  fp->namelen -= 4;
349  fp->flagtrycwd = 1;
350  }
351  if (!fp->flagtrycwd)
352  fp->flagtryretr = 1;
353  while (buf[i] != ' ') if (++i == len) return 0;
354  while (buf[i] == ' ') if (++i == len) return 0;
355  while (buf[i] != ' ') if (++i == len) return 0;
356  while (buf[i] == ' ') if (++i == len) return 0;
357  j = i;
358  while (buf[j] != '-') if (++j == len) return 0;
359  mday = getlong(buf + i,j - i);
360  while (buf[j] == '-') if (++j == len) return 0;
361  i = j;
362  while (buf[j] != '-') if (++j == len) return 0;
363  month = getmonth(buf + i,j - i);
364  if (month < 0) return 0;
365  while (buf[j] == '-') if (++j == len) return 0;
366  i = j;
367  while (buf[j] != ' ') if (++j == len) return 0;
368  year = getlong(buf + i,j - i);
369  while (buf[j] == ' ') if (++j == len) return 0;
370  i = j;
371  while (buf[j] != ':') if (++j == len) return 0;
372  hour = getlong(buf + i,j - i);
373  while (buf[j] == ':') if (++j == len) return 0;
374  i = j;
375  while ((buf[j] != ':') && (buf[j] != ' ')) if (++j == len) return 0;
376  minute = getlong(buf + i,j - i);
377 
378  fp->mtimetype = FTPPARSE_MTIME_REMOTEMINUTE;
379  initbase();
380  fp->mtime = base + totai(year,month,mday) + hour * 3600 + minute * 60;
381 
382  return 1;
383  }
384 
385  /* MSDOS format */
386  /* 04-27-00 09:09PM <DIR> licensed */
387  /* 07-18-00 10:16AM <DIR> pub */
388  /* 04-14-00 03:47PM 589 readme.htm */
389  if ((*buf >= '0') && (*buf <= '9')) {
390  i = 0;
391  j = 0;
392  while (buf[j] != '-') if (++j == len) return 0;
393  month = getlong(buf + i,j - i) - 1;
394  while (buf[j] == '-') if (++j == len) return 0;
395  i = j;
396  while (buf[j] != '-') if (++j == len) return 0;
397  mday = getlong(buf + i,j - i);
398  while (buf[j] == '-') if (++j == len) return 0;
399  i = j;
400  while (buf[j] != ' ') if (++j == len) return 0;
401  year = getlong(buf + i,j - i);
402  if (year < 50) year += 2000;
403  if (year < 1000) year += 1900;
404  while (buf[j] == ' ') if (++j == len) return 0;
405  i = j;
406  while (buf[j] != ':') if (++j == len) return 0;
407  hour = getlong(buf + i,j - i);
408  while (buf[j] == ':') if (++j == len) return 0;
409  i = j;
410  while ((buf[j] != 'A') && (buf[j] != 'P')) if (++j == len) return 0;
411  minute = getlong(buf + i,j - i);
412  if (hour == 12) hour = 0;
413  if (buf[j] == 'A') if (++j == len) return 0;
414  if (buf[j] == 'P') { hour += 12; if (++j == len) return 0; }
415  if (buf[j] == 'M') if (++j == len) return 0;
416 
417  while (buf[j] == ' ') if (++j == len) return 0;
418  if (buf[j] == '<') {
419  fp->flagtrycwd = 1;
420  while (buf[j] != ' ') if (++j == len) return 0;
421  }
422  else {
423  i = j;
424  while (buf[j] != ' ') if (++j == len) return 0;
425  fp->size = getlong(buf + i,j - i);
426  fp->sizetype = FTPPARSE_SIZE_BINARY;
427  fp->flagtryretr = 1;
428  }
429  while (buf[j] == ' ') if (++j == len) return 0;
430 
431  fp->name = buf + j;
432  fp->namelen = len - j;
433 
434  fp->mtimetype = FTPPARSE_MTIME_REMOTEMINUTE;
435  initbase();
436  fp->mtime = base + totai(year,month,mday) + hour * 3600 + minute * 60;
437 
438  return 1;
439  }
440 
441  /* Some useless lines, safely ignored: */
442  /* "Total of 11 Files, 10966 Blocks." (VMS) */
443  /* "total 14786" (UNIX) */
444  /* "DISK$ANONFTP:[ANONYMOUS]" (VMS) */
445  /* "Directory DISK$PCSA:[ANONYM]" (VMS) */
446 
447  return 0;
448 }
ftpparse
Definition: ftpparse.h:21