WvStreams
wvurl.cc
1 /*
2  * Worldvisions Weaver Software:
3  * Copyright (C) 1997-2002 Net Integration Technologies, Inc.
4  *
5  * WvUrl is a simple URL-parsing class with built-in (though still somewhat
6  * inconvenient) DNS resolution.
7  *
8  * See wvurl.h.
9  */
10 #include "wvurl.h"
11 #include "strutils.h"
12 
// One entry in the static list of known protocols: the protocol name as it
// appears before the ':' in a URL, the default port for that protocol, and
// whether URLs of that protocol put "//" after the colon (http) or not (sip).
struct DefaultPort
{
    const char *proto;
    int port;
    bool uses_slashes;
};
20 
21 // The protocols must be arranged from longest to shortest because they're
22 // compared with strncmp, so "https://" will also match http.
23 static DefaultPort portmap[] = {
24  { "exchangeits", 7070, false },
25  { "exchangeit", 6969, false },
26  { "https", 443, true },
27  { "http", 80, true },
28  { "file", 0, true },
29  { "sip", 5060, false },
30  { "ftp", 21, true },
31  { "ldaps", 636, false },
32  { "ldap", 389, false },
33  { NULL, 0 }
34 };
35 
36 // Look up the protocol and return the default port.
37 static int get_default_port(WvString proto)
38 {
39  DefaultPort *p = portmap;
40  for (p = portmap; p->proto != NULL; p++)
41  {
42  if (strncmp(p->proto, proto, strlen(p->proto)) == 0)
43  return p->port;
44  }
45  return -1;
46 }
47 
48 // Look up the protocol and decide whether it uses slashes (http) or not (sip)
49 // A check of rfc2396 shows that the URI standard actually distinguishes
50 // these: 'hierarchical' vs. 'opaque'.
51 static bool protocol_uses_slashes(WvString proto)
52 {
53  DefaultPort *p = portmap;
54  for (p = portmap; p->proto != NULL; p++)
55  {
56  if (strncmp(p->proto, proto, strlen(p->proto)) == 0)
57  return p->uses_slashes;
58  }
59  return false;
60 }
61 
62 // Split up the URL into a hostname, a port, and the rest of it.
63 WvUrl::WvUrl(WvStringParm url) : err("No error")
64 {
65  WvString work(url);
66  char *cptr, *wptr = work.edit();
67 
68  port = 0; // error condition by default
69  addr = NULL;
70  resolving = true;
71 
72  // deal with extra whitespace.
73  wptr = trim_string(wptr);
74  cptr = wptr + strcspn(wptr, " \t\r\n");
75  *cptr = 0;
76 
77  // if it's not one of these easy prefixes, give up. Our URL parser is
78  // pretty dumb.
79  if (get_default_port(wptr) < 0)
80  {
81  err = "WvUrl cannot handle the given protocol.";
82  return;
83  }
84 
85  cptr = strchr(wptr, ':');
86  if (!cptr)
87  {
88  err = "No colon after the protocol.";
89  return;
90  }
91  *cptr = 0;
92  proto = wptr;
93 
94  bool use_slashes = protocol_uses_slashes(proto);
95  wptr = cptr + (use_slashes ? 3 : 1);
96 
97  cptr = strchr(wptr, '@');
98  if (!cptr) // no user given
99  {
100  user = "";
101  password = "";
102  }
103  else
104  {
105  *cptr = 0;
106  char *cptr2 = strchr(wptr, ':');
107  if (cptr2 && (*(cptr2+1) != 0))
108  {
109  *cptr2 = 0;
110  password = cptr2 + 1;
111  }
112  else
113  password = "";
114  user = wptr;
115  wptr = cptr + 1;
116  }
117 
118  cptr = strchr(wptr, '/');
119  if (!cptr) // no path given
120  file = use_slashes ? "/" : "";
121  else
122  {
123  file = cptr;
124  *cptr = 0;
125  }
126 
127  cptr = strchr(wptr, ':');
128  if (!cptr)
129  port = get_default_port(proto);
130  else
131  {
132  port = atoi(cptr+1);
133  *cptr = 0;
134  }
135 
136  hostname = wptr;
137 
138  resolve();
139 }
140 
141 
142 WvUrl::WvUrl(const WvUrl &url) : err("No error")
143 {
144  addr = NULL;
145  resolving = true;
146 
147  proto = url.proto;
148  user = url.user;
149  password = url.password;
150  hostname = url.hostname;
151  file = url.file;
152  port = url.port;
153 
154  resolve();
155 }
156 
157 
158 WvUrl::~WvUrl()
159 {
160  if (addr) delete addr;
161 }
162 
163 
164 bool WvUrl::resolve()
165 {
166  const WvIPAddr *ip;
167  int numaddrs;
168 
169  numaddrs = dns.findaddr(0, hostname, &ip);
170  if (!numaddrs) // error condition
171  {
172  err = WvString("Host '%s' could not be found.", hostname);
173  resolving = false;
174  return false;
175  }
176  else if (numaddrs < 0) // still waiting
177  {
178  resolving = true;
179  return false;
180  }
181  else // got at least one address
182  {
183  resolving = false;
184  if (addr) delete addr;
185  addr = new WvIPPortAddr(*ip, port);
186  return true;
187  }
188 }
189 
190 
191 // Print out the URL, using the port name (if it's not 80), and either the
192 // hostname (if we know it) or the address (if we know that instead.)
193 WvUrl::operator WvString () const
194 {
195  if (!isok())
196  return WvString("(Invalid URL: %s)", err);
197 
198  WvString protostr;
199  if (protocol_uses_slashes(proto))
200  protostr = WvString("%s://", proto);
201  else
202  protostr = WvString("%s:", proto);
203  WvString userstr("");
204  if (user && user.len() != 0)
205  {
206  userstr = WvString("%s", user);
207  if (password && password.len() != 0)
208  userstr.append(WvString(":%s@", password));
209  else
210  userstr.append("@");
211  }
212  WvString portstr("");
213  if (port && port != get_default_port(proto))
214  portstr = WvString(":%s", port);
215  if (hostname)
216  return WvString("%s%s%s%s%s", protostr, userstr, hostname, portstr, file);
217  else if (addr)
218  return WvString("%s%s%s%s%s", protostr, userstr, *addr, portstr, file);
219  else
220  {
221  assert(0);
222  return WvString("(Invalid URL)");
223  }
224 }
225 
226 
DefaultPort
Definition: wvurl.cc:14
trim_string
char * trim_string(char *string)
Trims whitespace from the beginning and end of the character string, including carriage return / line...
Definition: strutils.cc:59
WvString
WvString is an implementation of a simple and efficient printable-string class.
Definition: wvstring.h:329
WvIPPortAddr
An IP+Port address also includes a port number, with the resulting form www.xxx.yyy....
Definition: wvaddr.h:393
WvResolver::findaddr
int findaddr(int msec_timeout, WvStringParm name, WvIPAddr const **addr, WvIPAddrList *addrlist=NULL)
Return -1 on timeout, or the number of addresses found, which may be 0 if the address does not exist.
Definition: wvresolver.cc:149
WvIPAddr
An IP address is made up of a "dotted quad" – four decimal numbers in the form www....
Definition: wvaddr.h:249
WvUrl
Definition: wvurl.h:16
hostname
WvString hostname()
Do gethostname() without a fixed-length buffer.
Definition: strutils.cc:870