"""Utilities using NDG HTTPS Client, including a main module that can be used to
fetch from a URL.
"""
# Module metadata (listing line-number artefacts removed so the file parses).
__author__ = "R B Wilkinson"
__date__ = "09/12/11"
__copyright__ = "(C) 2011 Science and Technology Facilities Council"
__license__ = "BSD - see LICENSE file in top-level directory"
__contact__ = "Philip.Kershaw@stfc.ac.uk"
__revision__ = '$Id$'
10
import logging
from optparse import OptionParser
import os
import sys

if sys.version_info[0] > 2:
    import http.cookiejar as cookiejar_
    import http.client as http_client_
    from urllib.request import Request as Request_
    from urllib.request import HTTPHandler as HTTPHandler_
    from urllib.request import HTTPCookieProcessor as HTTPCookieProcessor_
    from urllib.request import HTTPBasicAuthHandler as HTTPBasicAuthHandler_
    from urllib.request import HTTPPasswordMgrWithDefaultRealm as \
        HTTPPasswordMgrWithDefaultRealm_
    from urllib.request import ProxyHandler as ProxyHandler_
    from urllib.error import HTTPError as HTTPError_
    import urllib.parse as urlparse_
else:
    import cookielib as cookiejar_
    import httplib as http_client_
    from urllib2 import Request as Request_
    from urllib2 import HTTPHandler as HTTPHandler_
    from urllib2 import HTTPCookieProcessor as HTTPCookieProcessor_
    from urllib2 import HTTPBasicAuthHandler as HTTPBasicAuthHandler_
    from urllib2 import HTTPPasswordMgrWithDefaultRealm as \
        HTTPPasswordMgrWithDefaultRealm_
    from urllib2 import ProxyHandler as ProxyHandler_
    from urllib2 import HTTPError as HTTPError_
    import urlparse as urlparse_

from ndg.httpsclient.urllib2_build_opener import build_opener
from ndg.httpsclient.https import HTTPSContextHandler
from ndg.httpsclient import ssl_context_util
44
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
46
class AccumulatingHTTPCookieProcessor(HTTPCookieProcessor_):
    """Cookie processor that adds new cookies (instead of replacing the existing
    ones as HTTPCookieProcessor does)
    """

    def http_request(self, request):
        """Processes cookies for a HTTP request.
        @param request: request to process
        @type request: urllib2.Request
        @return: request
        @rtype: urllib2.Request
        """
        COOKIE_HEADER_NAME = "Cookie"

        # Let the cookie jar write its cookies onto a throw-away copy of the
        # request (no headers), so they can be read back separately from any
        # Cookie header the caller has already set on the real request.
        tmp_request = Request_(request.get_full_url(), request.data, {},
                               request.origin_req_host,
                               request.unverifiable)
        self.cookiejar.add_cookie_header(tmp_request)

        new_cookies = tmp_request.get_header(COOKIE_HEADER_NAME)
        if new_cookies:
            if request.has_header(COOKIE_HEADER_NAME):
                # Append the jar's cookies to the ones already on the request.
                old_cookies = request.get_header(COOKIE_HEADER_NAME)
                merged_cookies = '; '.join([old_cookies, new_cookies])
                request.add_unredirected_header(COOKIE_HEADER_NAME,
                                                merged_cookies)
            else:
                # No cookies on the request yet: use the jar's cookies as is.
                request.add_unredirected_header(COOKIE_HEADER_NAME,
                                                new_cookies)
        return request

    # Process cookies for HTTPS requests in the same way.
    https_request = http_request
79
80
class URLFetchError(Exception):
    """Error fetching content from URL"""
83
84
def fetch_from_url(url, config, data=None, handlers=None):
    """Returns data retrieved from a URL.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: data retrieved from URL
    @raise URLFetchError: if the request did not complete with HTTP status 200
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    if return_code and return_code == http_client_.OK:
        try:
            return response.read()
        finally:
            # Release the connection even if reading the body fails.
            response.close()

    # A response object can exist for non-200 replies that urllib did not
    # raise for; close it so the connection is not leaked.
    if response is not None:
        response.close()
    raise URLFetchError(return_message)
101
def fetch_from_url_to_file(url, config, output_file, data=None, handlers=None):
    """Writes data retrieved from a URL to a file.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param output_file: output file
    @type output_file: basestring
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message
        boolean indicating whether access was successful)
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    success = return_code == http_client_.OK
    if success:
        try:
            return_data = response.read()
        finally:
            response.close()
        # The response body is bytes on Python 3, so the file must be opened
        # in binary mode; text mode raised TypeError on write.
        with open(output_file, "wb") as outfile:
            outfile.write(return_data)
    elif response is not None:
        # Non-200 reply that still produced a response object: do not leak it.
        response.close()

    return return_code, return_message, success
125
126
def fetch_stream_from_url(url, config, data=None, handlers=None):
    """Returns the open response stream for a URL.
    @param url: URL to attempt to open
    @type url: basestring
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request
    @type handlers: iterable
    @return: open response for the URL; the caller is responsible for closing it
    @rtype: file derived type
    @raise URLFetchError: if the request did not complete with HTTP status 200
    """
    return_code, return_message, response = open_url(url, config, data=data,
                                                     handlers=handlers)
    if return_code and return_code == http_client_.OK:
        return response

    # The stream is not handed to the caller on failure, so close it here.
    if response is not None:
        response.close()
    raise URLFetchError(return_message)
146
147
def open_url(url, config, data=None, handlers=None):
    """Attempts to open a connection to a specified URL.
    @param url: URL to attempt to open
    @param config: SSL context configuration
    @type config: Configuration
    @param data: HTTP POST data
    @type data: str
    @param handlers: list of custom urllib2 handlers to add to the request;
    the sequence passed in is not modified
    @type handlers: iterable
    @return: tuple (
        returned HTTP status code or 0 if an error occurred
        returned message or error description
        response object)
    """
    debuglevel = 1 if config.debug else 0

    # Use the caller's cookie jar when one is supplied. The test must be
    # against None: an empty CookieJar is falsy (it defines __len__), and a
    # truthiness test would silently swap it for a private jar, so cookies set
    # by the server would never reach the caller.
    if config.cookie is not None:
        cj = config.cookie
    else:
        cj = cookiejar_.CookieJar()

    # Cookie handler that accumulates cookies rather than replacing any that
    # are already set on the request.
    cookie_handler = AccumulatingHTTPCookieProcessor(cj)

    # Work on a private copy so the caller's handler list is never mutated
    # (and so any iterable, not just a list, is accepted).
    handlers = list(handlers) if handlers else []

    handlers.append(cookie_handler)

    if config.debug:
        http_handler = HTTPHandler_(debuglevel=debuglevel)
        https_handler = HTTPSContextHandler(config.ssl_context,
                                            debuglevel=debuglevel)
        handlers.extend([http_handler, https_handler])

    if config.http_basicauth:
        # realm=None registers the credentials for the default realm.
        auth_handler = HTTPBasicAuthHandler_(HTTPPasswordMgrWithDefaultRealm_())
        auth_handler.add_password(realm=None, uri=url,
                                  user=config.http_basicauth[0],
                                  passwd=config.http_basicauth[1])
        handlers.append(auth_handler)

    # An explicit empty ProxyHandler disables proxying for hosts listed in
    # no_proxy; otherwise any proxies from the configuration are installed.
    if not _should_use_proxy(url, config.no_proxy):
        handlers.append(ProxyHandler_({}))
        log.debug("Not using proxy")
    elif config.proxies:
        handlers.append(ProxyHandler_(config.proxies))
        log.debug("Configuring proxies: %s", config.proxies)

    opener = build_opener(*handlers, ssl_context=config.ssl_context)

    headers = config.headers
    if headers is None:
        headers = {}

    request = Request_(url, data, headers)

    # Defaults reported to the caller if the request fails before a status
    # code is obtained.
    return_code = 0
    return_message = ''
    response = None

    try:
        response = opener.open(request)
        return_message = response.msg
        return_code = response.code
        if log.isEnabledFor(logging.DEBUG):
            for index, cookie in enumerate(cj):
                log.debug("%s : %s", index, cookie)

    except HTTPError_ as exc:
        # HTTP-level failure: report the server's status code and reason.
        return_code = exc.code
        return_message = "Error: %s" % exc.msg
        if log.isEnabledFor(logging.DEBUG):
            log.debug("%s %s", exc.code, exc.msg)

    except Exception as exc:
        # Deliberately broad: any other failure is reported through the
        # returned tuple (status code 0) rather than raised to the caller.
        return_message = "Error: %s" % str(exc)
        if log.isEnabledFor(logging.DEBUG):
            import traceback
            log.debug(traceback.format_exc())

    return (return_code, return_message, response)
241
242
def _should_use_proxy(url, no_proxy=None):
    """Determines whether a proxy should be used to open a connection to the
    specified URL, based on the value of the no_proxy environment variable.
    @param url: URL
    @type url: basestring or urllib2.Request
    @param no_proxy: comma-separated list of host names for which no proxy
    should be used; if None, the no_proxy environment variable is used instead
    @type no_proxy: basestring
    @return: False if the URL's host is listed in no_proxy, otherwise True
    @rtype: bool
    """
    if no_proxy is None:
        no_proxy_effective = os.environ.get('no_proxy', '')
    else:
        no_proxy_effective = no_proxy

    hostname = urlparse_.urlparse(_url_as_string(url)).hostname
    # urlparse always reports the host name in lower case, so the configured
    # entries are lower-cased too; host names are case-insensitive.
    excluded_hosts = [h.strip().lower() for h in no_proxy_effective.split(',')]
    return not any(hostname == host for host in excluded_hosts)
260
def _url_as_string(url):
    """Returns the URL string from a URL value that is either a string or
    urllib2.Request.
    @param url: URL
    @type url: basestring or urllib2.Request
    @return: URL string
    @rtype: basestring
    @raise TypeError: if url is neither a string nor a request object
    """
    if isinstance(url, Request_):
        return url.get_full_url()
    # type(u"") is unicode on Python 2 and str on Python 3, so both kinds of
    # Python 2 string are accepted, as the documented basestring type implies.
    elif isinstance(url, (str, type(u""))):
        return url
    else:
        raise TypeError("Expected type %r or %r" %
                        (str, Request_))
276
277
class Configuration(object):
    """Connection configuration.
    """

    def __init__(self, ssl_context, debug=False, proxies=None, no_proxy=None,
                 cookie=None, http_basicauth=None, headers=None):
        """
        @param ssl_context: SSL context to use with this configuration
        @type ssl_context: OpenSSL.SSL.Context
        @param debug: if True, output debugging information
        @type debug: bool
        @param proxies: proxies to use for
        @type proxies: dict with basestring keys and values
        @param no_proxy: hosts for which a proxy should not be used
        @type no_proxy: basestring
        @param cookie: cookies to set for request
        @type cookie: cookielib.CookieJar (python 3 - http.cookiejar)
        @param http_basicauth: http authentication, or None
        @type http_basicauth: tuple of (username,password)
        @param headers: http headers
        @type headers: dict
        """
        # Values are stored unchanged as plain attributes.
        self.ssl_context = ssl_context
        self.debug = debug
        self.proxies = proxies
        self.no_proxy = no_proxy
        self.cookie = cookie
        self.http_basicauth = http_basicauth
        self.headers = headers
306
307
def main():
    '''Utility to fetch data using HTTP or HTTPS GET from a specified URL.

    Parses the command line, builds an SSL context and connection
    configuration from it, then either writes the response body to the file
    given with --fetch or prints it to standard output. If --post-data-file
    is given, its content is sent as the request body.

    Exits with a non-zero status and an error message when fetching to a file
    fails; returns normally on success.
    '''
    parser = OptionParser(usage="%prog [options] url")
    parser.add_option("-c", "--certificate", dest="cert_file", metavar="FILE",
                      default=os.path.expanduser("~/credentials.pem"),
                      help="Certificate file - defaults to $HOME/credentials.pem")
    parser.add_option("-k", "--private-key", dest="key_file", metavar="FILE",
                      default=None,
                      help="Private key file - defaults to the certificate file")
    parser.add_option("-t", "--ca-certificate-dir", dest="ca_dir",
                      metavar="PATH",
                      default=None,
                      help="Trusted CA certificate file directory")
    parser.add_option("-d", "--debug", action="store_true", dest="debug",
                      default=False,
                      help="Print debug information.")
    parser.add_option("-p", "--post-data-file", dest="data_file",
                      metavar="FILE", default=None,
                      help="POST data file")
    parser.add_option("-f", "--fetch", dest="output_file", metavar="FILE",
                      default=None, help="Output file")
    parser.add_option("-n", "--no-verify-peer", action="store_true",
                      dest="no_verify_peer", default=False,
                      help="Skip verification of peer certificate.")
    parser.add_option("-a", "--basicauth", dest="basicauth",
                      metavar="USER:PASSWD",
                      default=None,
                      help="HTTP authentication credentials")
    parser.add_option("--header", action="append", dest="headers",
                      metavar="HEADER: VALUE",
                      help="Add HTTP header to request")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("Incorrect number of arguments")

    url = args[0]

    if options.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    # File and directory options are only used if the path actually exists;
    # otherwise they are treated as not given.
    if options.key_file and os.path.exists(options.key_file):
        key_file = options.key_file
    else:
        key_file = None

    if options.cert_file and os.path.exists(options.cert_file):
        cert_file = options.cert_file
    else:
        cert_file = None

    if options.ca_dir and os.path.exists(options.ca_dir):
        ca_dir = options.ca_dir
    else:
        ca_dir = None

    verify_peer = not options.no_verify_peer

    if options.data_file and os.path.exists(options.data_file):
        # Binary mode: Python 3's urllib requires the request body as bytes.
        with open(options.data_file, "rb") as data_file:
            data = data_file.read()
    else:
        data = None

    if options.basicauth:
        http_basicauth = options.basicauth.split(':', 1)
        if len(http_basicauth) != 2:
            # Without this check a missing ':' surfaced later as IndexError.
            parser.error("Basic auth credentials must be given as USER:PASSWD")
    else:
        http_basicauth = None

    headers = {}
    for header in options.headers or []:
        if ':' not in header:
            # Without this check a missing ':' raised ValueError on unpacking.
            parser.error("Headers must be given as 'HEADER: VALUE'")
        key, val = header.split(':', 1)
        headers[key.strip()] = val.lstrip()

    # If a private key file is not specified, the key is assumed to be stored
    # in the certificate file (per the --private-key help text).
    # NOTE(review): that fallback is presumably applied by make_ssl_context
    # when key_file is None - confirm.
    ssl_context = ssl_context_util.make_ssl_context(key_file,
                                                    cert_file,
                                                    None,
                                                    ca_dir,
                                                    verify_peer,
                                                    url)

    config = Configuration(ssl_context,
                           options.debug,
                           http_basicauth=http_basicauth,
                           headers=headers)
    if options.output_file:
        return_code, return_message, success = fetch_from_url_to_file(
            url,
            config,
            options.output_file,
            data)
        if not success:
            # A string argument is printed to stderr and gives exit status 1.
            # Passing (code, message) as two arguments made the process exit
            # with status 1 even after a successful fetch.
            raise SystemExit("%s %s" % (return_code, return_message))
    else:
        # Pass the POST data through; it used to be dropped on this path.
        content = fetch_from_url(url, config, data)
        print(content)
407
408
# Script entry point: set up default logging, then run the command-line tool.
if __name__ == '__main__':
    logging.basicConfig()
    main()
412