292 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			292 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import re
 | |
| import os
 | |
| import sys
 | |
| 
 | |
| from .. import http
 | |
| 
 | |
| try:
 | |
|     # Python 3
 | |
|     from http.client import HTTPException, BadStatusLine
 | |
|     from urllib.request import ProxyHandler, HTTPPasswordMgrWithDefaultRealm, ProxyBasicAuthHandler, ProxyDigestAuthHandler, build_opener, Request
 | |
|     from urllib.error import HTTPError, URLError
 | |
|     import urllib.request as urllib_compat
 | |
| except (ImportError):
 | |
|     # Python 2
 | |
|     from httplib import HTTPException, BadStatusLine
 | |
|     from urllib2 import ProxyHandler, HTTPPasswordMgrWithDefaultRealm, ProxyBasicAuthHandler, ProxyDigestAuthHandler, build_opener, Request
 | |
|     from urllib2 import HTTPError, URLError
 | |
|     import urllib2 as urllib_compat
 | |
| 
 | |
| try:
 | |
|     # Python 3.3
 | |
|     import ConnectionError
 | |
| except (ImportError):
 | |
|     # Python 2.6-3.2
 | |
|     from socket import error as ConnectionError
 | |
| 
 | |
| from ..console_write import console_write
 | |
| from ..unicode import unicode_from_os
 | |
| from ..http.validating_https_handler import ValidatingHTTPSHandler
 | |
| from ..http.debuggable_http_handler import DebuggableHTTPHandler
 | |
| from .rate_limit_exception import RateLimitException
 | |
| from .downloader_exception import DownloaderException
 | |
| from .cert_provider import CertProvider
 | |
| from .decoding_downloader import DecodingDownloader
 | |
| from .limiting_downloader import LimitingDownloader
 | |
| from .caching_downloader import CachingDownloader
 | |
| 
 | |
| 
 | |
class UrlLibDownloader(CertProvider, DecodingDownloader, LimitingDownloader, CachingDownloader):
    """
    A downloader that uses the Python urllib module

    :param settings:
        A dict of the various Package Control settings. The Sublime Text
        Settings API is not used because this code is run in a thread.
    """

    def __init__(self, settings):
        # The urllib OpenerDirector is created lazily by setup_opener() and
        # kept between downloads so connections can be re-used
        self.opener = None
        self.settings = settings

    def close(self):
        """
        Closes any persistent/open connections
        """

        if not self.opener:
            return
        handler = self.get_handler()
        if handler:
            handler.close()
        self.opener = None

    def download(self, url, error_message, timeout, tries, prefer_cached=False):
        """
        Downloads a URL and returns the contents

        Uses the proxy settings from the Package Control.sublime-settings file,
        however there seem to be a decent number of proxies that this code
        does not work with. Patches welcome!

        :param url:
            The URL to download

        :param error_message:
            A string to include in the console error that is printed
            when an error occurs

        :param timeout:
            The int number of seconds to set the timeout to

        :param tries:
            The int number of times to try and download the URL in the case of
            a timeout or HTTP 503 error. A failure on a re-used keep-alive
            connection does not consume a try, since the connection is simply
            re-opened.

        :param prefer_cached:
            If a cached version should be returned instead of trying a new request

        :raises:
            NoCaCertException: when no CA certs can be found for the url
            RateLimitException: when a rate limit is hit
            DownloaderException: when any other download error occurs

        :return:
            The string contents of the URL
        """

        if prefer_cached:
            cached = self.retrieve_cached(url)
            if cached:
                return cached

        self.setup_opener(url, timeout)

        debug = self.settings.get('debug')
        error_string = None
        while tries > 0:
            tries -= 1
            try:
                request_headers = {
                    "User-Agent": self.settings.get('user_agent'),
                    # Don't be alarmed if the response from the server does not
                    # select one of these since the server runs a relatively new
                    # version of OpenSSL which supports compression on the SSL
                    # layer, and Apache will use that instead of HTTP-level
                    # encoding.
                    "Accept-Encoding": "gzip,deflate"
                }
                request_headers = self.add_conditional_headers(url, request_headers)
                request = Request(url, headers=request_headers)
                http_file = self.opener.open(request, timeout=timeout)
                self.handle_rate_limit(http_file.headers, url)

                result = http_file.read()
                # Make sure the response is closed so we can re-use the connection
                http_file.close()

                encoding = http_file.headers.get('content-encoding')
                result = self.decode_response(encoding, result)

                return self.cache_result('get', url, http_file.getcode(),
                    http_file.headers, result)

            except (HTTPException) as e:
                # Since we use keep-alives, it is possible the other end closed
                # the connection, and we may just need to re-open
                if isinstance(e, BadStatusLine):
                    handler = self.get_handler()
                    if handler and handler.use_count > 1:
                        self.close()
                        self.setup_opener(url, timeout)
                        # The failure was a stale connection, not a real
                        # attempt, so give the try back
                        tries += 1
                        continue

                error_string = u'%s HTTP exception %s (%s) downloading %s.' % (
                    error_message, e.__class__.__name__, unicode_from_os(e), url)

            except (HTTPError) as e:
                # Make sure the response is closed so we can re-use the connection
                e.read()
                e.close()

                # Make sure we obey Github's rate limiting headers
                self.handle_rate_limit(e.headers, url)

                status = unicode_from_os(e.code)

                # Handle cached responses
                if status == '304':
                    return self.cache_result('get', url, int(e.code), e.headers, b'')

                # Bitbucket and Github return 503 a decent amount. Only retry
                # when there are tries left, otherwise fall through and report
                # the HTTP error.
                if status == '503' and tries != 0:
                    error_string = u'Downloading %s was rate limited, trying again' % url
                    if debug:
                        console_write(error_string, True)
                    continue

                error_string = u'%s HTTP error %s downloading %s.' % (
                    error_message, status, url)

            except (URLError) as e:
                reason = unicode_from_os(e.reason)

                # Bitbucket and Github timeout a decent amount
                if reason in ('The read operation timed out', 'timed out'):
                    error_string = u'Downloading %s timed out' % url
                    if tries:
                        error_string += ', trying again'
                        if debug:
                            console_write(error_string, True)
                    # When no tries are left the loop condition ends the loop
                    # and the timeout message is raised below
                    continue

                error_string = u'%s URL error %s downloading %s.' % (
                    error_message, reason, url)

            except (ConnectionError):
                # Handle broken pipes/reset connections by creating a new opener, and
                # thus getting new handlers and a new connection.
                #
                # The try is only given back when the connection that failed
                # had already been re-used (same test as the BadStatusLine
                # handling above). A fresh connection that is reset consumes a
                # try, so a host that always drops the connection can not keep
                # this loop running forever.
                handler = self.get_handler()
                if handler and handler.use_count > 1:
                    tries += 1

                error_string = u'Connection went away while trying to download %s' % url
                if tries:
                    error_string += ', trying again'
                    if debug:
                        console_write(error_string, True)

                self.opener = None
                self.setup_opener(url, timeout)

                continue

            break

        raise DownloaderException(error_string)

    def get_handler(self):
        """
        Get the HTTPHandler object for the current connection

        :return:
            The first ValidatingHTTPSHandler or DebuggableHTTPHandler
            registered on the opener, or None if there is no opener or no
            such handler
        """

        if not self.opener:
            return None

        for handler in self.opener.handlers:
            if isinstance(handler, (ValidatingHTTPSHandler, DebuggableHTTPHandler)):
                return handler

        return None

    def setup_opener(self, url, timeout):
        """
        Sets up a urllib OpenerDirector to be used for requests. There is a
        fair amount of custom urllib code in Package Control, and part of it
        is to handle proxies and keep-alives. Creating an opener the way
        below is because the handlers have been customized to send the
        "Connection: Keep-Alive" header and hold onto connections so they
        can be re-used.

        Does nothing if an opener already exists.

        :param url:
            The URL to download

        :param timeout:
            The int number of seconds to set the timeout to
        """

        if self.opener:
            return

        http_proxy = self.settings.get('http_proxy')
        https_proxy = self.settings.get('https_proxy')
        if http_proxy or https_proxy:
            proxies = {}
            if http_proxy:
                proxies['http'] = http_proxy
            if https_proxy:
                proxies['https'] = https_proxy
            proxy_handler = ProxyHandler(proxies)
        else:
            # No explicit proxies configured, let urllib use its defaults
            proxy_handler = ProxyHandler()

        password_manager = HTTPPasswordMgrWithDefaultRealm()
        proxy_username = self.settings.get('proxy_username')
        proxy_password = self.settings.get('proxy_password')
        if proxy_username and proxy_password:
            if http_proxy:
                password_manager.add_password(None, http_proxy, proxy_username,
                    proxy_password)
            if https_proxy:
                password_manager.add_password(None, https_proxy, proxy_username,
                    proxy_password)

        handlers = [proxy_handler]

        basic_auth_handler = ProxyBasicAuthHandler(password_manager)
        digest_auth_handler = ProxyDigestAuthHandler(password_manager)
        handlers.extend([digest_auth_handler, basic_auth_handler])

        debug = self.settings.get('debug')

        if debug:
            # Debug output tends to get pasted into bug reports, so only show
            # whether a password is set, never the password itself
            if proxy_password:
                shown_password = u'<hidden>'
            else:
                shown_password = proxy_password

            console_write(u"Urllib Debug Proxy", True)
            console_write(u"  http_proxy: %s" % http_proxy)
            console_write(u"  https_proxy: %s" % https_proxy)
            console_write(u"  proxy_username: %s" % proxy_username)
            console_write(u"  proxy_password: %s" % shown_password)

        secure_url_match = re.match(r'^https://([^/]+)', url)
        if secure_url_match is not None:
            # HTTPS URLs get a handler that is given the CA bundle returned by
            # check_certs() for the domain
            secure_domain = secure_url_match.group(1)
            bundle_path = self.check_certs(secure_domain, timeout)
            bundle_path = bundle_path.encode(sys.getfilesystemencoding())
            handlers.append(ValidatingHTTPSHandler(ca_certs=bundle_path,
                debug=debug, passwd=password_manager,
                user_agent=self.settings.get('user_agent')))
        else:
            handlers.append(DebuggableHTTPHandler(debug=debug,
                passwd=password_manager))
        self.opener = build_opener(*handlers)

    def supports_ssl(self):
        """
        Indicates if the object can handle HTTPS requests

        :return:
            If the object supports HTTPS requests
        """
        return 'ssl' in sys.modules and hasattr(urllib_compat, 'HTTPSHandler')
 |