import re
|
|
import os
|
|
import sys
|
|
|
|
from .. import http
|
|
|
|
try:
|
|
# Python 3
|
|
from http.client import HTTPException, BadStatusLine
|
|
from urllib.request import ProxyHandler, HTTPPasswordMgrWithDefaultRealm, ProxyBasicAuthHandler, ProxyDigestAuthHandler, build_opener, Request
|
|
from urllib.error import HTTPError, URLError
|
|
import urllib.request as urllib_compat
|
|
except (ImportError):
|
|
# Python 2
|
|
from httplib import HTTPException, BadStatusLine
|
|
from urllib2 import ProxyHandler, HTTPPasswordMgrWithDefaultRealm, ProxyBasicAuthHandler, ProxyDigestAuthHandler, build_opener, Request
|
|
from urllib2 import HTTPError, URLError
|
|
import urllib2 as urllib_compat
|
|
|
|
try:
|
|
# Python 3.3
|
|
import ConnectionError
|
|
except (ImportError):
|
|
# Python 2.6-3.2
|
|
from socket import error as ConnectionError
|
|
|
|
from ..console_write import console_write
|
|
from ..unicode import unicode_from_os
|
|
from ..http.validating_https_handler import ValidatingHTTPSHandler
|
|
from ..http.debuggable_http_handler import DebuggableHTTPHandler
|
|
from .rate_limit_exception import RateLimitException
|
|
from .downloader_exception import DownloaderException
|
|
from .cert_provider import CertProvider
|
|
from .decoding_downloader import DecodingDownloader
|
|
from .limiting_downloader import LimitingDownloader
|
|
from .caching_downloader import CachingDownloader
|
|
|
|
|
|
class UrlLibDownloader(CertProvider, DecodingDownloader, LimitingDownloader, CachingDownloader):

    """
    A downloader that uses the Python urllib module

    :param settings:
        A dict of the various Package Control settings. The Sublime Text
        Settings API is not used because this code is run in a thread.
    """

    def __init__(self, settings):
        # The urllib OpenerDirector is created lazily by setup_opener() and
        # torn down by close(); None means "no opener/connection yet"
        self.opener = None
        self.settings = settings

    def close(self):
        """
        Closes any persistent/open connections
        """

        if not self.opener:
            return
        handler = self.get_handler()
        if handler:
            handler.close()
        self.opener = None

    def download(self, url, error_message, timeout, tries, prefer_cached=False):
        """
        Downloads a URL and returns the contents

        Uses the proxy settings from the Package Control.sublime-settings file,
        however there seem to be a decent number of proxies that this code
        does not work with. Patches welcome!

        :param url:
            The URL to download

        :param error_message:
            A string to include in the console error that is printed
            when an error occurs

        :param timeout:
            The int number of seconds to set the timeout to

        :param tries:
            The int number of times to try and download the URL in the case of
            a timeout or HTTP 503 error

        :param prefer_cached:
            If a cached version should be returned instead of trying a new request

        :raises:
            NoCaCertException: when no CA certs can be found for the url
            RateLimitException: when a rate limit is hit
            DownloaderException: when any other download error occurs

        :return:
            The string contents of the URL
        """

        if prefer_cached:
            cached = self.retrieve_cached(url)
            if cached:
                return cached

        self.setup_opener(url, timeout)

        debug = self.settings.get('debug')
        error_string = None
        while tries > 0:
            tries -= 1
            try:
                request_headers = {
                    "User-Agent": self.settings.get('user_agent'),
                    # Don't be alarmed if the response from the server does not
                    # select one of these since the server runs a relatively new
                    # version of OpenSSL which supports compression on the SSL
                    # layer, and Apache will use that instead of HTTP-level
                    # encoding.
                    "Accept-Encoding": "gzip,deflate"
                }
                request_headers = self.add_conditional_headers(url, request_headers)
                request = Request(url, headers=request_headers)
                http_file = self.opener.open(request, timeout=timeout)
                self.handle_rate_limit(http_file.headers, url)

                result = http_file.read()
                # Make sure the response is closed so we can re-use the connection
                http_file.close()

                encoding = http_file.headers.get('content-encoding')
                result = self.decode_response(encoding, result)

                return self.cache_result('get', url, http_file.getcode(),
                    http_file.headers, result)

            except HTTPException as e:
                # Since we use keep-alives, it is possible the other end closed
                # the connection, and we may just need to re-open
                if isinstance(e, BadStatusLine):
                    handler = self.get_handler()
                    if handler and handler.use_count > 1:
                        self.close()
                        self.setup_opener(url, timeout)
                        # A stale keep-alive connection failing should not
                        # count against the caller's retry budget
                        tries += 1
                        continue

                error_string = u'%s HTTP exception %s (%s) downloading %s.' % (
                    error_message, e.__class__.__name__, unicode_from_os(e), url)

            except HTTPError as e:
                # Make sure the response is closed so we can re-use the connection
                e.read()
                e.close()

                # Make sure we obey Github's rate limiting headers
                self.handle_rate_limit(e.headers, url)

                # Handle cached responses
                if unicode_from_os(e.code) == '304':
                    return self.cache_result('get', url, int(e.code), e.headers, b'')

                # Bitbucket and Github return 503 a decent amount
                if unicode_from_os(e.code) == '503' and tries != 0:
                    error_string = u'Downloading %s was rate limited' % url
                    if tries:
                        error_string += ', trying again'
                        if debug:
                            console_write(error_string, True)
                    continue

                error_string = u'%s HTTP error %s downloading %s.' % (
                    error_message, unicode_from_os(e.code), url)

            except URLError as e:

                # Bitbucket and Github timeout a decent amount
                if unicode_from_os(e.reason) == 'The read operation timed out' \
                        or unicode_from_os(e.reason) == 'timed out':
                    error_string = u'Downloading %s timed out' % url
                    if tries:
                        error_string += ', trying again'
                        if debug:
                            console_write(error_string, True)
                    continue

                error_string = u'%s URL error %s downloading %s.' % (
                    error_message, unicode_from_os(e.reason), url)

            except ConnectionError:
                # Handle broken pipes/reset connections by creating a new opener, and
                # thus getting new handlers and a new connection
                error_string = u'Connection went away while trying to download %s, trying again' % url
                if debug:
                    console_write(error_string, True)

                # Drop the opener entirely so new handlers (and thus a fresh
                # socket) are built, and don't charge this attempt to the caller
                self.opener = None
                self.setup_opener(url, timeout)
                tries += 1

                continue

            break

        raise DownloaderException(error_string)

    def get_handler(self):
        """
        Get the HTTPHandler object for the current connection

        :return:
            The ValidatingHTTPSHandler or DebuggableHTTPHandler from the
            current opener, or None if no opener (or no such handler) exists
        """

        if not self.opener:
            return None

        for handler in self.opener.handlers:
            # Tuple form instead of chained isinstance() calls
            if isinstance(handler, (ValidatingHTTPSHandler, DebuggableHTTPHandler)):
                return handler
        return None

    def setup_opener(self, url, timeout):
        """
        Sets up a urllib OpenerDirector to be used for requests. There is a
        fair amount of custom urllib code in Package Control, and part of it
        is to handle proxies and keep-alives. Creating an opener the way
        below is because the handlers have been customized to send the
        "Connection: Keep-Alive" header and hold onto connections so they
        can be re-used.

        :param url:
            The URL to download

        :param timeout:
            The int number of seconds to set the timeout to
        """

        if not self.opener:
            http_proxy = self.settings.get('http_proxy')
            https_proxy = self.settings.get('https_proxy')
            if http_proxy or https_proxy:
                proxies = {}
                if http_proxy:
                    proxies['http'] = http_proxy
                if https_proxy:
                    proxies['https'] = https_proxy
                proxy_handler = ProxyHandler(proxies)
            else:
                # No explicit proxies configured - fall back to urllib's
                # environment-based proxy detection
                proxy_handler = ProxyHandler()

            password_manager = HTTPPasswordMgrWithDefaultRealm()
            proxy_username = self.settings.get('proxy_username')
            proxy_password = self.settings.get('proxy_password')
            if proxy_username and proxy_password:
                if http_proxy:
                    password_manager.add_password(None, http_proxy, proxy_username,
                        proxy_password)
                if https_proxy:
                    password_manager.add_password(None, https_proxy, proxy_username,
                        proxy_password)

            handlers = [proxy_handler]

            basic_auth_handler = ProxyBasicAuthHandler(password_manager)
            digest_auth_handler = ProxyDigestAuthHandler(password_manager)
            handlers.extend([digest_auth_handler, basic_auth_handler])

            debug = self.settings.get('debug')

            if debug:
                console_write(u"Urllib Debug Proxy", True)
                console_write(u"  http_proxy: %s" % http_proxy)
                console_write(u"  https_proxy: %s" % https_proxy)
                console_write(u"  proxy_username: %s" % proxy_username)
                console_write(u"  proxy_password: %s" % proxy_password)

            secure_url_match = re.match('^https://([^/]+)', url)
            # PEP 8: identity comparison with None, not !=
            if secure_url_match is not None:
                secure_domain = secure_url_match.group(1)
                bundle_path = self.check_certs(secure_domain, timeout)
                # ssl wants a filesystem-encoded bytes path for ca_certs
                bundle_path = bundle_path.encode(sys.getfilesystemencoding())
                handlers.append(ValidatingHTTPSHandler(ca_certs=bundle_path,
                    debug=debug, passwd=password_manager,
                    user_agent=self.settings.get('user_agent')))
            else:
                handlers.append(DebuggableHTTPHandler(debug=debug,
                    passwd=password_manager))
            self.opener = build_opener(*handlers)

    def supports_ssl(self):
        """
        Indicates if the object can handle HTTPS requests

        :return:
            If the object supports HTTPS requests
        """
        return 'ssl' in sys.modules and hasattr(urllib_compat, 'HTTPSHandler')
|