268 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			268 lines
		
	
	
		
			9.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import tempfile
 | |
| import re
 | |
| import os
 | |
| 
 | |
| from ..console_write import console_write
 | |
| from ..open_compat import open_compat, read_compat
 | |
| from .cli_downloader import CliDownloader
 | |
| from .non_clean_exit_error import NonCleanExitError
 | |
| from .rate_limit_exception import RateLimitException
 | |
| from .downloader_exception import DownloaderException
 | |
| from .cert_provider import CertProvider
 | |
| from .limiting_downloader import LimitingDownloader
 | |
| from .caching_downloader import CachingDownloader
 | |
| 
 | |
| 
 | |
class CurlDownloader(CliDownloader, CertProvider, LimitingDownloader, CachingDownloader):
    """
    A downloader that uses the command line program curl

    :param settings:
        A dict of the various Package Control settings. The Sublime Text
        Settings API is not used because this code is run in a thread.

    :raises:
        BinaryNotFoundError: when curl can not be found
    """

    def __init__(self, settings):
        self.settings = settings
        # find_binary() is not defined in this class; it comes from a base
        self.curl = self.find_binary('curl')

    def close(self):
        """
        No-op for compatibility with UrllibDownloader and WinINetDownloader
        """

        pass

    def download(self, url, error_message, timeout, tries, prefer_cached=False):
        """
        Downloads a URL and returns the contents

        :param url:
            The URL to download

        :param error_message:
            A string to include in the console error that is printed
            when an error occurs

        :param timeout:
            The int number of seconds to set the timeout to

        :param tries:
            The int number of times to try and download the URL in the case of
            a timeout or HTTP 503 error

        :param prefer_cached:
            If a cached version should be returned instead of trying a new request

        :raises:
            NoCaCertException: when no CA certs can be found for the url
            RateLimitException: when a rate limit is hit
            DownloaderException: when any other download error occurs

        :return:
            The string contents of the URL
        """

        if prefer_cached:
            cached = self.retrieve_cached(url)
            if cached:
                return cached

        # Only the generated path is kept; the NamedTemporaryFile object itself
        # is discarded right away and curl is handed the path via --dump-header.
        # NOTE(review): this reuses a temp name without holding the file, which
        # looks race-prone - confirm before replacing with tempfile.mkstemp(),
        # since clean_tmp_file() (defined elsewhere) owns the removal.
        self.tmp_file = tempfile.NamedTemporaryFile().name
        command = [self.curl, '--user-agent', self.settings.get('user_agent'),
            '--connect-timeout', str(int(timeout)), '-sSL',
            # Don't be alarmed if the response from the server does not select
            # one of these since the server runs a relatively new version of
            # OpenSSL which supports compression on the SSL layer, and Apache
            # will use that instead of HTTP-level encoding.
            '--compressed',
            # We have to capture the headers to check for rate limit info
            '--dump-header', self.tmp_file]

        request_headers = self.add_conditional_headers(url, {})

        for name, value in request_headers.items():
            command.extend(['--header', "%s: %s" % (name, value)])

        # HTTPS URLs get an explicit CA bundle for the host being contacted
        secure_url_match = re.match('^https://([^/]+)', url)
        if secure_url_match is not None:
            secure_domain = secure_url_match.group(1)
            bundle_path = self.check_certs(secure_domain, timeout)
            command.extend(['--cacert', bundle_path])

        debug = self.settings.get('debug')
        if debug:
            command.append('-v')

        http_proxy = self.settings.get('http_proxy')
        https_proxy = self.settings.get('https_proxy')
        proxy_username = self.settings.get('proxy_username')
        proxy_password = self.settings.get('proxy_password')

        if debug:
            # NOTE(review): this writes the proxy password to the console in
            # clear text when debug is enabled - confirm that is acceptable.
            console_write(u"Curl Debug Proxy", True)
            console_write(u"  http_proxy: %s" % http_proxy)
            console_write(u"  https_proxy: %s" % https_proxy)
            console_write(u"  proxy_username: %s" % proxy_username)
            console_write(u"  proxy_password: %s" % proxy_password)

        if http_proxy or https_proxy:
            command.append('--proxy-anyauth')

        if proxy_username or proxy_password:
            command.extend(['-U', u"%s:%s" % (proxy_username, proxy_password)])

        # The proxy locations are passed to curl through the environment
        if http_proxy:
            os.putenv('http_proxy', http_proxy)
        if https_proxy:
            os.putenv('HTTPS_PROXY', https_proxy)

        command.append(url)

        error_string = None
        while tries > 0:
            tries -= 1
            try:
                output = self.execute(command)

                with open_compat(self.tmp_file, 'r') as f:
                    headers_str = read_compat(f)
                self.clean_tmp_file()

                message = 'OK'
                status = 200
                headers = {}
                for header in headers_str.splitlines():
                    if header[0:5] == 'HTTP/':
                        # Every status line overwrites the previous one, so
                        # the last status line in the dump is the one used
                        message = re.sub(r'^HTTP/\d\.\d\s+\d+\s*', '', header)
                        status = int(re.sub(r'^HTTP/\d\.\d\s+(\d+)(\s+.*)?$', '\\1', header))
                        continue
                    if header.strip() == '':
                        continue
                    # A line without a colon is not a name/value pair; skip it
                    # rather than failing the whole download on the unpack
                    if ':' not in header:
                        continue
                    name, value = header.split(':', 1)
                    headers[name.lower()] = value.strip()

                if debug:
                    self.print_debug(self.stderr.decode('utf-8'))

                self.handle_rate_limit(headers, url)

                if status not in [200, 304]:
                    # Report HTTP failures through the same path as a failed
                    # curl run, using the return code 22 that is handled below
                    e = NonCleanExitError(22)
                    e.stderr = "%s %s" % (status, message)
                    raise e

                output = self.cache_result('get', url, status, headers, output)

                return output

            except (NonCleanExitError) as e:
                # stderr may be bytes. It has to be text regardless of the
                # debug setting, since it is matched against a text regex and
                # interpolated into the error message below.
                if isinstance(e.stderr, bytes):
                    e.stderr = e.stderr.decode('utf-8', 'replace')

                # Stderr is used for both the error message and the debug info
                # so we need to process it to extract the debug info
                if self.settings.get('debug'):
                    e.stderr = self.print_debug(e.stderr)

                self.clean_tmp_file()

                if e.returncode == 22:
                    code = re.sub(r'^.*?(\d+)([\w\s]+)?$', '\\1', e.stderr)
                    if code == '503' and tries != 0:
                        # GitHub and BitBucket seem to rate limit via 503
                        error_string = u'Downloading %s was rate limited' % url
                        error_string += ', trying again'
                        if debug:
                            console_write(error_string, True)
                        continue

                    download_error = u'HTTP error ' + code

                elif e.returncode == 6:
                    download_error = u'URL error host not found'

                elif e.returncode == 28:
                    # GitHub and BitBucket seem to time out a lot
                    error_string = u'Downloading %s timed out' % url
                    if tries:
                        error_string += ', trying again'
                        if debug:
                            console_write(error_string, True)
                    # When no tries remain the loop condition fails and the
                    # timeout message is raised below
                    continue

                else:
                    download_error = e.stderr.rstrip()

                error_string = u'%s %s downloading %s.' % (error_message, download_error, url)

            # Only reached for errors that are not retried
            break

        raise DownloaderException(error_string)

    def supports_ssl(self):
        """
        Indicates if the object can handle HTTPS requests

        :return:
            If the object supports HTTPS requests
        """

        return True

    def print_debug(self, string):
        """
        Takes debug output from curl and groups and prints it

        :param string:
            The complete debug output from curl

        :return:
            A string containing any stderr output
        """

        # Two-character line prefixes and the debug section each one selects
        prefix_sections = {
            '* ': 'General',
            '> ': 'Write',
            '< ': 'Read',
        }

        section = 'General'
        last_section = None

        # Lines that are real stderr messages rather than debug output
        stderr_parts = []

        for line in string.splitlines():
            # Placeholder for body of request
            if line and line[0:2] == '{ ':
                continue
            if line and line[0:18] == '} [data not shown]':
                continue

            if len(line) > 1:
                prefix = line[0:2]
                if prefix in prefix_sections:
                    section = prefix_sections[prefix]
                    line = line[2:]

                # If the line does not start with "* ", "< ", "> " or "  "
                # then it is a real stderr message
                elif prefix != '  ':
                    stderr_parts.append(line.rstrip())
                    continue

            if line.strip() == '':
                continue

            # Print a heading whenever the section changes
            if section != last_section:
                console_write(u"Curl HTTP Debug %s" % section, True)

            console_write(u'  ' + line)
            last_section = section

        return ' '.join(stderr_parts).rstrip()