# Patch for lib.py: fetch byte-sync data over https and validate the server
# certificate (chain and hostname) against a pinned CA bundle.
#
# NOTE(review): this patch was recovered from a copy whose line breaks and
# leading whitespace had been collapsed.  Indentation (2 spaces assumed) and
# blank context lines are reconstructed -- confirm against lib.py, or apply
# with whitespace-insensitive matching.
# NOTE(review): the added code uses re, socket and urllib2 without importing
# them; confirm lib.py already imports all three.
diff -r eb6cee73c796 lib.py
--- a/lib.py	Tue Nov 26 10:59:18 2013 -0500
+++ b/lib.py	Wed Dec 04 03:17:17 2013 +0000
@@ -20,6 +20,124 @@
 # FreeBSD port: devel/py-yaml
 import yaml
 
+# Peter's hack BEGIN
+import httplib
+import ssl
+
+# CA bundle used to verify the byte-sync server certificate.
+CA_CERTS_PATH = '/home/byte-sync/byte-sync/fbs-cacert.pem'
+
+
+class InvalidCertificateException(httplib.HTTPException, urllib2.URLError):
+  """Raised when the server certificate cannot be trusted for the host.
+
+  Also a urllib2.URLError, so existing URLError handlers catch it.
+  """
+
+  def __init__(self, host, cert, reason):
+    httplib.HTTPException.__init__(self)
+    self.host = host
+    self.cert = cert
+    self.reason = reason
+
+  def __str__(self):
+    return ('Host %s returned an invalid certificate (%s) %s\n' %
+            (self.host, self.reason, self.cert))
+
+
+class CertValidatingHTTPSConnection(httplib.HTTPConnection):
+  """HTTPS connection that validates the peer certificate and hostname.
+
+  Validation only happens when ca_certs is given; otherwise the
+  connection is made with ssl.CERT_NONE.
+  """
+
+  default_port = httplib.HTTPS_PORT
+
+  def __init__(self, host, port=None, key_file=None, cert_file=None,
+               ca_certs=None, strict=None, **kwargs):
+    httplib.HTTPConnection.__init__(self, host, port, strict, **kwargs)
+    self.key_file = key_file
+    self.cert_file = cert_file
+    self.ca_certs = ca_certs
+    if self.ca_certs:
+      self.cert_reqs = ssl.CERT_REQUIRED
+    else:
+      self.cert_reqs = ssl.CERT_NONE
+
+  def _GetValidHostsForCert(self, cert):
+    """Returns the (possibly wildcarded) names the cert is valid for."""
+    dns_names = [x[1] for x in cert.get('subjectAltName', ())
+                 if x[0].lower() == 'dns']
+    if dns_names:
+      return dns_names
+    # No DNS subjectAltName entries: fall back to the subject commonName.
+    return [x[0][1] for x in cert.get('subject', ())
+            if x[0][0].lower() == 'commonname']
+
+  def _ValidateCertificateHostname(self, cert, hostname):
+    """Returns True if hostname matches a name the cert is valid for."""
+    for host in self._GetValidHostsForCert(cert):
+      # Escape the whole name, then let '*' match within one DNS label.
+      host_re = re.escape(host).replace('\\*', '[^.]*')
+      if re.search('^%s$' % (host_re,), hostname, re.I):
+        return True
+    return False
+
+  def connect(self):
+    """Connects over SSL and validates the peer certificate.
+
+    Raises:
+      InvalidCertificateException: the hostname check (ca_certs set) failed.
+    """
+    # Pass the timeout on so opener.open(url, timeout=...) is honoured.
+    sock = socket.create_connection((self.host, self.port), self.timeout)
+    self.sock = ssl.wrap_socket(sock, keyfile=self.key_file,
+                                certfile=self.cert_file,
+                                cert_reqs=self.cert_reqs,
+                                ca_certs=self.ca_certs)
+    if self.cert_reqs & ssl.CERT_REQUIRED:
+      cert = self.sock.getpeercert()
+      # httplib has already split any ':port' suffix off self.host.
+      hostname = self.host
+      if not self._ValidateCertificateHostname(cert, hostname):
+        raise InvalidCertificateException(hostname, cert,
+                                          'hostname mismatch')
+
+
+class VerifiedHTTPSHandler(urllib2.HTTPSHandler):
+  """urllib2 handler that opens https URLs with certificate validation.
+
+  Keyword arguments (e.g. ca_certs) are passed to every connection.
+  """
+
+  def __init__(self, **kwargs):
+    urllib2.AbstractHTTPHandler.__init__(self)
+    self._connection_args = kwargs
+
+  def https_open(self, req):
+    def http_class_wrapper(host, **kwargs):
+      full_kwargs = dict(self._connection_args)
+      full_kwargs.update(kwargs)
+      return CertValidatingHTTPSConnection(host, **full_kwargs)
+
+    try:
+      return self.do_open(http_class_wrapper, req)
+    except urllib2.URLError as e:
+      # ssl.SSL_ERROR_SSL (1) is reported as a certificate failure.
+      if (isinstance(e.reason, ssl.SSLError) and
+          e.reason.args[0] == ssl.SSL_ERROR_SSL):
+        raise InvalidCertificateException(req.host, '',
+                                          e.reason.args[1])
+      raise
+
+  https_request = urllib2.HTTPSHandler.do_request_
+
+# handler = VerifiedHTTPSHandler(ca_certs=CA_CERTS_PATH)
+# opener = urllib2.build_opener(handler)
+# opener.open(sys.argv[2])
+# Peter's hack END
+
 DATASET_STATES = [
   'new',
   'downloading',
@@ -159,7 +277,7 @@
 
     # Hard coded for now
     self.remote_base = 'byte-sync'
-    self.remote_url_root = 'http://%s/%s/%s' % (
+    self.remote_url_root = 'https://%s/%s/%s' % (
         self.remote_host, self.remote_base, self.name)
     self.remote_dataset_url = '%s/dataset_index.yaml' % (
         self.remote_url_root)
@@ -200,7 +318,9 @@
     logging.debug('Fetching dataset index from %s',
                   self.remote_dataset_url)
     try:
-      datasets_yaml = urllib2.urlopen(self.remote_dataset_url,
+      handler = VerifiedHTTPSHandler(ca_certs=CA_CERTS_PATH)
+      opener = urllib2.build_opener(handler)
+      datasets_yaml = opener.open(self.remote_dataset_url,
                                       timeout=30)
     except (socket.timeout, urllib2.HTTPError, urllib2.URLError) as e:
       logging.warning('Unable to fetch dataindex: %s',
@@ -233,7 +353,9 @@
       dataset.Process()
 
   def GetRemoteDatasets(self):
-    datasets = urllib2.urlopen(self.remote_dataset_url, timeout=30)
+    handler = VerifiedHTTPSHandler(ca_certs=CA_CERTS_PATH)
+    opener = urllib2.build_opener(handler)
+    datasets = opener.open(self.remote_dataset_url, timeout=30)
     for dataset in yaml.safe_load(datasets):
       print dataset
 
@@ -362,7 +484,9 @@
     logging.info('Fetching dataset yaml: %s', self.remote_metadata_master_url)
     self.CreateLocalDirs()
     try:
-      yaml_req = urllib2.urlopen(self.remote_metadata_master_url,
+      handler = VerifiedHTTPSHandler(ca_certs=CA_CERTS_PATH)
+      opener = urllib2.build_opener(handler)
+      yaml_req = opener.open(self.remote_metadata_master_url,
                                  timeout=30)
     except (socket.timeout, urllib2.HTTPError, urllib2.URLError) as e:
       logging.warning('Unable to fetch metadata from %s: %s',
@@ -379,7 +503,9 @@
   def FetchMetadataLocal(self):
     logging.info('Fetching dataset yaml: %s', self.remote_metadata_local_url)
     try:
-      yaml_req = urllib2.urlopen(self.remote_metadata_local_url, timeout=30)
+      handler = VerifiedHTTPSHandler(ca_certs=CA_CERTS_PATH)
+      opener = urllib2.build_opener(handler)
+      yaml_req = opener.open(self.remote_metadata_local_url, timeout=30)
     except (socket.timeout, urllib2.HTTPError, urllib2.URLError) as e:
       logging.warning('Unable to fetch metadata from %s: %s',
                       self.remote_metadata_master_url, e)
@@ -404,7 +530,8 @@
       self.pub_path = os.path.join(self.pub_path, pub_dir)
 
   # Size of blocks used for downloads etc.
-  BLOCKSIZE = (128 * 1024)
+  BLOCKSIZE = (8 * 1024 * 1024)
+  #BLOCKSIZE = (128 * 1024)
 
   # How often to write state to metadata
   METADATA_WRITTEN_INTERVAL = (10 * 1024 * 1024)
@@ -420,7 +547,9 @@
     self.UpdateState('downloading')
     logging.info('Fetching data_tar from %s to %s',
                  self.remote_data_tar_url, self.local_data_tar_tmp_path)
-    dataset_req = urllib2.urlopen(self.remote_data_tar_url, timeout=30)
+    handler = VerifiedHTTPSHandler(ca_certs=CA_CERTS_PATH)
+    opener = urllib2.build_opener(handler)
+    dataset_req = opener.open(self.remote_data_tar_url, timeout=30)
     m256 = hashlib.sha256()
     m512 = hashlib.sha512()
     written = 0