""" Author: Daniel Mohr.
Date: 2017-03-07, 2021-02-17 (last change).
License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.
Ported from pfu on 2021-02-17 by Daniel Mohr (author of original code and main author of this file). """
""" :Author: Daniel Mohr :Email: daniel.mohr@dlr.de :Date: 2021-02-17 (last change). :License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.
class to extract check checksums from a file """ # pylint: disable=too-few-public-methods # we allow here many hash functions, but better only use common ones, # e. g.: md5, sha256, sha512 # (md5 is only acceptable for very small files!) 104: ('sha512', 'base32'), # detect by length 88: ('sha512', 'base64'), 64: ('sha256', 'base16'), 56: ('sha256', 'base32'), 44: ('sha256', 'base64'), 32: ('md5', 'base16 or base32'), 24: ('md5', 'base64')} 'base16': base64.b16decode, 'Base16': base64.b16decode, 'base32': base64.b32decode, 'Base32': base64.b32decode, 'base64': base64.b64decode, 'Base64': base64.b64decode} 'base16': base64.b16encode, 'Base16': base64.b16encode, 'base32': base64.b32encode, 'Base32': base64.b32encode, 'base64': base64.b64encode, 'Base64': base64.b64encode} re.compile( r"(?P<hash>[0-9a-zA-Z/+=]+) [ \*]{1}(?P<filename>.+) \(bytes " r"(?P<start>[0-9]+) - (?P<stop>[0-9]+)\)$"), re.compile(r"(?P<hash>[0-9a-zA-Z/+=]+) [ \*]{1}(?P<filename>.+)$"), re.compile(r"(?P<type>MD5|SHA256|SHA512|SHA1|SHA224|SHA384)[ ]{0,1}\(" r"(?P<filename>.+)\)[ ]{0,1}= (?P<hash>[0-9a-zA-Z/+=]+)$")]
checksum_file, buf_size=524288, # 1024*512 Bytes = 512 kB level=20): """ :Author: Daniel Mohr :Email: daniel.mohr@dlr.de :Date: 2017-02-25, 2021-02-17 (last change). :License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.
class to extract checksums from a file
:param checksum_file: File to read the checksums from. :param buf_size: Files will be read in chunks of the given number of bytes. This should be a multiple of the block size of the hash function (e. g. 64 bytes for md5, 64 bytes for sha256, 128 bytes for sha512). :param level: Set how verbose the output should be. This is the logging level; lower numbers give more output. The parameter is a number between 1 and 50. """ # extract hash from checksum file
""" :Author: Daniel Mohr :Email: daniel.mohr@dlr.de :Date: 2021-02-17 (last change). :License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.
:param file_name: file_name to search :param encoding: define the encoding of the returned hash string
:return: return the hash string, the hash encoding and the source file name (where the data was read from) or None (if file_name not available) """ # only return first hash # RFC 3548 defines the following alphabets: # base64: ABCDEFGHIJKLMNOPQRSTUVWXYZ # abcdefghijklmnopqrstuvwxyz0123456789-_ # base32: abcdefghijklmnopqrstuvwxyz234567 # base16: 0123456789ABCDEF elif hash_info[1][1] in ['base32', 'Base32']: hash_info[0] = hash_info[0].lower() return None
""" :Author: Daniel Mohr :Email: daniel.mohr@dlr.de :Date: 2017-03-02 (last change). :License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.
Try to determine the hash function and encoding from the hash. If this is not possible, assume the file extension gives the hash type.
:param hash_string: the hash to analyse :param hashfilename: file name of the hash (if hash is not unique the file extension is used)
:return: tuple of hash algorithm and encoding or None on error """ if hash_string[-6:] == '======': hash_encode = (hash_encode[0], 'base32') else: hash_encode = (hash_encode[0], 'base16') extension = os.path.splitext(hashfilename)[1][1:].strip().lower() if extension in self.hashfcts: # assume file extension gives the hash type # the coding is really hard to detect, therefore assume base16 # RFC 3548 defines the following alphabets: # base64: ABCDEFGHIJKLMNOPQRSTUVWXYZ # abcdefghijklmnopqrstuvwxyz0123456789-_ # base32: abcdefghijklmnopqrstuvwxyz234567 # base16: 0123456789ABCDEF # Unfortunately typical used tools like *sum (e. g. md5sum) # gives the output as base16 in lower letters. hash_encode = (extension, 'base16')
""" :Author: Daniel Mohr :Email: daniel.mohr@dlr.de :Date: 2017-03-02 (last change). :License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.
Analyse a line of a hash file describing the hash of a complete file. This method should not be called from outside.
:param sres: re instance :param hashfilename: file name of the hash (normally only path is used, if hash is not unique the file extension is used) """ hashfilename) os.path.join( os.path.dirname(hashfilename), sres.group('filename'))) hash_string = sres.group('hash') else: self.hash_dict[relfilename] += [( hash_string, hash_encode, hashfilename)] else: hash_string, hash_encode, hashfilename)]
""" :Author: Daniel Mohr :Email: daniel.mohr@dlr.de :Date: 2017-03-01 (last change). :License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.
Analyse a line of a hash file describing the hash of a complete file in BSD-style. This method should not be called from outside.
:param sres: re instance :param hashfilename: file name of the hash (here only path is used) """ relfilename = os.path.normpath( os.path.join( os.path.dirname(hashfilename), sres.group('filename'))) if relfilename in self.hash_dict: self.hash_dict[relfilename] += [( sres.group('hash').lower(), (sres.group('type').lower(), 'base16'), hashfilename)] else: self.hash_dict[relfilename] = [( sres.group('hash').lower(), (sres.group('type').lower(), 'base16'), hashfilename)]
""" :Author: Daniel Mohr :Email: daniel.mohr@dlr.de :Date: 2017-02-25, 2021-02-17 (last change). :License: GNU GENERAL PUBLIC LICENSE, Version 3, 29 June 2007.
read hash file """ os.access(self.hash_file_name, os.R_OK)): # this is ignored here self.log.info("ignoring chunk hashes") else: sres, self.hash_file_name) else: sres = self.regexps[2].search(line) if sres: # hash of a complete file (BSD-style) self._analyse_hashline_of_file_bsd( sres, self.hash_file_name) else: self.log.warning( "do not understand line in hash file " "\"%s\": %s", self.hash_file_name, line) elif not os.access(self.hash_file_name, os.R_OK): self.log.warning('hash file "%s" is not readable', self.hash_file_name) else: self.log.warning('hash file "%s" not existing (anymore?)', self.hash_file_name) |