From efcc78a5586f09eea10f520438c50537e29d4b85 Mon Sep 17 00:00:00 2001 From: RichardG867 Date: Fri, 15 Apr 2022 20:23:37 -0300 Subject: [PATCH] Improve debug logging, it can now be enabled through -d --- biostools/__main__.py | 51 +++++++++++++++++++------- biostools/analyzers.py | 79 ++++++++++++++++++++++++++++++----------- biostools/extractors.py | 20 +++++------ docker-entrypoint.sh | 2 +- 4 files changed, 109 insertions(+), 43 deletions(-) diff --git a/biostools/__main__.py b/biostools/__main__.py index bc6c64e..023e86a 100644 --- a/biostools/__main__.py +++ b/biostools/__main__.py @@ -89,7 +89,7 @@ def extract_dir(file_extractors, dir_number_path, next_dir_number_path, scan_dir # Remove this directory if it ends up empty. util.rmdirs(scan_dir_path) -def extract_process(queue, dir_number_path, next_dir_number_path): +def extract_process(queue, dir_number_path, next_dir_number_path, debug): """Main loop for the extraction multiprocessing pool.""" # Set up extractors. @@ -117,6 +117,12 @@ def extract_process(queue, dir_number_path, next_dir_number_path): extractors.MBRUnsafeExtractor(), ] + # Disable debug mode on extractors. + if not debug: + dummy_func = lambda self, *args: None + for extractor in file_extractors: + extractor.debug_print = dummy_func + # Receive work from the queue. while True: item = queue.get() @@ -161,7 +167,7 @@ def extract(dir_path, _, options): # Start multiprocessing pool. print('Starting extraction on directory {0}'.format(dir_number), end='', flush=True) queue = multiprocessing.Queue(maxsize=MP_PROCESS_COUNT) - mp_pool = multiprocessing.Pool(MP_PROCESS_COUNT, initializer=extract_process, initargs=(queue, dir_number_path, next_dir_number_path)) + mp_pool = multiprocessing.Pool(MP_PROCESS_COUNT, initializer=extract_process, initargs=(queue, dir_number_path, next_dir_number_path, options.get('debug'))) # Create next directory. if not os.path.isdir(next_dir_number_path): @@ -471,7 +477,7 @@ def analyze_dir(formatter, scan_base, file_analyzers, scan_dir_path, scan_file_n # Output the results. formatter.output_row(fields) -def analyze_process(queue, formatter, scan_base): +def analyze_process(queue, formatter, scan_base, debug): """Main loop for the analysis multiprocessing pool.""" # Set up analyzers. @@ -518,6 +524,13 @@ def analyze_process(queue, formatter, scan_base): analyzers.ZenithAnalyzer(), ] + # Disable debug mode on analyzers. + if not debug: + dummy_func = lambda self, *args: None + for analyzer in file_analyzers: + analyzer.debug_print = dummy_func + analyzer.debug = False + # Receive work from the queue. while True: item = queue.get() @@ -556,7 +569,7 @@ def analyze(dir_path, formatter_args, options): # Start multiprocessing pool. queue = multiprocessing.Queue(maxsize=MP_PROCESS_COUNT) - mp_pool = multiprocessing.Pool(MP_PROCESS_COUNT, initializer=analyze_process, initargs=(queue, formatter, dir_path)) + mp_pool = multiprocessing.Pool(MP_PROCESS_COUNT, initializer=analyze_process, initargs=(queue, formatter, dir_path, options.get('debug'))) if os.path.isdir(dir_path): # Scan directory structure. @@ -579,21 +592,26 @@ def analyze(dir_path, formatter_args, options): def main(): + # Set default options. mode = None options = { 'array': False, + 'debug': False, 'format': 'csv', 'headers': True, 'hyperlink': False, 'docker-usage': False, } - args, remainder = getopt.gnu_getopt(sys.argv[1:], 'xaf:hnr', ['extract', 'analyze', 'format=', 'hyperlink', 'no-headers', 'array', 'docker-usage']) + # Parse arguments. + args, remainder = getopt.gnu_getopt(sys.argv[1:], 'xadf:hnr', ['extract', 'analyze', 'debug', 'format=', 'hyperlink', 'no-headers', 'array', 'docker-usage']) for opt, arg in args: if opt in ('-x', '--extract'): mode = 'extract' elif opt in ('-a', '--analyze'): mode = 'analyze' + elif opt in ('-d', '--debug'): + options['debug'] = True elif opt in ('-f', '--format'): options['format'] = arg.lower() elif opt in ('-h', '--hyperlink'): @@ -606,33 +624,42 @@ def main(): options['docker-usage'] = True if len(remainder) > 0: + # Disable multi-threading in debug mode. + if options.get('debug'): + global MP_PROCESS_COUNT + MP_PROCESS_COUNT = 1 + + # Run mode handler. if mode == 'extract': return extract(remainder[0], remainder[1:], options) elif mode == 'analyze': return analyze(remainder[0], remainder[1:], options) + # Print usage. if options['docker-usage']: usage = ''' -Usage: docker run -v directory:/bios biostools [-f output_format] [-h] [-n] [-r] [formatter_options] +Usage: docker run -v directory:/bios biostools [-d] [-f output_format] [-h] [-n] [-r] [formatter_options] Archives and BIOS images in the directory mounted to /bios will be extracted and analyzed. ''' else: usage = ''' -Usage: python3 -m biostools -x directory - python3 -m biostools [-f output_format] [-h] [-n] [-r] -a directory|single_file [formatter_options] +Usage: python3 -m biostools [-d] -x directory + python3 -m biostools [-d] [-f output_format] [-h] [-n] [-r] -a directory|single_file [formatter_options] -x Extract archives and BIOS images recursively in the given directory + -d Enable debug output. -a Analyze extracted BIOS images in the given directory, or a single extracted file (extracting with -x first is recommended)''' usage += ''' + -d Enable debug output. -f Output format: - csv Comma-separated values with quotes (default) - scsv Semicolon-separated values with quotes - json JSON object array - jsontable JSON table + csv Comma-separated values with quotes (default) + scsv Semicolon-separated values with quotes + json JSON object array + jsontable JSON table -h Generate download links for file paths representing HTTP URLs. csv/scsv: The Excel HYPERLINK formula is used; if you have non-English Excel, you must provide your language's diff --git a/biostools/analyzers.py b/biostools/analyzers.py index 10f80d1..fac8790 100644 --- a/biostools/analyzers.py +++ b/biostools/analyzers.py @@ -102,9 +102,9 @@ class AbortAnalysisError(Exception): class Analyzer: - def __init__(self, vendor, debug=False): + def __init__(self, vendor): self.vendor_id = self.vendor = vendor - self.debug = debug + self.debug = True # to speed up analyze_line self._check_list = [] @@ -124,7 +124,8 @@ class Analyzer: if checker_result: callback_result = callback_func(line, checker_result) if callback_result: - self.debug_print(callback_func.__name__, line) + if self.debug: + self.debug_print(callback_func.__name__, '=>', repr(line)) return callback_result def can_analyze(self): @@ -137,10 +138,9 @@ class Analyzer: None if no such file exists.""" return True - def debug_print(self, key, line=None): - """Print a line containing analyzer state if debugging is enabled.""" - if self.debug: - print(self._file_path, '=> found', self.vendor_id, key, '=', (line == None) and 'no line' or repr(line), '\n', end='', file=sys.stderr) + def debug_print(self, *args): + """Print a log line if debug output is enabled.""" + print(self.__class__.__name__ + ':', *args, file=sys.stderr) def register_check_list(self, check_list): """Register the list of checks this analyzer will handle. @@ -177,7 +177,7 @@ class Analyzer: self.addons = [] self.oroms = [] - self._file_path = '?' + self._file_path = '[?]' class NoInfoAnalyzer(Analyzer): """Special analyzer for BIOSes which can be identified, @@ -367,6 +367,8 @@ class AMIAnalyzer(Analyzer): # rely on the header version data to get the Intel version sign-on. if header_data: is_intel = AMIIntelAnalyzer.can_handle(self, file_data, header_data) + if is_intel: + self.debug_print('Intel data found') else: is_intel = False @@ -375,12 +377,14 @@ class AMIAnalyzer(Analyzer): if match: # Determine location of the identification block. id_block_index = match.start(0) + self.debug_print('ID block starts at', hex(id_block_index)) # Extract version. version_6plus = match.group(1) if version_6plus: # AMIBIOS 6 onwards. self.version = version_6plus.decode('cp437', 'ignore') + self.debug_print('Version (6+):', self.version) # Pad 4-digit versions. (Biostar) if self.version[-1] not in '0123456789': @@ -390,56 +394,73 @@ class AMIAnalyzer(Analyzer): version_winbios_maj = match.group(2) version_winbios_min = match.group(3) if version_winbios_maj and version_winbios_min: - self.version = (version_winbios_maj + version_winbios_min).decode('cp437', 'ignore') + '00' + self.version = (version_winbios_maj + version_winbios_min).decode('cp437', 'ignore') + self.debug_print('Version (4-5):', self.version) + self.version += '00' self.addons.append('WinBIOS') else: - # AMI Color date. + # AMI Color (or WinBIOS 12/15/93) date. self.version = match.group(4).decode('cp437', 'ignore') + self.debug_print('Version (Color):', self.version) # Extract string. self.string = util.read_string(file_data[id_block_index + 0x78:id_block_index + 0xa0]) if 'Intel Corporation' in self.string or len(self.string) <= 6: # (later Intel AMI with no string) self.string = '' + self.debug_print('Intel with no string') else: + self.debug_print('Base string:', self.string) + # Add identification tag to the string if one is present. id_tag = util.read_string(file_data[id_block_index + 0xec:id_block_index + 0x100]) + self.debug_print('String tag:', id_tag) if id_tag[:4] == '_TG_': self.string = self.string.rstrip() + '-' + id_tag[4:].lstrip() - # Stop if this BIOS is actually Aptio UEFI CSM. - if self._uefi_csm_pattern.match(self.string): - return False + # Stop if this BIOS is actually Aptio UEFI CSM. + if self._uefi_csm_pattern.match(self.string): + self.debug_print('String matches UEFI CSM, aborting') + return False # Ignore unwanted string terminator on sign-on. (TriGem Lisbon-II) signon_terminator = b'\x00' if file_data[id_block_index + 0x123:id_block_index + 0x12b] == b' Inc.,\x00 ': + self.debug_print('Applying sign-on terminator hack') signon_terminator += b'\x00' # Extract sign-on, while removing carriage returns. self.signon = util.read_string(file_data[id_block_index + 0x100:id_block_index + 0x200], terminator=signon_terminator) + self.debug_print('Raw sign-on:', repr(self.signon)) # The actual sign-on starts on the second line. self.signon = '\n'.join(x.rstrip('\r').strip() for x in self.signon.split('\n')[1:] if x != '\r').strip('\n') elif len(file_data) < 1024: # Ignore false positives from sannata readmes. + self.debug_print('False positive by size of', len(file_data), 'bytes') return False elif self._precolor_date_pattern.search(file_data): + self.debug_print('Found potential pre-Color') + # Check date, using a different pattern to differentiate core date from build date. match = self._precolor_core_date_pattern.search(file_data) if match: date_start = match.start(0) + self.debug_print('Pre-Color data starts at', hex(date_start)) else: match = self._date_pattern.search(file_data) date_start = 0 + self.debug_print('Pre-Color data start is unknown') if match: # Extract date as the version. self.version = (match.group(1) or (match.group(2) + b'/' + match.group(3) + b'/' + match.group(4))).decode('cp437', 'ignore') + self.debug_print('Version (pre-Color):', self.version) # Check pre-Color identification block. match = self._precolor_block_pattern.search(file_data) if match: # Determine location of the identification block. id_block_index = match.start(0) + self.debug_print('Pre-Color ID block starts at', hex(id_block_index)) # Locate the encoded string. match = self._precolor_string_pattern.search(file_data[date_start:]) @@ -453,6 +474,7 @@ class AMIAnalyzer(Analyzer): if c & 0x80: # MSB termination break self.string = bytes(buf).decode('cp437', 'ignore') + self.debug_print('Base string:', self.string) # Remove "-K" KBC suffix. # Note: K without preceding - is possible (Atari PC5) @@ -470,6 +492,8 @@ class AMIAnalyzer(Analyzer): # Add date. self.string += '-' + util.read_string(file_data[id_block_index + 0x9c:id_block_index + 0xa4]).replace('/', '').strip() + self.debug_print('Reconstructed string:', self.string) + # Invalidate string if the identification block doesn't # appear to be valid. (Intel AMI post-Color without string) if self.string[:10] in ('????-0000-', '????-0166-'): @@ -478,6 +502,7 @@ class AMIAnalyzer(Analyzer): elif check_match.group(1): # 8088-BIOS header # Extract version. self.version = check_match.group(1).decode('cp437', 'ignore') + self.debug_print('Version (8088):', self.string) # Locate the encoded string. match = self._8088_string_pattern.search(file_data) @@ -489,20 +514,27 @@ class AMIAnalyzer(Analyzer): c = (c << 1) | (c >> 7) buf.append(c & 0x7f) self.string = bytes(buf).decode('cp437', 'ignore') + + self.debug_print('Base string:', self.string) else: # Fallback if we can't find the encoded string. self.string = '????-' + self.version.replace('/', '') + self.debug_print('Reconstructed string:', self.string) + # Extract additional information after the copyright as a sign-on. # (Shuttle 386SX, CDTEK 286, Flying Triumph Access Methods) match = self._precolor_signon_pattern.search(file_data) if match: self.signon = match.group(1).decode('cp437', 'ignore') + self.debug_print('Raw sign-on:', repr(self.signon)) # Split sign-on lines. (Video Technology Info-Tech 286-BIOS) self.signon = '\n'.join(x.strip() for x in self.signon.split('\n') if x.strip()).strip('\n') else: # Assume this is not an AMI BIOS, unless we found Intel data above. + if is_intel: + self.debug_print('No AMI data found but Intel data found') return is_intel return True @@ -617,7 +649,7 @@ class AMIDellAnalyzer(AMIAnalyzer): if file_data[:9] == b'DELLBIOS\x00': # DELLBIOS header contains the Dell version. self.version = '11/11/92' - self.debug_print('DELLBIOS header') + self.debug_print('DELLBIOS header present') # Extract the version as a sign-on. terminator_index = file_data.find(b'\x00', 10) @@ -640,7 +672,7 @@ class AMIDellAnalyzer(AMIAnalyzer): # The Dell version will be in the BIOS body. self.version = '11/11/92' - self.debug_print('DELLXBIOS string') + self.debug_print('DELLXBIOS string present') return True @@ -1776,13 +1808,13 @@ class PhoenixAnalyzer(Analyzer): # Extract the build code as a string. build_code = self.string = util.read_string(file_data[offset + 55:offset + 63].replace(b'\x00', b'\x20')).strip() if build_code: - self.debug_print('BCPSYS build code', build_code) + self.debug_print('BCPSYS build code:', build_code) # Append the build date and time to the string. date_time = util.read_string(file_data[offset + 15:offset + 32].replace(b'\x00', b'\x20')).strip() if self._bcpsys_datetime_pattern.match(date_time): # discard if this is an invalid date/time (PHLASH.EXE) self._bcpsys_date_time = date_time - self.debug_print('BCPSYS build date/time', date_time) + self.debug_print('BCPSYS build date/time:', date_time) if self.string: self.string += '\n' self.string += date_time @@ -1798,7 +1830,7 @@ class PhoenixAnalyzer(Analyzer): if dell_version[0:1] == 'A': self.signon += 'BIOS Version: ' + dell_version - self.debug_print('Dell version', dell_version) + self.debug_print('Dell version:', dell_version) else: # Determine if this is some sort of HP Vectra BIOS. match = self._hp_pattern.search(file_data) @@ -1812,7 +1844,7 @@ class PhoenixAnalyzer(Analyzer): match = self._hp_signon_pattern.search(file_data) if match: self.signon = match.group(0).decode('cp437', 'ignore') - self.debug_print('HP version', self.signon) + self.debug_print('HP version:', self.signon) else: # Extract sign-on from Core and some 4.0 Release 6.0 BIOSes. match = self._core_signon_pattern.search(file_data) @@ -1825,15 +1857,22 @@ class PhoenixAnalyzer(Analyzer): self.signon = self.signon.replace(frm, to) self.signon = self.signon.decode('cp437', 'ignore') + print('Raw sign-on (4R6+):', repr(self.signon)) else: # Extract sign-on from Ax86 and older BIOSes. match = self._rombios_signon_pattern.search(file_data) - if not match: + if match: + signon_log = '(old std):' + else: match = self._rombios_signon_alt_pattern.search(file_data) + signon_log = '(old alt):' if match: end = match.end(0) if file_data[end] != 0xfa: # (unknown 8088 PLUS 2.52) self.signon = util.read_string(file_data[end:end + 256]) + print('Raw sign-on', signon_log, repr(self.signon)) + else: + self.debug_print('Ignored bogus sign-on, first bytes:', repr(file_data[end:end + 8])) # Split sign-on lines. if self.signon: diff --git a/biostools/extractors.py b/biostools/extractors.py index f382373..978b0d4 100644 --- a/biostools/extractors.py +++ b/biostools/extractors.py @@ -33,9 +33,9 @@ class Extractor: - a string with the produced output file/directory path""" raise NotImplementedError() - def log_print(self, *args): - """Print a log line.""" - print('{0}:'.format(self.__class__.__name__), *args, file=sys.stderr) + def debug_print(self, *args): + """Print a log line if debug output is enabled.""" + print(self.__class__.__name__ + ':', *args, file=sys.stderr) class ApricotExtractor(Extractor): @@ -1365,7 +1365,7 @@ class IntelExtractor(Extractor): # Create destination file. dest_file_path = os.path.join(dest_dir, 'intel.bin') out_f = open(dest_file_path, 'wb') - self.log_print('Found', len(found_parts), 'parts, header size', header_size, 'bytes, largest part size', largest_part_size, 'bytes') + self.debug_print('Found', len(found_parts), 'parts, header size', header_size, 'bytes, largest part size', largest_part_size, 'bytes') # Copy parts to the destination file. bootblock_offset = None @@ -1404,11 +1404,11 @@ class IntelExtractor(Extractor): if bootblock_offset < 0: bootblock_offset = 0 dest_offset += bootblock_offset - self.log_print('bbo', hex(bootblock_offset), 'do', hex(dest_offset)) + self.debug_print('bbo', hex(bootblock_offset), 'do', hex(dest_offset)) out_f.seek(dest_offset) # Copy data. - self.log_print(data_length, 'bytes @', hex(dest_offset), '=>', found_part_path) + self.debug_print(data_length, 'bytes @', hex(dest_offset), '=>', found_part_path) remaining = max(data_length, largest_part_size) part_data = b' ' while part_data and remaining > 0: @@ -1421,17 +1421,17 @@ class IntelExtractor(Extractor): if data_length <= 8192 and len(found_parts_boot) == 0: # Workaround for JN440BX, which requires its final # part (sized 8 KB) to be at the end of the image. - self.log_print('> Final part non-padded') + self.debug_print('> Final part non-padded') remaining = 0 elif data_length == largest_part_size and ((dest_offset >> 16) & 1) == int(invert): # Workaround for SE440BX-2 and SRMK2, which require a # gap at the final 64 KB where the boot block goes. - self.log_print('> Final part gap') + self.debug_print('> Final part gap') remaining += largest_part_size elif logical_area == 0 and dest_offset == bootblock_offset: # Don't pad a boot block insertion. remaining = 0 - self.log_print('> Adding', remaining, 'padding bytes') + self.debug_print('> Adding', remaining, 'padding bytes') while remaining > 0: out_f.write(b'\xFF' * min(remaining, 1048576)) remaining -= 1048576 @@ -1461,7 +1461,7 @@ class IntelExtractor(Extractor): out_f = open(dest_file_path + '.padded', 'wb') # Write padding. - self.log_print('Padding by', hex(padding_size)) + self.debug_print('Padding by', hex(padding_size)) while padding_size > 0: out_f.write(b'\xFF' * min(padding_size, 1048576)) padding_size -= 1048576 diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 5b17aaa..212a540 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -20,7 +20,7 @@ if [ ! -d /bios/0 -o -d /bios/1 ] then # Run extractor. - python3 -u -m biostools -x /bios >&2 + python3 -u -m biostools -x /bios $* >&2 # Print usage if there's no 1 directory (nothing bound to /bios). [ ! -d /bios/1 ] && exec python3 -u -m biostools --docker-usage >&2