From d5b38bc94a716c72becb1a869ad0e804c2aab595 Mon Sep 17 00:00:00 2001 From: RichardG867 Date: Wed, 15 Dec 2021 21:19:32 -0300 Subject: [PATCH] Import biostools source --- README.md | 3 +- biostools/__init__.py | 17 + biostools/__main__.py | 591 ++++++++++ biostools/analyzers.py | 2320 +++++++++++++++++++++++++++++++++++++++ biostools/extractors.py | 1312 ++++++++++++++++++++++ biostools/formatters.py | 199 ++++ biostools/pciutil.py | 257 +++++ biostools/util.py | 139 +++ requirements.txt | 1 + 9 files changed, 4838 insertions(+), 1 deletion(-) create mode 100644 biostools/__init__.py create mode 100644 biostools/__main__.py create mode 100644 biostools/analyzers.py create mode 100644 biostools/extractors.py create mode 100644 biostools/formatters.py create mode 100644 biostools/pciutil.py create mode 100644 biostools/util.py create mode 100644 requirements.txt diff --git a/README.md b/README.md index 096b2de..d3c6295 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,10 @@ A toolkit for analyzing and extracting x86 BIOS ROM images (mostly) within the c ## System requirements -* **Linux**. Unfortunately, we rely on tools which contain non-portable code and generate filenames that are invalid for Windows. +* **Linux**. Unfortunately, we rely on tools which contain non-portable code and generate filenames that are invalid for Windows, as well as GNU-specific extensions to shell commands. * **Python 3.5** or newer. * **Standard gcc toolchain** for building the essential `bios_extract` tool. +* **7-Zip** command line utility installed as `7z`. 
## Installation diff --git a/biostools/__init__.py b/biostools/__init__.py new file mode 100644 index 0000000..02cf789 --- /dev/null +++ b/biostools/__init__.py @@ -0,0 +1,17 @@ +#!/usr/bin/python3 +# +# 86Box A hypervisor and IBM PC system emulator that specializes in +# running old operating systems and software designed for IBM +# PC systems and compatibles from 1981 through fairly recent +# system designs based on the PCI bus. +# +# This file is part of the 86Box BIOS Tools distribution. +# +# Dummy module initialization file. +# +# +# +# Authors: RichardG, +# +# Copyright 2021 RichardG. +# diff --git a/biostools/__main__.py b/biostools/__main__.py new file mode 100644 index 0000000..8037c14 --- /dev/null +++ b/biostools/__main__.py @@ -0,0 +1,591 @@ +#!/usr/bin/python3 -u +# +# 86Box A hypervisor and IBM PC system emulator that specializes in +# running old operating systems and software designed for IBM +# PC systems and compatibles from 1981 through fairly recent +# system designs based on the PCI bus. +# +# This file is part of the 86Box BIOS Tools distribution. +# +# Main BIOS extractor and analyzer program. +# +# +# +# Authors: RichardG, +# +# Copyright 2021 RichardG. +# + +import getopt, os, multiprocessing, re, subprocess, sys +from . import analyzers, extractors, formatters, util + +# Constants. +MP_PROCESS_COUNT = 4 +ANALYZER_MAX_CACHE_MB = 512 + + +# Extraction module. + +def extract_dir(file_extractors, dir_number_path, next_dir_number_path, scan_dir_path, scan_file_names): + """Process a given directory for extraction.""" + + # Determine the destination subdirectory. + dest_subdir = scan_dir_path[len(dir_number_path):] + while dest_subdir[:len(os.sep)] == os.sep: + dest_subdir = dest_subdir[len(os.sep):] + + # Iterate through files. + for scan_file_name in scan_file_names: + file_path = os.path.join(scan_dir_path, scan_file_name) + + # Remove links. 
+ if os.path.islink(file_path): + try: + os.remove(file_path) + except: + try: + os.rmdir(file_path) + except: + pass + continue + + # Read header. + try: + f = open(file_path, 'rb') + file_data = f.read(32775) # upper limit set by ISOExtractor + f.close() + except: + # Permission issues or after-the-fact removal of other files by + # extractors can cause this. Give up. + continue + + # Come up with a destination directory for this file. + dest_file_path = os.path.join(dest_subdir, scan_file_name + ':') + dest_dir = os.path.join(next_dir_number_path, dest_file_path) + dest_dir_0 = os.path.join(os.path.dirname(next_dir_number_path), '0', dest_file_path) + + # Run through file extractors until one succeeds. + for extractor in file_extractors: + # Run the extractor. + try: + extractor_result = extractor.extract(file_path, file_data, dest_dir, dest_dir_0) + except: + # Log an error. + util.log_traceback('extracting', file_path) + continue + + # Check if the extractor produced any results. + if extractor_result: + # Handle the line break ourselves, since Python prints the main + # body and line break separately, causing issues when multiple + # threads/processes are printing simultaneously. + print('{0} => {1}{2}\n'.format(file_path, extractor.__class__.__name__, (extractor_result == True) and ' (skipped)' or ''), end='') + break + + # Remove destination directories if they were created but are empty. + for to_remove in (dest_dir, dest_dir_0): + util.rmdirs(to_remove) + + # Remove this directory if it ends up empty. + util.rmdirs(scan_dir_path) + + +def extract_process(queue, dir_number_path, next_dir_number_path): + """Main loop for the extraction multiprocessing pool.""" + + # Set up extractors. 
+ file_extractors = [ + extractors.DiscardExtractor(), + extractors.ISOExtractor(), + extractors.PEExtractor(), + extractors.FATExtractor(), + extractors.TarExtractor(), + extractors.ArchiveExtractor(), + extractors.HexExtractor(), + extractors.ImageExtractor(), + extractors.DellExtractor(), + extractors.IntelExtractor(), + extractors.OMFExtractor(), + extractors.InterleaveExtractor(), + extractors.BIOSExtractor(), + extractors.UEFIExtractor(), + ] + + # Receive work from the queue. + while True: + item = queue.get() + if item == None: # special item to stop the loop + break + extract_dir(file_extractors, dir_number_path, next_dir_number_path, *item) + +def extract(dir_path, _, options): + """Main function for extraction.""" + + # Check if the structure is correct. + if not os.path.exists(os.path.join(dir_path, '1')): + print('Incorrect directory structure. All data to unpack should be located inside', file=sys.stderr) + print('a directory named 1 in turn located inside the given directory.', file=sys.stderr) + return 2 + + # Check if bios_extract is there. + if not os.path.exists(os.path.abspath(os.path.join('bios_extract', 'src', 'bios_extract'))): + print('bios_extract binary not found, did you compile it?', file=sys.stderr) + return 3 + + # Open devnull file for shell command output. + devnull = open(os.devnull, 'wb') + + # Recurse through directory numbers. + dir_number = 1 + while True: + dir_number_path = os.path.join(dir_path, str(dir_number)) + next_dir_number_path = os.path.join(dir_path, str(dir_number + 1)) + + # Fix permissions on extracted archives. 
+ print('Fixing up directory {0}:'.format(dir_number), end=' ', flush=True) + try: + print('chown', end=' ', flush=True) + subprocess.run(['chown', '-hR', '--reference=' + dir_path, '--', dir_number_path], stdout=devnull, stderr=subprocess.STDOUT) + print('chmod', end=' ', flush=True) + subprocess.run(['chmod', '-R', 'u+rwx', '--', dir_number_path], stdout=devnull, stderr=subprocess.STDOUT) # execute for listing directories + except: + pass + print() + + # Start multiprocessing pool. + print('Starting extraction on directory {0}'.format(dir_number), end='', flush=True) + queue = multiprocessing.Queue(maxsize=MP_PROCESS_COUNT) + mp_pool = multiprocessing.Pool(MP_PROCESS_COUNT, initializer=extract_process, initargs=(queue, dir_number_path, next_dir_number_path)) + + # Create next directory. + if not os.path.isdir(next_dir_number_path): + os.makedirs(next_dir_number_path) + + # Scan directory structure. I really wanted this to have file-level + # granularity, but IntelExtractor and InterleaveBIOSExtractor + # both require directory-level granularity for inspecting other files. + print(flush=True) + found_any_files = False + for scan_dir_path, scan_dir_names, scan_file_names in os.walk(dir_number_path): + if len(scan_file_names) > 0: + found_any_files = True + queue.put((scan_dir_path, scan_file_names)) + + # Stop if no files are left. + if not found_any_files: + # Remove this directory and the directory if they're empty. + try: + os.rmdir(dir_number_path) + dir_number -= 1 + except: + pass + try: + os.rmdir(next_dir_number_path) + except: + pass + break + + # Increase number. + dir_number += 1 + + # Stop multiprocessing pool and wait for its workers to finish. + for _ in range(MP_PROCESS_COUNT): + queue.put(None) + mp_pool.close() + mp_pool.join() + + # Create 0 directory if it doesn't exist. 
+ print('Merging directories:', end=' ') + merge_dest_path = os.path.join(dir_path, '0') + if not os.path.isdir(merge_dest_path): + os.makedirs(merge_dest_path) + + # Merge all directories into the 0 directory. + for merge_dir_name in range(1, dir_number + 1): + merge_dir_path = os.path.join(dir_path, str(merge_dir_name)) + if not os.path.isdir(merge_dir_path): + continue + print(merge_dir_name, end=' ') + + subprocess.run(['cp', '-rlaT', merge_dir_path, merge_dest_path], stdout=devnull, stderr=subprocess.STDOUT) + subprocess.Popen(['rm', '-rf', merge_dir_path], stdout=devnull, stderr=subprocess.STDOUT) + + # Clean up. + devnull.close() + print() + return 0 + + +# Analysis module. + +def analyze_dir(formatter, scan_base, file_analyzers, scan_dir_path, scan_file_names): + """Process a given directory for analysis.""" + + # Sort file names for better predictability. + scan_file_names.sort() + + # Set up caches. + files_flags = {} + files_data = {} + combined_oroms = [] + header_data = None + + # In combined mode (enabled by InterleaveExtractor and BIOSExtractor), we + # handle all files in the directory as a single large blob, to avoid any doubts. + combined = ':combined:' in scan_file_names + if combined: + files_data[''] = b'' + + # Read files into the cache. + cache_quota = ANALYZER_MAX_CACHE_MB * 1073741824 + for scan_file_name in scan_file_names: + # Skip known red herrings. This check is legacy code with an unknown impact. + scan_file_name_lower = scan_file_name.lower() + if 'post.string' in scan_file_name_lower or 'poststr.rom' in scan_file_name_lower: + continue + + # Read up to 16 MB as a safety net. + file_data = util.read_complement(os.path.join(scan_dir_path, scan_file_name)) + + # Write data to cache. + if scan_file_name == ':header:': + header_data = file_data + elif combined: + files_data[''] += file_data + + # Add PCI option ROM IDs extracted from AMI BIOSes by bios_extract, since the ROM might not + # contain a valid PCI header to begin with. 
(Apple PC Card with OPTi Viper and AMIBIOS 6) + match = re.match('''amipci_([0-9a-f]{4})_([0-9a-f]{4})\.rom$''', scan_file_name_lower) + if match: + combined_oroms.append((int(match.group(1), 16), int(match.group(2), 16))) + else: + files_data[scan_file_name] = file_data + + # Stop reading if the cache has gotten too big. + cache_quota -= len(file_data) + if cache_quota <= 0: + break + + # Prepare combined-mode analysis. + if combined: + # Set interleaved flag on de-interleaved blobs. + if scan_file_names == [':combined:', 'deinterleaved_a.bin', 'deinterleaved_b.bin', 'interleaved_a.bin', 'interleaved_b.bin']: + combined = 'interleaved' + + # Commit to only analyzing the large blob. + scan_file_names = [''] + elif header_data: + # Remove header flag file from list. + scan_file_names.remove(':header:') + + # Analyze each file. + for scan_file_name in scan_file_names: + # Read file from cache if possible. + scan_file_path = os.path.join(scan_dir_path, scan_file_name) + file_data = files_data.get(scan_file_name, None) + if file_data == None: + # Read up to 16 MB as a safety net. + file_data = util.read_complement(scan_file_path) + + # Check for an analyzer which can handle this file. + bonus_analyzer_addons = bonus_analyzer_oroms = None + file_analyzer = None + strings = None + for analyzer in file_analyzers: + # Reset this analyzer. + analyzer.reset() + analyzer._file_path = scan_file_path + + # Check if the analyzer can handle this file. + try: + analyzer_result = analyzer.can_handle(file_data, header_data) + except: + # Log an error. + util.log_traceback('searching for analyzers for', os.path.join(scan_dir_path, scan_file_name)) + continue + + # Move on if the analyzer responded negatively. + if not analyzer_result: + # Extract add-ons and option ROMs from the bonus analyzer. + if bonus_analyzer_addons == None: + bonus_analyzer_addons = analyzer.addons + bonus_analyzer_oroms = analyzer.oroms + continue + + # Run strings on the file data if required (only once). 
+ if not strings: + try: + strings = subprocess.run(['strings', '-n8'], input=file_data, stdout=subprocess.PIPE).stdout.decode('ascii', 'ignore').split('\n') + except: + util.log_traceback('running strings on', os.path.join(scan_dir_path, scan_file_name)) + continue + + # Analyze each string. + try: + for string in strings: + analyzer.analyze_line(string) + except analyzers.AbortAnalysisError: + # Analysis aborted. + pass + except: + # Log an error. + util.log_traceback('analyzing', os.path.join(scan_dir_path, scan_file_name)) + continue + + # Take this analyzer if it produced a version. + if analyzer.version: + # Clean up version field if an unknown version was returned. + if analyzer.version == '?': + analyzer.version = '' + + # Stop looking for analyzers. + file_analyzer = analyzer + break + + # Did any analyzer successfully handle this file? + if not file_analyzer: + # Treat this as a standalone PCI option ROM file if BonusAnalyzer found any. + if bonus_analyzer_oroms: + bonus_analyzer_addons = [] + file_analyzer = file_analyzers[0] + else: + # Move on to the next file if nothing else. + continue + + # Add interleaved flag to add-ons. + if combined == 'interleaved': + bonus_analyzer_addons.append('Interleaved') + + # Clean up the file path. + scan_file_path_full = os.path.join(scan_dir_path, scan_file_name) + + # Remove combined directories. + found_flag_file = True + while found_flag_file: + # Find archive indicator. + archive_idx = scan_file_path_full.rfind(':' + os.sep) + if archive_idx == -1: + break + + # Check if a combined or header flag file exists. + found_flag_file = False + for flag_file in (':combined:', ':header:'): + if os.path.exists(os.path.join(scan_file_path_full[:archive_idx] + ':', flag_file)): + # Trim the directory off. + scan_file_path_full = scan_file_path_full[:archive_idx] + found_flag_file = True + break + + scan_file_path = scan_file_path_full[len(scan_base) + len(os.sep):] + + # Remove root extraction directory. 
+ slash_index = scan_file_path.find(os.sep) + if slash_index == 1 and scan_file_path[0] == '0': + scan_file_path = scan_file_path[2:] + + # De-duplicate and sort add-ons and option ROMs. + addons = list(set(addon.strip() for addon in (analyzer.addons + bonus_analyzer_addons))) + addons.sort() + oroms = list(set(combined_oroms + analyzer.oroms + bonus_analyzer_oroms)) + oroms.sort() + + # Add names to option ROMs. + previous_vendor = previous_device = None + for x in range(len(oroms)): + # Get vendor and device IDs and names. + vendor_id, device_id = oroms[x] + vendor, device = util.get_pci_id(vendor_id, device_id) + + # Skip valid vendor IDs associated to a bogus device ID. + if device == '[Unknown]' and device_id == 0x0000: + oroms[x] = None + continue + + # Clean up IDs. + vendor = util.clean_vendor(vendor).strip() + device = util.clean_device(device, vendor).strip() + + # De-duplicate vendor names. + if vendor == previous_vendor and vendor != '[Unknown]': + if device == previous_device: + previous_device, device = device, '' + previous_vendor, vendor = vendor, '\u2196' # up-left arrow + else: + previous_device = device + previous_vendor, vendor = vendor, ' ' * len(vendor) + else: + previous_device = device + previous_vendor = vendor + + # Format string. + oroms[x] = '[{0:04x}:{1:04x}] {2} {3}'.format(vendor_id, device_id, vendor, device) + + # Remove bogus option ROM device ID entries. + while None in oroms: + oroms.remove(None) + + # Collect the analyzer's results. + fields = [((type(field) == str) and field.replace('\t', ' ').strip(' \n') or field) for field in [ + scan_file_path, + file_analyzer.vendor, + file_analyzer.version, + formatter.split_if_required('\n', file_analyzer.string), + formatter.split_if_required('\n', file_analyzer.signon), + formatter.join_if_required(' ', addons), + formatter.join_if_required('\n', oroms), + ]] + + # Output the results. 
+ formatter.output_row(fields) + +def analyze_process(queue, formatter, scan_base): + """Main loop for the analysis multiprocessing pool.""" + + # Set up analyzers. + file_analyzers = [ + analyzers.BonusAnalyzer(), # must be the first one + analyzers.AwardPowerAnalyzer(), # must run before AwardAnalyzer + analyzers.ToshibaAnalyzer(), # must run before AwardAnalyzer + analyzers.AwardAnalyzer(), # must run before PhoenixAnalyzer + analyzers.QuadtelAnalyzer(), # must run before PhoenixAnalyzer + analyzers.PhoenixAnalyzer(), # must run before AMIDellAnalyzer and AMIIntelAnalyzer + #analyzers.AMIDellAnalyzer(), # must run before AMIAnalyzer + analyzers.AMIUEFIAnalyzer(), # must run before AMIAnalyzer + analyzers.AMIAnalyzer(), # must run before AMIIntelAnalyzer + analyzers.AMIIntelAnalyzer(), + analyzers.MRAnalyzer(), + # less common BIOSes with no dependencies on the common part begin here # + analyzers.AcerAnalyzer(), + analyzers.AmstradAnalyzer(), + analyzers.CDIAnalyzer(), + analyzers.CentralPointAnalyzer(), + analyzers.ChipsAnalyzer(), + analyzers.CommodoreAnalyzer(), + analyzers.CompaqAnalyzer(), + analyzers.CorebootAnalyzer(), + analyzers.DTKGoldStarAnalyzer(), + analyzers.GeneralSoftwareAnalyzer(), + analyzers.IBMAnalyzer(), + analyzers.InsydeAnalyzer(), + analyzers.IntelUEFIAnalyzer(), + analyzers.JukoAnalyzer(), + analyzers.MRAnalyzer(), + analyzers.OlivettiAnalyzer(), + analyzers.SchneiderAnalyzer(), + analyzers.SystemSoftAnalyzer(), + analyzers.TandonAnalyzer(), + analyzers.TinyBIOSAnalyzer(), + analyzers.WhizproAnalyzer(), + analyzers.ZenithAnalyzer(), + ] + + # Receive work from the queue. + while True: + item = queue.get() + if item == None: # special item to stop the loop + break + analyze_dir(formatter, scan_base, file_analyzers, *item) + +def analyze(dir_path, formatter_args, options): + """Main function for analysis.""" + + # Initialize output formatter. 
+ output_formats = { + 'csv': (formatters.XSVFormatter, ','), + 'scsv': (formatters.XSVFormatter, ';'), + 'json': formatters.JSONObjectFormatter, + 'jsontable': formatters.JSONTableFormatter, + } + formatter = output_formats.get(options['format'], None) + if not formatter: + raise Exception('unknown output format ' + options['format']) + if type(formatter) == tuple: + formatter = formatter[0](*formatter[1:], sys.stdout, options, formatter_args) + else: + formatter = formatter(sys.stdout, options, formatter_args) + + # Begin output. + formatter.begin() + formatter.output_headers(['File', 'Vendor', 'Version', 'String', 'Sign-on', 'Add-ons', 'PCI ROMs'], options.get('headers')) + + # Remove any trailing slash from the root path, as the output path cleanup + # functions rely on it not being present. + if dir_path[-len(os.sep):] == os.sep: + dir_path = dir_path[:-len(os.sep)] + elif dir_path[-1:] == '/': + dir_path = dir_path[:-1] + + # Start multiprocessing pool. + queue = multiprocessing.Queue(maxsize=MP_PROCESS_COUNT) + mp_pool = multiprocessing.Pool(MP_PROCESS_COUNT, initializer=analyze_process, initargs=(queue, formatter, dir_path)) + + # Scan directory structure. + for scan_dir_path, scan_dir_names, scan_file_names in os.walk(dir_path): + queue.put((scan_dir_path, scan_file_names)) + + # Stop multiprocessing pool and wait for its workers to finish. + for _ in range(MP_PROCESS_COUNT): + queue.put(None) + mp_pool.close() + mp_pool.join() + + # End output. 
+ formatter.end() + + return 0 + + +def main(): + mode = None + options = { + 'array': False, + 'format': 'csv', + 'headers': True, + 'hyperlink': False, + } + + args, remainder = getopt.getopt(sys.argv[1:], 'xaf:hnr', ['extract', 'analyze', 'format=', 'hyperlink', 'no-headers', 'array']) + for opt, arg in args: + if opt in ('-x', '--extract'): + mode = 'extract' + elif opt in ('-a', '--analyze'): + mode = 'analyze' + elif opt in ('-f', '--format'): + options['format'] = arg.lower() + elif opt in ('-h', '--hyperlink'): + options['hyperlink'] = True + elif opt in ('-n', '--no-headers'): + options['headers'] = False + elif opt in ('-r', '--array'): + options['array'] = True + + if len(remainder) > 0: + if mode == 'extract': + return extract(remainder[0], remainder[1:], options) + elif mode == 'analyze': + return analyze(remainder[0], remainder[1:], options) + + usage = ''' +Usage: python3 -m biostools -x directory + python3 -m biostools [-f output_format] [-h] [-n] [-r] -a directory [formatter_options] + + -x Extract archives and BIOS images recursively in the given directory + + -a Analyze extracted BIOS images in the given directory + -f Output format: + csv Comma-separated values with quotes (default) + scsv Semicolon-separated values with quotes + json JSON object array + jsontable JSON table + -h Generate download links for file paths representing HTTP URLs. + csv/scsv: The Excel HYPERLINK formula is used; if you have + non-English Excel, you must provide your language's + HYPERLINK formula name in formatter_options. + -n csv/scsv/jsontable: Don't output column headers. + -r json/jsontable: Output multi-value cells as arrays. 
+''' + print(usage, file=sys.stderr) + return 1 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/biostools/analyzers.py b/biostools/analyzers.py new file mode 100644 index 0000000..3a630d4 --- /dev/null +++ b/biostools/analyzers.py @@ -0,0 +1,2320 @@ +#!/usr/bin/python3 +# +# 86Box A hypervisor and IBM PC system emulator that specializes in +# running old operating systems and software designed for IBM +# PC systems and compatibles from 1981 through fairly recent +# system designs based on the PCI bus. +# +# This file is part of the 86Box BIOS Tools distribution. +# +# BIOS image analyzer classes. +# +# +# +# Authors: RichardG, +# +# Copyright 2021 RichardG. +# +import codecs, re, struct, sys +from . import util + +class Checker: + def __init__(self, pattern, flags): + self.pattern = pattern + self.flags = flags + + def match(self, line): + raise NotImplementedError() + + +class AlwaysRunChecker(Checker): + def match(self, line): + return True + + +SUBSTRING_CASE_SENSITIVE = 0 +SUBSTRING_CASE_INSENSITIVE = 1 +SUBSTRING_FULL_STRING = 2 +SUBSTRING_BEGINNING = 4 + +class SubstringChecker(Checker): + def __init__(self, pattern, flags): + super().__init__(pattern, flags) + + if self.flags & SUBSTRING_CASE_INSENSITIVE: + self.pattern = self.pattern.lower() + + def match(self, line): + if self.flags & SUBSTRING_CASE_INSENSITIVE: + line = line.lower() + + if self.flags & SUBSTRING_FULL_STRING: + return self.pattern == line + elif self.flags & SUBSTRING_BEGINNING: + return self.pattern == line[:len(self.pattern)] + else: + index = line.find(self.pattern) + if index > -1: + return (index,) + else: + return False + + def __repr__(self): + return '{cls}({pattern}{case})'.format( + cls=self.__class__.__name__, + pattern=repr(self.pattern), + case=self.flags and ', case_insensitive' or '' + ) + + +class RegexChecker(Checker): + def __init__(self, pattern, flags): + super().__init__(pattern, flags) + + if pattern: + self.pattern = re.compile(pattern, flags=flags) + + 
if pattern[0:1] == '^': + pattern = pattern[1:] + self.re_func = self.pattern.match + else: + self.re_func = self.pattern.search + else: + self.pattern = None + self.re_func = self._dummy_always_false + + def _dummy_always_false(self, line): + return False + + def match(self, line): + return self.re_func(line) + + def __repr__(self): + return '{cls}({pattern}, {func})'.format( + cls=self.__class__.__name__, + pattern=self.pattern, + func=self.re_func == self.pattern.match and 'match' or 'search' + ) + + +class AbortAnalysisError(Exception): + pass + + +class Analyzer: + def __init__(self, vendor, debug=False): + self.vendor_id = self.vendor = vendor + self.debug = debug + + self._check_list = [] + + self.reset() + + def analyze_line(self, line): + """Analyze a string found on the given file.""" + for callback, checker in self._check_list: + if type(callback) == tuple: + pre_check_func, callback_func = callback + if not pre_check_func(line): + continue + else: + callback_func = callback + + checker_result = checker.match(line) + if checker_result: + callback_result = callback_func(line, checker_result) + if callback_result: + self.debug_print(callback_func.__name__, line) + return callback_result + + def can_handle(self, file_data, header_data): + """Returns True if this analyzer can handle the given file data. + header_data contains data from the :header: flag file, or + None if no such file exists.""" + return True + + def debug_print(self, key, line=None): + """Print a line containing analyzer state if debugging is enabled.""" + if self.debug: + print(self._file_path, '=> found', self.vendor_id, key, '=', (line == None) and 'no line' or repr(line), '\n', end='', file=sys.stderr) + + def register_check_list(self, check_list): + """Register the list of checks this analyzer will handle. 
+ + This function accepts a list of tuples, each containing: + - callback or (pre-checker, callback) + - checker class + - checker flags (optional) + """ + for entry in check_list: + # Make a tuple out of a non-tuple. + if type(entry) != tuple: + entry = (entry,) + + # Extract parameters. + if len(entry) >= 3: + callback, checker_class, flags = entry + else: + callback, checker_class = entry + flags = 0 + + # Add to check list. + if type(callback) == tuple: + pattern = callback[1].__doc__ + else: + pattern = callback.__doc__ + self._check_list.append((callback, checker_class(pattern, flags))) + + def reset(self): + """Restore this analyzer to its initial state.""" + self.version = '' + self.string = '' + self.signon = '' + self.addons = [] + self.oroms = [] + + self._file_path = '?' + +class NoInfoAnalyzer(Analyzer): + """Special analyzer for BIOSes which can be identified, + but contain no information to be extracted.""" + def __init__(self, vendor, *args, **kwargs): + super().__init__(vendor, *args, **kwargs) + + def can_handle(self, file_data, header_data): + has_strings = self.has_strings(file_data) + + if has_strings: + self.version = '?' + + return has_strings + + def has_strings(self, file_data): + """Returns True if this analyzer can handle the given file data.""" + raise NotImplementedError() + + +class AcerAnalyzer(Analyzer): + def __init__(self, *args, **kwargs): + super().__init__('Acer', *args, **kwargs) + + self.register_check_list([ + (self._signon_486, RegexChecker), + ((self._version_precheck, self._version), RegexChecker), + (self._string, RegexChecker), + ]) + + def reset(self): + super().reset() + self._cpus = [] + self._trap_version = False + + def can_handle(self, file_data, header_data): + return b'Copyright (C) Acer Incorporated 1990' in file_data or b'Acer Boot Block v1.0' in file_data + + def _version_precheck(self, line): + return self._trap_version + + def _version_r(self, line, match): + '''^R([0-9])\.([0-9])''' + + # Extract version. 
+ self.version = match.group(0) + + return True + + def _version(self, line, match): + '''V([0-9])\.([0-9])''' + + # Extract version. + self.version = match.group(0) + + return True + + def _signon_486(self, line, match): + '''^(?:((?:PCI/)?(?:E)?ISA) )?(.+) BIOS $''' + + # Stop if the CPU is invalid. + cpu = match.group(2) + if cpu in ('E)', 'AM') or 'SCSI' in cpu or '(tm)' in cpu: + # "E)", "SCSI" (V55LA-2 R03-B1S0) + # "(tm)" (Fortress 1100) + # "AM" (V66LT) + return False + + # Add CPU to the sign-on if it wasn't already seen. + if cpu not in self._cpus: + self._cpus.append(cpu) + linebreak_index = self.signon.find('\n') + if linebreak_index > -1: + first_signon_line = self.signon[:linebreak_index] + else: + first_signon_line = self.signon + + if first_signon_line: + first_signon_line += '/' + first_signon_line += cpu + + if linebreak_index > -1: + self.signon = first_signon_line + self.signon[linebreak_index:] + else: + self.signon = first_signon_line + + # Add any prefix to the sign-on. + prefix = match.group(1) + if prefix and self.signon[:len(prefix) + 1] != (prefix + ' '): + self.signon = prefix + ' ' + self.signon + + # Read revision on the next non-string line. + self._trap_version = True + + return True + + def _string(self, line, match): + '''([A-Z]{3}[A-Z0-9]{3}00-[A-Z0-9]{3}-[0-9]{6}-[^\s]+)(?:\s+(.+))?''' + + # Extract string. + self.string = match.group(1) + + # Extract sign-on if present. + signon = match.group(2) + if signon: + if self.signon: + self.signon += '\n' + self.signon = signon + + # Read version on the next line. 
+ self._trap_version = True + + return True + + +class AMIAnalyzer(Analyzer): + def __init__(self, *args, **kwargs): + super().__init__('AMI', *args, **kwargs) + + self._date_pattern = re.compile(b'''([0-9]{2}/[0-9]{2}/[0-9]{2})[^0-9]''') + self._uefi_csm_pattern = re.compile('''63-0100-000001-00101111-......-Chipset$''') + self._intel_86_pattern = re.compile('''(?:[0-9A-Z]{8})\.86(?:[0-9A-Z])\.(?:[0-9A-Z]{4})\.(?:[0-9A-Z]{3})\.(?:[0-9]{10})$''') + # The "All Rights Reserved" is important to not catch the same header on other files. + # AMIBIOS 6+ version corner cases: + # - Second digit not 0 (I forget which one had 000000) + # - Can be 4-digit instead of 6-digit (Biostar) + self._id_block_pattern = re.compile(b'''(?:AMIBIOS (?:(0[1-9][0-9]{2}[\\x00-\\xFF]{2})[\\x00-\\xFF]{2}|W ([0-9]{2}) ([0-9]{2})[\\x00-\\xFF])|0123AAAAMMMMIIII|\(AAMMIIBBIIOOSS\))([0-9]{2}/[0-9]{2}/[0-9]{2})\(C\)[0-9]{4} American Megatrends,? Inc(?:\.,? All Rights Reserved|/Hewlett-Packard Company)''') + # Weird TGem identifier (TriGem 486-BIOS) + self._precolor_block_pattern = re.compile(b'''\(C\)[0-9]{4}(?:AMI,404-263-8181|TGem-HCS,PSC,JGS)''') + # "Date:-" might not have a space after it (Intel AMI) + self._precolor_date_pattern = re.compile(b'''(?: Date:- ?|AMI- )[0-9]{2}/[0-9]{2}/[0-9]{2}''') + self._precolor_chipset_pattern = re.compile(b'''(SETUP PROGRAM FOR [\\x20-\\x7F]+)|(EMI 386 CHIPSET SETUP UTILITY)|(VLSI BIOS, 286 CHIPSET)|(CHIP & TECH SETUP PROGRAM)|( 286 BIOS)|(386 BIOS, NO CHIPSET)|([234]86-BIOS \(C\))''') + self._precolor_signon_pattern = re.compile(b'''BIOS \(C\).*(?:AMI|American Megatrends Inc), for ([\\x0D\\x0A\\x20-\\x7E]+)''') + + self.register_check_list([ + (self._string_pcchips, RegexChecker), + (self._string_setupheader, RegexChecker), + (self._signon_intel, RegexChecker), + (self._addons_color, SubstringChecker, SUBSTRING_FULL_STRING | SUBSTRING_CASE_SENSITIVE), + (self._addons_easy, SubstringChecker, SUBSTRING_BEGINNING | SUBSTRING_CASE_SENSITIVE), + 
(self._addons_hiflex, SubstringChecker, SUBSTRING_FULL_STRING | SUBSTRING_CASE_SENSITIVE), + (self._addons_new, SubstringChecker, SUBSTRING_BEGINNING | SUBSTRING_CASE_SENSITIVE), + (self._addons_simple, SubstringChecker, SUBSTRING_BEGINNING | SUBSTRING_CASE_SENSITIVE), + (self._addons_winbios, SubstringChecker, SUBSTRING_CASE_SENSITIVE), + ]) + + def can_handle(self, file_data, header_data): + if b'American Megatrends Inc' not in file_data and b'AMIBIOSC' not in file_data and b'All Rights Reserved, (C)AMI (C)AMI (C)AMI ' not in file_data and b'(C) Access Methods Inc.' not in file_data: + return False + + # The decompressed body for some BIOSes on Intel's first AMI run lacks the Intel version number, so we + # can't determine this is an Intel first AMI run BIOS (which needs a classic date version) solely through + # that. Use AMIIntelAnalyzer to determine if this is an Intel BIOS, and enable classic dates in that case. + if header_data and AMIIntelAnalyzer.can_handle(self, file_data, header_data): + self._can_version_classic = True + + # Check post-Color identification block. + match = self._id_block_pattern.search(file_data) + if match: + # Determine location of the identification block. + id_block_index = match.start(0) + + # Extract version. + version_6plus = match.group(1) + if version_6plus: + # AMIBIOS 6 onwards. + self.version = version_6plus.decode('cp437', 'ignore') + + # Pad 4-digit versions. (Biostar) + if self.version[-1] not in '0123456789': + self.version = self.version[:4] + '00' + else: + # WinBIOS (AMIBIOS 4/5) + version_winbios_maj = match.group(2) + version_winbios_min = match.group(3) + if version_winbios_maj and version_winbios_min: + self.version = (version_winbios_maj + version_winbios_min).decode('cp437', 'ignore') + '00' + self.addons.append('WinBIOS') + else: + # AMI Color date. + self.version = match.group(4).decode('cp437', 'ignore') + + # Extract string. 
+ self.string = util.read_string(file_data[id_block_index + 0x78:id_block_index + 0xa0]).strip() + + # Stop if this BIOS is actually Aptio UEFI CSM. + if self._uefi_csm_pattern.match(self.string): + return False + + # Extract sign-on, while removing carriage returns. + self.signon = util.read_string(file_data[id_block_index + 0x100:id_block_index + 0x200]) + + # The actual sign-on starts on the second line. + self.signon = '\n'.join(x.rstrip('\r').strip() for x in self.signon.split('\n')[1:] if x != '\r').strip('\n') + elif len(file_data) < 1024: + # Ignore false positives from sannata readmes. + return False + elif self._precolor_date_pattern.search(file_data): + # Check date, using a different pattern to differentiate core date from build date. + match = self._date_pattern.search(file_data) + if match: + # Extract date as the version. + self.version = match.group(1).decode('cp437', 'ignore') + + # Check pre-Color identification block. + match = self._precolor_block_pattern.search(file_data) + if match: + # Determine location of the identification block. + id_block_index = match.start(0) + + # Reconstruct string, starting with the setup type. + if b'ROM DIAGNOSTICS.(C)' in file_data: + self.string = 'D' + elif b'EXTENDED CMOS SETUP PROGRAM Ver - ' in file_data: + self.string = 'E' + else: + self.string = 'S' + + # Add chipset. Known undetectable codes due + # to a lack of BIOS images or marker strings: + # - 307 (C&T CS8236) + # - GS2 (GoldStar) + # - INT (Intel 82335) + # - PAQ (Compaq) + # - S24 (??? Morse KP286) + # - SUN (Suntac) + # - VLX (VLSI 386?) + chipset = '???' 
+ match = self._precolor_chipset_pattern.search(file_data) + if match: + setup_program_for = match.group(1) # "SETUP PROGRAM FOR" + if setup_program_for: + #if b' C&T ' in setup_program_for: # not necessary with fallback below + # chipset = 'C&T' + if b' INTEL 386 ' in setup_program_for: + chipset = '343' + elif b' NEAT ' in setup_program_for: + if b'NEATsx Memory Controller Identifier' in file_data: + chipset = 'NSX' + else: + chipset = 'NET' + elif b' OPTI ' in setup_program_for: + chipset = 'OPB' + elif b' SCAT ' in setup_program_for: + chipset = 'SC2' + elif b' SIS ' in setup_program_for: + chipset = 'SIS' + else: + # Your guess is as good as mine. + chipset = setup_program_for[18:21].decode('cp437', 'ignore') + if chipset != 'C&T': + self.signon = 'DEBUG:UnknownSetup:' + setup_program_for.decode('cp437', 'ignore') + else: + bios_id_index = match.start(5) # " 286 BIOS" + if bios_id_index > -1: + bios_id = file_data[bios_id_index - 10:bios_id_index + 1] + if b'ACER 1207 ' in bios_id: + chipset = 'AR2' + elif b'HT-11 ' in bios_id: + chipset = 'H12' + elif b'HT-1X ' in bios_id: + chipset = 'H1X' + elif b'NEAT ' in bios_id: # assumed; not bootable on 86Box + chipset = 'NET' + elif b'WIN ' in bios_id: # Winbond; not bootable on 86Box, source is a MAME comment + chipset = '286' + else: + self.signon = 'DEBUG:UnknownChipset:' + bios_id.decode('cp437', 'ignore') + elif match.group(2): # "EMI 386 CHIPSET SETUP UTILITY" + chipset = 'EMI' + elif match.group(3): # "VLSI BIOS, 286 CHIPSET" + chipset = 'VL2' + elif match.group(4): # "CHIP & TECH SETUP PROGRAM" + chipset = 'C&T' + elif match.group(6): # "386 BIOS, NO CHIPSET" + chipset = 'INT' + else: + x86_bios = match.group(7) # "[234]86-BIOS (C)" + if x86_bios: + chipset = x86_bios[:3].decode('cp437', 'ignore') + self.string += chipset + + # Add vendor ID. + self.string += '-' + codecs.encode(file_data[id_block_index - 0xbb:id_block_index - 0xb9], 'hex').decode('ascii', 'ignore').upper() + + # Add date. 
Use the entry point date instead of the identification block one, as it + # appears the entry point one is displayed on screen. (Shuttle 386SX, TriGem 486-BIOS) + self.string += '-' + util.read_string(file_data[id_block_index + 0x9c:id_block_index + 0xa4]).replace('/', '').strip() + + # Invalidate string if the identification block + # doesn't appear to be valid. (Intel AMI post-Color) + if self.string[:10] in ('S???-0000-', 'S???-0166-') and file_data[id_block_index - 0xb9:id_block_index - 0xb7] != b'\x00\x01': + self.string = '' + else: + # Extract additional information after the copyright as a sign-on. + # (Shuttle 386SX, CDTEK 286, Flying Triumph Access Methods) + match = self._precolor_signon_pattern.search(file_data) + if match: + self.signon = match.group(1).decode('cp437', 'ignore') + + # Split sign-on lines. (Video Technology Info-Tech 286-BIOS) + self.signon = '\n'.join(x.strip() for x in self.signon.split('\n') if x.strip()).strip('\n') + else: + # Assume this is not an AMI BIOS. + return False + + return True + + def _string_pcchips(self, line, match): + '''ADVANCED SYSTEM SETUP UTILITY VERSION.+PC CHIPS INC''' + + # This is an early PC Chips BIOS. + if not self.string: + self.string = 'PC Chips' + + return True + + def _string_setupheader(self, line, match): + '''[a-z][0-9/]+([^\(]*(SETUP PROGRAM FOR | SETUP UTILITY)[^\(]*)\(C\)19''' + + # Extract the setup header as a string if none was already found. + if not self.string: + self.string = match.group(1).replace(match.group(2), '') + + return True + + def _signon_intel(self, line, match): + '''^(?:(BIOS (?:Release|Version) )?([0-9]\.[0-9]{2}\.[0-9]{2}\.[A-Z][0-9A-Z]{1,})|(?:\$IBIOSI\$)?([0-9A-Z]{8}\.([0-9A-Z]{3})\.[0-9A-Z]{4}\.[0-9A-Z]{3}\.[0-9]{10}|(?:\.[0-9]{4}){3}))''' + + # If this is Intel's second AMI run, check if this is not a generic + # (86x) version string overwriting an OEM version string. 
+ oem = match.group(4) + if not oem or oem[:2] != '86' or not self._intel_86_pattern.match(self.signon): + # Extract the version string as a sign-on. + prefix_idx = self.signon.rfind(' ') + if prefix_idx > -1: + prefix = self.signon[:prefix_idx + 1] + else: + prefix = match.group(1) or '' + self.signon = prefix + (match.group(2) or match.group(3)) + + return True + + def _addons_color(self, line, match): + '''Improper Use of Setup may Cause Problems !!''' + + # Add setup type to add-ons. + self.addons.append('Color') + + return True + + def _addons_easy(self, line, match): + '''AMIBIOS EASY SETUP UTILIT''' + + # Add setup type to add-ons. + self.addons.append('EasySetup') + + return True + + def _addons_hiflex(self, line, match): + '''\\HAMIBIOS HIFLEX SETUP UTILIT''' + + # Add setup type to add-ons. + self.addons.append('HiFlex') + + return True + + def _addons_new(self, line, match): + '''AMIBIOS NEW SETUP UTILIT''' + + # Add setup type to add-ons. + self.addons.append('NewSetup') + + return True + + def _addons_simple(self, line, match): + '''\\HAMIBIOS SIMPLE SETUP UTILIT''' + + # Add setup type to add-ons. + self.addons.append('SimpleSetup') + + return True + + def _addons_winbios(self, line, match): + '''Keystroke/Mouse Convention''' + + # Add setup type to add-ons. + self.addons.append('WinBIOS') + + return True + + +class AMIDellAnalyzer(AMIAnalyzer): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.vendor_id = 'AMIDell' + + self.register_check_list([ + (self._version_dell, RegexChecker), + ]) + + def reset(self): + super().reset() + self._trap_signon_lines = 0 + + def can_handle(self, file_data, header_data): + if file_data[:9] == b'DELLBIOS\x00': + # DELLBIOS header contains the Dell version. + self.version = '11/11/92' + self.debug_print('DELLBIOS header') + + # Extract the version as a sign-on. 
+ terminator_index = file_data.find(b'\x00', 10) + if terminator_index > -1: + self.signon = file_data[10:terminator_index].decode('ascii', 'ignore').strip() + if self.signon: + self.signon = 'BIOS Version ' + self.signon + + return True + elif b'DELLXBIOS' in file_data and not re.search( + b'''att6300plus|''' + b'''Flash BIOS Update Program - Version |''' + b'''Technologies Ltd|''' + b'''Western Digital 32-bit disk driver \(WDCDRV\)''', + file_data): + # "att6300plus" (HIMEM.SYS) + # "Flash BIOS Update Program - Version " (FLASH.EXE) + # Substring of "Phoenix Technologies Ltd" (4xxT/M/L) + # "Western Digital 32-bit disk driver (WDCDRV)" (WDCDRV.386) + + # The Dell version will be in the BIOS body. + self.version = '11/11/92' + self.debug_print('DELLXBIOS string') + + return True + + return False + + def _version_intel(self, line, match): + # Prevent the Intel version detector from working here. + return False + + def _version_dell(self, line, match): + '''^BIOS Version (.+)''' + + # Extract both Dell and Intel version numbers as a sign-on. + version = match.group(1).strip() + if version[1:2] == '.': + # Intel version on second line. + linebreak_index = self.signon.find('\n') + if linebreak_index > -1: + self.signon = self.signon[:linebreak_index] + self.signon = self.signon.rstrip() + '\n' + version + else: + # Dell version. + self.signon = match.group(0).rstrip() + '\n' + self.signon.lstrip() + + return True + + def _string_main(self, line, match): + # Prevent the AMI string detector from working here. + return False + + def _signon_trigger(self, line, match): + '''^DELLXBIOS$''' + + # Read sign-on on the next few lines. + self._trap_signon_lines = 1 + + return True + + def _signon_line(self, line, match): + self._trap_signon_lines += 1 + if self._trap_signon_lines == 4: + # Extract the sign-on as a string, and disarm the trap. 
class AMIIntelAnalyzer(Analyzer):
    """Analyzer for Intel AMI BIOS images that could not be decompressed.

    Identification relies only on the multi-part flash header handed in as
    header_data; the Intel version string stored there becomes the sign-on.
    """

    def __init__(self, *args, **kwargs):
        super().__init__('AMI', *args, **kwargs)
        self.vendor_id = 'AMIIntel'

    def can_handle(self, file_data, header_data):
        # Handle Intel AMI BIOSes that could not be decompressed.
        #
        # Returns True (after setting self.version and self.signon) when an
        # Intel version string is present in header_data, False otherwise.
        # file_data is not inspected here.

        # Stop if there is no header data.
        if not header_data:
            return False

        # Stop if this is an User Data Area file.
        if header_data[112:126] == b'User Data Area':
            return False

        # Locate the Intel version within the multi-part header. The "FLASH"
        # marker may sit at one of two offsets; the version string starts
        # 22 bytes after the marker in both layouts.
        if header_data[90:95] == b'FLASH':
            version_offset = 112
        elif header_data[602:607] == b'FLASH':
            version_offset = 624
        else:
            return False

        # Take everything up to the NUL terminator. partition() keeps the
        # whole remainder when no terminator exists, whereas slicing with the
        # -1 returned by find() would silently drop the last byte.
        version = header_data[version_offset:].partition(b'\x00')[0]

        # Apply the version as a sign-on if one was extracted.
        if not version:
            return False

        self.version = 'Unknown Intel'
        self.signon = version.decode('cp437', 'ignore').strip()
        return True
+ if not self._identifier_regex.search(file_data): + return False + + self.version = 'UEFI' + + return True + + def _string_csm(self, line, match): + '''^63-0100-000001-00101111-......-Chipset$''' + + # Extract string from the AMIBIOS 8-based CSM, just because. + self.string = line + + return True + + def _signon_trigger(self, line, match): + '''^Version %x\.%02x\.%04x. Copyright \(C\)''' + + # Read sign-on on the next line if one wasn't already found. + if not self.signon: + self._trap_signon = True + + return True + + def _signon(self, line, match): + # Extract sign-on. + self.signon = line + + # Disarm trap. + self._trap_signon = False + + return True + + def _signon_asus(self, line, match): + '''. ACPI BIOS Revision .''' + + # Extract sign-on. + self.signon = line + + return True + + def _signon_prefixed(self, line, match): + '''^\$(?:(?:IBIOSI\$|UBI)([0-9A-Z]{8}\.[0-9A-Z]{3}(?:\.[0-9]{4}){4})|MSESGN\$(.+))''' + # "$IBIOSI$", "$UBI" (Intel) + # "$MSESGN$" (MSI) + + # Extract sign-on. 
class AmstradAnalyzer(NoInfoAnalyzer):
    """Detects Amstrad BIOS images purely from marker strings."""

    def __init__(self, *args, **kwargs):
        super().__init__('Amstrad', *args, **kwargs)

    def has_strings(self, file_data):
        # An Amstrad copyright notice is mandatory.
        if b'AMSTRAD plc' not in file_data and b'Amstrad Consumer Electronics plc' not in file_data:
            return False

        # Either this single marker is present...
        if b'IBMUS NON CARBORUNDUM' in file_data:
            return True

        # ...or all four localized battery warnings must be.
        battery_warnings = (
            b'Veuillez mettre des piles neuves',
            b'Batterie da sostituire',
            b'ponga piles nuevas',
            b'neue Batterien einsetzen',
        )
        return all(warning in file_data for warning in battery_warnings)
+ if self._ignore_pattern.search(file_data): + return False + + # The bulk of Award identification data has remained in one place for the longest time. + match = self._id_block_pattern.search(file_data) + if match: + # Determine location of the identification block. + id_block_index = match.start(0) + + # Extract version. + version_string = util.read_string(file_data[id_block_index + 0x61:id_block_index + 0xa1]) + version_match = self._version_pattern.search(version_string) + if version_match: + self.version = 'v' + (version_match.group(1) or version_match.group(2)) + elif version_string == 'Award Modular BIOS Version ': # Award version removed (Intel YM430TX) + self.version = 'Intel' + + # Add Phoenix-Award and WorkstationBIOS indicators. + if 'Phoenix' in version_string: + self.version += ' (Phoenix)' + elif 'WorkstationBIOS' in version_string: + self.version += ' (Workstation)' + + # Extract sign-on. + # Vertical tab characters may be employed (??? reported by BurnedPinguin) + self.signon = util.read_string(file_data[id_block_index + 0xc1:id_block_index + 0x10f]).replace('\r', '').replace('\v', '\n') + + # Split sign-on lines. + self.signon = '\n'.join(x.strip() for x in self.signon.split('\n') if x.strip()).strip('\n') + + # Extract string, unless the version is known to be too old to have a string. + if self.version[:3] not in ('v2.', 'v3.'): + self.string = util.read_string(file_data[id_block_index + 0xc71:id_block_index + 0xce0]) + + # bp/rom.by patches may include a new date in the "modul.tmp" + # patch code. If one is present, apply it to the string. + match = self._romby_date_pattern.search(file_data) + if match: + date_match = self._string_date_pattern.match(self.string) + if date_match: + # Apply the correct date (2-digit or 4-digit year). 
+ if len(date_match.group(1)) == 2: + date = match.group(4) + else: + date = match.group(1) + match.group(2) + match.group(3) + date = date.decode('cp437', 'ignore') + self.string = date + self.string[len(date):] + + if self.version == 'v6.00PG' and self._gigabyte_eval_pattern.match(self.signon): + # Reconstruct actual sign-on of a Gigabyte fork BIOS through + # the data in the $BIF area (presumably BIOS update data). + match = self._gigabyte_bif_pattern.search(file_data) + if match: + self.signon = (match.group(1) + b' ' + match.group(2)).decode('cp437', 'ignore').strip() + elif 'Award' not in version_string.split('\n')[0]: # "386SX Modular BIOS v3.15" + # Extract early Modular type as the string. + match = self._early_modular_prefix_pattern.match(version_string) + if match: + self.string = match.group(1) + + # Append post-version data to the string. + if version_match: + post_version = version_match.group(3) + if post_version: + post_version = post_version.strip() + if post_version: + if match: + self.string += '\n' + post_version + else: + self.string = post_version + + # Perform final clean-up. + self.version = self.version.strip() + self.string = self.string.strip() + self.signon = self.signon.strip() + + return True + + def _version_ast(self, line, match): + '''^.AST ((?:.+) BIOS Rel\. (?:.+))''' + + # This is an AST BIOS. + self.version = 'AST' + + # Extract model and version as a sign-on. + self.signon = match.group(1) + + return True + + def _version_pcxt(self, line, match): + '''(PC|XT) BIOS V([^\s]+)''' + + # Extract version if one wasn't already found. + if not self.version: + self.version = 'v' + match.group(2) + + # Extract BIOS type as a string. + self.string = match.group(1) + + return True + + def _addons_uefi(self, line, match): + '''EFI CD/DVD Boot Option''' + + # Flag Gigabyte Hybrid EFI as UEFI. 
class AwardPowerAnalyzer(Analyzer):
    """Analyzer for Award PowerBIOS images."""

    def __init__(self, *args, **kwargs):
        super().__init__('AwardPower', *args, **kwargs)
        self.vendor = 'Award'

        # NOTE: the docstring of each registered handler is its match
        # pattern, so those docstrings must not be reworded.
        self.register_check_list([
            (self._version, RegexChecker),
            (self._string, RegexChecker)
        ])

    def can_handle(self, file_data, header_data):
        # Both marker strings are required to claim the file.
        if b'PowerBIOS Setup' not in file_data or b'Award Software International, Inc.' not in file_data:
            return False

        # Identify as PowerBIOS.
        self.version = 'PowerBIOS'

        return True

    def _version(self, line, match):
        '''PowerBIOS Version (.+)'''

        # Add version number if there isn't one already. A space in
        # self.version means a number was appended by an earlier match.
        if ' ' not in self.version:
            self.version += ' ' + match.group(1).lstrip()
            return True

        return False

    def _string(self, line, match):
        '''-3[12357ABCDE]([A-Z0-9]{6})'''

        # PowerBIOS strings are quite fragmented. Just use the whole line with
        # some stripping applied to it.
        self.string = line.strip(' -')

        # Report success like every other handler in this module does;
        # previously this fell off the end and returned None.
        return True
(?:VGA C(?:OMPATIBLE|ompatible)|COMPATIBLE PARADISE)|ATI Technologies Inc\.|SiS super VGA chip''') + + def can_handle(self, file_data, header_data): + # PhoenixNet + if util.all_match(self._phoenixnet_patterns, file_data): + self.addons.append('PhoenixNet') + + # ACPI tables + match = self._acpi_table_pattern.search(file_data) + if match and struct.unpack(' 36: # length includes header, header is 36 bytes + self.addons.append('ACPI') + + # Adaptec SCSI + if self._adaptec_pattern.search(file_data): + self.addons.append('Adaptec') + + # NCR SCSI + match = self._ncr_pattern.search(file_data) + if match: + self.addons.append('NCR' + match.group(1).decode('ascii', 'ignore')) + + # PXE boot + if util.all_match(self._pxe_patterns, file_data): + self.addons.append('PXE') + + # RPL boot + if self._rpl_pattern.search(file_data): + self.addons.append('RPL') + + # SLI certificate + if self._sli_pattern.search(file_data): + self.addons.append('SLI') + + # UEFI + if header_data == b'\x00\xFFUEFIExtract\xFF\x00': + self.addons.append('UEFI') + + # VGA BIOS + if self._vbios_pattern.search(file_data): + self.addons.append('VGA') + + # Look for PCI option ROMs. + for match in self._pci_rom_pattern.finditer(file_data): + # Move on to the next ROM if we don't have a valid PCI data structure. + pci_header_ptr, = struct.unpack(' -1: + self.string = self.version[dash_index + 1:] + self.version = self.version[:dash_index] + + return True + + def _version_linuxbios(self, line, match): + '''^LinuxBIOS-([^_ ]+)[_ ](?:Normal |Fallback )(.+) starting\.\.\.$''' + + # Set vendor to LinuxBIOS instead. + self.vendor = 'LinuxBIOS' + + # Extract version. + self.version = match.group(1) + + # Extract any additional information after the version as a string. + self.string = match.group(2) + + return True + + def _string_coreboot(self, line, match): + '''^#define COREBOOT_BUILD "([^"]+)"''' + + # Add build date to string. 
class DTKGoldStarAnalyzer(Analyzer):
    """Analyzer for DTK (Datatech) and GoldStar BIOS images."""

    def __init__(self, *args, **kwargs):
        super().__init__('DTKGoldStar', *args, **kwargs)

        # The handler's docstring is its match pattern; keep it verbatim.
        self.register_check_list([
            (self._version, RegexChecker),
        ])

    def reset(self):
        super().reset()
        self._dtk = False

    def can_handle(self, file_data, header_data):
        # Any one of the vendor marker strings is enough.
        markers = (
            b'Datatech Enterprises Co., Ltd.',
            b'(C) Copyright by GoldStar Co.,Ltd.',
            b'GOLDSTAR SYSTEM SETUP',
        )
        return any(marker in file_data for marker in markers)

    def _version(self, line, match):
        '''^(?:(DTK|GoldStar) (.+) ROM BIOS Version |VER )([^\s]+)(?: ([^\s]+))?'''

        vendor, model, version, revision = match.group(1, 2, 3, 4)

        # The short "VER " form carries no vendor name; default to GoldStar.
        self.vendor = vendor or 'GoldStar'
        self.version = version

        # The string is the model followed by the revision on its own line,
        # skipping whichever of the two is absent.
        self.string = '\n'.join(part for part in (model, revision) if part)

        return True
class IBMAnalyzer(Analyzer):
    """Analyzer for IBM BIOS images, keyed on part number/copyright headers."""

    def __init__(self, *args, **kwargs):
        super().__init__('IBM', *args, **kwargs)

        # Plain header, and the same header with every byte doubled.
        self._header_pattern = re.compile(b'''([0-9]{2}[A-Z0-9][0-9]{4}) (COPR\. IBM|\(C\) COPYRIGHT IBM CORPORATION) 19[89][0-9]''')
        self._interleaved_header_pattern = re.compile(b'''(([0-9])\\2([0-9])\\3([A-Z0-9])\\4(?:[0-9]{8})) (CCOOPPRR\.\. IIBBMM|\(\(CC\)\) CCOOPPYYRRIIGGHHTT IIBBMM CCOORRPPOORRAATTIIOONN) 1199([89])\\6([0-9])\\7''')

    def can_handle(self, file_data, header_data):
        found_numbers = []
        found_notices = []

        # Collect plain part number/copyright headers.
        for header in self._header_pattern.finditer(file_data):
            found_numbers.append(header.group(1))
            found_notices.append(header.group(2))

        # Collect interleaved headers, splitting each into its even-indexed
        # and odd-indexed halves.
        for header in self._interleaved_header_pattern.finditer(file_data):
            doubled_number = header.group(1)
            doubled_notice = header.group(5)
            found_numbers += [doubled_number[::2], doubled_number[1::2]]
            found_notices += [doubled_notice[::2], doubled_notice[1::2]]

        # Without any part number this is not ours.
        if not found_numbers:
            return False

        # Assume long-form copyright indicates a PS/2.
        if b'(C) COPYRIGHT IBM CORPORATION' in found_notices:
            self.version = 'PS/2 or PS/1'
        else:
            self.version = 'PC series'

        # The string is the sorted, de-duplicated list of FRU codes.
        unique_numbers = {number.decode('ascii', 'ignore') for number in found_numbers}
        self.string = '\n'.join(sorted(unique_numbers))

        return True
class IntelUEFIAnalyzer(Analyzer):
    """Analyzer for Intel UEFI images that went through UEFIExtractor."""

    def __init__(self, *args, **kwargs):
        super().__init__('Intel', *args, **kwargs)
        self.vendor_id = 'IntelUEFI'

        self._identifier_regex = re.compile(b'''\$(?:IBIOSI\$|FID)[0-9A-Z]{8}\.''')

        # The handler's docstring is its match pattern; keep it verbatim.
        self.register_check_list([
            (self._signon, RegexChecker),
        ])

    def can_handle(self, file_data, header_data):
        # Accept only files sent through UEFIExtractor that also contain
        # an Intel version code identifier.
        from_uefiextract = header_data == b'\x00\xFFUEFIExtract\xFF\x00'
        if from_uefiextract and self._identifier_regex.search(file_data):
            self.version = 'UEFI'
            return True

        return False

    def _signon(self, line, match):
        '''^(?:\$(?:IBIOSI\$|FID))?([0-9A-Z]{8}\.([0-9A-Z]{3})(?:\.[0-9]{4}){4})'''

        # The version code itself is the sign-on.
        version_code = match.group(1)
        self.signon = version_code

        return True
+ match = self._signon_pattern.search(file_data) + if match: + self.signon = match.group(1).decode('cp437', 'ignore') + if len(self.signon) == 1: # single character when not set + self.signon = '' + self.signon = self.signon.strip() + + return True + + def _version_newer(self, line, match): + '''^MR BIOS \(r\) (V(?:[^\s]+))(?: (.+))?$''' + + # Extract version. + self.version = match.group(1) + + # Extract part number as a string if one was found. + part_number = match.group(2) + if part_number: + self.string = part_number + + return True + + def _version_older(self, line, match): + '''^Ver: (V[^-]+)-(.+)''' + + # Extract version. + self.version = match.group(1) + + # Extract part number(?) + self.string = match.group(2) + + return True + + +class OlivettiAnalyzer(Analyzer): + def __init__(self, *args, **kwargs): + super().__init__('Olivetti', *args, **kwargs) + + self._version_pattern = re.compile('''Version ([^\s]+)''') + + self.register_check_list([ + ((self._version_precheck, self._version), AlwaysRunChecker), + (self._string_date, RegexChecker), + ]) + + def reset(self): + super().reset() + self._trap_version = False + + def can_handle(self, file_data, header_data): + has_strings = b'COPYRIGHT (C) OLIVETTI' in file_data and (b'No ROM BASIC available - RESET' in file_data or b'ROM BASIC Not Available,' in file_data) + + if has_strings: + # Start by assuming this is an unversioned BIOS. + self.version = '?' + + return has_strings + + def _version_precheck(self, line): + return self._trap_version + + def _version(self, line, match): + # Extract version if valid. + match = self._version_pattern.match(line) + if match: + self.version = match.group(1) + + # Disarm trap. + self._trap_version = False + + return True + + def _string_date(self, line, match): + '''^(?:COPYRIGHT \(C\) OLIVETTI )?([0-9]{2}/[0-9]{2}/[0-9]{2})$''' + + # Extract the date as a string if newer than any previously-found date. 
+ date = match.group(1) + if not self.string or util.date_gt(date, self.string, util.date_pattern_mmddyy): + self.string = date + + # Read version on the next line. + self._trap_version = True + + return True + + +class PhoenixAnalyzer(Analyzer): + def __init__(self, *args, **kwargs): + super().__init__('Phoenix', *args, **kwargs) + + # "All Rights Reserved\r\n\n\x00\xF4\x01" (Ax86) + # "All Rights Reserved\r\n\n\x00" (Commodore 386LT, Tandy 1000RSX) + # "All Rights Reserved\r\n\n" (ROM BIOS) + # "All Rights Reserved\r\n\r\n\r\n" (Gateway 4DX2-50V) + self._rombios_signon_pattern = re.compile(b'''\\x0D\\x0AAll Rights Reserved\\x0D\\x0A(?:\\x0A(?:\\x00(?:\\xF4\\x01)?)?|\\x0D\\x0A\\x0D\\x0A)''') + self._bcpsys_datetime_pattern = re.compile('''(?:[0-9]{2})/(?:[0-9]{2})/(?:[0-9]{2}) ''') + self._core_signon_pattern = re.compile(b'''\\x00FOR EVALUATION ONLY\. NOT FOR RESALE\.\\x00([\\x00-\\xFF]+)\\x00Primary Master \\x00''') + self._intel_86_pattern = re.compile('''(?:[0-9A-Z]{8})\.86(?:[0-9A-Z])\.(?:[0-9A-Z]{4})\.(?:[0-9A-Z]{3})\.(?:[0-9]{10})$''') + + self.register_check_list([ + ((self._signon_fujitsu_precheck, self._signon_fujitsu), AlwaysRunChecker), + ((self._signon_nec_precheck, self._signon_nec), AlwaysRunChecker), + (self._version_xx86, RegexChecker), # "All Rights Reserved" => "A286 Version 1.01" + (self._version_pentium, RegexChecker), + (self._version_40rel, RegexChecker), + (self._version_40x, RegexChecker), + (self._version_branch, RegexChecker), + (self._version_core, RegexChecker), + (self._version_grid, SubstringChecker, SUBSTRING_FULL_STRING | SUBSTRING_CASE_SENSITIVE), + (self._version_rombios, RegexChecker), + (self._version_tandy, SubstringChecker, SUBSTRING_FULL_STRING | SUBSTRING_CASE_SENSITIVE), + ((self._date_precheck, self._string_date), RegexChecker), + (self._signon_ast, SubstringChecker, SUBSTRING_BEGINNING | SUBSTRING_CASE_SENSITIVE), + ((self._dell_precheck, self._signon_dell), RegexChecker), + (self._signon_commodore, RegexChecker), + 
(self._signon_fujitsu_trigger, SubstringChecker, SUBSTRING_FULL_STRING | SUBSTRING_CASE_SENSITIVE), + (self._signon_hp, RegexChecker), + (self._signon_intel, RegexChecker), + (self._signon_nec_trigger, RegexChecker), + (self._signon_surepath, RegexChecker), + (self._signon_tandy, RegexChecker), + ]) + + def reset(self): + super().reset() + self._is_core = False + self._trap_signon_fujitsu_lines = 0 + self._trap_signon_nec = False + self._found_signon_tandy = '' + + def can_handle(self, file_data, header_data): + # "Phoenix ROM BIOS" (Dell Latitude CP/CPI) + if b'Phoenix Technologies Ltd' not in file_data and b'Phoenix ROM BIOS' not in file_data and b'PPhhooeenniixx TTeecchhnnoollooggiieess' not in file_data: + return False + + # Skip Windows 95 INF updates. + if b'search=f000,0,ffff,S,"' in file_data: + return False + + # Read build date and time from BCPSYS on 4.0 and newer BIOSes. + offset = file_data.find(b'BCPSYS') + if offset > -1: + # Extract the build date and time as a string. + self.string = file_data[offset + 15:offset + 32].replace(b'\x00', b'\x20').decode('ascii', 'ignore').strip() + + # Discard if this is an invalid date/time (PHLASH.EXE) + if not self._bcpsys_datetime_pattern.match(self.string): + self.string = '' + else: + self.debug_print('BCPSYS date/time', self.string) + + # Determine if this is a Dell BIOS (48-byte header). + offset = file_data.find(b'Dell System ') + if offset > -1: + self.version = 'Dell' + self.signon = '\n' + + # Extract Dell version. + if file_data[offset + 0x20:offset + 0x21] == b'A': + self.signon += 'BIOS Version: ' + file_data[offset + 0x20:offset + 0x30].decode('ascii', 'ignore').rstrip('^\x00') + else: + # Extract sign-on from Core and some 4.0 Release 6.0 BIOSes. + match = self._core_signon_pattern.search(file_data) + if match: + self.signon = match.group(1).decode('cp437', 'ignore') + else: + # Extract sign-on from Ax86 and older BIOSes. 
+ match = self._rombios_signon_pattern.search(file_data) + if match: + end = match.end(0) + if file_data[end] != 0xfa: # (unknown 8088 PLUS 2.52) + self.signon = util.read_string(file_data[end:end + 256]) + + # Split sign-on lines. + if self.signon: + self.signon = self.signon.replace('\r', '\n').replace('\x00', ' ') + self.signon = '\n'.join(x.strip() for x in self.signon.split('\n') if x.strip()).strip('\n') + + return True + + def _core_precheck(self, line): + return self._is_core + + def _date_precheck(self, line): + return len(self.string) != 8 or util.date_pattern_mmddyy.match(line) + + def _dell_precheck(self, line): + return self.version == 'Dell' + + def _signon_fujitsu_precheck(self, line): + return self._trap_signon_fujitsu_lines > 0 + + def _signon_nec_precheck(self, line): + return self._trap_signon_nec + + def _version_40rel(self, line, match): + '''Phoenix(MB)? ?BIOS ([0-9]\.[^\s]+ Release [0-9]\.[^\s]+)(?: (.+))?''' + + # Extract version with release. + self.version = match.group(2) + + # Add version prefix if one was found. + prefix = match.group(1) + if prefix: + self.version = prefix + ' ' + self.version + + # Extract any additional information after the version + # as a sign-on, if one wasn't already found. + additional_info = match.group(3) + if additional_info and not self.signon: + self.signon = additional_info.rstrip() + + return True + + def _version_40x(self, line, match): + '''Phoenix(?:(MB)(?: BIOS)?| ?BIOS(?: (Developmental))?) Version +([0-9]\.[^\s.]+)(?:[\s\.](.+))?''' + + # Extract version. + self.version = match.group(3) + + # Add version prefix if one was found. + prefix = match.group(1) or match.group(2) + if prefix: + self.version = prefix + ' ' + self.version + + # Extract any additional information after the version + # as a sign-on, if one wasn't already found. 
+ additional_info = match.group(4) + if additional_info and not self.signon: + self.signon = additional_info.rstrip() + + return True + + def _version_branch(self, line, match): + '''Phoenix ([A-Za-z]+(?:BIOS|Bios)) (?:Version ([0-9]\.[^\s]+)|([0-9](?:\.[0-9.]+)? Release [0-9]\.[^\s]+))(?:[\s\.](.+))?''' + + # Extract version with branch and release. + self.version = match.group(1) + ' ' + (match.group(2) or match.group(3)) + + # Extract any additional information after the version + # as a sign-on, if one wasn't already found. + additional_info = match.group(4) + if additional_info and not self.signon: + self.signon = additional_info.rstrip() + + return True + + def _version_core(self, line, match): + '''^Phoenix (cME|[A-Za-z]+Core)(?:\(tm\))? (?!Setup)([^\s]+)?''' + + # Extract the first word. + self.version = match.group(1) + + # Extract the second word. + second_word = match.group(2) + if second_word: + if second_word == 'SVR': + second_word = 'Server' + self.version += ' ' + second_word + + # Mark this as a Core BIOS for sign-on extraction. + self._is_core = True + + return True + + def _version_grid(self, line, match): + '''Copyright (C) 1987-1991, GRiD Systems Corp.All Rights Reserved''' + + # This is a GRiD BIOS. + if not self.version: + self.version = 'GRiD' + + return False + + def _version_pentium(self, line, match): + '''^(?:PhoenixBIOS(?:\(TM\))? )?for ((?:486/)?Pentium)\s?\(TM\)(?: CPU)? - ([^\s]+) Version ([^-\s]+)(?:(?:-|\s)(.+))?''' + + # Add branch to version. + self.version = match.group(1) + + # Add non-ISA bus types to version. + bus_type = match.group(2) + if bus_type != 'ISA': + self.version += ' ' + bus_type + + # Add actual version. + self.version += ' ' + match.group(3) + + # Extract any additional information after the version as a sign-on, + # if one wasn't already found. 
+ post_version = match.group(4) + if not self.signon and post_version: + post_version = post_version.strip() + if post_version: + self.signon = post_version + + return True + + def _version_rombios(self, line, match): + '''(?:(?:((?:8086|8088|V20 |(?:80)?(?:[0-9]{3}))(?:/EISA)?) )?ROM BIOS (PLUS )?|^ (PLUS) )Ver(?:sion)? ([0-9]\.[A-Z0-9]{2,})\.?([^\s]*)(\s+[0-9A-Z].+)?''' + + # Stop if this was already determined to be a Dell BIOS. + if self.version == 'Dell': + # Let _signon_dell handle this version line. + return False + + # Extract version. + self.version = match.group(4).rstrip('. ') + + # Extract version prefix if present. + pre_version = match.group(1) + if pre_version: + # Shorten 80286/80386(/80486?) + if len(pre_version) >= 5 and pre_version[:2] == '80': + pre_version = pre_version[2:] + + self.version = pre_version.strip() + ' ' + self.version + + # Add PLUS prefix/suffix if present. + if match.group(1) or match.group(2): + space_index = self.version.find(' ') + if space_index > -1: + self.version = self.version[:space_index] + ' PLUS' + self.version[space_index:] + else: + self.version = 'PLUS ' + self.version + + # Extract any additional information after the version as a sign-on + # if none was already found. + if not self.signon.replace('\t', '').replace(' ', ''): + additional_info = (match.group(5) or '') + (match.group(6) or '') + if additional_info and (len(additional_info) > 3 or additional_info[0] != '.'): + self.signon = additional_info + + return True + + def _version_tandy(self, line, match): + '''$ Tandy Corporation ''' + + # This is a Tandy BIOS with Phoenix Compatibility Software. + if not self.version: + self.version = 'Tandy' + + # Set Tandy sign-on if we already found one. + self.signon = self._found_signon_tandy + + return True + + def _version_xx86(self, line, match): + '''(?:Phoenix(?:(?:\s)?BIOS(?:\(TM\))?)? 
)?([ADE][23456]86) Version (?:([0-9]\.[0-9]{2})(.*))?$''' + + # Stop if this is A386 after A486 (Apricot LS Pro) + branch = match.group(1) + if branch == 'A386' and self.version[:5] == 'A486 ': + return True + + # Add branch to the version. + self.version = branch + + # Add actual version, if found. + version = match.group(2) + if version: + self.version += ' ' + version + + # Abort analysis if this is a non-BIOS file. (ZEOS id.txt) + if version == 'A486 1.0x"': + self.version = '' + raise AbortAnalysisError('Phoenix non-BIOS (_version_xx86)') + + # Extract any additional information after the version as a sign-on + # if none was already found. + if not self.signon: + additional_info = match.group(3) + if additional_info and (len(additional_info) > 3 or additional_info[0] != '.'): + self.signon = additional_info + + return True + + def _string_date(self, line, match): + '''^((?:[0-9]{2})/(?:[0-9]{2})/(?:[0-9]{2})|(?:[0-9]{4})//(?:[0-9]{4})//(?:[0-9]{4}))((?:[0-9]{2})/(?:[0-9]{2})/(?:[0-9]{2}))?''' + + # De-interleave date if interleaved. + date = match.group(1) + if len(date) > 8: + date = date[::2] + + # If two dates were found, the newest one takes precedence. + other_date = match.group(2) + if other_date and util.date_gt(other_date, date, util.date_pattern_mmddyy): + date = other_date + + # Skip known bad dates. + if date == '00/00/00': + return True + + # Extract the date as a string if newer than any previously-found date. + if ' ' not in self.string and (not self.string or util.date_gt(date, self.string, util.date_pattern_mmddyy)): + self.string = date + + return True + + def _signon_ast(self, line, match): + '''AST System BIOS Version ''' + + # This is an AST BIOS. + self.version = 'AST' + + # Extract version as a sign-on. + self.signon = line + + return True + + def _signon_commodore(self, line, match): + '''^ *(Commodore [^\s]+ BIOS Rev\. [^\s]+)''' + + # Extract the version string as a sign-on. 
+ self.signon = match.group(1) + + return True + + def _signon_dell(self, line, match): + '''^(?:(D)ell System |(?:BIOS Version(?! =)|Phoenix ROM BIOS PLUS Version (?:[^\s]+)) )(.+)''' + + # Add model or BIOS version to the sign-on. + linebreak_index = self.signon.find('\n') + if match.group(1): # the single captured character is a flag + self.signon = match.group(2) + self.signon[linebreak_index:] + else: + self.signon = self.signon[:linebreak_index + 1] + 'BIOS Version: ' + match.group(2) + + return True + + def _signon_fujitsu_trigger(self, line, match): + '''Phoenix/FUJITSU''' + + # Read sign-on on the next 2 lines. + self._trap_signon_fujitsu_lines = 1 + + return True + + def _signon_fujitsu(self, line, match): + if self._trap_signon_fujitsu_lines == 1: + # Extract the version on the first line. + self.signon = ' '.join(line.split()) + + # Move on to the next line. + self._trap_signon_fujitsu_lines = 2 + else: + # Extract the model number on the second line. + self.signon = self.signon + ' (' + line.lstrip() + ')' + + # Disarm the trap. + self._trap_signon_fujitsu_lines = 0 + + return True + + def _signon_hp(self, line, match): + '''^(?:[A-Z]{2,3})\.(?:[0-9]{2})\.(?:[0-9]{2})(?: \((?:[A-Z]{2,3})\.(?:[0-9]{2})\.(?:[0-9]{2})\)|$)''' + + # This is an HP BIOS. + if not self.version: + self.version = 'HP' + + # Extract the version string as a sign-on. + self.signon = match.group(0) + + return True + + def _signon_intel(self, line, match): + '''^(?:\$IBIOSI\$)?((?:[0-9])\.(?:[0-9]{2})\.(?:[0-9]{2})\.(?:[0-9A-Z]{2,})|(?:[0-9A-Z]{8})\.([0-9A-Z]{3})\.(?:[0-9A-Z]{4})\.(?:[0-9A-Z]{3})\.([0-9]{10}))''' + + # This is an Intel BIOS. + if not self.version: + self.version = 'Intel' + + # If this is Intel's second Phoenix run, check if this is not a generic + # (86x) version string overwriting an OEM version string. + oem = match.group(2) + if not oem or oem[:2] != '86' or not self._intel_86_pattern.match(self.signon): + # Extract the version string as a sign-on. 
+ self.signon = match.group(1) + + # The longer string on Intel's second Phoenix run has a build date and + # time, which is more accurate than the build date and time in BCPSYS. + build_date_time = match.group(3) + if build_date_time: + # Check if the date is newer than any existing date. + build_date = '{0}/{1}/{2}'.format(build_date_time[2:4], build_date_time[4:6], build_date_time[:2]) + if len(self.string) >= 8 and util.date_gt(build_date, self.string[:8], util.date_pattern_mmddyy): + # Extract the date as a string. + self.string = '{0} {1}:{2}'.format(build_date, build_date_time[6:8], build_date_time[8:10]) + + return True + + def _signon_nec_trigger(self, line, match): + '''^..(NEC Corporation)$''' + + # This is an NEC BIOS. + if not self.version: + self.version = 'NEC' + + # Discard any bogus sign-on extracted earlier. + self.signon = match.group(1) + + # Read sign-on on the next line or two. + self._trap_signon_nec = True + + return True + + def _signon_nec(self, line, match): + # Disarm trap once we reach the end. + if line == '@((PP((PP,(-)*.': + self._trap_signon_nec = False + return False + + # Add line to the sign-on, skipping duplicates. + signon = line.strip() + if signon not in self.signon: + self.signon += '\n' + signon + + return True + + def _signon_surepath(self, line, match): + '''^SurePath\(tm\) BIOS Version (.+)''' + + # This is an IBM BIOS. + if not self.version: + self.version = 'IBM' + + # Extract the version string as a sign-on. + self.signon = match.group(0) + + return True + + def _signon_tandy(self, line, match): + '''^\!BIOS ROM version ([^\s]+)''' + + # Extract the Tandy version as a sign-on. + self._found_signon_tandy = line[1:] + + # Set sign-on if we already determined this is a Tandy BIOS. 
+ if self.version == 'Tandy': + self.signon = self._found_signon_tandy + + +class QuadtelAnalyzer(Analyzer): + def __init__(self, *args, **kwargs): + super().__init__('Quadtel', *args, **kwargs) + + self._id_block_pattern = re.compile(b'''Copyright 19..-.... Quadtel Corp\. Version''') + self._version_pattern = re.compile('''(?:(?:Quadtel|QUADTEL|PhoenixBIOS) )?(.+) BIOS Version ([^\\r\\n]+)''') + + self.register_check_list([ + (self._string_date, RegexChecker), + ]) + + def can_handle(self, file_data, header_data): + if b' Quadtel Corp. Version ' not in file_data: + return False + + # Quadtel appears to have a consistent identification block. + match = self._id_block_pattern.search(file_data) + if match: + # Determine location of the identification block. + id_block_index = match.start(0) + + # Extract version. + version_string = util.read_string(file_data[id_block_index + 0xc8:id_block_index + 0x190]) + version_match = self._version_pattern.search(version_string) # may start with a linebreak (Phoenix-Quadtel) + if version_match: + self.version = version_match.group(2).replace(' \b', '').rstrip('.').strip() # remove trailing "." (quadt286) and space followed by backspace (ZEOS Marlin) + if self.version[0:1] == 'Q': # flag Phoenix-Quadtel + self.version = self.version[1:] + ' (Phoenix)' + + # Extract BIOS type as the string. + self.string = version_match.group(1).strip() + + # Extract sign-on. + self.signon = util.read_string(file_data[id_block_index + 0x190:id_block_index + 0x290]).strip() + + # Split sign-on lines. + self.signon = '\n'.join(x.rstrip('\r').strip() for x in self.signon.split('\n') if x != '\r').strip('\n') + + return True + + def _string_date(self, line, match): + '''^[0-9]{2}/[0-9]{2}/[0-9]{2}$''' + + # Add date to string, or replace any previously-found date with a newer one. 
class SchneiderAnalyzer(Analyzer):
    """Analyzer for images containing the Schneider Rundfunkwerke AG marker."""

    def __init__(self, *args, **kwargs):
        super().__init__('Schneider', *args, **kwargs)

        # Single check: the version line handler below.
        checks = [
            (self._version, RegexChecker),
        ]
        self.register_check_list(checks)

    def can_handle(self, file_data, header_data):
        # The vendor name in the image data is the only criterion;
        # header_data is not consulted.
        vendor_marker = b'Schneider Rundfunkwerke AG'
        return vendor_marker in file_data

    def _version(self, line, match):
        '''EURO PC(?:\s+)BIOS (V.+)'''
        # NOTE: the string literal above appears to double as the pattern
        # this handler is registered with; it is kept verbatim.
        # Capture 1 is the version text starting at "V".
        version_text = match.group(1)
        self.version = version_text

        return True
(').rstrip(' )') + ')' + + def _signon_precheck(self, line): + return self._trap_signon + + def _version(self, line, match): + '''^SystemSoft BIOS for (.+) Vers(?:\.|ion) 0?([^\s]+)(?: (.+))?''' + + # Extract version number if valid. + version = match.group(2) + if '.' in version: + self._version_number = version + + # Extract branch as the string. + self._string_branch_decisive = (match.group(1) or '').rstrip(' .,') # strip dot and comma (NCR 3xxx series) + + # Extract model information after the branch (NCR 3315) as a sign-on. + onthe_split = self._string_branch_decisive.split(' on the ') + if len(onthe_split) == 2: + self._string_branch_decisive, self.signon = onthe_split + + # Add any additional information after the version to the string. + additional_info = match.group(3) + if additional_info: + self._string_additional = additional_info + + # Update version and string. + self._update_version_string() + + return True + + def _version_mobilepro(self, line, match): + '''^SystemSoft (MobilePRO) BIOS Version ([^\s]+)(?: (.+))?''' + + # Set prefix. + self._version_prefix = match.group(1) + + # Set version number. + self._version_number = match.group(2) + + # Add any additional information after the version to the string. + additional_info = match.group(3) + if additional_info: + self._string_additional = additional_info + + # Update version and string. + self._update_version_string() + + return True + + def _string_440bx(self, line, match): + '''^(Intel )?(440BX(?:/ZX)?)''' + + # Extract branch as decisive, so it doesn't get overwritten by 430TX. + self._string_branch_decisive = (match.group(1) or 'Intel ') + match.group(2) + + # Update string. + self._update_version_string() + + return True + + def _string_for(self, line, match): + '''SystemSoft BIOS [Ff]or ([^\(]+)''' + + # Extract branch. + self._string_branch = match.group(1) + + # Update string. 
+ self._update_version_string() + + return True + + def _string_scu(self, line, match): + '''SystemSoft SCU [Ff]or (.+) [Cc]hipset''' + + # Extract branch. + self._string_branch = match.group(1) + + # Update string. + self._update_version_string() + + return True + + def _signon_trigger(self, line, match): + '''^Copyright (?:.+ SystemSoft Corp(?:oration|\.)?|SystemSoft Corp(?:oration|\.) .+\.)\s+All Rights Reserved''' + + # Read sign-on on the next line. + self.signon = '' + self._trap_signon = True + + return True + + def _signon(self, line, match): + # Add line to sign-on if it's valid. + if '. All Rights Reserved' not in line[-22:]: + if line not in (' 9 9 9 9', 'ICICIC9CW') and line[:27] != 'OEM-CONFIGURABLE MESSAGE # ': + # " 9 9 9 9" (Kapok 8x00C/P) + # "ICICIC9CW" (Systemax sndbk105) + # ". All Rights Reserved" (Dual/Smile mic6903/mic6907) + if self.signon: + self.signon += '\n' + self.signon = line + + # Disarm trap if there's no additional sign-on line up next. + if line != 'TriGem Computer, Inc.': + self._trap_signon = False + + return True + + +class TandonAnalyzer(Analyzer): + def __init__(self, *args, **kwargs): + super().__init__('Tandon', *args, **kwargs) + + self.register_check_list([ + (self._version, RegexChecker), + ]) + + def can_handle(self, file_data, header_data): + return b'NOT COPR. IBM 1984 BIOS VERSION ' in file_data + + def _version(self, line, match): + '''NOT COPR. IBM 1984 BIOS VERSION (.+)''' + + # Extract version. 
class TinyBIOSAnalyzer(Analyzer):
    """Analyzer for images carrying the tinyBIOS and PC Engines markers."""

    def __init__(self, *args, **kwargs):
        super().__init__('tinyBIOS', *args, **kwargs)

        # The version handler is listed before the sign-on collector, which
        # only runs while no version has been recorded.
        checks = [
            (self._version, RegexChecker),
            ((self._noversion_precheck, self._signon), AlwaysRunChecker),
        ]
        self.register_check_list(checks)

    def can_handle(self, file_data, header_data):
        # Both markers must be present; header_data is not consulted.
        if b'tinyBIOS V' not in file_data:
            return False
        return b' PC Engines' in file_data

    def _noversion_precheck(self, line):
        # True until a version has been recorded.
        return not self.version

    def _version(self, line, match):
        '''^tinyBIOS (V(?:[^\s]+))'''
        # NOTE: the string literal above appears to double as the pattern
        # this handler is registered with; it is kept verbatim.
        # Capture 1 is the version word, including its leading "V".
        version_word = match.group(1)
        self.version = version_word

        return True

    def _signon(self, line, match):
        # Every line seen before the version replaces the previous one, so
        # the sign-on ends up being the last line ahead of the version.
        self.signon = line

        return False
class ZenithAnalyzer(Analyzer):
    """Analyzer for images carrying the "(C)ZDS CORP" marker."""

    def __init__(self, *args, **kwargs):
        super().__init__('Zenith', *args, **kwargs)

        # Single check: the dated copyright line handler below.
        checks = [(self._version_date, RegexChecker)]
        self.register_check_list(checks)

    def can_handle(self, file_data, header_data):
        # Both markers must be present; header_data is not consulted.
        required = (b'(C)ZDS CORP', b'+++ Wild Hardware Interrupt! +++')
        return all(marker in file_data for marker in required)

    def _version_date(self, line, match):
        '''^([0-9]{2}/[0-9]{2}/[0-9]{2}) \(C\)ZDS CORP'''
        # NOTE: the string literal above appears to double as the pattern
        # this handler is registered with; it is kept verbatim.
        # Capture 1 is the date preceding the copyright marker; it is used
        # as the version.
        # NOTE(review): unlike the other handlers this one returns None
        # rather than True - confirm that is intended.
        date_text = match.group(1)
        self.version = date_text
class Extractor:
    """Base class for extractors; subclasses override extract()."""

    def extract(self, file_path, file_header, dest_dir, dest_dir_0):
        """Extract the given file into one of the destination directories:
        dest_dir allows extracted files to be reprocessed in the next run,
        while dest_dir_0 does not. This must return either:
        - False if this extractor can't handle the given file
        - True if this extractor can handle the given file, but no output was produced
        - a string with the produced output file/directory path"""
        raise NotImplementedError()

    def log_print(self, *args):
        """Print a log line to stderr, prefixed with the extractor class name."""
        # `sys` is not among this module's top-level imports, so referencing
        # sys.stderr here raised NameError; import it locally to keep this
        # method self-contained.
        import sys

        print('{0}:'.format(self.__class__.__name__), *args, file=sys.stderr)
class BIOSExtractor(Extractor):
    """Extract a bios_extract-compatible BIOS file."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Path to the bios_extract utility, or None if it is not present.
        self._bios_extract_path = os.path.abspath(os.path.join('bios_extract', 'src', 'bios_extract'))
        if not os.path.exists(self._bios_extract_path):
            self._bios_extract_path = None

        # Output sink for suppressing subprocess output. subprocess.DEVNULL
        # is used instead of an open(os.devnull) handle, which was never
        # closed and leaked one file descriptor per instance.
        self._devnull = subprocess.DEVNULL

        # Built-in instance of ImageExtractor for converting
        # any extracted BIOS logo images that were found.
        self._image_extractor = ImageExtractor()

    def extract(self, file_path, file_header, dest_dir, dest_dir_0):
        """Run bios_extract on file_path, extracting into dest_dir_0.

        Returns False if bios_extract is unavailable or produced nothing
        useful, True if the destination directory could not be created, or
        dest_dir_0 on success (the source file is then removed).
        file_header and dest_dir are not used."""

        # Stop if bios_extract is not available.
        if not self._bios_extract_path:
            return False

        # Create destination directory and stop if it couldn't be created.
        if not util.try_makedirs(dest_dir_0):
            return True

        # Start bios_extract process.
        file_path_abs = os.path.abspath(file_path)
        try:
            subprocess.run([self._bios_extract_path, file_path_abs], timeout=30, stdout=self._devnull, stderr=subprocess.STDOUT, cwd=dest_dir_0)
        except Exception:
            # Bad data can cause infinite loops (hence the timeout). Failures
            # are deliberately ignored: success is judged by the output below.
            pass

        # Assume failure if nothing was extracted. A lone amiboot.rom (or
        # ssboot.rom) also counts as a failure, since the extractor writes
        # the boot block before attempting to extract any actual BIOS modules.
        dest_dir_files = os.listdir(dest_dir_0)
        num_files_extracted = len(dest_dir_files)
        if num_files_extracted < 1:
            return False
        elif num_files_extracted == 1 and dest_dir_files[0] in ('amiboot.rom', 'ssboot.rom'):
            # Remove the boot block so that the destination directory can be rmdir'd later.
            try:
                os.remove(os.path.join(dest_dir_0, dest_dir_files[0]))
            except OSError:
                pass
            return False

        # Convert any BIOS logo images in-line (to the same destination directory).
        for dest_dir_file in dest_dir_files:
            # Read 8 bytes, which is enough to ascertain any potential logo type.
            dest_dir_file_path = os.path.join(dest_dir_0, dest_dir_file)
            with open(dest_dir_file_path, 'rb') as f:
                dest_dir_file_header = f.read(8)

            # Run ImageExtractor.
            image_dest_dir = dest_dir_file_path + ':'
            self._image_extractor.extract(dest_dir_file_path, dest_dir_file_header, image_dest_dir, image_dest_dir)

            # Remove destination directory if it was created but is empty.
            util.rmdirs(image_dest_dir)

        # Create flag file on the destination directory for the analyzer to
        # treat it as a big chunk of data.
        open(os.path.join(dest_dir_0, ':combined:'), 'wb').close()

        # Copy any header file to extracted directory, for identifying Intel BIOSes.
        # See AMIAnalyzer.can_handle for more information.
        try:
            shutil.copy(os.path.join(os.path.dirname(file_path_abs), ':header:'), os.path.join(dest_dir_0, ':header:'))
        except OSError:
            # No header file next to the source (or it could not be copied).
            pass

        # Remove BIOS file.
        try:
            os.remove(file_path)
        except OSError:
            pass

        # Return destination directory path.
        return dest_dir_0
+ Based on dell_inspiron_1100_unpacker.py""" + + def _memcpy(self, arr1, off1, arr2, off2, count): + while count: + if off1 < len(arr1): + try: + arr1[off1] = arr2[off2] + except: + break + elif off1 == len(arr1): + try: + arr1.append(arr2[off2]) + except: + break + else: + break + off1 += 1 + off2 += 1 + count -= 1 + + def _dell_unpack(self, indata): + srcoff = 0 + dstoff = 0 + src = bytearray(indata) + dst = bytearray() + inlen = len(indata) + while srcoff < inlen: + b = src[srcoff] + nibl, nibh = b & 0x0F, (b >> 4) & 0x0F + srcoff += 1 + if nibl: + if nibl == 0xF: + al = src[srcoff] + ah = src[srcoff+1] + srcoff += 2 + cx = nibh | (ah << 4) + count = (cx & 0x3F) + 2 + delta = ((ah >> 2) << 8) | al + else: + count = nibl + 1 + delta = (nibh << 8) | src[srcoff] + srcoff += 1 + self._memcpy(dst, dstoff, dst, dstoff - delta - 1, count) + dstoff += count + elif nibh == 0x0E: + count = src[srcoff] + 1 + srcoff += 1 + self._memcpy(dst, dstoff, dst, dstoff - 1, count) + dstoff += count + else: + if nibh == 0x0F: + count = src[srcoff] + 15 + srcoff += 1 + else: + count = nibh + 1 + self._memcpy(dst, dstoff, src, srcoff, count) + dstoff += count + srcoff += count + + return dst + + def extract(self, file_path, file_header, dest_dir, dest_dir_0): + # Read up to 16 MB as a safety net. + file_header += util.read_complement(file_path, file_header) + + # Stop if this is not the type of BIOS we're looking for. + copyright_string = b'\xF0\x00Copyright 1985-\x02\x04\xF0\x0F8 Phoenix Technologies Ltd.' + offset = file_header.find(copyright_string) + if offset < 5: + return False + + # Determine the length format. + if file_header[offset - 5] == 1: + # 32-bit length. + length_size = 5 + struct_format = ' 0: + f = open(os.path.join(dest_dir_0, 'ec.bin'), 'wb') + f.write(file_header[:offset]) + f.close() + + # Extract modules. + file_size = len(file_header) + module_number = 0 + while (offset + length_size) < file_size: + # Read module type and length. 
class DiscardExtractor(Extractor):
    """Detect and discard known non-useful file types."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # File signatures to discard. Capture group 1 flags ELF files so that
        # extract() can spare LinuxBIOS images; group 2 (HWiNFO) is not used
        # by extract() but is kept so the pattern itself stays unchanged.
        self._signature_pattern = re.compile(
            # images
            b'''\\xFF\\xD8\\xFF|GIF8|\\x89PNG|'''
            # documents (a cursory check for HTML ought not to upset anyone)
            b'''%PDF|\\xD0\\xCF\\x11\\xE0\\xA1\\xB1\\x1A\\xE1|\\x3F\\x5F\\x03\\x00|<(?:\![Dd][Oo][Cc][Tt][Yy][Pp][Ee]|[Hh][Tt][Mm][Ll])[ >]|'''
            # executables
            b'''(\\x7FELF)|'''
            # reports
            b'''CPU-Z TXT Report|\s{7}File: A|-+\[ AIDA32 |HWiNFO64 Version |3DMARK2001 PROJECT|Report Dr. Hardware|\r\n(?:\s+(HWiNFO v)|\r\n\s+\r\n\s+Microsoft Diagnostics version )|SIV[^\s]+ - System Information Viewer V'''
        )

    def extract(self, file_path, file_header, dest_dir, dest_dir_0):
        """Delete file_path if its header matches a known non-useful type.

        Returns True when the file was recognized (removal is attempted on a
        best-effort basis), or False when the file should be handed to the
        other extractors. dest_dir and dest_dir_0 are not used."""

        # Determine if this is a known non-useful file type through the signature pattern.
        match = self._signature_pattern.match(file_header)
        if not match:
            # Not a known file type, cleared to go.
            return False

        # Don't discard LinuxBIOS ELFs.
        if match.group(1) and file_header[128:136] == b'ELFBoot\x00':
            return False

        # The temporary debugging step that copied HWiNFO reports to a
        # hard-coded scratch directory (/mnt/scratch/hwinfo/) was removed:
        # it only worked on the author's machine and silently did nothing
        # elsewhere.

        # Remove file and stop.
        try:
            os.remove(file_path)
        except OSError:
            pass
        return True
0x715161, 0x715159, + 0x715151, 0x715951, 0x716151, 0x716951, 0x717151, 0x697151, 0x617151, 0x597151, 0x517151, 0x517159, 0x517161, 0x517169, 0x517171, 0x516971, 0x516171, 0x515971, + 0x000041, 0x100041, 0x200041, 0x310041, 0x410041, 0x410031, 0x410020, 0x410010, 0x410000, 0x411000, 0x412000, 0x413100, 0x414100, 0x314100, 0x204100, 0x104100, + 0x004100, 0x004110, 0x004120, 0x004131, 0x004141, 0x003141, 0x002041, 0x001041, 0x202041, 0x282041, 0x312041, 0x392041, 0x412041, 0x412039, 0x412031, 0x412028, + 0x412020, 0x412820, 0x413120, 0x413920, 0x414120, 0x394120, 0x314120, 0x284120, 0x204120, 0x204128, 0x204131, 0x204139, 0x204141, 0x203941, 0x203141, 0x202841, + 0x2d2d41, 0x312d41, 0x352d41, 0x3d2d41, 0x412d41, 0x412d3d, 0x412d35, 0x412d31, 0x412d2d, 0x41312d, 0x41352d, 0x413d2d, 0x41412d, 0x3d412d, 0x35412d, 0x31412d, + 0x2d412d, 0x2d4131, 0x2d4135, 0x2d413d, 0x2d4141, 0x2d3d41, 0x2d3541, 0x2d3141, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000, 0x000000 + ] + + def extract(self, file_path, file_header, dest_dir, dest_dir_0): + # Stop if PIL is not available or this file is too small. + if not PIL.Image or len(file_header) < 8: + return False + + # Determine if this is an image, and which type it is. + if file_header[:4] == b'AWBM': + # Get width and height for a v2 EPA. + width, height = struct.unpack('= 8 + (width * height): + func = self._convert_epav2_8b + else: + func = self._convert_epav2_4b + else: + # Determine if this file is the right size for a v1 EPA. + width, height = struct.unpack('BB', file_header[:2]) + if os.path.getsize(file_path) == 72 + (15 * width * height): + func = self._convert_epav1 + else: + # Determine if this is an AMI PCX. + if file_header[0] == 0x0a and \ + file_header[1] in (0x00, 0x02, 0x03, 0x04, 0x05) and \ + file_header[2] in (0x00, 0x01) and \ + file_header[3] in (0x01, 0x02, 0x04, 0x08): + func = self._convert_pil + else: + # Stop if this is not an image. 
+ return False + + # Create destination directory and stop if it couldn't be created. + if not util.try_makedirs(dest_dir_0): + return True + + # Read up to 16 MB as a safety net. + file_header += util.read_complement(file_path, file_header) + + # Stop if the file was cut off, preventing parsing exceptions. + if len(file_header) == 16777216: + return True + + # Run extractor function, and stop if it was not successful. + if not func(file_header, width, height, dest_dir_0): + return True + + # Remove original file. + try: + os.remove(file_path) + except: + pass + + return dest_dir_0 + + def _convert_epav1(self, file_data, width, height, dest_dir_0): + # Write file type as a header. + self._write_type(dest_dir_0, 'EPA v1') + + # Fill color map. + color_map = [] + index = 2 + for x in range(width * height): + # Read character cell color information. + color = file_data[index] + index += 1 + + # Save RGB background and foreground color. + color_map.append((self._vga_palette[color & 0x0f], self._vga_palette[color >> 4])) + + # Create output image. + image = PIL.Image.new('RGB', (width * 8, height * 14)) + + # Read image data. + for y in range(height): + for x in range(width): + # Determine foreground/background colors for this character cell. + fg_color, bg_color = color_map.pop(0) + + # Read the 14 row bitmaps. + for cy in range(14): + # Stop row bitmap processing if the file is truncated. + if index >= len(file_data): + width = height = 0 + break + + # Read bitmap byte. + bitmap = file_data[index] + index += 1 + + # Parse the foreground/background bitmap. + for cx in range(8): + # Determine palette color and write pixel. + color = (bitmap & (1 << cx)) and fg_color or bg_color + image.putpixel(((x * 8) + (7 - cx), (y * 14) + cy), + ((color >> 16) & 0xff, (color >> 8) & 0xff, color & 0xff)) + + # Stop column processing if the file is truncated. + if width == 0 or len(color_map) == 0: + break + + # Stop row processing if the file is truncated. 
+ if height == 0 or len(color_map) == 0: + break + + # Save output image. + return self._save_image(image, dest_dir_0) + + def _convert_epav2_4b(self, file_data, width, height, dest_dir_0): + # Read palette if the file contains one, while + # writing the file type as a header accordingly. + palette = self._read_palette_epav2(file_data, -52, False) + if palette: + self._write_type(dest_dir_0, 'EPA v2 4-bit (with palette)') + else: + self._write_type(dest_dir_0, 'EPA v2 4-bit (without palette)') + + # Use standard EGA palette. + palette = self._vga_palette + + # Create output image. + image = PIL.Image.new('RGB', (width, height)) + + # Read image data. + index = 8 + bitmap_width = math.ceil(width / 8) + for y in range(height): + for x in range(bitmap_width): + # Stop column processing if the file is truncated. + if index + x + (bitmap_width * 3) >= len(file_data): + index = 0 + break + + for cx in range(8): + # Skip this pixel if it's outside the image width. + output_x = (x * 8) + cx + if output_x >= width: + continue + + # Read color values. Each bit is stored in a separate bitmap. + pixel = (file_data[index + x] >> (7 - cx)) & 1 + pixel |= ((file_data[index + x + bitmap_width] >> (7 - cx)) & 1) << 1 + pixel |= ((file_data[index + x + (bitmap_width * 2)] >> (7 - cx)) & 1) << 2 + pixel |= ((file_data[index + x + (bitmap_width * 3)] >> (7 - cx)) & 1) << 3 + + # Determine palette color and write pixel. + if pixel > len(palette): + pixel = len(palette) - 1 + color = palette[pixel] + image.putpixel((output_x, y), + ((color >> 16) & 0xff, (color >> 8) & 0xff, color & 0xff)) + + # Stop row processing if the file is truncated. + if index == 0: + break + + # Move on to the next set of 4 bitmaps. + index += bitmap_width * 4 + + # Save output image. + return self._save_image(image, dest_dir_0) + + def _convert_epav2_8b(self, file_data, width, height, dest_dir_0): + # Read palette if the file contains one, while + # writing the file type as a header accordingly. 
+ palette = self._read_palette_epav2(file_data, -772) + if palette: + self._write_type(dest_dir_0, 'EPA v2 8-bit (with palette)') + else: + self._write_type(dest_dir_0, 'EPA v2 8-bit (without palette)') + + # Use standard VGA palette. + palette = self._vga_palette + + # Create output image. + image = PIL.Image.new('RGB', (width, height)) + + # Read image data. + index = 8 + for y in range(height): + for x in range(width): + # Read pixel. + pixel = file_data[index] + index += 1 + + # Determine palette color and write pixel. + if pixel > len(palette): + pixel = len(palette) - 1 + color = palette[pixel] + image.putpixel((x, y), + ((color >> 16) & 0xff, (color >> 8) & 0xff, color & 0xff)) + + # Save output image. + return self._save_image(image, dest_dir_0) + + def _convert_pil(self, file_data, width, height, dest_dir_0): + # Load image. + try: + image = PIL.Image.open(io.BytesIO(file_data)) + if not image: + raise Exception('no image') + except: + return False + + # Write the file type as a header. + self._write_type(dest_dir_0, image.format) + + # Save output image. + return self._save_image(image, dest_dir_0) + + def _read_palette_epav2(self, file_data, rgbs_offset, rgb=True): + # Stop if this file has no palette. + if file_data[rgbs_offset:rgbs_offset + 4] != b'RGB ': + return None + + # Read 6-bit palette entries, while converting to 8-bit. + palette = [] + index = rgbs_offset + 4 + while index < 0: + palette.append((file_data[index] << (rgb and 18 or 2)) | + (file_data[index + 1] << 10) | + (file_data[index + 2] << (rgb and 2 or 18))) + index += 3 + + return palette + + def _save_image(self, image, dest_dir_0): + # Save image to destination directory. + image_path = os.path.join(dest_dir_0, 'image.png') + try: + image.save(image_path) + return True + except: + # Clean up. 
+ try: + os.remove(image_path) + except: + pass + try: + os.remove(os.path.join(dest_dir_0, ':header:')) + except: + pass + return False + + def _write_type(self, dest_dir_0, identifier): + f = open(os.path.join(dest_dir_0, ':header:'), 'w') + f.write(identifier) + f.close() + +class FATExtractor(ArchiveExtractor): + """Extract FAT disk images.""" + + def extract(self, file_path, file_header, dest_dir, dest_dir_0): + # Determine if this is a FAT filesystem. + + # Stop if this file is too small. + if len(file_header) < 512: + return False + + # Stop if there's no bootstrap jump. + if (file_header[0] != 0xEB or file_header[2] != 0x90) and file_header[0] != 0xE9: + return False + + # Stop if there's no media descriptor type. + if file_header[21] < 0xF0: + return False + + # Extract this as an archive. + return self._extract_archive(file_path, dest_dir) + + +class HexExtractor(ArchiveExtractor): + """Extract Intel HEX format ROMs.""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._hex_start_pattern = re.compile(b''':(?:[0-9A-F]{2}){1,}\\r?\\n''') + self._hex_eof_pattern = re.compile(b''':00[0-9A-F]{4}01[0-9A-F]{2}\\r?\\n?$''') + self._hex_data_pattern = re.compile(b''':([0-9A-F]{2})([0-9A-F]{4})00([0-9A-F]{2,})\\r?\\n''') + + def extract(self, file_path, file_header, dest_dir, dest_dir_0): + # Stop if this is not a HEX. + if not self._hex_start_pattern.match(file_header): + return False + + # Read up to 16 MB as a safety net. + file_header += util.read_complement(file_path, file_header) + + # Stop if no EOF was found. + if not self._hex_eof_pattern.search(file_header): + return False + + # Create destination directory and stop if it couldn't be created. + if not util.try_makedirs(dest_dir): + return True + + # Create destination file. + f = open(os.path.join(dest_dir, 'intelhex.bin'), 'wb') + + # Extract data blocks. 
class ISOExtractor(ArchiveExtractor):
    """Extract ISO 9660 images."""

    def extract(self, file_path, file_header, dest_dir, dest_dir_0):
        """Extract file_path as an archive if it carries an ISO 9660 signature.

        Returns False if the signature is absent, or whatever
        _extract_archive returns otherwise.
        """
        # Stop if this is not an ISO. The header is a bytes object, so the
        # signature must be a bytes literal as well; comparing bytes against
        # str is always unequal on Python 3 and would reject every image.
        if file_header[32769:32775] != b'CD001\x01':
            return False

        # Extract this as an archive.
        return self._extract_archive(file_path, dest_dir)
+ file_name = os.path.basename(file_path) + if file_name[-4:-3] != '.': + return True + + # Stop if this file is too small (may be a copied header). + if len(file_header) <= 608: + return True + + # Stop if this file has an irrelevant extension. + file_name_lower = file_name.lower() + if file_name_lower[-3:] not in self._part_extensions: + # Remove file. + try: + os.remove(file_path) + except: + import traceback + traceback.print_exc() + pass + return True + + # Scan this directory's contents. + dir_path = os.path.dirname(file_path) + dir_files = {} + for dir_file_name in os.listdir(dir_path): + dir_file_name_lower = dir_file_name.lower() + dir_file_path = os.path.join(dir_path, dir_file_name) + + # Remove irrelevant files which lack an Intel header. + if dir_file_name_lower[-4:] in ('.lng', '.rcv', '.rec'): + try: + os.remove(dir_file_path) + except: + pass + continue + + # Add to the file list. + dir_files[dir_file_name_lower] = dir_file_path + + # Try to find matching parts in the same directory. + file_name_base = file_name[:-3] + file_name_base_lower = file_name_lower[:-3] + found_parts = [] + largest_part_size = 0 + + # Try all part extensions. + for extension in self._part_extensions: + # Check if this part exists in the directory. + found_part_path = dir_files.get(file_name_base_lower + extension, None) + if found_part_path: + # Get the part's file size. + try: + found_part_size = os.path.getsize(found_part_path) + except: + continue + + # Add it to the part list. + found_parts.append((found_part_path, found_part_size)) + + # Update the largest part size. + if found_part_size > largest_part_size: + largest_part_size = found_part_size + + # Stop if no parts were found somehow. + if len(found_parts) == 0: + return True + + # Create destination directory and stop if it couldn't be created. 
class InterleaveExtractor(Extractor):
    """Detect and de-interleave any interleaved ROMs."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # List of strings an interleaved BIOS might contain once deinterleaved.
        self._deinterleaved_strings = [
            b'ALL RIGHTS RESERVED',
            b'All Rights Reserved',
            b'Illegal Interrupt No.',
            b'Phoenix Technologies Ltd.', # Phoenix
            b' COPR. IBM 198', # IBM and Tandon
            b'memory (parity error)',
            b'Copyright COMPAQ Computer Corporation', # Compaq
        ]

        # Interleave the strings.
        self._interleaved_odd = [string[1::2] for string in self._deinterleaved_strings]
        self._interleaved_even = [string[::2] for string in self._deinterleaved_strings]

    def extract(self, file_path, file_header, dest_dir, dest_dir_0):
        """Combine file_path with its interleaved counterpart, if one exists.

        Returns False if this file is not interleaved or no counterpart
        could be determined, True if the destination directory could not be
        created, or dest_dir once the combined files have been written.
        """
        # Stop if this was already deinterleaved.
        dir_path = os.path.dirname(file_path)
        if os.path.exists(os.path.join(dir_path, ':combined:')):
            return False

        # Read up to 128 KB.
        file_header += util.read_complement(file_path, file_header, max_size=131072)

        # Check for interleaved strings. The counterpart is expected
        # to contain the opposite half of one of the known strings.
        if any(string in file_header for string in self._interleaved_odd):
            counterpart_strings = self._interleaved_even
        elif any(string in file_header for string in self._interleaved_even):
            counterpart_strings = self._interleaved_odd
        else:
            # Stop if not interleaved.
            return False

        # Try to find this file's counterpart in the directory.
        counterpart_candidates = []
        file_name = os.path.basename(file_path)
        file_size = os.path.getsize(file_path)
        for file_in_dir in os.listdir(dir_path):
            # Skip this file.
            if file_in_dir == file_name:
                continue

            # Skip any files which differ in size or cannot be examined.
            file_in_dir_path = os.path.join(dir_path, file_in_dir)
            try:
                if os.path.getsize(file_in_dir_path) != file_size:
                    continue
            except OSError:
                continue

            # Read up to 128 KB.
            file_in_dir_data = util.read_complement(file_in_dir_path, max_size=131072)
            if not file_in_dir_data:
                continue

            # Add to the list of candidates if this is a counterpart.
            if any(string in file_in_dir_data for string in counterpart_strings):
                counterpart_candidates.append(file_in_dir)

        # Remove any file extension for comparison purposes.
        file_name_base = util.remove_extension(file_name)

        # If we have more than one candidate, try to narrow down by filename
        # similarity, removing one letter at a time. Our ultimate goal is for
        # the narrowed list to contain exactly one candidate.
        limit = len(file_name_base)
        narrowed = counterpart_candidates # if we have one candidate already, this will do
        while len(narrowed) != 1 and limit > 0:
            # Keep candidates whose file name (up to the limit) matches.
            narrowed = [candidate for candidate in counterpart_candidates
                        if util.remove_extension(candidate)[:limit] == file_name_base[:limit]]

            # Remove next letter.
            limit -= 1

        # Stop if we have no candidates left.
        if limit == 0 or len(narrowed) < 1:
            return False
        counterpart_path = os.path.join(dir_path, narrowed[0])

        # Create destination directory and stop if it couldn't be created.
        if not util.try_makedirs(dest_dir):
            return True

        # Deinterleave in both directions, as some pairs may contain the
        # same interleaved string on both parts. Also save interleaved
        # copies, as some pairs deinterleave incorrectly.
        with open(file_path, 'rb') as f_ia, \
             open(counterpart_path, 'rb') as f_ib, \
             open(os.path.join(dest_dir, 'deinterleaved_a.bin'), 'wb') as f_oa, \
             open(os.path.join(dest_dir, 'deinterleaved_b.bin'), 'wb') as f_ob, \
             open(os.path.join(dest_dir, 'interleaved_a.bin'), 'wb') as f_ca, \
             open(os.path.join(dest_dir, 'interleaved_b.bin'), 'wb') as f_cb:
            data = bytearray(1048576)

            # Each part fills every other byte of the buffer, so a single
            # read must not exceed half of the buffer size. Reading a full
            # buffer's worth per part makes the slice assignments below
            # fail on parts larger than half the buffer.
            chunk_size = len(data) // 2
            while True:
                # Read both parts.
                data_a = f_ia.read(chunk_size)
                data_b = f_ib.read(chunk_size)
                write_len = min(len(data_a), len(data_b))

                # Stop if we've read everything.
                if not write_len:
                    break

                # Set slice lengths.
                data_a_slice = len(data_a) * 2
                data_b_slice = len(data_b) * 2
                write_len *= 2

                # Write in one direction.
                data[:data_a_slice:2] = data_a
                data[1:data_b_slice:2] = data_b
                f_oa.write(data[:write_len])

                # Write in the other direction.
                data[:data_b_slice:2] = data_b
                data[1:data_a_slice:2] = data_a
                f_ob.write(data[:write_len])

                # Write interleaved copies.
                f_ca.write(data_a)
                f_cb.write(data_b)

        # Remove both files.
        for path in (file_path, counterpart_path):
            try:
                os.remove(path)
            except OSError:
                pass

        # Create flag file on the destination directory for the analyzer to
        # treat it as a big chunk of data, combining both deinterleave directions.
        open(os.path.join(dest_dir, ':combined:'), 'wb').close()

        # Return destination directory path.
        return dest_dir
+ out_f = open(os.path.join(dest_dir, 'omf.bin'), 'wb') + data = b' ' + while data: + data = in_f.read(1048576) + out_f.write(data) + out_f.close() + + # Remove OMF file. + in_f.close() + os.remove(file_path) + except: + pass + + # Return destination directory path. + return dest_dir + + +class PEExtractor(ArchiveExtractor): + """Extract PE executables.""" + + def extract(self, file_path, file_header, dest_dir, dest_dir_0): + # Determine if this is a PE/MZ. + # The MZ signature is way too short. Check extension as well to be safe. + if file_header[:2] != b'MZ' or file_path[-4:].lower() not in ('.exe', '.dll', '.scr'): + return False + + # Read up to 16 MB as a safety net. + file_header += util.read_complement(file_path, file_header) + + # Extract embedded ROM from AMIBIOS 8 AFUWIN. + if b'Software\\AMI\\AFUWIN' in file_header: + afuwin_result = self._extract_afuwin(file_path, file_header, dest_dir) + if afuwin_result: + return afuwin_result + + # Extract this as an archive. + return self._extract_archive(file_path, dest_dir) + + def _extract_afuwin(self, file_path, file_header, dest_dir): + # Stop if there's no embedded ROM. + rom_start_idx = file_header.find(b'_EMBEDDED_ROM_START_\x00') + if rom_start_idx == -1: + return False + rom_end_idx = file_header.find(b'_EMBEDDED_ROM_END_\x00', rom_start_idx) + if rom_end_idx == -1: + return False + + # Create destination directory and stop if it couldn't be created. + if not util.try_makedirs(dest_dir): + return True + + # Write area before and after the embedded ROM as a header. + try: + f = open(os.path.join(dest_dir, ':header:'), 'wb') + f.write(file_header[:rom_start_idx]) + f.write(file_header[rom_end_idx + 19:]) + f.close() + except: + pass + + # Extract ROM. + try: + f = open(os.path.join(dest_dir, 'afuwin.bin'), 'wb') + f.write(file_header[rom_start_idx + 21:rom_end_idx]) + f.close() + except: + return True + + # Remove file. + try: + os.remove(file_path) + except: + pass + + # Return destination directory path. 
class UEFIExtractor(Extractor):
    """Extract UEFI BIOS images through the external UEFIExtract utility."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # Ignore padding and microcode files.
        self._invalid_file_pattern = re.compile(r'(?:Padding|Microcode)_')

        # Path to the UEFIExtract utility.
        self._uefiextract_path = os.path.abspath('UEFIExtract')
        if not os.path.exists(self._uefiextract_path):
            self._uefiextract_path = None

        # Output sink for suppressing UEFIExtract's output. Using the
        # subprocess constant avoids holding an open file for this.
        self._devnull = subprocess.DEVNULL

    def extract(self, file_path, file_header, dest_dir, dest_dir_0):
        """Run UEFIExtract on file_path and move its dump to dest_dir_0.

        Returns False if UEFIExtract is unavailable or the file did not
        yield enough valid modules to be a UEFI image, True if the dump
        could not be moved to its destination, or dest_dir_0 on success.
        """
        # Stop if UEFIExtract is not available.
        if not self._uefiextract_path:
            return False

        # Start UEFIExtract process. Failures (including timeouts) are
        # detected below through the absence of the dump directory.
        file_path_abs = os.path.abspath(file_path)
        try:
            subprocess.run([self._uefiextract_path, file_path_abs, 'unpack'], timeout=30, stdout=self._devnull, stderr=subprocess.STDOUT)
        except (OSError, subprocess.SubprocessError):
            pass

        # Remove report file.
        try:
            os.remove(file_path_abs + '.report.txt')
        except OSError:
            pass

        # Stop if the dump directory was somehow not created.
        dump_dir = file_path_abs + '.dump'
        if not os.path.isdir(dump_dir):
            return False

        # Move dump directory over to the destination.
        if not self._move_dump(dump_dir, dest_dir_0):
            # Remove left-overs and stop if the move failed.
            for to_remove in (dump_dir, dest_dir_0):
                shutil.rmtree(to_remove, ignore_errors=True)
            return True

        # Go through the dump, counting valid .bin files and removing anything else.
        valid_file_count = 0
        for scan_file_name in os.listdir(dest_dir_0):
            if scan_file_name[-4:] == '.bin':
                # Non-UEFI images will only produce padding and microcode files.
                if not self._invalid_file_pattern.match(scan_file_name):
                    valid_file_count += 1
            else:
                try:
                    os.remove(os.path.join(dest_dir_0, scan_file_name))
                except OSError:
                    pass

        # Assume failure if nothing valid was extracted.
        # Actual UEFI images produce thousands of files, so 5 is a safe barrier.
        if valid_file_count < 1:
            # NOTE(review): the destination directory is left in place on
            # this path, unlike the one below - confirm that is intended.
            return False
        elif valid_file_count < 5:
            # Remove left-overs and stop.
            shutil.rmtree(dest_dir_0, ignore_errors=True)
            return False

        # Create header file with a dummy string, to tell the analyzer
        # this BIOS went through this extractor.
        with open(os.path.join(dest_dir_0, ':header:'), 'wb') as f:
            f.write(b'\x00\xFFUEFIExtract\xFF\x00')

        # Create flag file on the destination directory for the analyzer to
        # treat it as a big chunk of data.
        open(os.path.join(dest_dir_0, ':combined:'), 'wb').close()

        # Remove BIOS file.
        try:
            os.remove(file_path)
        except OSError:
            pass

        # Return destination directory path.
        return dest_dir_0

    @staticmethod
    def _move_dump(dump_dir, dest_dir_0):
        """Move dump_dir to dest_dir_0, returning True if it ended up there."""
        # Try moving within the same filesystem first, then across filesystems.
        for mover in (os.rename, shutil.move):
            try:
                mover(dump_dir, dest_dir_0)
            except OSError: # shutil.Error is a subclass of OSError
                continue
            if os.path.isdir(dest_dir_0):
                return True
        return False
class Formatter:
    """Base class for output formatters; subclasses implement output_row."""

    def __init__(self, out_file, options, args):
        """Initialize a formatter with the given output file and options."""

        self.out_file = out_file
        self.options = options
        self.args = args

        self.array = options.get('array')

    def begin(self):
        """Begin the formatter's output."""
        pass

    def end(self):
        """End the formatter's output."""
        pass

    def get_url(self, columns):
        """Returns the download URL for a given row.

        The URL is derived from the path in columns[0]. As a side effect,
        a leading 'www.' is stripped from columns[0] itself (the URL keeps
        it). An empty string is returned if no valid URL can be built.
        """

        # Start building the URL.
        link_url = columns[0]

        # Remove www from original path.
        if columns[0][:4] == 'www.':
            columns[0] = columns[0][4:]

        # Make sure the components are slash-separated.
        if os.sep != '/':
            link_url = link_url.replace(os.sep, '/')

        # Stop at the first decompression layer.
        archive_index = link_url.find(':/')
        if archive_index > -1:
            link_url = link_url[:archive_index]

        # Encode the URL. A question mark is only encoded when a slash
        # follows it, which means it is part of a directory name instead
        # of starting the query string on the last path component.
        link_url = link_url.replace('#', '%23')
        link_url = re.sub(r'\?(?=[^/]*/)', '%3F', link_url)

        # Stop if the URL is not valid.
        slash_index = link_url.find('/')
        if slash_index == -1 or '.' not in link_url[:slash_index]:
            return ''

        return 'http://' + link_url

    def join_if_required(self, c, l):
        """Returns just l if array mode is enabled, or l joined by c otherwise."""
        if self.array:
            return l
        else:
            return c.join(l)

    def output_headers(self, columns, do_output):
        """Output column headers."""
        if do_output:
            self.output_row(columns)

    def output_row(self, columns):
        """Output an item."""
        raise NotImplementedError()

    def split_if_required(self, c, s):
        """Returns s split by c if array mode is enabled, or just s otherwise."""
        if self.array:
            return s.split(c)
        else:
            return s
+ self.array = False + + self.delimiter = delimiter + + if self.options.get('hyperlink'): + # Get the localized HYPERLINK formula name if specified. + if self.args: + self.hyperlink = self.args[0] + else: + self.hyperlink = 'HYPERLINK' + else: + self.hyperlink = None + + def output_row(self, columns): + # Add hyperlink if requested. + output = '' + if self.hyperlink: + link_url = self.get_url(columns) + if link_url: + link_prefix = '=' + self.hyperlink + '(""' + link_suffix = '""' + self.delimiter + '""\U0001F53D"")' # down arrow emoji + + # Build and output the final link, accounting for Excel's column size limit. + link = link_prefix + link_url[:256 - len(link_prefix) - len(link_suffix)] + link_suffix + output += '"' + link + '"' + else: + output += '""' + + # Add fields. + for field in columns: + if output: + output += self.delimiter + output += '"' + # Account for Excel's column size limit and lack of linebreak support. + output += field.replace('\n', ' - ').replace('"', '""')[:256] + output += '"' + + # Add linebreak. + output += '\n' + + # Write row. + self.out_file.write(output) + + +class JSONFormatter(Formatter): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self.hyperlink = self.options.get('hyperlink') + + def begin(self): + # Start root list. + self.out_file.write('[') + self.first_row = True + + def end(self): + # End root list. + self.out_file.write(']\n') + + def get_json_object(self, columns): + """Returns the JSON object to be output for this row.""" + raise NotImplementedError() + + def output_headers(self, columns, do_output): + # Insert URL column if requested. + hyperlink = self.hyperlink + if hyperlink: + columns.insert(0, 'URL') + + # Prevent output_row from adding a null header. + self.hyperlink = False + super().output_headers(columns, do_output) + self.hyperlink = hyperlink + + def output_row(self, columns): + # Add URL if requested. 
+ if self.hyperlink: + columns.insert(0, self.get_url(columns)) + + # Write row. + obj = self.get_json_object(columns) + if obj: + if self.first_row: + self.first_row = False + else: + self.out_file.write('\n,') + self.out_file.write(json.dumps(obj)) + +class JSONObjectFormatter(JSONFormatter): + def get_json_object(self, columns): + return {self.headers[column_index]: columns[column_index] for column_index in range(len(columns)) if columns[column_index]} + + def output_headers(self, columns, do_output): + # Insert URL column if requested. + if self.hyperlink: + columns.insert(0, 'URL') + + # Save column headers for later. + self.headers = [column.lower().replace(' ', '').replace('-', '') for column in columns] + +class JSONTableFormatter(JSONFormatter): + def get_json_object(self, columns): + return columns diff --git a/biostools/pciutil.py b/biostools/pciutil.py new file mode 100644 index 0000000..2dff260 --- /dev/null +++ b/biostools/pciutil.py @@ -0,0 +1,257 @@ +#!/usr/bin/python3 +# +# 86Box A hypervisor and IBM PC system emulator that specializes in +# running old operating systems and software designed for IBM +# PC systems and compatibles from 1981 through fairly recent +# system designs based on the PCI bus. +# +# This file is part of the 86Box BIOS Tools distribution. +# +# Utility library for identifying PCI device/vendor IDs. +# +# +# +# Authors: RichardG, +# +# Copyright 2021 RichardG. 
# Abbreviation table applied by clean_device(). Each entry is a
# (regex, replacement) pair. clean_device() wraps every regex in a leading
# prefix group, which becomes group 1, so an entry's own groups start at \2.
clean_device_abbr = [
    # Generic patterns to catch extended abbreviations: "Abbreviated Terms (AT)"
    (r'([A-Z])[^\s]+ ([A-Z])[^\s]+ (?:\(|\[|\{|/)\2\3(?:$|\)|\]|\})', r'\2\3'),
    (r'([A-Z])[^\s]+ ([A-Z])[^\s]+ ([A-Z])[^\s]+ (?:\(|\[|\{|/)\2\3\4(?:$|\)|\]|\})', r'\2\3\4'),
    (r'([A-Z])[^\s]+ ([A-Z])[^\s]+ ([A-Z])[^\s]+ ([A-Z])[^\s]+ (?:\(|\[|\{|/)\2\3\4\5(?:$|\)|\]|\})', r'\2\3\4\5'),

    # Manual patterns
    (r'100Base-TX?', r'FE'),
    (r'1000Base-T', r'GbE'),
    (r'Accelerat(?:ion|or)', r'Accel.'),
    (r'Alert on LAN', r'AoL'),
    (r'\((.+) applications?\)', r'(\2)'),  # 8086:105e
    (r'Chipset Family', r'Chipset'),
    (r'Chipset Graphics', r'iGPU'),
    (r'Connection', r'Conn.'),
    (r'DECchip', r''),
    (r'Dual (Lane|Port)', r'2-\2'),
    (r'Fast Ethernet', r'FE'),
    (r'Fibre Channel', r'FC'),
    (r'Function', r'Func.'),
    (r'([0-9]{1,3})G Ethernet', r'\2GbE'),
    (r'(?:([0-9]{1,3}) ?)?(?:G(?:bit|ig) Ethernet|GbE)', r'\2GbE'),
    (r'Graphics Processor', r'GPU'),
    (r'High Definition Audio', r'HDA'),
    (r'Host Adapter', r'HBA'),
    (r'Host Bus Adapter', r'HBA'),
    (r'Host Controller', r'HC'),
    (r'Input/Output', r'I/O'),
    (r'Integrated ([^\s]+) (?:Graphics|GPU)', r'\2 iGPU'),  # VIA CLE266
    (r'Integrated (?:Graphics|GPU)', r'iGPU'),
    (r'([0-9]) (lane|port)', r'\2-\3'),
    (r'Local Area Network', r'LAN'),
    (r'Low Pin Count', r'LPC'),
    (r'Memory Controller Hub', r'MCH'),
    (r'Network (?:Interface )?(?:Adapter|Card|Controller)', r'NIC'),
    (r'NVM Express', r'NVMe'),
    (r'Parallel ATA', r'PATA'),
    (r'PCI(?:-E| Express)', r'PCIe'),
    (r'([^- ]+)[- ]to[- ]([^- ]+)', r'\2-\3'),
    (r'Platform Controller Hub', r'PCH'),
    (r'Processor Graphics', r'iGPU'),
    (r'Quad (Lane|Port)', r'4-\2'),
    (r'Serial ATA', r'SATA'),
    (r'Serial Attached SCSI', r'SAS'),
    (r'Single (Lane|Port)', r'1-\2'),
    (r'USB ?([0-9])\.0', r'USB\2'),
    (r'USB ?([0-9])\.[0-9] ?Gen([0-9x]+)', r'USB\2.\3'),
    (r'USB ?([0-9]\.[0-9])', r'USB\2'),
    (r'Virtual Machine', r'VM'),
    (r'Wake on LAN', r'WoL'),
    (r'Wireless LAN', r'WLAN'),

    # Generic pattern to remove duplicate abbreviations: "AT (AT)"
    (r'([^ \(\[\{/]+) (?: |\(|\[|\{|/)\2(?: |\)|\]|\})', r'\2'),
]
clean_device_bit_pattern = re.compile(r'''( |^|\(|\[|\{|/)(?:([0-9]{1,4}) )?(?:(K)(?:ilo)?|(M)(?:ega)?|(G)(?:iga)?)bit( |$|\)|\]|\})''', re.I)
clean_device_suffix_pattern = re.compile(r''' (?:Adapter|Card|Device|(?:Host )?Controller)( (?: [0-9#]+)?|$|\)|\]|\})''', re.I)
clean_vendor_abbr_pattern = re.compile(r''' \[([^\]]+)\]''')
clean_vendor_suffix_pattern = re.compile(r''' (?:Semiconductors?|(?:Micro)?electronics?|Interactive|Technolog(?:y|ies)|(?:Micro)?systems|Computer(?: works)?|Products|Group|and subsidiaries|of(?: America)?|Co(?:rp(?:oration)?|mpany)?|Inc|LLC|Ltd|GmbH|AB|AG|SA|(?:\(|\[|\{).*)$''', re.I)

# Vendor names replaced outright, before any other cleanup is attempted.
clean_vendor_force = {
    'National Semiconductor Corporation': 'NSC',
}

# Vendor names replaced after the generic suffixes have been stripped.
clean_vendor_final = {
    'Chips and': 'C&T',
    'Digital Equipment': 'DEC',
    'Microchip Technology/SMSC': 'Microchip/SMSC',
    'NVidia/SGS Thomson': 'NVIDIA/ST',
    'S3 Graphics': 'S3',
    'Silicon Integrated': 'SiS',
    'Silicon Motion': 'SMI',
    'STMicroelectronics': 'ST',
    'Texas Instruments': 'TI',
    'VMWare': 'VMware',
}

# Compiled form of clean_device_abbr, filled in on first use.
_clean_device_abbr_cache = []

# PCI ID database, filled in by load_pci_db().
_pci_vendors = {}     # vendor ID => name
_pci_devices = {}     # (vendor ID << 16) | device ID => name
_pci_subdevices = {}  # device key as above => {(subvendor << 16) | subdevice: name}
_pci_classes = {}     # class => name
_pci_subclasses = {}  # (class << 8) | subclass => name
_pci_progifs = {}     # (class << 16) | (subclass << 8) | progif => name


def clean_device(device, vendor=None):
    """Make a device name more compact if possible.

    If vendor is given and the resulting name starts with it, that leading
    vendor name is removed as well.
    """

    # Generate pattern cache if required. Every table pattern is anchored
    # between a prefix (space, start of string, opening bracket or slash) and
    # a suffix (space, end of string or closing bracket), both of which are
    # carried over to the output. The list is built in full before being
    # stored so a failure cannot leave a partial cache behind.
    if not _clean_device_abbr_cache:
        compiled = [
            (
                re.compile(r'(?P<prefix> |^|\(|\[|\{|/)' + pattern + r'(?P<suffix> |$|\)|\]|\})', re.I),
                r'\g<prefix>' + replace + r'\g<suffix>',
            )
            for pattern, replace in clean_device_abbr
        ]
        _clean_device_abbr_cache.extend(compiled)

    # Apply patterns.
    device = clean_device_bit_pattern.sub(r'\1\2\3\4\5bit\6', device)
    for pattern, replace in _clean_device_abbr_cache:
        device = pattern.sub(replace, device)
    device = clean_device_suffix_pattern.sub(r'\1', device)

    # Remove duplicate vendor ID.
    if vendor and device.startswith(vendor):
        device = device[len(vendor):]

    # Remove duplicate spaces.
    return ' '.join(device.split())


def clean_vendor(vendor):
    """Make a vendor name more compact if possible."""

    # Apply force table.
    vendor_force = clean_vendor_force.get(vendor)
    if vendor_force:
        return vendor_force

    # Use an abbreviation if the name already includes it.
    vendor = vendor.replace(' / ', '/')
    match = clean_vendor_abbr_pattern.search(vendor)
    if match:
        return match.group(1)

    # Strip trailing punctuation and known suffixes until none are left.
    while True:
        vendor = vendor.rstrip(' ,.')
        match = clean_vendor_suffix_pattern.search(vendor)
        if not match:
            break
        vendor = vendor[:match.start()]

    # Apply final cleanup table.
    vendor = clean_vendor_final.get(vendor, vendor)

    # Remove duplicate spaces.
    return ' '.join(vendor.split())


def download_compressed(url, skip_exts=()):
    """Downloads a file which may be available in compressed versions.

    The .xz, .bz2 and .gz variants of url are tried in that order, followed
    by url itself; extensions listed in skip_exts (None meaning the
    uncompressed file) are not attempted. Returns a binary file-like object:
    a BytesIO holding the decompressed data, or the open response itself for
    the uncompressed file. Raises FileNotFoundError if every attempt fails.
    """

    # Try all files.
    for ext, module_name in (('.xz', 'lzma'), ('.bz2', 'bz2'), ('.gz', 'gzip'), (None, None)):
        # Skip extension if requested.
        if ext in skip_exts:
            continue

        # Import decompression module if required.
        module = None
        if module_name:
            try:
                module = __import__(module_name)
            except ImportError:
                continue

        # Connect to URL.
        try:
            f = urllib.request.urlopen(url + (ext or ''), timeout=30)
        except Exception:
            # Move on to the next file if the connection failed.
            continue

        # If this is uncompressed, return the file handle as is.
        if module is None:
            return f

        # Decompress data into a BytesIO object.
        try:
            return io.BytesIO(module.decompress(f.read()))
        except Exception:
            # Move on to the next file if reading or decompression failed.
            continue
        finally:
            # The response is no longer needed either way.
            f.close()

    # No success with any files.
    raise FileNotFoundError('All attempts to download "{0}" and variants thereof have failed'.format(url))


def get_pci_id(vendor_id, device_id):
    """Get the PCI device vendor and name for vendor_id and device_id.

    Returns a (vendor, device) tuple of strings. An unknown vendor is reported
    as '[Unknown]' with an empty device name; an unknown device of a known
    vendor is reported as '[Unknown]'.
    """

    # Load PCI ID database if required.
    if not _pci_vendors:
        load_pci_db()

    # Get identification.
    vendor = _pci_vendors.get(vendor_id, '').strip()
    device_default = '[Unknown]' if vendor else ''
    device = _pci_devices.get((vendor_id << 16) | device_id, device_default).strip()
    return vendor or '[Unknown]', device


def load_pci_db():
    """Loads PCI ID database from disk or the website.

    Fills in the module-level _pci_* tables. Returns silently, leaving them
    untouched, if neither source is available.
    """

    # Try loading from disk or the website.
    try:
        f = open('/usr/share/misc/pci.ids', 'rb')
    except OSError:
        try:
            f = download_compressed('https://pci-ids.ucw.cz/v2.2/pci.ids', ['.xz'])
        except Exception:
            # No sources available.
            return

    # Lines are bytes, so indexing them yields integers.
    comment, class_marker, tab = b'#'[0], b'C'[0], b'\t'[0]

    def name_from(line, start):
        # Names run from a fixed column up to the end of the line.
        return line[start:].rstrip(b'\r\n').decode('utf8', 'ignore')

    vendor = device = 0
    class_num = subclass_num = None
    try:
        for line in f:
            if len(line) < 2 or line[0] == comment:
                continue
            elif line[0] == class_marker:  # class
                class_num = int(line[2:4], 16)
                _pci_classes[class_num] = name_from(line, 6)
            elif class_num is not None:  # subclass/progif
                if line[1] != tab:  # subclass
                    subclass_num = (class_num << 8) | int(line[1:3], 16)
                    _pci_subclasses[subclass_num] = name_from(line, 5)
                else:  # progif
                    progif_num = (subclass_num << 8) | int(line[2:4], 16)
                    _pci_progifs[progif_num] = name_from(line, 6)
            elif line[0] != tab:  # vendor
                vendor = int(line[:4], 16)
                _pci_vendors[vendor] = name_from(line, 6)
            elif line[1] != tab:  # device
                device = (vendor << 16) | int(line[1:5], 16)
                _pci_devices[device] = name_from(line, 7)
            else:  # subdevice
                subdevice = (int(line[2:6], 16) << 16) | int(line[7:11], 16)
                _pci_subdevices.setdefault(device, {})[subdevice] = name_from(line, 13)
    finally:
        # Close the source even if a malformed line aborts parsing.
        f.close()
# Debugging entry point of pciutil.py, which ends here in this patch. Reads
# one line: an 8 or 9 character string is taken as a hexadecimal vendor ID
# (first four characters) and device ID (last four characters); anything else,
# or any failure while looking the ID up, is cleaned up as a device name.
if __name__ == '__main__':
    s = input()
    try:
        if len(s) in (8, 9):
            vendor, device = get_pci_id(int(s[:4], 16), int(s[-4:], 16))
            vendor = clean_vendor(vendor)
            print(vendor)
            print(clean_device(device, vendor))
        else:
            raise Exception('not id')
    except Exception:
        print(clean_device(s))


# MM/DD/YY or MM/DD/YYYY date. date_gt() requires these three group names.
date_pattern_mmddyy = re.compile(r'''(?P<month>[0-9]{2})/(?P<day>[0-9]{2})/(?P<year>[0-9]{2,4})''')

# Serializes writes to the error log.
_error_log_lock = multiprocessing.Lock()


def all_match(patterns, data):
    """Returns True if all re patterns can be found in data."""
    # all() stops at the first pattern which is not found.
    return all(pattern.search(data) is not None for pattern in patterns)


def _date_key(match):
    """Returns a comparable (year, month, day) tuple for a date match.

    Two-digit years below 80 are placed in the 2000s, the others in the 1900s.
    """
    year = int(match.group('year'))
    if year < 100:
        year += 2000 if year < 80 else 1900
    return year, int(match.group('month')), int(match.group('day'))


def date_gt(date1, date2, pattern):
    """Returns True if date1 is greater than date2.
    Date format set by the given pattern, which must provide the named
    groups year, month and day. A date1 which does not match is never
    greater; a matching date1 is always greater than a non-matching date2."""

    # Run date regex.
    date1_match = pattern.match(date1)
    date2_match = pattern.match(date2)
    if not date1_match:
        return False
    if not date2_match:
        return True

    # Compare year first, then month, then day.
    return _date_key(date1_match) > _date_key(date2_match)


def log_traceback(*args):
    """Log to biostools_error.log, including any outstanding traceback.

    The arguments are converted to strings and joined into a header line
    describing what was being done when the error occurred."""

    elems = ['===[ While']
    elems.extend(str(elem) for elem in args)
    elems.append(']===\n')
    output = ' '.join(elems)

    with _error_log_lock:
        # The with block closes the log even if writing fails.
        with open('biostools_error.log', 'a') as f:
            f.write(output)
            traceback.print_exc(file=f)


def read_complement(file_path, file_header=None, max_size=16777216):
    """Read up to max_size from file_path starting at the end of file_header.
    Usage: file_header += read_complement(file_path, file_header)

    The length of file_header counts towards max_size. A negative or None
    max_size reads to the end of the file. Returns b'' on any failure."""
    try:
        offset = len(file_header) if file_header else 0
        if max_size is None or max_size < 0:
            size = -1
        else:
            # A header longer than max_size must yield nothing; a negative
            # size would make read() return the whole rest of the file.
            size = max(0, max_size - offset)

        with open(file_path, 'rb') as f:
            if offset:
                f.seek(offset)
            return f.read(size)
    except Exception:
        # Best effort: an unreadable file simply contributes no data.
        return b''


def read_string(data, terminator=b'\x00'):
    """Read a terminated string (by NUL by default) from a bytes.

    The whole of data is used if the terminator is absent. Decoded as cp437,
    ignoring anything which cannot be decoded."""
    terminator_index = data.find(terminator)
    if terminator_index > -1:
        data = data[:terminator_index]
    return data.decode('cp437', 'ignore')


def rmdirs(dir_path):
    """Remove empty dir_path, also removing any parent directory which ends up empty.

    Returns the number of directories removed."""
    removed_count = 0
    while True:
        try:
            os.rmdir(dir_path)
        except OSError:
            # Not empty, missing or not removable: stop walking up.
            break
        removed_count += 1
        dir_path = os.path.dirname(dir_path)
    return removed_count


def remove_extension(file_name):
    """Remove file_name's extension, if one is present.

    Everything from the last dot onwards is removed."""
    extension_index = file_name.rfind('.')
    if extension_index == -1:
        return file_name
    return file_name[:extension_index]
def try_makedirs(dir_path):
    """Try to create dir_path. Returns True if successful, False if not.

    Missing parent directories are created as well. A directory which already
    exists counts as a success, since the result only reflects whether
    dir_path is a directory afterwards."""
    try:
        os.makedirs(dir_path)
    except (OSError, ValueError):
        # Already exists, not permitted or invalid path: the check below
        # decides the outcome.
        pass
    return os.path.isdir(dir_path)