#!/usr/bin/python3 -u
#
# 86Box          A hypervisor and IBM PC system emulator that specializes in
#                running old operating systems and software designed for IBM
#                PC systems and compatibles from 1981 through fairly recent
#                system designs based on the PCI bus.
#
#                This file is part of the 86Box BIOS Tools distribution.
#
#                Main BIOS extractor and analyzer program.
#
#
#
# Authors:       RichardG,
#
#                Copyright 2021 RichardG.
#
import errno, getopt, multiprocessing, os, pickle, re, socket, subprocess, sys, threading
from . import analyzers, extractors, formatters, util

# Constants.
ANALYZER_MAX_CACHE_MB = 512
DEFAULT_REMOTE_PORT = 8620


# Extraction module.

def extract_file(file_extractors, subdir_trim_index, path_trim_index, next_dir_number_path, scan_dir_path, scan_file_name):
    """Process a given file for extraction."""
    # Build source file path.
    file_path = os.path.join(scan_dir_path, scan_file_name)

    # Remove links.
    if os.path.islink(file_path):
        try:
            os.remove(file_path)
        except:
            try:
                os.rmdir(file_path)
            except:
                pass
        return

    # Read header.
    try:
        f = open(file_path, 'rb')
        file_data = f.read(32782) # upper limit set by ISOExtractor
        f.close()
    except:
        # The file might have been removed after the fact by an extractor.
        return

    # Come up with a destination directory for this file.
    dest_subdir = scan_dir_path[subdir_trim_index:]
    while dest_subdir[:len(os.sep)] == os.sep:
        dest_subdir = dest_subdir[len(os.sep):]
    dest_file_path = os.path.join(dest_subdir, scan_file_name + ':')
    dest_dir = os.path.join(next_dir_number_path, dest_file_path)
    dest_dir_0 = os.path.join(os.path.dirname(next_dir_number_path), '0', dest_file_path)

    # Run through file extractors until one succeeds.
    for extractor in file_extractors:
        # Run the extractor.
        try:
            extractor_result = extractor.extract(file_path, file_data, dest_dir, dest_dir_0)
        except extractors.MultifileStaleException:
            # This file has gone missing between the multi-file lock being
            # requested and successfully acquired. Stop extraction immediately.
            break
        except Exception as e:
            if util.raise_enospc and getattr(e, 'errno', None) == errno.ENOSPC:
                # Abort on no space if requested.
                print('{0} => aborting extraction due to disk space\n'.format(file_path[path_trim_index:]), end='')
                raise

            # Log an error.
            util.log_traceback('extracting', file_path)
            continue
        finally:
            if extractor.multifile_locked:
                extractor.multifile_locked = False
                extractor.multifile_lock.release()

        # Check if the extractor produced any results.
        if extractor_result:
            # Handle the line break ourselves, since Python prints the main
            # body and line break separately, causing issues when multiple
            # threads/processes are printing simultaneously.
            print('{0} => {1}{2}\n'.format(file_path[path_trim_index:], extractor.__class__.__name__, (extractor_result == True) and ' (skipped)' or ''), end='')
            break

    # Remove destination directories if they were created but are empty.
    for to_remove in (dest_dir, dest_dir_0):
        util.rmdirs(to_remove)


def extract_process(queue, abort_flag, multifile_lock, dir_number_path, next_dir_number_path, options):
    """Main loop for the extraction multiprocessing pool."""
    # Set up extractors.
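    # Extractor order matters: extract_file() above tries these in sequence and
    # stops at the first one that reports a result, so more specific formats
    # must come before permissive catch-alls (MBRUnsafeExtractor is last).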
    image_extractor = extractors.ImageExtractor()
    if options['unpack-only']:
        file_extractors = []
    else:
        file_extractors = [
            extractors.DiscardExtractor(),
        ]
    file_extractors += [
        extractors.ISOExtractor(),
        extractors.VMExtractor(),
        extractors.PEExtractor(),
        extractors.ASTExtractor(),
        extractors.FATExtractor(),
        extractors.MBRSafeExtractor(),
        extractors.TarExtractor(),
        extractors.ArchiveExtractor(),
        extractors.CPUZExtractor(),
        extractors.HexExtractor(),
        image_extractor,
        extractors.ApricotExtractor(),
        extractors.IntelNewExtractor(),
    ]
    if not options['unpack-only']:
        file_extractors += [
            extractors.DellExtractor(),
        ]
    file_extractors += [
        extractors.IntelExtractor(),
        extractors.OMFExtractor(),
        extractors.TrimondExtractor(),
        extractors.InterleaveExtractor(),
    ]
    if not options['unpack-only']:
        file_extractors += [
            extractors.BIOSExtractor(),
            extractors.UEFIExtractor(),
        ]
    file_extractors += [
        extractors.MBRUnsafeExtractor(),
    ]

    # Disable debug mode and add a reference to some common objects on all extractors.
    dummy_func = lambda self, *args: None
    for extractor in file_extractors:
        extractor.multifile_lock = multifile_lock
        extractor.image_extractor = image_extractor
        if not options['debug']:
            extractor.debug = False
            extractor.debug_print = dummy_func

    # Raise exceptions on no space if requested.
    util.raise_enospc = options['enospc']

    # Cache trim index values for determining a file's relative paths.
    dir_number_path = dir_number_path.rstrip(os.sep)
    subdir_trim_index = len(dir_number_path)
    path_trim_index = len(os.path.dirname(dir_number_path)) + len(os.sep)

    # Receive work from the queue.
    while True:
        item = queue.get()
        if item == None: # special item to stop the loop
            break
        elif abort_flag.value:
            continue
        try:
            extract_file(file_extractors, subdir_trim_index, path_trim_index, next_dir_number_path, *item)
        except Exception as e:
            if util.raise_enospc and getattr(e, 'errno', None) == errno.ENOSPC:
                # Abort all threads if ENOSPC was raised.
                abort_flag.value = 1
                continue
            raise


def extract(dir_path, _, options):
    """Main function for extraction."""
    # Check if the structure is correct.
    if not os.path.exists(os.path.join(dir_path, '1')):
        print('Incorrect directory structure. All data to unpack should be located inside', file=sys.stderr)
        print('a directory named 1 in turn located inside the given directory.', file=sys.stderr)
        return 2

    # Check if bios_extract is there.
    if not os.path.exists(os.path.abspath(os.path.join('bios_extract', 'bios_extract'))):
        print('bios_extract binary not found, did you compile it?', file=sys.stderr)
        return 3

    # Open devnull file for shell command output.
    devnull = open(os.devnull, 'wb')

    # Recurse through directory numbers.
    dir_number = 1
    while True:
        dir_number_path = os.path.join(dir_path, str(dir_number))
        next_dir_number_path = os.path.join(dir_path, str(dir_number + 1))

        # Fix permissions on extracted archives.
        print('Fixing up directory {0}:'.format(dir_number), end=' ', flush=True)
        try:
            print('chown', end=' ', flush=True)
            subprocess.run(['chown', '-hR', '--reference=' + dir_path, '--', dir_number_path], stdout=devnull, stderr=subprocess.STDOUT)
            print('chmod', end=' ', flush=True)
            subprocess.run(['chmod', '-R', 'u+rwx', '--', dir_number_path], stdout=devnull, stderr=subprocess.STDOUT) # execute for listing directories
        except:
            pass
        print()

        # Start multiprocessing pool.
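        # The work queue is bounded to 8 entries per consumer (local workers
        # plus remote clients), so the directory walk below cannot run
        # arbitrarily far ahead of the extraction workers.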
        print('Starting extraction on directory {0}'.format(dir_number), end='', flush=True)
        queue_size = options['threads'] + len(options['remote_servers'])
        queue = multiprocessing.Queue(maxsize=queue_size * 8)
        abort_flag = multiprocessing.Value('B', 0)
        initargs = (queue, abort_flag, multiprocessing.Lock(), dir_number_path, next_dir_number_path, options)
        mp_pool = multiprocessing.Pool(options['threads'], initializer=extract_process, initargs=initargs)
        print(flush=True)

        # Start remote clients.
        remote_clients = []
        for remote_server in options['remote_servers']:
            remote_clients.append(RemoteClient(remote_server, 'x', initargs))

        # Create next directory.
        if not os.path.isdir(next_dir_number_path):
            os.makedirs(next_dir_number_path)

        # Scan directory structure.
        found_any_files = False
        for scan_dir_path, scan_dir_names, scan_file_names in os.walk(dir_number_path):
            for scan_file_name in scan_file_names:
                found_any_files = True
                queue.put((scan_dir_path, scan_file_name))
                if abort_flag.value: # stop feeding queue if a thread abort was requested
                    break

        # Stop if no files are left.
        if not found_any_files:
            # Remove this directory and the next directory if they're empty.
            try:
                os.rmdir(dir_number_path)
                dir_number -= 1
            except:
                pass
            try:
                os.rmdir(next_dir_number_path)
            except:
                pass
            break

        # Increase number.
        dir_number += 1

        # Stop multiprocessing pool and wait for its workers to finish.
        for _ in range(queue_size):
            queue.put(None)
        mp_pool.close()
        mp_pool.join()

        # Wait for remote clients to finish.
        for client in remote_clients:
            client.join()

        # Abort extraction if a thread abort was requested.
        if abort_flag.value:
            return 1

    # Create 0 directory if it doesn't exist.
    print('Merging directories:', end=' ')
    merge_dest_path = os.path.join(dir_path, '0')
    if not os.path.isdir(merge_dest_path):
        os.makedirs(merge_dest_path)

    # Merge all directories into the 0 directory.
    for merge_dir_name in range(1, dir_number + 1):
        merge_dir_path = os.path.join(dir_path, str(merge_dir_name))
        if not os.path.isdir(merge_dir_path):
            continue
        print(merge_dir_name, end=' ')
        subprocess.run(['cp', '-rlaT', merge_dir_path, merge_dest_path], stdout=devnull, stderr=subprocess.STDOUT)
        subprocess.Popen(['rm', '-rf', merge_dir_path], stdout=devnull, stderr=subprocess.STDOUT)

    # Clean up.
    devnull.close()
    print()

    return 0


# Analysis module.

amipci_pattern = re.compile('''amipci_([0-9A-F]{4})_([0-9A-F]{4})\\.rom$''')

def analyze_files(formatter, scan_base, file_analyzers, scan_dir_path, scan_file_names):
    """Process the given files for analysis."""
    # Set up caches.
    files_flags = {}
    files_data = {}
    combined_oroms = []
    header_data = None

    # In combined mode (enabled by InterleaveExtractor and BIOSExtractor), we
    # handle all files in the directory as a single large blob, to avoid any doubts.
    combined = ':combined:' in scan_file_names
    if combined:
        files_data[''] = b''

    # Sort file names for better predictability. The key= function forces
    # "original.tm1" to be combined after "original.tmp", in case the Award
    # identification data spans both files. (AOpen AX6B(+) R2.00)
    if len(scan_file_names) > 1:
        scan_file_names.sort(key=lambda fn: (fn == 'original.tm1') and 'original.tmq' or fn)

    # Read files into the cache.
    cache_quota = ANALYZER_MAX_CACHE_MB * 1048576 # the constant is in MB, so multiply by 2**20 bytes
    for scan_file_name in scan_file_names:
        # Read up to 16 MB as a safety net.
        file_data = util.read_complement(os.path.join(scan_dir_path, scan_file_name))

        # Write data to cache.
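        # ':header:' and ':combined:' are flag files left behind by extractors:
        # the former carries a header blob which is passed to analyzers
        # separately, the latter marks this directory's files as parts of a
        # single combined image.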
        if scan_file_name == ':header:':
            header_data = file_data
        elif combined and scan_file_name != ':combined:':
            files_data[''] += file_data

            # Add PCI option ROM IDs extracted from AMI BIOSes by bios_extract, since the ROM might not
            # contain a valid PCI header to begin with. (Apple PC Card with OPTi Viper and AMIBIOS 6)
            match = amipci_pattern.match(scan_file_name)
            if match:
                combined_oroms.append((int(match.group(1), 16), int(match.group(2), 16)))
        else:
            files_data[scan_file_name] = file_data

        # Stop reading if the cache has gotten too big.
        cache_quota -= len(file_data)
        if cache_quota <= 0:
            break

    # Prepare combined-mode analysis.
    if combined:
        # Set interleaved flag on de-interleaved blobs.
        try:
            flag_size = os.path.getsize(os.path.join(scan_dir_path, ':combined:'))
            if flag_size >= 2:
                combined = 'Interleaved'
                if flag_size > 2:
                    combined += str(flag_size)
        except:
            pass

        # Commit to only analyzing the large blob.
        scan_file_names = ['']
    elif header_data:
        # Remove header flag file from list.
        scan_file_names.remove(':header:')

    # Analyze each file.
    for scan_file_name in scan_file_names:
        # Read file from cache if possible.
        scan_file_path = os.path.join(scan_dir_path, scan_file_name)
        file_data = files_data.get(scan_file_name, None)
        if file_data == None:
            # Read up to 16 MB as a safety net.
            file_data = util.read_complement(scan_file_path)

        # Check for an analyzer which can handle this file.
        analyzer_file_path = combined and scan_dir_path or scan_file_path
        bonus_analyzer_metadata = bonus_analyzer_oroms = None
        file_analyzer = None
        strings = None
        for analyzer in file_analyzers:
            # Reset this analyzer.
            analyzer.reset()
            analyzer._file_path = scan_file_path

            # Check if the analyzer can handle this file.
            try:
                analyzer_result = analyzer.can_handle(analyzer_file_path, file_data, header_data)
            except:
                # Log an error.
                util.log_traceback('searching for analyzers for', os.path.join(scan_dir_path, scan_file_name))
                continue

            # Move on if the analyzer responded negatively.
            if not analyzer_result:
                # Extract metadata and option ROMs from the bonus analyzer.
                if bonus_analyzer_metadata == None:
                    bonus_analyzer_metadata = analyzer.metadata
                    bonus_analyzer_oroms = analyzer.oroms
                continue

            # Run strings on the file data if required (only once if requested by analyzer).
            if analyzer.can_analyze():
                if not strings:
                    try:
                        strings = subprocess.run(['strings', '-n8'], input=file_data, stdout=subprocess.PIPE).stdout.decode('ascii', 'ignore').split('\n')
                    except:
                        util.log_traceback('running strings on', os.path.join(scan_dir_path, scan_file_name))
                        continue

                # Analyze each string.
                try:
                    for string in strings:
                        analyzer.analyze_line(string)
                except analyzers.AbortAnalysisError:
                    # Analysis aborted.
                    pass
                except:
                    # Log an error.
                    util.log_traceback('analyzing', os.path.join(scan_dir_path, scan_file_name))
                    continue

            # Take this analyzer if it produced a version.
            if analyzer.version:
                # Clean up version field if an unknown version was returned.
                if analyzer.version == '?':
                    analyzer.version = ''

                # Stop looking for analyzers.
                file_analyzer = analyzer
                break

        # Did any analyzer successfully handle this file?
        if not file_analyzer:
            # Treat this as a standalone PCI option ROM file if BonusAnalyzer found any.
            if bonus_analyzer_oroms:
                bonus_analyzer_metadata = []
                file_analyzer = file_analyzers[0]
            else:
                # Move on to the next file if nothing else.
                continue

        # Add interleaved flag to metadata.
        if type(combined) == str:
            bonus_analyzer_metadata.append(('ROM', combined))

        # Clean up the file path.
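        # Illustrative (hypothetical) example: a path like
        # "1/board.bin:/original.tmp" collapses to "1/board.bin" below whenever
        # the "board.bin:" directory contains a :combined: or :header: flag file.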
        scan_file_path_full = os.path.join(scan_dir_path, scan_file_name)

        # Remove combined directories from the path.
        found_flag_file = True
        while found_flag_file:
            # Find archive indicator.
            archive_index = scan_file_path_full.rfind(':' + os.sep)
            if archive_index == -1:
                break

            # Check if a combined or header flag file exists.
            found_flag_file = False
            for flag_file in (':combined:', ':header:'):
                if os.path.exists(os.path.join(scan_file_path_full[:archive_index] + ':', flag_file)):
                    # Trim the directory off.
                    scan_file_path_full = scan_file_path_full[:archive_index]
                    found_flag_file = True
                    break
        scan_file_path = scan_file_path_full[len(scan_base) + len(os.sep):]

        # Remove root extraction directory.
        slash_index = scan_file_path.find(os.sep)
        if slash_index == 1 and scan_file_path[0] == '0':
            scan_file_path = scan_file_path[2:]

        # De-duplicate and sort metadata and option ROMs.
        metadata = list(set('[{0}] {1}'.format(key, value).strip() for key, value in (analyzer.metadata + bonus_analyzer_metadata)))
        metadata.sort()
        oroms = list(set(combined_oroms + analyzer.oroms + bonus_analyzer_oroms))
        oroms.sort()

        # Add names to option ROMs.
        previous_vendor = previous_device = None
        for x in range(len(oroms)):
            if type(oroms[x][0]) == str: # generic ROM
                # Format string.
                oroms[x] = '[{0}] {1}'.format(*oroms[x]).replace('\n', '\n' + (' ' * (len(oroms[x][0]) + 3)))
            elif len(oroms[x]) == 2: # PCI ROM
                # Get vendor and device IDs and names.
                vendor_id, device_id = oroms[x]
                vendor, device = util.get_pci_id(vendor_id, device_id)

                # Skip valid vendor IDs associated to a bogus device ID.
                if device == '[Unknown]' and device_id == 0x0000:
                    oroms[x] = None
                    continue

                # Clean up IDs.
                vendor = util.clean_vendor(vendor).strip()
                device = util.clean_device(device, vendor).strip()

                # De-duplicate vendor names.
                if vendor == previous_vendor and vendor != '[Unknown]':
                    if device == previous_device:
                        previous_device, device = device, ''
                        previous_vendor, vendor = vendor, '\u2196' # up-left arrow
                    else:
                        previous_device = device
                        previous_vendor, vendor = vendor, ' ' * len(vendor)
                else:
                    previous_device = device
                    previous_vendor = vendor

                # Format string.
                oroms[x] = '[{0:04x}:{1:04x}] {2} {3}'.format(vendor_id, device_id, vendor, device)
            else: # PnP ROM
                # Get PnP ID, vendor name and device name.
                device_id, vendor, device = oroms[x]

                # Extract ASCII letters from the PnP ID.
                pnp_id = ''.join(chr(0x40 + (letter & 0x1f)) for letter in (device_id >> 26, device_id >> 21, device_id >> 16))

                # Add the numeric part of the PnP ID.
                pnp_id += format(device_id & 0xffff, '04x').upper()

                # Clean up vendor and device names.
                vendor_device = ((vendor or '') + '\n' + (device or '')).replace('\r', '')
                vendor_device = '\n'.join(x.strip() for x in vendor_device.split('\n') if x.strip())

                # Format string.
                oroms[x] = '[{0}] {1}'.format(pnp_id, vendor_device.replace('\n', '\n' + (' ' * (len(pnp_id) + 3))))

        # Remove bogus option ROM device ID entries.
        while None in oroms:
            oroms.remove(None)

        # Add file name in single-file analysis.
        if not scan_dir_path and not scan_file_path:
            scan_file_path = os.path.basename(scan_base)

        # Collect the analyzer's results.
        fields = [((type(field) == str) and field.replace('\t', ' ').strip() or field) for field in [
            scan_file_path,
            file_analyzer.vendor,
            file_analyzer.version,
            formatter.split_if_required('\n', file_analyzer.string),
            formatter.split_if_required('\n', file_analyzer.signon),
            formatter.join_if_required('\n', metadata),
            formatter.join_if_required('\n', oroms),
        ]]

        # Output the results.
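        # The fields line up with the header row emitted by analyze():
        # File, Vendor, Version, String, Sign-on, Metadata, ROMs.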
        formatter.output_row(fields)


def analyze_process(queue, formatter, scan_base, options):
    """Main loop for the analysis multiprocessing pool."""
    # Set up analyzers.
    file_analyzers = [
        analyzers.BonusAnalyzer(), # must be the first one
        analyzers.AwardPowerAnalyzer(), # must run before AwardAnalyzer
        analyzers.ToshibaAnalyzer(), # must run before AwardAnalyzer
        analyzers.AwardAnalyzer(), # must run before PhoenixAnalyzer
        analyzers.QuadtelAnalyzer(), # must run before PhoenixAnalyzer
        analyzers.PhoenixAnalyzer(), # must run before AMIDellAnalyzer and AMIIntelAnalyzer
        #analyzers.AMIDellAnalyzer(), # must run before AMIAnalyzer
        analyzers.AMIUEFIAnalyzer(), # must run before AMIAnalyzer
        analyzers.AMIAnalyzer(), # must run before AMIIntelAnalyzer
        analyzers.AMIIntelAnalyzer(),
        analyzers.MRAnalyzer(),
        # less common BIOSes with no dependencies on the common part begin here
        #analyzers.AcerAnalyzer(),
        analyzers.AcerMultitechAnalyzer(),
        analyzers.AmproAnalyzer(),
        analyzers.AmstradAnalyzer(),
        analyzers.CDIAnalyzer(),
        analyzers.CentralPointAnalyzer(),
        analyzers.ChipsAnalyzer(),
        analyzers.CommodoreAnalyzer(),
        analyzers.CompaqAnalyzer(),
        analyzers.CopamAnalyzer(),
        analyzers.CorebootAnalyzer(),
        analyzers.DTKGoldStarAnalyzer(),
        analyzers.GeneralSoftwareAnalyzer(),
        analyzers.IBMSurePathAnalyzer(),
        analyzers.IBMAnalyzer(),
        analyzers.ICLAnalyzer(),
        analyzers.InsydeAnalyzer(),
        analyzers.IntelUEFIAnalyzer(),
        analyzers.JukoAnalyzer(),
        analyzers.MylexAnalyzer(),
        analyzers.OlivettiAnalyzer(),
        analyzers.PromagAnalyzer(),
        analyzers.SchneiderAnalyzer(),
        analyzers.SystemSoftAnalyzer(),
        analyzers.TandonAnalyzer(),
        analyzers.TinyBIOSAnalyzer(),
        analyzers.WhizproAnalyzer(),
        analyzers.ZenithAnalyzer(),
    ]

    # Disable debug mode on all analyzers.
    if not options['debug']:
        dummy_func = lambda self, *args: None
        for analyzer in file_analyzers:
            analyzer.debug_print = dummy_func
            analyzer.debug = False

    # Receive work from the queue.
    while True:
        item = queue.get()
        if item == None: # special item to stop the loop
            break
        analyze_files(formatter, scan_base, file_analyzers, *item)


def analyze(dir_path, formatter_args, options):
    """Main function for analysis."""
    # Initialize output formatter.
    output_formats = {
        'csv': (formatters.XSVFormatter, ','),
        'scsv': (formatters.XSVFormatter, ';'),
        'json': formatters.JSONObjectFormatter,
        'jsontable': formatters.JSONTableFormatter,
    }
    formatter = output_formats.get(options['format'], None)
    if not formatter:
        raise Exception('unknown output format ' + options['format'])
    if type(formatter) == tuple:
        formatter = formatter[0](*formatter[1:], sys.stdout, options, formatter_args)
    else:
        formatter = formatter(sys.stdout, options, formatter_args)

    # Begin output.
    formatter.begin()
    formatter.output_headers(['File', 'Vendor', 'Version', 'String', 'Sign-on', 'Metadata', 'ROMs'], options.get('headers'))

    # Remove any trailing slash from the root path, as the output path cleanup
    # functions rely on it not being present.
    if dir_path[-len(os.sep):] == os.sep:
        dir_path = dir_path[:-len(os.sep)]
    elif dir_path[-1:] == '/':
        dir_path = dir_path[:-1]

    # Start multiprocessing pool.
    queue = multiprocessing.Queue(maxsize=options['threads'] * 8)
    mp_pool = multiprocessing.Pool(options['threads'], initializer=analyze_process, initargs=(queue, formatter, dir_path, options))

    if os.path.isdir(dir_path):
        # Scan directory structure.
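        # Each work item is a (directory, file name list) pair; directories
        # containing a flag file are handed over whole, so analyze_files() can
        # treat their contents as one combined image.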
        for scan_dir_path, scan_dir_names, scan_file_names in os.walk(dir_path):
            if ':combined:' in scan_file_names or ':header:' in scan_file_names:
                # combined mode: process entire directory at once
                queue.put((scan_dir_path, scan_file_names))
            else:
                # regular mode: process individual files
                for scan_file_name in scan_file_names:
                    queue.put((scan_dir_path, [scan_file_name]))
    else:
        # Scan single file.
        queue.put(('', [dir_path]))

    # Stop multiprocessing pool and wait for its workers to finish.
    for _ in range(options['threads']):
        queue.put(None)
    mp_pool.close()
    mp_pool.join()

    # End output.
    formatter.end()

    return 0


# Remote server module.

class DummyAbortFlag:
    def __init__(self):
        self.value = False

class RemoteClient:
    """State and functions for communicating with a remote server."""

    def __init__(self, addr, action, initargs):
        # Initialize state.
        self.action = action
        # The second init argument is the shared abort flag in extraction mode,
        # while analysis mode passes a formatter there instead. Duck-type on the
        # value attribute, as multiprocessing.Value is a factory function rather
        # than a type and cannot be used with isinstance().
        if hasattr(initargs[1], 'value'):
            self.initargs = (DummyAbortFlag(),) + initargs[2:]
            self.abort_flag = initargs[1]
        else:
            self.initargs = initargs[1:]
            self.abort_flag = DummyAbortFlag()
        self.queue = initargs[0]
        self.sock = self.f = None
        self.queue_lock = threading.Lock()
        self.write_lock = threading.Lock()
        self.close_event = threading.Event()
        self.close_event.clear()

        # Parse address:port.
        addr_split = addr.split(':')
        self.port = DEFAULT_REMOTE_PORT
        if len(addr_split) == 0:
            return
        elif len(addr_split) == 1:
            self.addr = addr_split[0]
        else:
            self.port = int(addr_split[1])
            self.addr = addr_split[0]

        # Start client thread.
        self.queue_thread = None
        self.client_thread = threading.Thread(target=self.client_thread_func)
        self.client_thread.daemon = True
        self.client_thread.start()

    def client_thread_func(self):
        """Thread function for a remote client."""
        # Connect to server.
        print('Connecting to {0}:{1}\n'.format(self.addr, self.port), end='')
        self.sock = socket.create_connection((self.addr, self.port))
        self.f = self.sock.makefile('rwb')
        print('Connected to {0}:{1}\n'.format(self.addr, self.port), end='')

        # Start multiprocessing pool.
        self.f.write((self.action + '\n').encode('utf8', 'ignore'))
        self.f.write(pickle.dumps(self.initargs))
        self.f.flush()

        # Read responses from server.
        while True:
            try:
                line = self.f.readline().rstrip(b'\r\n')
            except:
                break
            if not line:
                break

            if line[0:1] in b'xa':
                # Multiprocessing pool started, now start the queue thread.
                self.queue_thread = threading.Thread(target=self.queue_thread_func)
                self.queue_thread.daemon = True
                self.queue_thread.start()
            elif line[0:1] == b'q':
                # Allow queue thread to proceed.
                try:
                    self.queue_lock.release()
                except:
                    pass
            elif line[0:1] == b'j':
                # We're done.
                self.close_event.set()
                break

        # Close connection.
        try:
            self.f.close()
        except:
            pass
        try:
            self.sock.close()
        except:
            pass
        print('Disconnected from {0}:{1}\n'.format(self.addr, self.port), end='')

    def queue_thread_func(self):
        """Thread function to remove items from the local queue and push them to the remote server's queue."""
        while True:
            # Wait for the queue to be available.
            self.queue_lock.acquire()

            # Read queue item.
            item = self.queue.get()
            if item == None or self.abort_flag.value: # special item to stop the loop
                self.close()
                break

            # Send queue item to server.
            scan_dir_path, scan_file_names = item
            with self.write_lock:
                self.f.write(b'q' + scan_dir_path.encode('utf8', 'ignore'))
                for scan_file_name in scan_file_names:
                    self.f.write(b'\x00' + scan_file_name.encode('utf8', 'ignore'))
                self.f.write(b'\n')
                self.f.flush()

    def close(self):
        """Close connection to the server."""
        # Write stop message.
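        # 'j' asks the server to drain its queue, join its worker pool and
        # acknowledge with a 'j' response of its own, which is what sets
        # close_event in client_thread_func() above.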
        with self.write_lock:
            try:
                self.f.write(b'j\n')
                self.f.flush()
            except:
                return

    def join(self):
        """Wait for the server connection to be closed."""
        self.close_event.wait()

class RemoteServerClient:
    """State and functions for communicating with remote clients."""

    def __init__(self, accept, options):
        # Initialize state.
        self.sock, self.addr = accept
        self.options = options
        self.queue = self.mp_pool = None
        self.write_lock = threading.Lock()
        self.queue_lock = threading.Lock()
        self.f = self.sock.makefile('rwb')

        # Start client thread.
        self.client_thread = threading.Thread(target=self.client_thread_func)
        self.client_thread.daemon = True
        self.client_thread.start()

    def client_thread_func(self):
        """Thread function for a remote client."""
        print(self.addr, 'New connection')

        # Parse commands.
        while True:
            try:
                line = self.f.readline().rstrip(b'\r\n')
            except:
                break
            if not line:
                break

            if line[0:1] in b'xa':
                # Start multiprocessing pool. (Compare a bytes slice here:
                # line[0] is an int on Python 3 and never equals b'x'.)
                print(self.addr, 'Starting pool for', (line[0:1] == b'x') and 'extraction' or 'analysis')
                self.queue = multiprocessing.Queue(maxsize=self.options['threads'] * 8)
                if line[0:1] == b'x':
                    func = extract_process
                else:
                    func = analyze_process
                self.mp_pool = multiprocessing.Pool(self.options['threads'], initializer=func, initargs=(self.queue,) + pickle.load(self.f))
            elif line[0:1] == b'q':
                # Add directory to queue.
                file_list = [item.decode('utf8', 'ignore') for item in line[1:].split(b'\x00')]
                if self.options['debug']:
                    print(self.addr, 'Queuing', file_list[0], 'with', len(file_list) - 1, 'files')
                if self.queue:
                    self.queue.put((file_list[0], file_list[1:]))
                else:
                    print(self.addr, 'Attempted queuing with no queue')
            elif line[0:1] == b'j':
                # Stop multiprocessing pool and wait for its workers to finish.
                print(self.addr, 'Waiting for pool')
                if self.mp_pool and self.queue:
                    for _ in range(self.options['threads']):
                        self.queue.put(None)
                    self.mp_pool.close()
                    self.mp_pool.join()
                    self.mp_pool = None
                else:
                    print(self.addr, 'Attempted pool wait with no pool/queue')

            # Write acknowledgement.
            with self.write_lock:
                self.f.write(line[0:1] + b'\n')
                self.f.flush()

            # Stop if requested by the client.
            if line[0:1] == b'j':
                break

        # Close connection.
        print(self.addr, 'Closing connection')
        try:
            self.f.close()
        except:
            pass
        try:
            self.sock.close()
        except:
            pass
        if self.mp_pool:
            self.mp_pool.close()
            self.mp_pool.join()

def remote_server(dir_path, formatter_args, options):
    """Main function for the remote server."""
    # Create server and listen for connections.
    server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server.bind(('', options['remote_port']))
    server.listen(5)
    print('Listening on port', options['remote_port'])

    # Receive connections.
    try:
        while True:
            RemoteServerClient(server.accept(), options)
    except KeyboardInterrupt:
        pass

    # Close server.
    print('Closing server')
    server.close()

    return 0


def main():
    # Set default options.
    mode = None
    options = {
        'array': False,
        'debug': False,
        'enospc': False,
        'format': 'csv',
        'headers': True,
        'hyperlink': False,
        'threads': 0,
        'unpack-only': False,
        'docker-usage': False,
        'remote_servers': [],
        'remote_port': 0,
    }

    # Parse arguments.
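    # Typical two-step run (the paths are examples): extract first, then
    # analyze, with the analysis table going to standard output:
    #   python3 -m biostools -x /path/to/bios
    #   python3 -m biostools -a /path/to/bios -f csv > results.csv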
    args, remainder = getopt.gnu_getopt(sys.argv[1:], 'xadf:hnrt:u',
                                        ['extract', 'analyze', 'debug', 'format=', 'hyperlink', 'no-headers',
                                         'array', 'threads=', 'unpack-only', 'remote=', 'remote-server', 'docker-usage'])
    for opt, arg in args:
        if opt in ('-x', '--extract'):
            mode = extract
        elif opt in ('-a', '--analyze'):
            mode = analyze
        elif opt in ('-d', '--debug'):
            options['debug'] = True
        elif opt in ('-f', '--format'):
            options['format'] = arg.lower()
        elif opt in ('-h', '--hyperlink'):
            options['hyperlink'] = True
        elif opt in ('-n', '--no-headers'):
            # -n is overloaded: it disables column headers in analysis mode and
            # aborts on a full disk in extraction mode.
            options['headers'] = False
            options['enospc'] = True
        elif opt in ('-r', '--array'):
            options['array'] = True
        elif opt in ('-t', '--threads'):
            try:
                options['threads'] = int(arg)
            except:
                pass
        elif opt in ('-u', '--unpack-only'):
            options['unpack-only'] = True
        elif opt == '--remote':
            options['remote_servers'].append(arg)
        elif opt == '--remote-server':
            mode = remote_server
            try:
                options['remote_port'] = int(remainder[0])
            except:
                pass
            remainder.append(None) # dummy
        elif opt == '--docker-usage':
            options['docker-usage'] = True

    if len(remainder) > 0:
        # Set default numeric options.
        if options['threads'] <= 0:
            options['threads'] = options['debug'] and 1 or (os.cpu_count() or 4)
        if options['remote_port'] <= 0:
            options['remote_port'] = DEFAULT_REMOTE_PORT

        # Run mode handler.
        if mode:
            return mode(remainder[0], remainder[1:], options)

    # Print usage.
    if options['docker-usage']:
        usage = '''
Usage: docker run -v directory:/bios biostools [-d] [-f output_format] [-h] [-n] [-r] [formatter_options]

Archives and BIOS images in the directory mounted to /bios will be
extracted and analyzed.
'''
    else:
        usage = '''
Usage: python3 -m biostools [-d] [-n] [-t threads] [-u] -x directory
       python3 -m biostools [-d] [-f output_format] [-h] [-n] [-r] [-t threads] -a directory|single_file [formatter_options]

-x  Extract archives and BIOS images recursively in the given directory
    -n  Abort extraction if disk space runs out.
    -u  Extract archives only, don't extract BIOS images.

-a  Analyze extracted BIOS images in the given directory, or a single
    extracted file (extracting with -x first is recommended)'''
    usage += '''
    -f  Output format:
        csv        Comma-separated values with quotes (default)
        scsv       Semicolon-separated values with quotes
        json       JSON object array
        jsontable  JSON table
    -h  Generate download links for file paths representing HTTP URLs.
        csv/scsv: The Excel HYPERLINK formula is used; if you have
        non-English Excel, you must provide your language's HYPERLINK
        formula name in formatter_options.
    -n  csv/scsv/jsontable: Don't output column headers.
    -r  json/jsontable: Output multi-value cells as arrays.

Common options (applicable to both -x and -a modes):

-d  Enable debug output.
-t  Set number of threads to use.
'''
    print(usage, file=sys.stderr)
    return 1

if __name__ == '__main__':
    sys.exit(main())