import argparse
import csv
import json
import sys
import threading
# define the function you want to execute
def change_receipt(receipt_id, vat: float, link: str, config: dict) -> list:
    """Build the updated record for a single receipt.

    Args:
        receipt_id: Identifier of the receipt; passed through unchanged.
        vat: VAT value; multiplied by ``config['vat_multiplier']``.
        link: Link fragment; appended to ``config['base_url']``.
        config: Mapping that provides the ``'vat_multiplier'`` and
            ``'base_url'`` entries.

    Returns:
        A new list ``[receipt_id, new_vat, new_link]``.

    Raises:
        KeyError: If ``config`` lacks ``'vat_multiplier'`` or ``'base_url'``.
    """
    new_vat = vat * config['vat_multiplier']
    new_link = config['base_url'] + link
    return [receipt_id, new_vat, new_link]
# get the filename, thread count, and config filename arguments from the command line
# argparse replaces the hand-rolled "--key value" pairing: a missing option or
# a dangling flag now produces a usage message instead of an IndexError or
# TypeError further down the script.
parser = argparse.ArgumentParser(
    description='Recompute the VAT and link columns of a receipts CSV file.'
)
parser.add_argument('--filename', required=True,
                    help='CSV file to read and rewrite in place')
parser.add_argument('--thread-count', required=True, type=int,
                    help='maximum number of concurrently running threads')
parser.add_argument('--config', required=True,
                    help='JSON file providing vat_multiplier and base_url')
options = parser.parse_args()
# a semaphore created with 0 slots would block the first acquire() forever,
# and a negative value is rejected by threading.Semaphore itself
if options.thread_count < 1:
    parser.error('--thread-count must be at least 1')
filename = options.filename
thread_count = options.thread_count
config_filename = options.config
# load the config file
with open(config_filename) as config_file:
    config = json.load(config_file)
# create a semaphore to limit the number of threads
semaphore = threading.Semaphore(thread_count)
# define a worker function that executes the target function with the given arguments
def worker(target, args):
    """Run ``target(*args)`` and then release one slot of the module semaphore.

    Args:
        target: Callable to execute.
        args: Positional arguments, unpacked into ``target``.

    Returns:
        Whatever ``target`` returns. ``threading.Thread`` ignores this value;
        it is only useful when ``worker`` is called directly.

    Raises:
        Any exception raised by ``target``; the semaphore slot is released
        before the exception propagates.
    """
    try:
        return target(*args)
    finally:
        # release even when target fails, otherwise the slot is lost and a
        # later semaphore.acquire() can block forever
        semaphore.release()
# define a function that reads the input file and starts a thread for each line
# header row this script writes; also recognised (and skipped) when reading,
# so the script can be re-run on a file it has already rewritten
RECEIPT_HEADER = ['receipt_id', 'vat', 'link']


def process_file(filename):
    """Transform every receipt row of a CSV file using worker threads.

    Each row with at least three columns is read as ``receipt_id, vat, link``
    and handed to ``change_receipt`` on its own thread. The module-level
    semaphore is acquired before a thread starts and released by ``worker``
    when it finishes, which caps the number of threads running at once.
    Rows with fewer than three columns are skipped, as is a first row equal
    to ``RECEIPT_HEADER``.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A list of ``[receipt_id, new_vat, new_link]`` lists, in the same
        order as the rows in the file.

    Raises:
        ValueError: If a ``vat`` column cannot be converted to ``float``.
        Exception: The first exception raised by ``change_receipt`` in a
            worker thread, re-raised after all threads have finished.
    """
    results = []
    threads = []
    errors = []

    def store_result(index, call_args):
        # Catch here so the exception does not escape into worker() and so
        # it can be reported from the calling thread after join().
        try:
            results[index] = change_receipt(*call_args)
        except Exception as exc:
            errors.append(exc)

    with open(filename, newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row_number, row in enumerate(reader):
            if len(row) < 3:
                continue
            if row_number == 0 and row[:3] == RECEIPT_HEADER:
                continue
            receipt_id, vat, link = row[:3]
            call_args = (receipt_id, float(vat), link, config)
            # reserve the output slot first so results keep the input order
            results.append(None)
            semaphore.acquire()
            thread = threading.Thread(
                target=worker,
                args=(store_result, (len(results) - 1, call_args)),
            )
            thread.start()
            threads.append(thread)

    # wait for all threads to finish
    for thread in threads:
        thread.join()
    if errors:
        # fail before the caller overwrites the file with incomplete rows
        raise errors[0]
    return results


# process the input file, then write the results back to the original file
results = process_file(filename)
with open(filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(RECEIPT_HEADER)
    writer.writerows(results)