# Untitled
#
# mail@pastecode.io avatar
# unknown
# plain_text
# 2 years ago
# 1.9 kB
# 1
# Indexable
# Never
# Input data: the path of the CSV file to analyse is read from standard input.
import csv

# Indices of the industry-code columns. The registered industry name that
# belongs to each code is read from the column immediately after it.
label_name = [10, 12, 14]


def load_industry_tables(lines, code_columns=(10, 12, 14), company_column=3):
    """Build the code/name and business/code lookup tables from CSV text.

    Args:
        lines: Iterable of CSV lines (for example an open text file). The
            first record is treated as the header and skipped.
        code_columns: Indices of the industry-code columns; column ``i + 1``
            is read as the industry name for the code in column ``i``.
        company_column: Index of the column used as the business name.

    Returns:
        A ``(id_name, company_id)`` tuple. ``id_name`` maps industry code to
        industry name. ``company_id`` maps business name to the list of its
        industry codes in file order (repeated codes are kept).
    """
    id_name = {}
    company_id = {}

    # csv.reader handles quoted fields (embedded commas, doubled quotes).
    rows = csv.reader(lines)
    next(rows, None)  # Skip the header record; tolerate an empty file.

    for row in rows:
        for code_col in code_columns:
            # Blank or truncated rows do not have these columns; skip them
            # instead of failing with IndexError.
            if max(code_col + 1, company_column) >= len(row):
                continue
            code, name = row[code_col], row[code_col + 1]
            if code != '' and name != '':
                id_name[code] = name  # industry code -> registered industry name
                # business name -> registered industry codes
                company_id.setdefault(row[company_column], []).append(code)

    return id_name, company_id


if __name__ == "__main__":
    # Read the file. newline="" is what the csv module expects, and the
    # with-block closes the file once parsing is done.
    with open(input(), 'r', newline='') as file:
        id_name, company_id = load_industry_tables(file, label_name)

def count_code_pairs(company_id):
    """Count how often two different industry codes occur together.

    For every business, each pair of positions in its code list whose codes
    differ contributes one occurrence. A pair is stored once, as a tuple with
    the two codes in ascending order, so (a, b) and (b, a) share one counter.

    Args:
        company_id: Mapping of business name -> list of industry codes.

    Returns:
        Dict mapping ``(smaller_code, larger_code)`` -> occurrence count.
        Codes are compared as given, so string codes sort lexicographically.
        A code repeated in one business's list is paired once per occurrence.
    """
    count_pair = {}
    for codes in company_id.values():
        for i, first in enumerate(codes):
            for second in codes[i + 1:]:
                if first == second:
                    continue  # A code is never paired with itself.
                # sorted() returns a list, which cannot be a dict key, so the
                # normalised pair is converted to a tuple.
                pair = tuple(sorted((first, second)))
                count_pair[pair] = count_pair.get(pair, 0) + 1
    return count_pair


if __name__ == "__main__":
    count_pair = count_code_pairs(company_id)

    # Most frequent pairs first; ties keep their first-seen order because
    # sorted() is stable.
    d = sorted(count_pair.items(), key=lambda item: -item[1])

    print(d)

# TODO: output step (previously a disabled draft kept in a string literal):
# walk the first 20 entries of `d` and, for each code pair, look up the
# industry names in `id_name`. The draft could not run as written: it called
# d.items() although `d` is a list, and it never initialised its counter `n`.