Untitled
unknown
plain_text
4 years ago
1.9 kB
15
Indexable
# Count how often two industry codes are registered under the same business
# name in a CSV export.
#
# The path of the CSV file is read from standard input. The first line of the
# file is treated as a header and skipped. The result is printed as a list of
# ((code_a, code_b), count) tuples, most frequent pair first.
import csv
from collections import Counter
from itertools import combinations

# Column indices holding an industry code; the column immediately after each
# one holds the matching industry name.
CODE_COLUMNS = (10, 12, 14)
# Column index holding the business name.
COMPANY_COLUMN = 3


def load_registrations(lines):
    """Parse CSV lines into the two lookup tables used by the script.

    Args:
        lines: Iterable of CSV text lines (an open text file works). The
            first line is a header and is ignored.

    Returns:
        A tuple ``(id_name, company_id)`` where ``id_name`` maps an industry
        code to its industry name and ``company_id`` maps a business name to
        the list of industry codes found for it, in file order.
    """
    reader = csv.reader(lines)
    next(reader, None)  # skip the header row; tolerate an empty file

    id_name = {}
    company_id = {}
    for row in reader:
        for col in CODE_COLUMNS:
            # CODE_COLUMNS is ascending, so once a row is too short for one
            # code/name pair it is too short for the rest (covers blank rows).
            if col + 1 >= len(row):
                break
            code, name = row[col], row[col + 1]
            if code != '' and name != '':
                id_name[code] = name  # industry code -> industry name
                # business name -> industry codes
                company_id.setdefault(row[COMPANY_COLUMN], []).append(code)
    return id_name, company_id


def count_code_pairs(company_id):
    """Count unordered pairs of distinct industry codes per business name.

    Args:
        company_id: Mapping of business name to a list of industry codes.

    Returns:
        A ``Counter`` keyed by ``(smaller_code, larger_code)`` tuples, so
        that (a, b) and (b, a) share a single entry.
    """
    counts = Counter()
    for codes in company_id.values():
        # NOTE(review): a code listed more than once under the same business
        # name contributes one count per occurrence - confirm this is wanted.
        for first, second in combinations(codes, 2):
            if first != second:
                # Sort the two codes so the key is independent of their order.
                counts[tuple(sorted((first, second)))] += 1
    return counts


def main():
    """Read the CSV path from stdin and print pair counts, highest first."""
    # newline='' lets the csv module handle line endings itself.
    with open(input(), 'r', newline='') as file:
        id_name, company_id = load_registrations(file)

    # most_common() orders by descending count; ties keep first-seen order.
    d = count_code_pairs(company_id).most_common()
    print(d)
    # TODO: output formatting is unfinished. The draft intended to stop after
    # the first 20 pairs and look up each sorted industry code in id_name to
    # show its industry name.


if __name__ == '__main__':
    main()