Untitled
unknown
plain_text
3 years ago
1.9 kB
2
Indexable
# Read a business-registration CSV (path given on stdin) and count how often
# two different industry codes are registered by the same business.
import csv
from collections import Counter
from itertools import combinations

# Zero-based columns that hold an industry code; the industry name that
# belongs to each code is read from the column immediately to its right.
CODE_COLUMNS = (10, 12, 14)
# Zero-based column that holds the business name.
COMPANY_COLUMN = 3


def load_registrations(path, code_columns=CODE_COLUMNS,
                       company_column=COMPANY_COLUMN):
    """Parse the CSV at *path* into two lookup tables.

    The first row is treated as a header and skipped.  For every later row,
    each (code, name) column pair listed in *code_columns* is recorded when
    both cells are non-empty.

    Args:
        path: Path of the CSV file to read.
        code_columns: Zero-based indexes of the industry-code columns.
        company_column: Zero-based index of the business-name column.

    Returns:
        A tuple ``(id_name, company_id)`` where ``id_name`` maps an industry
        code to its industry name and ``company_id`` maps a business name to
        the list of industry codes seen for it, in file order.
    """
    id_name = {}
    company_id = {}
    # The platform default encoding is kept on purpose: the original script
    # relied on it.  newline="" is what the csv module expects so that line
    # breaks inside quoted fields are handled by the reader.
    with open(path, "r", newline="") as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header row
        for row in reader:
            # Blank or truncated rows have no business name to attach to.
            if len(row) <= company_column:
                continue
            for i in code_columns:
                if i + 1 >= len(row):
                    continue
                code, name = row[i], row[i + 1]
                if code != "" and name != "":
                    id_name[code] = name
                    company_id.setdefault(row[company_column], []).append(code)
    return id_name, company_id


def count_code_pairs(company_id):
    """Count co-occurring industry-code pairs across all businesses.

    Each pair is stored as a tuple sorted in ascending (string) order so that
    (a, b) and (b, a) share one counter.

    NOTE(review): as in the original nested loops, a code that appears more
    than once in a business's list contributes once per occurrence -- confirm
    whether per-business de-duplication is wanted.

    Args:
        company_id: Mapping of business name to its list of industry codes.

    Returns:
        A ``collections.Counter`` keyed by ``(smaller_code, larger_code)``.
    """
    count_pair = Counter()
    for codes in company_id.values():
        for first, second in combinations(codes, 2):
            if first != second:
                count_pair[tuple(sorted((first, second)))] += 1
    return count_pair


def rank_code_pairs(path):
    """Return ``[((code_a, code_b), count), ...]`` for *path*, most frequent first.

    Pairs with equal counts keep the order in which they were first seen.
    """
    _, company_id = load_registrations(path)
    return count_code_pairs(company_id).most_common()


def main():
    """Read the CSV path from stdin and print the ranked pair counts."""
    d = rank_code_pairs(input())
    print(d)
    # TODO: the original draft planned to print only the first 20 pairs and
    # to translate each code to its industry name through the id_name table
    # returned by load_registrations(); that output step was never written.


if __name__ == "__main__":
    main()
Editor is loading...