Untitled
unknown
plain_text
2 years ago
10 kB
8
Indexable
try:
    from PyQt5.QtGui import *
    from PyQt5.QtCore import *
    from PyQt5.QtWidgets import *
except ImportError:
    from PyQt4.QtGui import *
    from PyQt4.QtCore import *

import datetime
import json
import os
import time

import cv2
import numpy as np

from libs.utils import newIcon

BB = QDialogButtonBox


class Worker(QThread):
    """Background thread that builds annotations for every image in a list.

    For each image path the worker obtains a list of
    ``[four_corner_box, (text, confidence)]`` entries (how depends on
    ``model``), hands it to ``mainThread`` and asks it to save.

    Signals:
        progressBarValue(int): number of images processed so far.
        listValue(str): text to append to the dialog's log list.
        endsignal(int, str): emitted as ``(0, "readAll")`` once the loop ends.
    """

    progressBarValue = pyqtSignal(int)
    listValue = pyqtSignal(str)
    endsignal = pyqtSignal(int, str)

    # Cancellation flag: the loop in run() only continues while this is 0.
    handle = 0

    # Folder holding the pre-computed JSON files read by the 'doctr' branch.
    # Raw string: same value as before, without the invalid "\D" escape.
    DOCTR_JSON_DIR = r"D:\Mobius\Dodge\dummy1_json"

    def __init__(self, ocr, mImgList, mainThread, model):
        """Store the collaborators and enlarge the thread stack.

        Args:
            ocr: object exposing ``readtext(path, ...)``; used by the
                'paddle' branch only.
            mImgList: iterable of image paths to process.
            mainThread: object that receives ``result_dic`` / ``filePath``
                and whose ``saveFile(mode='Auto')`` is called per image.
            model: one of 'paddle', 'doctr' or 'tesseract'.
        """
        super(Worker, self).__init__()
        self.ocr = ocr
        self.mImgList = mImgList
        self.mainThread = mainThread
        self.model = model
        self.setStackSize(1024 * 1024)

    @staticmethod
    def _corners(xmin, ymin, xmax, ymax):
        """Return the box as four [x, y] corners, clockwise from top-left."""
        return [[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]]

    @staticmethod
    def _load_json(path):
        """Read and parse the JSON file at *path*."""
        with open(path) as ip_json:
            return json.load(ip_json)

    def extract_output_doctr(self, json_export):
        """Flatten a pages/blocks/lines/words dict into annotation entries.

        Each word's ``geometry`` is ``((xmin, ymin), (xmax, ymax))``; the
        values are scaled by the page ``dimensions`` (h, w) and rounded.
        NOTE(review): presumably this is the dict produced by docTR's
        ``result.export()`` -- confirm against the JSON producer.

        Returns:
            list of ``[corners, (text, confidence)]``.
        """
        result_dic = []
        for page in json_export['pages']:
            h, w = page['dimensions']
            for block in page['blocks']:
                for line in block['lines']:
                    for word in line['words']:
                        text = word['value']
                        confidence = word['confidence']
                        (xmin, ymin), (xmax, ymax) = word['geometry']
                        xmin, xmax = float(round(w * xmin)), float(round(w * xmax))
                        ymin, ymax = float(round(h * ymin)), float(round(h * ymax))
                        result_dic.append(
                            [self._corners(xmin, ymin, xmax, ymax), (text, confidence)]
                        )
        return result_dic

    def extract_output_tesseract(self, json_export):
        """Convert a column-oriented dict of boxes into annotation entries.

        Reads the parallel lists ``level``, ``text``, ``conf``, ``left``,
        ``top``, ``width`` and ``height``; one entry is produced per index.
        NOTE(review): looks like pytesseract ``image_to_data`` dict output --
        confirm. Rows with empty text are kept, as before.

        Returns:
            list of ``[corners, (text, confidence)]``.
        """
        columns = zip(
            json_export['level'],
            json_export['text'],
            json_export['conf'],
            json_export['left'],
            json_export['top'],
            json_export['width'],
            json_export['height'],
        )
        result_dic = []
        for _level, text, confidence, left, top, width, height in columns:
            xmin = float(round(left))
            ymin = float(round(top))
            xmax = float(round(left + width))
            ymax = float(round(top + height))
            result_dic.append(
                [self._corners(xmin, ymin, xmax, ymax), (text, confidence)]
            )
        return result_dic

    def extract_output(self, result_easyocr):
        """Convert ``(points, text, confidence)`` triples into annotation entries.

        Point coordinates are cast to ``float`` so they are JSON-serialisable.

        Returns:
            list of ``[points, (text, confidence)]``.
        """
        result_dic = [
            [[[float(pt[0]), float(pt[1])] for pt in result[0]], (result[1], result[2])]
            for result in result_easyocr
        ]
        print(result_dic)
        return result_dic

    def _recognise(self, Imgpath):
        """Return the annotation list for one image, or None if unavailable."""
        if self.model == 'paddle':
            img = cv2.imdecode(np.fromfile(Imgpath, dtype=np.uint8), 1)
            if img is None:
                # imdecode returns None for data it cannot decode.
                return None
            h, w, _ = img.shape
            if h > 12 and w > 12:
                result_easyocr = self.ocr.readtext(
                    Imgpath, mag_ratio=3, min_size=0.5,
                    link_threshold=1.5, width_ths=0.1)
                return self.extract_output(result_easyocr)
            print('The size of', Imgpath, 'is too small to be recognised')
            return None

        if self.model == "doctr":
            print("doctr process is running")
            # The JSON shares the image's base name; swap whatever extension
            # the image has (not only ".jpg") for ".json".
            image_name = Imgpath.split("\\")[-1]
            input_json = os.path.splitext(image_name)[0] + ".json"
            input_json_path = os.path.join(self.DOCTR_JSON_DIR, input_json)
            return self.extract_output_doctr(self._load_json(input_json_path))

        if self.model == "tesseract":
            print("tesseract process is running")
            print(f"Imgpath is {Imgpath}")
            # The JSON mirrors the image path with "images" -> "raw_json".
            mirrored = Imgpath.replace("images", "raw_json")
            input_json_path = os.path.splitext(mirrored)[0] + ".json"
            print(f"input_json_path is {input_json_path}")
            return self.extract_output_tesseract(self._load_json(input_json_path))

        # Unknown model: nothing to recognise.
        return None

    def run(self):
        """Process every image until done or cancelled, then idle in exec().

        Emits ``endsignal(0, "readAll")`` when the loop ends (also after a
        cancel) and then enters the thread's event loop until ``quit()``.
        Any exception is printed and re-raised.
        """
        try:
            findex = 0
            for Imgpath in self.mImgList:
                if self.handle != 0:
                    break

                self.listValue.emit(Imgpath)
                print(Imgpath)

                # Always recomputed so a previous image's result is never reused.
                self.result_dic = self._recognise(Imgpath)

                # Save the results
                if not self.result_dic:
                    print('Can not recognise file', Imgpath)
                else:
                    strs = ''.join(
                        "Transcription: " + res[1][0]
                        + " Probability: " + str(res[1][1])
                        + " Location: " + json.dumps(res[0]) + '\n'
                        for res in self.result_dic
                    )
                    # Sending large amounts of data repeatedly through
                    # pyqtSignal may affect the program efficiency
                    self.listValue.emit(strs)
                    self.mainThread.result_dic = self.result_dic
                    self.mainThread.filePath = Imgpath
                    # Save
                    self.mainThread.saveFile(mode='Auto')

                findex += 1
                self.progressBarValue.emit(findex)

            self.endsignal.emit(0, "readAll")
            self.exec()
        except Exception as e:
            print(e)
            raise


class AutoDialog(QDialog):
    """Modal dialog showing progress and log output of an auto-label run."""

    def __init__(self, text="Enter object label", parent=None, ocr=None, mImgList=None, lenbar=0):
        """Build the widgets and create (but do not start) the worker.

        Args:
            text: unused here; kept for signature compatibility.
            parent: parent widget; also passed to the worker as the object
                that receives results and saves them.
            ocr: OCR engine forwarded to the worker.
            mImgList: image paths forwarded to the worker.
            lenbar: maximum of the progress bar (number of images).
        """
        super(AutoDialog, self).__init__(parent)
        self.setFixedWidth(1000)
        self.parent = parent
        self.ocr = ocr
        self.mImgList = mImgList
        self.lender = lenbar

        self.pb = QProgressBar()
        self.pb.setRange(0, self.lender)
        self.pb.setValue(0)

        layout = QVBoxLayout()
        layout.addWidget(self.pb)
        # NOTE(review): this attribute is not what the worker below is given.
        self.model = 'paddle'
        self.listWidget = QListWidget(self)
        layout.addWidget(self.listWidget)

        self.buttonBox = bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
        bb.button(BB.Ok).setIcon(newIcon('done'))
        bb.button(BB.Cancel).setIcon(newIcon('undo'))
        bb.accepted.connect(self.validate)
        bb.rejected.connect(self.reject)
        layout.addWidget(bb)
        # OK stays disabled until the worker reports it has read everything.
        bb.button(BB.Ok).setEnabled(False)

        self.setLayout(layout)
        self.setWindowModality(Qt.ApplicationModal)

        self.thread_1 = Worker(self.ocr, self.mImgList, self.parent, 'tesseract')
        self.thread_1.progressBarValue.connect(self.handleProgressBarSingal)
        self.thread_1.listValue.connect(self.handleListWidgetSingal)
        self.thread_1.endsignal.connect(self.handleEndsignalSignal)
        self.time_start = time.time()  # save start time

    def handleProgressBarSingal(self, i):
        """Update the progress bar and show the estimated time left in the title."""
        self.pb.setValue(i)
        if i <= 0:
            # No image finished yet, so there is no average to extrapolate from.
            return

        # Use average time per image to prevent time fluctuations
        avg_time = (time.time() - self.time_start) / i
        remaining = datetime.timedelta(seconds=avg_time * (self.lender - i))
        time_left = str(remaining).split(".")[0]  # Remove microseconds
        self.setWindowTitle("EasyOCRLabel -- " + f"Time Left: {time_left}")

    def handleListWidgetSingal(self, i):
        """Append the text *i* to the log list and scroll to it."""
        self.listWidget.addItem(i)
        titem = self.listWidget.item(self.listWidget.count() - 1)
        self.listWidget.scrollToItem(titem)

    def handleEndsignalSignal(self, i, str):
        """Enable OK and disable Cancel once the worker emits (0, "readAll")."""
        if i == 0 and str == "readAll":
            self.buttonBox.button(BB.Ok).setEnabled(True)
            self.buttonBox.button(BB.Cancel).setEnabled(False)

    def _stopWorker(self):
        """Ask the worker to stop and block until its run() has returned."""
        self.thread_1.handle = -1
        self.thread_1.quit()
        # wait() sleeps instead of spinning the GUI thread, and returns at
        # once if the thread was never started.
        self.thread_1.wait()

    def reject(self):
        """Cancel the run, wait for the worker to stop, then close as accepted."""
        print("reject")
        self._stopWorker()
        self.accept()

    def validate(self):
        """Close the dialog after stopping the worker's idle event loop."""
        self._stopWorker()
        self.accept()

    def postProcess(self):
        """Trim the text of ``self.edit``.

        NOTE(review): no ``edit`` attribute is assigned anywhere in this
        class as shown; confirm whether this method is still used.
        """
        try:
            self.edit.setText(self.edit.text().trimmed())
        except AttributeError:
            # PyQt5: AttributeError: 'str' object has no attribute 'trimmed'
            self.edit.setText(self.edit.text())
            print(self.edit.text())

    def popUp(self):
        """Start the worker and run the dialog modally.

        Returns:
            1 if the dialog was accepted, otherwise None.
        """
        self.thread_1.start()
        return 1 if self.exec_() else None

    def closeEvent(self, event):
        """Treat closing the window like pressing Cancel."""
        print("???")
        self.reject()
Editor is loading...