Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
10 kB
6
Indexable
Never
try:
    from PyQt5.QtGui import *
    from PyQt5.QtCore import *
    from PyQt5.QtWidgets import *
except ImportError:
    from PyQt4.QtGui import *
    from PyQt4.QtCore import *

import time
import datetime
import json
import cv2
import numpy as np

from libs.utils import newIcon

# Short alias for QDialogButtonBox; used below to build the dialog's Ok/Cancel buttons.
BB = QDialogButtonBox


class Worker(QThread):
    """Background thread that produces OCR word boxes for a list of images.

    For every image path the thread obtains word boxes (either by calling the
    OCR reader or by loading a pre-computed JSON export), publishes them on the
    main window through ``mainThread.result_dic`` / ``mainThread.filePath`` and
    asks it to persist them with ``mainThread.saveFile(mode='Auto')``.

    Signals:
        progressBarValue(int): number of images handled so far.
        listValue(str): first the image path, then the formatted results.
        endsignal(int, str): emitted as ``(0, "readAll")`` when the loop ends.

    NOTE(review): ``saveFile`` is invoked directly from this thread, not via a
    signal - confirm that it does not touch GUI widgets.
    """

    progressBarValue = pyqtSignal(int)
    listValue = pyqtSignal(str)
    endsignal = pyqtSignal(int, str)
    # Cancellation flag: run() keeps processing while this is 0.
    handle = 0
    # Folder searched for pre-computed docTR exports; override per instance or
    # subclass to point somewhere else.
    doctr_json_dir = r"D:\Mobius\Dodge\dummy1_json"
    # Images whose height or width is not larger than this are not sent to the reader.
    _MIN_IMAGE_SIDE = 12

    def __init__(self, ocr, mImgList, mainThread, model):
        """Store the collaborators used by run().

        Args:
            ocr: reader object exposing ``readtext`` (used by the 'paddle' model).
            mImgList: iterable of image paths to process (``None`` means none).
            mainThread: object receiving ``result_dic`` / ``filePath`` and
                providing ``saveFile(mode=...)``.
            model: one of 'paddle', 'doctr' or 'tesseract'.
        """
        super(Worker, self).__init__()
        self.ocr = ocr
        self.mImgList = mImgList
        self.mainThread = mainThread
        self.model = model
        self.result_dic = None
        self.setStackSize(1024*1024)

    def extract_output_doctr(self, json_export):
        """Convert a docTR-style export dict into a list of word boxes.

        Args:
            json_export: dict with a 'pages' list; each page carries
                'dimensions' as (height, width) and nested
                'blocks' -> 'lines' -> 'words', each word having 'value',
                'confidence' and 'geometry' ((xmin, ymin), (xmax, ymax)).

        Returns:
            A list of ``[four_corner_points, (text, confidence)]`` entries with
            the geometry scaled by the page dimensions and rounded.
        """
        result_dic = []
        for page in json_export['pages']:
            h, w = page['dimensions']
            for block in page['blocks']:
                for line in block['lines']:
                    for word in line['words']:
                        text = word['value']
                        confidence = word['confidence']
                        ((xmin, ymin), (xmax, ymax)) = word['geometry']
                        # Geometry is multiplied by the page size - presumably
                        # it is stored as fractions of the page (TODO confirm).
                        xmin, xmax = float(round(w * xmin)), float(round(w * xmax))
                        ymin, ymax = float(round(h * ymin)), float(round(h * ymax))
                        result_dic.append(
                            [[[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]],
                             (text, confidence)])
        return result_dic

    def extract_output_tesseract(self, json_export):
        """Convert a Tesseract-style column dict into a list of word boxes.

        Args:
            json_export: dict of parallel lists keyed by 'level', 'text',
                'conf', 'left', 'top', 'width' and 'height'.

        Returns:
            A list of ``[four_corner_points, (text, confidence)]`` entries, one
            per index of ``json_export['level']``.
        """
        result_dic = []
        n_boxes = len(json_export['level'])
        for idx in range(n_boxes):
            text = json_export['text'][idx]
            confidence = json_export['conf'][idx]
            left = json_export['left'][idx]
            top = json_export['top'][idx]
            xmin = float(round(left))
            ymin = float(round(top))
            xmax = float(round(left + json_export['width'][idx]))
            ymax = float(round(top + json_export['height'][idx]))
            result_dic.append(
                [[[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]],
                 (text, confidence)])
        return result_dic

    def extract_output(self, result_easyocr):
        """Convert reader results ``(points, text, confidence)`` into word boxes.

        Returns:
            A list of ``[points_as_float_pairs, (text, confidence)]`` entries.
        """
        result_dic = list()
        for result in result_easyocr:
            posi = [[float(i[0]), float(i[1])] for i in result[0]]
            chars = result[1]
            cond = result[2]
            result_dic.append([posi, (chars, cond)])
        print(result_dic)
        return result_dic

    @staticmethod
    def _json_name(img_path):
        """Return the bare file name of ``img_path`` with a ``.json`` extension."""
        import os

        # Accept both Windows and POSIX separators when isolating the file name.
        file_name = img_path.replace("\\", "/").rsplit("/", 1)[-1]
        return os.path.splitext(file_name)[0] + ".json"

    @staticmethod
    def _load_export(json_path):
        """Load a JSON export, returning None when it is missing or unparsable."""
        try:
            with open(json_path, encoding="utf-8") as ip_json:
                return json.load(ip_json)
        except (OSError, ValueError) as err:
            # A broken sidecar only skips this image, like a too-small image does.
            print('Can not load OCR result', json_path, err)
            return None

    def _read_with_ocr(self, Imgpath):
        """Run the OCR reader on one image; return None when it is unusable."""
        image = cv2.imdecode(np.fromfile(Imgpath, dtype=np.uint8), 1)
        if image is None:
            # imdecode yields None for data it cannot decode.
            print('The image', Imgpath, 'could not be decoded')
            return None
        h, w = image.shape[:2]
        if h > self._MIN_IMAGE_SIDE and w > self._MIN_IMAGE_SIDE:
            result_easyocr = self.ocr.readtext(
                Imgpath, mag_ratio=3, min_size=0.5, link_threshold=1.5, width_ths=0.1)
            return self.extract_output(result_easyocr)
        print('The size of', Imgpath, 'is too small to be recognised')
        return None

    def _recognise(self, Imgpath):
        """Return the word boxes for one image according to ``self.model``."""
        import os

        if self.model == 'paddle':
            return self._read_with_ocr(Imgpath)

        if self.model == "doctr":
            print("doctr process is running")
            input_json_path = os.path.join(self.doctr_json_dir, self._json_name(Imgpath))
            export = self._load_export(input_json_path)
            return None if export is None else self.extract_output_doctr(export)

        if self.model == "tesseract":
            print("tesseract process is running")
            print(f"Imgpath is {Imgpath}")
            # The export lives in a parallel tree where "images" is "raw_json".
            mirrored = Imgpath.replace("images", "raw_json")
            input_json_path = os.path.splitext(mirrored)[0] + ".json"
            print(f"input_json_path is {input_json_path}")
            export = self._load_export(input_json_path)
            return None if export is None else self.extract_output_tesseract(export)

        print('Unknown OCR model', self.model)
        return None

    @staticmethod
    def _format_results(results):
        """Render word boxes as the multi-line text shown in the dialog list."""
        return "".join(
            f"Transcription: {chars} Probability: {cond} Location: {json.dumps(posi)}\n"
            for posi, (chars, cond) in results
        )

    def run(self):
        """Process every image, then idle in the event loop until quit().

        Progress is reported after each image whether or not it produced
        results. The loop stops early once ``handle`` is no longer 0.
        Unexpected exceptions are printed and re-raised.
        """
        try:
            findex = 0
            for Imgpath in self.mImgList or []:
                if self.handle != 0:
                    break
                self.listValue.emit(Imgpath)
                print(Imgpath)

                # Always start from a fresh result so a failure can never reuse
                # the previous image's boxes.
                self.result_dic = self._recognise(Imgpath)

                if not self.result_dic:
                    print('Can not recognise file', Imgpath)
                else:
                    # Sending large amounts of data repeatedly through
                    # pyqtSignal may affect the program efficiency.
                    self.listValue.emit(self._format_results(self.result_dic))
                    self.mainThread.result_dic = self.result_dic
                    self.mainThread.filePath = Imgpath
                    # Save the results.
                    self.mainThread.saveFile(mode='Auto')
                findex += 1
                self.progressBarValue.emit(findex)
            self.endsignal.emit(0, "readAll")
            # exec_() exists in both PyQt4 and PyQt5.
            self.exec_()
        except Exception as e:
            print(e)
            raise


class AutoDialog(QDialog):
    """Modal dialog showing the progress of an automatic-labeling Worker.

    It contains a progress bar, a list widget fed by the worker's messages and
    an Ok/Cancel button box. Ok is enabled once the worker reports completion.
    """

    def __init__(self, text="Enter object label", parent=None, ocr=None, mImgList=None, lenbar=0,
                 model='tesseract'):
        """Build the widgets and create (but do not start) the worker thread.

        Args:
            text: unused; kept for signature compatibility.
            parent: parent widget, also handed to the worker as its main window.
            ocr: reader object forwarded to the worker.
            mImgList: image paths forwarded to the worker.
            lenbar: maximum of the progress bar (number of images expected).
            model: OCR backend name forwarded to the worker; the default keeps
                the backend this dialog previously hard-coded.
        """
        super(AutoDialog, self).__init__(parent)
        self.setFixedWidth(1000)
        self.parent = parent
        self.ocr = ocr
        self.mImgList = mImgList
        self.lender = lenbar
        self.pb = QProgressBar()
        self.pb.setRange(0, self.lender)
        self.pb.setValue(0)

        layout = QVBoxLayout()
        layout.addWidget(self.pb)
        # Keep the attribute in sync with the backend the worker really uses.
        self.model = model
        self.listWidget = QListWidget(self)
        layout.addWidget(self.listWidget)

        self.buttonBox = bb = BB(BB.Ok | BB.Cancel, Qt.Horizontal, self)
        bb.button(BB.Ok).setIcon(newIcon('done'))
        bb.button(BB.Cancel).setIcon(newIcon('undo'))
        bb.accepted.connect(self.validate)
        bb.rejected.connect(self.reject)
        layout.addWidget(bb)
        # Ok stays disabled until the worker emits its end signal.
        bb.button(BB.Ok).setEnabled(False)

        self.setLayout(layout)
        self.setWindowModality(Qt.ApplicationModal)

        self.thread_1 = Worker(self.ocr, self.mImgList, self.parent, self.model)
        self.thread_1.progressBarValue.connect(self.handleProgressBarSingal)
        self.thread_1.listValue.connect(self.handleListWidgetSingal)
        self.thread_1.endsignal.connect(self.handleEndsignalSignal)
        self.time_start = time.time()  # save start time

    def handleProgressBarSingal(self, i):
        """Update the progress bar and show the estimated time left in the title."""
        self.pb.setValue(i)
        if i <= 0:
            # Nothing processed yet: no average to extrapolate from.
            return

        # Use the average time per image to smooth out fluctuations.
        avg_time = (time.time() - self.time_start) / i
        remaining = max(self.lender - i, 0)
        # Drop the microseconds part of the timedelta string.
        time_left = str(datetime.timedelta(seconds=avg_time * remaining)).split(".")[0]
        self.setWindowTitle("EasyOCRLabel  --  " + f"Time Left: {time_left}")

    def handleListWidgetSingal(self, i):
        """Append message ``i`` to the list widget and scroll to it."""
        self.listWidget.addItem(i)
        titem = self.listWidget.item(self.listWidget.count() - 1)
        self.listWidget.scrollToItem(titem)

    def handleEndsignalSignal(self, i, str):
        """Enable Ok and disable Cancel once the worker reports ``(0, "readAll")``."""
        if i == 0 and str == "readAll":
            self.buttonBox.button(BB.Ok).setEnabled(True)
            self.buttonBox.button(BB.Cancel).setEnabled(False)

    def reject(self):
        """Ask the worker to stop, wait for it, then close the dialog as accepted."""
        print("reject")
        self.thread_1.handle = -1
        self.thread_1.quit()
        # wait() blocks without spinning and returns immediately when the
        # thread has already finished or was never started.
        self.thread_1.wait()
        self.accept()

    def validate(self):
        """Slot for the Ok button: accept the dialog."""
        self.accept()

    def postProcess(self):
        """Normalise the text of ``self.edit`` when such a widget exists.

        This dialog does not create an ``edit`` widget itself, so the method
        does nothing unless one has been attached to the instance.
        """
        edit = getattr(self, 'edit', None)
        if edit is None:
            return
        try:
            edit.setText(edit.text().trimmed())
        except AttributeError:
            # PyQt5: AttributeError: 'str' object has no attribute 'trimmed'
            edit.setText(edit.text())
            print(edit.text())

    def popUp(self):
        """Start the worker and show the dialog modally.

        Returns:
            1 when the dialog was accepted, otherwise None.
        """
        # Measure from the moment work begins so the estimate is not skewed by
        # the time between construction and display.
        self.time_start = time.time()
        self.thread_1.start()
        return 1 if self.exec_() else None

    def closeEvent(self, event):
        """Closing the window behaves like pressing Cancel."""
        print("???")
        self.reject()