add line

cdc541fc · cxy · 103358ef · cdc541fc · cdc541fc · cdc541fc
Commit cdc541fc authored Aug 10, 2023 by cxy
Hide whitespace changes
Inline Side-by-side

Showing 3 changed files with 132 additions and 66 deletions

detect_with_ocr.py detect_with_ocr.py +79 -66

main_window.py main_window.py +19 -0

main_window_ui.py main_window_ui.py +34 -0

No files found.
--- a/detect_with_ocr.py
+++ b/detect_with_ocr.py
@@ -44,7 +44,7 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cl
 normal_speed = 4


-def get_position(video_path: str, start_time: float) -> Tuple[float, float]:
+def get_position(video_path: str, start_time: float, rate: float) -> Tuple[float, float]:
    # return (885.0, 989.0)
    """根据对视频中的画面进行分析，确定字幕的位置，以便后续的字幕识别

@@ -65,68 +65,76 @@ def get_position(video_path: str, start_time: float) -> Tuple[float, float]:
    txt_cnt = 0
    pre_txt = None
    video.set(cv2.CAP_PROP_POS_FRAMES, start)
-    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
-    while True:
-        _, img = video.read()
-        # print("img:", img)
-        # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        # cv2.imshow('img', gray)
-        # cv2.waitKey(0)
-        # cv2.destroyAllWindows()
-        cnt += 1
-        if img is None or cnt > 10000:
-            break
-        if cnt % int(fps / 3) != 0:
-            continue
-        img = img[height:]
-        res = ocr.ocr(img, cls=True)
-        sorted(res, key=lambda text: text[0][0][1])
-        bottom_position = None
-        if len(res) == 0:
-            continue
-        log = []
-
-        print("cnt:", cnt, "rect_num:", len(res))
-        for x in res:
-            # print("x:", x)
-            rect, (txt, confidence) = x
-            [x1,y1],[x2,y2],[x3,y3],[x4,y4] = rect
-            # font_size = rect[2][1] - rect[0][1]
-            mid = (x1 + x2) / 2
-            gradient = np.arctan(abs((y2 - y1) / (x2 - x1)))
-            # 可能是字幕的文本
-            conf_thred = 0.9
-            # conf_thred = 0.8
-            if confidence > conf_thred and 0.4 * img.shape[1] < mid < 0.6 * img.shape[1] and gradient < 0.1:
-                if bottom_position is None:
-                    bottom_position = y1
-                # 判断是否与前一文本相同（是不是同一个字幕），非同一字幕的前提下，取对应上下边界，
-                keys = subtitle_position.keys()
-                if abs(y1 - bottom_position) < 10:
-                    if pre_txt is None or pre_txt != txt:
-                        txt_cnt += 1
-                        pre_txt = txt
-                        if (y1, y3) in keys:
-                            subtitle_position[(y1, y3)] += 1
-                        else:
-                            replace = False
-                            for k in keys:
-                                # 更新键值为最宽的上下限
-                                if abs(y1 - k[0]) + abs(y3 - k[1]) < 10:
-                                    subtitle_position[k] += 1
-                                    new_k = min(k[0], y1), max(k[1], y3)
-                                    if new_k != k:
-                                        subtitle_position[new_k] = subtitle_position[k]
-                                        subtitle_position.pop(k)
-                                    replace = True
-                                    break
-                            if not replace:
-                                subtitle_position[(y1, y3)] = 1
-        if txt_cnt == 3:
-            break
-    print(subtitle_position)
-    up_bounding, down_bounding = max(subtitle_position, key=subtitle_position.get)
-    return int(up_bounding + height), int(down_bounding + height)
+    # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
+    print(">>>>>>>>>>>>video height")
+    print(cv2.CAP_PROP_FRAME_HEIGHT)
+    up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
+    # down = up + 20
+    # down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
+    print(up)
+    # print(down)
+    return int(up), int(up + 20)
+    # while True:
+    #     _, img = video.read()
+    #     # print("img:", img)
+    #     # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+    #     # cv2.imshow('img', gray)
+    #     # cv2.waitKey(0)
+    #     # cv2.destroyAllWindows()
+    #     cnt += 1
+    #     if img is None or cnt > 10000:
+    #         break
+    #     if cnt % int(fps / 3) != 0:
+    #         continue
+    #     img = img[height:]
+    #     res = ocr.ocr(img, cls=True)
+    #     sorted(res, key=lambda text: text[0][0][1])
+    #     bottom_position = None
+    #     if len(res) == 0:
+    #         continue
+    #     log = []
+
+    #     print("cnt:", cnt, "rect_num:", len(res))
+    #     for x in res:
+    #         # print("x:", x)
+    #         rect, (txt, confidence) = x
+    #         [x1,y1],[x2,y2],[x3,y3],[x4,y4] = rect
+    #         # font_size = rect[2][1] - rect[0][1]
+    #         mid = (x1 + x2) / 2
+    #         gradient = np.arctan(abs((y2 - y1) / (x2 - x1)))
+    #         # 可能是字幕的文本
+    #         conf_thred = 0.9
+    #         # conf_thred = 0.8
+    #         if confidence > conf_thred and 0.4 * img.shape[1] < mid < 0.6 * img.shape[1] and gradient < 0.1:
+    #             if bottom_position is None:
+    #                 bottom_position = y1
+    #             # 判断是否与前一文本相同（是不是同一个字幕），非同一字幕的前提下，取对应上下边界，
+    #             keys = subtitle_position.keys()
+    #             if abs(y1 - bottom_position) < 10:
+    #                 if pre_txt is None or pre_txt != txt:
+    #                     txt_cnt += 1
+    #                     pre_txt = txt
+    #                     if (y1, y3) in keys:
+    #                         subtitle_position[(y1, y3)] += 1
+    #                     else:
+    #                         replace = False
+    #                         for k in keys:
+    #                             # 更新键值为最宽的上下限
+    #                             if abs(y1 - k[0]) + abs(y3 - k[1]) < 10:
+    #                                 subtitle_position[k] += 1
+    #                                 new_k = min(k[0], y1), max(k[1], y3)
+    #                                 if new_k != k:
+    #                                     subtitle_position[new_k] = subtitle_position[k]
+    #                                     subtitle_position.pop(k)
+    #                                 replace = True
+    #                                 break
+    #                         if not replace:
+    #                             subtitle_position[(y1, y3)] = 1
+    #     if txt_cnt == 3:
+    #         break
+    # print(subtitle_position)
+    # up_bounding, down_bounding = max(subtitle_position, key=subtitle_position.get)
+    # return int(up_bounding + height), int(down_bounding + height)


 def erasePunc(txt: str) -> str:
@@ -194,8 +202,13 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
    """
    subTitle = ''
    height = down_b - up_b
-    img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
+    img = img[int(up_b - height * 0.7):int(down_b)]
    # 针对低帧率的视频做图像放大处理
+    print(">>>>>>>>>>>>>>>>>>>>>img shape")
+    print(height)
+    print(up_b)
+    print(down_b)
+    print(img.shape)
    if img.shape[1] < 1000:
        img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
    res = ocr.ocr(img, cls=True)
@@ -404,7 +417,7 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
        up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
    else:
        # 此处start_time + 300是为了节省用户调整视频开始时间的功夫（强行跳过前5分钟）
-        up_b, down_b = get_position(video_path, 0)
+        up_b, down_b = get_position(video_path, 0, mainWindow.rate)
        context.caption_boundings = [up_b, down_b]

    context.detected = True
@@ -418,4 +431,4 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time

 if __name__ == '__main__':
    path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
-    print("get_pos:", get_position(path, 0))
+    # print("get_pos:", get_position(path, 0))
--- a/main_window.py
+++ b/main_window.py
@@ -456,6 +456,25 @@ class MainWindow(QMainWindow, Ui_MainWindow):
        self.action_insert_aside_from_now.setEnabled(True)
        self.insert_aside_from_now_btn.setEnabled(True)

+    def up_ocr(self):
+        h = self.widget.up(5)
+        video_h = self.wgt_video.height()
+        self.rate = float(h)/float(video_h)
+        print(">>>>>up h:" + str(h))
+        print(self.wgt_video.height())
+        print(">>>>>>>>>rate" + str(self.rate))
+
+
+    def down_ocr(self):
+        h = self.widget.down(5)
+        video_h = self.wgt_video.height()
+        self.rate = float(h)/float(video_h)
+        print(">>>>>down h:" + str(h))
+        print(self.wgt_video.height())
+        print(">>>>>>>>>rate" + str(self.rate))
+
+
+
    #导入旁白excel
    def import_excel(self):
        # excel_path = self.openExcelFile()

--- a/main_window_ui.py
+++ b/main_window_ui.py
@@ -7,7 +7,33 @@
 # WARNING! All changes made in this file will be lost!

 from PyQt5 import QtCore, QtGui, QtWidgets
+from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu, QWidget
+from PyQt5.QtCore import QUrl, Qt, QTimer, QRect, pyqtSignal, QPersistentModelIndex
+from PyQt5.QtMultimedia import *
+from PyQt5.QtGui import QIcon, QPainter, QColor, QPen
+class MyWidget(QWidget):
+    def paintEvent(self, event):
+        print(">>>>>>>>>>>>>>>into paint")
+        painter = QPainter(self)
+        painter.setRenderHint(QPainter.Antialiasing)  # Optional: Enable anti-aliasing
+        # painter.setCompositionMode(QPainter.CompositionMode_SourceOver)  # Set composition mode

+        # # Draw existing content
+        # painter.fillRect(event.rect(), QColor(255, 255, 255))  # Fill with white color (you can adjust as needed)
+
+        # Draw a transparent horizontal line
+        painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
+        painter.drawLine(0, 1, 800, 1)
+
+    def up(self, mov_len):
+        print(">>>>>>>>>>>up" + str(mov_len))
+        self.move(0, self.y() - mov_len)
+        return self.y()
+
+    def down(self, mov_len):
+        print(">>>>>>>>>>>down" + str(mov_len))
+        self.move(0,self.y() + mov_len)
+        return self.y()

 class Ui_MainWindow(object):
    def setupUi(self, MainWindow):
@@ -34,6 +60,8 @@ class Ui_MainWindow(object):
        self.wgt_video = myVideoWidget(self.centralwidget)
        self.wgt_video.setMinimumSize(QtCore.QSize(410, 200))
        self.wgt_video.setMaximumSize(QtCore.QSize(16777215, 16777215))
+        self.widget = MyWidget(self.centralwidget)
+        self.widget.setGeometry(0,150,800,3)
        palette = QtGui.QPalette()
        brush = QtGui.QBrush(QtGui.QColor(0, 0, 0))
        brush.setStyle(QtCore.Qt.SolidPattern)
@@ -367,6 +395,10 @@ class Ui_MainWindow(object):
        self.action_4.setEnabled(False)
        self.action_5 = QtWidgets.QAction("旁白导入",self,triggered=self.import_excel)
        self.action_5.setEnabled(False)
+        self.action_6 = QtWidgets.QAction("OCR++",self,triggered=self.up_ocr)
+        self.action_6.setEnabled(True)
+        self.action_7 = QtWidgets.QAction("OCR--",self,triggered=self.down_ocr)
+        self.action_7.setEnabled(True)

        # self.action_3.setObjectName("action_3")
        # self.action_4 = QtWidgets.QAction(MainWindow)
@@ -405,6 +437,8 @@ class Ui_MainWindow(object):
        self.menubar.addAction(self.action_3)
        self.menubar.addAction(self.action_4)
        self.menubar.addAction(self.action_5)
+        self.menubar.addAction(self.action_6)
+        self.menubar.addAction(self.action_7)
        # self.menubar.addAction(self.menu_5.menuAction())
        # self.menubar.addAction(self.menu_6.menuAction())
        # self.menubar.addAction(self.menu_3.menuAction())