Commit cdc541fc authored by cxy

add line

parent 103358ef
......@@ -44,7 +44,7 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cl
normal_speed = 4
def get_position(video_path: str, start_time: float) -> Tuple[float, float]:
def get_position(video_path: str, start_time: float, rate: float) -> Tuple[float, float]:
# return (885.0, 989.0)
"""根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
......@@ -65,68 +65,76 @@ def get_position(video_path: str, start_time: float) -> Tuple[float, float]:
txt_cnt = 0
pre_txt = None
video.set(cv2.CAP_PROP_POS_FRAMES, start)
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
while True:
_, img = video.read()
# print("img:", img)
# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# cv2.imshow('img', gray)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
cnt += 1
if img is None or cnt > 10000:
break
if cnt % int(fps / 3) != 0:
continue
img = img[height:]
res = ocr.ocr(img, cls=True)
sorted(res, key=lambda text: text[0][0][1])
bottom_position = None
if len(res) == 0:
continue
log = []
print("cnt:", cnt, "rect_num:", len(res))
for x in res:
# print("x:", x)
rect, (txt, confidence) = x
[x1,y1],[x2,y2],[x3,y3],[x4,y4] = rect
# font_size = rect[2][1] - rect[0][1]
mid = (x1 + x2) / 2
gradient = np.arctan(abs((y2 - y1) / (x2 - x1)))
# 可能是字幕的文本
conf_thred = 0.9
# conf_thred = 0.8
if confidence > conf_thred and 0.4 * img.shape[1] < mid < 0.6 * img.shape[1] and gradient < 0.1:
if bottom_position is None:
bottom_position = y1
# 判断是否与前一文本相同(是不是同一个字幕),非同一字幕的前提下,取对应上下边界,
keys = subtitle_position.keys()
if abs(y1 - bottom_position) < 10:
if pre_txt is None or pre_txt != txt:
txt_cnt += 1
pre_txt = txt
if (y1, y3) in keys:
subtitle_position[(y1, y3)] += 1
else:
replace = False
for k in keys:
# 更新键值为最宽的上下限
if abs(y1 - k[0]) + abs(y3 - k[1]) < 10:
subtitle_position[k] += 1
new_k = min(k[0], y1), max(k[1], y3)
if new_k != k:
subtitle_position[new_k] = subtitle_position[k]
subtitle_position.pop(k)
replace = True
break
if not replace:
subtitle_position[(y1, y3)] = 1
if txt_cnt == 3:
break
print(subtitle_position)
up_bounding, down_bounding = max(subtitle_position, key=subtitle_position.get)
return int(up_bounding + height), int(down_bounding + height)
# height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
print(">>>>>>>>>>>>video height")
print(cv2.CAP_PROP_FRAME_HEIGHT)
up = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (rate))
# down = up + 20
# down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
print(up)
# print(down)
return int(up), int(up + 20)
# while True:
# _, img = video.read()
# # print("img:", img)
# # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# # cv2.imshow('img', gray)
# # cv2.waitKey(0)
# # cv2.destroyAllWindows()
# cnt += 1
# if img is None or cnt > 10000:
# break
# if cnt % int(fps / 3) != 0:
# continue
# img = img[height:]
# res = ocr.ocr(img, cls=True)
# sorted(res, key=lambda text: text[0][0][1])
# bottom_position = None
# if len(res) == 0:
# continue
# log = []
# print("cnt:", cnt, "rect_num:", len(res))
# for x in res:
# # print("x:", x)
# rect, (txt, confidence) = x
# [x1,y1],[x2,y2],[x3,y3],[x4,y4] = rect
# # font_size = rect[2][1] - rect[0][1]
# mid = (x1 + x2) / 2
# gradient = np.arctan(abs((y2 - y1) / (x2 - x1)))
# # 可能是字幕的文本
# conf_thred = 0.9
# # conf_thred = 0.8
# if confidence > conf_thred and 0.4 * img.shape[1] < mid < 0.6 * img.shape[1] and gradient < 0.1:
# if bottom_position is None:
# bottom_position = y1
# # 判断是否与前一文本相同(是不是同一个字幕),非同一字幕的前提下,取对应上下边界,
# keys = subtitle_position.keys()
# if abs(y1 - bottom_position) < 10:
# if pre_txt is None or pre_txt != txt:
# txt_cnt += 1
# pre_txt = txt
# if (y1, y3) in keys:
# subtitle_position[(y1, y3)] += 1
# else:
# replace = False
# for k in keys:
# # 更新键值为最宽的上下限
# if abs(y1 - k[0]) + abs(y3 - k[1]) < 10:
# subtitle_position[k] += 1
# new_k = min(k[0], y1), max(k[1], y3)
# if new_k != k:
# subtitle_position[new_k] = subtitle_position[k]
# subtitle_position.pop(k)
# replace = True
# break
# if not replace:
# subtitle_position[(y1, y3)] = 1
# if txt_cnt == 3:
# break
# print(subtitle_position)
# up_bounding, down_bounding = max(subtitle_position, key=subtitle_position.get)
# return int(up_bounding + height), int(down_bounding + height)
def erasePunc(txt: str) -> str:
......@@ -194,8 +202,13 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
"""
subTitle = ''
height = down_b - up_b
img = img[int(up_b - height * 0.7):int(down_b + height * 0.7)]
img = img[int(up_b - height * 0.7):int(down_b)]
# 针对低帧率的视频做图像放大处理
print(">>>>>>>>>>>>>>>>>>>>>img shape")
print(height)
print(up_b)
print(down_b)
print(img.shape)
if img.shape[1] < 1000:
img = cv2.resize(img, (int(img.shape[1] * 1.5), int(img.shape[0] * 1.5)))
res = ocr.ocr(img, cls=True)
......@@ -404,7 +417,7 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
up_b, down_b = context.caption_boundings[0], context.caption_boundings[1]
else:
# 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
up_b, down_b = get_position(video_path, 0)
up_b, down_b = get_position(video_path, 0, mainWindow.rate)
context.caption_boundings = [up_b, down_b]
context.detected = True
......@@ -418,4 +431,4 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
if __name__ == '__main__':
path = "D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
print("get_pos:", get_position(path, 0))
# print("get_pos:", get_position(path, 0))
......@@ -456,6 +456,25 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self.action_insert_aside_from_now.setEnabled(True)
self.insert_aside_from_now_btn.setEnabled(True)
def up_ocr(self):
    """Move the OCR guide line up by 5 and record its relative height.

    Calls ``self.widget.up(5)`` to shift the line widget and obtain its new
    y coordinate, then stores ``y / video_widget_height`` in ``self.rate``.

    If the video widget reports a non-positive height, no ratio can be
    computed; ``self.rate`` is left untouched instead of raising
    ``ZeroDivisionError`` inside the Qt slot.
    """
    h = self.widget.up(5)
    video_h = self.wgt_video.height()
    print(">>>>>up h:" + str(h))
    print(video_h)
    if video_h <= 0:
        # Guard the division below: a collapsed video widget has no usable
        # height, so keep whatever rate was stored previously.
        print(">>>>>>>>>rate unchanged, video height is " + str(video_h))
        return
    self.rate = float(h) / float(video_h)
    print(">>>>>>>>>rate" + str(self.rate))
def down_ocr(self):
    """Move the OCR guide line down by 5 and record its relative height.

    Calls ``self.widget.down(5)`` to shift the line widget and obtain its
    new y coordinate, then stores ``y / video_widget_height`` in
    ``self.rate``.

    If the video widget reports a non-positive height, no ratio can be
    computed; ``self.rate`` is left untouched instead of raising
    ``ZeroDivisionError`` inside the Qt slot.
    """
    h = self.widget.down(5)
    video_h = self.wgt_video.height()
    print(">>>>>down h:" + str(h))
    print(video_h)
    if video_h <= 0:
        # Guard the division below: a collapsed video widget has no usable
        # height, so keep whatever rate was stored previously.
        print(">>>>>>>>>rate unchanged, video height is " + str(video_h))
        return
    self.rate = float(h) / float(video_h)
    print(">>>>>>>>>rate" + str(self.rate))
#导入旁白excel
def import_excel(self):
# excel_path = self.openExcelFile()
......
......@@ -7,7 +7,33 @@
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.QtWidgets import QMainWindow, QFileDialog, QTableWidget, QTableWidgetItem, QAbstractItemView, QProgressBar, QLabel, QApplication, QPushButton, QMenu, QWidget
from PyQt5.QtCore import QUrl, Qt, QTimer, QRect, pyqtSignal, QPersistentModelIndex
from PyQt5.QtMultimedia import *
from PyQt5.QtGui import QIcon, QPainter, QColor, QPen
class MyWidget(QWidget):
    """Widget that paints a red horizontal line and can be shifted vertically."""

    def paintEvent(self, event):
        """Paint a 2-wide solid red horizontal line at y=1 across the widget."""
        print(">>>>>>>>>>>>>>>into paint")
        painter = QPainter(self)
        painter.setRenderHint(QPainter.Antialiasing)  # Optional: enable anti-aliasing
        painter.setPen(QPen(Qt.red, 2, Qt.SolidLine))
        # Span the widget's current width instead of a fixed 800 so the line
        # still reaches the right edge if the widget is ever resized.
        painter.drawLine(0, 1, self.width(), 1)

    def up(self, mov_len):
        """Move the widget up by ``mov_len`` (x is reset to 0) and return the new y."""
        print(">>>>>>>>>>>up" + str(mov_len))
        self.move(0, self.y() - mov_len)
        return self.y()

    def down(self, mov_len):
        """Move the widget down by ``mov_len`` (x is reset to 0) and return the new y."""
        print(">>>>>>>>>>>down" + str(mov_len))
        self.move(0, self.y() + mov_len)
        return self.y()
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
......@@ -34,6 +60,8 @@ class Ui_MainWindow(object):
self.wgt_video = myVideoWidget(self.centralwidget)
self.wgt_video.setMinimumSize(QtCore.QSize(410, 200))
self.wgt_video.setMaximumSize(QtCore.QSize(16777215, 16777215))
self.widget = MyWidget(self.centralwidget)
self.widget.setGeometry(0,150,800,3)
palette = QtGui.QPalette()
brush = QtGui.QBrush(QtGui.QColor(0, 0, 0))
brush.setStyle(QtCore.Qt.SolidPattern)
......@@ -367,6 +395,10 @@ class Ui_MainWindow(object):
self.action_4.setEnabled(False)
self.action_5 = QtWidgets.QAction("旁白导入",self,triggered=self.import_excel)
self.action_5.setEnabled(False)
self.action_6 = QtWidgets.QAction("OCR++",self,triggered=self.up_ocr)
self.action_6.setEnabled(True)
self.action_7 = QtWidgets.QAction("OCR--",self,triggered=self.down_ocr)
self.action_7.setEnabled(True)
# self.action_3.setObjectName("action_3")
# self.action_4 = QtWidgets.QAction(MainWindow)
......@@ -405,6 +437,8 @@ class Ui_MainWindow(object):
self.menubar.addAction(self.action_3)
self.menubar.addAction(self.action_4)
self.menubar.addAction(self.action_5)
self.menubar.addAction(self.action_6)
self.menubar.addAction(self.action_7)
# self.menubar.addAction(self.menu_5.menuAction())
# self.menubar.addAction(self.menu_6.menuAction())
# self.menubar.addAction(self.menu_3.menuAction())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment