Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
A
accessibility_movie_2
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
赵心治
accessibility_movie_2
Commits
cdc541fc
Commit
cdc541fc
authored
Aug 10, 2023
by
cxy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add line
parent
103358ef
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
132 additions
and
66 deletions
+132
-66
detect_with_ocr.py
detect_with_ocr.py
+79
-66
main_window.py
main_window.py
+19
-0
main_window_ui.py
main_window_ui.py
+34
-0
No files found.
detect_with_ocr.py
View file @
cdc541fc
...
...
@@ -44,7 +44,7 @@ ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False, use_gpu=False, cl
normal_speed
=
4
def
get_position
(
video_path
:
str
,
start_time
:
float
)
->
Tuple
[
float
,
float
]:
def
get_position
(
video_path
:
str
,
start_time
:
float
,
rate
:
float
)
->
Tuple
[
float
,
float
]:
# return (885.0, 989.0)
"""根据对视频中的画面进行分析,确定字幕的位置,以便后续的字幕识别
...
...
@@ -65,68 +65,76 @@ def get_position(video_path: str, start_time: float) -> Tuple[float, float]:
txt_cnt
=
0
pre_txt
=
None
video
.
set
(
cv2
.
CAP_PROP_POS_FRAMES
,
start
)
height
=
int
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
*
0.6
)
while
True
:
_
,
img
=
video
.
read
()
# print("img:", img)
# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# cv2.imshow('img', gray)
# cv2.waitKey(0)
# cv2.destroyAllWindows()
cnt
+=
1
if
img
is
None
or
cnt
>
10000
:
break
if
cnt
%
int
(
fps
/
3
)
!=
0
:
continue
img
=
img
[
height
:]
res
=
ocr
.
ocr
(
img
,
cls
=
True
)
sorted
(
res
,
key
=
lambda
text
:
text
[
0
][
0
][
1
])
bottom_position
=
None
if
len
(
res
)
==
0
:
continue
log
=
[]
print
(
"cnt:"
,
cnt
,
"rect_num:"
,
len
(
res
))
for
x
in
res
:
# print("x:", x)
rect
,
(
txt
,
confidence
)
=
x
[
x1
,
y1
],[
x2
,
y2
],[
x3
,
y3
],[
x4
,
y4
]
=
rect
# font_size = rect[2][1] - rect[0][1]
mid
=
(
x1
+
x2
)
/
2
gradient
=
np
.
arctan
(
abs
((
y2
-
y1
)
/
(
x2
-
x1
)))
# 可能是字幕的文本
conf_thred
=
0.9
# conf_thred = 0.8
if
confidence
>
conf_thred
and
0.4
*
img
.
shape
[
1
]
<
mid
<
0.6
*
img
.
shape
[
1
]
and
gradient
<
0.1
:
if
bottom_position
is
None
:
bottom_position
=
y1
# 判断是否与前一文本相同(是不是同一个字幕),非同一字幕的前提下,取对应上下边界,
keys
=
subtitle_position
.
keys
()
if
abs
(
y1
-
bottom_position
)
<
10
:
if
pre_txt
is
None
or
pre_txt
!=
txt
:
txt_cnt
+=
1
pre_txt
=
txt
if
(
y1
,
y3
)
in
keys
:
subtitle_position
[(
y1
,
y3
)]
+=
1
else
:
replace
=
False
for
k
in
keys
:
# 更新键值为最宽的上下限
if
abs
(
y1
-
k
[
0
])
+
abs
(
y3
-
k
[
1
])
<
10
:
subtitle_position
[
k
]
+=
1
new_k
=
min
(
k
[
0
],
y1
),
max
(
k
[
1
],
y3
)
if
new_k
!=
k
:
subtitle_position
[
new_k
]
=
subtitle_position
[
k
]
subtitle_position
.
pop
(
k
)
replace
=
True
break
if
not
replace
:
subtitle_position
[(
y1
,
y3
)]
=
1
if
txt_cnt
==
3
:
break
print
(
subtitle_position
)
up_bounding
,
down_bounding
=
max
(
subtitle_position
,
key
=
subtitle_position
.
get
)
return
int
(
up_bounding
+
height
),
int
(
down_bounding
+
height
)
# height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)
print
(
">>>>>>>>>>>>video height"
)
print
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
up
=
int
(
video
.
get
(
cv2
.
CAP_PROP_FRAME_HEIGHT
)
*
(
rate
))
# down = up + 20
# down = video.get(cv2.CAP_PROP_FRAME_HEIGHT) * (0.73)
print
(
up
)
# print(down)
return
int
(
up
),
int
(
up
+
20
)
# while True:
# _, img = video.read()
# # print("img:", img)
# # gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# # cv2.imshow('img', gray)
# # cv2.waitKey(0)
# # cv2.destroyAllWindows()
# cnt += 1
# if img is None or cnt > 10000:
# break
# if cnt % int(fps / 3) != 0:
# continue
# img = img[height:]
# res = ocr.ocr(img, cls=True)
# sorted(res, key=lambda text: text[0][0][1])
# bottom_position = None
# if len(res) == 0:
# continue
# log = []
# print("cnt:", cnt, "rect_num:", len(res))
# for x in res:
# # print("x:", x)
# rect, (txt, confidence) = x
# [x1,y1],[x2,y2],[x3,y3],[x4,y4] = rect
# # font_size = rect[2][1] - rect[0][1]
# mid = (x1 + x2) / 2
# gradient = np.arctan(abs((y2 - y1) / (x2 - x1)))
# # 可能是字幕的文本
# conf_thred = 0.9
# # conf_thred = 0.8
# if confidence > conf_thred and 0.4 * img.shape[1] < mid < 0.6 * img.shape[1] and gradient < 0.1:
# if bottom_position is None:
# bottom_position = y1
# # 判断是否与前一文本相同(是不是同一个字幕),非同一字幕的前提下,取对应上下边界,
# keys = subtitle_position.keys()
# if abs(y1 - bottom_position) < 10:
# if pre_txt is None or pre_txt != txt:
# txt_cnt += 1
# pre_txt = txt
# if (y1, y3) in keys:
# subtitle_position[(y1, y3)] += 1
# else:
# replace = False
# for k in keys:
# # 更新键值为最宽的上下限
# if abs(y1 - k[0]) + abs(y3 - k[1]) < 10:
# subtitle_position[k] += 1
# new_k = min(k[0], y1), max(k[1], y3)
# if new_k != k:
# subtitle_position[new_k] = subtitle_position[k]
# subtitle_position.pop(k)
# replace = True
# break
# if not replace:
# subtitle_position[(y1, y3)] = 1
# if txt_cnt == 3:
# break
# print(subtitle_position)
# up_bounding, down_bounding = max(subtitle_position, key=subtitle_position.get)
# return int(up_bounding + height), int(down_bounding + height)
def
erasePunc
(
txt
:
str
)
->
str
:
...
...
@@ -194,8 +202,13 @@ def detect_subtitle(img: np.ndarray) -> Tuple[Union[str, None], float]:
"""
subTitle
=
''
height
=
down_b
-
up_b
img
=
img
[
int
(
up_b
-
height
*
0.7
):
int
(
down_b
+
height
*
0.7
)]
img
=
img
[
int
(
up_b
-
height
*
0.7
):
int
(
down_b
)]
# 针对低帧率的视频做图像放大处理
print
(
">>>>>>>>>>>>>>>>>>>>>img shape"
)
print
(
height
)
print
(
up_b
)
print
(
down_b
)
print
(
img
.
shape
)
if
img
.
shape
[
1
]
<
1000
:
img
=
cv2
.
resize
(
img
,
(
int
(
img
.
shape
[
1
]
*
1.5
),
int
(
img
.
shape
[
0
]
*
1.5
)))
res
=
ocr
.
ocr
(
img
,
cls
=
True
)
...
...
@@ -404,7 +417,7 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
up_b
,
down_b
=
context
.
caption_boundings
[
0
],
context
.
caption_boundings
[
1
]
else
:
# 此处start_time + 300是为了节省用户调整视频开始时间的功夫(强行跳过前5分钟)
up_b
,
down_b
=
get_position
(
video_path
,
0
)
up_b
,
down_b
=
get_position
(
video_path
,
0
,
mainWindow
.
rate
)
context
.
caption_boundings
=
[
up_b
,
down_b
]
context
.
detected
=
True
...
...
@@ -418,4 +431,4 @@ def detect_with_ocr(video_path: str, book_path: str, start_time: float, end_time
if
__name__
==
'__main__'
:
path
=
"D:/mystudy/Eagle/accessibility_movie_1/test.mp4"
print
(
"get_pos:"
,
get_position
(
path
,
0
))
#
print("get_pos:", get_position(path, 0))
main_window.py
View file @
cdc541fc
...
...
@@ -456,6 +456,25 @@ class MainWindow(QMainWindow, Ui_MainWindow):
self
.
action_insert_aside_from_now
.
setEnabled
(
True
)
self
.
insert_aside_from_now_btn
.
setEnabled
(
True
)
def
up_ocr
(
self
):
h
=
self
.
widget
.
up
(
5
)
video_h
=
self
.
wgt_video
.
height
()
self
.
rate
=
float
(
h
)
/
float
(
video_h
)
print
(
">>>>>up h:"
+
str
(
h
))
print
(
self
.
wgt_video
.
height
())
print
(
">>>>>>>>>rate"
+
str
(
self
.
rate
))
def
down_ocr
(
self
):
h
=
self
.
widget
.
down
(
5
)
video_h
=
self
.
wgt_video
.
height
()
self
.
rate
=
float
(
h
)
/
float
(
video_h
)
print
(
">>>>>down h:"
+
str
(
h
))
print
(
self
.
wgt_video
.
height
())
print
(
">>>>>>>>>rate"
+
str
(
self
.
rate
))
#导入旁白excel
def
import_excel
(
self
):
# excel_path = self.openExcelFile()
...
...
main_window_ui.py
View file @
cdc541fc
...
...
@@ -7,7 +7,33 @@
# WARNING! All changes made in this file will be lost!
from
PyQt5
import
QtCore
,
QtGui
,
QtWidgets
from
PyQt5.QtWidgets
import
QMainWindow
,
QFileDialog
,
QTableWidget
,
QTableWidgetItem
,
QAbstractItemView
,
QProgressBar
,
QLabel
,
QApplication
,
QPushButton
,
QMenu
,
QWidget
from
PyQt5.QtCore
import
QUrl
,
Qt
,
QTimer
,
QRect
,
pyqtSignal
,
QPersistentModelIndex
from
PyQt5.QtMultimedia
import
*
from
PyQt5.QtGui
import
QIcon
,
QPainter
,
QColor
,
QPen
class
MyWidget
(
QWidget
):
def
paintEvent
(
self
,
event
):
print
(
">>>>>>>>>>>>>>>into paint"
)
painter
=
QPainter
(
self
)
painter
.
setRenderHint
(
QPainter
.
Antialiasing
)
# Optional: Enable anti-aliasing
# painter.setCompositionMode(QPainter.CompositionMode_SourceOver) # Set composition mode
# # Draw existing content
# painter.fillRect(event.rect(), QColor(255, 255, 255)) # Fill with white color (you can adjust as needed)
# Draw a transparent horizontal line
painter
.
setPen
(
QPen
(
Qt
.
red
,
2
,
Qt
.
SolidLine
))
painter
.
drawLine
(
0
,
1
,
800
,
1
)
def
up
(
self
,
mov_len
):
print
(
">>>>>>>>>>>up"
+
str
(
mov_len
))
self
.
move
(
0
,
self
.
y
()
-
mov_len
)
return
self
.
y
()
def
down
(
self
,
mov_len
):
print
(
">>>>>>>>>>>down"
+
str
(
mov_len
))
self
.
move
(
0
,
self
.
y
()
+
mov_len
)
return
self
.
y
()
class
Ui_MainWindow
(
object
):
def
setupUi
(
self
,
MainWindow
):
...
...
@@ -34,6 +60,8 @@ class Ui_MainWindow(object):
self
.
wgt_video
=
myVideoWidget
(
self
.
centralwidget
)
self
.
wgt_video
.
setMinimumSize
(
QtCore
.
QSize
(
410
,
200
))
self
.
wgt_video
.
setMaximumSize
(
QtCore
.
QSize
(
16777215
,
16777215
))
self
.
widget
=
MyWidget
(
self
.
centralwidget
)
self
.
widget
.
setGeometry
(
0
,
150
,
800
,
3
)
palette
=
QtGui
.
QPalette
()
brush
=
QtGui
.
QBrush
(
QtGui
.
QColor
(
0
,
0
,
0
))
brush
.
setStyle
(
QtCore
.
Qt
.
SolidPattern
)
...
...
@@ -367,6 +395,10 @@ class Ui_MainWindow(object):
self
.
action_4
.
setEnabled
(
False
)
self
.
action_5
=
QtWidgets
.
QAction
(
"旁白导入"
,
self
,
triggered
=
self
.
import_excel
)
self
.
action_5
.
setEnabled
(
False
)
self
.
action_6
=
QtWidgets
.
QAction
(
"OCR++"
,
self
,
triggered
=
self
.
up_ocr
)
self
.
action_6
.
setEnabled
(
True
)
self
.
action_7
=
QtWidgets
.
QAction
(
"OCR--"
,
self
,
triggered
=
self
.
down_ocr
)
self
.
action_7
.
setEnabled
(
True
)
# self.action_3.setObjectName("action_3")
# self.action_4 = QtWidgets.QAction(MainWindow)
...
...
@@ -405,6 +437,8 @@ class Ui_MainWindow(object):
self
.
menubar
.
addAction
(
self
.
action_3
)
self
.
menubar
.
addAction
(
self
.
action_4
)
self
.
menubar
.
addAction
(
self
.
action_5
)
self
.
menubar
.
addAction
(
self
.
action_6
)
self
.
menubar
.
addAction
(
self
.
action_7
)
# self.menubar.addAction(self.menu_5.menuAction())
# self.menubar.addAction(self.menu_6.menuAction())
# self.menubar.addAction(self.menu_3.menuAction())
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment