import time

import cv2
import numpy as np
from paddleocr import PaddleOCR
import difflib
import openpyxl
from openpyxl.styles import PatternFill, Alignment

# Top and bottom boundaries of the subtitle band (pixel rows).
# Both start at 0 and are overwritten by detect_with_ocr() before use.
up_b, down_b = 0, 0

# Initialise the OCR engine once at import time; it is shared by all functions.
ocr = PaddleOCR(use_angle_cls=True, lang="ch", show_log=False)

# Normal speaking speed: 4 characters per second.
normal_speed = 4


def get_position(video_path, start_time):
    """
    Analyse frames of the video to locate the subtitle band, so that later
    subtitle recognition can be restricted to that band.

    Starting ``start_time`` seconds into the video, roughly three frames per
    second are sampled (at most 1000 frames are read) and only the lower 40%
    of each frame is passed to OCR. Horizontally centred, near-horizontal,
    high-confidence texts are treated as subtitle candidates and their
    (top, bottom) rows are tallied. Sampling stops once three distinct
    candidate texts have been seen.

    :param video_path: path of the video file
    :param start_time: position (in seconds) at which sampling starts
    :return: ``(top, bottom)`` rows of the most frequently observed subtitle
             band, expressed in full-frame coordinates
    :raises ValueError: if no subtitle candidate was found
    """
    video = cv2.VideoCapture(video_path)
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        # Sample about three frames per second. max() keeps the step valid
        # when fps is below 3 or reported as 0.
        step = max(1, int(fps / 3))
        video.set(cv2.CAP_PROP_POS_FRAMES, int(start_time * fps))
        # Row where the scanned region (lower 40% of the frame) begins.
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT) * 0.6)

        subtitle_position = {}
        cnt = 0
        txt_cnt = 0
        pre_txt = None
        # One frame may contribute several texts, so test "< 3" rather than
        # "== 3" to avoid stepping over the stop condition.
        while txt_cnt < 3:
            _, img = video.read()
            cnt += 1
            if img is None or cnt > 1000:
                break
            if cnt % step != 0:
                continue
            img = img[height:]
            width = img.shape[1]
            # NOTE(review): assumes ocr.ocr() returns a flat list of
            # [box, (text, confidence)] entries -- confirm against the
            # installed PaddleOCR version.
            # Results are consumed in the order the OCR engine returns them;
            # re-sorting by the y coordinate alone could reorder fragments
            # that share a line.
            res = ocr.ocr(img, cls=True)
            if not res:
                continue
            bottom_position = None
            for rect, (txt, confidence) in res:
                top, bottom = rect[0][1], rect[2][1]
                mid = (rect[0][0] + rect[1][0]) / 2
                # Angle of the box's top edge; arctan2 avoids a division by
                # zero on degenerate zero-width boxes.
                gradient = np.arctan2(abs(rect[1][1] - rect[0][1]),
                                      abs(rect[1][0] - rect[0][0]))
                # Keep only text that could plausibly be a subtitle.
                if not (confidence > 0.9 and 0.4 * width < mid < 0.6 * width and gradient < 0.1):
                    continue
                if bottom_position is None:
                    bottom_position = top
                # Only texts on the same line as the first candidate count.
                if abs(top - bottom_position) >= 10:
                    continue
                # The same text as the previous candidate is the same subtitle.
                if pre_txt == txt:
                    continue
                txt_cnt += 1
                pre_txt = txt
                key = (top, bottom)
                if key in subtitle_position:
                    subtitle_position[key] += 1
                    continue
                # Merge with a nearby band, widening it to cover both.
                merged = False
                for k in list(subtitle_position):
                    if abs(top - k[0]) + abs(bottom - k[1]) < 10:
                        new_k = (min(k[0], top), max(k[1], bottom))
                        if new_k == k:
                            subtitle_position[k] += 1
                        else:
                            subtitle_position[new_k] = subtitle_position.pop(k) + 1
                        merged = True
                        break
                if not merged:
                    subtitle_position[key] = 1
    finally:
        video.release()

    print(subtitle_position)
    if not subtitle_position:
        raise ValueError("no subtitle candidate found in %r" % (video_path,))
    band_top, band_bottom = max(subtitle_position, key=subtitle_position.get)
    # Convert from cropped-region rows back to full-frame rows.
    return band_top + height, band_bottom + height


def string_similar(s1, s2):
    """
    Return a similarity score for two strings; used to collapse
    near-duplicate subtitles in the output.

    The score is ``difflib.SequenceMatcher.quick_ratio()``, which compares the
    characters the two strings have in common without regard to their order.

    :param s1: first string
    :param s2: second string
    :return: similarity between 0.0 and 1.0
    """
    matcher = difflib.SequenceMatcher(a=s1, b=s2)
    return matcher.quick_ratio()


def detect_subtitle(img):
    """
    Run OCR on the subtitle band of a frame and return the subtitle text.

    The frame is cropped to the module-level ``up_b``/``down_b`` band plus a
    30-pixel margin on each side. A text is accepted as the subtitle when it
    is horizontally centred, nearly horizontal and vertically aligned with
    the band. If at least two high-confidence texts sit just left/right of
    centre instead, they are treated as one line split into several dialogue
    fragments and joined.

    :param img: the current full frame
    :return: the subtitle text, or ``None`` when no subtitle was detected
    """
    margin = 30
    # Clamp at 0: a negative start would wrap around and select rows from
    # the bottom of the frame.
    crop_top = max(int(up_b) - margin, 0)
    img = img[crop_top:int(down_b) + margin]
    if img.size == 0:
        return None
    # Rows, inside the crop, where the subtitle band is expected to start and
    # end. With an unclamped crop these are `margin` and `height - margin`.
    expected_top = int(up_b) - crop_top
    expected_bottom = int(down_b) - crop_top

    # NOTE(review): assumes ocr.ocr() returns a flat list of
    # [box, (text, confidence)] entries -- confirm against the installed
    # PaddleOCR version.
    # Results are consumed in the order the OCR engine returns them;
    # re-sorting by the y coordinate alone could swap fragments on one line.
    res = ocr.ocr(img, cls=True)
    if not res:
        return None

    width = img.shape[1]
    centred_txt = []
    possible_txt = []
    for rect, (txt, confidence) in res:
        rel_mid = (rect[0][0] + rect[1][0]) / 2 / width
        # Angle of the box's top edge; arctan2 avoids a division by zero on
        # degenerate zero-width boxes.
        gradient = np.arctan2(abs(rect[1][1] - rect[0][1]),
                              abs(rect[1][0] - rect[0][0]))
        if gradient >= 0.1:
            continue
        offset = abs(rect[0][1] - expected_top) + abs(expected_bottom - rect[2][1])
        # Confidence > 0.7, vertical offset <= 25 and centre within 0.4-0.6
        # of the frame width.
        if confidence > 0.7 and 0.4 < rel_mid < 0.6 and offset <= 25:
            centred_txt.append(txt)
        # The subtitle line may be split into two (or more) dialogue texts.
        elif confidence > 0.85 and (0.3 < rel_mid < 0.4 or 0.6 < rel_mid < 0.7):
            possible_txt.append(txt)

    if len(possible_txt) >= 2:
        subtitle = ''.join(possible_txt)
    else:
        subtitle = ''.join(centred_txt)
    return subtitle or None


def _record_subtitle(res, narratage_recommend, begin, start_time, end_time, subtitle):
    """Store one finished subtitle, preceded by a narration hint when a gap precedes it."""
    res.append([start_time, end_time, subtitle])
    is_first = len(res) == 1
    # The first gap is measured from the start of the analysed range.
    previous_end = begin if is_first else res[-2][1]
    gap = start_time - previous_end
    if is_first or gap >= 1:
        print('--------------------------------------------------')
        recommend_lens = max(0, int(gap * normal_speed))
        narratage_recommend.append(['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
    print(start_time, end_time, subtitle)
    narratage_recommend.append([round(start_time, 2), round(end_time, 2), subtitle, ''])


def process_video(video_path, begin, end, state):
    """
    Scan the video, capture its subtitles and derive the positions where a
    narration could be inserted.

    About four frames per second are passed to ``detect_subtitle``.
    Consecutive detections are merged into one subtitle while their
    similarity is at least 0.7. A narration hint row is emitted before the
    first subtitle, before any subtitle preceded by at least one second
    without subtitles, and for a silent stretch of more than one second at
    the end of the range. The suggested length is the gap in seconds
    multiplied by ``normal_speed``.

    :param video_path: path of the video to process
    :param begin: actual start of the film (seconds)
    :param end: actual end of the film, excluding the credits (seconds)
    :param state: mutable sequence; ``state[0]`` is updated with the progress
                  ratio, capped at 0.99 while the scan is running
    :return: list of rows ``[start, end, subtitle, suggestion]``
    """
    video = cv2.VideoCapture(video_path)
    # res holds the raw subtitles collected during the scan, without any
    # narration analysis.
    res = []
    # narratage_recommend holds the rows that end up in the spreadsheet.
    narratage_recommend = []
    last_subtitle = None
    start_time = 0
    end_time = begin
    now = begin
    reached_end = False
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        # Sample about four frames per second. max() keeps the step valid
        # when fps is below 4 or reported as 0.
        step = max(1, int(fps / 4))
        duration = end - begin
        cnt = 0
        video.set(cv2.CAP_PROP_POS_MSEC, begin * 1000)
        while True:
            _, frame = video.read()
            if frame is None:
                break
            cnt += 1
            now = video.get(cv2.CAP_PROP_POS_MSEC) / 1000
            if cnt % step == 0:
                if duration > 0:
                    state[0] = min(0.99, float((now - begin) / duration))
                subtitle = detect_subtitle(frame)
                if last_subtitle is None:
                    # A subtitle appears after a stretch without one.
                    if subtitle is not None:
                        start_time = now
                    last_subtitle = subtitle
                elif subtitle is None:
                    # The subtitle disappeared.
                    end_time = now
                    _record_subtitle(res, narratage_recommend, begin,
                                     start_time, end_time, last_subtitle)
                    last_subtitle = None
                elif string_similar(last_subtitle, subtitle) < 0.7:
                    # Two different sentences follow each other directly.
                    end_time = now
                    _record_subtitle(res, narratage_recommend, begin,
                                     start_time, end_time, last_subtitle)
                    start_time = end_time
                    last_subtitle = subtitle
                elif len(subtitle) > len(last_subtitle):
                    # Same sentence: keep the longer (more complete) reading.
                    last_subtitle = subtitle
            if now > end:
                reached_end = True
                break
    finally:
        video.release()

    if last_subtitle is not None:
        # A subtitle was still on screen when the scan stopped; keep it.
        _record_subtitle(res, narratage_recommend, begin, start_time, now, last_subtitle)
    elif reached_end and now - end_time > 1:
        # Silent stretch between the last subtitle and the end of the range.
        print('--------------------------------------------------')
        recommend_lens = int((now - end_time) * normal_speed)
        narratage_recommend.append(['', '', '', '插入旁白，推荐字数为%d' % recommend_lens])
    return narratage_recommend


def write_excel_xlsx(path, sheet_name, value):
    """
    Write the narration recommendation rows to an xlsx workbook.

    Every cell is written as text with wrapping enabled. Cells that are
    empty, contain the narration hint, or equal '翻译' get a yellow fill.

    :param path: where the workbook is saved
    :param sheet_name: title given to the worksheet
    :param value: rows (sequences of cell values) to write
    :return: None
    """
    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.title = sheet_name
    # Widen the subtitle and suggestion columns.
    for column_letter, column_width in (('C', 50), ('D', 30)):
        sheet.column_dimensions[column_letter].width = column_width
    for row_no, row in enumerate(value, start=1):
        for col_no, item in enumerate(row, start=1):
            cell = sheet.cell(row=row_no, column=col_no, value=str(item))
            cell.alignment = Alignment(wrapText=True)
            if item == '' or '插入旁白' in str(item) or item == '翻译':
                cell.fill = PatternFill(fill_type='solid', fgColor='ffff00')
    workbook.save(path)


def detect_with_ocr(video_path, book_path, start_time, end_time, state):
    """
    Run the whole pipeline: locate the subtitle band, scan the video for
    subtitles and narration slots, and write the result to a spreadsheet.

    :param video_path: path of the video to analyse
    :param book_path: path of the xlsx file to write
    :param start_time: start of the range to analyse (seconds)
    :param end_time: end of the range to analyse (seconds)
    :param state: mutable sequence; ``state[0]`` is set to 1.00 when finished
    :return: None
    """
    global up_b, down_b

    # Locate the subtitle band once (sampling starts 30 seconds after
    # start_time) so the scan can OCR that region directly.
    up_b, down_b = get_position(video_path, start_time + 30)

    # Build the table: header row followed by the analysis rows.
    header_row = ["起始时间", "终止时间", "字幕", '建议', '解说脚本']
    rows = [header_row] + process_video(video_path, start_time, end_time, state)

    # Write the narration recommendations to the workbook.
    write_excel_xlsx(book_path, "旁白插入位置建议", rows)
    state[0] = 1.00


if __name__ == '__main__':
    # Example run: analyse the first 300 seconds of a local video file.
    detect_with_ocr(
        video_path="D:/heelo/hysxm_1.mp4",
        book_path='何以笙箫默.xlsx',
        start_time=0,
        end_time=300,
        state=[None],
    )
