# coding=utf-8
import argparse
import os
import subprocess
import time

from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import openpyxl

# Intermediate WAV that speech_synthesis writes when the requested speed is not
# 1.0 and that change_speed then feeds to ffmpeg. ss_and_export rebinds this
# global to a path inside its output directory.
tmp_file = 'tmp.wav'
# File name of the volume-adjusted copy of the original audio. adjust_volume
# rebinds this global to a path next to the audio file it processes.
adjusted_wav_path = "adjusted.wav"

# Constants used by get_narratage_text to estimate how long one narration
# segment lasts: (len(text) / normal_speed + normal_interval) / speed.
# presumably characters spoken per second at speed 1.0 -- TODO confirm
normal_speed = 4
# presumably the pause, in seconds, added after each segment -- TODO confirm
normal_interval = 0.1


def speech_synthesis(text, output_file, speed=1.0):
    """
    Synthesize narration audio for ``text`` and write it to ``output_file``.

    When ``speed`` equals 1.0 the synthesized audio is written straight to
    ``output_file``. Otherwise it is written to the module-level ``tmp_file``
    first and ``change_speed`` re-encodes it into ``output_file``.

    The subscription key and region are taken from the ``SPEECH_KEY`` and
    ``SPEECH_REGION`` environment variables when they are set and non-empty,
    and fall back to the values that used to be hard-coded here.

    :param text: narration text to synthesize
    :param output_file: path of the audio file to produce
    :param speed: requested speech rate (anything ``float()`` accepts), default 1.0
    :return: None
    """
    needs_retiming = float(speed) != 1.0
    # Synthesize into the temporary file only when a speed change has to be
    # applied afterwards; otherwise write the final file directly.
    audio_path = tmp_file if needs_retiming else output_file

    # NOTE(security): a subscription key committed to source control is readable
    # by everyone with access to the repository. Prefer supplying it through the
    # environment; the literal below is kept only as a backward-compatible
    # fallback and should be rotated and removed.
    speech_config = SpeechConfig(
        subscription=os.environ.get("SPEECH_KEY") or "ffa331815f0f4c7fa418bb6c2e1c4e17",
        region=os.environ.get("SPEECH_REGION") or "eastus",
    )

    speech_config.speech_synthesis_language = "zh-CN"
    speech_config.speech_synthesis_voice_name = 'zh-CN-XiaomoNeural'

    audio_config = AudioOutputConfig(filename=audio_path)
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    synthesizer.speak_text(text)

    if needs_retiming:
        change_speed(output_file, speed)


def change_speed(wav_path, speed=1.0):
    """
    Re-time the intermediate audio with ffmpeg's ``atempo`` filter.

    The input is always the module-level ``tmp_file``; the result overwrites
    ``wav_path`` (``-y``). The command is passed to ffmpeg as an argument list
    rather than through a shell, so paths containing spaces or shell
    metacharacters are handled correctly.

    :param wav_path: path of the output audio file
    :param speed: tempo factor handed to ``atempo``, default 1.0
    :return: None
    :raises FileNotFoundError: if the ``ffmpeg`` executable cannot be found
    """
    command = [
        'ffmpeg', '-y',
        '-i', str(tmp_file),
        '-filter:a', 'atempo={}'.format(speed),
        str(wav_path),
    ]
    # ffmpeg's exit status is intentionally not checked, as before.
    subprocess.run(command)


def read_sheet(book_path, sheet_name=None):
    """
    Read a whole worksheet into a dict keyed by the values of its first row.

    The first row is treated as the header; every later row contributes one
    value per column to the list stored under that column's header. The
    expected layout is: start time | end time | subtitle | suggestion |
    narration script.

    :param book_path: path of the xlsx workbook
    :param sheet_name: name of the worksheet to read; the workbook's active
                       sheet is used when this is None
    :return: sheet_content (dict) mapping each header value to the list of
             cell values below it
    :raises KeyError: if ``sheet_name`` is given but no such worksheet exists
    """
    workbook = openpyxl.load_workbook(book_path)
    sheet = workbook.active if sheet_name is None else workbook[sheet_name]
    rows = sheet.max_row
    cols = sheet.max_column

    # Read the header row once instead of re-reading it for every data cell.
    headers = [sheet.cell(1, j).value for j in range(1, cols + 1)]
    sheet_content = {header: [] for header in headers}

    for i in range(2, rows + 1):
        for j, header in enumerate(headers, start=1):
            sheet_content[header].append(sheet.cell(i, j).value)
    return sheet_content


def get_narratage_text(sheet_content, speed):
    """
    Derive the narration segments and their time spans from the sheet content.

    Rows whose narration cell is empty are skipped. A narration cell equal to
    '翻译' means "read the subtitle aloud" and reuses that row's own start and
    end time. Any other narration is split on newlines into segments that are
    scheduled back to back, each one estimated to last
    ``(len(segment) / normal_speed + normal_interval) / speed``.

    :param sheet_content: dict with the keys "起始时间", "终止时间", "字幕" and "解说脚本"
    :param speed: narration speed (anything ``float()`` accepts)
    :return: tuple of three equally long lists:
             narratage_text (segment texts),
             narratage_start_time (segment start times),
             narratage_end_time (segment end times)
    """
    narratage = sheet_content['解说脚本']
    subtitle = sheet_content['字幕']
    start_time = sheet_content['起始时间']
    end_time = sheet_content['终止时间']
    speed = float(speed)

    narratage_text = []
    narratage_start_time = []
    narratage_end_time = []
    last_row = len(narratage) - 1

    for i, text in enumerate(narratage):
        if text is None:
            continue

        if text == '翻译':
            narratage_text.append(subtitle[i])
            narratage_start_time.append(float(start_time[i]))
            narratage_end_time.append(float(end_time[i]))
            continue

        if subtitle[i] is None:
            # Narration-only row: start 0.1 after the previous row ends (or at
            # 0 for the first row) and run until the next row starts.
            cur_start = float(end_time[i - 1]) + 0.1 if i > 0 else 0
            # The last row has no successor; fall back to cur_start so the
            # estimated segment duration below determines the end time.
            cur_end = float(start_time[i + 1]) if i < last_row else cur_start
        else:
            cur_start = float(start_time[i])
            cur_end = float(end_time[i])

        # One segment per line of the narration cell.
        for segment in text.split('\n'):
            duration = (len(segment) / normal_speed + normal_interval) / speed
            # Extend the end time when the estimated speech overruns the slot.
            cur_end = max(cur_end, cur_start + duration)
            narratage_text.append(segment)
            narratage_start_time.append(cur_start)
            narratage_end_time.append(cur_end)
            cur_start = cur_start + duration
    return narratage_text, narratage_start_time, narratage_end_time


def second_to_str(seconds):
    """
    Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to the nearest millisecond before it is split into
    fields, so binary floating-point error cannot shave a millisecond off
    (2.3 formats as ``00:00:02,300``, not ``00:00:02,299``).

    :param seconds: non-negative duration in seconds (anything ``float()`` accepts)
    :return: the formatted timestamp string
    """
    total_ms = int(round(float(seconds) * 1000))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, second = divmod(total_seconds, 60)
    hour, minute = divmod(total_minutes, 60)
    return "%02d:%02d:%02d,%03d" % (hour, minute, second, ms)


def export_caption(sheet_content, caption_file):
    """
    Write the user-corrected subtitles to a subtitle file in SRT format.

    Rows whose subtitle cell is empty are skipped; the remaining entries are
    numbered consecutively from 1.

    :param sheet_content: corrected sheet content; the keys "字幕", "起始时间"
                          and "终止时间" are read
    :param caption_file: path of the SRT file to write (UTF-8, overwritten)
    :return: None
    """
    caption = sheet_content["字幕"]
    start_time = sheet_content['起始时间']
    end_time = sheet_content['终止时间']
    cnt = 0
    with open(caption_file, "w", encoding="utf-8") as f:
        for i, x in enumerate(caption):
            if x is None:
                continue
            start, end = second_to_str(start_time[i]), second_to_str(end_time[i])
            cnt += 1
            f.write(str(cnt) + "\n")
            f.write(start + " --> " + end + "\n")
            # A spreadsheet cell may hold a number rather than text; str()
            # keeps such a subtitle from raising TypeError on concatenation.
            f.write(str(x) + "\n\n")


def adjust_volume(origin, start_timestamp, end_timestamp):
    """
    Write a copy of ``origin`` whose volume is scaled to 0.3 inside each
    ``[start, end]`` interval, using ffmpeg's ``volume`` filter.

    The output path is stored in the module-level ``adjusted_wav_path``, which
    is rebound to ``<directory of origin>/<its file name>`` on every call. An
    existing file at that path is overwritten (``-y``).

    :param origin: path of the audio file to process
    :param start_timestamp: interval start times, in seconds
    :param end_timestamp: interval end times, in seconds, paired with the starts
    :return: None
    :raises FileNotFoundError: if the ``ffmpeg`` executable cannot be found
    """
    global adjusted_wav_path
    # Join with the bare file name only: the global already holds a full path
    # after the first call, and re-joining that would nest directories.
    adjusted_wav_path = os.path.join(os.path.dirname(origin),
                                     os.path.basename(adjusted_wav_path))

    filters = [
        "volume=enable='between(t,{},{})':volume=0.3".format(start, end)
        for start, end in zip(start_timestamp, end_timestamp)
    ]

    command = ['ffmpeg', '-i', str(origin)]
    if filters:
        # An empty filter chain is rejected by ffmpeg, so -af is only passed
        # when there is at least one interval to attenuate.
        command += ['-af', ','.join(filters)]
    command += ['-y', adjusted_wav_path]
    # Argument list instead of a shell string: paths need no quoting.
    subprocess.run(command)


def mix_speech(origin, narratage_paths, start_timestamps):
    """
    Mix the narration clips into ``origin`` with ffmpeg and write the result to
    ``composed.wav`` in the same directory as ``origin`` (overwritten if present).

    Each narration input is delayed by its start time (``adelay``, in
    milliseconds) and all streams are then combined with ``amix``.

    :param origin: path of the base audio file (ffmpeg input 0)
    :param narratage_paths: paths of the narration clips (ffmpeg inputs 1..n)
    :param start_timestamps: start time in seconds of each narration clip
    :return: None
    :raises FileNotFoundError: if the ``ffmpeg`` executable cannot be found
    """
    composed_wav_path = os.path.join(os.path.dirname(origin), "composed.wav")

    command = ['ffmpeg', '-i', str(origin)]
    for narratage_path in narratage_paths:
        command += ['-i', str(narratage_path)]

    # One adelay stage per narration input, labelled aud1..audN.
    delay_stages = "".join(
        "[{}]adelay=delays={}:all=1[aud{}];".format(index, int(start_timestamp * 1000), index)
        for index, start_timestamp in enumerate(start_timestamps, start=1)
    )
    mix_inputs = "[0]" + "".join(
        "[aud{}]".format(index) for index in range(1, len(start_timestamps) + 1)
    )
    filter_graph = delay_stages + mix_inputs + "amix=inputs={}".format(len(start_timestamps) + 1)

    command += ['-filter_complex', filter_graph, '-vsync', '2', '-y', composed_wav_path]
    # Argument list instead of a shell string: paths need no quoting.
    subprocess.run(command)


def ss_and_export(video_path, sheet_path, output_dir, speed, caption_file, state=None):
    """
    Synthesize the narration audio, export the subtitles and mix the narration
    into the video's audio track.

    :param video_path: path of the original video
    :param sheet_path: path of the proofread narration script spreadsheet
    :param output_dir: directory that receives the audio files (created,
                       including missing parents, when it does not exist)
    :param speed: narration speed
    :param caption_file: path of the subtitle file to write
    :param state: optional mutable sequence; ``state[0]`` is updated with the
                  progress (up to 0.97 during synthesis, 1.00 when finished),
                  used to drive a progress bar in the UI
    :return: None
    """

    # Location of the narration spreadsheet
    book_path = sheet_path
    # Directory for the generated audio
    root_path = output_dir

    # makedirs also creates missing parent directories and tolerates an
    # already existing directory.
    os.makedirs(root_path, exist_ok=True)

    global tmp_file
    # Join with the bare file name only: after a previous call the global
    # already holds a full path, and re-joining it would nest directories.
    tmp_file = os.path.join(output_dir, os.path.basename(tmp_file))

    # Read the sheet and derive the narration segments with their time spans
    sheet_content = read_sheet(book_path)
    narratages, start_timestamp, end_timestamp = get_narratage_text(sheet_content, speed)
    export_caption(sheet_content, caption_file)
    print("已导出字幕文件")

    narratage_paths = []
    # Synthesize one audio file per narration segment, named after its start time
    for i, text in enumerate(narratages):
        wav_path = os.path.join(root_path, '%.2f.wav' % start_timestamp[i])
        narratage_paths.append(wav_path)
        speech_synthesis(text, wav_path, speed)
        # presumably throttles requests to the speech service -- TODO confirm
        time.sleep(2)
        print("目前正在处理{}".format(wav_path))
        if state is not None:
            state[0] = float((i + 1) / len(narratages)) * 0.97

    # Build the final audio track and merge the narration into it.
    # Extract the original audio from the video.
    # NOTE(review): the meaning of the trailing (0, -1) arguments is defined in
    # split_wav.extract_audio and is not visible here -- confirm before changing.
    from split_wav import extract_audio
    origin_wav_path = extract_audio(video_path, output_dir, 0, -1)
    # Lower the original audio's volume wherever narration is spoken
    adjust_volume(origin_wav_path, start_timestamp, end_timestamp)
    # Mix the narration into the adjusted audio (adjust_volume updated the global path)
    mix_speech(adjusted_wav_path, narratage_paths, start_timestamp)
    if state is not None:
        state[0] = 1.00

    # Remove the temporary speech file, the extracted original audio and the
    # volume-adjusted audio
    if os.path.exists(tmp_file):
        time.sleep(1)
        os.remove(tmp_file)
    os.remove(origin_wav_path)
    os.remove(adjusted_wav_path)


if __name__ == '__main__':
    # Script entry point. Everything below is disabled (commented out), so
    # running this file directly currently does nothing.
    # Define the arguments
    # parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
    # parser.add_argument("--video_path", required=True, type=str, help="原视频位置")
    # parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
    # parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
    # parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
    # parser.add_argument("--speed", type=float, default=1.0, help="设置语速，默认为1.0")
    # args = parser.parse_args()
    # video_path, sheet_path, output_dir, speed, caption_file = args.video_path,\
    # args.sheet_path, args.output_dir, args.speed, args.caption_file

    # video_path = 'D:/heelo/hysxm_3.mp4'
    # sheet_path = 'D:/heelo/hysxm_3.xlsx'
    # output_dir = 'D:/AddCaption/hysxm_3'
    # speed = 1.25
    # caption_file = 'D:/AddCaption/hysxm_3/hysxm_3.srt'
    #
    # # Run the main function
    # ss_and_export(video_path=video_path, sheet_path=sheet_path, output_dir=output_dir, speed=speed,
    #               caption_file=caption_file)
    pass
