# coding=utf-8
import os
import argparse
import time

from azure.cognitiveservices.speech import AudioDataStream, SpeechConfig, SpeechSynthesizer
from azure.cognitiveservices.speech.audio import AudioOutputConfig
import openpyxl

# Intermediate WAV path: speech_synthesis writes here when a speed change is
# requested, and change_speed reads it back as the ffmpeg input.
tmp_file = 'tmp.wav'

# Narration rate in characters per second at speed 1.0; get_narratage_text
# divides a segment's character count by it to estimate the segment duration.
normal_speed = 4
# Pause, in seconds at speed 1.0, added after each narration segment when
# estimating the start time of the next segment.
normal_interval = 0.1


def speech_synthesis(text, output_file, speed=1.0):
    """
    Synthesize narration audio for ``text`` and write it to ``output_file``.

    When ``speed`` differs from 1.0 the raw synthesis is first written to the
    module-level ``tmp_file`` and then re-timed into ``output_file`` by
    ``change_speed``; otherwise the synthesizer writes ``output_file`` directly.

    :param text: narration text to synthesize
    :param output_file: path of the WAV file to produce
    :param speed: playback speed factor (anything accepted by ``float``), defaults to 1.0
    :return: None
    """
    needs_retime = float(speed) != 1.0
    # Synthesize into the temporary file first when the audio still has to be re-timed.
    audio_path = tmp_file if needs_retime else output_file

    # NOTE(review): the subscription key is hard-coded in source; it should be
    # loaded from configuration or the environment and rotated.
    speech_config = SpeechConfig(subscription="ffa331815f0f4c7fa418bb6c2e1c4e17", region="eastus")
    speech_config.speech_synthesis_language = "zh-CN"
    speech_config.speech_synthesis_voice_name = 'zh-CN-XiaomoNeural'

    audio_config = AudioOutputConfig(filename=audio_path)
    synthesizer = SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    # speak_text_async only starts the request; block on the returned future so
    # the audio file exists before it is re-timed or consumed by the caller.
    synthesizer.speak_text_async(text).get()

    if needs_retime:
        change_speed(output_file, speed)


def change_speed(wav_path, speed=1.0):
    """
    Re-time the temporary synthesis output with ffmpeg's ``atempo`` filter.

    Reads the module-level ``tmp_file`` and writes the re-timed audio to
    ``wav_path``, overwriting it if present (``-y``). Failures are reported on
    stdout / by ffmpeg itself and do not raise, matching the previous
    best-effort behaviour.

    :param wav_path: destination path of the re-timed audio
    :param speed: tempo factor handed to ``atempo``
    :return: None
    """
    # Local import so this function does not depend on the file's import block.
    import subprocess

    print("调速")
    # Pass arguments as a list (no shell) so paths containing spaces or shell
    # metacharacters are handed to ffmpeg verbatim.
    command = [
        'ffmpeg', '-y',
        '-i', tmp_file,
        '-filter:a', 'atempo={}'.format(speed),
        wav_path,
    ]
    try:
        subprocess.run(command, check=False)
    except OSError as err:
        # ffmpeg missing or not executable; keep going like os.system did.
        print("ffmpeg 调用失败: {}".format(err))


def read_sheet(book_path, sheet_name=None):
    """
    Read a whole worksheet into a dict of columns.

    The first row is treated as the header row; every header value becomes a
    key whose list holds that column's cell values from row 2 downwards.

    :param book_path: path of the xlsx workbook
    :param sheet_name: optional worksheet name; the active sheet is used when None
    :return: sheet_content (dict) mapping header value -> list of cell values
    :raises KeyError: if ``sheet_name`` is given but no such worksheet exists
    """
    workbook = openpyxl.load_workbook(book_path)
    # Honour the requested sheet; previously the parameter was silently ignored.
    sheet = workbook.active if sheet_name is None else workbook[sheet_name]
    rows = sheet.max_row
    cols = sheet.max_column

    # Read the header row once instead of once per data cell.
    headers = [sheet.cell(1, j).value for j in range(1, cols + 1)]
    sheet_content = {header: [] for header in headers}
    for i in range(2, rows + 1):
        for j, header in enumerate(headers, start=1):
            sheet_content[header].append(sheet.cell(i, j).value)
    return sheet_content


def get_narratage_text(sheet_content, speed):
    """
    Derive the narration texts and their start times from the sheet content.

    Rows whose script cell is empty are skipped. A script cell equal to
    '翻译' reuses that row's subtitle and start time. Any other script is
    split on newlines; the first segment starts 0.1 s after the previous
    row's end time (or at 0 for the first row) and each following segment is
    offset by the estimated duration of the one before it.

    :param sheet_content: dict with keys "起始时间", "终止时间", "字幕", "解说脚本"
    :param speed: float, narration speed factor
    :return: (narration texts, narration start times) as two parallel lists
    """
    scripts = sheet_content['解说脚本']
    subtitles = sheet_content['字幕']
    starts = sheet_content['起始时间']
    ends = sheet_content['终止时间']

    texts = []
    timepoints = []
    for row, script in enumerate(scripts):
        if script is None:
            continue

        if script == '翻译':
            texts.append(subtitles[row])
            timepoints.append(float(starts[row]))
            continue

        segments = script.split('\n')
        if row > 0:
            cursor = float(ends[row - 1]) + 0.1
        else:
            cursor = 0
        for segment in segments:
            texts.append(segment)
            timepoints.append(cursor)
            # Estimated spoken length plus pause, scaled by the playback speed.
            cursor = cursor + (len(segment) / normal_speed + normal_interval) / speed
    return texts, timepoints


def second_to_str(seconds):
    """
    Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds before being split into
    fields, so binary floating-point error (e.g. 2.3 being stored as
    2.2999...) cannot drop a millisecond.

    :param seconds: non-negative duration in seconds (anything accepted by ``float``)
    :return: the formatted timestamp string
    """
    total_ms = int(round(float(seconds) * 1000))
    hour, remainder = divmod(total_ms, 3600 * 1000)
    minute, remainder = divmod(remainder, 60 * 1000)
    second, ms = divmod(remainder, 1000)
    return "%02d:%02d:%02d,%03d" % (hour, minute, second, ms)


def export_caption(sheet_content, caption_file):
    """
    Write the subtitles from the sheet to a caption file in SRT format.

    Rows whose subtitle cell is empty are skipped; the remaining entries are
    numbered consecutively from 1.

    :param sheet_content: sheet content dict with keys "字幕", "起始时间", "终止时间"
    :param caption_file: path of the SRT file to write (UTF-8, overwritten)
    :return: None
    """
    caption = sheet_content["字幕"]
    start_time = sheet_content['起始时间']
    end_time = sheet_content['终止时间']
    cnt = 0
    with open(caption_file, "w", encoding="utf-8") as f:
        for i, x in enumerate(caption):
            if x is None:
                continue
            start, end = second_to_str(start_time[i]), second_to_str(end_time[i])
            cnt += 1
            # format() coerces the caption to text, so a cell that holds a
            # number no longer raises TypeError on concatenation.
            f.write("{}\n{} --> {}\n{}\n\n".format(cnt, start, end, x))


def ss_and_export(sheet_path, output_dir, speed, caption_file, state=None):
    """
    Export the caption file and synthesize one audio file per narration segment.

    Each segment is written to ``output_dir`` as ``<start time>.wav`` with the
    start time formatted to two decimals.

    :param sheet_path: path of the proof-read narration sheet (xlsx)
    :param output_dir: directory that receives the audio files; created if missing
    :param speed: narration speed factor
    :param caption_file: path of the SRT caption file to write
    :param state: optional mutable sequence; ``state[0]`` is updated with the
                  fraction of segments processed so far. Ignored when None.
    :return: None
    """
    # makedirs also creates missing parent directories and tolerates an existing one.
    os.makedirs(output_dir, exist_ok=True)

    # Read the sheet, derive the narration segments and export the captions.
    sheet_content = read_sheet(sheet_path)
    narratages, start_timepoint = get_narratage_text(sheet_content, speed)
    export_caption(sheet_content, caption_file)
    print("已导出旁白文件")

    total = len(narratages)
    try:
        # Synthesize the narration audio segment by segment.
        for i, text in enumerate(narratages):
            wav_path = os.path.join(output_dir, '%.2f.wav' % start_timepoint[i])
            speech_synthesis(text, wav_path, speed)
            # NOTE(review): presumably paces requests / lets the audio file settle — confirm.
            time.sleep(1)
            print("目前正在处理{}".format(wav_path))
            if state is not None:
                state[0] = float((i + 1) / total)
    finally:
        # Remove the temporary synthesis file even when a segment fails.
        if os.path.exists(tmp_file):
            time.sleep(1)
            os.remove(tmp_file)


if __name__ == '__main__':
    # Command-line interface.
    parser = argparse.ArgumentParser(description='Speech Synthesis guideness')
    parser.add_argument("--output_dir", required=True, type=str, help="音频输出位置路径")
    parser.add_argument("--sheet_path", required=True, type=str, help='旁白解说表格存储路径')
    parser.add_argument("--caption_file", required=True, type=str, help="输出的字幕文件存储路径")
    parser.add_argument("--speed", type=float, default=1.0, help="设置语速，默认为1.0")
    args = parser.parse_args()

    # Pass arguments by keyword: the positional call had sheet_path and
    # output_dir swapped and omitted the progress holder ss_and_export expects.
    progress = [0.0]
    ss_and_export(
        sheet_path=args.sheet_path,
        output_dir=args.output_dir,
        speed=args.speed,
        caption_file=args.caption_file,
        state=progress,
    )
