# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

# Fine-tune launcher: builds an experiment directory name from the
# hyper-parameters below and runs `fairseq-train` on the en-zh data in
# $FT_BIN, mirroring stdout/stderr into a timestamped log file.
#
# Usage: <script> GPU_IDS LR MAX_TOKENS MAX_EPOCH
#   GPU_IDS     value assigned to CUDA_VISIBLE_DEVICES (e.g. "0" or "0,1");
#               also embedded in the experiment name
#   LR          value passed to --lr
#   MAX_TOKENS  value passed to --max-tokens
#   MAX_EPOCH   value passed to --max-epoch
#
# Exit status: that of the fairseq-train pipeline (see pipefail note below);
# 2 on a usage error, 1 if the output directories cannot be created.

if [ "$#" -lt 4 ]; then
  printf 'Usage: %s GPU_IDS LR MAX_TOKENS MAX_EPOCH\n' "${0##*/}" >&2
  exit 2
fi

# Without pipefail the status of `fairseq-train ... | tee` is tee's, so a
# crashed training run would be reported as success. Enable it only where
# the running shell supports it, so the script still works under shells
# that lack the option.
# shellcheck disable=SC3040
if (set -o pipefail) 2>/dev/null; then
  set -o pipefail
fi

# --- positional arguments ---------------------------------------------------
TAG=$1          # GPU id list; used as CUDA_VISIBLE_DEVICES and in $EXP
lr=$2
max_tokens=$3
mepoch=$4

# --- fixed paths ------------------------------------------------------------
OUTPUT_DIR=checkpoints  # output dir to save checkpoints, decodings, etc
FT_BIN=data/bin/ft_en_zh/
PRETRAINED_CKPT=checkpoints/Pretrain_all_musescore_filtered_single_tag_lr5e-4_m50_r0.5_mtoken2048_upf5_M0,1,2,3,4,5/checkpoint_best.pt

# --- data / task selection --------------------------------------------------
langs=en,zh
ft_langs=en-zh
ft_domain=LYRICS
task=xdae_multilingual_translation_with_melody

# --- optimisation settings --------------------------------------------------
max_pos=2048
update_freq=1
warmup=50

# --- settings not referenced by the command below ---------------------------
# Kept so the values stay on record; nothing in this script reads them.
# shellcheck disable=SC2034
TBS=1024
# shellcheck disable=SC2034
word_shuffle=0      # alternative previously noted here: 3
# shellcheck disable=SC2034
word_dropout=0.0    # alternative previously noted here: 0.1
# shellcheck disable=SC2034
word_blank=0.0      # alternative previously noted here: 0.1
# shellcheck disable=SC2034
mask_rate=0.3
# shellcheck disable=SC2034
poisson_lbd=3.5

# --- experiment directories -------------------------------------------------
EXP="FT_baseline_lr${lr}_${ft_langs}_m${mepoch}_mtoken${max_tokens}_upf${update_freq}_M${TAG}"

SAVE=${OUTPUT_DIR}/$EXP
LOG=$SAVE/log

mkdir -p -- "$SAVE" "$LOG" || exit 1

# When the save dir holds no checkpoint_last.pt yet, pass the --reset-* flags;
# when one exists they are omitted (presumably so an interrupted run resumes
# with its own optimizer/scheduler state -- confirm against fairseq).
SUFFIX=""
if [ ! -f "$SAVE/checkpoint_last.pt" ]; then
  SUFFIX="--reset-dataloader --reset-lr-scheduler --reset-meters --reset-optimizer"
fi

NOW=$(date '+%F_%H_%M_%S')

# $SUFFIX is left unquoted on purpose: it holds zero or more separate flags
# and must be word-split into individual arguments.
# shellcheck disable=SC2086
CUDA_VISIBLE_DEVICES="$TAG" fairseq-train "$FT_BIN" \
  --adam-eps 1e-06 \
  --adam-betas '(0.9, 0.98)' \
  --add-lang-token \
  --alignment-decoder-type 'simple' \
  --alignment-decoder-layers 4 \
  --alignment-lambda 0.5 \
  --attention-dropout 0.1 \
  --arch mbart_base_with_melody \
  --criterion label_smoothed_cross_entropy_with_alignment \
  --ddp-backend no_c10d \
  --decoder-layers 12 \
  --domains LYRICS,WMT \
  --dropout 0.1 \
  --dur-type-num 30 \
  --encoder-layers 12 \
  --eval-inference \
  --eval-inference-start-step 50 \
  --finetune-data "$FT_BIN" \
  --finetune-domain "$ft_domain" \
  --finetune-langs "$ft_langs" \
  --keep-interval-updates 1 \
  --kernel-size 3 \
  --langs "$langs" \
  --layernorm-embedding \
  --length-control-type 'attention' \
  --log-format simple --log-interval 5 \
  --lr-scheduler inverse_sqrt \
  --lr "$lr" \
  --max-tokens "$max_tokens" \
  --max-epoch "$mepoch" \
  --max-delta-note 20 \
  --max-source-positions "$max_pos" \
  --max-target-positions "$max_pos" \
  --note-num 128 \
  --optimizer adam \
  --pretrained-mt-ckpt-dir "$PRETRAINED_CKPT" \
  --predictor-dropout 0.5 \
  --predictor-layers 5 \
  --save-dir "$SAVE" \
  --save-interval 5 --save-interval-updates 100000 \
  --skip-invalid-size-inputs-valid-test \
  --sample-break-mode eos \
  --share-all-embeddings \
  --stop-min-lr 1e-09 \
  --task "$task" \
  --tensorboard-logdir "$SAVE" \
  --warmup-init-lr 1e-07 \
  --warmup-updates "$warmup" \
  --weight-decay 0.01 \
  --update-freq "$update_freq" \
  $SUFFIX 2>&1 | tee "$LOG/log_${NOW}.txt"