From 351a1a38366084092dcc3a2ab76fc6dc00c5ea4a Mon Sep 17 00:00:00 2001
From: Yu Li
Date: Fri, 16 Jun 2023 08:28:24 +0800
Subject: [PATCH] add instructions for finetuning based on merged models

---
 README.md                                        | 27 +++++++---
 README_en.md                                     | 28 ++++++++---
 .../run_finetune_raining_based_on_Anima.sh       | 50 +++++++++++++++++++
 3 files changed, 89 insertions(+), 16 deletions(-)
 create mode 100755 training/run_finetune_raining_based_on_Anima.sh

diff --git a/README.md b/README.md
index b2ce05c..8c43221 100644
--- a/README.md
+++ b/README.md
@@ -56,15 +56,26 @@ The Anima model is based on QLoRA's open-source [33B guanaco](https://huggingface.co/timdettmers/
 
 #### How to train
 
-Anima 33B can be reproduced with the following steps (tested and working on a single 80GB H100 or dual 40GB A100 GPUs):
+1. Reproducing Anima's training process: Anima 33B can be reproduced with the following steps (tested and working on a single 80GB H100 or dual 40GB A100 GPUs):
 
-```bash
-# 1. install dependencies
-pip install -r requirements.txt
-# 2.
-cd training
-./run_Amina_training.sh
-```
+   ```bash
+   # 1. install dependencies
+   pip install -r requirements.txt
+   # 2. run the training script
+   cd training
+   ./run_Amina_training.sh
+   ```
+
+2. Finetuning other models based on Anima:
+
+   ```bash
+   # 1. install dependencies
+   pip install -r requirements.txt
+   # 2. run the finetuning script
+   cd training
+   ./run_finetune_raining_based_on_Anima.sh
+   ```
+
+   Note: you can change the --dataset and --dataset_format arguments in run_finetune_raining_based_on_Anima.sh to use a different training dataset.
 
 #### Multi-GPU training
 Since Hugging Face Accelerate is used, multi-GPU training is supported out of the box.

diff --git a/README_en.md b/README_en.md
index 05f4b6e..76ab103 100644
--- a/README_en.md
+++ b/README_en.md
@@ -57,15 +57,27 @@ For cost considerations, we mostly chose not to do too much grid search, assumin
 
 #### How to reproduce our training
 
-The Anima 33B model can be fully reproduced with the following steps (tested on a single-GPU environment of 1x80GB H100, or multi-GPU of 2x40GB A100):
+1. Reproducing the Anima model's training: the Anima 33B model can be fully reproduced with the following steps (tested on a single-GPU environment of 1x80GB H100, or multi-GPU of 2x40GB A100):
+
+   ```bash
+   # 1. install dependencies
+   pip install -r requirements.txt
+   # 2. run the training script
+   cd training
+   ./run_Amina_training.sh
+   ```
+
+2. Finetuning other models based on Anima:
+
+   ```bash
+   # 1. install dependencies
+   pip install -r requirements.txt
+   # 2. run the finetuning script
+   cd training
+   ./run_finetune_raining_based_on_Anima.sh
+   ```
+
+   Note: please modify the --dataset and --dataset_format arguments in run_finetune_raining_based_on_Anima.sh accordingly to point to your dataset.
 
-```bash
-# 1. install dependencies
-pip install -r requirements.txt
-# 2.
-cd training
-./run_Amina_training.sh
-```
 
 #### Multi-GPU training
 Because of Hugging Face Accelerate, multi-GPU training is supported out of the box.

diff --git a/training/run_finetune_raining_based_on_Anima.sh b/training/run_finetune_raining_based_on_Anima.sh
new file mode 100755
index 0000000..e876a69
--- /dev/null
+++ b/training/run_finetune_raining_based_on_Anima.sh
@@ -0,0 +1,50 @@
+
+
+set -x -e
+
+run_id=$(date +%s)
+echo "RUN ID: $run_id"
+
+echo "START TIME: $(date)"
+
+
+ROOT_DIR_BASE=/home/ubuntu/cloudfs/saved_models/qlora_cn
+OUTPUT_PATH=$ROOT_DIR_BASE/output_$run_id
+
+mkdir -p $OUTPUT_PATH
+
+
+
+# based on the length quantiles measured in ./test_cn_dataset_lenghts.py :
+
+# source len @qt0.8:  188.0
+# target len @qt0.8:  222.0
+# source len @qt0.85: 228.0
+# target len @qt0.85: 267.0
+# source len @qt0.9:  297.0
+# target len @qt0.9:  342.0
+# source len @qt0.95: 396.0
+# target len @qt0.95: 491.0
+# source len @qt0.98: 515.0
+# target len @qt0.98: 670.2800000000279
+
+
+python qlora.py --dataset="chinese-vicuna" \
+    --dataset_format="alpaca-clean" `# alpaca-clean has a similar format to the Chinese training dataset` \
+    --learning_rate 0.0001 `# QLoRA paper appendix B Table 9` \
+    --per_device_train_batch_size 1 `# reduced to fit in memory` \
+    --gradient_accumulation_steps 16 `# QLoRA paper appendix B Table 9` \
+    --max_steps 10000 `# QLoRA paper appendix B Table 9; follow the paper setting even though the cn dataset (690k) is much bigger than OASST1 (9k), given the effective batch size with accumulation` \
+    --model_name_or_path "timdettmers/guanaco-33b-merged" \
+    --source_max_len 512 `# default setting in the code; the cn model's 2048 is too long` \
+    --target_max_len 512 `# follow QLoRA paper appendix B Table 9` \
+    --eval_dataset_size 1 `# mainly for testing, no need to be big` \
+    --do_eval \
+    --evaluation_strategy "steps" \
+    --eval_steps 200 `# 10 for debug mode only, 200 for training` \
+    --output_dir $OUTPUT_PATH \
+    --report_to 'wandb' \
+    --sample_generate `# generate test samples every once in a while` \
+    --save_steps 200 `# 20 for debug mode only, 200 for training`
+
+# --debug_mode `# only set when in debug mode` \
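Per the READMEs, only the --dataset and --dataset_format flags need to change to finetune on a different dataset. A minimal sketch of how one might wrap that so the flags come from the environment instead of editing the script (the wrapper and its defaults are illustrative, not part of this patch; it reuses the per-run output-dir pattern above and only prints the qlora.py command rather than launching a training run):

```shell
#!/usr/bin/env bash
# Hypothetical wrapper sketch (not part of this patch): take the dataset
# flags from the environment, falling back to the values used in
# run_finetune_raining_based_on_Anima.sh, and build a fresh output dir
# per run. Echoes the command instead of invoking qlora.py.
set -e

DATASET="${DATASET:-chinese-vicuna}"             # dataset name or local path
DATASET_FORMAT="${DATASET_FORMAT:-alpaca-clean}" # must be a format qlora.py accepts

run_id=$(date +%s)
OUTPUT_PATH="${TMPDIR:-/tmp}/qlora_demo/output_$run_id"
mkdir -p "$OUTPUT_PATH"

CMD="python qlora.py --dataset=\"$DATASET\" --dataset_format=\"$DATASET_FORMAT\" --output_dir $OUTPUT_PATH"
echo "$CMD"
```

Invoked as `DATASET=my-data DATASET_FORMAT=alpaca ./wrapper.sh`, the same script then serves any dataset in a format qlora.py understands, without touching the checked-in training script.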