csm9493/67_five_dataset_shuffle_10000_fewshot_lora_all_r16_alpha32_lr_1e5_decay_1e2_cosine_epoch_3_mbs_4 Text Generation • 7B • Updated Mar 6 • 7
csm9493/26_one_dataset_cot_lora_all_r16_alpha32_lr_3e5_decay_1e2_cosine_epoch_3_mbs_4 Text Generation • 7B • Updated Mar 5 • 8
csm9493/43_five_dataset_shuffle_10000_cot_lora_all_r4_alpha8_lr_1e-05_decay_1e2_cosine_epoch_3_mbs_16 Text Generation • 7B • Updated Mar 5 • 3
csm9493/43_five_dataset_shuffle_10000_cot_lora_all_r32_alpha64_lr_1e-05_decay_1e2_cosine_epoch_3_mbs_16 Text Generation • 7B • Updated Mar 5 • 8
csm9493/43_five_dataset_shuffle_10000_cot_lora_all_r8_alpha16_lr_1e-05_decay_1e2_cosine_epoch_3_mbs_16 Text Generation • 7B • Updated Mar 5 • 5
csm9493/43_five_dataset_shuffle_10000_cot_lora_all_r16_alpha32_lr_1e-05_decay_1e2_cosine_epoch_3_mbs_16 Text Generation • 7B • Updated Mar 5 • 8
csm9493/24_three_dataset_shuffle_50000_cot_lora_all_r16_alpha32_lr_3e5_decay_1e2_cosine_epoch_2_mbs_4 Text Generation • 7B • Updated Mar 5 • 7
csm9493/41_three_dataset_shuffle_3200_cot_lora_all_r128_alpha256_lr_3e5_decay_1e2_cosine_epoch_3_mbs_4 Text Generation • 7B • Updated Mar 5 • 7
csm9493/23_one_dataset_cot_lora_all_r16_alpha32_lr_3e5_decay_1e2_cosine_epoch_2_mbs_4 Text Generation • 7B • Updated Mar 5 • 9
csm9493/37_three_dataset_shuffle_3200_cot_lora_all_r64_alpha128_lr_3e5_decay_1e2_cosine_epoch_3_mbs_4 Text Generation • 7B • Updated Mar 5 • 8
csm9493/18_three_dataset_cot_lora_qvud_r16_alpha32_lr_5e5_decay_1e2_cosine_epoch_1_mbs_8 Text Generation • 7B • Updated Mar 5 • 8
csm9493/20_three_dataset_cot_lora_qvud_r8_alpha16_lr_5e5_decay_1e2_cosine_epoch_1_mbs_16 Text Generation • 7B • Updated Mar 5 • 4
csm9493/36_three_dataset_shuffle_3200_cot_lora_all_r32_alpha64_lr_3e5_decay_1e2_cosine_epoch_3_mbs_4 Text Generation • 7B • Updated Mar 5 • 8
csm9493/35_three_dataset_shuffle_15000_cot_lora_all_r16_alpha32_lr_3e5_decay_1e2_cosine_epoch_3_mbs_4 Text Generation • 7B • Updated Mar 5 • 8
csm9493/17_three_dataset_cot_lora_qved_r16_alpha32_lr_5e5_decay_1e2_cosine_epoch_1_mbs_16 Text Generation • 7B • Updated Mar 5 • 6
csm9493/15_three_dataset_cot_lora_qv_r16_alpha32_lr_5e5_decay_1e2_cosine_epoch_1_mbs_8 Text Generation • 7B • Updated Mar 4 • 7
csm9493/19_three_dataset_cot_lora_qv_r8_alpha16_lr_5e5_decay_1e2_cosine_epoch_1_mbs_16 Text Generation • 7B • Updated Mar 4 • 8
csm9493/14_three_dataset_cot_lora_qv_r16_alpha32_lr_5e5_decay_1e2_cosine_epoch_1_mbs_16 Text Generation • 7B • Updated Mar 4 • 6
csm9493/23_three_dataset_shuffle_500_cot_lora_qv_r16_alpha32_lr_3e5_decay_1e2_constant_epoch_1_mbs_16 Text Generation • 7B • Updated Mar 4 • 8
csm9493/5_whole_alpaca_lora_all_r16_alpha32_lr_3e5_decay_1e2_cosine_warmup_05_epoch_3 Text Generation • 7B • Updated Mar 4 • 9
csm9493/2_whole_alpaca_lora_all_r8_alpha16_lr_3e5_decay_1e2_cosine_warmup_05_epoch_3 Text Generation • 7B • Updated Mar 4 • 10
csm9493/8_whole_alpaca_lora_all_r16_alpha32_lr_3e5_decay_1e2_constant_epoch_1 Text Generation • 7B • Updated Mar 4 • 9
csm9493/10_whole_alpaca_lora_all_r16_alpha32_lr_1e4_decay_1e3_constant_epoch_1 Text Generation • 7B • Updated Mar 4 • 8
csm9493/11_whole_alpaca_lora_all_head_r8_alpha16_lr_1e4_decay_1e3_constant_epoch_1 Text Generation • 7B • Updated Mar 4 • 8