Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
- .gitattributes +22 -0
- added_tokens.json +24 -0
- config.json +28 -0
- generation_config.json +14 -0
- merges.txt +0 -0
- model-00001-of-00004.safetensors +3 -0
- model-00002-of-00004.safetensors +3 -0
- model-00003-of-00004.safetensors +3 -0
- model-00004-of-00004.safetensors +3 -0
- model.safetensors.index.json +346 -0
- runs/Sep19_17-13-37_node1803/events.out.tfevents.1758316441.node1803.939342.0 +3 -0
- special_tokens_map.json +25 -0
- tokenizer.json +3 -0
- tokenizer_config.json +208 -0
- training_args.bin +3 -0
- vocab.json +0 -0
- wandb/debug-internal.log +14 -0
- wandb/debug.log +24 -0
- wandb/run-20250920_081121-2oxex54w/files/config.yaml +644 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json +1 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json +3 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_de_eval_results_3_fabaf33255f24add59c1.table.json +0 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_en_eval_results_4_82791ccc59d2c61a093c.table.json +0 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_es_eval_results_5_db5ed66b097e218c4da6.table.json +0 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_fr_eval_results_6_214cb1a5a2cdee9f330e.table.json +0 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json +3 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json +3 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json +3 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json +3 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_th_eval_results_11_baa86a95f601e3c721a0.table.json +0 -0
- wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_zh_eval_results_12_2882185f7e816bbcc2fe.table.json +0 -0
- wandb/run-20250920_081121-2oxex54w/files/output.log +212 -0
- wandb/run-20250920_081121-2oxex54w/files/requirements.txt +378 -0
- wandb/run-20250920_081121-2oxex54w/files/wandb-metadata.json +110 -0
- wandb/run-20250920_081121-2oxex54w/files/wandb-summary.json +1 -0
- wandb/run-20250920_081121-2oxex54w/logs/debug-core.log +16 -0
- wandb/run-20250920_081121-2oxex54w/logs/debug-internal.log +15 -0
- wandb/run-20250920_081121-2oxex54w/logs/debug.log +29 -0
- wandb/run-20250920_081121-2oxex54w/run-2oxex54w.wandb +3 -0
- wandb/run-20250921_062002-cothceaw/files/config.yaml +252 -0
- wandb/run-20250921_062002-cothceaw/files/media/table/evaluation/eval_results_1_2a7ec9e10306569eae6e.table.json +1 -0
- wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json +3 -0
- wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json +3 -0
- wandb/run-20250921_062002-cothceaw/files/output.log +81 -0
- wandb/run-20250921_062002-cothceaw/files/requirements.txt +378 -0
- wandb/run-20250921_062002-cothceaw/files/wandb-metadata.json +110 -0
- wandb/run-20250921_062002-cothceaw/files/wandb-summary.json +1 -0
- wandb/run-20250921_062002-cothceaw/logs/debug-core.log +16 -0
- wandb/run-20250921_062002-cothceaw/logs/debug-internal.log +15 -0
- wandb/run-20250921_062002-cothceaw/logs/debug.log +29 -0
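The commit title says the checkpoint was pushed with huggingface_hub. A minimal sketch of how such a folder upload is usually done; the local path and repo id below are placeholders, not values taken from this commit:

```python
from huggingface_hub import HfApi

api = HfApi()  # uses the token from `huggingface-cli login` / HF_TOKEN
api.upload_folder(
    folder_path="ckpts/my-checkpoint",      # hypothetical local folder
    repo_id="your-username/your-model",     # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```

upload_folder takes care of the Git LFS bookkeeping for large files, which is why the .gitattributes rules below were added automatically.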
.gitattributes
CHANGED
@@ -33,3 +33,25 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/run-2oxex54w.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_062002-cothceaw/run-cothceaw.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_081515-hrm6dwvu/files/media/table/mmlu_prox_de_math_eval_results_2_6011ed1b84fa34f7a465.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_081515-hrm6dwvu/files/media/table/mmlu_prox_en_math_eval_results_3_574b6ec5b263ae2e258e.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_081515-hrm6dwvu/run-hrm6dwvu.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_090332-hrm6dwvu/files/media/table/mmlu_prox_es_math_eval_results_2_f5b5ddbcd29f48f36acc.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_090332-hrm6dwvu/files/media/table/mmlu_prox_fr_math_eval_results_3_ea21893417450a1c19c1.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_090332-hrm6dwvu/run-hrm6dwvu.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_092606-ztqaaqj5/files/media/table/mmlu_prox_ko_math_eval_results_2_0960ca8c88e3af630287.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_092606-ztqaaqj5/files/media/table/mmlu_prox_pt_math_eval_results_3_14279190f4728eaf809a.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_092606-ztqaaqj5/run-ztqaaqj5.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_123322-c7t8flvu/run-c7t8flvu.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_140327-c7t8flvu/files/media/table/mmlu_prox_zh_math_eval_results_2_9045c6c9481d0396b399.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_140327-c7t8flvu/run-c7t8flvu.wandb filter=lfs diff=lfs merge=lfs -text
added_tokens.json
ADDED
@@ -0,0 +1,24 @@
+{
+"</tool_call>": 151658,
+"<tool_call>": 151657,
+"<|box_end|>": 151649,
+"<|box_start|>": 151648,
+"<|endoftext|>": 151643,
+"<|file_sep|>": 151664,
+"<|fim_middle|>": 151660,
+"<|fim_pad|>": 151662,
+"<|fim_prefix|>": 151659,
+"<|fim_suffix|>": 151661,
+"<|im_end|>": 151645,
+"<|im_start|>": 151644,
+"<|image_pad|>": 151655,
+"<|object_ref_end|>": 151647,
+"<|object_ref_start|>": 151646,
+"<|quad_end|>": 151651,
+"<|quad_start|>": 151650,
+"<|repo_name|>": 151663,
+"<|video_pad|>": 151656,
+"<|vision_end|>": 151653,
+"<|vision_pad|>": 151654,
+"<|vision_start|>": 151652
+}
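added_tokens.json maps the Qwen-style special tokens to vocabulary ids. A small sketch for checking those ids once the tokenizer is loaded (the repo path is a placeholder):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/this/repo")   # placeholder path
print(tok.convert_tokens_to_ids("<|im_start|>"))   # expected: 151644
print(tok.convert_tokens_to_ids("<|im_end|>"))     # expected: 151645
print(tok.convert_tokens_to_ids("<|endoftext|>"))  # expected: 151643
```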
config.json
ADDED
@@ -0,0 +1,28 @@
+{
+"architectures": [
+"Qwen2ForCausalLM"
+],
+"attention_dropout": 0.0,
+"bos_token_id": 151643,
+"eos_token_id": 151645,
+"hidden_act": "silu",
+"hidden_size": 3584,
+"initializer_range": 0.02,
+"intermediate_size": 18944,
+"max_position_embeddings": 32768,
+"max_window_layers": 28,
+"model_type": "qwen2",
+"num_attention_heads": 28,
+"num_hidden_layers": 28,
+"num_key_value_heads": 4,
+"rms_norm_eps": 1e-06,
+"rope_scaling": null,
+"rope_theta": 1000000.0,
+"sliding_window": 131072,
+"tie_word_embeddings": false,
+"torch_dtype": "bfloat16",
+"transformers_version": "4.51.3",
+"use_cache": true,
+"use_sliding_window": false,
+"vocab_size": 152064
+}
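config.json describes a 28-layer Qwen2 causal LM with hidden size 3584, grouped-query attention (28 query heads, 4 KV heads) and bfloat16 weights. A sketch of loading it with transformers, assuming a placeholder path and that accelerate is available for device_map:

```python
import torch
from transformers import AutoConfig, AutoModelForCausalLM

cfg = AutoConfig.from_pretrained("path/to/this/repo")          # placeholder path
print(cfg.model_type, cfg.num_hidden_layers, cfg.hidden_size)  # qwen2 28 3584

model = AutoModelForCausalLM.from_pretrained(
    "path/to/this/repo",
    torch_dtype=torch.bfloat16,   # matches "torch_dtype": "bfloat16"
    device_map="auto",
)
```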
generation_config.json
ADDED
@@ -0,0 +1,14 @@
+{
+"bos_token_id": 151643,
+"do_sample": true,
+"eos_token_id": [
+151645,
+151643
+],
+"pad_token_id": 151643,
+"repetition_penalty": 1.05,
+"temperature": 0.7,
+"top_k": 20,
+"top_p": 0.8,
+"transformers_version": "4.51.3"
+}
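generation_config.json sets the default sampling behaviour (do_sample with temperature 0.7, top_p 0.8, top_k 20, repetition penalty 1.05). A sketch with those values spelled out explicitly; the prompt and path are placeholders:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/this/repo")
model = AutoModelForCausalLM.from_pretrained("path/to/this/repo", device_map="auto")

inputs = tok("Question: what is 12 * 7?", return_tensors="pt").to(model.device)
out = model.generate(
    **inputs,
    max_new_tokens=256,
    do_sample=True,          # values below mirror generation_config.json
    temperature=0.7,
    top_p=0.8,
    top_k=20,
    repetition_penalty=1.05,
)
print(tok.decode(out[0], skip_special_tokens=True))
```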
merges.txt
ADDED
The diff for this file is too large to render. See the raw diff.
model-00001-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9b62bbfc2c3c82f51429dab8f74f685996c2a31dd1d2b7ea6ba47768ea4e1ab
+size 4877660776
model-00002-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:421b39cbbc81933e028ad678f3252ec9cdabe893fcc96b4a75836d9fa7e58be3
+size 4932751008
model-00003-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f63898433cd50eb366bf80588df88478eb1a1dba660aa086a457cd33cb44c50
+size 4330865200
model-00004-of-00004.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:383305d92fbc2fbb3397d929ab45247196d7289e0fbbcd75f7239dea84d7916e
+size 1089994880
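The four .safetensors entries above are Git LFS pointer files, not the weights themselves; the real shards (about 4.9 GB, 4.9 GB, 4.3 GB and 1.1 GB) live in LFS storage. One way to fetch a single shard programmatically, assuming a placeholder repo id:

```python
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="your-username/your-model",            # placeholder repo id
    filename="model-00001-of-00004.safetensors",
)
print(path)  # local cache path of the downloaded shard
```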
model.safetensors.index.json
ADDED
@@ -0,0 +1,346 @@
+{
+"metadata": {
+"total_size": 15231233024
+},
+"weight_map": {
+"lm_head.weight": "model-00004-of-00004.safetensors",
+"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+"model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+"model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+"model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+"model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+"model.norm.weight": "model-00003-of-00004.safetensors"
+}
+}
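model.safetensors.index.json maps every parameter name to the shard that stores it (total_size is about 15.2 GB). A sketch of using the map to read one tensor without loading the whole model, run from a directory that already contains the shards:

```python
import json
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.10.mlp.down_proj.weight"
shard = index["weight_map"][name]        # "model-00002-of-00004.safetensors"
with safe_open(shard, framework="pt") as f:
    tensor = f.get_tensor(name)          # load just this tensor from the shard
print(shard, tuple(tensor.shape))
```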
runs/Sep19_17-13-37_node1803/events.out.tfevents.1758316441.node1803.939342.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da1334be7669bb7582bf0cf1d5dd15b646bd1693505aa3419b94c9497a9e867b
+size 1327067
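The runs/... entry is a TensorBoard event file (also stored through LFS). A sketch of reading its scalar curves with TensorBoard's EventAccumulator; the available tag names are not visible in this diff and depend on the trainer:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Sep19_17-13-37_node1803")
acc.Reload()
print(acc.Tags()["scalars"])                 # e.g. loss / learning-rate tags
for ev in acc.Scalars(acc.Tags()["scalars"][0]):
    print(ev.step, ev.value)
```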
special_tokens_map.json
ADDED
@@ -0,0 +1,25 @@
+{
+"additional_special_tokens": [
+"<|im_start|>",
+"<|im_end|>",
+"<|object_ref_start|>",
+"<|object_ref_end|>",
+"<|box_start|>",
+"<|box_end|>",
+"<|quad_start|>",
+"<|quad_end|>",
+"<|vision_start|>",
+"<|vision_end|>",
+"<|vision_pad|>",
+"<|image_pad|>",
+"<|video_pad|>"
+],
+"eos_token": {
+"content": "<|im_end|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false
+},
+"pad_token": "<|fim_pad|>"
+}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896
tokenizer_config.json
ADDED
@@ -0,0 +1,208 @@
+{
+"add_bos_token": false,
+"add_prefix_space": false,
+"added_tokens_decoder": {
+"151643": {
+"content": "<|endoftext|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151644": {
+"content": "<|im_start|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151645": {
+"content": "<|im_end|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151646": {
+"content": "<|object_ref_start|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151647": {
+"content": "<|object_ref_end|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151648": {
+"content": "<|box_start|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151649": {
+"content": "<|box_end|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151650": {
+"content": "<|quad_start|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151651": {
+"content": "<|quad_end|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151652": {
+"content": "<|vision_start|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151653": {
+"content": "<|vision_end|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151654": {
+"content": "<|vision_pad|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151655": {
+"content": "<|image_pad|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151656": {
+"content": "<|video_pad|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": true
+},
+"151657": {
+"content": "<tool_call>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": false
+},
+"151658": {
+"content": "</tool_call>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": false
+},
+"151659": {
+"content": "<|fim_prefix|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": false
+},
+"151660": {
+"content": "<|fim_middle|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": false
+},
+"151661": {
+"content": "<|fim_suffix|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": false
+},
+"151662": {
+"content": "<|fim_pad|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": false
+},
+"151663": {
+"content": "<|repo_name|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": false
+},
+"151664": {
+"content": "<|file_sep|>",
+"lstrip": false,
+"normalized": false,
+"rstrip": false,
+"single_word": false,
+"special": false
+}
+},
+"additional_special_tokens": [
+"<|im_start|>",
+"<|im_end|>",
+"<|object_ref_start|>",
+"<|object_ref_end|>",
+"<|box_start|>",
+"<|box_end|>",
+"<|quad_start|>",
+"<|quad_end|>",
+"<|vision_start|>",
+"<|vision_end|>",
+"<|vision_pad|>",
+"<|image_pad|>",
+"<|video_pad|>"
+],
+"bos_token": null,
+"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+"clean_up_tokenization_spaces": false,
+"eos_token": "<|im_end|>",
+"errors": "replace",
+"extra_special_tokens": {},
+"model_max_length": 131072,
+"pad_token": "<|fim_pad|>",
+"split_special_tokens": false,
+"tokenizer_class": "Qwen2Tokenizer",
+"unk_token": null
+}
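tokenizer_config.json carries the ChatML-style chat template shown above. A sketch of rendering a prompt with it (placeholder path; the commented output is what the template produces for a single user message without tools):

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("path/to/this/repo")  # placeholder path
messages = [{"role": "user", "content": "What is 15% of 80?"}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>system
# You are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>
# <|im_start|>user
# What is 15% of 80?<|im_end|>
# <|im_start|>assistant
```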
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2be420de1f77067e60447c82f1dc47b174daa0d938a57517afb744395df404c
+size 7096
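training_args.bin is a pickled transformers TrainingArguments object (about 7 KB); its values are not shown in this diff. It can be inspected roughly like this, where the attributes printed are just examples of what to look at:

```python
import torch

# training_args.bin is a pickled TrainingArguments object, so weights_only=False is required
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```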
vocab.json
ADDED
The diff for this file is too large to render. See the raw diff.
wandb/debug-internal.log
ADDED
@@ -0,0 +1,14 @@
+{"time":"2025-09-23T13:02:54.941896669-04:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-09-23T13:02:55.63168835-04:00","level":"INFO","msg":"stream: created new stream","id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:55.631723283-04:00","level":"INFO","msg":"stream: started","id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:55.63173562-04:00","level":"INFO","msg":"handler: started","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:55.631745751-04:00","level":"INFO","msg":"sender: started","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:55.631746765-04:00","level":"INFO","msg":"writer: Do: started","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:56.136638253-04:00","level":"WARN","msg":"handleCodeSave: program relative path is empty"}
+{"time":"2025-09-23T13:02:56.136911276-04:00","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
+{"time":"2025-09-23T13:03:13.563836157-04:00","level":"INFO","msg":"stream: closing","id":"j5jpgoah"}
+{"time":"2025-09-23T13:03:14.427323175-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-09-23T13:03:14.567379829-04:00","level":"INFO","msg":"handler: closed","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:03:14.567413727-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:03:14.567456299-04:00","level":"INFO","msg":"sender: closed","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:03:14.668731827-04:00","level":"INFO","msg":"stream: closed","id":"j5jpgoah"}
wandb/debug.log
ADDED
|
@@ -0,0 +1,24 @@
2025-09-23 13:02:54,151 INFO MainThread:382253 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] Configure stats pid to 382253
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] Loading settings from /home/jdhwang/.config/wandb/settings
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] Loading settings from /orcd/home/002/jdhwang/BRIDGE/wandb/settings
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] Loading settings from environment variables
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] loaded run ID from ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/wandb-resume.json
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:setup_run_log_directory():703] Logging user logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250923_130250-j5jpgoah/logs/debug.log
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250923_130250-j5jpgoah/logs/debug-internal.log
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:init():830] calling init triggers
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
config: {'_wandb': {}}
2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:init():871] starting backend
2025-09-23 13:02:54,499 INFO MainThread:382253 [wandb_init.py:init():874] sending inform_init request
2025-09-23 13:02:54,503 INFO MainThread:382253 [wandb_init.py:init():882] backend started and connected
2025-09-23 13:02:54,504 INFO MainThread:382253 [wandb_init.py:init():953] updated telemetry
2025-09-23 13:02:54,665 INFO MainThread:382253 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
2025-09-23 13:02:56,135 INFO MainThread:382253 [wandb_init.py:init():1024] run resumed
2025-09-23 13:02:56,135 INFO MainThread:382253 [wandb_init.py:init():1029] starting run threads in backend
2025-09-23 13:02:56,501 INFO MainThread:382253 [wandb_run.py:_console_start():2458] atexit reg
2025-09-23 13:02:56,518 INFO MainThread:382253 [wandb_run.py:_redirect():2306] redirect: wrap_raw
2025-09-23 13:02:56,518 INFO MainThread:382253 [wandb_run.py:_redirect():2375] Wrapping output streams.
2025-09-23 13:02:56,518 INFO MainThread:382253 [wandb_run.py:_redirect():2398] Redirects installed.
2025-09-23 13:02:56,521 INFO MainThread:382253 [wandb_init.py:init():1075] run started, returning control to user process
2025-09-23 13:03:13,563 INFO MsgRouterThr:382253 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250920_081121-2oxex54w/files/config.yaml
ADDED
|
@@ -0,0 +1,644 @@
_wandb:
  value:
    cli_version: 0.21.0
    e:
      bdsaggp24nt8kfc8qjgq21gi927g7e3o:
        args:
          - --model
          - vllm
          - --model_args
          - pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
          - --tasks
          - mgsm_native_cot_ja,mgsm_native_cot_en,mgsm_native_cot_th,mgsm_native_cot_es,mgsm_native_cot_bn,mgsm_native_cot_te,mgsm_native_cot_fr,mgsm_native_cot_zh,mgsm_native_cot_sw,mgsm_native_cot_de,mgsm_native_cot_ru
          - --batch_size
          - auto
          - --apply_chat_template
          - --output_path
          - ckpts/rerun
          - --log_samples
          - --gen_kwargs
          - max_gen_toks=20000
          - --wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto
        cpu_count: 64
        cpu_count_logical: 128
        cudaVersion: "12.4"
        disk:
          /:
            total: "464506159104"
            used: "12268101632"
        email: [email protected]
        executable: /orcd/home/001/jdhwang/.conda/envs/llm/bin/python
        git:
          commit: bb8b2be1f7420f9c6a3d65f0eaf3072732d73123
          remote: [email protected]:jd730/BRIDGE-private.git
        gpu: NVIDIA H100 80GB HBM3
        gpu_count: 2
        gpu_nvidia:
          - architecture: Hopper
            cudaCores: 16896
            memoryTotal: "85520809984"
            name: NVIDIA H100 80GB HBM3
            uuid: GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b
          - architecture: Hopper
            cudaCores: 16896
            memoryTotal: "85520809984"
            name: NVIDIA H100 80GB HBM3
            uuid: GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b
        host: node1803
        memory:
          total: "2163473002496"
        os: Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28
        program: /home/jdhwang/.conda/envs/llm/bin/lm_eval
        python: CPython 3.11.11
        root: ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309
        slurm:
          array_job_id: "4452191"
          array_task_count: "1"
          array_task_id: "0"
          array_task_max: "0"
          array_task_min: "0"
          array_task_step: "1"
          cluster_name: eofe7
          conf: /etc/slurm/slurm.conf
          cpus_on_node: "16"
          cpus_per_task: "16"
          gpus_on_node: "2"
          gtids: "0"
          job_account: mit_general
          job_cpus_per_node: "16"
          job_end_time: "1758413466"
          job_gid: "209655"
          job_gpus: 1,2
          job_id: "4452191"
          job_name: eval.sh
          job_nodelist: node1803
          job_num_nodes: "1"
          job_partition: ou_bcs_normal
          job_qos: normal
          job_start_time: "1758370266"
          job_uid: "209655"
          job_user: jdhwang
          jobid: "4452191"
          localid: "0"
          mem_per_node: "131072"
          nnodes: "1"
          nodeid: "0"
          nodelist: node1803
          nprocs: "1"
          ntasks: "1"
          ntasks_per_node: "1"
          oom_kill_step: "0"
          prio_process: "0"
          procid: "0"
          script_context: prolog_task
          submit_dir: /orcd/home/002/jdhwang/BRIDGE
          submit_host: orcd-login003.mit.edu
          task_pid: "1143610"
          tasks_per_node: "1"
          topology_addr: node1803
          topology_addr_pattern: node
          tres_per_task: cpu=16
        startedAt: "2025-09-20T12:11:21.301942Z"
        writerId: bdsaggp24nt8kfc8qjgq21gi927g7e3o
    m: []
    python_version: 3.11.11
    t:
      "1":
        - 1
        - 5
        - 11
        - 30
        - 41
        - 49
        - 51
        - 53
        - 71
        - 95
        - 98
        - 100
        - 105
      "2":
        - 1
        - 5
        - 11
        - 30
        - 41
        - 49
        - 51
        - 53
        - 71
        - 95
        - 98
        - 100
        - 105
      "3":
        - 2
        - 13
        - 62
      "4": 3.11.11
      "5": 0.21.0
      "6": 4.51.3
      "12": 0.21.0
      "13": linux-x86_64
cli_configs:
  value:
    batch_size: auto
    batch_sizes: []
    bootstrap_iters: 0
    device: null
    fewshot_seed: 1234
    gen_kwargs:
      max_gen_toks: 20000
    limit: null
    model: vllm
    model_args: pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
    numpy_seed: 1234
    random_seed: 0
    torch_seed: 1234
    use_cache: null
task_configs:
  value:
    mgsm_native_cot_bn:
      dataset_name: bn
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nধাপে ধাপে উত্তর:"}}{% else %}{{"প্রশ্ন: "+question+"\nধাপে ধাপে উত্তর:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: The answer is (\-?[0-9\.\,]+)
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 'প্রশ্ন:'
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_bn
      test_split: test
      training_split: train
    mgsm_native_cot_de:
      dataset_name: de
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nSchritt-für-Schritt-Antwort:"}}{% else %}{{"Frage: "+question+"\nSchritt-für-Schritt-Antwort:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: Die Antwort lautet (\-?[0-9\.\,]+)
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 'Frage:'
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_de
      test_split: test
      training_split: train
    mgsm_native_cot_en:
      dataset_name: en
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: The answer is (\-?[0-9\.\,]+)
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 'Question:'
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_en
      test_split: test
      training_split: train
    mgsm_native_cot_es:
      dataset_name: es
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[23:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nRespuesta paso a paso:"}}{% else %}{{"Pregunta: "+question+"\nRespuesta paso a paso:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: La respuesta es (\-?[0-9\.\,]+)
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 'Pregunta:'
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_es
      test_split: test
      training_split: train
    mgsm_native_cot_fr:
      dataset_name: fr
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[26:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nRéponse étape par étape :"}}{% else %}{{"Question : "+question+"\nRéponse étape par étape :"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: La réponse est (\-?[0-9\.\,]+)
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 'Question :'
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_fr
      test_split: test
      training_split: train
    mgsm_native_cot_ja:
      dataset_name: ja
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nステップごとの答え:"}}{% else %}{{"問題: "+question+"\nステップごとの答え:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: 答えは(\-?[0-9\.\,]+)です。
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 問題:
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_ja
      test_split: test
      training_split: train
    mgsm_native_cot_ru:
      dataset_name: ru
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nПошаговоерешение:"}}{% else %}{{"Задача: "+question+"\nПошаговоерешение:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: Ответ — (\-?[0-9\.\,]+)
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 'Задача:'
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_ru
      test_split: test
      training_split: train
    mgsm_native_cot_sw:
      dataset_name: sw
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[25:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nJibu la Hatua kwa Hatua:"}}{% else %}{{"Swali: "+question+"\nJibu la Hatua kwa Hatua:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: Jibu ni (\-?[0-9\.\,]+)
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 'Swali:'
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_sw
      test_split: test
      training_split: train
    mgsm_native_cot_te:
      dataset_name: te
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[19:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nదశలవారీగా సమాధానం:"}}{% else %}{{"ప్రశ్న: "+question+"\nదశలవారీగా సమాధానం:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: సమాధానం (\-?[0-9\.\,]+)
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 'ప్రశ్న:'
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_te
      test_split: test
      training_split: train
    mgsm_native_cot_th:
      dataset_name: th
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\nคำตอบทีละขั้นตอน:"}}{% else %}{{"โจทย์: "+question+"\nคำตอบทีละขั้นตอน:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: คำตอบคือ (\-?[0-9\.\,]+)
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 'โจทย์:'
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_th
      test_split: test
      training_split: train
    mgsm_native_cot_zh:
      dataset_name: zh
      dataset_path: juletxara/mgsm
      description: ""
      doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}'
      doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题: "+question+"\n逐步解答:"}}{% endif %}'
      fewshot_delimiter: |4+

      filter_list:
        - filter:
            - function: regex
              regex_pattern: 答案是 (\-?[0-9\.\,]+)。
            - function: take_first
          name: strict-match
        - filter:
            - function: regex
              group_select: -1
              regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
            - function: take_first
          name: flexible-extract
      generation_kwargs:
        do_sample: false
        max_gen_toks: 20000
        until:
          - 问题:
          - </s>
          - <|im_end|>
      metadata:
        version: 4
      metric_list:
        - aggregation: mean
          higher_is_better: true
          ignore_case: true
          ignore_punctuation: true
          metric: exact_match
      num_fewshot: 0
      output_type: generate_until
      repeats: 1
      should_decontaminate: false
      tag: mgsm_cot_native
      target_delimiter: ' '
      task: mgsm_native_cot_zh
      test_split: test
      training_split: train
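Each task above defines two answer filters: strict-match, which demands the exact "The answer is N" phrasing (or its translation), and flexible-extract, which keeps the last number-like span in the generation (group_select: -1). A sketch of both filters on one made-up English completion, using the regex patterns copied from the config:

    # Sketch: strict-match vs. flexible-extract, as configured for the mgsm tasks.
    import re

    completion = "She bakes 4 dozen muffins, which is 48 muffins in total."  # invented sample

    strict = re.search(r"The answer is (\-?[0-9\.\,]+)", completion)
    print("strict-match:", strict.group(1) if strict else None)  # None: phrasing absent

    matches = re.findall(r"(-?[$0-9.,]{2,})|(-?[0-9]+)", completion)
    if matches:
        last = matches[-1]                              # group_select: -1 -> last match wins
        print("flexible-extract:", last[0] or last[1])  # -> 48

One plausible reading of the results below (strict-match near zero, flexible-extract much higher) is that the model rarely emits the canonical closing sentence even when the final number is recoverable.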
wandb/run-20250920_081121-2oxex54w/files/media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json
ADDED
|
@@ -0,0 +1 @@
{"columns": ["Tasks", "Version", "Filter", "num_fewshot", "Metric", "Value", "Stderr"], "data": [["mgsm_native_cot_bn", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_bn", 4.0, "flexible-extract", 0, "exact_match", "0.528", "N/A"], ["mgsm_native_cot_de", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_de", 4.0, "flexible-extract", 0, "exact_match", "0.74", "N/A"], ["mgsm_native_cot_en", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_en", 4.0, "flexible-extract", 0, "exact_match", "0.872", "N/A"], ["mgsm_native_cot_es", 4.0, "strict-match", 0, "exact_match", "0.008", "N/A"], ["mgsm_native_cot_es", 4.0, "flexible-extract", 0, "exact_match", "0.78", "N/A"], ["mgsm_native_cot_fr", 4.0, "strict-match", 0, "exact_match", "0.004", "N/A"], ["mgsm_native_cot_fr", 4.0, "flexible-extract", 0, "exact_match", "0.76", "N/A"], ["mgsm_native_cot_ja", 4.0, "strict-match", 0, "exact_match", "0.008", "N/A"], ["mgsm_native_cot_ja", 4.0, "flexible-extract", 0, "exact_match", "0.644", "N/A"], ["mgsm_native_cot_ru", 4.0, "strict-match", 0, "exact_match", "0.004", "N/A"], ["mgsm_native_cot_ru", 4.0, "flexible-extract", 0, "exact_match", "0.832", "N/A"], ["mgsm_native_cot_sw", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_sw", 4.0, "flexible-extract", 0, "exact_match", "0.008", "N/A"], ["mgsm_native_cot_te", 4.0, "strict-match", 0, "exact_match", "0.02", "N/A"], ["mgsm_native_cot_te", 4.0, "flexible-extract", 0, "exact_match", "0.12", "N/A"], ["mgsm_native_cot_th", 4.0, "strict-match", 0, "exact_match", "0.26", "N/A"], ["mgsm_native_cot_th", 4.0, "flexible-extract", 0, "exact_match", "0.652", "N/A"], ["mgsm_native_cot_zh", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_zh", 4.0, "flexible-extract", 0, "exact_match", "0.784", "N/A"]]}
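The table above is a W&B table payload: a columns list plus row-wise data. A sketch of reshaping it so each task carries its strict-match and flexible-extract scores side by side, assuming pandas is available and the JSON is read from the path shown:

    # Sketch: pivot the logged evaluation table into one row per task.
    import json
    import pandas as pd

    path = "wandb/run-20250920_081121-2oxex54w/files/media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json"
    with open(path, "r", encoding="utf-8") as f:
        payload = json.load(f)

    df = pd.DataFrame(payload["data"], columns=payload["columns"])
    df["Value"] = df["Value"].astype(float)
    print(df.pivot_table(index="Tasks", columns="Filter", values="Value"))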
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f5e72dc1f8666e2a7b2dd420223eda667dbec11d2c10570f946fcc38e857476a
size 16518181
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_de_eval_results_3_fabaf33255f24add59c1.table.json
ADDED
|
The diff for this file is too large to render.
See raw diff
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_en_eval_results_4_82791ccc59d2c61a093c.table.json
ADDED
|
The diff for this file is too large to render.
See raw diff
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_es_eval_results_5_db5ed66b097e218c4da6.table.json
ADDED
|
The diff for this file is too large to render.
See raw diff
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_fr_eval_results_6_214cb1a5a2cdee9f330e.table.json
ADDED
|
The diff for this file is too large to render.
See raw diff
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:098df869a17abf277e8cf3b5d4966de124ba0fed82b267159ef0c1d32c7f38a2
size 19900362
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:19e62e412810312c2375525d388b308579f502daf4537e08ea3e97b90b3ef016
size 13109180
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d79a57dcd801f08109f8b083cce4884bf51b225d8127bc78429cf19d3df17aad
size 22151199
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json
ADDED
|
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c0fc0c81b8662e04cc0e8fc30ac6053adcf13bd384f959eee54fbeb5039dbc01
size 23468672
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_th_eval_results_11_baa86a95f601e3c721a0.table.json
ADDED
|
The diff for this file is too large to render.
See raw diff
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_zh_eval_results_12_2882185f7e816bbcc2fe.table.json
ADDED
|
The diff for this file is too large to render.
See raw diff
wandb/run-20250920_081121-2oxex54w/files/output.log
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-09-20:08:11:22,626 INFO [__main__.py:291] Verbosity set to INFO
|
| 2 |
+
2025-09-20:08:11:57,895 INFO [__init__.py:464] The tag pawsx is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
|
| 3 |
+
2025-09-20:08:11:59,107 INFO [__main__.py:388] Selected Tasks: ['mgsm_native_cot_bn', 'mgsm_native_cot_de', 'mgsm_native_cot_en', 'mgsm_native_cot_es', 'mgsm_native_cot_fr', 'mgsm_native_cot_ja', 'mgsm_native_cot_ru', 'mgsm_native_cot_sw', 'mgsm_native_cot_te', 'mgsm_native_cot_th', 'mgsm_native_cot_zh']
|
| 4 |
+
2025-09-20:08:11:59,119 INFO [evaluator.py:161] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
|
| 5 |
+
2025-09-20:08:11:59,119 WARNING [evaluator.py:172] generation_kwargs specified through cli, these settings will update set parameters in yaml tasks. Ensure 'do_sample=True' for non-greedy decoding!
|
| 6 |
+
2025-09-20:08:11:59,119 INFO [evaluator.py:198] Initializing vllm model, with arguments: {'pretrained': 'ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', 'dtype': 'auto', 'tensor_parallel_size': 2, 'max_model_len': 20000}
|
| 7 |
+
INFO 09-20 08:12:05 config.py:350] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.
|
| 8 |
+
INFO 09-20 08:12:05 config.py:1020] Defaulting to use mp for distributed inference
|
| 9 |
+
INFO 09-20 08:12:05 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', speculative_config=None, tokenizer='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=20000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=1234, served_model_name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, chat_template_text_format=string, mm_processor_kwargs=None, pooler_config=None)
|
| 10 |
+
WARNING 09-20 08:12:06 multiproc_gpu_executor.py:130] CUDA was previously initialized. We must use the `spawn` multiprocessing start method. Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.
|
| 11 |
+
WARNING 09-20 08:12:06 multiproc_gpu_executor.py:56] Reducing Torch parallelism from 16 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
|
| 12 |
+
INFO 09-20 08:12:06 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
|
| 13 |
+
INFO 09-20 08:12:06 selector.py:135] Using Flash Attention backend.
|
| 14 |
+
INFO 09-20 08:12:14 utils.py:961] Found nccl from library libnccl.so.2
|
| 15 |
+
INFO 09-20 08:12:14 pynccl.py:69] vLLM is using nccl==2.21.5
|
| 16 |
+
INFO 09-20 08:12:15 custom_all_reduce_utils.py:242] reading GPU P2P access cache from /home/jdhwang/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
|
| 17 |
+
INFO 09-20 08:12:15 shm_broadcast.py:236] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x145452a92810>, local_subscribe_port=53811, remote_subscribe_port=None)
|
| 18 |
+
INFO 09-20 08:12:15 model_runner.py:1072] Starting to load model ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309...
|
| 19 |
+
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
| 20 |
+
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:43<02:11, 43.77s/it]
|
| 21 |
+
Loading safetensors checkpoint shards: 50% Completed | 2/4 [01:16<01:14, 37.12s/it]
|
| 22 |
+
Loading safetensors checkpoint shards: 75% Completed | 3/4 [01:49<00:35, 35.23s/it]
|
| 23 |
+
Loading safetensors checkpoint shards: 100% Completed | 4/4 [02:44<00:00, 43.11s/it]
|
| 24 |
+
Loading safetensors checkpoint shards: 100% Completed | 4/4 [02:44<00:00, 41.10s/it]
|
| 25 |
+
INFO 09-20 08:15:00 model_runner.py:1077] Loading model weights took 7.1216 GB
|
| 26 |
+
INFO 09-20 08:15:02 worker.py:232] Memory profiling results: total_gpu_memory=79.10GiB initial_memory_usage=8.22GiB peak_torch_memory=8.75GiB memory_usage_post_profile=8.60GiB non_torch_memory=1.44GiB kv_cache_size=60.99GiB gpu_memory_utilization=0.90
|
| 27 |
+
INFO 09-20 08:15:02 distributed_gpu_executor.py:57] # GPU blocks: 142750, # CPU blocks: 9362
|
| 28 |
+
INFO 09-20 08:15:02 distributed_gpu_executor.py:61] Maximum concurrency for 20000 tokens per request: 114.20x
|
| 29 |
+
INFO 09-20 08:15:05 model_runner.py:1400] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
|
| 30 |
+
INFO 09-20 08:15:05 model_runner.py:1404] If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
|
| 31 |
+
INFO 09-20 08:15:20 custom_all_reduce.py:224] Registering 1995 cuda graph addresses
|
| 32 |
+
INFO 09-20 08:15:20 model_runner.py:1518] Graph capturing finished in 16 secs, took 0.25 GiB
|
| 33 |
+
|
| 34 |
+
2025-09-20:08:15:28,125 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 35 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 36 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 37 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 38 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 39 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 40 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 41 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 42 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 43 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 44 |
+
2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 45 |
+
2025-09-20:08:15:28,127 INFO [task.py:430] Building contexts for mgsm_native_cot_zh on rank 0...
|
| 46 |
+
100%|██████████| 250/250 [00:00<00:00, 1459.14it/s]
|
| 47 |
+
2025-09-20:08:15:28,305 INFO [task.py:430] Building contexts for mgsm_native_cot_th on rank 0...
|
| 48 |
+
100%|██████████| 250/250 [00:00<00:00, 1507.22it/s]
|
| 49 |
+
2025-09-20:08:15:28,476 INFO [task.py:430] Building contexts for mgsm_native_cot_te on rank 0...
|
| 50 |
+
100%|██████████| 250/250 [00:00<00:00, 1510.62it/s]
|
| 51 |
+
2025-09-20:08:15:28,647 INFO [task.py:430] Building contexts for mgsm_native_cot_sw on rank 0...
|
| 52 |
+
100%|██████████| 250/250 [00:00<00:00, 1538.05it/s]
|
| 53 |
+
2025-09-20:08:15:28,816 INFO [task.py:430] Building contexts for mgsm_native_cot_ru on rank 0...
|
| 54 |
+
100%|██████████| 250/250 [00:00<00:00, 1407.80it/s]
|
| 55 |
+
2025-09-20:08:15:28,999 INFO [task.py:430] Building contexts for mgsm_native_cot_ja on rank 0...
|
| 56 |
+
100%|██████████| 250/250 [00:00<00:00, 1480.79it/s]
|
| 57 |
+
2025-09-20:08:15:29,174 INFO [task.py:430] Building contexts for mgsm_native_cot_fr on rank 0...
|
| 58 |
+
100%|██████████| 250/250 [00:00<00:00, 1487.75it/s]
|
| 59 |
+
2025-09-20:08:15:29,353 INFO [task.py:430] Building contexts for mgsm_native_cot_es on rank 0...
|
| 60 |
+
100%|██████████| 250/250 [00:00<00:00, 1544.75it/s]
|
| 61 |
+
2025-09-20:08:15:29,520 INFO [task.py:430] Building contexts for mgsm_native_cot_en on rank 0...
|
| 62 |
+
100%|██████████| 250/250 [00:00<00:00, 1546.41it/s]
|
| 63 |
+
2025-09-20:08:15:29,687 INFO [task.py:430] Building contexts for mgsm_native_cot_de on rank 0...
|
| 64 |
+
100%|██████████| 250/250 [00:00<00:00, 1523.06it/s]
|
| 65 |
+
2025-09-20:08:15:29,857 INFO [task.py:430] Building contexts for mgsm_native_cot_bn on rank 0...
|
| 66 |
+
100%|██████████| 250/250 [00:00<00:00, 1512.86it/s]
|
| 67 |
+
2025-09-20:08:15:30,028 INFO [evaluator.py:495] Running generate_until requests
|
| 68 |
+
Processed prompts: 100%|██████████| 250/250 [03:23<00:00, 1.23it/s, est. speed input: 124.59 toks/s, output: 2248.44 toks/s]
|
| 69 |
+
final processing: 250it [00:00, 298229.81it/s] | 1/2750 [03:23<155:02:20, 203.03s/it] 124.59 toks/s, output: 2248.44 toks/s]
|
| 70 |
+
Processed prompts: 100%|██████████| 250/250 [02:56<00:00, 1.42it/s, est. speed input: 247.19 toks/s, output: 1630.51 toks/s]
|
| 71 |
+
final processing: 250it [00:00, 286888.10it/s] | 251/2750 [06:19<52:55, 1.27s/it] 247.19 toks/s, output: 1630.51 toks/s]
|
| 72 |
+
Processed prompts: 100%|██████████| 250/250 [14:58<00:00, 3.59s/it, est. speed input: 125.62 toks/s, output: 3596.00 toks/s]
|
| 73 |
+
final processing: 250it [00:00, 337923.30it/s] | 501/2750 [21:17<1:38:47, 2.64s/it]: 125.62 toks/s, output: 3596.00 toks/s]
|
| 74 |
+
WARNING 09-20 08:30:23 scheduler.py:1481] Sequence group 749 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=1
|
| 75 |
+
Processed prompts: 100%|██████████| 250/250 [27:49<00:00, 6.68s/it, est. speed input: 22.15 toks/s, output: 2927.12 toks/s]
|
| 76 |
+
final processing: 250it [00:00, 259163.62it/s] | 751/2750 [49:07<2:29:15, 4.48s/it]: 22.15 toks/s, output: 2927.12 toks/s]
|
| 77 |
+
WARNING 09-20 08:43:05 scheduler.py:1481] Sequence group 987 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=51
|
| 78 |
+
WARNING 09-20 08:46:25 scheduler.py:1481] Sequence group 936 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=101
|
| 79 |
+
WARNING 09-20 08:52:13 scheduler.py:1481] Sequence group 885 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=151
|
| 80 |
+
Processed prompts: 100%|██████████| 250/250 [03:26<00:00, 1.21it/s, est. speed input: 160.95 toks/s, output: 2481.49 toks/s]
|
| 81 |
+
final processing: 250it [00:00, 271440.85it/s] | 1001/2750 [52:34<1:28:34, 3.04s/it] 160.39 toks/s, output: 2385.40 toks/s]
|
| 82 |
+
Processed prompts: 100%|██████████| 250/250 [06:36<00:00, 1.58s/it, est. speed input: 75.03 toks/s, output: 3118.80 toks/s]
|
| 83 |
+
final processing: 250it [00:00, 440393.11it/s] | 1251/2750 [59:10<1:02:49, 2.51s/it] 74.36 toks/s, output: 2968.35 toks/s]
|
| 84 |
+
Processed prompts: 100%|██████████| 250/250 [03:18<00:00, 1.26it/s, est. speed input: 147.77 toks/s, output: 2350.89 toks/s]
|
| 85 |
+
final processing: 250it [00:00, 228846.79it/s] | 1501/2750 [1:02:28<40:09, 1.93s/it] 147.77 toks/s, output: 2350.89 toks/s]
|
| 86 |
+
Processed prompts: 100%|██████████| 250/250 [06:00<00:00, 1.44s/it, est. speed input: 80.18 toks/s, output: 3303.78 toks/s]
|
| 87 |
+
final processing: 250it [00:00, 304730.02it/s] | 1751/2750 [1:08:28<29:28, 1.77s/it] 79.75 toks/s, output: 3193.63 toks/s]
|
| 88 |
+
Processed prompts: 100%|██████████| 250/250 [03:54<00:00, 1.07it/s, est. speed input: 104.28 toks/s, output: 3089.34 toks/s]
|
| 89 |
+
final processing: 250it [00:00, 289422.03it/s] | 2001/2750 [1:12:23<18:47, 1.51s/it] 103.64 toks/s, output: 2920.65 toks/s]
|
| 90 |
+
Processed prompts: 100%|██████████| 250/250 [03:22<00:00, 1.23it/s, est. speed input: 153.49 toks/s, output: 2475.48 toks/s]
|
| 91 |
+
final processing: 250it [00:00, 379094.72it/s]▏ | 2251/2750 [1:15:46<10:42, 1.29s/it] 153.16 toks/s, output: 2378.31 toks/s]
|
| 92 |
+
Processed prompts: 100%|██████████| 250/250 [07:54<00:00, 1.90s/it, est. speed input: 163.92 toks/s, output: 3619.06 toks/s]
|
| 93 |
+
final processing: 250it [00:00, 384093.77it/s]█ | 2501/2750 [1:23:40<06:07, 1.48s/it] 163.92 toks/s, output: 3619.06 toks/s]
|
| 94 |
+
Running generate_until requests: 100%|██████████| 2750/2750 [1:23:40<00:00, 1.83s/it]
|
| 95 |
+
INFO 09-20 09:39:27 multiproc_worker_utils.py:133] Terminating local vLLM worker processes
|
| 96 |
+
wandb: WARNING Serializing object of type str that is 135596 bytes
|
| 97 |
+
wandb: WARNING Serializing object of type str that is 106518 bytes
|
| 98 |
+
wandb: WARNING Serializing object of type str that is 104812 bytes
|
| 99 |
+
wandb: WARNING Serializing object of type str that is 110652 bytes
|
| 100 |
+
wandb: WARNING Serializing object of type str that is 113566 bytes
|
| 101 |
+
wandb: WARNING Serializing object of type str that is 115434 bytes
|
| 102 |
+
wandb: WARNING Serializing object of type str that is 135596 bytes
|
| 103 |
+
wandb: WARNING Serializing object of type str that is 106518 bytes
|
| 104 |
+
wandb: WARNING Serializing object of type str that is 104812 bytes
|
| 105 |
+
wandb: WARNING Serializing object of type str that is 110652 bytes
|
| 106 |
+
wandb: WARNING Serializing object of type str that is 113566 bytes
|
| 107 |
+
wandb: WARNING Serializing object of type str that is 115434 bytes
|
| 108 |
+
wandb: WARNING Serializing object of type str that is 133206 bytes
|
| 109 |
+
wandb: WARNING Serializing object of type str that is 145832 bytes
|
| 110 |
+
wandb: WARNING Serializing object of type str that is 144918 bytes
|
| 111 |
+
wandb: WARNING Serializing object of type str that is 127702 bytes
|
| 112 |
+
wandb: WARNING Serializing object of type str that is 111602 bytes
|
| 113 |
+
wandb: WARNING Serializing object of type str that is 168368 bytes
|
| 114 |
+
wandb: WARNING Serializing object of type str that is 149048 bytes
|
| 115 |
+
wandb: WARNING Serializing object of type str that is 147662 bytes
|
| 116 |
+
wandb: WARNING Serializing object of type str that is 136456 bytes
|
| 117 |
+
wandb: WARNING Serializing object of type str that is 128568 bytes
|
| 118 |
+
wandb: WARNING Serializing object of type str that is 133206 bytes
|
| 119 |
+
wandb: WARNING Serializing object of type str that is 145832 bytes
|
| 120 |
+
wandb: WARNING Serializing object of type str that is 144918 bytes
|
| 121 |
+
wandb: WARNING Serializing object of type str that is 127702 bytes
|
| 122 |
+
wandb: WARNING Serializing object of type str that is 111602 bytes
|
| 123 |
+
wandb: WARNING Serializing object of type str that is 168368 bytes
|
| 124 |
+
wandb: WARNING Serializing object of type str that is 149048 bytes
|
| 125 |
+
wandb: WARNING Serializing object of type str that is 147662 bytes
|
| 126 |
+
wandb: WARNING Serializing object of type str that is 136456 bytes
|
| 127 |
+
wandb: WARNING Serializing object of type str that is 128568 bytes
|
| 128 |
+
wandb: WARNING Serializing object of type str that is 111938 bytes
|
| 129 |
+
wandb: WARNING Serializing object of type str that is 114530 bytes
|
| 130 |
+
wandb: WARNING Serializing object of type str that is 141662 bytes
|
| 131 |
+
wandb: WARNING Serializing object of type str that is 139800 bytes
|
| 132 |
+
wandb: WARNING Serializing object of type str that is 118212 bytes
|
| 133 |
+
wandb: WARNING Serializing object of type str that is 104678 bytes
|
| 134 |
+
wandb: WARNING Serializing object of type str that is 140896 bytes
|
| 135 |
+
wandb: WARNING Serializing object of type str that is 130060 bytes
|
| 136 |
+
wandb: WARNING Serializing object of type str that is 129634 bytes
|
| 137 |
+
wandb: WARNING Serializing object of type str that is 124454 bytes
|
| 138 |
+
wandb: WARNING Serializing object of type str that is 111938 bytes
|
| 139 |
+
wandb: WARNING Serializing object of type str that is 114530 bytes
|
| 140 |
+
wandb: WARNING Serializing object of type str that is 141662 bytes
|
| 141 |
+
wandb: WARNING Serializing object of type str that is 139800 bytes
|
| 142 |
+
wandb: WARNING Serializing object of type str that is 118212 bytes
|
| 143 |
+
wandb: WARNING Serializing object of type str that is 104678 bytes
|
| 144 |
+
wandb: WARNING Serializing object of type str that is 140896 bytes
|
| 145 |
+
wandb: WARNING Serializing object of type str that is 130060 bytes
|
| 146 |
+
wandb: WARNING Serializing object of type str that is 129634 bytes
|
| 147 |
+
wandb: WARNING Serializing object of type str that is 124454 bytes
|
| 148 |
+
wandb: WARNING Serializing object of type str that is 109374 bytes
|
| 149 |
+
wandb: WARNING Serializing object of type str that is 121236 bytes
|
| 150 |
+
wandb: WARNING Serializing object of type str that is 113208 bytes
|
| 151 |
+
wandb: WARNING Serializing object of type str that is 109374 bytes
|
| 152 |
+
wandb: WARNING Serializing object of type str that is 121236 bytes
|
| 153 |
+
wandb: WARNING Serializing object of type str that is 113208 bytes
|
| 154 |
+
wandb: WARNING Serializing object of type str that is 142386 bytes
|
| 155 |
+
wandb: WARNING Serializing object of type str that is 110008 bytes
|
| 156 |
+
wandb: WARNING Serializing object of type str that is 113878 bytes
|
| 157 |
+
wandb: WARNING Serializing object of type str that is 105530 bytes
|
| 158 |
+
wandb: WARNING Serializing object of type str that is 122158 bytes
|
| 159 |
+
wandb: WARNING Serializing object of type str that is 108502 bytes
|
| 160 |
+
wandb: WARNING Serializing object of type str that is 113968 bytes
|
| 161 |
+
wandb: WARNING Serializing object of type str that is 107180 bytes
|
| 162 |
+
wandb: WARNING Serializing object of type str that is 121728 bytes
|
| 163 |
+
wandb: WARNING Serializing object of type str that is 106106 bytes
|
| 164 |
+
wandb: WARNING Serializing object of type str that is 117514 bytes
|
| 165 |
+
wandb: WARNING Serializing object of type str that is 142386 bytes
|
| 166 |
+
wandb: WARNING Serializing object of type str that is 110008 bytes
|
| 167 |
+
wandb: WARNING Serializing object of type str that is 113878 bytes
|
| 168 |
+
wandb: WARNING Serializing object of type str that is 105530 bytes
|
| 169 |
+
wandb: WARNING Serializing object of type str that is 122158 bytes
|
| 170 |
+
wandb: WARNING Serializing object of type str that is 108502 bytes
|
| 171 |
+
wandb: WARNING Serializing object of type str that is 113968 bytes
|
| 172 |
+
wandb: WARNING Serializing object of type str that is 107180 bytes
|
| 173 |
+
wandb: WARNING Serializing object of type str that is 121728 bytes
|
| 174 |
+
wandb: WARNING Serializing object of type str that is 106106 bytes
|
| 175 |
+
wandb: WARNING Serializing object of type str that is 117514 bytes
|
| 176 |
+
2025-09-20:09:39:36,339 INFO [evaluation_tracker.py:207] Saving results aggregated
2025-09-20:09:39:36,444 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_bn
2025-09-20:09:39:36,612 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_de
2025-09-20:09:39:36,770 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_en
2025-09-20:09:39:36,926 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_es
2025-09-20:09:39:37,090 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_fr
2025-09-20:09:39:37,237 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_ja
2025-09-20:09:39:37,383 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_ru
2025-09-20:09:39:37,536 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_sw
2025-09-20:09:39:37,773 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_te
2025-09-20:09:39:37,940 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_th
2025-09-20:09:39:38,087 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_zh
vllm (pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000), gen_kwargs: (max_gen_toks=20000), limit: None, num_fewshot: None, batch_size: auto
| Tasks |Version| Filter |n-shot| Metric | |Value| |Stderr|
|------------------|------:|----------------|-----:|-----------|---|----:|---|------|
|mgsm_native_cot_bn| 4|flexible-extract| 0|exact_match|↑ |0.528|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
|mgsm_native_cot_de| 4|flexible-extract| 0|exact_match|↑ |0.740|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
|mgsm_native_cot_en| 4|flexible-extract| 0|exact_match|↑ |0.872|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
|mgsm_native_cot_es| 4|flexible-extract| 0|exact_match|↑ |0.780|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.008|± | N/A|
|mgsm_native_cot_fr| 4|flexible-extract| 0|exact_match|↑ |0.760|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.004|± | N/A|
|mgsm_native_cot_ja| 4|flexible-extract| 0|exact_match|↑ |0.644|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.008|± | N/A|
|mgsm_native_cot_ru| 4|flexible-extract| 0|exact_match|↑ |0.832|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.004|± | N/A|
|mgsm_native_cot_sw| 4|flexible-extract| 0|exact_match|↑ |0.008|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
|mgsm_native_cot_te| 4|flexible-extract| 0|exact_match|↑ |0.120|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.020|± | N/A|
|mgsm_native_cot_th| 4|flexible-extract| 0|exact_match|↑ |0.652|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.260|± | N/A|
|mgsm_native_cot_zh| 4|flexible-extract| 0|exact_match|↑ |0.784|± | N/A|
| | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
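The two rows per task correspond to the two answer-extraction filters defined in the task configs captured later in debug.log: strict-match requires a language-specific answer phrase (e.g. "The answer is N"), while flexible-extract keeps the last number-like span in the generation, which is why it scores far higher here. A hedged sketch of the flexible-extract behaviour using the regex from those configs (not the harness's own filter code):

```python
import re

# Regex copied from the task configs logged in debug.log below; group_select=-1
# followed by take_first amounts to keeping the last number-like match.
FLEXIBLE = re.compile(r"(-?[$0-9.,]{2,})|(-?[0-9]+)")

def flexible_extract(generation: str) -> str | None:
    """Return the last number-like span in a model generation, if any."""
    matches = FLEXIBLE.findall(generation)
    if not matches:
        return None
    last = matches[-1]            # take the final match (group_select: -1)
    return last[0] or last[1]     # whichever alternation actually matched

print(flexible_extract("She has 6 boxes of 7 apples, so 6 * 7 = 42"))  # -> "42"
```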
wandb/run-20250920_081121-2oxex54w/files/requirements.txt
ADDED
@@ -0,0 +1,378 @@
setproctitle==1.2.2
colorama==0.4.6
psutil==7.0.0
GitPython==3.1.43
docker-pycreds==0.4.0
gitdb==4.0.11
opencv-python==4.10.0.84
sentry-sdk==2.19.0
setproctitle==1.3.4
smmap==5.0.1
hjson==3.1.0
deepspeed==0.16.7
transformers==4.46.1
transformers==4.54.1
protobuf==6.31.1
accelerate==1.6.0
docopt==0.6.2
gguf==0.10.0
webencodings==0.5.1
pickleshare==0.7.5
fastjsonschema==2.21.1
backcall==0.2.0
tinycss2==1.4.0
soupsieve==2.7
pandocfilters==1.5.1
mistune==3.1.3
jupyterlab_pygments==0.3.0
defusedxml==0.7.1
bleach==6.2.0
yarg==0.1.9
ipython==8.12.3
beautifulsoup4==4.13.4
nbformat==5.10.4
nbclient==0.10.2
nbconvert==7.16.6
pipreqs==0.5.0
wandb==0.21.0
trl==0.17.0
lm_eval==0.4.4
langid==1.1.6
annotated-types==0.7.0
vllm==0.6.4.post1
typing-inspection==0.4.1
xformers==0.0.28.post3
pydantic_core==2.33.2
outlines==0.0.46
pydantic==2.11.7
compressed-tensors==0.8.0
click==8.2.1
lightning-utilities==0.15.0
torchmetrics==1.8.0
nvidia-ml-py==13.580.65
blessed==1.21.0
gpustat==1.1.1
nvidia-cufile-cu12==1.13.1.3
nvidia-cusparselt-cu12==0.6.2
mpmath==1.3.0
typing_extensions==4.12.2
sympy==1.13.1
pillow==11.0.0
nvidia-nvtx-cu12==12.4.127
nvidia-nvjitlink-cu12==12.4.127
nvidia-nccl-cu12==2.21.5
numpy==2.1.2
nvidia-curand-cu12==10.3.5.147
networkx==3.3
nvidia-cufft-cu12==11.2.1.3
nvidia-cuda-runtime-cu12==12.4.127
nvidia-cuda-nvrtc-cu12==12.4.127
nvidia-cuda-cupti-cu12==12.4.127
nvidia-cublas-cu12==12.4.5.8
MarkupSafe==2.1.5
setuptools==80.9.0
certifi==2025.8.3
einops==0.8.1
fsspec==2024.6.1
Jinja2==3.1.4
nvidia-cusolver-cu12==11.6.1.9
urllib3==2.5.0
tqdm==4.67.1
safetensors==0.6.2
regex==2025.7.34
PyYAML==6.0.2
packaging==25.0
idna==3.10
filelock==3.13.1
hf-xet==1.1.8
torch==2.6.0+cu124
charset-normalizer==3.4.3
requests==2.32.5
huggingface-hub==0.34.4
torchaudio==2.6.0+cu124
liger_kernel==0.6.2
nvidia-cusparse-cu12==12.3.1.170
nvidia-cudnn-cu12==9.1.0.70
torchvision==0.21.0+cu124
blingfire==0.1.8
triton==3.1.0
sentence-transformers==5.1.0
tabledata==1.3.4
lxml==5.3.1
accelerate==1.5.2
absl-py==2.1.0
Markdown==3.7
uvicorn==0.34.0
ruff==0.11.0
nvidia-cuda-runtime-cu12==12.4.127
airportsdata==20250224
nvidia-cusolver-cu12==11.6.1.9
astor==0.8.1
DataProperty==1.1.0
lm-format-enforcer==0.10.11
mdurl==0.1.2
nvidia-nccl-cu12==2.21.5
tabulate==0.9.0
python-dotenv==1.0.1
h5py==3.13.0
chardet==5.2.0
cupy-cuda12x==13.4.0
tiktoken==0.9.0
jiter==0.8.2
Pygments==2.19.1
typing_extensions==4.12.2
datasets==3.1.0
zipp==3.21.0
more-itertools==10.6.0
MarkupSafe==2.1.5
comm==0.2.2
pycountry==24.6.1
partial-json-parser==0.2.1.1.post5
gradio==4.44.0
prometheus_client==0.21.1
six==1.17.0
pytz==2025.1
unsloth_zoo==2025.3.12
starlette==0.46.0
llvmlite==0.44.0
peft==0.14.0
aiohttp==3.11.13
aiofiles==23.2.1
importlib_resources==6.5.2
nvidia-nvjitlink-cu12==12.4.127
semantic-version==2.10.0
decorator==5.2.1
nvidia-cublas-cu12==12.4.5.8
contourpy==1.3.1
torch==2.6.0
pytest==8.3.5
fastapi==0.115.11
seaborn==0.13.2
sympy==1.13.1
threadpoolctl==3.6.0
networkx==3.4.2
python-dateutil==2.9.0.post0
depyf==0.18.0
nvidia-ml-py==12.570.86
jedi==0.19.2
joblib==1.4.2
referencing==0.36.2
diskcache==5.6.3
httpcore==1.0.7
httpx==0.28.1
pyairports==2.1.1
protobuf==3.20.3
portalocker==3.1.1
nvidia-cudnn-cu12==9.1.0.70
Pebble==5.1.0
fsspec==2024.9.0
hf_transfer==0.1.9
ptyprocess==0.7.0
pexpect==4.9.0
nvidia-cuda-nvrtc-cu12==12.4.127
scipy==1.15.2
sentencepiece==0.2.0
cycler==0.12.1
packaging==24.2
openai==1.56.1
frozenlist==1.5.0
lark==1.2.2
filelock==3.17.0
opentelemetry-exporter-otlp==1.26.0
yarl==1.18.3
rouge_score==0.1.2
grpcio==1.70.0
googleapis-common-protos==1.70.0
aiohappyeyeballs==2.4.6
multiprocess==0.70.16
tornado==6.4.2
numpy==1.26.4
nltk==3.9.1
pip==25.0
charset-normalizer==3.3.2
prometheus-fastapi-instrumentator==7.0.2
numexpr==2.10.2
pyarrow==19.0.1
attrs==25.1.0
lm_eval==0.4.4
urllib3==2.3.0
mkl_random==1.2.8
httptools==0.6.4
gpustat==1.1.1
pluggy==1.5.0
huggingface-hub==0.30.2
triton==3.1.0
idna==3.7
ipython==8.20.0
pyparsing==3.2.1
rich-toolkit==0.13.2
googletrans==4.0.2
jupyter_core==5.7.2
zstandard==0.23.0
aiosignal==1.3.2
tyro==0.9.17
traitlets==5.14.3
h11==0.14.0
outlines==0.1.11
jupyter_client==8.6.3
loralib==0.1.2
kiwisolver==1.4.8
blake3==1.0.4
nvidia-cusparselt-cu12==0.6.2
rich==13.9.4
hf-xet==1.0.2
certifi==2025.1.31
wheel==0.45.1
pybind11==2.13.6
regex==2024.11.6
mpmath==1.3.0
transformers==4.51.3
flash_attn==2.7.4.post1
nvidia-curand-cu12==10.3.5.147
PySocks==1.7.1
gmpy2==2.2.1
iniconfig==2.0.0
pandas==2.2.3
Jinja2==3.1.5
msgpack==1.1.0
gguf==0.16.2
email_validator==2.2.0
tzdata==2025.1
cut-cross-entropy==25.1.1
tensorboard==2.19.0
matplotlib==3.10.1
jsonschema-specifications==2024.10.1
unsloth==2025.3.14
Werkzeug==3.1.3
opentelemetry-proto==1.26.0
fastrlock==0.8.3
dnspython==2.7.0
typeguard==4.4.2
opentelemetry-api==1.26.0
platformdirs==4.3.6
importlib_metadata==8.0.0
opentelemetry-semantic-conventions==0.47b0
sniffio==1.3.1
nvidia-cuda-cupti-cu12==12.4.127
scikit-learn==1.6.1
hpack==4.1.0
parso==0.8.4
torchaudio==2.6.0
xgrammar==0.1.18
executing==2.2.0
mkl_fft==1.3.11
vllm==0.8.4
word2number==1.1
pure_eval==0.2.3
watchfiles==1.0.4
pydub==0.25.1
mbstrdecoder==1.1.4
markdown-it-py==3.0.0
jsonschema==4.23.0
msgspec==0.19.0
rpds-py==0.23.1
wandb==0.19.9
matplotlib-inline==0.1.7
requests==2.32.3
interegular==0.3.3
pytablewriter==1.2.1
orjson==3.10.15
xformers==0.0.29.post2
fastapi-cli==0.0.7
mkl-service==2.4.0
opencv-python-headless==4.11.0.86
prompt_toolkit==3.0.50
trl==0.16.1
debugpy==1.8.13
pydantic==2.10.6
stack-data==0.6.3
tqdm-multiprocess==0.0.11
gradio_client==1.3.0
dill==0.3.8
evaluate==0.4.3
nvidia-cufft-cu12==11.2.1.3
nest-asyncio==1.6.0
pyzmq==26.2.1
tensorboard-data-server==0.7.2
docstring_parser==0.16
click==8.1.8
psutil==7.0.0
annotated-types==0.7.0
ninja==1.11.1.4
pillow==10.4.0
tcolorpy==0.1.7
einops==0.8.1
wcwidth==0.2.13
typer==0.15.2
tqdm==4.67.1
tomlkit==0.12.0
ipykernel==6.28.0
diffusers==0.32.2
mistral_common==1.5.4
setuptools==75.8.0
h2==4.2.0
cachetools==5.5.2
wrapt==1.17.2
pydantic_core==2.27.2
ffmpy==0.5.0
sacrebleu==2.5.1
outlines_core==0.1.26
jsonlines==4.0.0
fonttools==4.56.0
nvidia-nvtx-cu12==12.4.127
safetensors==0.5.3
opentelemetry-exporter-otlp-proto-grpc==1.26.0
bitsandbytes==0.45.3
nanobind==2.6.1
tokenizers==0.21.1
propcache==0.3.0
distro==1.9.0
python-multipart==0.0.20
uvloop==0.21.0
liger_kernel==0.5.5
python-json-logger==3.3.0
multidict==6.1.0
ray==2.43.0
opentelemetry-exporter-otlp-proto-http==1.26.0
typepy==1.3.4
torchvision==0.21.0
PyYAML==6.0.2
xxhash==3.5.0
anthropic==0.49.0
py-cpuinfo==9.0.0
compressed-tensors==0.9.3
opentelemetry-exporter-otlp-proto-common==1.26.0
opentelemetry-sdk==1.26.0
shtab==1.7.1
websockets==12.0
numba==0.61.2
llguidance==0.7.13
hyperframe==6.1.0
anyio==4.8.0
asttokens==3.0.0
blessed==1.20.0
colorama==0.4.6
shellingham==1.5.4
Brotli==1.0.9
sqlitedict==2.1.0
nvidia-cusparse-cu12==12.3.1.170
Deprecated==1.2.18
cloudpickle==3.1.1
pathvalidate==3.2.3
opentelemetry-semantic-conventions-ai==0.4.3
platformdirs==4.2.2
autocommand==2.2.2
backports.tarfile==1.2.0
importlib_metadata==8.0.0
inflect==7.3.1
jaraco.collections==5.1.0
jaraco.context==5.3.0
jaraco.functools==4.0.1
jaraco.text==3.12.1
more-itertools==10.3.0
packaging==24.2
tomli==2.0.1
typeguard==4.3.0
typing_extensions==4.12.2
wheel==0.45.1
zipp==3.19.2
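Note that this captured requirements.txt merges pins from more than one environment, so several packages appear multiple times with conflicting versions (for example transformers at 4.46.1, 4.54.1 and 4.51.3, and vllm at 0.6.4.post1 and 0.8.4); it documents the run rather than serving as a directly installable lock file. A small hedged sketch for surfacing such duplicate pins before reusing it:

```python
from collections import defaultdict
from pathlib import Path

# Hedged helper (not part of the run): list packages pinned to more than one
# version in the captured requirements.txt above.
reqs = Path("wandb/run-20250920_081121-2oxex54w/files/requirements.txt")
pins = defaultdict(set)
for line in reqs.read_text().splitlines():
    if "==" in line:
        name, version = line.split("==", 1)
        pins[name.lower()].add(version)

for name, versions in sorted(pins.items()):
    if len(versions) > 1:
        print(f"{name}: {sorted(versions)}")  # e.g. transformers: ['4.46.1', '4.51.3', '4.54.1']
```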
wandb/run-20250920_081121-2oxex54w/files/wandb-metadata.json
ADDED
@@ -0,0 +1,110 @@
{
    "os": "Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28",
    "python": "CPython 3.11.11",
    "startedAt": "2025-09-20T12:11:21.301942Z",
    "args": [
        "--model",
        "vllm",
        "--model_args",
        "pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000",
        "--tasks",
        "mgsm_native_cot_ja,mgsm_native_cot_en,mgsm_native_cot_th,mgsm_native_cot_es,mgsm_native_cot_bn,mgsm_native_cot_te,mgsm_native_cot_fr,mgsm_native_cot_zh,mgsm_native_cot_sw,mgsm_native_cot_de,mgsm_native_cot_ru",
        "--batch_size",
        "auto",
        "--apply_chat_template",
        "--output_path",
        "ckpts/rerun",
        "--log_samples",
        "--gen_kwargs",
        "max_gen_toks=20000",
        "--wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto"
    ],
    "program": "/home/jdhwang/.conda/envs/llm/bin/lm_eval",
    "git": {
        "remote": "[email protected]:jd730/BRIDGE-private.git",
        "commit": "bb8b2be1f7420f9c6a3d65f0eaf3072732d73123"
    },
    "email": "[email protected]",
    "root": "ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309",
    "host": "node1803",
    "executable": "/orcd/home/001/jdhwang/.conda/envs/llm/bin/python",
    "cpu_count": 64,
    "cpu_count_logical": 128,
    "gpu": "NVIDIA H100 80GB HBM3",
    "gpu_count": 2,
    "disk": {
        "/": {
            "total": "464506159104",
            "used": "12268101632"
        }
    },
    "memory": {
        "total": "2163473002496"
    },
    "gpu_nvidia": [
        {
            "name": "NVIDIA H100 80GB HBM3",
            "memoryTotal": "85520809984",
            "cudaCores": 16896,
            "architecture": "Hopper",
            "uuid": "GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b"
        },
        {
            "name": "NVIDIA H100 80GB HBM3",
            "memoryTotal": "85520809984",
            "cudaCores": 16896,
            "architecture": "Hopper",
            "uuid": "GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b"
        }
    ],
    "cudaVersion": "12.4",
    "slurm": {
        "array_job_id": "4452191",
        "array_task_count": "1",
        "array_task_id": "0",
        "array_task_max": "0",
        "array_task_min": "0",
        "array_task_step": "1",
        "cluster_name": "eofe7",
        "conf": "/etc/slurm/slurm.conf",
        "cpus_on_node": "16",
        "cpus_per_task": "16",
        "gpus_on_node": "2",
        "gtids": "0",
        "job_account": "mit_general",
        "job_cpus_per_node": "16",
        "job_end_time": "1758413466",
        "job_gid": "209655",
        "job_gpus": "1,2",
        "job_id": "4452191",
        "job_name": "eval.sh",
        "job_nodelist": "node1803",
        "job_num_nodes": "1",
        "job_partition": "ou_bcs_normal",
        "job_qos": "normal",
        "job_start_time": "1758370266",
        "job_uid": "209655",
        "job_user": "jdhwang",
        "jobid": "4452191",
        "localid": "0",
        "mem_per_node": "131072",
        "nnodes": "1",
        "nodeid": "0",
        "nodelist": "node1803",
        "nprocs": "1",
        "ntasks": "1",
        "ntasks_per_node": "1",
        "oom_kill_step": "0",
        "prio_process": "0",
        "procid": "0",
        "script_context": "prolog_task",
        "submit_dir": "/orcd/home/002/jdhwang/BRIDGE",
        "submit_host": "orcd-login003.mit.edu",
        "task_pid": "1143610",
        "tasks_per_node": "1",
        "topology_addr": "node1803",
        "topology_addr_pattern": "node",
        "tres_per_task": "cpu=16"
    },
    "writerId": "bdsaggp24nt8kfc8qjgq21gi927g7e3o"
}
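The "args" array above is the complete lm-evaluation-harness invocation for this run. Reassembled for convenience (a reconstruction from the captured arguments, not a verbatim copy of the original eval.sh):

```python
import subprocess

# All values below are copied verbatim from the "args" list in wandb-metadata.json.
ckpt = "ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309"
subprocess.run(
    [
        "lm_eval",
        "--model", "vllm",
        "--model_args", f"pretrained={ckpt},dtype=auto,tensor_parallel_size=2,max_model_len=20000",
        "--tasks", "mgsm_native_cot_ja,mgsm_native_cot_en,mgsm_native_cot_th,mgsm_native_cot_es,"
                   "mgsm_native_cot_bn,mgsm_native_cot_te,mgsm_native_cot_fr,mgsm_native_cot_zh,"
                   "mgsm_native_cot_sw,mgsm_native_cot_de,mgsm_native_cot_ru",
        "--batch_size", "auto",
        "--apply_chat_template",
        "--output_path", "ckpts/rerun",
        "--log_samples",
        "--gen_kwargs", "max_gen_toks=20000",
        f"--wandb_args=project=MRPO_eval,name={ckpt},dir={ckpt},resume=auto",
    ],
    check=True,
)
```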
wandb/run-20250920_081121-2oxex54w/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
{"mgsm_native_cot_zh_eval_results":{"path":"media/table/mgsm_native_cot_zh_eval_results_12_2882185f7e816bbcc2fe.table.json","artifact_path":"wandb-client-artifact://nkp1984crhkeh7851auhm8cmiz0c966uge9yj76ipqritrecvc5bfb9c38ag3q5n9lxlxv0a592qujt5hv20d5ou72uzs0da2311ong59bzu3eu2j7sjwoi1ham5i3wi/mgsm_native_cot_zh_eval_results.table.json","_type":"table-file","nrows":500,"size":6816454,"sha256":"2882185f7e816bbcc2fe7db847cf400946e1eecb9c136b26ba49d658f74f6aa1","log_mode":"IMMUTABLE","ncols":8,"_latest_artifact_path":"wandb-client-artifact://kdq56ygszyonuw3iawyllv3myb0yvhb1lelksg4a7blhqf51vl8fzo7pfqegv64u4r6ndaz7pbste9hbc68sir2hvdemy03qgvhjn4ol3q87z73n0lb6yikdy3o1ujb8:latest/mgsm_native_cot_zh_eval_results.table.json"},"mgsm_native_cot_th_eval_results":{"_type":"table-file","sha256":"baa86a95f601e3c721a0fbcf0ee7623b3d2011e2c01da8b0c3cb2d98b16684ac","size":5007472,"path":"media/table/mgsm_native_cot_th_eval_results_11_baa86a95f601e3c721a0.table.json","ncols":8,"_latest_artifact_path":"wandb-client-artifact://9yj2698oonww08k7xlrlqosfnpi8bf8whg3mu81535kr3f8sv1lrk187fyckresyat8nemrzjmbs8y8f5xkb6594i7jjen2yaigaf6909w8ptgr8i6hb1efl3y4culsb:latest/mgsm_native_cot_th_eval_results.table.json","log_mode":"IMMUTABLE","nrows":500,"artifact_path":"wandb-client-artifact://gja5by4u29qqcgmr6sb6dwapnkradcp3asz7dfpbn8wh8omw28idhrqljkjewim9009pew6vmw82s1xubz6lr0x7z2ounjv209iivpf93x9ymx5x83ckxj48a44i7xn0/mgsm_native_cot_th_eval_results.table.json"},"mgsm_native_cot_en_eval_results":{"_latest_artifact_path":"wandb-client-artifact://g1upq5u3jz3pag4sh1m0vjv5rmwplu9a9a9abfqhq83nl6kkyphd4joh0hg9zbe5l3ra4qjxygdt7x7m15gnuguznxif28ptxvea75zhbjlmr8m3vfe265a8dttuhpb1:latest/mgsm_native_cot_en_eval_results.table.json","size":5089950,"artifact_path":"wandb-client-artifact://7qqodhesd67ke2alwfwf9m6q5zw8oj0stuwgoszb386l13ofees3uxxwcly1gij11waas038xcn059krmog8cskfh8ywl5r5qc1o06i1ytq6g709pdq5bik1vgggxm8c/mgsm_native_cot_en_eval_results.table.json","nrows":500,"log_mode":"IMMUTABLE","path":"media/table/mgsm_native_cot_en_eval_results_4_82791ccc59d2c61a093c.table.json","_type":"table-file","ncols":8,"sha256":"82791ccc59d2c61a093cbdd818ae3ff870ab3d55a931b224bfbca4751c1765bf"},"mgsm_native_cot_sw/alias":"mgsm_native_cot_sw","mgsm_native_cot_fr/exact_match,strict-match":0.004,"mgsm_native_cot_ja/alias":"mgsm_native_cot_ja","mgsm_native_cot_sw_eval_results":{"nrows":500,"ncols":8,"size":22151199,"sha256":"d79a57dcd801f08109f8b083cce4884bf51b225d8127bc78429cf19d3df17aad","_type":"table-file","_latest_artifact_path":"wandb-client-artifact://uwdhtiin3cgey08x46sll6rbkjr6kz632eelzimhsh1pxqn6vbrkwczotv3aj0qfan0xj5kffsltbfe2sbqpljvcyl9dqefjwmggqf9tx47zzzeqwdnfw4lkruafdn07:latest/mgsm_native_cot_sw_eval_results.table.json","path":"media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json","artifact_path":"wandb-client-artifact://bbvbahu84etz52vzztm3c6g9ifdlaksmuq7b0ck0nhzo9x19db225j1npqxx09xekaa48m3yu9rsyl247urothkf4emwf6087de2miq7cb7i5z5fba29f6b6vcio5iaw/mgsm_native_cot_sw_eval_results.table.json","log_mode":"IMMUTABLE"},"mgsm_native_cot_te/exact_match,flexible-extract":0.12,"mgsm_native_cot_ja_eval_results":{"_type":"table-file","artifact_path":"wandb-client-artifact://iwu92mws1e8f263xazaxilfbyhxdxe7hpkir0numy6cug9tf3r48relnykckfq0j3aii4hbodiln44j4iqo7fexjvnhuano4k9rh1t048hfrg7eh4lo7p6bc5x5ec8l9/mgsm_native_cot_ja_eval_results.table.json","ncols":8,"path":"media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json","log_mode":"IMMUTABLE","_latest_artifact_path":"wandb-client-artifact://f5tg80y2zzbku7qko
irlmlhz7d02n91o6hzsfj91cws7t60gt015qockdoscq97c4gcpn4sm3ggetskv44zqylmqai9i2j5f1no6s2eanygyakr7z04e9g4pqbwq82v2:latest/mgsm_native_cot_ja_eval_results.table.json","size":19900362,"nrows":500,"sha256":"098df869a17abf277e8cf3b5d4966de124ba0fed82b267159ef0c1d32c7f38a2"},"mgsm_native_cot_sw/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_th/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_de/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_th/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_te/alias":"mgsm_native_cot_te","mgsm_native_cot_zh/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ja/exact_match,strict-match":0.008,"mgsm_native_cot_zh/alias":"mgsm_native_cot_zh","mgsm_native_cot_es/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ru_eval_results":{"_type":"table-file","sha256":"19e62e412810312c2375525d388b308579f502daf4537e08ea3e97b90b3ef016","_latest_artifact_path":"wandb-client-artifact://ssphf9mv1o6gb0j85z3gfrhwargykdb0luavoyk0pha4wg7ovjkne5oln8tfzygwi6tfmuvgu0fdjl2noewx4kvqtjovdj87tt3vf6tnucvvookj2m9skxydcb1q3ikr:latest/mgsm_native_cot_ru_eval_results.table.json","nrows":500,"log_mode":"IMMUTABLE","artifact_path":"wandb-client-artifact://nju8zlsx5likjjdiq9ix9kpfbd128ffhidfei5n1o9ns883akydn3oe0edbq588qwtck6y53yar6vqg4ghqytfvb2udoxiaj3jqdspbze43bveqr2untjk3r5h8kov6v/mgsm_native_cot_ru_eval_results.table.json","ncols":8,"path":"media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json","size":13109180},"mgsm_native_cot_th/alias":"mgsm_native_cot_th","mgsm_native_cot_te/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr/alias":"mgsm_native_cot_fr","mgsm_native_cot_ru/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ja/exact_match,flexible-extract":0.644,"_timestamp":1.7583755761531694e+09,"mgsm_native_cot_es/exact_match,flexible-extract":0.78,"mgsm_native_cot_bn_eval_results":{"size":16518181,"sha256":"f5e72dc1f8666e2a7b2dd420223eda667dbec11d2c10570f946fcc38e857476a","_latest_artifact_path":"wandb-client-artifact://yr3i8r2cgym32wmpmzdoi89agvfbqf8rkmcw5czgif8twgbpd2ifhdzum1b0720p6gh46uxwok7zj56ycgodgqetjrxiin5dio9y4ubjuzh8wprfviboyo88it4fowsi:latest/mgsm_native_cot_bn_eval_results.table.json","path":"media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json","nrows":500,"_type":"table-file","ncols":8,"artifact_path":"wandb-client-artifact://7pfp3stdhg0ptnttd5vi0di3j2rpm43qn1obywl8v754gx2yqso9s3qk71no2ddq1zu4czdztrn8shcqof2idvturotswrgh2ohkliv477lbym5tnu7mg2eksb5hgniw/mgsm_native_cot_bn_eval_results.table.json","log_mode":"IMMUTABLE"},"_step":12,"mgsm_native_cot_sw/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_en/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_sw/exact_match,strict-match":0,"mgsm_native_cot_bn/exact_match,flexible-extract":0.528,"mgsm_native_cot_zh/exact_match,flexible-extract":0.784,"mgsm_native_cot_es_eval_results":{"nrows":500,"log_mode":"IMMUTABLE","sha256":"db5ed66b097e218c4da6ade4820a3b57500460332d23695a9d082453f9eaef4e","path":"media/table/mgsm_native_cot_es_eval_results_5_db5ed66b097e218c4da6.table.json","size":8588068,"_type":"table-file","_latest_artifact_path":"wandb-client-artifact://ux5vlh8wacyfuof7pc4t55agsyg3nwzac4w6xfmjhi2j5z63gc767hzmqva63i1z2shq9549vn7ircit5e7xf1heikgjioppjtr6mic5petyfizupkh85owvqmlahyde:latest/mgsm_native_cot_es_eval_results.table.json","artifact_path":"wandb-client-artifact://3ez40i75t7j3zaj3xnzyhfqq39henad90lipliml5f3bcb1h4b9yreifkue9u5zfzv360
fvrug7fiotercqa4ik31tivrzppgrqr86mm1isvaggoe1mlmchqhpmtl8k9/mgsm_native_cot_es_eval_results.table.json","ncols":8},"mgsm_native_cot_zh/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_te_eval_results":{"_type":"table-file","_latest_artifact_path":"wandb-client-artifact://3wvd0feekbllytg5c07f216mnn6ugilhacj5j353flytf0xzchfj577v6n6n3wms0wdkeamvey7fukilst8v5zvf7jzzof8snmqjy0ikvysqf8nvaelhir90xv3xfvxq:latest/mgsm_native_cot_te_eval_results.table.json","path":"media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json","artifact_path":"wandb-client-artifact://wu0kyoyi54m51v0k8rl2digz31m6mfp2r40fyzjzjr1ruo0fp8df41iqiin5ieeayd14bj5s3ete22cd5qg0jjmve9et5xjxw6z05pltmxvmlam7rmr9o6y8p7epty38/mgsm_native_cot_te_eval_results.table.json","log_mode":"IMMUTABLE","size":23468672,"ncols":8,"sha256":"c0fc0c81b8662e04cc0e8fc30ac6053adcf13bd384f959eee54fbeb5039dbc01","nrows":500},"mgsm_native_cot_th/exact_match,flexible-extract":0.652,"mgsm_native_cot_de/alias":"mgsm_native_cot_de","mgsm_native_cot_ru/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_ru/exact_match,strict-match":0.004,"mgsm_native_cot_en/exact_match,strict-match":0,"mgsm_native_cot_de_eval_results":{"_latest_artifact_path":"wandb-client-artifact://l92nyplmx9l83jo3sw80tinrvnfoko7v1kc02phiy9h46b3v5ls9e1gbvy8xjjmgs8eczamzmsr9v54g7s4m2b5olhevlttcz98hb61gq72gk4fj9bv71ge8da91qxkn:latest/mgsm_native_cot_de_eval_results.table.json","_type":"table-file","artifact_path":"wandb-client-artifact://sy5rdhvvobn3pshimnyuxyo8gj1riw214tz1yc5vnrt1lnown73yh4fjl37mecgdhlv3g91dmcbxb7sk4hicr06s8aowfno29bxcj0vxl4wbym0kdtrmpa3zwjkev27b/mgsm_native_cot_de_eval_results.table.json","sha256":"fabaf33255f24add59c137ef77a33afd9e00d9d17b451c92e556f9ab861bb2ad","log_mode":"IMMUTABLE","ncols":8,"size":3328757,"path":"media/table/mgsm_native_cot_de_eval_results_3_fabaf33255f24add59c1.table.json","nrows":500},"mgsm_native_cot_te/exact_match,strict-match":0.02,"mgsm_native_cot_en/exact_match,flexible-extract":0.872,"mgsm_native_cot_ru/exact_match,flexible-extract":0.832,"mgsm_native_cot_te/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ja/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr_eval_results":{"size":3273008,"nrows":500,"log_mode":"IMMUTABLE","_latest_artifact_path":"wandb-client-artifact://klcjxv56dck9r5v7yixxzhs1vkalztbzpxi2g32kee1ieaqwpp5xzehy3b0it5z25qexpkvrivqefau0xsyre0j1hrw6lcuqz15w18v3t2ogsykczvkqev3ogwsrwa6k:latest/mgsm_native_cot_fr_eval_results.table.json","artifact_path":"wandb-client-artifact://digdeal9nyp9dh9ge1fe04twhqx7zqfin73cuhsx7f5x1c3sg1hvv27v5bv7kap7hntudd9ptp6i9d1laldsfty5xyf64eed1mj07e1cq9ekpkbt59fjzsgj5qdn5kqf/mgsm_native_cot_fr_eval_results.table.json","ncols":8,"sha256":"214cb1a5a2cdee9f330e1982a6ee87d274f796f5fae1bfd02596f69cf3108b1c","_type":"table-file","path":"media/table/mgsm_native_cot_fr_eval_results_6_214cb1a5a2cdee9f330e.table.json"},"mgsm_native_cot_ja/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_bn/exact_match,strict-match":0,"mgsm_native_cot_th/exact_match,strict-match":0.26,"mgsm_native_cot_es/alias":"mgsm_native_cot_es","mgsm_native_cot_de/exact_match,flexible-extract":0.74,"mgsm_native_cot_en/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr/exact_match,flexible-extract":0.76,"mgsm_native_cot_es/exact_match,strict-match":0.008,"mgsm_native_cot_fr/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_de/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ru/alias":"mgsm_native_cot_ru","_wandb":{"runtime":5296},"_runtime":5296,"mgsm_native_co
t_sw/exact_match,flexible-extract":0.008,"evaluation/eval_results":{"sha256":"817b26b9b7489391f4e7629070c960eaee51c5e2fd819c827f94ee7a6945c1cf","_type":"table-file","_latest_artifact_path":"wandb-client-artifact://5qlehutp8s8du57s7goeq5l74fpjy27nr9584dwoqfdcv3y64mc6zenbt2nh17yqiysjti7pjfebr8yi9e9ueixtoy0so8e4qul4atn4ypuw7rhmbefsvqoqtk4aezed:latest/evaluation/eval_results.table.json","size":1863,"artifact_path":"wandb-client-artifact://uo3la8vmc83mzok4j4viu8j3yxbp5ygjzktfg1w124h49nvkq4fj5puf329sx4e79ecqrd2pqncuh050bnum14gysm4ets9wq7om8dk0c57etth40kwl2wquof82a1o5/evaluation/eval_results.table.json","log_mode":"IMMUTABLE","path":"media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json","nrows":22,"ncols":7},"mgsm_native_cot_bn/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_bn/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_de/exact_match,strict-match":0,"mgsm_native_cot_zh/exact_match,strict-match":0,"mgsm_native_cot_bn/alias":"mgsm_native_cot_bn","mgsm_native_cot_es/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_en/alias":"mgsm_native_cot_en"}
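wandb-summary.json above records both the final aggregate metrics and pointers to the per-sample table artifacts. A hedged sketch for reading the aggregate exact_match numbers back out of it:

```python
import json

# Hedged sketch: the key names below are taken directly from the summary dump above.
with open("wandb/run-20250920_081121-2oxex54w/files/wandb-summary.json") as f:
    summary = json.load(f)

for key, value in sorted(summary.items()):
    if key.endswith("/exact_match,flexible-extract"):
        task = key.split("/")[0]
        print(f"{task}: {value:.3f}")   # e.g. mgsm_native_cot_en: 0.872
```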
wandb/run-20250920_081121-2oxex54w/logs/debug-core.log
ADDED
@@ -0,0 +1,16 @@
{"time":"2025-09-20T08:11:21.452750753-04:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp3lkn2ocs/port-1143625.txt","pid":1143625,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
{"time":"2025-09-20T08:11:21.453095559-04:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1143625}
{"time":"2025-09-20T08:11:21.453077825-04:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1143625-1143815-3992152877/socket","Net":"unix"}}
{"time":"2025-09-20T08:11:21.634813033-04:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
{"time":"2025-09-20T08:11:21.640098784-04:00","level":"INFO","msg":"handleInformInit: received","streamId":"2oxex54w","id":"1(@)"}
{"time":"2025-09-20T08:11:21.866468885-04:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"2oxex54w","id":"1(@)"}
{"time":"2025-09-20T09:39:39.549058154-04:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"2oxex54w","id":"1(@)"}
{"time":"2025-09-20T09:39:39.567640542-04:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"2oxex54w","id":"1(@)"}
{"time":"2025-09-20T09:39:40.549091059-04:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
{"time":"2025-09-20T09:39:40.549121192-04:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
{"time":"2025-09-20T09:39:40.549126164-04:00","level":"INFO","msg":"server is shutting down"}
{"time":"2025-09-20T09:39:40.549132677-04:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
{"time":"2025-09-20T09:39:40.549192283-04:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1143625-1143815-3992152877/socket","Net":"unix"}}
{"time":"2025-09-20T09:39:40.54921864-04:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
{"time":"2025-09-20T09:39:40.549221165-04:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
{"time":"2025-09-20T09:39:40.549224135-04:00","level":"INFO","msg":"server is closed"}
wandb/run-20250920_081121-2oxex54w/logs/debug-internal.log
ADDED
@@ -0,0 +1,15 @@
{"time":"2025-09-20T08:11:21.640622676-04:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
{"time":"2025-09-20T08:11:21.866445386-04:00","level":"INFO","msg":"stream: created new stream","id":"2oxex54w"}
{"time":"2025-09-20T08:11:21.866465564-04:00","level":"INFO","msg":"stream: started","id":"2oxex54w"}
{"time":"2025-09-20T08:11:21.866482696-04:00","level":"INFO","msg":"writer: Do: started","stream_id":"2oxex54w"}
{"time":"2025-09-20T08:11:21.866483998-04:00","level":"INFO","msg":"handler: started","stream_id":"2oxex54w"}
{"time":"2025-09-20T08:11:21.866482679-04:00","level":"INFO","msg":"sender: started","stream_id":"2oxex54w"}
{"time":"2025-09-20T08:11:22.258337567-04:00","level":"WARN","msg":"handleCodeSave: program relative path is empty"}
{"time":"2025-09-20T08:11:22.258646248-04:00","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
{"time":"2025-09-20T09:39:39.399281107-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
{"time":"2025-09-20T09:39:39.487606624-04:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading summary, console lines 184-211","runtime_seconds":0.088083458}],"total_operations":1}}
{"time":"2025-09-20T09:39:39.549286458-04:00","level":"INFO","msg":"stream: closing","id":"2oxex54w"}
{"time":"2025-09-20T09:39:39.549294944-04:00","level":"INFO","msg":"handler: closed","stream_id":"2oxex54w"}
{"time":"2025-09-20T09:39:39.549300294-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":"2oxex54w"}
{"time":"2025-09-20T09:39:39.549303318-04:00","level":"INFO","msg":"sender: closed","stream_id":"2oxex54w"}
{"time":"2025-09-20T09:39:39.549556658-04:00","level":"INFO","msg":"stream: closed","id":"2oxex54w"}
wandb/run-20250920_081121-2oxex54w/logs/debug.log
ADDED
@@ -0,0 +1,29 @@
2025-09-20 08:11:21,421 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Configure stats pid to 1143625
2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Loading settings from /home/jdhwang/.config/wandb/settings
2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Loading settings from /orcd/home/002/jdhwang/BRIDGE/wandb/settings
2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Loading settings from environment variables
2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:setup_run_log_directory():703] Logging user logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250920_081121-2oxex54w/logs/debug.log
2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250920_081121-2oxex54w/logs/debug-internal.log
2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:init():830] calling init triggers
2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
config: {'_wandb': {}}
2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:init():871] starting backend
2025-09-20 08:11:21,634 INFO MainThread:1143625 [wandb_init.py:init():874] sending inform_init request
2025-09-20 08:11:21,638 INFO MainThread:1143625 [wandb_init.py:init():882] backend started and connected
2025-09-20 08:11:21,639 INFO MainThread:1143625 [wandb_init.py:init():953] updated telemetry
2025-09-20 08:11:21,655 INFO MainThread:1143625 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
2025-09-20 08:11:22,257 INFO MainThread:1143625 [wandb_init.py:init():1029] starting run threads in backend
2025-09-20 08:11:22,623 INFO MainThread:1143625 [wandb_run.py:_console_start():2458] atexit reg
2025-09-20 08:11:22,623 INFO MainThread:1143625 [wandb_run.py:_redirect():2306] redirect: wrap_raw
2025-09-20 08:11:22,623 INFO MainThread:1143625 [wandb_run.py:_redirect():2375] Wrapping output streams.
2025-09-20 08:11:22,623 INFO MainThread:1143625 [wandb_run.py:_redirect():2398] Redirects installed.
2025-09-20 08:11:22,625 INFO MainThread:1143625 [wandb_init.py:init():1075] run started, returning control to user process
2025-09-20 09:39:29,566 INFO MainThread:1143625 [wandb_run.py:_config_callback():1363] config_cb None None {'task_configs': {'mgsm_native_cot_bn': {'task': 'mgsm_native_cot_bn', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'bn', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nধাপে ধাপে উত্তর:"}}{% else %}{{"প্রশ্ন: "+question+"\\nধাপে ধাপে উত্তর:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['প্রশ্ন:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'The answer is (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_de': {'task': 'mgsm_native_cot_de', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'de', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nSchritt-für-Schritt-Antwort:"}}{% else %}{{"Frage: "+question+"\\nSchritt-für-Schritt-Antwort:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Frage:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'Die Antwort lautet (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_en': {'task': 'mgsm_native_cot_en', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'en', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\\nStep-by-Step Answer:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Question:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'The answer is (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 
'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_es': {'task': 'mgsm_native_cot_es', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'es', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nRespuesta paso a paso:"}}{% else %}{{"Pregunta: "+question+"\\nRespuesta paso a paso:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[23:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Pregunta:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'La respuesta es (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_fr': {'task': 'mgsm_native_cot_fr', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'fr', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nRéponse étape par étape :"}}{% else %}{{"Question : "+question+"\\nRéponse étape par étape :"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[26:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Question :', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'La réponse est (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_ja': {'task': 'mgsm_native_cot_ja', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'ja', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nステップごとの答え:"}}{% else %}{{"問題: "+question+"\\nステップごとの答え:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['問題:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 
'regex', 'regex_pattern': '答えは(\\-?[0-9\\.\\,]+)です。'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_ru': {'task': 'mgsm_native_cot_ru', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'ru', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nПошаговоерешение:"}}{% else %}{{"Задача: "+question+"\\nПошаговоерешение:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Задача:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'Ответ — (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_sw': {'task': 'mgsm_native_cot_sw', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'sw', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nJibu la Hatua kwa Hatua:"}}{% else %}{{"Swali: "+question+"\\nJibu la Hatua kwa Hatua:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[25:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Swali:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'Jibu ni (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_te': {'task': 'mgsm_native_cot_te', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'te', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nదశలవారీగా సమాధానం:"}}{% else %}{{"ప్రశ్న: "+question+"\\nదశలవారీగా సమాధానం:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[19:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['ప్రశ్న:', '</s>', '<|im_end|>'], 
'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'సమాధానం (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_th': {'task': 'mgsm_native_cot_th', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'th', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nคำตอบทีละขั้นตอน:"}}{% else %}{{"โจทย์: "+question+"\\nคำตอบทีละขั้นตอน:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['โจทย์:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'คำตอบคือ (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_zh': {'task': 'mgsm_native_cot_zh', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'zh', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\n逐步解答:"}}{% else %}{{"问题: "+question+"\\n逐步解答:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['问题:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': '答案是 (\\-?[0-9\\.\\,]+)。'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}}, 'cli_configs': {'model': 'vllm', 'model_args': 'pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000', 'batch_size': 'auto', 'batch_sizes': [], 'device': None, 'use_cache': None, 'limit': None, 'bootstrap_iters': 0, 'gen_kwargs': {'max_gen_toks': 20000}, 'random_seed': 0, 'numpy_seed': 1234, 'torch_seed': 1234, 'fewshot_seed': 1234}}
2025-09-20 09:39:38,409 INFO MainThread:1143625 [wandb_run.py:_finish():2224] finishing run jdhwang/MRPO_eval/2oxex54w
2025-09-20 09:39:38,409 INFO MainThread:1143625 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
2025-09-20 09:39:38,486 INFO MainThread:1143625 [wandb_run.py:_restore():2405] restore
2025-09-20 09:39:38,486 INFO MainThread:1143625 [wandb_run.py:_restore():2411] restore done
2025-09-20 09:39:39,547 INFO MainThread:1143625 [wandb_run.py:_footer_history_summary_info():3903] rendering history
2025-09-20 09:39:39,548 INFO MainThread:1143625 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
2025-09-20 09:39:39,548 INFO MainThread:1143625 [wandb_run.py:_footer_sync_info():3864] logging synced files
wandb/run-20250920_081121-2oxex54w/run-2oxex54w.wandb
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:07a3dd0ca1cf928a27f52210df217c77c140eaef367b9cde8b60d69e33e2857d
size 929156
wandb/run-20250921_062002-cothceaw/files/config.yaml
ADDED
@@ -0,0 +1,252 @@
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.21.0
|
| 4 |
+
e:
|
| 5 |
+
b4tnkkvikp4ud9d1f7t2v1f37ei0r2ow:
|
| 6 |
+
args:
|
| 7 |
+
- --model
|
| 8 |
+
- vllm
|
| 9 |
+
- --model_args
|
| 10 |
+
- pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
|
| 11 |
+
- --tasks
|
| 12 |
+
- mmlu_prox_ar_math,mmlu_prox_bn_math
|
| 13 |
+
- --batch_size
|
| 14 |
+
- auto
|
| 15 |
+
- --apply_chat_template
|
| 16 |
+
- --output_path
|
| 17 |
+
- ckpts/rerun
|
| 18 |
+
- --log_samples
|
| 19 |
+
- --gen_kwargs
|
| 20 |
+
- max_gen_toks=20000
|
| 21 |
+
- --wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto
|
| 22 |
+
cpu_count: 64
|
| 23 |
+
cpu_count_logical: 128
|
| 24 |
+
cudaVersion: "12.4"
|
| 25 |
+
disk:
|
| 26 |
+
/:
|
| 27 |
+
total: "464506159104"
|
| 28 |
+
used: "12265783296"
|
| 29 |
+
email: [email protected]
|
| 30 |
+
executable: /orcd/home/001/jdhwang/.conda/envs/llm/bin/python
|
| 31 |
+
git:
|
| 32 |
+
commit: 57228cdf2aa2656e94d94dc3f5530986c0f48545
|
| 33 |
+
remote: [email protected]:jd730/BRIDGE-private.git
|
| 34 |
+
gpu: NVIDIA H100 80GB HBM3
|
| 35 |
+
gpu_count: 2
|
| 36 |
+
gpu_nvidia:
|
| 37 |
+
- architecture: Hopper
|
| 38 |
+
cudaCores: 16896
|
| 39 |
+
memoryTotal: "85520809984"
|
| 40 |
+
name: NVIDIA H100 80GB HBM3
|
| 41 |
+
uuid: GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b
|
| 42 |
+
- architecture: Hopper
|
| 43 |
+
cudaCores: 16896
|
| 44 |
+
memoryTotal: "85520809984"
|
| 45 |
+
name: NVIDIA H100 80GB HBM3
|
| 46 |
+
uuid: GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b
|
| 47 |
+
host: node1803
|
| 48 |
+
memory:
|
| 49 |
+
total: "2163473002496"
|
| 50 |
+
os: Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28
|
| 51 |
+
program: /home/jdhwang/.conda/envs/llm/bin/lm_eval
|
| 52 |
+
python: CPython 3.11.11
|
| 53 |
+
root: ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309
|
| 54 |
+
slurm:
|
| 55 |
+
array_job_id: "4507342"
|
| 56 |
+
array_task_count: "8"
|
| 57 |
+
array_task_id: "0"
|
| 58 |
+
array_task_max: "7"
|
| 59 |
+
array_task_min: "0"
|
| 60 |
+
array_task_step: "1"
|
| 61 |
+
cluster_name: eofe7
|
| 62 |
+
conf: /etc/slurm/slurm.conf
|
| 63 |
+
cpus_on_node: "16"
|
| 64 |
+
cpus_per_task: "16"
|
| 65 |
+
gpus_on_node: "2"
|
| 66 |
+
gtids: "0"
|
| 67 |
+
job_account: mit_general
|
| 68 |
+
job_cpus_per_node: "16"
|
| 69 |
+
job_end_time: "1758493186"
|
| 70 |
+
job_gid: "209655"
|
| 71 |
+
job_gpus: 1,2
|
| 72 |
+
job_id: "4535465"
|
| 73 |
+
job_name: mmlu_prox.sh
|
| 74 |
+
job_nodelist: node1803
|
| 75 |
+
job_num_nodes: "1"
|
| 76 |
+
job_partition: ou_bcs_low
|
| 77 |
+
job_qos: normal
|
| 78 |
+
job_start_time: "1758449986"
|
| 79 |
+
job_uid: "209655"
|
| 80 |
+
job_user: jdhwang
|
| 81 |
+
jobid: "4535465"
|
| 82 |
+
localid: "0"
|
| 83 |
+
mem_per_node: "131072"
|
| 84 |
+
nnodes: "1"
|
| 85 |
+
nodeid: "0"
|
| 86 |
+
nodelist: node1803
|
| 87 |
+
nprocs: "1"
|
| 88 |
+
ntasks: "1"
|
| 89 |
+
ntasks_per_node: "1"
|
| 90 |
+
oom_kill_step: "0"
|
| 91 |
+
prio_process: "0"
|
| 92 |
+
procid: "0"
|
| 93 |
+
script_context: prolog_task
|
| 94 |
+
submit_dir: /orcd/home/002/jdhwang/BRIDGE
|
| 95 |
+
submit_host: orcd-login003.mit.edu
|
| 96 |
+
task_pid: "1320304"
|
| 97 |
+
tasks_per_node: "1"
|
| 98 |
+
topology_addr: node1803
|
| 99 |
+
topology_addr_pattern: node
|
| 100 |
+
tres_per_task: cpu=16
|
| 101 |
+
startedAt: "2025-09-21T10:20:02.294501Z"
|
| 102 |
+
writerId: b4tnkkvikp4ud9d1f7t2v1f37ei0r2ow
|
| 103 |
+
m: []
|
| 104 |
+
python_version: 3.11.11
|
| 105 |
+
t:
|
| 106 |
+
"1":
|
| 107 |
+
- 1
|
| 108 |
+
- 5
|
| 109 |
+
- 11
|
| 110 |
+
- 30
|
| 111 |
+
- 41
|
| 112 |
+
- 49
|
| 113 |
+
- 51
|
| 114 |
+
- 53
|
| 115 |
+
- 71
|
| 116 |
+
- 95
|
| 117 |
+
- 98
|
| 118 |
+
- 100
|
| 119 |
+
- 105
|
| 120 |
+
"2":
|
| 121 |
+
- 1
|
| 122 |
+
- 5
|
| 123 |
+
- 11
|
| 124 |
+
- 30
|
| 125 |
+
- 41
|
| 126 |
+
- 49
|
| 127 |
+
- 51
|
| 128 |
+
- 53
|
| 129 |
+
- 71
|
| 130 |
+
- 95
|
| 131 |
+
- 98
|
| 132 |
+
- 100
|
| 133 |
+
- 105
|
| 134 |
+
"3":
|
| 135 |
+
- 2
|
| 136 |
+
- 13
|
| 137 |
+
- 62
|
| 138 |
+
"4": 3.11.11
|
| 139 |
+
"5": 0.21.0
|
| 140 |
+
"6": 4.51.3
|
| 141 |
+
"12": 0.21.0
|
| 142 |
+
"13": linux-x86_64
|
| 143 |
+
cli_configs:
|
| 144 |
+
value:
|
| 145 |
+
batch_size: auto
|
| 146 |
+
batch_sizes: []
|
| 147 |
+
bootstrap_iters: 0
|
| 148 |
+
device: null
|
| 149 |
+
fewshot_seed: 1234
|
| 150 |
+
gen_kwargs:
|
| 151 |
+
max_gen_toks: 20000
|
| 152 |
+
limit: null
|
| 153 |
+
model: vllm
|
| 154 |
+
model_args: pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
|
| 155 |
+
numpy_seed: 1234
|
| 156 |
+
random_seed: 0
|
| 157 |
+
torch_seed: 1234
|
| 158 |
+
use_cache: null
|
| 159 |
+
task_configs:
|
| 160 |
+
value:
|
| 161 |
+
mmlu_prox_ar_math:
|
| 162 |
+
dataset_name: ar
|
| 163 |
+
dataset_path: li-lab/MMLU-ProX
|
| 164 |
+
description: |
|
| 165 |
+
فيما يلي أسئلة اختيار من متعدد (مع إجابات) حول الرياضيات. فكر خطوة بخطوة ثم أنهِ إجابتك بـ 'الإجابة هي (X)' حيث X هو حرف الخيار الصحيح.
|
| 166 |
+
doc_to_target: answer
|
| 167 |
+
doc_to_text: functools.partial(<function format_cot_example at 0x14d51fcc0b80>, including_answer=False)
|
| 168 |
+
fewshot_config:
|
| 169 |
+
doc_to_target: ""
|
| 170 |
+
doc_to_text: functools.partial(<function format_cot_example at 0x14d51fcc0e00>, including_answer=True)
|
| 171 |
+
sampler: first_n
|
| 172 |
+
fewshot_delimiter: |4+
|
| 173 |
+
|
| 174 |
+
fewshot_split: validation
|
| 175 |
+
filter_list:
|
| 176 |
+
- filter:
|
| 177 |
+
- function: regex
|
| 178 |
+
regex_pattern: الإجابة هي \(?([ABCDEFGHIJ])\)?
|
| 179 |
+
- function: take_first
|
| 180 |
+
name: custom-extract
|
| 181 |
+
generation_kwargs:
|
| 182 |
+
do_sample: false
|
| 183 |
+
max_gen_toks: 20000
|
| 184 |
+
temperature: 0
|
| 185 |
+
until:
|
| 186 |
+
- </s>
|
| 187 |
+
- 'Q:'
|
| 188 |
+
- 'سؤال:'
|
| 189 |
+
- <|im_end|>
|
| 190 |
+
metadata:
|
| 191 |
+
version: 0
|
| 192 |
+
metric_list:
|
| 193 |
+
- aggregation: mean
|
| 194 |
+
higher_is_better: true
|
| 195 |
+
ignore_case: true
|
| 196 |
+
ignore_punctuation: true
|
| 197 |
+
metric: exact_match
|
| 198 |
+
num_fewshot: 5
|
| 199 |
+
output_type: generate_until
|
| 200 |
+
process_docs: functools.partial(<function process_docs at 0x14d51fcc0860>, subject='math')
|
| 201 |
+
repeats: 1
|
| 202 |
+
should_decontaminate: false
|
| 203 |
+
target_delimiter: ' '
|
| 204 |
+
task: mmlu_prox_ar_math
|
| 205 |
+
task_alias: math
|
| 206 |
+
test_split: test
|
| 207 |
+
mmlu_prox_bn_math:
|
| 208 |
+
dataset_name: bn
|
| 209 |
+
dataset_path: li-lab/MMLU-ProX
|
| 210 |
+
description: |
|
| 211 |
+
নিম্নলিখিত গণিত সম্পর্কে বহুনির্বাচনী প্রশ্ন (উত্তরসহ)। ধাপে ধাপে চিন্তা করুন এবং তারপর আপনার উত্তর "উত্তর হল (X)" দিয়ে শেষ করুন যেখানে X হল সঠিক বিকল্পের অক্ষর।
|
| 212 |
+
doc_to_target: answer
|
| 213 |
+
doc_to_text: functools.partial(<function format_cot_example at 0x14d51fb7a200>, including_answer=False)
|
| 214 |
+
fewshot_config:
|
| 215 |
+
doc_to_target: ""
|
| 216 |
+
doc_to_text: functools.partial(<function format_cot_example at 0x14d51fb7b740>, including_answer=True)
|
| 217 |
+
sampler: first_n
|
| 218 |
+
fewshot_delimiter: |4+
|
| 219 |
+
|
| 220 |
+
fewshot_split: validation
|
| 221 |
+
filter_list:
|
| 222 |
+
- filter:
|
| 223 |
+
- function: regex
|
| 224 |
+
regex_pattern: উত্তর হল \(?([ABCDEFGHIJ])\)?
|
| 225 |
+
- function: take_first
|
| 226 |
+
name: custom-extract
|
| 227 |
+
generation_kwargs:
|
| 228 |
+
do_sample: false
|
| 229 |
+
max_gen_toks: 20000
|
| 230 |
+
temperature: 0
|
| 231 |
+
until:
|
| 232 |
+
- </s>
|
| 233 |
+
- 'Q:'
|
| 234 |
+
- 'প্রশ্ন:'
|
| 235 |
+
- <|im_end|>
|
| 236 |
+
metadata:
|
| 237 |
+
version: 0
|
| 238 |
+
metric_list:
|
| 239 |
+
- aggregation: mean
|
| 240 |
+
higher_is_better: true
|
| 241 |
+
ignore_case: true
|
| 242 |
+
ignore_punctuation: true
|
| 243 |
+
metric: exact_match
|
| 244 |
+
num_fewshot: 5
|
| 245 |
+
output_type: generate_until
|
| 246 |
+
process_docs: functools.partial(<function process_docs at 0x14d51fb79e40>, subject='math')
|
| 247 |
+
repeats: 1
|
| 248 |
+
should_decontaminate: false
|
| 249 |
+
target_delimiter: ' '
|
| 250 |
+
task: mmlu_prox_bn_math
|
| 251 |
+
task_alias: math
|
| 252 |
+
test_split: test
|
wandb/run-20250921_062002-cothceaw/files/media/table/evaluation/eval_results_1_2a7ec9e10306569eae6e.table.json
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"columns": ["Tasks", "Version", "Filter", "num_fewshot", "Metric", "Value", "Stderr"], "data": [["mmlu_prox_ar_math", 0.0, "custom-extract", 5, "exact_match", "0.27905255366395265", "N/A"], ["mmlu_prox_bn_math", 0.0, "custom-extract", 5, "exact_match", "0.14433752775721687", "N/A"]]}
|
wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json
ADDED
|
@@ -0,0 +1,3 @@
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d425cdaeb765848e725c89a34944d791bdbf0248c6361032236b97f9611117e
|
| 3 |
+
size 89484352
|
wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json
ADDED
|
@@ -0,0 +1,3 @@
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f734c70b94d2677510f9200822c1810a65f0546ff35668ea1df59d81ec4ba33
|
| 3 |
+
size 55489730
|
wandb/run-20250921_062002-cothceaw/files/output.log
ADDED
|
@@ -0,0 +1,81 @@
| 1 |
+
2025-09-21:06:20:03,677 INFO [__main__.py:291] Verbosity set to INFO
|
| 2 |
+
2025-09-21:06:20:37,183 INFO [__init__.py:464] The tag pawsx is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
|
| 3 |
+
2025-09-21:06:20:38,310 INFO [__main__.py:388] Selected Tasks: ['mmlu_prox_ar_math', 'mmlu_prox_bn_math']
|
| 4 |
+
2025-09-21:06:20:38,320 INFO [evaluator.py:161] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
|
| 5 |
+
2025-09-21:06:20:38,320 WARNING [evaluator.py:172] generation_kwargs specified through cli, these settings will update set parameters in yaml tasks. Ensure 'do_sample=True' for non-greedy decoding!
|
| 6 |
+
2025-09-21:06:20:38,321 INFO [evaluator.py:198] Initializing vllm model, with arguments: {'pretrained': 'ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', 'dtype': 'auto', 'tensor_parallel_size': 2, 'max_model_len': 20000}
|
| 7 |
+
INFO 09-21 06:20:45 config.py:350] This model supports multiple tasks: {'generate', 'embedding'}. Defaulting to 'generate'.
|
| 8 |
+
INFO 09-21 06:20:45 config.py:1020] Defaulting to use mp for distributed inference
|
| 9 |
+
INFO 09-21 06:20:45 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', speculative_config=None, tokenizer='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=20000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=1234, served_model_name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, chat_template_text_format=string, mm_processor_kwargs=None, pooler_config=None)
|
| 10 |
+
WARNING 09-21 06:20:45 multiproc_gpu_executor.py:56] Reducing Torch parallelism from 16 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
|
| 11 |
+
INFO 09-21 06:20:45 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
|
| 12 |
+
INFO 09-21 06:20:45 selector.py:135] Using Flash Attention backend.
|
| 13 |
+
INFO 09-21 06:20:54 utils.py:961] Found nccl from library libnccl.so.2
|
| 14 |
+
INFO 09-21 06:20:54 pynccl.py:69] vLLM is using nccl==2.21.5
|
| 15 |
+
INFO 09-21 06:20:55 custom_all_reduce_utils.py:242] reading GPU P2P access cache from /home/jdhwang/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
|
| 16 |
+
INFO 09-21 06:20:55 shm_broadcast.py:236] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x14d81f47a010>, local_subscribe_port=46667, remote_subscribe_port=None)
|
| 17 |
+
INFO 09-21 06:20:55 model_runner.py:1072] Starting to load model ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309...
|
| 18 |
+
Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
|
| 19 |
+
Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:28, 9.34s/it]
|
| 20 |
+
Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:09<00:07, 3.97s/it]
|
| 21 |
+
Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:10<00:02, 2.41s/it]
|
| 22 |
+
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 1.72s/it]
|
| 23 |
+
Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.69s/it]
|
| 24 |
+
INFO 09-21 06:21:06 model_runner.py:1077] Loading model weights took 7.1216 GB
|
| 25 |
+
INFO 09-21 06:21:08 worker.py:232] Memory profiling results: total_gpu_memory=79.10GiB initial_memory_usage=8.22GiB peak_torch_memory=8.75GiB memory_usage_post_profile=8.60GiB non_torch_memory=1.44GiB kv_cache_size=60.99GiB gpu_memory_utilization=0.90
|
| 26 |
+
INFO 09-21 06:21:09 distributed_gpu_executor.py:57] # GPU blocks: 142750, # CPU blocks: 9362
|
| 27 |
+
INFO 09-21 06:21:09 distributed_gpu_executor.py:61] Maximum concurrency for 20000 tokens per request: 114.20x
|
| 28 |
+
INFO 09-21 06:21:11 model_runner.py:1400] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
|
| 29 |
+
INFO 09-21 06:21:11 model_runner.py:1404] If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
|
| 30 |
+
INFO 09-21 06:21:26 custom_all_reduce.py:224] Registering 1995 cuda graph addresses
|
| 31 |
+
INFO 09-21 06:21:27 model_runner.py:1518] Graph capturing finished in 16 secs, took 0.25 GiB
|
| 32 |
+
|
| 33 |
+
Generating validation split: 100%|██████████| 70/70 [00:00<00:00, 2041.89 examples/s]
|
| 34 |
+
Generating test split: 100%|██████████| 11759/11759 [00:00<00:00, 196190.89 examples/s]
|
| 35 |
+
Filter: 100%|██████████| 70/70 [00:00<00:00, 6775.31 examples/s]
|
| 36 |
+
Filter: 100%|██████████| 11759/11759 [00:00<00:00, 78762.47 examples/s]
|
| 37 |
+
Generating validation split: 100%|██████████| 70/70 [00:00<00:00, 20925.19 examples/s]
|
| 38 |
+
Generating test split: 100%|██████████| 11759/11759 [00:00<00:00, 144140.86 examples/s]
|
| 39 |
+
Filter: 100%|██████████| 70/70 [00:00<00:00, 17085.74 examples/s]
|
| 40 |
+
Filter: 100%|██████████| 11759/11759 [00:00<00:00, 72996.18 examples/s]
|
| 41 |
+
2025-09-21:06:21:32,266 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 42 |
+
2025-09-21:06:21:32,266 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
|
| 43 |
+
2025-09-21:06:21:32,267 INFO [task.py:430] Building contexts for mmlu_prox_bn_math on rank 0...
|
| 44 |
+
100%|██████████| 1351/1351 [00:01<00:00, 1268.23it/s]
|
| 45 |
+
2025-09-21:06:21:33,474 INFO [task.py:430] Building contexts for mmlu_prox_ar_math on rank 0...
|
| 46 |
+
100%|██████████| 1351/1351 [00:01<00:00, 1264.36it/s]
|
| 47 |
+
2025-09-21:06:21:34,673 INFO [evaluator.py:495] Running generate_until requests
|
| 48 |
+
Processed prompts: 100%|██████████| 1351/1351 [34:54<00:00, 1.55s/it, est. speed input: 2878.49 toks/s, output: 3301.25 toks/s]
|
| 49 |
+
final processing: 1351it [00:00, 213146.69it/s] | 1/2702 [34:55<1571:56:02, 2095.14s/it] 2874.66 toks/s, output: 3286.44 toks/s]
|
| 50 |
+
WARNING 09-21 06:26:49 scheduler.py:1481] Sequence group 627 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=1
|
| 51 |
+
WARNING 09-21 06:28:23 scheduler.py:1481] Sequence group 523 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=51
|
| 52 |
+
WARNING 09-21 06:31:28 scheduler.py:1481] Sequence group 393 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=101
|
| 53 |
+
WARNING 09-21 06:41:11 scheduler.py:1481] Sequence group 908 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=151
|
| 54 |
+
Processed prompts: 100%|██████████| 1351/1351 [43:26<00:00, 1.93s/it, est. speed input: 1088.08 toks/s, output: 2998.64 toks/s]
|
| 55 |
+
final processing: 1351it [00:00, 191818.31it/s] | 1352/2702 [1:18:25<1:07:51, 3.02s/it] 1088.08 toks/s, output: 2998.64 toks/s]
|
| 56 |
+
WARNING 09-21 07:04:02 scheduler.py:1481] Sequence group 2127 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=201
|
| 57 |
+
WARNING 09-21 07:06:42 scheduler.py:1481] Sequence group 1934 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=251
|
| 58 |
+
WARNING 09-21 07:12:57 scheduler.py:1481] Sequence group 1761 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=301
|
| 59 |
+
Running generate_until requests: 100%|██████████| 2702/2702 [1:18:25<00:00, 1.74s/it]
|
| 60 |
+
INFO 09-21 07:40:13 multiproc_worker_utils.py:133] Terminating local vLLM worker processes
|
| 61 |
+
wandb: WARNING Serializing object of type str that is 104466 bytes
|
| 62 |
+
wandb: WARNING Serializing object of type str that is 102542 bytes
|
| 63 |
+
wandb: WARNING Serializing object of type str that is 107944 bytes
|
| 64 |
+
wandb: WARNING Serializing object of type str that is 103572 bytes
|
| 65 |
+
wandb: WARNING Serializing object of type str that is 105466 bytes
|
| 66 |
+
wandb: WARNING Serializing object of type str that is 100810 bytes
|
| 67 |
+
wandb: WARNING Serializing object of type str that is 100048 bytes
|
| 68 |
+
wandb: WARNING Serializing object of type str that is 106760 bytes
|
| 69 |
+
wandb: WARNING Serializing object of type str that is 100584 bytes
|
| 70 |
+
wandb: WARNING Serializing object of type str that is 103056 bytes
|
| 71 |
+
wandb: WARNING Serializing object of type str that is 102142 bytes
|
| 72 |
+
wandb: WARNING Serializing object of type str that is 101654 bytes
|
| 73 |
+
wandb: WARNING Serializing object of type str that is 105388 bytes
|
| 74 |
+
2025-09-21:07:40:20,195 INFO [evaluation_tracker.py:207] Saving results aggregated
|
| 75 |
+
2025-09-21:07:40:20,372 INFO [evaluation_tracker.py:291] Saving per-sample results for: mmlu_prox_ar_math
|
| 76 |
+
2025-09-21:07:40:20,897 INFO [evaluation_tracker.py:291] Saving per-sample results for: mmlu_prox_bn_math
|
| 77 |
+
vllm (pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000), gen_kwargs: (max_gen_toks=20000), limit: None, num_fewshot: None, batch_size: auto
|
| 78 |
+
|Tasks|Version| Filter |n-shot| Metric | |Value | |Stderr|
|
| 79 |
+
|-----|------:|--------------|-----:|-----------|---|-----:|---|------|
|
| 80 |
+
|math | 0|custom-extract| 5|exact_match|↑ |0.2791|± | N/A|
|
| 81 |
+
|math | 0|custom-extract| 5|exact_match|↑ |0.1443|± | N/A|
|
wandb/run-20250921_062002-cothceaw/files/requirements.txt
ADDED
|
@@ -0,0 +1,378 @@
| 1 |
+
setproctitle==1.2.2
|
| 2 |
+
colorama==0.4.6
|
| 3 |
+
psutil==7.0.0
|
| 4 |
+
GitPython==3.1.43
|
| 5 |
+
docker-pycreds==0.4.0
|
| 6 |
+
gitdb==4.0.11
|
| 7 |
+
opencv-python==4.10.0.84
|
| 8 |
+
sentry-sdk==2.19.0
|
| 9 |
+
setproctitle==1.3.4
|
| 10 |
+
smmap==5.0.1
|
| 11 |
+
hjson==3.1.0
|
| 12 |
+
deepspeed==0.16.7
|
| 13 |
+
transformers==4.46.1
|
| 14 |
+
transformers==4.54.1
|
| 15 |
+
protobuf==6.31.1
|
| 16 |
+
accelerate==1.6.0
|
| 17 |
+
docopt==0.6.2
|
| 18 |
+
gguf==0.10.0
|
| 19 |
+
webencodings==0.5.1
|
| 20 |
+
pickleshare==0.7.5
|
| 21 |
+
fastjsonschema==2.21.1
|
| 22 |
+
backcall==0.2.0
|
| 23 |
+
tinycss2==1.4.0
|
| 24 |
+
soupsieve==2.7
|
| 25 |
+
pandocfilters==1.5.1
|
| 26 |
+
mistune==3.1.3
|
| 27 |
+
jupyterlab_pygments==0.3.0
|
| 28 |
+
defusedxml==0.7.1
|
| 29 |
+
bleach==6.2.0
|
| 30 |
+
yarg==0.1.9
|
| 31 |
+
ipython==8.12.3
|
| 32 |
+
beautifulsoup4==4.13.4
|
| 33 |
+
nbformat==5.10.4
|
| 34 |
+
nbclient==0.10.2
|
| 35 |
+
nbconvert==7.16.6
|
| 36 |
+
pipreqs==0.5.0
|
| 37 |
+
wandb==0.21.0
|
| 38 |
+
trl==0.17.0
|
| 39 |
+
lm_eval==0.4.4
|
| 40 |
+
langid==1.1.6
|
| 41 |
+
annotated-types==0.7.0
|
| 42 |
+
vllm==0.6.4.post1
|
| 43 |
+
typing-inspection==0.4.1
|
| 44 |
+
xformers==0.0.28.post3
|
| 45 |
+
pydantic_core==2.33.2
|
| 46 |
+
outlines==0.0.46
|
| 47 |
+
pydantic==2.11.7
|
| 48 |
+
compressed-tensors==0.8.0
|
| 49 |
+
click==8.2.1
|
| 50 |
+
lightning-utilities==0.15.0
|
| 51 |
+
torchmetrics==1.8.0
|
| 52 |
+
nvidia-ml-py==13.580.65
|
| 53 |
+
blessed==1.21.0
|
| 54 |
+
gpustat==1.1.1
|
| 55 |
+
nvidia-cufile-cu12==1.13.1.3
|
| 56 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 57 |
+
mpmath==1.3.0
|
| 58 |
+
typing_extensions==4.12.2
|
| 59 |
+
sympy==1.13.1
|
| 60 |
+
pillow==11.0.0
|
| 61 |
+
nvidia-nvtx-cu12==12.4.127
|
| 62 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 63 |
+
nvidia-nccl-cu12==2.21.5
|
| 64 |
+
numpy==2.1.2
|
| 65 |
+
nvidia-curand-cu12==10.3.5.147
|
| 66 |
+
networkx==3.3
|
| 67 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 68 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 69 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 70 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 71 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 72 |
+
MarkupSafe==2.1.5
|
| 73 |
+
setuptools==80.9.0
|
| 74 |
+
certifi==2025.8.3
|
| 75 |
+
einops==0.8.1
|
| 76 |
+
fsspec==2024.6.1
|
| 77 |
+
Jinja2==3.1.4
|
| 78 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 79 |
+
urllib3==2.5.0
|
| 80 |
+
tqdm==4.67.1
|
| 81 |
+
safetensors==0.6.2
|
| 82 |
+
regex==2025.7.34
|
| 83 |
+
PyYAML==6.0.2
|
| 84 |
+
packaging==25.0
|
| 85 |
+
idna==3.10
|
| 86 |
+
filelock==3.13.1
|
| 87 |
+
hf-xet==1.1.8
|
| 88 |
+
torch==2.6.0+cu124
|
| 89 |
+
charset-normalizer==3.4.3
|
| 90 |
+
requests==2.32.5
|
| 91 |
+
huggingface-hub==0.34.4
|
| 92 |
+
torchaudio==2.6.0+cu124
|
| 93 |
+
liger_kernel==0.6.2
|
| 94 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 95 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 96 |
+
torchvision==0.21.0+cu124
|
| 97 |
+
blingfire==0.1.8
|
| 98 |
+
triton==3.1.0
|
| 99 |
+
sentence-transformers==5.1.0
|
| 100 |
+
tabledata==1.3.4
|
| 101 |
+
lxml==5.3.1
|
| 102 |
+
accelerate==1.5.2
|
| 103 |
+
absl-py==2.1.0
|
| 104 |
+
Markdown==3.7
|
| 105 |
+
uvicorn==0.34.0
|
| 106 |
+
ruff==0.11.0
|
| 107 |
+
nvidia-cuda-runtime-cu12==12.4.127
|
| 108 |
+
airportsdata==20250224
|
| 109 |
+
nvidia-cusolver-cu12==11.6.1.9
|
| 110 |
+
astor==0.8.1
|
| 111 |
+
DataProperty==1.1.0
|
| 112 |
+
lm-format-enforcer==0.10.11
|
| 113 |
+
mdurl==0.1.2
|
| 114 |
+
nvidia-nccl-cu12==2.21.5
|
| 115 |
+
tabulate==0.9.0
|
| 116 |
+
python-dotenv==1.0.1
|
| 117 |
+
h5py==3.13.0
|
| 118 |
+
chardet==5.2.0
|
| 119 |
+
cupy-cuda12x==13.4.0
|
| 120 |
+
tiktoken==0.9.0
|
| 121 |
+
jiter==0.8.2
|
| 122 |
+
Pygments==2.19.1
|
| 123 |
+
typing_extensions==4.12.2
|
| 124 |
+
datasets==3.1.0
|
| 125 |
+
zipp==3.21.0
|
| 126 |
+
more-itertools==10.6.0
|
| 127 |
+
MarkupSafe==2.1.5
|
| 128 |
+
comm==0.2.2
|
| 129 |
+
pycountry==24.6.1
|
| 130 |
+
partial-json-parser==0.2.1.1.post5
|
| 131 |
+
gradio==4.44.0
|
| 132 |
+
prometheus_client==0.21.1
|
| 133 |
+
six==1.17.0
|
| 134 |
+
pytz==2025.1
|
| 135 |
+
unsloth_zoo==2025.3.12
|
| 136 |
+
starlette==0.46.0
|
| 137 |
+
llvmlite==0.44.0
|
| 138 |
+
peft==0.14.0
|
| 139 |
+
aiohttp==3.11.13
|
| 140 |
+
aiofiles==23.2.1
|
| 141 |
+
importlib_resources==6.5.2
|
| 142 |
+
nvidia-nvjitlink-cu12==12.4.127
|
| 143 |
+
semantic-version==2.10.0
|
| 144 |
+
decorator==5.2.1
|
| 145 |
+
nvidia-cublas-cu12==12.4.5.8
|
| 146 |
+
contourpy==1.3.1
|
| 147 |
+
torch==2.6.0
|
| 148 |
+
pytest==8.3.5
|
| 149 |
+
fastapi==0.115.11
|
| 150 |
+
seaborn==0.13.2
|
| 151 |
+
sympy==1.13.1
|
| 152 |
+
threadpoolctl==3.6.0
|
| 153 |
+
networkx==3.4.2
|
| 154 |
+
python-dateutil==2.9.0.post0
|
| 155 |
+
depyf==0.18.0
|
| 156 |
+
nvidia-ml-py==12.570.86
|
| 157 |
+
jedi==0.19.2
|
| 158 |
+
joblib==1.4.2
|
| 159 |
+
referencing==0.36.2
|
| 160 |
+
diskcache==5.6.3
|
| 161 |
+
httpcore==1.0.7
|
| 162 |
+
httpx==0.28.1
|
| 163 |
+
pyairports==2.1.1
|
| 164 |
+
protobuf==3.20.3
|
| 165 |
+
portalocker==3.1.1
|
| 166 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 167 |
+
Pebble==5.1.0
|
| 168 |
+
fsspec==2024.9.0
|
| 169 |
+
hf_transfer==0.1.9
|
| 170 |
+
ptyprocess==0.7.0
|
| 171 |
+
pexpect==4.9.0
|
| 172 |
+
nvidia-cuda-nvrtc-cu12==12.4.127
|
| 173 |
+
scipy==1.15.2
|
| 174 |
+
sentencepiece==0.2.0
|
| 175 |
+
cycler==0.12.1
|
| 176 |
+
packaging==24.2
|
| 177 |
+
openai==1.56.1
|
| 178 |
+
frozenlist==1.5.0
|
| 179 |
+
lark==1.2.2
|
| 180 |
+
filelock==3.17.0
|
| 181 |
+
opentelemetry-exporter-otlp==1.26.0
|
| 182 |
+
yarl==1.18.3
|
| 183 |
+
rouge_score==0.1.2
|
| 184 |
+
grpcio==1.70.0
|
| 185 |
+
googleapis-common-protos==1.70.0
|
| 186 |
+
aiohappyeyeballs==2.4.6
|
| 187 |
+
multiprocess==0.70.16
|
| 188 |
+
tornado==6.4.2
|
| 189 |
+
numpy==1.26.4
|
| 190 |
+
nltk==3.9.1
|
| 191 |
+
pip==25.0
|
| 192 |
+
charset-normalizer==3.3.2
|
| 193 |
+
prometheus-fastapi-instrumentator==7.0.2
|
| 194 |
+
numexpr==2.10.2
|
| 195 |
+
pyarrow==19.0.1
|
| 196 |
+
attrs==25.1.0
|
| 197 |
+
lm_eval==0.4.4
|
| 198 |
+
urllib3==2.3.0
|
| 199 |
+
mkl_random==1.2.8
|
| 200 |
+
httptools==0.6.4
|
| 201 |
+
gpustat==1.1.1
|
| 202 |
+
pluggy==1.5.0
|
| 203 |
+
huggingface-hub==0.30.2
|
| 204 |
+
triton==3.1.0
|
| 205 |
+
idna==3.7
|
| 206 |
+
ipython==8.20.0
|
| 207 |
+
pyparsing==3.2.1
|
| 208 |
+
rich-toolkit==0.13.2
|
| 209 |
+
googletrans==4.0.2
|
| 210 |
+
jupyter_core==5.7.2
|
| 211 |
+
zstandard==0.23.0
|
| 212 |
+
aiosignal==1.3.2
|
| 213 |
+
tyro==0.9.17
|
| 214 |
+
traitlets==5.14.3
|
| 215 |
+
h11==0.14.0
|
| 216 |
+
outlines==0.1.11
|
| 217 |
+
jupyter_client==8.6.3
|
| 218 |
+
loralib==0.1.2
|
| 219 |
+
kiwisolver==1.4.8
|
| 220 |
+
blake3==1.0.4
|
| 221 |
+
nvidia-cusparselt-cu12==0.6.2
|
| 222 |
+
rich==13.9.4
|
| 223 |
+
hf-xet==1.0.2
|
| 224 |
+
certifi==2025.1.31
|
| 225 |
+
wheel==0.45.1
|
| 226 |
+
pybind11==2.13.6
|
| 227 |
+
regex==2024.11.6
|
| 228 |
+
mpmath==1.3.0
|
| 229 |
+
transformers==4.51.3
|
| 230 |
+
flash_attn==2.7.4.post1
|
| 231 |
+
nvidia-curand-cu12==10.3.5.147
|
| 232 |
+
PySocks==1.7.1
|
| 233 |
+
gmpy2==2.2.1
|
| 234 |
+
iniconfig==2.0.0
|
| 235 |
+
pandas==2.2.3
|
| 236 |
+
Jinja2==3.1.5
|
| 237 |
+
msgpack==1.1.0
|
| 238 |
+
gguf==0.16.2
|
| 239 |
+
email_validator==2.2.0
|
| 240 |
+
tzdata==2025.1
|
| 241 |
+
cut-cross-entropy==25.1.1
|
| 242 |
+
tensorboard==2.19.0
|
| 243 |
+
matplotlib==3.10.1
|
| 244 |
+
jsonschema-specifications==2024.10.1
|
| 245 |
+
unsloth==2025.3.14
|
| 246 |
+
Werkzeug==3.1.3
|
| 247 |
+
opentelemetry-proto==1.26.0
|
| 248 |
+
fastrlock==0.8.3
|
| 249 |
+
dnspython==2.7.0
|
| 250 |
+
typeguard==4.4.2
|
| 251 |
+
opentelemetry-api==1.26.0
|
| 252 |
+
platformdirs==4.3.6
|
| 253 |
+
importlib_metadata==8.0.0
|
| 254 |
+
opentelemetry-semantic-conventions==0.47b0
|
| 255 |
+
sniffio==1.3.1
|
| 256 |
+
nvidia-cuda-cupti-cu12==12.4.127
|
| 257 |
+
scikit-learn==1.6.1
|
| 258 |
+
hpack==4.1.0
|
| 259 |
+
parso==0.8.4
|
| 260 |
+
torchaudio==2.6.0
|
| 261 |
+
xgrammar==0.1.18
|
| 262 |
+
executing==2.2.0
|
| 263 |
+
mkl_fft==1.3.11
|
| 264 |
+
vllm==0.8.4
|
| 265 |
+
word2number==1.1
|
| 266 |
+
pure_eval==0.2.3
|
| 267 |
+
watchfiles==1.0.4
|
| 268 |
+
pydub==0.25.1
|
| 269 |
+
mbstrdecoder==1.1.4
|
| 270 |
+
markdown-it-py==3.0.0
|
| 271 |
+
jsonschema==4.23.0
|
| 272 |
+
msgspec==0.19.0
|
| 273 |
+
rpds-py==0.23.1
|
| 274 |
+
wandb==0.19.9
|
| 275 |
+
matplotlib-inline==0.1.7
|
| 276 |
+
requests==2.32.3
|
| 277 |
+
interegular==0.3.3
|
| 278 |
+
pytablewriter==1.2.1
|
| 279 |
+
orjson==3.10.15
|
| 280 |
+
xformers==0.0.29.post2
|
| 281 |
+
fastapi-cli==0.0.7
|
| 282 |
+
mkl-service==2.4.0
|
| 283 |
+
opencv-python-headless==4.11.0.86
|
| 284 |
+
prompt_toolkit==3.0.50
|
| 285 |
+
trl==0.16.1
|
| 286 |
+
debugpy==1.8.13
|
| 287 |
+
pydantic==2.10.6
|
| 288 |
+
stack-data==0.6.3
|
| 289 |
+
tqdm-multiprocess==0.0.11
|
| 290 |
+
gradio_client==1.3.0
|
| 291 |
+
dill==0.3.8
|
| 292 |
+
evaluate==0.4.3
|
| 293 |
+
nvidia-cufft-cu12==11.2.1.3
|
| 294 |
+
nest-asyncio==1.6.0
|
| 295 |
+
pyzmq==26.2.1
|
| 296 |
+
tensorboard-data-server==0.7.2
|
| 297 |
+
docstring_parser==0.16
|
| 298 |
+
click==8.1.8
|
| 299 |
+
psutil==7.0.0
|
| 300 |
+
annotated-types==0.7.0
|
| 301 |
+
ninja==1.11.1.4
|
| 302 |
+
pillow==10.4.0
|
| 303 |
+
tcolorpy==0.1.7
|
| 304 |
+
einops==0.8.1
|
| 305 |
+
wcwidth==0.2.13
|
| 306 |
+
typer==0.15.2
|
| 307 |
+
tqdm==4.67.1
|
| 308 |
+
tomlkit==0.12.0
|
| 309 |
+
ipykernel==6.28.0
|
| 310 |
+
diffusers==0.32.2
|
| 311 |
+
mistral_common==1.5.4
|
| 312 |
+
setuptools==75.8.0
|
| 313 |
+
h2==4.2.0
|
| 314 |
+
cachetools==5.5.2
|
| 315 |
+
wrapt==1.17.2
|
| 316 |
+
pydantic_core==2.27.2
|
| 317 |
+
ffmpy==0.5.0
|
| 318 |
+
sacrebleu==2.5.1
|
| 319 |
+
outlines_core==0.1.26
|
| 320 |
+
jsonlines==4.0.0
|
| 321 |
+
fonttools==4.56.0
|
| 322 |
+
nvidia-nvtx-cu12==12.4.127
|
| 323 |
+
safetensors==0.5.3
|
| 324 |
+
opentelemetry-exporter-otlp-proto-grpc==1.26.0
|
| 325 |
+
bitsandbytes==0.45.3
|
| 326 |
+
nanobind==2.6.1
|
| 327 |
+
tokenizers==0.21.1
|
| 328 |
+
propcache==0.3.0
|
| 329 |
+
distro==1.9.0
|
| 330 |
+
python-multipart==0.0.20
|
| 331 |
+
uvloop==0.21.0
|
| 332 |
+
liger_kernel==0.5.5
|
| 333 |
+
python-json-logger==3.3.0
|
| 334 |
+
multidict==6.1.0
|
| 335 |
+
ray==2.43.0
|
| 336 |
+
opentelemetry-exporter-otlp-proto-http==1.26.0
|
| 337 |
+
typepy==1.3.4
|
| 338 |
+
torchvision==0.21.0
|
| 339 |
+
PyYAML==6.0.2
|
| 340 |
+
xxhash==3.5.0
|
| 341 |
+
anthropic==0.49.0
|
| 342 |
+
py-cpuinfo==9.0.0
|
| 343 |
+
compressed-tensors==0.9.3
|
| 344 |
+
opentelemetry-exporter-otlp-proto-common==1.26.0
|
| 345 |
+
opentelemetry-sdk==1.26.0
|
| 346 |
+
shtab==1.7.1
|
| 347 |
+
websockets==12.0
|
| 348 |
+
numba==0.61.2
|
| 349 |
+
llguidance==0.7.13
|
| 350 |
+
hyperframe==6.1.0
|
| 351 |
+
anyio==4.8.0
|
| 352 |
+
asttokens==3.0.0
|
| 353 |
+
blessed==1.20.0
|
| 354 |
+
colorama==0.4.6
|
| 355 |
+
shellingham==1.5.4
|
| 356 |
+
Brotli==1.0.9
|
| 357 |
+
sqlitedict==2.1.0
|
| 358 |
+
nvidia-cusparse-cu12==12.3.1.170
|
| 359 |
+
Deprecated==1.2.18
|
| 360 |
+
cloudpickle==3.1.1
|
| 361 |
+
pathvalidate==3.2.3
|
| 362 |
+
opentelemetry-semantic-conventions-ai==0.4.3
|
| 363 |
+
platformdirs==4.2.2
|
| 364 |
+
autocommand==2.2.2
|
| 365 |
+
backports.tarfile==1.2.0
|
| 366 |
+
importlib_metadata==8.0.0
|
| 367 |
+
inflect==7.3.1
|
| 368 |
+
jaraco.collections==5.1.0
|
| 369 |
+
jaraco.context==5.3.0
|
| 370 |
+
jaraco.functools==4.0.1
|
| 371 |
+
jaraco.text==3.12.1
|
| 372 |
+
more-itertools==10.3.0
|
| 373 |
+
packaging==24.2
|
| 374 |
+
tomli==2.0.1
|
| 375 |
+
typeguard==4.3.0
|
| 376 |
+
typing_extensions==4.12.2
|
| 377 |
+
wheel==0.45.1
|
| 378 |
+
zipp==3.19.2
|
wandb/run-20250921_062002-cothceaw/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,110 @@
| 1 |
+
{
|
| 2 |
+
"os": "Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28",
|
| 3 |
+
"python": "CPython 3.11.11",
|
| 4 |
+
"startedAt": "2025-09-21T10:20:02.294501Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--model",
|
| 7 |
+
"vllm",
|
| 8 |
+
"--model_args",
|
| 9 |
+
"pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000",
|
| 10 |
+
"--tasks",
|
| 11 |
+
"mmlu_prox_ar_math,mmlu_prox_bn_math",
|
| 12 |
+
"--batch_size",
|
| 13 |
+
"auto",
|
| 14 |
+
"--apply_chat_template",
|
| 15 |
+
"--output_path",
|
| 16 |
+
"ckpts/rerun",
|
| 17 |
+
"--log_samples",
|
| 18 |
+
"--gen_kwargs",
|
| 19 |
+
"max_gen_toks=20000",
|
| 20 |
+
"--wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto"
|
| 21 |
+
],
|
| 22 |
+
"program": "/home/jdhwang/.conda/envs/llm/bin/lm_eval",
|
| 23 |
+
"git": {
|
| 24 |
+
"remote": "[email protected]:jd730/BRIDGE-private.git",
|
| 25 |
+
"commit": "57228cdf2aa2656e94d94dc3f5530986c0f48545"
|
| 26 |
+
},
|
| 27 |
+
"email": "[email protected]",
|
| 28 |
+
"root": "ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309",
|
| 29 |
+
"host": "node1803",
|
| 30 |
+
"executable": "/orcd/home/001/jdhwang/.conda/envs/llm/bin/python",
|
| 31 |
+
"cpu_count": 64,
|
| 32 |
+
"cpu_count_logical": 128,
|
| 33 |
+
"gpu": "NVIDIA H100 80GB HBM3",
|
| 34 |
+
"gpu_count": 2,
|
| 35 |
+
"disk": {
|
| 36 |
+
"/": {
|
| 37 |
+
"total": "464506159104",
|
| 38 |
+
"used": "12265783296"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"memory": {
|
| 42 |
+
"total": "2163473002496"
|
| 43 |
+
},
|
| 44 |
+
"gpu_nvidia": [
|
| 45 |
+
{
|
| 46 |
+
"name": "NVIDIA H100 80GB HBM3",
|
| 47 |
+
"memoryTotal": "85520809984",
|
| 48 |
+
"cudaCores": 16896,
|
| 49 |
+
"architecture": "Hopper",
|
| 50 |
+
"uuid": "GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b"
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"name": "NVIDIA H100 80GB HBM3",
|
| 54 |
+
"memoryTotal": "85520809984",
|
| 55 |
+
"cudaCores": 16896,
|
| 56 |
+
"architecture": "Hopper",
|
| 57 |
+
"uuid": "GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b"
|
| 58 |
+
}
|
| 59 |
+
],
|
| 60 |
+
"cudaVersion": "12.4",
|
| 61 |
+
"slurm": {
|
| 62 |
+
"array_job_id": "4507342",
|
| 63 |
+
"array_task_count": "8",
|
| 64 |
+
"array_task_id": "0",
|
| 65 |
+
"array_task_max": "7",
|
| 66 |
+
"array_task_min": "0",
|
| 67 |
+
"array_task_step": "1",
|
| 68 |
+
"cluster_name": "eofe7",
|
| 69 |
+
"conf": "/etc/slurm/slurm.conf",
|
| 70 |
+
"cpus_on_node": "16",
|
| 71 |
+
"cpus_per_task": "16",
|
| 72 |
+
"gpus_on_node": "2",
|
| 73 |
+
"gtids": "0",
|
| 74 |
+
"job_account": "mit_general",
|
| 75 |
+
"job_cpus_per_node": "16",
|
| 76 |
+
"job_end_time": "1758493186",
|
| 77 |
+
"job_gid": "209655",
|
| 78 |
+
"job_gpus": "1,2",
|
| 79 |
+
"job_id": "4535465",
|
| 80 |
+
"job_name": "mmlu_prox.sh",
|
| 81 |
+
"job_nodelist": "node1803",
|
| 82 |
+
"job_num_nodes": "1",
|
| 83 |
+
"job_partition": "ou_bcs_low",
|
| 84 |
+
"job_qos": "normal",
|
| 85 |
+
"job_start_time": "1758449986",
|
| 86 |
+
"job_uid": "209655",
|
| 87 |
+
"job_user": "jdhwang",
|
| 88 |
+
"jobid": "4535465",
|
| 89 |
+
"localid": "0",
|
| 90 |
+
"mem_per_node": "131072",
|
| 91 |
+
"nnodes": "1",
|
| 92 |
+
"nodeid": "0",
|
| 93 |
+
"nodelist": "node1803",
|
| 94 |
+
"nprocs": "1",
|
| 95 |
+
"ntasks": "1",
|
| 96 |
+
"ntasks_per_node": "1",
|
| 97 |
+
"oom_kill_step": "0",
|
| 98 |
+
"prio_process": "0",
|
| 99 |
+
"procid": "0",
|
| 100 |
+
"script_context": "prolog_task",
|
| 101 |
+
"submit_dir": "/orcd/home/002/jdhwang/BRIDGE",
|
| 102 |
+
"submit_host": "orcd-login003.mit.edu",
|
| 103 |
+
"task_pid": "1320304",
|
| 104 |
+
"tasks_per_node": "1",
|
| 105 |
+
"topology_addr": "node1803",
|
| 106 |
+
"topology_addr_pattern": "node",
|
| 107 |
+
"tres_per_task": "cpu=16"
|
| 108 |
+
},
|
| 109 |
+
"writerId": "b4tnkkvikp4ud9d1f7t2v1f37ei0r2ow"
|
| 110 |
+
}
|
wandb/run-20250921_062002-cothceaw/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
| 1 |
+
{"_runtime":4818,"_step":3,"mmlu_prox_ar_math_eval_results":{"nrows":1351,"ncols":8,"size":89484352,"sha256":"1d425cdaeb765848e725c89a34944d791bdbf0248c6361032236b97f9611117e","_latest_artifact_path":"wandb-client-artifact://r4rn6kktqo8l0um7ttx1ue77yefblg4bjuul5xh36eo036nw65had7u778c1o0h584duz5spxu93ogp1sydi6ps1f7e3imex3lze4vb4gslw6odv77pvqpc282057p94:latest/mmlu_prox_ar_math_eval_results.table.json","_type":"table-file","log_mode":"IMMUTABLE","artifact_path":"wandb-client-artifact://4o3rezrpees9hoiy123dfot64jhr1avjzbsglu96w3nb6bvqo7r9nea1zpq6zrui21syjx86ytqafiqadkxalsovbu9hbu5xuig9vmqholaskqwf4fwujyo7wtf12a0n/mmlu_prox_ar_math_eval_results.table.json","path":"media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json"},"mmlu_prox_bn_math_eval_results":{"path":"media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json","log_mode":"IMMUTABLE","_type":"table-file","ncols":8,"_latest_artifact_path":"wandb-client-artifact://yrkq6zvhmu4okkq3sfdgrab1lnmdmfhv1de8t3mmak4lzthxfrmi8anklhmkr55kv80kuxxms6g4nss6yhg5y1of7bferet1zu65qvb21hgfr1y9ahpspphz37lf2nmq:latest/mmlu_prox_bn_math_eval_results.table.json","size":55489730,"sha256":"8f734c70b94d2677510f9200822c1810a65f0546ff35668ea1df59d81ec4ba33","nrows":1351,"artifact_path":"wandb-client-artifact://p7hlv4z2fp2097ris2q56ne0zq48hm4dhjzvq5oly1ch8bfxb5vrvg12wvtfxoe6yg7o671ug2r9ukhw2uqyzkdu3s1nsoq7fajb1mv7bgfdc1vimxlhjan1auugg7fx/mmlu_prox_bn_math_eval_results.table.json"},"mmlu_prox_ar_math/exact_match_stderr,custom-extract":"N/A","mmlu_prox_bn_math/alias":"math","mmlu_prox_ar_math/exact_match,custom-extract":0.27905255366395265,"_timestamp":1.7584548197219381e+09,"mmlu_prox_bn_math/exact_match,custom-extract":0.14433752775721687,"evaluation/eval_results":{"_latest_artifact_path":"wandb-client-artifact://fs7vvo359n7zetw0n93n6no3jnok4v4xr41uurzhi3oe51ox6y2umo5v79g77afo64weve9g6v7lxxe7k19dji47nxjd5fz1r4mva1ftvcpnlezc70b60s027i8r67ze:latest/evaluation/eval_results.table.json","ncols":7,"artifact_path":"wandb-client-artifact://nkoxm3fcoh17kqp4tvv4avy26i5fq3eaju412t711emnjajv88orrsf3ri9qsynz54066d4ze25cbi9v5x7avwdh7scv36ttkgoid25eqaq6y0z4c6ltc088wifvn50e/evaluation/eval_results.table.json","log_mode":"IMMUTABLE","sha256":"2a7ec9e10306569eae6efb589dd0cd352624b8846fd793982cce71b425f2b2c3","path":"media/table/evaluation/eval_results_1_2a7ec9e10306569eae6e.table.json","size":285,"nrows":2,"_type":"table-file"},"mmlu_prox_ar_math/alias":"math","mmlu_prox_bn_math/exact_match_stderr,custom-extract":"N/A","_wandb":{"runtime":4818}}
|
wandb/run-20250921_062002-cothceaw/logs/debug-core.log
ADDED
|
@@ -0,0 +1,16 @@
| 1 |
+
{"time":"2025-09-21T06:20:02.49713505-04:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoo93japy/port-1320321.txt","pid":1320321,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-09-21T06:20:02.497479399-04:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1320321}
|
| 3 |
+
{"time":"2025-09-21T06:20:02.497469852-04:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1320321-1320505-1610173251/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-09-21T06:20:02.683932338-04:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-09-21T06:20:02.688959867-04:00","level":"INFO","msg":"handleInformInit: received","streamId":"cothceaw","id":"1(@)"}
|
| 6 |
+
{"time":"2025-09-21T06:20:02.919780696-04:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"cothceaw","id":"1(@)"}
|
| 7 |
+
{"time":"2025-09-21T07:40:27.042657191-04:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"cothceaw","id":"1(@)"}
|
| 8 |
+
{"time":"2025-09-21T07:40:27.058852194-04:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"cothceaw","id":"1(@)"}
|
| 9 |
+
{"time":"2025-09-21T07:40:28.042788818-04:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 10 |
+
{"time":"2025-09-21T07:40:28.042813831-04:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
|
| 11 |
+
{"time":"2025-09-21T07:40:28.042819045-04:00","level":"INFO","msg":"server is shutting down"}
|
| 12 |
+
{"time":"2025-09-21T07:40:28.04282347-04:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 13 |
+
{"time":"2025-09-21T07:40:28.042871233-04:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 14 |
+
{"time":"2025-09-21T07:40:28.042885088-04:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
|
| 15 |
+
{"time":"2025-09-21T07:40:28.042898748-04:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1320321-1320505-1610173251/socket","Net":"unix"}}
|
| 16 |
+
{"time":"2025-09-21T07:40:28.042918708-04:00","level":"INFO","msg":"server is closed"}
|
wandb/run-20250921_062002-cothceaw/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,15 @@
| 1 |
+
{"time":"2025-09-21T06:20:02.689611613-04:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-09-21T06:20:02.91974477-04:00","level":"INFO","msg":"stream: created new stream","id":"cothceaw"}
|
| 3 |
+
{"time":"2025-09-21T06:20:02.919776235-04:00","level":"INFO","msg":"stream: started","id":"cothceaw"}
|
| 4 |
+
{"time":"2025-09-21T06:20:02.919783396-04:00","level":"INFO","msg":"handler: started","stream_id":"cothceaw"}
|
| 5 |
+
{"time":"2025-09-21T06:20:02.919795631-04:00","level":"INFO","msg":"writer: Do: started","stream_id":"cothceaw"}
|
| 6 |
+
{"time":"2025-09-21T06:20:02.919791628-04:00","level":"INFO","msg":"sender: started","stream_id":"cothceaw"}
|
| 7 |
+
{"time":"2025-09-21T06:20:03.320481778-04:00","level":"WARN","msg":"handleCodeSave: program relative path is empty"}
|
| 8 |
+
{"time":"2025-09-21T06:20:03.32057814-04:00","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
|
| 9 |
+
{"time":"2025-09-21T07:40:22.552065372-04:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading artifact run-cothceaw-mmlu_prox_ar_math_eval_results","runtime_seconds":4.866579888,"subtasks":[{"desc":"mmlu_prox_ar_math_eval_results.table.json","runtime_seconds":4.47880473,"progress":"36.1MB/85.3MB"}]},{"desc":"uploading media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json","runtime_seconds":4.303737975,"progress":"67.8MB/85.3MB"},{"desc":"uploading artifact mmlu_prox_ar_math","runtime_seconds":3.824002162,"subtasks":[{"desc":"mmlu_prox_ar_math_eval_samples.json","runtime_seconds":3.315562372,"progress":"21.4MB/37.3MB"}]},{"desc":"uploading artifact run-cothceaw-mmlu_prox_bn_math_eval_results","runtime_seconds":3.051542787,"subtasks":[{"desc":"mmlu_prox_bn_math_eval_results.table.json","runtime_seconds":2.70140314,"progress":"16.1MB/52.9MB"}]},{"desc":"uploading media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json","runtime_seconds":2.7112040950000003,"progress":"31.2MB/52.9MB"},{"desc":"uploading artifact mmlu_prox_bn_math","runtime_seconds":2.359250196,"subtasks":[{"desc":"mmlu_prox_bn_math_eval_samples.json","runtime_seconds":1.890807991,"progress":"9.9MB/31.3MB"}]}],"total_operations":6}}
|
| 10 |
+
{"time":"2025-09-21T07:40:26.842250567-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 11 |
+
{"time":"2025-09-21T07:40:27.042881025-04:00","level":"INFO","msg":"stream: closing","id":"cothceaw"}
|
| 12 |
+
{"time":"2025-09-21T07:40:27.042891927-04:00","level":"INFO","msg":"handler: closed","stream_id":"cothceaw"}
|
| 13 |
+
{"time":"2025-09-21T07:40:27.042901472-04:00","level":"INFO","msg":"sender: closed","stream_id":"cothceaw"}
|
| 14 |
+
{"time":"2025-09-21T07:40:27.042898277-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":"cothceaw"}
|
| 15 |
+
{"time":"2025-09-21T07:40:27.043173368-04:00","level":"INFO","msg":"stream: closed","id":"cothceaw"}
|
wandb/run-20250921_062002-cothceaw/logs/debug.log
ADDED
|
@@ -0,0 +1,29 @@
| 1 |
+
2025-09-21 06:20:02,469 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-09-21 06:20:02,469 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Configure stats pid to 1320321
|
| 3 |
+
2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Loading settings from /home/jdhwang/.config/wandb/settings
|
| 4 |
+
2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Loading settings from /orcd/home/002/jdhwang/BRIDGE/wandb/settings
|
| 5 |
+
2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:setup_run_log_directory():703] Logging user logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250921_062002-cothceaw/logs/debug.log
|
| 7 |
+
2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250921_062002-cothceaw/logs/debug-internal.log
|
| 8 |
+
2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'_wandb': {}}
|
| 11 |
+
2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-09-21 06:20:02,684 INFO MainThread:1320321 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-09-21 06:20:02,686 INFO MainThread:1320321 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-09-21 06:20:02,688 INFO MainThread:1320321 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-09-21 06:20:02,711 INFO MainThread:1320321 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-09-21 06:20:03,319 INFO MainThread:1320321 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-09-21 06:20:03,673 INFO MainThread:1320321 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-09-21 06:20:03,673 INFO MainThread:1320321 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-09-21 06:20:03,673 INFO MainThread:1320321 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-09-21 06:20:03,673 INFO MainThread:1320321 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-09-21 06:20:03,676 INFO MainThread:1320321 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-09-21 07:40:15,859 INFO MainThread:1320321 [wandb_run.py:_config_callback():1363] config_cb None None {'task_configs': {'mmlu_prox_ar_math': {'task': 'mmlu_prox_ar_math', 'task_alias': 'math', 'dataset_path': 'li-lab/MMLU-ProX', 'dataset_name': 'ar', 'test_split': 'test', 'fewshot_split': 'validation', 'process_docs': "functools.partial(<function process_docs at 0x14d51fcc0860>, subject='math')", 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fcc0b80>, including_answer=False)', 'doc_to_target': 'answer', 'description': "فيما يلي أسئلة اختيار من متعدد (مع إجابات) حول الرياضيات. فكر خطوة بخطوة ثم أنهِ إجابتك بـ 'الإجابة هي (X)' حيث X هو حرف الخيار الصحيح.\n", 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'fewshot_config': {'sampler': 'first_n', 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fcc0e00>, including_answer=True)', 'doc_to_target': ''}, 'num_fewshot': 5, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'until': ['</s>', 'Q:', 'سؤال:', '<|im_end|>'], 'do_sample': False, 'temperature': 0.0, 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'name': 'custom-extract', 'filter': [{'function': 'regex', 'regex_pattern': 'الإجابة هي \\(?([ABCDEFGHIJ])\\)?'}, {'function': 'take_first'}]}], 'should_decontaminate': False, 'metadata': {'version': 0.0}}, 'mmlu_prox_bn_math': {'task': 'mmlu_prox_bn_math', 'task_alias': 'math', 'dataset_path': 'li-lab/MMLU-ProX', 'dataset_name': 'bn', 'test_split': 'test', 'fewshot_split': 'validation', 'process_docs': "functools.partial(<function process_docs at 0x14d51fb79e40>, subject='math')", 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fb7a200>, including_answer=False)', 'doc_to_target': 'answer', 'description': 'নিম্নলিখিত গণিত সম্পর্কে বহুনির্বাচনী প্রশ্ন (উত্তরসহ)। ধাপে ধাপে চিন্তা করুন এবং তারপর আপনার উত্তর "উত্তর হল (X)" দিয়ে শেষ করুন যেখানে X হল সঠিক বিকল্পের অক্ষর।\n', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'fewshot_config': {'sampler': 'first_n', 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fb7b740>, including_answer=True)', 'doc_to_target': ''}, 'num_fewshot': 5, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'until': ['</s>', 'Q:', 'প্রশ্ন:', '<|im_end|>'], 'do_sample': False, 'temperature': 0.0, 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'name': 'custom-extract', 'filter': [{'function': 'regex', 'regex_pattern': 'উত্তর হল \\(?([ABCDEFGHIJ])\\)?'}, {'function': 'take_first'}]}], 'should_decontaminate': False, 'metadata': {'version': 0.0}}}, 'cli_configs': {'model': 'vllm', 'model_args': 'pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000', 'batch_size': 'auto', 'batch_sizes': [], 'device': None, 'use_cache': None, 'limit': None, 'bootstrap_iters': 0, 'gen_kwargs': {'max_gen_toks': 20000}, 'random_seed': 0, 'numpy_seed': 1234, 'torch_seed': 1234, 'fewshot_seed': 1234}}
|
| 23 |
+
2025-09-21 07:40:21,458 INFO MainThread:1320321 [wandb_run.py:_finish():2224] finishing run jdhwang/MRPO_eval/cothceaw
|
| 24 |
+
2025-09-21 07:40:21,458 INFO MainThread:1320321 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
|
| 25 |
+
2025-09-21 07:40:21,550 INFO MainThread:1320321 [wandb_run.py:_restore():2405] restore
|
| 26 |
+
2025-09-21 07:40:21,550 INFO MainThread:1320321 [wandb_run.py:_restore():2411] restore done
|
| 27 |
+
2025-09-21 07:40:27,041 INFO MainThread:1320321 [wandb_run.py:_footer_history_summary_info():3903] rendering history
|
| 28 |
+
2025-09-21 07:40:27,042 INFO MainThread:1320321 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
|
| 29 |
+
2025-09-21 07:40:27,042 INFO MainThread:1320321 [wandb_run.py:_footer_sync_info():3864] logging synced files
|