jdhwang committed on
Commit
d7753f7
·
verified ·
1 Parent(s): fe29901

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +22 -0
  2. added_tokens.json +24 -0
  3. config.json +28 -0
  4. generation_config.json +14 -0
  5. merges.txt +0 -0
  6. model-00001-of-00004.safetensors +3 -0
  7. model-00002-of-00004.safetensors +3 -0
  8. model-00003-of-00004.safetensors +3 -0
  9. model-00004-of-00004.safetensors +3 -0
  10. model.safetensors.index.json +346 -0
  11. runs/Sep19_17-13-37_node1803/events.out.tfevents.1758316441.node1803.939342.0 +3 -0
  12. special_tokens_map.json +25 -0
  13. tokenizer.json +3 -0
  14. tokenizer_config.json +208 -0
  15. training_args.bin +3 -0
  16. vocab.json +0 -0
  17. wandb/debug-internal.log +14 -0
  18. wandb/debug.log +24 -0
  19. wandb/run-20250920_081121-2oxex54w/files/config.yaml +644 -0
  20. wandb/run-20250920_081121-2oxex54w/files/media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json +1 -0
  21. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json +3 -0
  22. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_de_eval_results_3_fabaf33255f24add59c1.table.json +0 -0
  23. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_en_eval_results_4_82791ccc59d2c61a093c.table.json +0 -0
  24. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_es_eval_results_5_db5ed66b097e218c4da6.table.json +0 -0
  25. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_fr_eval_results_6_214cb1a5a2cdee9f330e.table.json +0 -0
  26. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json +3 -0
  27. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json +3 -0
  28. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json +3 -0
  29. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json +3 -0
  30. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_th_eval_results_11_baa86a95f601e3c721a0.table.json +0 -0
  31. wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_zh_eval_results_12_2882185f7e816bbcc2fe.table.json +0 -0
  32. wandb/run-20250920_081121-2oxex54w/files/output.log +212 -0
  33. wandb/run-20250920_081121-2oxex54w/files/requirements.txt +378 -0
  34. wandb/run-20250920_081121-2oxex54w/files/wandb-metadata.json +110 -0
  35. wandb/run-20250920_081121-2oxex54w/files/wandb-summary.json +1 -0
  36. wandb/run-20250920_081121-2oxex54w/logs/debug-core.log +16 -0
  37. wandb/run-20250920_081121-2oxex54w/logs/debug-internal.log +15 -0
  38. wandb/run-20250920_081121-2oxex54w/logs/debug.log +29 -0
  39. wandb/run-20250920_081121-2oxex54w/run-2oxex54w.wandb +3 -0
  40. wandb/run-20250921_062002-cothceaw/files/config.yaml +252 -0
  41. wandb/run-20250921_062002-cothceaw/files/media/table/evaluation/eval_results_1_2a7ec9e10306569eae6e.table.json +1 -0
  42. wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json +3 -0
  43. wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json +3 -0
  44. wandb/run-20250921_062002-cothceaw/files/output.log +81 -0
  45. wandb/run-20250921_062002-cothceaw/files/requirements.txt +378 -0
  46. wandb/run-20250921_062002-cothceaw/files/wandb-metadata.json +110 -0
  47. wandb/run-20250921_062002-cothceaw/files/wandb-summary.json +1 -0
  48. wandb/run-20250921_062002-cothceaw/logs/debug-core.log +16 -0
  49. wandb/run-20250921_062002-cothceaw/logs/debug-internal.log +15 -0
  50. wandb/run-20250921_062002-cothceaw/logs/debug.log +29 -0
.gitattributes CHANGED
@@ -33,3 +33,25 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json filter=lfs diff=lfs merge=lfs -text
38
+ wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json filter=lfs diff=lfs merge=lfs -text
39
+ wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json filter=lfs diff=lfs merge=lfs -text
40
+ wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json filter=lfs diff=lfs merge=lfs -text
41
+ wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json filter=lfs diff=lfs merge=lfs -text
42
+ wandb/run-20250920_081121-2oxex54w/run-2oxex54w.wandb filter=lfs diff=lfs merge=lfs -text
43
+ wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json filter=lfs diff=lfs merge=lfs -text
44
+ wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json filter=lfs diff=lfs merge=lfs -text
45
+ wandb/run-20250921_062002-cothceaw/run-cothceaw.wandb filter=lfs diff=lfs merge=lfs -text
46
+ wandb/run-20250921_081515-hrm6dwvu/files/media/table/mmlu_prox_de_math_eval_results_2_6011ed1b84fa34f7a465.table.json filter=lfs diff=lfs merge=lfs -text
47
+ wandb/run-20250921_081515-hrm6dwvu/files/media/table/mmlu_prox_en_math_eval_results_3_574b6ec5b263ae2e258e.table.json filter=lfs diff=lfs merge=lfs -text
48
+ wandb/run-20250921_081515-hrm6dwvu/run-hrm6dwvu.wandb filter=lfs diff=lfs merge=lfs -text
49
+ wandb/run-20250921_090332-hrm6dwvu/files/media/table/mmlu_prox_es_math_eval_results_2_f5b5ddbcd29f48f36acc.table.json filter=lfs diff=lfs merge=lfs -text
50
+ wandb/run-20250921_090332-hrm6dwvu/files/media/table/mmlu_prox_fr_math_eval_results_3_ea21893417450a1c19c1.table.json filter=lfs diff=lfs merge=lfs -text
51
+ wandb/run-20250921_090332-hrm6dwvu/run-hrm6dwvu.wandb filter=lfs diff=lfs merge=lfs -text
52
+ wandb/run-20250921_092606-ztqaaqj5/files/media/table/mmlu_prox_ko_math_eval_results_2_0960ca8c88e3af630287.table.json filter=lfs diff=lfs merge=lfs -text
53
+ wandb/run-20250921_092606-ztqaaqj5/files/media/table/mmlu_prox_pt_math_eval_results_3_14279190f4728eaf809a.table.json filter=lfs diff=lfs merge=lfs -text
54
+ wandb/run-20250921_092606-ztqaaqj5/run-ztqaaqj5.wandb filter=lfs diff=lfs merge=lfs -text
55
+ wandb/run-20250921_123322-c7t8flvu/run-c7t8flvu.wandb filter=lfs diff=lfs merge=lfs -text
56
+ wandb/run-20250921_140327-c7t8flvu/files/media/table/mmlu_prox_zh_math_eval_results_2_9045c6c9481d0396b399.table.json filter=lfs diff=lfs merge=lfs -text
57
+ wandb/run-20250921_140327-c7t8flvu/run-c7t8flvu.wandb filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 3584,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 18944,
12
+ "max_position_embeddings": 32768,
13
+ "max_window_layers": 28,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 28,
16
+ "num_hidden_layers": 28,
17
+ "num_key_value_heads": 4,
18
+ "rms_norm_eps": 1e-06,
19
+ "rope_scaling": null,
20
+ "rope_theta": 1000000.0,
21
+ "sliding_window": 131072,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.51.3",
25
+ "use_cache": true,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 152064
28
+ }
generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.51.3"
14
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9b62bbfc2c3c82f51429dab8f74f685996c2a31dd1d2b7ea6ba47768ea4e1ab
3
+ size 4877660776
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:421b39cbbc81933e028ad678f3252ec9cdabe893fcc96b4a75836d9fa7e58be3
3
+ size 4932751008
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f63898433cd50eb366bf80588df88478eb1a1dba660aa086a457cd33cb44c50
3
+ size 4330865200
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:383305d92fbc2fbb3397d929ab45247196d7289e0fbbcd75f7239dea84d7916e
3
+ size 1089994880
model.safetensors.index.json ADDED
@@ -0,0 +1,346 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 15231233024
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00004-of-00004.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
13
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
14
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
16
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
17
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
18
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
19
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
20
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
22
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
23
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
24
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
25
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
26
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
27
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
28
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
29
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
30
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
31
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
32
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
33
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
34
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
35
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
36
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
38
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
39
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
40
+ "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
41
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
42
+ "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
43
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
44
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
45
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
46
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
47
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
48
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
49
+ "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
50
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
51
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
52
+ "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
53
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
54
+ "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
55
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
56
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
57
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
58
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
59
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
60
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
61
+ "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
62
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
63
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
64
+ "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
65
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
66
+ "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
67
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
68
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
69
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
70
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
71
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
72
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
73
+ "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
74
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
75
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
76
+ "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
77
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
78
+ "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
79
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
80
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
81
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
82
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
83
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
84
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
85
+ "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
86
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
87
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
88
+ "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
89
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
90
+ "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
91
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
92
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
93
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
94
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
95
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
96
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
97
+ "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
98
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
99
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
100
+ "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
101
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
102
+ "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
103
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
104
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
105
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
106
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
107
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
108
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
110
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
111
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
112
+ "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
113
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
114
+ "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
115
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
116
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
117
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
118
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
119
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
120
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
121
+ "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
122
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
123
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
124
+ "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
125
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
126
+ "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
127
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
128
+ "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
129
+ "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
130
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
131
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
133
+ "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
134
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
135
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
136
+ "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
137
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
138
+ "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
139
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
140
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
141
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
142
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
143
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
144
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
145
+ "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
146
+ "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
147
+ "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
148
+ "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
149
+ "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
150
+ "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
151
+ "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
152
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
153
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
154
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
155
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
156
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
157
+ "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
158
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
159
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
160
+ "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
161
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
162
+ "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
163
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
164
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
165
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
166
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
167
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
168
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
169
+ "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
170
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
171
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
172
+ "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
173
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
174
+ "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
175
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
176
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
177
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
178
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
179
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
180
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
181
+ "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
182
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
183
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
184
+ "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
185
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
186
+ "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
187
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
188
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
189
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
190
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
191
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
192
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
193
+ "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
194
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
195
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
196
+ "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
197
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
198
+ "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
199
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
200
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
201
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
202
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
203
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
204
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
205
+ "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
206
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
207
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
208
+ "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
209
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
210
+ "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
211
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
212
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
213
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
214
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
215
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
216
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
217
+ "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
218
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
219
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
220
+ "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
221
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
222
+ "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
223
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
224
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
225
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
226
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
227
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
228
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
229
+ "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
230
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
231
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
232
+ "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
233
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
234
+ "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
235
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
236
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
237
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
238
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
239
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
240
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
241
+ "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
242
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
243
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
244
+ "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
245
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
246
+ "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
247
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
248
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
249
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
250
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
251
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
252
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
253
+ "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
254
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
255
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
256
+ "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
257
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
258
+ "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
259
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
260
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
261
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
262
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
263
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
264
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
265
+ "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
266
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
267
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
268
+ "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
269
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
270
+ "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
271
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
272
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
273
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
274
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
275
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
276
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
277
+ "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
278
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
279
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
280
+ "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
281
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
282
+ "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
283
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
284
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
285
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
286
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
287
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
288
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
289
+ "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
290
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
291
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
292
+ "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
293
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
294
+ "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
295
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
296
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
297
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
298
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
299
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
300
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
301
+ "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
302
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
303
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
304
+ "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
305
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
306
+ "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
307
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
308
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
309
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
310
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
311
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
312
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
313
+ "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
314
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
315
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
316
+ "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
317
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
318
+ "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
319
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
320
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
321
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
322
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
323
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
324
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
325
+ "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
326
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
327
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
328
+ "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
329
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
330
+ "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
331
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
332
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
333
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
334
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
335
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
336
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
337
+ "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
338
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
339
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
340
+ "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
341
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
342
+ "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
343
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
344
+ "model.norm.weight": "model-00003-of-00004.safetensors"
345
+ }
346
+ }
runs/Sep19_17-13-37_node1803/events.out.tfevents.1758316441.node1803.939342.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1334be7669bb7582bf0cf1d5dd15b646bd1693505aa3419b94c9497a9e867b
3
+ size 1327067
special_tokens_map.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": "<|fim_pad|>"
25
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|im_end|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 131072,
204
+ "pad_token": "<|fim_pad|>",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2be420de1f77067e60447c82f1dc47b174daa0d938a57517afb744395df404c
3
+ size 7096
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/debug-internal.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-23T13:02:54.941896669-04:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-09-23T13:02:55.63168835-04:00","level":"INFO","msg":"stream: created new stream","id":"j5jpgoah"}
3
+ {"time":"2025-09-23T13:02:55.631723283-04:00","level":"INFO","msg":"stream: started","id":"j5jpgoah"}
4
+ {"time":"2025-09-23T13:02:55.63173562-04:00","level":"INFO","msg":"handler: started","stream_id":"j5jpgoah"}
5
+ {"time":"2025-09-23T13:02:55.631745751-04:00","level":"INFO","msg":"sender: started","stream_id":"j5jpgoah"}
6
+ {"time":"2025-09-23T13:02:55.631746765-04:00","level":"INFO","msg":"writer: Do: started","stream_id":"j5jpgoah"}
7
+ {"time":"2025-09-23T13:02:56.136638253-04:00","level":"WARN","msg":"handleCodeSave: program relative path is empty"}
8
+ {"time":"2025-09-23T13:02:56.136911276-04:00","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
9
+ {"time":"2025-09-23T13:03:13.563836157-04:00","level":"INFO","msg":"stream: closing","id":"j5jpgoah"}
10
+ {"time":"2025-09-23T13:03:14.427323175-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
11
+ {"time":"2025-09-23T13:03:14.567379829-04:00","level":"INFO","msg":"handler: closed","stream_id":"j5jpgoah"}
12
+ {"time":"2025-09-23T13:03:14.567413727-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":"j5jpgoah"}
13
+ {"time":"2025-09-23T13:03:14.567456299-04:00","level":"INFO","msg":"sender: closed","stream_id":"j5jpgoah"}
14
+ {"time":"2025-09-23T13:03:14.668731827-04:00","level":"INFO","msg":"stream: closed","id":"j5jpgoah"}
wandb/debug.log ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-23 13:02:54,151 INFO MainThread:382253 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] Configure stats pid to 382253
3
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] Loading settings from /home/jdhwang/.config/wandb/settings
4
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] Loading settings from /orcd/home/002/jdhwang/BRIDGE/wandb/settings
5
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_setup.py:_flush():80] loaded run ID from ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/wandb-resume.json
7
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:setup_run_log_directory():703] Logging user logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250923_130250-j5jpgoah/logs/debug.log
8
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250923_130250-j5jpgoah/logs/debug-internal.log
9
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:init():830] calling init triggers
10
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
11
+ config: {'_wandb': {}}
12
+ 2025-09-23 13:02:54,152 INFO MainThread:382253 [wandb_init.py:init():871] starting backend
13
+ 2025-09-23 13:02:54,499 INFO MainThread:382253 [wandb_init.py:init():874] sending inform_init request
14
+ 2025-09-23 13:02:54,503 INFO MainThread:382253 [wandb_init.py:init():882] backend started and connected
15
+ 2025-09-23 13:02:54,504 INFO MainThread:382253 [wandb_init.py:init():953] updated telemetry
16
+ 2025-09-23 13:02:54,665 INFO MainThread:382253 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
17
+ 2025-09-23 13:02:56,135 INFO MainThread:382253 [wandb_init.py:init():1024] run resumed
18
+ 2025-09-23 13:02:56,135 INFO MainThread:382253 [wandb_init.py:init():1029] starting run threads in backend
19
+ 2025-09-23 13:02:56,501 INFO MainThread:382253 [wandb_run.py:_console_start():2458] atexit reg
20
+ 2025-09-23 13:02:56,518 INFO MainThread:382253 [wandb_run.py:_redirect():2306] redirect: wrap_raw
21
+ 2025-09-23 13:02:56,518 INFO MainThread:382253 [wandb_run.py:_redirect():2375] Wrapping output streams.
22
+ 2025-09-23 13:02:56,518 INFO MainThread:382253 [wandb_run.py:_redirect():2398] Redirects installed.
23
+ 2025-09-23 13:02:56,521 INFO MainThread:382253 [wandb_init.py:init():1075] run started, returning control to user process
24
+ 2025-09-23 13:03:13,563 INFO MsgRouterThr:382253 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250920_081121-2oxex54w/files/config.yaml ADDED
@@ -0,0 +1,644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ bdsaggp24nt8kfc8qjgq21gi927g7e3o:
6
+ args:
7
+ - --model
8
+ - vllm
9
+ - --model_args
10
+ - pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
11
+ - --tasks
12
+ - mgsm_native_cot_ja,mgsm_native_cot_en,mgsm_native_cot_th,mgsm_native_cot_es,mgsm_native_cot_bn,mgsm_native_cot_te,mgsm_native_cot_fr,mgsm_native_cot_zh,mgsm_native_cot_sw,mgsm_native_cot_de,mgsm_native_cot_ru
13
+ - --batch_size
14
+ - auto
15
+ - --apply_chat_template
16
+ - --output_path
17
+ - ckpts/rerun
18
+ - --log_samples
19
+ - --gen_kwargs
20
+ - max_gen_toks=20000
21
+ - --wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto
22
+ cpu_count: 64
23
+ cpu_count_logical: 128
24
+ cudaVersion: "12.4"
25
+ disk:
26
+ /:
27
+ total: "464506159104"
28
+ used: "12268101632"
29
30
+ executable: /orcd/home/001/jdhwang/.conda/envs/llm/bin/python
31
+ git:
32
+ commit: bb8b2be1f7420f9c6a3d65f0eaf3072732d73123
33
+ remote: [email protected]:jd730/BRIDGE-private.git
34
+ gpu: NVIDIA H100 80GB HBM3
35
+ gpu_count: 2
36
+ gpu_nvidia:
37
+ - architecture: Hopper
38
+ cudaCores: 16896
39
+ memoryTotal: "85520809984"
40
+ name: NVIDIA H100 80GB HBM3
41
+ uuid: GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b
42
+ - architecture: Hopper
43
+ cudaCores: 16896
44
+ memoryTotal: "85520809984"
45
+ name: NVIDIA H100 80GB HBM3
46
+ uuid: GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b
47
+ host: node1803
48
+ memory:
49
+ total: "2163473002496"
50
+ os: Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28
51
+ program: /home/jdhwang/.conda/envs/llm/bin/lm_eval
52
+ python: CPython 3.11.11
53
+ root: ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309
54
+ slurm:
55
+ array_job_id: "4452191"
56
+ array_task_count: "1"
57
+ array_task_id: "0"
58
+ array_task_max: "0"
59
+ array_task_min: "0"
60
+ array_task_step: "1"
61
+ cluster_name: eofe7
62
+ conf: /etc/slurm/slurm.conf
63
+ cpus_on_node: "16"
64
+ cpus_per_task: "16"
65
+ gpus_on_node: "2"
66
+ gtids: "0"
67
+ job_account: mit_general
68
+ job_cpus_per_node: "16"
69
+ job_end_time: "1758413466"
70
+ job_gid: "209655"
71
+ job_gpus: 1,2
72
+ job_id: "4452191"
73
+ job_name: eval.sh
74
+ job_nodelist: node1803
75
+ job_num_nodes: "1"
76
+ job_partition: ou_bcs_normal
77
+ job_qos: normal
78
+ job_start_time: "1758370266"
79
+ job_uid: "209655"
80
+ job_user: jdhwang
81
+ jobid: "4452191"
82
+ localid: "0"
83
+ mem_per_node: "131072"
84
+ nnodes: "1"
85
+ nodeid: "0"
86
+ nodelist: node1803
87
+ nprocs: "1"
88
+ ntasks: "1"
89
+ ntasks_per_node: "1"
90
+ oom_kill_step: "0"
91
+ prio_process: "0"
92
+ procid: "0"
93
+ script_context: prolog_task
94
+ submit_dir: /orcd/home/002/jdhwang/BRIDGE
95
+ submit_host: orcd-login003.mit.edu
96
+ task_pid: "1143610"
97
+ tasks_per_node: "1"
98
+ topology_addr: node1803
99
+ topology_addr_pattern: node
100
+ tres_per_task: cpu=16
101
+ startedAt: "2025-09-20T12:11:21.301942Z"
102
+ writerId: bdsaggp24nt8kfc8qjgq21gi927g7e3o
103
+ m: []
104
+ python_version: 3.11.11
105
+ t:
106
+ "1":
107
+ - 1
108
+ - 5
109
+ - 11
110
+ - 30
111
+ - 41
112
+ - 49
113
+ - 51
114
+ - 53
115
+ - 71
116
+ - 95
117
+ - 98
118
+ - 100
119
+ - 105
120
+ "2":
121
+ - 1
122
+ - 5
123
+ - 11
124
+ - 30
125
+ - 41
126
+ - 49
127
+ - 51
128
+ - 53
129
+ - 71
130
+ - 95
131
+ - 98
132
+ - 100
133
+ - 105
134
+ "3":
135
+ - 2
136
+ - 13
137
+ - 62
138
+ "4": 3.11.11
139
+ "5": 0.21.0
140
+ "6": 4.51.3
141
+ "12": 0.21.0
142
+ "13": linux-x86_64
143
+ cli_configs:
144
+ value:
145
+ batch_size: auto
146
+ batch_sizes: []
147
+ bootstrap_iters: 0
148
+ device: null
149
+ fewshot_seed: 1234
150
+ gen_kwargs:
151
+ max_gen_toks: 20000
152
+ limit: null
153
+ model: vllm
154
+ model_args: pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
155
+ numpy_seed: 1234
156
+ random_seed: 0
157
+ torch_seed: 1234
158
+ use_cache: null
159
+ task_configs:
160
+ value:
161
+ mgsm_native_cot_bn:
162
+ dataset_name: bn
163
+ dataset_path: juletxara/mgsm
164
+ description: ""
165
+ doc_to_target: '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}'
166
+ doc_to_text: '{% if answer is not none %}{{question+"\nধাপে ধাপে উত্তর:"}}{% else %}{{"প্রশ্ন: "+question+"\nধাপে ধাপে উত্তর:"}}{% endif %}'
167
+ fewshot_delimiter: |4+
168
+
169
+ filter_list:
170
+ - filter:
171
+ - function: regex
172
+ regex_pattern: The answer is (\-?[0-9\.\,]+)
173
+ - function: take_first
174
+ name: strict-match
175
+ - filter:
176
+ - function: regex
177
+ group_select: -1
178
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
179
+ - function: take_first
180
+ name: flexible-extract
181
+ generation_kwargs:
182
+ do_sample: false
183
+ max_gen_toks: 20000
184
+ until:
185
+ - 'প্রশ্ন:'
186
+ - </s>
187
+ - <|im_end|>
188
+ metadata:
189
+ version: 4
190
+ metric_list:
191
+ - aggregation: mean
192
+ higher_is_better: true
193
+ ignore_case: true
194
+ ignore_punctuation: true
195
+ metric: exact_match
196
+ num_fewshot: 0
197
+ output_type: generate_until
198
+ repeats: 1
199
+ should_decontaminate: false
200
+ tag: mgsm_cot_native
201
+ target_delimiter: ' '
202
+ task: mgsm_native_cot_bn
203
+ test_split: test
204
+ training_split: train
205
+ mgsm_native_cot_de:
206
+ dataset_name: de
207
+ dataset_path: juletxara/mgsm
208
+ description: ""
209
+ doc_to_target: '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}'
210
+ doc_to_text: '{% if answer is not none %}{{question+"\nSchritt-für-Schritt-Antwort:"}}{% else %}{{"Frage: "+question+"\nSchritt-für-Schritt-Antwort:"}}{% endif %}'
211
+ fewshot_delimiter: |4+
212
+
213
+ filter_list:
214
+ - filter:
215
+ - function: regex
216
+ regex_pattern: Die Antwort lautet (\-?[0-9\.\,]+)
217
+ - function: take_first
218
+ name: strict-match
219
+ - filter:
220
+ - function: regex
221
+ group_select: -1
222
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
223
+ - function: take_first
224
+ name: flexible-extract
225
+ generation_kwargs:
226
+ do_sample: false
227
+ max_gen_toks: 20000
228
+ until:
229
+ - 'Frage:'
230
+ - </s>
231
+ - <|im_end|>
232
+ metadata:
233
+ version: 4
234
+ metric_list:
235
+ - aggregation: mean
236
+ higher_is_better: true
237
+ ignore_case: true
238
+ ignore_punctuation: true
239
+ metric: exact_match
240
+ num_fewshot: 0
241
+ output_type: generate_until
242
+ repeats: 1
243
+ should_decontaminate: false
244
+ tag: mgsm_cot_native
245
+ target_delimiter: ' '
246
+ task: mgsm_native_cot_de
247
+ test_split: test
248
+ training_split: train
249
+ mgsm_native_cot_en:
250
+ dataset_name: en
251
+ dataset_path: juletxara/mgsm
252
+ description: ""
253
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
254
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
255
+ fewshot_delimiter: |4+
256
+
257
+ filter_list:
258
+ - filter:
259
+ - function: regex
260
+ regex_pattern: The answer is (\-?[0-9\.\,]+)
261
+ - function: take_first
262
+ name: strict-match
263
+ - filter:
264
+ - function: regex
265
+ group_select: -1
266
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
267
+ - function: take_first
268
+ name: flexible-extract
269
+ generation_kwargs:
270
+ do_sample: false
271
+ max_gen_toks: 20000
272
+ until:
273
+ - 'Question:'
274
+ - </s>
275
+ - <|im_end|>
276
+ metadata:
277
+ version: 4
278
+ metric_list:
279
+ - aggregation: mean
280
+ higher_is_better: true
281
+ ignore_case: true
282
+ ignore_punctuation: true
283
+ metric: exact_match
284
+ num_fewshot: 0
285
+ output_type: generate_until
286
+ repeats: 1
287
+ should_decontaminate: false
288
+ tag: mgsm_cot_native
289
+ target_delimiter: ' '
290
+ task: mgsm_native_cot_en
291
+ test_split: test
292
+ training_split: train
293
+ mgsm_native_cot_es:
294
+ dataset_name: es
295
+ dataset_path: juletxara/mgsm
296
+ description: ""
297
+ doc_to_target: '{% if answer is not none %}{{answer[23:]}}{% else %}{{answer_number|string}}{% endif %}'
298
+ doc_to_text: '{% if answer is not none %}{{question+"\nRespuesta paso a paso:"}}{% else %}{{"Pregunta: "+question+"\nRespuesta paso a paso:"}}{% endif %}'
299
+ fewshot_delimiter: |4+
300
+
301
+ filter_list:
302
+ - filter:
303
+ - function: regex
304
+ regex_pattern: La respuesta es (\-?[0-9\.\,]+)
305
+ - function: take_first
306
+ name: strict-match
307
+ - filter:
308
+ - function: regex
309
+ group_select: -1
310
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
311
+ - function: take_first
312
+ name: flexible-extract
313
+ generation_kwargs:
314
+ do_sample: false
315
+ max_gen_toks: 20000
316
+ until:
317
+ - 'Pregunta:'
318
+ - </s>
319
+ - <|im_end|>
320
+ metadata:
321
+ version: 4
322
+ metric_list:
323
+ - aggregation: mean
324
+ higher_is_better: true
325
+ ignore_case: true
326
+ ignore_punctuation: true
327
+ metric: exact_match
328
+ num_fewshot: 0
329
+ output_type: generate_until
330
+ repeats: 1
331
+ should_decontaminate: false
332
+ tag: mgsm_cot_native
333
+ target_delimiter: ' '
334
+ task: mgsm_native_cot_es
335
+ test_split: test
336
+ training_split: train
337
+ mgsm_native_cot_fr:
338
+ dataset_name: fr
339
+ dataset_path: juletxara/mgsm
340
+ description: ""
341
+ doc_to_target: '{% if answer is not none %}{{answer[26:]}}{% else %}{{answer_number|string}}{% endif %}'
342
+ doc_to_text: '{% if answer is not none %}{{question+"\nRéponse étape par étape :"}}{% else %}{{"Question : "+question+"\nRéponse étape par étape :"}}{% endif %}'
343
+ fewshot_delimiter: |4+
344
+
345
+ filter_list:
346
+ - filter:
347
+ - function: regex
348
+ regex_pattern: La réponse est (\-?[0-9\.\,]+)
349
+ - function: take_first
350
+ name: strict-match
351
+ - filter:
352
+ - function: regex
353
+ group_select: -1
354
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
355
+ - function: take_first
356
+ name: flexible-extract
357
+ generation_kwargs:
358
+ do_sample: false
359
+ max_gen_toks: 20000
360
+ until:
361
+ - 'Question :'
362
+ - </s>
363
+ - <|im_end|>
364
+ metadata:
365
+ version: 4
366
+ metric_list:
367
+ - aggregation: mean
368
+ higher_is_better: true
369
+ ignore_case: true
370
+ ignore_punctuation: true
371
+ metric: exact_match
372
+ num_fewshot: 0
373
+ output_type: generate_until
374
+ repeats: 1
375
+ should_decontaminate: false
376
+ tag: mgsm_cot_native
377
+ target_delimiter: ' '
378
+ task: mgsm_native_cot_fr
379
+ test_split: test
380
+ training_split: train
381
+ mgsm_native_cot_ja:
382
+ dataset_name: ja
383
+ dataset_path: juletxara/mgsm
384
+ description: ""
385
+ doc_to_target: '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}'
386
+ doc_to_text: '{% if answer is not none %}{{question+"\nステップごとの答え:"}}{% else %}{{"問題: "+question+"\nステップごとの答え:"}}{% endif %}'
387
+ fewshot_delimiter: |4+
388
+
389
+ filter_list:
390
+ - filter:
391
+ - function: regex
392
+ regex_pattern: 答えは(\-?[0-9\.\,]+)です。
393
+ - function: take_first
394
+ name: strict-match
395
+ - filter:
396
+ - function: regex
397
+ group_select: -1
398
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
399
+ - function: take_first
400
+ name: flexible-extract
401
+ generation_kwargs:
402
+ do_sample: false
403
+ max_gen_toks: 20000
404
+ until:
405
+ - 問題:
406
+ - </s>
407
+ - <|im_end|>
408
+ metadata:
409
+ version: 4
410
+ metric_list:
411
+ - aggregation: mean
412
+ higher_is_better: true
413
+ ignore_case: true
414
+ ignore_punctuation: true
415
+ metric: exact_match
416
+ num_fewshot: 0
417
+ output_type: generate_until
418
+ repeats: 1
419
+ should_decontaminate: false
420
+ tag: mgsm_cot_native
421
+ target_delimiter: ' '
422
+ task: mgsm_native_cot_ja
423
+ test_split: test
424
+ training_split: train
425
+ mgsm_native_cot_ru:
426
+ dataset_name: ru
427
+ dataset_path: juletxara/mgsm
428
+ description: ""
429
+ doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
430
+ doc_to_text: '{% if answer is not none %}{{question+"\nПошаговоерешение:"}}{% else %}{{"Задача: "+question+"\nПошаговоерешение:"}}{% endif %}'
431
+ fewshot_delimiter: |4+
432
+
433
+ filter_list:
434
+ - filter:
435
+ - function: regex
436
+ regex_pattern: Ответ — (\-?[0-9\.\,]+)
437
+ - function: take_first
438
+ name: strict-match
439
+ - filter:
440
+ - function: regex
441
+ group_select: -1
442
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
443
+ - function: take_first
444
+ name: flexible-extract
445
+ generation_kwargs:
446
+ do_sample: false
447
+ max_gen_toks: 20000
448
+ until:
449
+ - 'Задача:'
450
+ - </s>
451
+ - <|im_end|>
452
+ metadata:
453
+ version: 4
454
+ metric_list:
455
+ - aggregation: mean
456
+ higher_is_better: true
457
+ ignore_case: true
458
+ ignore_punctuation: true
459
+ metric: exact_match
460
+ num_fewshot: 0
461
+ output_type: generate_until
462
+ repeats: 1
463
+ should_decontaminate: false
464
+ tag: mgsm_cot_native
465
+ target_delimiter: ' '
466
+ task: mgsm_native_cot_ru
467
+ test_split: test
468
+ training_split: train
469
+ mgsm_native_cot_sw:
470
+ dataset_name: sw
471
+ dataset_path: juletxara/mgsm
472
+ description: ""
473
+ doc_to_target: '{% if answer is not none %}{{answer[25:]}}{% else %}{{answer_number|string}}{% endif %}'
474
+ doc_to_text: '{% if answer is not none %}{{question+"\nJibu la Hatua kwa Hatua:"}}{% else %}{{"Swali: "+question+"\nJibu la Hatua kwa Hatua:"}}{% endif %}'
475
+ fewshot_delimiter: |4+
476
+
477
+ filter_list:
478
+ - filter:
479
+ - function: regex
480
+ regex_pattern: Jibu ni (\-?[0-9\.\,]+)
481
+ - function: take_first
482
+ name: strict-match
483
+ - filter:
484
+ - function: regex
485
+ group_select: -1
486
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
487
+ - function: take_first
488
+ name: flexible-extract
489
+ generation_kwargs:
490
+ do_sample: false
491
+ max_gen_toks: 20000
492
+ until:
493
+ - 'Swali:'
494
+ - </s>
495
+ - <|im_end|>
496
+ metadata:
497
+ version: 4
498
+ metric_list:
499
+ - aggregation: mean
500
+ higher_is_better: true
501
+ ignore_case: true
502
+ ignore_punctuation: true
503
+ metric: exact_match
504
+ num_fewshot: 0
505
+ output_type: generate_until
506
+ repeats: 1
507
+ should_decontaminate: false
508
+ tag: mgsm_cot_native
509
+ target_delimiter: ' '
510
+ task: mgsm_native_cot_sw
511
+ test_split: test
512
+ training_split: train
513
+ mgsm_native_cot_te:
514
+ dataset_name: te
515
+ dataset_path: juletxara/mgsm
516
+ description: ""
517
+ doc_to_target: '{% if answer is not none %}{{answer[19:]}}{% else %}{{answer_number|string}}{% endif %}'
518
+ doc_to_text: '{% if answer is not none %}{{question+"\nదశలవారీగా సమాధానం:"}}{% else %}{{"ప్రశ్న: "+question+"\nదశలవారీగా సమాధానం:"}}{% endif %}'
519
+ fewshot_delimiter: |4+
520
+
521
+ filter_list:
522
+ - filter:
523
+ - function: regex
524
+ regex_pattern: సమాధానం (\-?[0-9\.\,]+)
525
+ - function: take_first
526
+ name: strict-match
527
+ - filter:
528
+ - function: regex
529
+ group_select: -1
530
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
531
+ - function: take_first
532
+ name: flexible-extract
533
+ generation_kwargs:
534
+ do_sample: false
535
+ max_gen_toks: 20000
536
+ until:
537
+ - 'ప్రశ్న:'
538
+ - </s>
539
+ - <|im_end|>
540
+ metadata:
541
+ version: 4
542
+ metric_list:
543
+ - aggregation: mean
544
+ higher_is_better: true
545
+ ignore_case: true
546
+ ignore_punctuation: true
547
+ metric: exact_match
548
+ num_fewshot: 0
549
+ output_type: generate_until
550
+ repeats: 1
551
+ should_decontaminate: false
552
+ tag: mgsm_cot_native
553
+ target_delimiter: ' '
554
+ task: mgsm_native_cot_te
555
+ test_split: test
556
+ training_split: train
557
+ mgsm_native_cot_th:
558
+ dataset_name: th
559
+ dataset_path: juletxara/mgsm
560
+ description: ""
561
+ doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
562
+ doc_to_text: '{% if answer is not none %}{{question+"\nคำตอบทีละขั้นตอน:"}}{% else %}{{"โจทย์: "+question+"\nคำตอบทีละขั้นตอน:"}}{% endif %}'
563
+ fewshot_delimiter: |4+
564
+
565
+ filter_list:
566
+ - filter:
567
+ - function: regex
568
+ regex_pattern: คำตอบคือ (\-?[0-9\.\,]+)
569
+ - function: take_first
570
+ name: strict-match
571
+ - filter:
572
+ - function: regex
573
+ group_select: -1
574
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
575
+ - function: take_first
576
+ name: flexible-extract
577
+ generation_kwargs:
578
+ do_sample: false
579
+ max_gen_toks: 20000
580
+ until:
581
+ - 'โจทย์:'
582
+ - </s>
583
+ - <|im_end|>
584
+ metadata:
585
+ version: 4
586
+ metric_list:
587
+ - aggregation: mean
588
+ higher_is_better: true
589
+ ignore_case: true
590
+ ignore_punctuation: true
591
+ metric: exact_match
592
+ num_fewshot: 0
593
+ output_type: generate_until
594
+ repeats: 1
595
+ should_decontaminate: false
596
+ tag: mgsm_cot_native
597
+ target_delimiter: ' '
598
+ task: mgsm_native_cot_th
599
+ test_split: test
600
+ training_split: train
601
+ mgsm_native_cot_zh:
602
+ dataset_name: zh
603
+ dataset_path: juletxara/mgsm
604
+ description: ""
605
+ doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}'
606
+ doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题: "+question+"\n逐步解答:"}}{% endif %}'
607
+ fewshot_delimiter: |4+
608
+
609
+ filter_list:
610
+ - filter:
611
+ - function: regex
612
+ regex_pattern: 答案是 (\-?[0-9\.\,]+)。
613
+ - function: take_first
614
+ name: strict-match
615
+ - filter:
616
+ - function: regex
617
+ group_select: -1
618
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
619
+ - function: take_first
620
+ name: flexible-extract
621
+ generation_kwargs:
622
+ do_sample: false
623
+ max_gen_toks: 20000
624
+ until:
625
+ - 问题:
626
+ - </s>
627
+ - <|im_end|>
628
+ metadata:
629
+ version: 4
630
+ metric_list:
631
+ - aggregation: mean
632
+ higher_is_better: true
633
+ ignore_case: true
634
+ ignore_punctuation: true
635
+ metric: exact_match
636
+ num_fewshot: 0
637
+ output_type: generate_until
638
+ repeats: 1
639
+ should_decontaminate: false
640
+ tag: mgsm_cot_native
641
+ target_delimiter: ' '
642
+ task: mgsm_native_cot_zh
643
+ test_split: test
644
+ training_split: train
wandb/run-20250920_081121-2oxex54w/files/media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["Tasks", "Version", "Filter", "num_fewshot", "Metric", "Value", "Stderr"], "data": [["mgsm_native_cot_bn", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_bn", 4.0, "flexible-extract", 0, "exact_match", "0.528", "N/A"], ["mgsm_native_cot_de", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_de", 4.0, "flexible-extract", 0, "exact_match", "0.74", "N/A"], ["mgsm_native_cot_en", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_en", 4.0, "flexible-extract", 0, "exact_match", "0.872", "N/A"], ["mgsm_native_cot_es", 4.0, "strict-match", 0, "exact_match", "0.008", "N/A"], ["mgsm_native_cot_es", 4.0, "flexible-extract", 0, "exact_match", "0.78", "N/A"], ["mgsm_native_cot_fr", 4.0, "strict-match", 0, "exact_match", "0.004", "N/A"], ["mgsm_native_cot_fr", 4.0, "flexible-extract", 0, "exact_match", "0.76", "N/A"], ["mgsm_native_cot_ja", 4.0, "strict-match", 0, "exact_match", "0.008", "N/A"], ["mgsm_native_cot_ja", 4.0, "flexible-extract", 0, "exact_match", "0.644", "N/A"], ["mgsm_native_cot_ru", 4.0, "strict-match", 0, "exact_match", "0.004", "N/A"], ["mgsm_native_cot_ru", 4.0, "flexible-extract", 0, "exact_match", "0.832", "N/A"], ["mgsm_native_cot_sw", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_sw", 4.0, "flexible-extract", 0, "exact_match", "0.008", "N/A"], ["mgsm_native_cot_te", 4.0, "strict-match", 0, "exact_match", "0.02", "N/A"], ["mgsm_native_cot_te", 4.0, "flexible-extract", 0, "exact_match", "0.12", "N/A"], ["mgsm_native_cot_th", 4.0, "strict-match", 0, "exact_match", "0.26", "N/A"], ["mgsm_native_cot_th", 4.0, "flexible-extract", 0, "exact_match", "0.652", "N/A"], ["mgsm_native_cot_zh", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_zh", 4.0, "flexible-extract", 0, "exact_match", "0.784", "N/A"]]}
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5e72dc1f8666e2a7b2dd420223eda667dbec11d2c10570f946fcc38e857476a
3
+ size 16518181
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_de_eval_results_3_fabaf33255f24add59c1.table.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_en_eval_results_4_82791ccc59d2c61a093c.table.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_es_eval_results_5_db5ed66b097e218c4da6.table.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_fr_eval_results_6_214cb1a5a2cdee9f330e.table.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:098df869a17abf277e8cf3b5d4966de124ba0fed82b267159ef0c1d32c7f38a2
3
+ size 19900362
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19e62e412810312c2375525d388b308579f502daf4537e08ea3e97b90b3ef016
3
+ size 13109180
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d79a57dcd801f08109f8b083cce4884bf51b225d8127bc78429cf19d3df17aad
3
+ size 22151199
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0fc0c81b8662e04cc0e8fc30ac6053adcf13bd384f959eee54fbeb5039dbc01
3
+ size 23468672
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_th_eval_results_11_baa86a95f601e3c721a0.table.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_zh_eval_results_12_2882185f7e816bbcc2fe.table.json ADDED
The diff for this file is too large to render. See raw diff
 
wandb/run-20250920_081121-2oxex54w/files/output.log ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-20:08:11:22,626 INFO [__main__.py:291] Verbosity set to INFO
2
+ 2025-09-20:08:11:57,895 INFO [__init__.py:464] The tag pawsx is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
3
+ 2025-09-20:08:11:59,107 INFO [__main__.py:388] Selected Tasks: ['mgsm_native_cot_bn', 'mgsm_native_cot_de', 'mgsm_native_cot_en', 'mgsm_native_cot_es', 'mgsm_native_cot_fr', 'mgsm_native_cot_ja', 'mgsm_native_cot_ru', 'mgsm_native_cot_sw', 'mgsm_native_cot_te', 'mgsm_native_cot_th', 'mgsm_native_cot_zh']
4
+ 2025-09-20:08:11:59,119 INFO [evaluator.py:161] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
5
+ 2025-09-20:08:11:59,119 WARNING [evaluator.py:172] generation_kwargs specified through cli, these settings will update set parameters in yaml tasks. Ensure 'do_sample=True' for non-greedy decoding!
6
+ 2025-09-20:08:11:59,119 INFO [evaluator.py:198] Initializing vllm model, with arguments: {'pretrained': 'ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', 'dtype': 'auto', 'tensor_parallel_size': 2, 'max_model_len': 20000}
7
+ INFO 09-20 08:12:05 config.py:350] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.
8
+ INFO 09-20 08:12:05 config.py:1020] Defaulting to use mp for distributed inference
9
+ INFO 09-20 08:12:05 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', speculative_config=None, tokenizer='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=20000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=1234, served_model_name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, chat_template_text_format=string, mm_processor_kwargs=None, pooler_config=None)
10
+ WARNING 09-20 08:12:06 multiproc_gpu_executor.py:130] CUDA was previously initialized. We must use the `spawn` multiprocessing start method. Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.
11
+ WARNING 09-20 08:12:06 multiproc_gpu_executor.py:56] Reducing Torch parallelism from 16 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
12
+ INFO 09-20 08:12:06 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
13
+ INFO 09-20 08:12:06 selector.py:135] Using Flash Attention backend.
14
+ INFO 09-20 08:12:14 utils.py:961] Found nccl from library libnccl.so.2
15
+ INFO 09-20 08:12:14 pynccl.py:69] vLLM is using nccl==2.21.5
16
+ INFO 09-20 08:12:15 custom_all_reduce_utils.py:242] reading GPU P2P access cache from /home/jdhwang/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
17
+ INFO 09-20 08:12:15 shm_broadcast.py:236] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x145452a92810>, local_subscribe_port=53811, remote_subscribe_port=None)
18
+ INFO 09-20 08:12:15 model_runner.py:1072] Starting to load model ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309...
19
+ Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
20
+ Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:43<02:11, 43.77s/it]
21
+ Loading safetensors checkpoint shards: 50% Completed | 2/4 [01:16<01:14, 37.12s/it]
22
+ Loading safetensors checkpoint shards: 75% Completed | 3/4 [01:49<00:35, 35.23s/it]
23
+ Loading safetensors checkpoint shards: 100% Completed | 4/4 [02:44<00:00, 43.11s/it]
24
+ Loading safetensors checkpoint shards: 100% Completed | 4/4 [02:44<00:00, 41.10s/it]
25
+ INFO 09-20 08:15:00 model_runner.py:1077] Loading model weights took 7.1216 GB
26
+ INFO 09-20 08:15:02 worker.py:232] Memory profiling results: total_gpu_memory=79.10GiB initial_memory_usage=8.22GiB peak_torch_memory=8.75GiB memory_usage_post_profile=8.60GiB non_torch_memory=1.44GiB kv_cache_size=60.99GiB gpu_memory_utilization=0.90
27
+ INFO 09-20 08:15:02 distributed_gpu_executor.py:57] # GPU blocks: 142750, # CPU blocks: 9362
28
+ INFO 09-20 08:15:02 distributed_gpu_executor.py:61] Maximum concurrency for 20000 tokens per request: 114.20x
29
+ INFO 09-20 08:15:05 model_runner.py:1400] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
30
+ INFO 09-20 08:15:05 model_runner.py:1404] If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
31
+ INFO 09-20 08:15:20 custom_all_reduce.py:224] Registering 1995 cuda graph addresses
32
+ INFO 09-20 08:15:20 model_runner.py:1518] Graph capturing finished in 16 secs, took 0.25 GiB
33
+
34
+ 2025-09-20:08:15:28,125 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
35
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
36
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
37
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
38
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
39
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
40
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
41
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
42
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
43
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
44
+ 2025-09-20:08:15:28,126 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
45
+ 2025-09-20:08:15:28,127 INFO [task.py:430] Building contexts for mgsm_native_cot_zh on rank 0...
46
+ 100%|██████████| 250/250 [00:00<00:00, 1459.14it/s]
47
+ 2025-09-20:08:15:28,305 INFO [task.py:430] Building contexts for mgsm_native_cot_th on rank 0...
48
+ 100%|██████████| 250/250 [00:00<00:00, 1507.22it/s]
49
+ 2025-09-20:08:15:28,476 INFO [task.py:430] Building contexts for mgsm_native_cot_te on rank 0...
50
+ 100%|██████████| 250/250 [00:00<00:00, 1510.62it/s]
51
+ 2025-09-20:08:15:28,647 INFO [task.py:430] Building contexts for mgsm_native_cot_sw on rank 0...
52
+ 100%|██████████| 250/250 [00:00<00:00, 1538.05it/s]
53
+ 2025-09-20:08:15:28,816 INFO [task.py:430] Building contexts for mgsm_native_cot_ru on rank 0...
54
+ 100%|██████████| 250/250 [00:00<00:00, 1407.80it/s]
55
+ 2025-09-20:08:15:28,999 INFO [task.py:430] Building contexts for mgsm_native_cot_ja on rank 0...
56
+ 100%|██████████| 250/250 [00:00<00:00, 1480.79it/s]
57
+ 2025-09-20:08:15:29,174 INFO [task.py:430] Building contexts for mgsm_native_cot_fr on rank 0...
58
+ 100%|██████████| 250/250 [00:00<00:00, 1487.75it/s]
59
+ 2025-09-20:08:15:29,353 INFO [task.py:430] Building contexts for mgsm_native_cot_es on rank 0...
60
+ 100%|██████████| 250/250 [00:00<00:00, 1544.75it/s]
61
+ 2025-09-20:08:15:29,520 INFO [task.py:430] Building contexts for mgsm_native_cot_en on rank 0...
62
+ 100%|██████████| 250/250 [00:00<00:00, 1546.41it/s]
63
+ 2025-09-20:08:15:29,687 INFO [task.py:430] Building contexts for mgsm_native_cot_de on rank 0...
64
+ 100%|██████████| 250/250 [00:00<00:00, 1523.06it/s]
65
+ 2025-09-20:08:15:29,857 INFO [task.py:430] Building contexts for mgsm_native_cot_bn on rank 0...
66
+ 100%|██████████| 250/250 [00:00<00:00, 1512.86it/s]
67
+ 2025-09-20:08:15:30,028 INFO [evaluator.py:495] Running generate_until requests
68
+ Processed prompts: 100%|██████████| 250/250 [03:23<00:00, 1.23it/s, est. speed input: 124.59 toks/s, output: 2248.44 toks/s]
69
+ final processing: 250it [00:00, 298229.81it/s] | 1/2750 [03:23<155:02:20, 203.03s/it] 124.59 toks/s, output: 2248.44 toks/s]
70
+ Processed prompts: 100%|██████████| 250/250 [02:56<00:00, 1.42it/s, est. speed input: 247.19 toks/s, output: 1630.51 toks/s]
71
+ final processing: 250it [00:00, 286888.10it/s] | 251/2750 [06:19<52:55, 1.27s/it] 247.19 toks/s, output: 1630.51 toks/s]
72
+ Processed prompts: 100%|██████████| 250/250 [14:58<00:00, 3.59s/it, est. speed input: 125.62 toks/s, output: 3596.00 toks/s]
73
+ final processing: 250it [00:00, 337923.30it/s] | 501/2750 [21:17<1:38:47, 2.64s/it]: 125.62 toks/s, output: 3596.00 toks/s]
74
+ WARNING 09-20 08:30:23 scheduler.py:1481] Sequence group 749 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=1
75
+ Processed prompts: 100%|██████████| 250/250 [27:49<00:00, 6.68s/it, est. speed input: 22.15 toks/s, output: 2927.12 toks/s]
76
+ final processing: 250it [00:00, 259163.62it/s] | 751/2750 [49:07<2:29:15, 4.48s/it]: 22.15 toks/s, output: 2927.12 toks/s]
77
+ WARNING 09-20 08:43:05 scheduler.py:1481] Sequence group 987 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=51
78
+ WARNING 09-20 08:46:25 scheduler.py:1481] Sequence group 936 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=101
79
+ WARNING 09-20 08:52:13 scheduler.py:1481] Sequence group 885 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=151
80
+ Processed prompts: 100%|██████████| 250/250 [03:26<00:00, 1.21it/s, est. speed input: 160.95 toks/s, output: 2481.49 toks/s]
81
+ final processing: 250it [00:00, 271440.85it/s] | 1001/2750 [52:34<1:28:34, 3.04s/it] 160.39 toks/s, output: 2385.40 toks/s]
82
+ Processed prompts: 100%|██████████| 250/250 [06:36<00:00, 1.58s/it, est. speed input: 75.03 toks/s, output: 3118.80 toks/s]
83
+ final processing: 250it [00:00, 440393.11it/s] | 1251/2750 [59:10<1:02:49, 2.51s/it] 74.36 toks/s, output: 2968.35 toks/s]
84
+ Processed prompts: 100%|██████████| 250/250 [03:18<00:00, 1.26it/s, est. speed input: 147.77 toks/s, output: 2350.89 toks/s]
85
+ final processing: 250it [00:00, 228846.79it/s] | 1501/2750 [1:02:28<40:09, 1.93s/it] 147.77 toks/s, output: 2350.89 toks/s]
86
+ Processed prompts: 100%|██████████| 250/250 [06:00<00:00, 1.44s/it, est. speed input: 80.18 toks/s, output: 3303.78 toks/s]
87
+ final processing: 250it [00:00, 304730.02it/s] | 1751/2750 [1:08:28<29:28, 1.77s/it] 79.75 toks/s, output: 3193.63 toks/s]
88
+ Processed prompts: 100%|██████████| 250/250 [03:54<00:00, 1.07it/s, est. speed input: 104.28 toks/s, output: 3089.34 toks/s]
89
+ final processing: 250it [00:00, 289422.03it/s] | 2001/2750 [1:12:23<18:47, 1.51s/it] 103.64 toks/s, output: 2920.65 toks/s]
90
+ Processed prompts: 100%|██████████| 250/250 [03:22<00:00, 1.23it/s, est. speed input: 153.49 toks/s, output: 2475.48 toks/s]
91
+ final processing: 250it [00:00, 379094.72it/s]▏ | 2251/2750 [1:15:46<10:42, 1.29s/it] 153.16 toks/s, output: 2378.31 toks/s]
92
+ Processed prompts: 100%|██████████| 250/250 [07:54<00:00, 1.90s/it, est. speed input: 163.92 toks/s, output: 3619.06 toks/s]
93
+ final processing: 250it [00:00, 384093.77it/s]█ | 2501/2750 [1:23:40<06:07, 1.48s/it] 163.92 toks/s, output: 3619.06 toks/s]
94
+ Running generate_until requests: 100%|██████████| 2750/2750 [1:23:40<00:00, 1.83s/it]
95
+ INFO 09-20 09:39:27 multiproc_worker_utils.py:133] Terminating local vLLM worker processes
96
+ wandb: WARNING Serializing object of type str that is 135596 bytes
97
+ wandb: WARNING Serializing object of type str that is 106518 bytes
98
+ wandb: WARNING Serializing object of type str that is 104812 bytes
99
+ wandb: WARNING Serializing object of type str that is 110652 bytes
100
+ wandb: WARNING Serializing object of type str that is 113566 bytes
101
+ wandb: WARNING Serializing object of type str that is 115434 bytes
102
+ wandb: WARNING Serializing object of type str that is 135596 bytes
103
+ wandb: WARNING Serializing object of type str that is 106518 bytes
104
+ wandb: WARNING Serializing object of type str that is 104812 bytes
105
+ wandb: WARNING Serializing object of type str that is 110652 bytes
106
+ wandb: WARNING Serializing object of type str that is 113566 bytes
107
+ wandb: WARNING Serializing object of type str that is 115434 bytes
108
+ wandb: WARNING Serializing object of type str that is 133206 bytes
109
+ wandb: WARNING Serializing object of type str that is 145832 bytes
110
+ wandb: WARNING Serializing object of type str that is 144918 bytes
111
+ wandb: WARNING Serializing object of type str that is 127702 bytes
112
+ wandb: WARNING Serializing object of type str that is 111602 bytes
113
+ wandb: WARNING Serializing object of type str that is 168368 bytes
114
+ wandb: WARNING Serializing object of type str that is 149048 bytes
115
+ wandb: WARNING Serializing object of type str that is 147662 bytes
116
+ wandb: WARNING Serializing object of type str that is 136456 bytes
117
+ wandb: WARNING Serializing object of type str that is 128568 bytes
118
+ wandb: WARNING Serializing object of type str that is 133206 bytes
119
+ wandb: WARNING Serializing object of type str that is 145832 bytes
120
+ wandb: WARNING Serializing object of type str that is 144918 bytes
121
+ wandb: WARNING Serializing object of type str that is 127702 bytes
122
+ wandb: WARNING Serializing object of type str that is 111602 bytes
123
+ wandb: WARNING Serializing object of type str that is 168368 bytes
124
+ wandb: WARNING Serializing object of type str that is 149048 bytes
125
+ wandb: WARNING Serializing object of type str that is 147662 bytes
126
+ wandb: WARNING Serializing object of type str that is 136456 bytes
127
+ wandb: WARNING Serializing object of type str that is 128568 bytes
128
+ wandb: WARNING Serializing object of type str that is 111938 bytes
129
+ wandb: WARNING Serializing object of type str that is 114530 bytes
130
+ wandb: WARNING Serializing object of type str that is 141662 bytes
131
+ wandb: WARNING Serializing object of type str that is 139800 bytes
132
+ wandb: WARNING Serializing object of type str that is 118212 bytes
133
+ wandb: WARNING Serializing object of type str that is 104678 bytes
134
+ wandb: WARNING Serializing object of type str that is 140896 bytes
135
+ wandb: WARNING Serializing object of type str that is 130060 bytes
136
+ wandb: WARNING Serializing object of type str that is 129634 bytes
137
+ wandb: WARNING Serializing object of type str that is 124454 bytes
138
+ wandb: WARNING Serializing object of type str that is 111938 bytes
139
+ wandb: WARNING Serializing object of type str that is 114530 bytes
140
+ wandb: WARNING Serializing object of type str that is 141662 bytes
141
+ wandb: WARNING Serializing object of type str that is 139800 bytes
142
+ wandb: WARNING Serializing object of type str that is 118212 bytes
143
+ wandb: WARNING Serializing object of type str that is 104678 bytes
144
+ wandb: WARNING Serializing object of type str that is 140896 bytes
145
+ wandb: WARNING Serializing object of type str that is 130060 bytes
146
+ wandb: WARNING Serializing object of type str that is 129634 bytes
147
+ wandb: WARNING Serializing object of type str that is 124454 bytes
148
+ wandb: WARNING Serializing object of type str that is 109374 bytes
149
+ wandb: WARNING Serializing object of type str that is 121236 bytes
150
+ wandb: WARNING Serializing object of type str that is 113208 bytes
151
+ wandb: WARNING Serializing object of type str that is 109374 bytes
152
+ wandb: WARNING Serializing object of type str that is 121236 bytes
153
+ wandb: WARNING Serializing object of type str that is 113208 bytes
154
+ wandb: WARNING Serializing object of type str that is 142386 bytes
155
+ wandb: WARNING Serializing object of type str that is 110008 bytes
156
+ wandb: WARNING Serializing object of type str that is 113878 bytes
157
+ wandb: WARNING Serializing object of type str that is 105530 bytes
158
+ wandb: WARNING Serializing object of type str that is 122158 bytes
159
+ wandb: WARNING Serializing object of type str that is 108502 bytes
160
+ wandb: WARNING Serializing object of type str that is 113968 bytes
161
+ wandb: WARNING Serializing object of type str that is 107180 bytes
162
+ wandb: WARNING Serializing object of type str that is 121728 bytes
163
+ wandb: WARNING Serializing object of type str that is 106106 bytes
164
+ wandb: WARNING Serializing object of type str that is 117514 bytes
165
+ wandb: WARNING Serializing object of type str that is 142386 bytes
166
+ wandb: WARNING Serializing object of type str that is 110008 bytes
167
+ wandb: WARNING Serializing object of type str that is 113878 bytes
168
+ wandb: WARNING Serializing object of type str that is 105530 bytes
169
+ wandb: WARNING Serializing object of type str that is 122158 bytes
170
+ wandb: WARNING Serializing object of type str that is 108502 bytes
171
+ wandb: WARNING Serializing object of type str that is 113968 bytes
172
+ wandb: WARNING Serializing object of type str that is 107180 bytes
173
+ wandb: WARNING Serializing object of type str that is 121728 bytes
174
+ wandb: WARNING Serializing object of type str that is 106106 bytes
175
+ wandb: WARNING Serializing object of type str that is 117514 bytes
176
+ 2025-09-20:09:39:36,339 INFO [evaluation_tracker.py:207] Saving results aggregated
177
+ 2025-09-20:09:39:36,444 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_bn
178
+ 2025-09-20:09:39:36,612 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_de
179
+ 2025-09-20:09:39:36,770 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_en
180
+ 2025-09-20:09:39:36,926 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_es
181
+ 2025-09-20:09:39:37,090 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_fr
182
+ 2025-09-20:09:39:37,237 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_ja
183
+ 2025-09-20:09:39:37,383 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_ru
184
+ 2025-09-20:09:39:37,536 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_sw
185
+ 2025-09-20:09:39:37,773 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_te
186
+ 2025-09-20:09:39:37,940 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_th
187
+ 2025-09-20:09:39:38,087 INFO [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_zh
188
+ vllm (pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000), gen_kwargs: (max_gen_toks=20000), limit: None, num_fewshot: None, batch_size: auto
189
+ | Tasks |Version| Filter |n-shot| Metric | |Value| |Stderr|
190
+ |------------------|------:|----------------|-----:|-----------|---|----:|---|------|
191
+ |mgsm_native_cot_bn| 4|flexible-extract| 0|exact_match|↑ |0.528|± | N/A|
192
+ | | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
193
+ |mgsm_native_cot_de| 4|flexible-extract| 0|exact_match|↑ |0.740|± | N/A|
194
+ | | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
195
+ |mgsm_native_cot_en| 4|flexible-extract| 0|exact_match|↑ |0.872|± | N/A|
196
+ | | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
197
+ |mgsm_native_cot_es| 4|flexible-extract| 0|exact_match|↑ |0.780|± | N/A|
198
+ | | |strict-match | 0|exact_match|↑ |0.008|± | N/A|
199
+ |mgsm_native_cot_fr| 4|flexible-extract| 0|exact_match|↑ |0.760|± | N/A|
200
+ | | |strict-match | 0|exact_match|↑ |0.004|± | N/A|
201
+ |mgsm_native_cot_ja| 4|flexible-extract| 0|exact_match|↑ |0.644|± | N/A|
202
+ | | |strict-match | 0|exact_match|↑ |0.008|± | N/A|
203
+ |mgsm_native_cot_ru| 4|flexible-extract| 0|exact_match|↑ |0.832|± | N/A|
204
+ | | |strict-match | 0|exact_match|↑ |0.004|± | N/A|
205
+ |mgsm_native_cot_sw| 4|flexible-extract| 0|exact_match|↑ |0.008|± | N/A|
206
+ | | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
207
+ |mgsm_native_cot_te| 4|flexible-extract| 0|exact_match|↑ |0.120|± | N/A|
208
+ | | |strict-match | 0|exact_match|↑ |0.020|± | N/A|
209
+ |mgsm_native_cot_th| 4|flexible-extract| 0|exact_match|↑ |0.652|± | N/A|
210
+ | | |strict-match | 0|exact_match|↑ |0.260|± | N/A|
211
+ |mgsm_native_cot_zh| 4|flexible-extract| 0|exact_match|↑ |0.784|± | N/A|
212
+ | | |strict-match | 0|exact_match|↑ |0.000|± | N/A|
wandb/run-20250920_081121-2oxex54w/files/requirements.txt ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setproctitle==1.2.2
2
+ colorama==0.4.6
3
+ psutil==7.0.0
4
+ GitPython==3.1.43
5
+ docker-pycreds==0.4.0
6
+ gitdb==4.0.11
7
+ opencv-python==4.10.0.84
8
+ sentry-sdk==2.19.0
9
+ setproctitle==1.3.4
10
+ smmap==5.0.1
11
+ hjson==3.1.0
12
+ deepspeed==0.16.7
13
+ transformers==4.46.1
14
+ transformers==4.54.1
15
+ protobuf==6.31.1
16
+ accelerate==1.6.0
17
+ docopt==0.6.2
18
+ gguf==0.10.0
19
+ webencodings==0.5.1
20
+ pickleshare==0.7.5
21
+ fastjsonschema==2.21.1
22
+ backcall==0.2.0
23
+ tinycss2==1.4.0
24
+ soupsieve==2.7
25
+ pandocfilters==1.5.1
26
+ mistune==3.1.3
27
+ jupyterlab_pygments==0.3.0
28
+ defusedxml==0.7.1
29
+ bleach==6.2.0
30
+ yarg==0.1.9
31
+ ipython==8.12.3
32
+ beautifulsoup4==4.13.4
33
+ nbformat==5.10.4
34
+ nbclient==0.10.2
35
+ nbconvert==7.16.6
36
+ pipreqs==0.5.0
37
+ wandb==0.21.0
38
+ trl==0.17.0
39
+ lm_eval==0.4.4
40
+ langid==1.1.6
41
+ annotated-types==0.7.0
42
+ vllm==0.6.4.post1
43
+ typing-inspection==0.4.1
44
+ xformers==0.0.28.post3
45
+ pydantic_core==2.33.2
46
+ outlines==0.0.46
47
+ pydantic==2.11.7
48
+ compressed-tensors==0.8.0
49
+ click==8.2.1
50
+ lightning-utilities==0.15.0
51
+ torchmetrics==1.8.0
52
+ nvidia-ml-py==13.580.65
53
+ blessed==1.21.0
54
+ gpustat==1.1.1
55
+ nvidia-cufile-cu12==1.13.1.3
56
+ nvidia-cusparselt-cu12==0.6.2
57
+ mpmath==1.3.0
58
+ typing_extensions==4.12.2
59
+ sympy==1.13.1
60
+ pillow==11.0.0
61
+ nvidia-nvtx-cu12==12.4.127
62
+ nvidia-nvjitlink-cu12==12.4.127
63
+ nvidia-nccl-cu12==2.21.5
64
+ numpy==2.1.2
65
+ nvidia-curand-cu12==10.3.5.147
66
+ networkx==3.3
67
+ nvidia-cufft-cu12==11.2.1.3
68
+ nvidia-cuda-runtime-cu12==12.4.127
69
+ nvidia-cuda-nvrtc-cu12==12.4.127
70
+ nvidia-cuda-cupti-cu12==12.4.127
71
+ nvidia-cublas-cu12==12.4.5.8
72
+ MarkupSafe==2.1.5
73
+ setuptools==80.9.0
74
+ certifi==2025.8.3
75
+ einops==0.8.1
76
+ fsspec==2024.6.1
77
+ Jinja2==3.1.4
78
+ nvidia-cusolver-cu12==11.6.1.9
79
+ urllib3==2.5.0
80
+ tqdm==4.67.1
81
+ safetensors==0.6.2
82
+ regex==2025.7.34
83
+ PyYAML==6.0.2
84
+ packaging==25.0
85
+ idna==3.10
86
+ filelock==3.13.1
87
+ hf-xet==1.1.8
88
+ torch==2.6.0+cu124
89
+ charset-normalizer==3.4.3
90
+ requests==2.32.5
91
+ huggingface-hub==0.34.4
92
+ torchaudio==2.6.0+cu124
93
+ liger_kernel==0.6.2
94
+ nvidia-cusparse-cu12==12.3.1.170
95
+ nvidia-cudnn-cu12==9.1.0.70
96
+ torchvision==0.21.0+cu124
97
+ blingfire==0.1.8
98
+ triton==3.1.0
99
+ sentence-transformers==5.1.0
100
+ tabledata==1.3.4
101
+ lxml==5.3.1
102
+ accelerate==1.5.2
103
+ absl-py==2.1.0
104
+ Markdown==3.7
105
+ uvicorn==0.34.0
106
+ ruff==0.11.0
107
+ nvidia-cuda-runtime-cu12==12.4.127
108
+ airportsdata==20250224
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ astor==0.8.1
111
+ DataProperty==1.1.0
112
+ lm-format-enforcer==0.10.11
113
+ mdurl==0.1.2
114
+ nvidia-nccl-cu12==2.21.5
115
+ tabulate==0.9.0
116
+ python-dotenv==1.0.1
117
+ h5py==3.13.0
118
+ chardet==5.2.0
119
+ cupy-cuda12x==13.4.0
120
+ tiktoken==0.9.0
121
+ jiter==0.8.2
122
+ Pygments==2.19.1
123
+ typing_extensions==4.12.2
124
+ datasets==3.1.0
125
+ zipp==3.21.0
126
+ more-itertools==10.6.0
127
+ MarkupSafe==2.1.5
128
+ comm==0.2.2
129
+ pycountry==24.6.1
130
+ partial-json-parser==0.2.1.1.post5
131
+ gradio==4.44.0
132
+ prometheus_client==0.21.1
133
+ six==1.17.0
134
+ pytz==2025.1
135
+ unsloth_zoo==2025.3.12
136
+ starlette==0.46.0
137
+ llvmlite==0.44.0
138
+ peft==0.14.0
139
+ aiohttp==3.11.13
140
+ aiofiles==23.2.1
141
+ importlib_resources==6.5.2
142
+ nvidia-nvjitlink-cu12==12.4.127
143
+ semantic-version==2.10.0
144
+ decorator==5.2.1
145
+ nvidia-cublas-cu12==12.4.5.8
146
+ contourpy==1.3.1
147
+ torch==2.6.0
148
+ pytest==8.3.5
149
+ fastapi==0.115.11
150
+ seaborn==0.13.2
151
+ sympy==1.13.1
152
+ threadpoolctl==3.6.0
153
+ networkx==3.4.2
154
+ python-dateutil==2.9.0.post0
155
+ depyf==0.18.0
156
+ nvidia-ml-py==12.570.86
157
+ jedi==0.19.2
158
+ joblib==1.4.2
159
+ referencing==0.36.2
160
+ diskcache==5.6.3
161
+ httpcore==1.0.7
162
+ httpx==0.28.1
163
+ pyairports==2.1.1
164
+ protobuf==3.20.3
165
+ portalocker==3.1.1
166
+ nvidia-cudnn-cu12==9.1.0.70
167
+ Pebble==5.1.0
168
+ fsspec==2024.9.0
169
+ hf_transfer==0.1.9
170
+ ptyprocess==0.7.0
171
+ pexpect==4.9.0
172
+ nvidia-cuda-nvrtc-cu12==12.4.127
173
+ scipy==1.15.2
174
+ sentencepiece==0.2.0
175
+ cycler==0.12.1
176
+ packaging==24.2
177
+ openai==1.56.1
178
+ frozenlist==1.5.0
179
+ lark==1.2.2
180
+ filelock==3.17.0
181
+ opentelemetry-exporter-otlp==1.26.0
182
+ yarl==1.18.3
183
+ rouge_score==0.1.2
184
+ grpcio==1.70.0
185
+ googleapis-common-protos==1.70.0
186
+ aiohappyeyeballs==2.4.6
187
+ multiprocess==0.70.16
188
+ tornado==6.4.2
189
+ numpy==1.26.4
190
+ nltk==3.9.1
191
+ pip==25.0
192
+ charset-normalizer==3.3.2
193
+ prometheus-fastapi-instrumentator==7.0.2
194
+ numexpr==2.10.2
195
+ pyarrow==19.0.1
196
+ attrs==25.1.0
197
+ lm_eval==0.4.4
198
+ urllib3==2.3.0
199
+ mkl_random==1.2.8
200
+ httptools==0.6.4
201
+ gpustat==1.1.1
202
+ pluggy==1.5.0
203
+ huggingface-hub==0.30.2
204
+ triton==3.1.0
205
+ idna==3.7
206
+ ipython==8.20.0
207
+ pyparsing==3.2.1
208
+ rich-toolkit==0.13.2
209
+ googletrans==4.0.2
210
+ jupyter_core==5.7.2
211
+ zstandard==0.23.0
212
+ aiosignal==1.3.2
213
+ tyro==0.9.17
214
+ traitlets==5.14.3
215
+ h11==0.14.0
216
+ outlines==0.1.11
217
+ jupyter_client==8.6.3
218
+ loralib==0.1.2
219
+ kiwisolver==1.4.8
220
+ blake3==1.0.4
221
+ nvidia-cusparselt-cu12==0.6.2
222
+ rich==13.9.4
223
+ hf-xet==1.0.2
224
+ certifi==2025.1.31
225
+ wheel==0.45.1
226
+ pybind11==2.13.6
227
+ regex==2024.11.6
228
+ mpmath==1.3.0
229
+ transformers==4.51.3
230
+ flash_attn==2.7.4.post1
231
+ nvidia-curand-cu12==10.3.5.147
232
+ PySocks==1.7.1
233
+ gmpy2==2.2.1
234
+ iniconfig==2.0.0
235
+ pandas==2.2.3
236
+ Jinja2==3.1.5
237
+ msgpack==1.1.0
238
+ gguf==0.16.2
239
+ email_validator==2.2.0
240
+ tzdata==2025.1
241
+ cut-cross-entropy==25.1.1
242
+ tensorboard==2.19.0
243
+ matplotlib==3.10.1
244
+ jsonschema-specifications==2024.10.1
245
+ unsloth==2025.3.14
246
+ Werkzeug==3.1.3
247
+ opentelemetry-proto==1.26.0
248
+ fastrlock==0.8.3
249
+ dnspython==2.7.0
250
+ typeguard==4.4.2
251
+ opentelemetry-api==1.26.0
252
+ platformdirs==4.3.6
253
+ importlib_metadata==8.0.0
254
+ opentelemetry-semantic-conventions==0.47b0
255
+ sniffio==1.3.1
256
+ nvidia-cuda-cupti-cu12==12.4.127
257
+ scikit-learn==1.6.1
258
+ hpack==4.1.0
259
+ parso==0.8.4
260
+ torchaudio==2.6.0
261
+ xgrammar==0.1.18
262
+ executing==2.2.0
263
+ mkl_fft==1.3.11
264
+ vllm==0.8.4
265
+ word2number==1.1
266
+ pure_eval==0.2.3
267
+ watchfiles==1.0.4
268
+ pydub==0.25.1
269
+ mbstrdecoder==1.1.4
270
+ markdown-it-py==3.0.0
271
+ jsonschema==4.23.0
272
+ msgspec==0.19.0
273
+ rpds-py==0.23.1
274
+ wandb==0.19.9
275
+ matplotlib-inline==0.1.7
276
+ requests==2.32.3
277
+ interegular==0.3.3
278
+ pytablewriter==1.2.1
279
+ orjson==3.10.15
280
+ xformers==0.0.29.post2
281
+ fastapi-cli==0.0.7
282
+ mkl-service==2.4.0
283
+ opencv-python-headless==4.11.0.86
284
+ prompt_toolkit==3.0.50
285
+ trl==0.16.1
286
+ debugpy==1.8.13
287
+ pydantic==2.10.6
288
+ stack-data==0.6.3
289
+ tqdm-multiprocess==0.0.11
290
+ gradio_client==1.3.0
291
+ dill==0.3.8
292
+ evaluate==0.4.3
293
+ nvidia-cufft-cu12==11.2.1.3
294
+ nest-asyncio==1.6.0
295
+ pyzmq==26.2.1
296
+ tensorboard-data-server==0.7.2
297
+ docstring_parser==0.16
298
+ click==8.1.8
299
+ psutil==7.0.0
300
+ annotated-types==0.7.0
301
+ ninja==1.11.1.4
302
+ pillow==10.4.0
303
+ tcolorpy==0.1.7
304
+ einops==0.8.1
305
+ wcwidth==0.2.13
306
+ typer==0.15.2
307
+ tqdm==4.67.1
308
+ tomlkit==0.12.0
309
+ ipykernel==6.28.0
310
+ diffusers==0.32.2
311
+ mistral_common==1.5.4
312
+ setuptools==75.8.0
313
+ h2==4.2.0
314
+ cachetools==5.5.2
315
+ wrapt==1.17.2
316
+ pydantic_core==2.27.2
317
+ ffmpy==0.5.0
318
+ sacrebleu==2.5.1
319
+ outlines_core==0.1.26
320
+ jsonlines==4.0.0
321
+ fonttools==4.56.0
322
+ nvidia-nvtx-cu12==12.4.127
323
+ safetensors==0.5.3
324
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
325
+ bitsandbytes==0.45.3
326
+ nanobind==2.6.1
327
+ tokenizers==0.21.1
328
+ propcache==0.3.0
329
+ distro==1.9.0
330
+ python-multipart==0.0.20
331
+ uvloop==0.21.0
332
+ liger_kernel==0.5.5
333
+ python-json-logger==3.3.0
334
+ multidict==6.1.0
335
+ ray==2.43.0
336
+ opentelemetry-exporter-otlp-proto-http==1.26.0
337
+ typepy==1.3.4
338
+ torchvision==0.21.0
339
+ PyYAML==6.0.2
340
+ xxhash==3.5.0
341
+ anthropic==0.49.0
342
+ py-cpuinfo==9.0.0
343
+ compressed-tensors==0.9.3
344
+ opentelemetry-exporter-otlp-proto-common==1.26.0
345
+ opentelemetry-sdk==1.26.0
346
+ shtab==1.7.1
347
+ websockets==12.0
348
+ numba==0.61.2
349
+ llguidance==0.7.13
350
+ hyperframe==6.1.0
351
+ anyio==4.8.0
352
+ asttokens==3.0.0
353
+ blessed==1.20.0
354
+ colorama==0.4.6
355
+ shellingham==1.5.4
356
+ Brotli==1.0.9
357
+ sqlitedict==2.1.0
358
+ nvidia-cusparse-cu12==12.3.1.170
359
+ Deprecated==1.2.18
360
+ cloudpickle==3.1.1
361
+ pathvalidate==3.2.3
362
+ opentelemetry-semantic-conventions-ai==0.4.3
363
+ platformdirs==4.2.2
364
+ autocommand==2.2.2
365
+ backports.tarfile==1.2.0
366
+ importlib_metadata==8.0.0
367
+ inflect==7.3.1
368
+ jaraco.collections==5.1.0
369
+ jaraco.context==5.3.0
370
+ jaraco.functools==4.0.1
371
+ jaraco.text==3.12.1
372
+ more-itertools==10.3.0
373
+ packaging==24.2
374
+ tomli==2.0.1
375
+ typeguard==4.3.0
376
+ typing_extensions==4.12.2
377
+ wheel==0.45.1
378
+ zipp==3.19.2
wandb/run-20250920_081121-2oxex54w/files/wandb-metadata.json ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28",
3
+ "python": "CPython 3.11.11",
4
+ "startedAt": "2025-09-20T12:11:21.301942Z",
5
+ "args": [
6
+ "--model",
7
+ "vllm",
8
+ "--model_args",
9
+ "pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000",
10
+ "--tasks",
11
+ "mgsm_native_cot_ja,mgsm_native_cot_en,mgsm_native_cot_th,mgsm_native_cot_es,mgsm_native_cot_bn,mgsm_native_cot_te,mgsm_native_cot_fr,mgsm_native_cot_zh,mgsm_native_cot_sw,mgsm_native_cot_de,mgsm_native_cot_ru",
12
+ "--batch_size",
13
+ "auto",
14
+ "--apply_chat_template",
15
+ "--output_path",
16
+ "ckpts/rerun",
17
+ "--log_samples",
18
+ "--gen_kwargs",
19
+ "max_gen_toks=20000",
20
+ "--wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto"
21
+ ],
22
+ "program": "/home/jdhwang/.conda/envs/llm/bin/lm_eval",
23
+ "git": {
24
+ "remote": "[email protected]:jd730/BRIDGE-private.git",
25
+ "commit": "bb8b2be1f7420f9c6a3d65f0eaf3072732d73123"
26
+ },
27
+ "email": "[email protected]",
28
+ "root": "ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309",
29
+ "host": "node1803",
30
+ "executable": "/orcd/home/001/jdhwang/.conda/envs/llm/bin/python",
31
+ "cpu_count": 64,
32
+ "cpu_count_logical": 128,
33
+ "gpu": "NVIDIA H100 80GB HBM3",
34
+ "gpu_count": 2,
35
+ "disk": {
36
+ "/": {
37
+ "total": "464506159104",
38
+ "used": "12268101632"
39
+ }
40
+ },
41
+ "memory": {
42
+ "total": "2163473002496"
43
+ },
44
+ "gpu_nvidia": [
45
+ {
46
+ "name": "NVIDIA H100 80GB HBM3",
47
+ "memoryTotal": "85520809984",
48
+ "cudaCores": 16896,
49
+ "architecture": "Hopper",
50
+ "uuid": "GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b"
51
+ },
52
+ {
53
+ "name": "NVIDIA H100 80GB HBM3",
54
+ "memoryTotal": "85520809984",
55
+ "cudaCores": 16896,
56
+ "architecture": "Hopper",
57
+ "uuid": "GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b"
58
+ }
59
+ ],
60
+ "cudaVersion": "12.4",
61
+ "slurm": {
62
+ "array_job_id": "4452191",
63
+ "array_task_count": "1",
64
+ "array_task_id": "0",
65
+ "array_task_max": "0",
66
+ "array_task_min": "0",
67
+ "array_task_step": "1",
68
+ "cluster_name": "eofe7",
69
+ "conf": "/etc/slurm/slurm.conf",
70
+ "cpus_on_node": "16",
71
+ "cpus_per_task": "16",
72
+ "gpus_on_node": "2",
73
+ "gtids": "0",
74
+ "job_account": "mit_general",
75
+ "job_cpus_per_node": "16",
76
+ "job_end_time": "1758413466",
77
+ "job_gid": "209655",
78
+ "job_gpus": "1,2",
79
+ "job_id": "4452191",
80
+ "job_name": "eval.sh",
81
+ "job_nodelist": "node1803",
82
+ "job_num_nodes": "1",
83
+ "job_partition": "ou_bcs_normal",
84
+ "job_qos": "normal",
85
+ "job_start_time": "1758370266",
86
+ "job_uid": "209655",
87
+ "job_user": "jdhwang",
88
+ "jobid": "4452191",
89
+ "localid": "0",
90
+ "mem_per_node": "131072",
91
+ "nnodes": "1",
92
+ "nodeid": "0",
93
+ "nodelist": "node1803",
94
+ "nprocs": "1",
95
+ "ntasks": "1",
96
+ "ntasks_per_node": "1",
97
+ "oom_kill_step": "0",
98
+ "prio_process": "0",
99
+ "procid": "0",
100
+ "script_context": "prolog_task",
101
+ "submit_dir": "/orcd/home/002/jdhwang/BRIDGE",
102
+ "submit_host": "orcd-login003.mit.edu",
103
+ "task_pid": "1143610",
104
+ "tasks_per_node": "1",
105
+ "topology_addr": "node1803",
106
+ "topology_addr_pattern": "node",
107
+ "tres_per_task": "cpu=16"
108
+ },
109
+ "writerId": "bdsaggp24nt8kfc8qjgq21gi927g7e3o"
110
+ }
wandb/run-20250920_081121-2oxex54w/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"mgsm_native_cot_zh_eval_results":{"path":"media/table/mgsm_native_cot_zh_eval_results_12_2882185f7e816bbcc2fe.table.json","artifact_path":"wandb-client-artifact://nkp1984crhkeh7851auhm8cmiz0c966uge9yj76ipqritrecvc5bfb9c38ag3q5n9lxlxv0a592qujt5hv20d5ou72uzs0da2311ong59bzu3eu2j7sjwoi1ham5i3wi/mgsm_native_cot_zh_eval_results.table.json","_type":"table-file","nrows":500,"size":6816454,"sha256":"2882185f7e816bbcc2fe7db847cf400946e1eecb9c136b26ba49d658f74f6aa1","log_mode":"IMMUTABLE","ncols":8,"_latest_artifact_path":"wandb-client-artifact://kdq56ygszyonuw3iawyllv3myb0yvhb1lelksg4a7blhqf51vl8fzo7pfqegv64u4r6ndaz7pbste9hbc68sir2hvdemy03qgvhjn4ol3q87z73n0lb6yikdy3o1ujb8:latest/mgsm_native_cot_zh_eval_results.table.json"},"mgsm_native_cot_th_eval_results":{"_type":"table-file","sha256":"baa86a95f601e3c721a0fbcf0ee7623b3d2011e2c01da8b0c3cb2d98b16684ac","size":5007472,"path":"media/table/mgsm_native_cot_th_eval_results_11_baa86a95f601e3c721a0.table.json","ncols":8,"_latest_artifact_path":"wandb-client-artifact://9yj2698oonww08k7xlrlqosfnpi8bf8whg3mu81535kr3f8sv1lrk187fyckresyat8nemrzjmbs8y8f5xkb6594i7jjen2yaigaf6909w8ptgr8i6hb1efl3y4culsb:latest/mgsm_native_cot_th_eval_results.table.json","log_mode":"IMMUTABLE","nrows":500,"artifact_path":"wandb-client-artifact://gja5by4u29qqcgmr6sb6dwapnkradcp3asz7dfpbn8wh8omw28idhrqljkjewim9009pew6vmw82s1xubz6lr0x7z2ounjv209iivpf93x9ymx5x83ckxj48a44i7xn0/mgsm_native_cot_th_eval_results.table.json"},"mgsm_native_cot_en_eval_results":{"_latest_artifact_path":"wandb-client-artifact://g1upq5u3jz3pag4sh1m0vjv5rmwplu9a9a9abfqhq83nl6kkyphd4joh0hg9zbe5l3ra4qjxygdt7x7m15gnuguznxif28ptxvea75zhbjlmr8m3vfe265a8dttuhpb1:latest/mgsm_native_cot_en_eval_results.table.json","size":5089950,"artifact_path":"wandb-client-artifact://7qqodhesd67ke2alwfwf9m6q5zw8oj0stuwgoszb386l13ofees3uxxwcly1gij11waas038xcn059krmog8cskfh8ywl5r5qc1o06i1ytq6g709pdq5bik1vgggxm8c/mgsm_native_cot_en_eval_results.table.json","nrows":500,"log_mode":"IMMUTABLE","path":"media/table/m
gsm_native_cot_en_eval_results_4_82791ccc59d2c61a093c.table.json","_type":"table-file","ncols":8,"sha256":"82791ccc59d2c61a093cbdd818ae3ff870ab3d55a931b224bfbca4751c1765bf"},"mgsm_native_cot_sw/alias":"mgsm_native_cot_sw","mgsm_native_cot_fr/exact_match,strict-match":0.004,"mgsm_native_cot_ja/alias":"mgsm_native_cot_ja","mgsm_native_cot_sw_eval_results":{"nrows":500,"ncols":8,"size":22151199,"sha256":"d79a57dcd801f08109f8b083cce4884bf51b225d8127bc78429cf19d3df17aad","_type":"table-file","_latest_artifact_path":"wandb-client-artifact://uwdhtiin3cgey08x46sll6rbkjr6kz632eelzimhsh1pxqn6vbrkwczotv3aj0qfan0xj5kffsltbfe2sbqpljvcyl9dqefjwmggqf9tx47zzzeqwdnfw4lkruafdn07:latest/mgsm_native_cot_sw_eval_results.table.json","path":"media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json","artifact_path":"wandb-client-artifact://bbvbahu84etz52vzztm3c6g9ifdlaksmuq7b0ck0nhzo9x19db225j1npqxx09xekaa48m3yu9rsyl247urothkf4emwf6087de2miq7cb7i5z5fba29f6b6vcio5iaw/mgsm_native_cot_sw_eval_results.table.json","log_mode":"IMMUTABLE"},"mgsm_native_cot_te/exact_match,flexible-extract":0.12,"mgsm_native_cot_ja_eval_results":{"_type":"table-file","artifact_path":"wandb-client-artifact://iwu92mws1e8f263xazaxilfbyhxdxe7hpkir0numy6cug9tf3r48relnykckfq0j3aii4hbodiln44j4iqo7fexjvnhuano4k9rh1t048hfrg7eh4lo7p6bc5x5ec8l9/mgsm_native_cot_ja_eval_results.table.json","ncols":8,"path":"media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json","log_mode":"IMMUTABLE","_latest_artifact_path":"wandb-client-artifact://f5tg80y2zzbku7qkoirlmlhz7d02n91o6hzsfj91cws7t60gt015qockdoscq97c4gcpn4sm3ggetskv44zqylmqai9i2j5f1no6s2eanygyakr7z04e9g4pqbwq82v2:latest/mgsm_native_cot_ja_eval_results.table.json","size":19900362,"nrows":500,"sha256":"098df869a17abf277e8cf3b5d4966de124ba0fed82b267159ef0c1d32c7f38a2"},"mgsm_native_cot_sw/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_th/exact_match_stderr,strict-mat
ch":"N/A","mgsm_native_cot_de/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_th/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_te/alias":"mgsm_native_cot_te","mgsm_native_cot_zh/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ja/exact_match,strict-match":0.008,"mgsm_native_cot_zh/alias":"mgsm_native_cot_zh","mgsm_native_cot_es/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ru_eval_results":{"_type":"table-file","sha256":"19e62e412810312c2375525d388b308579f502daf4537e08ea3e97b90b3ef016","_latest_artifact_path":"wandb-client-artifact://ssphf9mv1o6gb0j85z3gfrhwargykdb0luavoyk0pha4wg7ovjkne5oln8tfzygwi6tfmuvgu0fdjl2noewx4kvqtjovdj87tt3vf6tnucvvookj2m9skxydcb1q3ikr:latest/mgsm_native_cot_ru_eval_results.table.json","nrows":500,"log_mode":"IMMUTABLE","artifact_path":"wandb-client-artifact://nju8zlsx5likjjdiq9ix9kpfbd128ffhidfei5n1o9ns883akydn3oe0edbq588qwtck6y53yar6vqg4ghqytfvb2udoxiaj3jqdspbze43bveqr2untjk3r5h8kov6v/mgsm_native_cot_ru_eval_results.table.json","ncols":8,"path":"media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json","size":13109180},"mgsm_native_cot_th/alias":"mgsm_native_cot_th","mgsm_native_cot_te/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr/alias":"mgsm_native_cot_fr","mgsm_native_cot_ru/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ja/exact_match,flexible-extract":0.644,"_timestamp":1.7583755761531694e+09,"mgsm_native_cot_es/exact_match,flexible-extract":0.78,"mgsm_native_cot_bn_eval_results":{"size":16518181,"sha256":"f5e72dc1f8666e2a7b2dd420223eda667dbec11d2c10570f946fcc38e857476a","_latest_artifact_path":"wandb-client-artifact://yr3i8r2cgym32wmpmzdoi89agvfbqf8rkmcw5czgif8twgbpd2ifhdzum1b0720p6gh46uxwok7zj56ycgodgqetjrxiin5dio9y4ubjuzh8wprfviboyo88it4fowsi:latest/mgsm_native_cot_bn_eval_results.table.json","path":"media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json","nrows":500,"_type":"table-file","ncols":8,"artifact_path":"wandb-
client-artifact://7pfp3stdhg0ptnttd5vi0di3j2rpm43qn1obywl8v754gx2yqso9s3qk71no2ddq1zu4czdztrn8shcqof2idvturotswrgh2ohkliv477lbym5tnu7mg2eksb5hgniw/mgsm_native_cot_bn_eval_results.table.json","log_mode":"IMMUTABLE"},"_step":12,"mgsm_native_cot_sw/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_en/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_sw/exact_match,strict-match":0,"mgsm_native_cot_bn/exact_match,flexible-extract":0.528,"mgsm_native_cot_zh/exact_match,flexible-extract":0.784,"mgsm_native_cot_es_eval_results":{"nrows":500,"log_mode":"IMMUTABLE","sha256":"db5ed66b097e218c4da6ade4820a3b57500460332d23695a9d082453f9eaef4e","path":"media/table/mgsm_native_cot_es_eval_results_5_db5ed66b097e218c4da6.table.json","size":8588068,"_type":"table-file","_latest_artifact_path":"wandb-client-artifact://ux5vlh8wacyfuof7pc4t55agsyg3nwzac4w6xfmjhi2j5z63gc767hzmqva63i1z2shq9549vn7ircit5e7xf1heikgjioppjtr6mic5petyfizupkh85owvqmlahyde:latest/mgsm_native_cot_es_eval_results.table.json","artifact_path":"wandb-client-artifact://3ez40i75t7j3zaj3xnzyhfqq39henad90lipliml5f3bcb1h4b9yreifkue9u5zfzv360fvrug7fiotercqa4ik31tivrzppgrqr86mm1isvaggoe1mlmchqhpmtl8k9/mgsm_native_cot_es_eval_results.table.json","ncols":8},"mgsm_native_cot_zh/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_te_eval_results":{"_type":"table-file","_latest_artifact_path":"wandb-client-artifact://3wvd0feekbllytg5c07f216mnn6ugilhacj5j353flytf0xzchfj577v6n6n3wms0wdkeamvey7fukilst8v5zvf7jzzof8snmqjy0ikvysqf8nvaelhir90xv3xfvxq:latest/mgsm_native_cot_te_eval_results.table.json","path":"media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json","artifact_path":"wandb-client-artifact://wu0kyoyi54m51v0k8rl2digz31m6mfp2r40fyzjzjr1ruo0fp8df41iqiin5ieeayd14bj5s3ete22cd5qg0jjmve9et5xjxw6z05pltmxvmlam7rmr9o6y8p7epty38/mgsm_native_cot_te_eval_results.table.json","log_mode":"IMMUTABLE","size":23468672,"ncols":8,"sha256":"c0fc0c81b8662e04cc0e8fc30ac6053adcf13bd384f959eee54fbeb5039dbc01
","nrows":500},"mgsm_native_cot_th/exact_match,flexible-extract":0.652,"mgsm_native_cot_de/alias":"mgsm_native_cot_de","mgsm_native_cot_ru/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_ru/exact_match,strict-match":0.004,"mgsm_native_cot_en/exact_match,strict-match":0,"mgsm_native_cot_de_eval_results":{"_latest_artifact_path":"wandb-client-artifact://l92nyplmx9l83jo3sw80tinrvnfoko7v1kc02phiy9h46b3v5ls9e1gbvy8xjjmgs8eczamzmsr9v54g7s4m2b5olhevlttcz98hb61gq72gk4fj9bv71ge8da91qxkn:latest/mgsm_native_cot_de_eval_results.table.json","_type":"table-file","artifact_path":"wandb-client-artifact://sy5rdhvvobn3pshimnyuxyo8gj1riw214tz1yc5vnrt1lnown73yh4fjl37mecgdhlv3g91dmcbxb7sk4hicr06s8aowfno29bxcj0vxl4wbym0kdtrmpa3zwjkev27b/mgsm_native_cot_de_eval_results.table.json","sha256":"fabaf33255f24add59c137ef77a33afd9e00d9d17b451c92e556f9ab861bb2ad","log_mode":"IMMUTABLE","ncols":8,"size":3328757,"path":"media/table/mgsm_native_cot_de_eval_results_3_fabaf33255f24add59c1.table.json","nrows":500},"mgsm_native_cot_te/exact_match,strict-match":0.02,"mgsm_native_cot_en/exact_match,flexible-extract":0.872,"mgsm_native_cot_ru/exact_match,flexible-extract":0.832,"mgsm_native_cot_te/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ja/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr_eval_results":{"size":3273008,"nrows":500,"log_mode":"IMMUTABLE","_latest_artifact_path":"wandb-client-artifact://klcjxv56dck9r5v7yixxzhs1vkalztbzpxi2g32kee1ieaqwpp5xzehy3b0it5z25qexpkvrivqefau0xsyre0j1hrw6lcuqz15w18v3t2ogsykczvkqev3ogwsrwa6k:latest/mgsm_native_cot_fr_eval_results.table.json","artifact_path":"wandb-client-artifact://digdeal9nyp9dh9ge1fe04twhqx7zqfin73cuhsx7f5x1c3sg1hvv27v5bv7kap7hntudd9ptp6i9d1laldsfty5xyf64eed1mj07e1cq9ekpkbt59fjzsgj5qdn5kqf/mgsm_native_cot_fr_eval_results.table.json","ncols":8,"sha256":"214cb1a5a2cdee9f330e1982a6ee87d274f796f5fae1bfd02596f69cf3108b1c","_type":"table-file","path":"media/table/mgsm_native_cot_fr_eval_results_6_214cb1a5a2cdee9f330e.
table.json"},"mgsm_native_cot_ja/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_bn/exact_match,strict-match":0,"mgsm_native_cot_th/exact_match,strict-match":0.26,"mgsm_native_cot_es/alias":"mgsm_native_cot_es","mgsm_native_cot_de/exact_match,flexible-extract":0.74,"mgsm_native_cot_en/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr/exact_match,flexible-extract":0.76,"mgsm_native_cot_es/exact_match,strict-match":0.008,"mgsm_native_cot_fr/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_de/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ru/alias":"mgsm_native_cot_ru","_wandb":{"runtime":5296},"_runtime":5296,"mgsm_native_cot_sw/exact_match,flexible-extract":0.008,"evaluation/eval_results":{"sha256":"817b26b9b7489391f4e7629070c960eaee51c5e2fd819c827f94ee7a6945c1cf","_type":"table-file","_latest_artifact_path":"wandb-client-artifact://5qlehutp8s8du57s7goeq5l74fpjy27nr9584dwoqfdcv3y64mc6zenbt2nh17yqiysjti7pjfebr8yi9e9ueixtoy0so8e4qul4atn4ypuw7rhmbefsvqoqtk4aezed:latest/evaluation/eval_results.table.json","size":1863,"artifact_path":"wandb-client-artifact://uo3la8vmc83mzok4j4viu8j3yxbp5ygjzktfg1w124h49nvkq4fj5puf329sx4e79ecqrd2pqncuh050bnum14gysm4ets9wq7om8dk0c57etth40kwl2wquof82a1o5/evaluation/eval_results.table.json","log_mode":"IMMUTABLE","path":"media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json","nrows":22,"ncols":7},"mgsm_native_cot_bn/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_bn/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_de/exact_match,strict-match":0,"mgsm_native_cot_zh/exact_match,strict-match":0,"mgsm_native_cot_bn/alias":"mgsm_native_cot_bn","mgsm_native_cot_es/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_en/alias":"mgsm_native_cot_en"}
wandb/run-20250920_081121-2oxex54w/logs/debug-core.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-20T08:11:21.452750753-04:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp3lkn2ocs/port-1143625.txt","pid":1143625,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-20T08:11:21.453095559-04:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1143625}
3
+ {"time":"2025-09-20T08:11:21.453077825-04:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1143625-1143815-3992152877/socket","Net":"unix"}}
4
+ {"time":"2025-09-20T08:11:21.634813033-04:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-20T08:11:21.640098784-04:00","level":"INFO","msg":"handleInformInit: received","streamId":"2oxex54w","id":"1(@)"}
6
+ {"time":"2025-09-20T08:11:21.866468885-04:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"2oxex54w","id":"1(@)"}
7
+ {"time":"2025-09-20T09:39:39.549058154-04:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"2oxex54w","id":"1(@)"}
8
+ {"time":"2025-09-20T09:39:39.567640542-04:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"2oxex54w","id":"1(@)"}
9
+ {"time":"2025-09-20T09:39:40.549091059-04:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
10
+ {"time":"2025-09-20T09:39:40.549121192-04:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
11
+ {"time":"2025-09-20T09:39:40.549126164-04:00","level":"INFO","msg":"server is shutting down"}
12
+ {"time":"2025-09-20T09:39:40.549132677-04:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
13
+ {"time":"2025-09-20T09:39:40.549192283-04:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1143625-1143815-3992152877/socket","Net":"unix"}}
14
+ {"time":"2025-09-20T09:39:40.54921864-04:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
15
+ {"time":"2025-09-20T09:39:40.549221165-04:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
16
+ {"time":"2025-09-20T09:39:40.549224135-04:00","level":"INFO","msg":"server is closed"}
wandb/run-20250920_081121-2oxex54w/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-20T08:11:21.640622676-04:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-09-20T08:11:21.866445386-04:00","level":"INFO","msg":"stream: created new stream","id":"2oxex54w"}
3
+ {"time":"2025-09-20T08:11:21.866465564-04:00","level":"INFO","msg":"stream: started","id":"2oxex54w"}
4
+ {"time":"2025-09-20T08:11:21.866482696-04:00","level":"INFO","msg":"writer: Do: started","stream_id":"2oxex54w"}
5
+ {"time":"2025-09-20T08:11:21.866483998-04:00","level":"INFO","msg":"handler: started","stream_id":"2oxex54w"}
6
+ {"time":"2025-09-20T08:11:21.866482679-04:00","level":"INFO","msg":"sender: started","stream_id":"2oxex54w"}
7
+ {"time":"2025-09-20T08:11:22.258337567-04:00","level":"WARN","msg":"handleCodeSave: program relative path is empty"}
8
+ {"time":"2025-09-20T08:11:22.258646248-04:00","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
9
+ {"time":"2025-09-20T09:39:39.399281107-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
10
+ {"time":"2025-09-20T09:39:39.487606624-04:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading summary, console lines 184-211","runtime_seconds":0.088083458}],"total_operations":1}}
11
+ {"time":"2025-09-20T09:39:39.549286458-04:00","level":"INFO","msg":"stream: closing","id":"2oxex54w"}
12
+ {"time":"2025-09-20T09:39:39.549294944-04:00","level":"INFO","msg":"handler: closed","stream_id":"2oxex54w"}
13
+ {"time":"2025-09-20T09:39:39.549300294-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":"2oxex54w"}
14
+ {"time":"2025-09-20T09:39:39.549303318-04:00","level":"INFO","msg":"sender: closed","stream_id":"2oxex54w"}
15
+ {"time":"2025-09-20T09:39:39.549556658-04:00","level":"INFO","msg":"stream: closed","id":"2oxex54w"}
wandb/run-20250920_081121-2oxex54w/logs/debug.log ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-20 08:11:21,421 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Configure stats pid to 1143625
3
+ 2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Loading settings from /home/jdhwang/.config/wandb/settings
4
+ 2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Loading settings from /orcd/home/002/jdhwang/BRIDGE/wandb/settings
5
+ 2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:setup_run_log_directory():703] Logging user logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250920_081121-2oxex54w/logs/debug.log
7
+ 2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250920_081121-2oxex54w/logs/debug-internal.log
8
+ 2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:init():830] calling init triggers
9
+ 2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-09-20 08:11:21,422 INFO MainThread:1143625 [wandb_init.py:init():871] starting backend
12
+ 2025-09-20 08:11:21,634 INFO MainThread:1143625 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-09-20 08:11:21,638 INFO MainThread:1143625 [wandb_init.py:init():882] backend started and connected
14
+ 2025-09-20 08:11:21,639 INFO MainThread:1143625 [wandb_init.py:init():953] updated telemetry
15
+ 2025-09-20 08:11:21,655 INFO MainThread:1143625 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-09-20 08:11:22,257 INFO MainThread:1143625 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-09-20 08:11:22,623 INFO MainThread:1143625 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-09-20 08:11:22,623 INFO MainThread:1143625 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-09-20 08:11:22,623 INFO MainThread:1143625 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-09-20 08:11:22,623 INFO MainThread:1143625 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-09-20 08:11:22,625 INFO MainThread:1143625 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-09-20 09:39:29,566 INFO MainThread:1143625 [wandb_run.py:_config_callback():1363] config_cb None None {'task_configs': {'mgsm_native_cot_bn': {'task': 'mgsm_native_cot_bn', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'bn', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nধাপে ধাপে উত্তর:"}}{% else %}{{"প্রশ্ন: "+question+"\\nধাপে ধাপে উত্তর:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['প্রশ্ন:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'The answer is (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_de': {'task': 'mgsm_native_cot_de', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'de', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nSchritt-für-Schritt-Antwort:"}}{% else %}{{"Frage: "+question+"\\nSchritt-für-Schritt-Antwort:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': 
True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Frage:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'Die Antwort lautet (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_en': {'task': 'mgsm_native_cot_en', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'en', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\\nStep-by-Step Answer:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Question:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'The answer is (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_es': {'task': 'mgsm_native_cot_es', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'es', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': 
'{% if answer is not none %}{{question+"\\nRespuesta paso a paso:"}}{% else %}{{"Pregunta: "+question+"\\nRespuesta paso a paso:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[23:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Pregunta:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'La respuesta es (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_fr': {'task': 'mgsm_native_cot_fr', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'fr', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nRéponse étape par étape :"}}{% else %}{{"Question : "+question+"\\nRéponse étape par étape :"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[26:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Question :', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'La réponse est (\\-?[0-9\\.\\,]+)'}, {'function': 
'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_ja': {'task': 'mgsm_native_cot_ja', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'ja', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nステップごとの答え:"}}{% else %}{{"問題: "+question+"\\nステップごとの答え:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['問題:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': '答えは(\\-?[0-9\\.\\,]+)です。'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_ru': {'task': 'mgsm_native_cot_ru', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'ru', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nПошаговоерешение:"}}{% else %}{{"Задача: "+question+"\\nПошаговоерешение:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': 
[{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Задача:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'Ответ — (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_sw': {'task': 'mgsm_native_cot_sw', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'sw', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nJibu la Hatua kwa Hatua:"}}{% else %}{{"Swali: "+question+"\\nJibu la Hatua kwa Hatua:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[25:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Swali:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'Jibu ni (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_te': {'task': 'mgsm_native_cot_te', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 
'dataset_name': 'te', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nదశలవారీగా సమాధానం:"}}{% else %}{{"ప్రశ్న: "+question+"\\nదశలవారీగా సమాధానం:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[19:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['ప్రశ్న:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'సమాధానం (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_th': {'task': 'mgsm_native_cot_th', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'th', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nคำตอบทีละขั้นตอน:"}}{% else %}{{"โจทย์: "+question+"\\nคำตอบทีละขั้นตอน:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['โจทย์:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'คำตอบคือ 
(\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_zh': {'task': 'mgsm_native_cot_zh', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'zh', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\n逐步解答:"}}{% else %}{{"问题: "+question+"\\n逐步解答:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['问题:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': '答案是 (\\-?[0-9\\.\\,]+)。'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}}, 'cli_configs': {'model': 'vllm', 'model_args': 'pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000', 'batch_size': 'auto', 'batch_sizes': [], 'device': None, 'use_cache': None, 'limit': None, 'bootstrap_iters': 0, 'gen_kwargs': {'max_gen_toks': 20000}, 'random_seed': 0, 'numpy_seed': 1234, 'torch_seed': 1234, 'fewshot_seed': 1234}}
23
+ 2025-09-20 09:39:38,409 INFO MainThread:1143625 [wandb_run.py:_finish():2224] finishing run jdhwang/MRPO_eval/2oxex54w
24
+ 2025-09-20 09:39:38,409 INFO MainThread:1143625 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
25
+ 2025-09-20 09:39:38,486 INFO MainThread:1143625 [wandb_run.py:_restore():2405] restore
26
+ 2025-09-20 09:39:38,486 INFO MainThread:1143625 [wandb_run.py:_restore():2411] restore done
27
+ 2025-09-20 09:39:39,547 INFO MainThread:1143625 [wandb_run.py:_footer_history_summary_info():3903] rendering history
28
+ 2025-09-20 09:39:39,548 INFO MainThread:1143625 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
29
+ 2025-09-20 09:39:39,548 INFO MainThread:1143625 [wandb_run.py:_footer_sync_info():3864] logging synced files
wandb/run-20250920_081121-2oxex54w/run-2oxex54w.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07a3dd0ca1cf928a27f52210df217c77c140eaef367b9cde8b60d69e33e2857d
3
+ size 929156
wandb/run-20250921_062002-cothceaw/files/config.yaml ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ b4tnkkvikp4ud9d1f7t2v1f37ei0r2ow:
6
+ args:
7
+ - --model
8
+ - vllm
9
+ - --model_args
10
+ - pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
11
+ - --tasks
12
+ - mmlu_prox_ar_math,mmlu_prox_bn_math
13
+ - --batch_size
14
+ - auto
15
+ - --apply_chat_template
16
+ - --output_path
17
+ - ckpts/rerun
18
+ - --log_samples
19
+ - --gen_kwargs
20
+ - max_gen_toks=20000
21
+ - --wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto
22
+ cpu_count: 64
23
+ cpu_count_logical: 128
24
+ cudaVersion: "12.4"
25
+ disk:
26
+ /:
27
+ total: "464506159104"
28
+ used: "12265783296"
29
30
+ executable: /orcd/home/001/jdhwang/.conda/envs/llm/bin/python
31
+ git:
32
+ commit: 57228cdf2aa2656e94d94dc3f5530986c0f48545
33
+ remote: [email protected]:jd730/BRIDGE-private.git
34
+ gpu: NVIDIA H100 80GB HBM3
35
+ gpu_count: 2
36
+ gpu_nvidia:
37
+ - architecture: Hopper
38
+ cudaCores: 16896
39
+ memoryTotal: "85520809984"
40
+ name: NVIDIA H100 80GB HBM3
41
+ uuid: GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b
42
+ - architecture: Hopper
43
+ cudaCores: 16896
44
+ memoryTotal: "85520809984"
45
+ name: NVIDIA H100 80GB HBM3
46
+ uuid: GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b
47
+ host: node1803
48
+ memory:
49
+ total: "2163473002496"
50
+ os: Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28
51
+ program: /home/jdhwang/.conda/envs/llm/bin/lm_eval
52
+ python: CPython 3.11.11
53
+ root: ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309
54
+ slurm:
55
+ array_job_id: "4507342"
56
+ array_task_count: "8"
57
+ array_task_id: "0"
58
+ array_task_max: "7"
59
+ array_task_min: "0"
60
+ array_task_step: "1"
61
+ cluster_name: eofe7
62
+ conf: /etc/slurm/slurm.conf
63
+ cpus_on_node: "16"
64
+ cpus_per_task: "16"
65
+ gpus_on_node: "2"
66
+ gtids: "0"
67
+ job_account: mit_general
68
+ job_cpus_per_node: "16"
69
+ job_end_time: "1758493186"
70
+ job_gid: "209655"
71
+ job_gpus: 1,2
72
+ job_id: "4535465"
73
+ job_name: mmlu_prox.sh
74
+ job_nodelist: node1803
75
+ job_num_nodes: "1"
76
+ job_partition: ou_bcs_low
77
+ job_qos: normal
78
+ job_start_time: "1758449986"
79
+ job_uid: "209655"
80
+ job_user: jdhwang
81
+ jobid: "4535465"
82
+ localid: "0"
83
+ mem_per_node: "131072"
84
+ nnodes: "1"
85
+ nodeid: "0"
86
+ nodelist: node1803
87
+ nprocs: "1"
88
+ ntasks: "1"
89
+ ntasks_per_node: "1"
90
+ oom_kill_step: "0"
91
+ prio_process: "0"
92
+ procid: "0"
93
+ script_context: prolog_task
94
+ submit_dir: /orcd/home/002/jdhwang/BRIDGE
95
+ submit_host: orcd-login003.mit.edu
96
+ task_pid: "1320304"
97
+ tasks_per_node: "1"
98
+ topology_addr: node1803
99
+ topology_addr_pattern: node
100
+ tres_per_task: cpu=16
101
+ startedAt: "2025-09-21T10:20:02.294501Z"
102
+ writerId: b4tnkkvikp4ud9d1f7t2v1f37ei0r2ow
103
+ m: []
104
+ python_version: 3.11.11
105
+ t:
106
+ "1":
107
+ - 1
108
+ - 5
109
+ - 11
110
+ - 30
111
+ - 41
112
+ - 49
113
+ - 51
114
+ - 53
115
+ - 71
116
+ - 95
117
+ - 98
118
+ - 100
119
+ - 105
120
+ "2":
121
+ - 1
122
+ - 5
123
+ - 11
124
+ - 30
125
+ - 41
126
+ - 49
127
+ - 51
128
+ - 53
129
+ - 71
130
+ - 95
131
+ - 98
132
+ - 100
133
+ - 105
134
+ "3":
135
+ - 2
136
+ - 13
137
+ - 62
138
+ "4": 3.11.11
139
+ "5": 0.21.0
140
+ "6": 4.51.3
141
+ "12": 0.21.0
142
+ "13": linux-x86_64
143
+ cli_configs:
144
+ value:
145
+ batch_size: auto
146
+ batch_sizes: []
147
+ bootstrap_iters: 0
148
+ device: null
149
+ fewshot_seed: 1234
150
+ gen_kwargs:
151
+ max_gen_toks: 20000
152
+ limit: null
153
+ model: vllm
154
+ model_args: pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
155
+ numpy_seed: 1234
156
+ random_seed: 0
157
+ torch_seed: 1234
158
+ use_cache: null
159
+ task_configs:
160
+ value:
161
+ mmlu_prox_ar_math:
162
+ dataset_name: ar
163
+ dataset_path: li-lab/MMLU-ProX
164
+ description: |
165
+ فيما يلي أسئلة اختيار من متعدد (مع إجابات) حول الرياضيات. فكر خطوة بخطوة ثم أنهِ إجابتك بـ 'الإجابة هي (X)' حيث X هو حرف الخيار الصحيح.
166
+ doc_to_target: answer
167
+ doc_to_text: functools.partial(<function format_cot_example at 0x14d51fcc0b80>, including_answer=False)
168
+ fewshot_config:
169
+ doc_to_target: ""
170
+ doc_to_text: functools.partial(<function format_cot_example at 0x14d51fcc0e00>, including_answer=True)
171
+ sampler: first_n
172
+ fewshot_delimiter: |4+
173
+
174
+ fewshot_split: validation
175
+ filter_list:
176
+ - filter:
177
+ - function: regex
178
+ regex_pattern: الإجابة هي \(?([ABCDEFGHIJ])\)?
179
+ - function: take_first
180
+ name: custom-extract
181
+ generation_kwargs:
182
+ do_sample: false
183
+ max_gen_toks: 20000
184
+ temperature: 0
185
+ until:
186
+ - </s>
187
+ - 'Q:'
188
+ - 'سؤال:'
189
+ - <|im_end|>
190
+ metadata:
191
+ version: 0
192
+ metric_list:
193
+ - aggregation: mean
194
+ higher_is_better: true
195
+ ignore_case: true
196
+ ignore_punctuation: true
197
+ metric: exact_match
198
+ num_fewshot: 5
199
+ output_type: generate_until
200
+ process_docs: functools.partial(<function process_docs at 0x14d51fcc0860>, subject='math')
201
+ repeats: 1
202
+ should_decontaminate: false
203
+ target_delimiter: ' '
204
+ task: mmlu_prox_ar_math
205
+ task_alias: math
206
+ test_split: test
207
+ mmlu_prox_bn_math:
208
+ dataset_name: bn
209
+ dataset_path: li-lab/MMLU-ProX
210
+ description: |
211
+ নিম্নলিখিত গণিত সম্পর্কে বহুনির্বাচনী প্রশ্ন (উত্তরসহ)। ধাপে ধাপে চিন্তা করুন এবং তারপর আপনার উত্তর "উত্তর হল (X)" দিয়ে শেষ করুন যেখানে X হল সঠিক বিকল্পের অক্ষর।
212
+ doc_to_target: answer
213
+ doc_to_text: functools.partial(<function format_cot_example at 0x14d51fb7a200>, including_answer=False)
214
+ fewshot_config:
215
+ doc_to_target: ""
216
+ doc_to_text: functools.partial(<function format_cot_example at 0x14d51fb7b740>, including_answer=True)
217
+ sampler: first_n
218
+ fewshot_delimiter: |4+
219
+
220
+ fewshot_split: validation
221
+ filter_list:
222
+ - filter:
223
+ - function: regex
224
+ regex_pattern: উত্তর হল \(?([ABCDEFGHIJ])\)?
225
+ - function: take_first
226
+ name: custom-extract
227
+ generation_kwargs:
228
+ do_sample: false
229
+ max_gen_toks: 20000
230
+ temperature: 0
231
+ until:
232
+ - </s>
233
+ - 'Q:'
234
+ - 'প্রশ্ন:'
235
+ - <|im_end|>
236
+ metadata:
237
+ version: 0
238
+ metric_list:
239
+ - aggregation: mean
240
+ higher_is_better: true
241
+ ignore_case: true
242
+ ignore_punctuation: true
243
+ metric: exact_match
244
+ num_fewshot: 5
245
+ output_type: generate_until
246
+ process_docs: functools.partial(<function process_docs at 0x14d51fb79e40>, subject='math')
247
+ repeats: 1
248
+ should_decontaminate: false
249
+ target_delimiter: ' '
250
+ task: mmlu_prox_bn_math
251
+ task_alias: math
252
+ test_split: test
wandb/run-20250921_062002-cothceaw/files/media/table/evaluation/eval_results_1_2a7ec9e10306569eae6e.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["Tasks", "Version", "Filter", "num_fewshot", "Metric", "Value", "Stderr"], "data": [["mmlu_prox_ar_math", 0.0, "custom-extract", 5, "exact_match", "0.27905255366395265", "N/A"], ["mmlu_prox_bn_math", 0.0, "custom-extract", 5, "exact_match", "0.14433752775721687", "N/A"]]}
wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d425cdaeb765848e725c89a34944d791bdbf0248c6361032236b97f9611117e
3
+ size 89484352
wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f734c70b94d2677510f9200822c1810a65f0546ff35668ea1df59d81ec4ba33
3
+ size 55489730
wandb/run-20250921_062002-cothceaw/files/output.log ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-21:06:20:03,677 INFO [__main__.py:291] Verbosity set to INFO
2
+ 2025-09-21:06:20:37,183 INFO [__init__.py:464] The tag pawsx is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
3
+ 2025-09-21:06:20:38,310 INFO [__main__.py:388] Selected Tasks: ['mmlu_prox_ar_math', 'mmlu_prox_bn_math']
4
+ 2025-09-21:06:20:38,320 INFO [evaluator.py:161] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
5
+ 2025-09-21:06:20:38,320 WARNING [evaluator.py:172] generation_kwargs specified through cli, these settings will update set parameters in yaml tasks. Ensure 'do_sample=True' for non-greedy decoding!
6
+ 2025-09-21:06:20:38,321 INFO [evaluator.py:198] Initializing vllm model, with arguments: {'pretrained': 'ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', 'dtype': 'auto', 'tensor_parallel_size': 2, 'max_model_len': 20000}
7
+ INFO 09-21 06:20:45 config.py:350] This model supports multiple tasks: {'generate', 'embedding'}. Defaulting to 'generate'.
8
+ INFO 09-21 06:20:45 config.py:1020] Defaulting to use mp for distributed inference
9
+ INFO 09-21 06:20:45 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', speculative_config=None, tokenizer='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=20000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=1234, served_model_name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, chat_template_text_format=string, mm_processor_kwargs=None, pooler_config=None)
10
+ WARNING 09-21 06:20:45 multiproc_gpu_executor.py:56] Reducing Torch parallelism from 16 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
11
+ INFO 09-21 06:20:45 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
12
+ INFO 09-21 06:20:45 selector.py:135] Using Flash Attention backend.
13
+ INFO 09-21 06:20:54 utils.py:961] Found nccl from library libnccl.so.2
14
+ INFO 09-21 06:20:54 pynccl.py:69] vLLM is using nccl==2.21.5
15
+ INFO 09-21 06:20:55 custom_all_reduce_utils.py:242] reading GPU P2P access cache from /home/jdhwang/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
16
+ INFO 09-21 06:20:55 shm_broadcast.py:236] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x14d81f47a010>, local_subscribe_port=46667, remote_subscribe_port=None)
17
+ INFO 09-21 06:20:55 model_runner.py:1072] Starting to load model ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309...
18
+ Loading safetensors checkpoint shards: 0% Completed | 0/4 [00:00<?, ?it/s]
19
+ Loading safetensors checkpoint shards: 25% Completed | 1/4 [00:09<00:28, 9.34s/it]
20
+ Loading safetensors checkpoint shards: 50% Completed | 2/4 [00:09<00:07, 3.97s/it]
21
+ Loading safetensors checkpoint shards: 75% Completed | 3/4 [00:10<00:02, 2.41s/it]
22
+ Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 1.72s/it]
23
+ Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00, 2.69s/it]
24
+ INFO 09-21 06:21:06 model_runner.py:1077] Loading model weights took 7.1216 GB
25
+ INFO 09-21 06:21:08 worker.py:232] Memory profiling results: total_gpu_memory=79.10GiB initial_memory_usage=8.22GiB peak_torch_memory=8.75GiB memory_usage_post_profile=8.60GiB non_torch_memory=1.44GiB kv_cache_size=60.99GiB gpu_memory_utilization=0.90
26
+ INFO 09-21 06:21:09 distributed_gpu_executor.py:57] # GPU blocks: 142750, # CPU blocks: 9362
27
+ INFO 09-21 06:21:09 distributed_gpu_executor.py:61] Maximum concurrency for 20000 tokens per request: 114.20x
28
+ INFO 09-21 06:21:11 model_runner.py:1400] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
29
+ INFO 09-21 06:21:11 model_runner.py:1404] If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
30
+ INFO 09-21 06:21:26 custom_all_reduce.py:224] Registering 1995 cuda graph addresses
31
+ INFO 09-21 06:21:27 model_runner.py:1518] Graph capturing finished in 16 secs, took 0.25 GiB
32
+
33
+ Generating validation split: 100%|██████████| 70/70 [00:00<00:00, 2041.89 examples/s]
34
+ Generating test split: 100%|██████████| 11759/11759 [00:00<00:00, 196190.89 examples/s]
35
+ Filter: 100%|██████████| 70/70 [00:00<00:00, 6775.31 examples/s]
36
+ Filter: 100%|██████████| 11759/11759 [00:00<00:00, 78762.47 examples/s]
37
+ Generating validation split: 100%|██████████| 70/70 [00:00<00:00, 20925.19 examples/s]
38
+ Generating test split: 100%|██████████| 11759/11759 [00:00<00:00, 144140.86 examples/s]
39
+ Filter: 100%|██████████| 70/70 [00:00<00:00, 17085.74 examples/s]
40
+ Filter: 100%|██████████| 11759/11759 [00:00<00:00, 72996.18 examples/s]
41
+ 2025-09-21:06:21:32,266 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
42
+ 2025-09-21:06:21:32,266 INFO [evaluator.py:279] Setting fewshot random generator seed to 1234
43
+ 2025-09-21:06:21:32,267 INFO [task.py:430] Building contexts for mmlu_prox_bn_math on rank 0...
44
+ 100%|██████████| 1351/1351 [00:01<00:00, 1268.23it/s]
45
+ 2025-09-21:06:21:33,474 INFO [task.py:430] Building contexts for mmlu_prox_ar_math on rank 0...
46
+ 100%|██████████| 1351/1351 [00:01<00:00, 1264.36it/s]
47
+ 2025-09-21:06:21:34,673 INFO [evaluator.py:495] Running generate_until requests
48
+ Processed prompts: 100%|██████████| 1351/1351 [34:54<00:00, 1.55s/it, est. speed input: 2878.49 toks/s, output: 3301.25 toks/s]
49
+ final processing: 1351it [00:00, 213146.69it/s] | 1/2702 [34:55<1571:56:02, 2095.14s/it] 2874.66 toks/s, output: 3286.44 toks/s]
50
+ WARNING 09-21 06:26:49 scheduler.py:1481] Sequence group 627 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=1
51
+ WARNING 09-21 06:28:23 scheduler.py:1481] Sequence group 523 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=51
52
+ WARNING 09-21 06:31:28 scheduler.py:1481] Sequence group 393 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=101
53
+ WARNING 09-21 06:41:11 scheduler.py:1481] Sequence group 908 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=151
54
+ Processed prompts: 100%|██████████| 1351/1351 [43:26<00:00, 1.93s/it, est. speed input: 1088.08 toks/s, output: 2998.64 toks/s]
55
+ final processing: 1351it [00:00, 191818.31it/s] | 1352/2702 [1:18:25<1:07:51, 3.02s/it] 1088.08 toks/s, output: 2998.64 toks/s]
56
+ WARNING 09-21 07:04:02 scheduler.py:1481] Sequence group 2127 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=201
57
+ WARNING 09-21 07:06:42 scheduler.py:1481] Sequence group 1934 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=251
58
+ WARNING 09-21 07:12:57 scheduler.py:1481] Sequence group 1761 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=301
59
+ Running generate_until requests: 100%|██████████| 2702/2702 [1:18:25<00:00, 1.74s/it]
60
+ INFO 09-21 07:40:13 multiproc_worker_utils.py:133] Terminating local vLLM worker processes
61
+ wandb: WARNING Serializing object of type str that is 104466 bytes
62
+ wandb: WARNING Serializing object of type str that is 102542 bytes
63
+ wandb: WARNING Serializing object of type str that is 107944 bytes
64
+ wandb: WARNING Serializing object of type str that is 103572 bytes
65
+ wandb: WARNING Serializing object of type str that is 105466 bytes
66
+ wandb: WARNING Serializing object of type str that is 100810 bytes
67
+ wandb: WARNING Serializing object of type str that is 100048 bytes
68
+ wandb: WARNING Serializing object of type str that is 106760 bytes
69
+ wandb: WARNING Serializing object of type str that is 100584 bytes
70
+ wandb: WARNING Serializing object of type str that is 103056 bytes
71
+ wandb: WARNING Serializing object of type str that is 102142 bytes
72
+ wandb: WARNING Serializing object of type str that is 101654 bytes
73
+ wandb: WARNING Serializing object of type str that is 105388 bytes
74
+ 2025-09-21:07:40:20,195 INFO [evaluation_tracker.py:207] Saving results aggregated
75
+ 2025-09-21:07:40:20,372 INFO [evaluation_tracker.py:291] Saving per-sample results for: mmlu_prox_ar_math
76
+ 2025-09-21:07:40:20,897 INFO [evaluation_tracker.py:291] Saving per-sample results for: mmlu_prox_bn_math
77
+ vllm (pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000), gen_kwargs: (max_gen_toks=20000), limit: None, num_fewshot: None, batch_size: auto
78
+ |Tasks|Version| Filter |n-shot| Metric | |Value | |Stderr|
79
+ |-----|------:|--------------|-----:|-----------|---|-----:|---|------|
80
+ |math | 0|custom-extract| 5|exact_match|↑ |0.2791|± | N/A|
81
+ |math | 0|custom-extract| 5|exact_match|↑ |0.1443|± | N/A|
wandb/run-20250921_062002-cothceaw/files/requirements.txt ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setproctitle==1.2.2
2
+ colorama==0.4.6
3
+ psutil==7.0.0
4
+ GitPython==3.1.43
5
+ docker-pycreds==0.4.0
6
+ gitdb==4.0.11
7
+ opencv-python==4.10.0.84
8
+ sentry-sdk==2.19.0
9
+ setproctitle==1.3.4
10
+ smmap==5.0.1
11
+ hjson==3.1.0
12
+ deepspeed==0.16.7
13
+ transformers==4.46.1
14
+ transformers==4.54.1
15
+ protobuf==6.31.1
16
+ accelerate==1.6.0
17
+ docopt==0.6.2
18
+ gguf==0.10.0
19
+ webencodings==0.5.1
20
+ pickleshare==0.7.5
21
+ fastjsonschema==2.21.1
22
+ backcall==0.2.0
23
+ tinycss2==1.4.0
24
+ soupsieve==2.7
25
+ pandocfilters==1.5.1
26
+ mistune==3.1.3
27
+ jupyterlab_pygments==0.3.0
28
+ defusedxml==0.7.1
29
+ bleach==6.2.0
30
+ yarg==0.1.9
31
+ ipython==8.12.3
32
+ beautifulsoup4==4.13.4
33
+ nbformat==5.10.4
34
+ nbclient==0.10.2
35
+ nbconvert==7.16.6
36
+ pipreqs==0.5.0
37
+ wandb==0.21.0
38
+ trl==0.17.0
39
+ lm_eval==0.4.4
40
+ langid==1.1.6
41
+ annotated-types==0.7.0
42
+ vllm==0.6.4.post1
43
+ typing-inspection==0.4.1
44
+ xformers==0.0.28.post3
45
+ pydantic_core==2.33.2
46
+ outlines==0.0.46
47
+ pydantic==2.11.7
48
+ compressed-tensors==0.8.0
49
+ click==8.2.1
50
+ lightning-utilities==0.15.0
51
+ torchmetrics==1.8.0
52
+ nvidia-ml-py==13.580.65
53
+ blessed==1.21.0
54
+ gpustat==1.1.1
55
+ nvidia-cufile-cu12==1.13.1.3
56
+ nvidia-cusparselt-cu12==0.6.2
57
+ mpmath==1.3.0
58
+ typing_extensions==4.12.2
59
+ sympy==1.13.1
60
+ pillow==11.0.0
61
+ nvidia-nvtx-cu12==12.4.127
62
+ nvidia-nvjitlink-cu12==12.4.127
63
+ nvidia-nccl-cu12==2.21.5
64
+ numpy==2.1.2
65
+ nvidia-curand-cu12==10.3.5.147
66
+ networkx==3.3
67
+ nvidia-cufft-cu12==11.2.1.3
68
+ nvidia-cuda-runtime-cu12==12.4.127
69
+ nvidia-cuda-nvrtc-cu12==12.4.127
70
+ nvidia-cuda-cupti-cu12==12.4.127
71
+ nvidia-cublas-cu12==12.4.5.8
72
+ MarkupSafe==2.1.5
73
+ setuptools==80.9.0
74
+ certifi==2025.8.3
75
+ einops==0.8.1
76
+ fsspec==2024.6.1
77
+ Jinja2==3.1.4
78
+ nvidia-cusolver-cu12==11.6.1.9
79
+ urllib3==2.5.0
80
+ tqdm==4.67.1
81
+ safetensors==0.6.2
82
+ regex==2025.7.34
83
+ PyYAML==6.0.2
84
+ packaging==25.0
85
+ idna==3.10
86
+ filelock==3.13.1
87
+ hf-xet==1.1.8
88
+ torch==2.6.0+cu124
89
+ charset-normalizer==3.4.3
90
+ requests==2.32.5
91
+ huggingface-hub==0.34.4
92
+ torchaudio==2.6.0+cu124
93
+ liger_kernel==0.6.2
94
+ nvidia-cusparse-cu12==12.3.1.170
95
+ nvidia-cudnn-cu12==9.1.0.70
96
+ torchvision==0.21.0+cu124
97
+ blingfire==0.1.8
98
+ triton==3.1.0
99
+ sentence-transformers==5.1.0
100
+ tabledata==1.3.4
101
+ lxml==5.3.1
102
+ accelerate==1.5.2
103
+ absl-py==2.1.0
104
+ Markdown==3.7
105
+ uvicorn==0.34.0
106
+ ruff==0.11.0
107
+ nvidia-cuda-runtime-cu12==12.4.127
108
+ airportsdata==20250224
109
+ nvidia-cusolver-cu12==11.6.1.9
110
+ astor==0.8.1
111
+ DataProperty==1.1.0
112
+ lm-format-enforcer==0.10.11
113
+ mdurl==0.1.2
114
+ nvidia-nccl-cu12==2.21.5
115
+ tabulate==0.9.0
116
+ python-dotenv==1.0.1
117
+ h5py==3.13.0
118
+ chardet==5.2.0
119
+ cupy-cuda12x==13.4.0
120
+ tiktoken==0.9.0
121
+ jiter==0.8.2
122
+ Pygments==2.19.1
123
+ typing_extensions==4.12.2
124
+ datasets==3.1.0
125
+ zipp==3.21.0
126
+ more-itertools==10.6.0
127
+ MarkupSafe==2.1.5
128
+ comm==0.2.2
129
+ pycountry==24.6.1
130
+ partial-json-parser==0.2.1.1.post5
131
+ gradio==4.44.0
132
+ prometheus_client==0.21.1
133
+ six==1.17.0
134
+ pytz==2025.1
135
+ unsloth_zoo==2025.3.12
136
+ starlette==0.46.0
137
+ llvmlite==0.44.0
138
+ peft==0.14.0
139
+ aiohttp==3.11.13
140
+ aiofiles==23.2.1
141
+ importlib_resources==6.5.2
142
+ nvidia-nvjitlink-cu12==12.4.127
143
+ semantic-version==2.10.0
144
+ decorator==5.2.1
145
+ nvidia-cublas-cu12==12.4.5.8
146
+ contourpy==1.3.1
147
+ torch==2.6.0
148
+ pytest==8.3.5
149
+ fastapi==0.115.11
150
+ seaborn==0.13.2
151
+ sympy==1.13.1
152
+ threadpoolctl==3.6.0
153
+ networkx==3.4.2
154
+ python-dateutil==2.9.0.post0
155
+ depyf==0.18.0
156
+ nvidia-ml-py==12.570.86
157
+ jedi==0.19.2
158
+ joblib==1.4.2
159
+ referencing==0.36.2
160
+ diskcache==5.6.3
161
+ httpcore==1.0.7
162
+ httpx==0.28.1
163
+ pyairports==2.1.1
164
+ protobuf==3.20.3
165
+ portalocker==3.1.1
166
+ nvidia-cudnn-cu12==9.1.0.70
167
+ Pebble==5.1.0
168
+ fsspec==2024.9.0
169
+ hf_transfer==0.1.9
170
+ ptyprocess==0.7.0
171
+ pexpect==4.9.0
172
+ nvidia-cuda-nvrtc-cu12==12.4.127
173
+ scipy==1.15.2
174
+ sentencepiece==0.2.0
175
+ cycler==0.12.1
176
+ packaging==24.2
177
+ openai==1.56.1
178
+ frozenlist==1.5.0
179
+ lark==1.2.2
180
+ filelock==3.17.0
181
+ opentelemetry-exporter-otlp==1.26.0
182
+ yarl==1.18.3
183
+ rouge_score==0.1.2
184
+ grpcio==1.70.0
185
+ googleapis-common-protos==1.70.0
186
+ aiohappyeyeballs==2.4.6
187
+ multiprocess==0.70.16
188
+ tornado==6.4.2
189
+ numpy==1.26.4
190
+ nltk==3.9.1
191
+ pip==25.0
192
+ charset-normalizer==3.3.2
193
+ prometheus-fastapi-instrumentator==7.0.2
194
+ numexpr==2.10.2
195
+ pyarrow==19.0.1
196
+ attrs==25.1.0
197
+ lm_eval==0.4.4
198
+ urllib3==2.3.0
199
+ mkl_random==1.2.8
200
+ httptools==0.6.4
201
+ gpustat==1.1.1
202
+ pluggy==1.5.0
203
+ huggingface-hub==0.30.2
204
+ triton==3.1.0
205
+ idna==3.7
206
+ ipython==8.20.0
207
+ pyparsing==3.2.1
208
+ rich-toolkit==0.13.2
209
+ googletrans==4.0.2
210
+ jupyter_core==5.7.2
211
+ zstandard==0.23.0
212
+ aiosignal==1.3.2
213
+ tyro==0.9.17
214
+ traitlets==5.14.3
215
+ h11==0.14.0
216
+ outlines==0.1.11
217
+ jupyter_client==8.6.3
218
+ loralib==0.1.2
219
+ kiwisolver==1.4.8
220
+ blake3==1.0.4
221
+ nvidia-cusparselt-cu12==0.6.2
222
+ rich==13.9.4
223
+ hf-xet==1.0.2
224
+ certifi==2025.1.31
225
+ wheel==0.45.1
226
+ pybind11==2.13.6
227
+ regex==2024.11.6
228
+ mpmath==1.3.0
229
+ transformers==4.51.3
230
+ flash_attn==2.7.4.post1
231
+ nvidia-curand-cu12==10.3.5.147
232
+ PySocks==1.7.1
233
+ gmpy2==2.2.1
234
+ iniconfig==2.0.0
235
+ pandas==2.2.3
236
+ Jinja2==3.1.5
237
+ msgpack==1.1.0
238
+ gguf==0.16.2
239
+ email_validator==2.2.0
240
+ tzdata==2025.1
241
+ cut-cross-entropy==25.1.1
242
+ tensorboard==2.19.0
243
+ matplotlib==3.10.1
244
+ jsonschema-specifications==2024.10.1
245
+ unsloth==2025.3.14
246
+ Werkzeug==3.1.3
247
+ opentelemetry-proto==1.26.0
248
+ fastrlock==0.8.3
249
+ dnspython==2.7.0
250
+ typeguard==4.4.2
251
+ opentelemetry-api==1.26.0
252
+ platformdirs==4.3.6
253
+ importlib_metadata==8.0.0
254
+ opentelemetry-semantic-conventions==0.47b0
255
+ sniffio==1.3.1
256
+ nvidia-cuda-cupti-cu12==12.4.127
257
+ scikit-learn==1.6.1
258
+ hpack==4.1.0
259
+ parso==0.8.4
260
+ torchaudio==2.6.0
261
+ xgrammar==0.1.18
262
+ executing==2.2.0
263
+ mkl_fft==1.3.11
264
+ vllm==0.8.4
265
+ word2number==1.1
266
+ pure_eval==0.2.3
267
+ watchfiles==1.0.4
268
+ pydub==0.25.1
269
+ mbstrdecoder==1.1.4
270
+ markdown-it-py==3.0.0
271
+ jsonschema==4.23.0
272
+ msgspec==0.19.0
273
+ rpds-py==0.23.1
274
+ wandb==0.19.9
275
+ matplotlib-inline==0.1.7
276
+ requests==2.32.3
277
+ interegular==0.3.3
278
+ pytablewriter==1.2.1
279
+ orjson==3.10.15
280
+ xformers==0.0.29.post2
281
+ fastapi-cli==0.0.7
282
+ mkl-service==2.4.0
283
+ opencv-python-headless==4.11.0.86
284
+ prompt_toolkit==3.0.50
285
+ trl==0.16.1
286
+ debugpy==1.8.13
287
+ pydantic==2.10.6
288
+ stack-data==0.6.3
289
+ tqdm-multiprocess==0.0.11
290
+ gradio_client==1.3.0
291
+ dill==0.3.8
292
+ evaluate==0.4.3
293
+ nvidia-cufft-cu12==11.2.1.3
294
+ nest-asyncio==1.6.0
295
+ pyzmq==26.2.1
296
+ tensorboard-data-server==0.7.2
297
+ docstring_parser==0.16
298
+ click==8.1.8
299
+ psutil==7.0.0
300
+ annotated-types==0.7.0
301
+ ninja==1.11.1.4
302
+ pillow==10.4.0
303
+ tcolorpy==0.1.7
304
+ einops==0.8.1
305
+ wcwidth==0.2.13
306
+ typer==0.15.2
307
+ tqdm==4.67.1
308
+ tomlkit==0.12.0
309
+ ipykernel==6.28.0
310
+ diffusers==0.32.2
311
+ mistral_common==1.5.4
312
+ setuptools==75.8.0
313
+ h2==4.2.0
314
+ cachetools==5.5.2
315
+ wrapt==1.17.2
316
+ pydantic_core==2.27.2
317
+ ffmpy==0.5.0
318
+ sacrebleu==2.5.1
319
+ outlines_core==0.1.26
320
+ jsonlines==4.0.0
321
+ fonttools==4.56.0
322
+ nvidia-nvtx-cu12==12.4.127
323
+ safetensors==0.5.3
324
+ opentelemetry-exporter-otlp-proto-grpc==1.26.0
325
+ bitsandbytes==0.45.3
326
+ nanobind==2.6.1
327
+ tokenizers==0.21.1
328
+ propcache==0.3.0
329
+ distro==1.9.0
330
+ python-multipart==0.0.20
331
+ uvloop==0.21.0
332
+ liger_kernel==0.5.5
333
+ python-json-logger==3.3.0
334
+ multidict==6.1.0
335
+ ray==2.43.0
336
+ opentelemetry-exporter-otlp-proto-http==1.26.0
337
+ typepy==1.3.4
338
+ torchvision==0.21.0
339
+ PyYAML==6.0.2
340
+ xxhash==3.5.0
341
+ anthropic==0.49.0
342
+ py-cpuinfo==9.0.0
343
+ compressed-tensors==0.9.3
344
+ opentelemetry-exporter-otlp-proto-common==1.26.0
345
+ opentelemetry-sdk==1.26.0
346
+ shtab==1.7.1
347
+ websockets==12.0
348
+ numba==0.61.2
349
+ llguidance==0.7.13
350
+ hyperframe==6.1.0
351
+ anyio==4.8.0
352
+ asttokens==3.0.0
353
+ blessed==1.20.0
354
+ colorama==0.4.6
355
+ shellingham==1.5.4
356
+ Brotli==1.0.9
357
+ sqlitedict==2.1.0
358
+ nvidia-cusparse-cu12==12.3.1.170
359
+ Deprecated==1.2.18
360
+ cloudpickle==3.1.1
361
+ pathvalidate==3.2.3
362
+ opentelemetry-semantic-conventions-ai==0.4.3
363
+ platformdirs==4.2.2
364
+ autocommand==2.2.2
365
+ backports.tarfile==1.2.0
366
+ importlib_metadata==8.0.0
367
+ inflect==7.3.1
368
+ jaraco.collections==5.1.0
369
+ jaraco.context==5.3.0
370
+ jaraco.functools==4.0.1
371
+ jaraco.text==3.12.1
372
+ more-itertools==10.3.0
373
+ packaging==24.2
374
+ tomli==2.0.1
375
+ typeguard==4.3.0
376
+ typing_extensions==4.12.2
377
+ wheel==0.45.1
378
+ zipp==3.19.2
wandb/run-20250921_062002-cothceaw/files/wandb-metadata.json ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28",
3
+ "python": "CPython 3.11.11",
4
+ "startedAt": "2025-09-21T10:20:02.294501Z",
5
+ "args": [
6
+ "--model",
7
+ "vllm",
8
+ "--model_args",
9
+ "pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000",
10
+ "--tasks",
11
+ "mmlu_prox_ar_math,mmlu_prox_bn_math",
12
+ "--batch_size",
13
+ "auto",
14
+ "--apply_chat_template",
15
+ "--output_path",
16
+ "ckpts/rerun",
17
+ "--log_samples",
18
+ "--gen_kwargs",
19
+ "max_gen_toks=20000",
20
+ "--wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto"
21
+ ],
22
+ "program": "/home/jdhwang/.conda/envs/llm/bin/lm_eval",
23
+ "git": {
24
+ "remote": "[email protected]:jd730/BRIDGE-private.git",
25
+ "commit": "57228cdf2aa2656e94d94dc3f5530986c0f48545"
26
+ },
27
+ "email": "[email protected]",
28
+ "root": "ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309",
29
+ "host": "node1803",
30
+ "executable": "/orcd/home/001/jdhwang/.conda/envs/llm/bin/python",
31
+ "cpu_count": 64,
32
+ "cpu_count_logical": 128,
33
+ "gpu": "NVIDIA H100 80GB HBM3",
34
+ "gpu_count": 2,
35
+ "disk": {
36
+ "/": {
37
+ "total": "464506159104",
38
+ "used": "12265783296"
39
+ }
40
+ },
41
+ "memory": {
42
+ "total": "2163473002496"
43
+ },
44
+ "gpu_nvidia": [
45
+ {
46
+ "name": "NVIDIA H100 80GB HBM3",
47
+ "memoryTotal": "85520809984",
48
+ "cudaCores": 16896,
49
+ "architecture": "Hopper",
50
+ "uuid": "GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b"
51
+ },
52
+ {
53
+ "name": "NVIDIA H100 80GB HBM3",
54
+ "memoryTotal": "85520809984",
55
+ "cudaCores": 16896,
56
+ "architecture": "Hopper",
57
+ "uuid": "GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b"
58
+ }
59
+ ],
60
+ "cudaVersion": "12.4",
61
+ "slurm": {
62
+ "array_job_id": "4507342",
63
+ "array_task_count": "8",
64
+ "array_task_id": "0",
65
+ "array_task_max": "7",
66
+ "array_task_min": "0",
67
+ "array_task_step": "1",
68
+ "cluster_name": "eofe7",
69
+ "conf": "/etc/slurm/slurm.conf",
70
+ "cpus_on_node": "16",
71
+ "cpus_per_task": "16",
72
+ "gpus_on_node": "2",
73
+ "gtids": "0",
74
+ "job_account": "mit_general",
75
+ "job_cpus_per_node": "16",
76
+ "job_end_time": "1758493186",
77
+ "job_gid": "209655",
78
+ "job_gpus": "1,2",
79
+ "job_id": "4535465",
80
+ "job_name": "mmlu_prox.sh",
81
+ "job_nodelist": "node1803",
82
+ "job_num_nodes": "1",
83
+ "job_partition": "ou_bcs_low",
84
+ "job_qos": "normal",
85
+ "job_start_time": "1758449986",
86
+ "job_uid": "209655",
87
+ "job_user": "jdhwang",
88
+ "jobid": "4535465",
89
+ "localid": "0",
90
+ "mem_per_node": "131072",
91
+ "nnodes": "1",
92
+ "nodeid": "0",
93
+ "nodelist": "node1803",
94
+ "nprocs": "1",
95
+ "ntasks": "1",
96
+ "ntasks_per_node": "1",
97
+ "oom_kill_step": "0",
98
+ "prio_process": "0",
99
+ "procid": "0",
100
+ "script_context": "prolog_task",
101
+ "submit_dir": "/orcd/home/002/jdhwang/BRIDGE",
102
+ "submit_host": "orcd-login003.mit.edu",
103
+ "task_pid": "1320304",
104
+ "tasks_per_node": "1",
105
+ "topology_addr": "node1803",
106
+ "topology_addr_pattern": "node",
107
+ "tres_per_task": "cpu=16"
108
+ },
109
+ "writerId": "b4tnkkvikp4ud9d1f7t2v1f37ei0r2ow"
110
+ }
wandb/run-20250921_062002-cothceaw/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime":4818,"_step":3,"mmlu_prox_ar_math_eval_results":{"nrows":1351,"ncols":8,"size":89484352,"sha256":"1d425cdaeb765848e725c89a34944d791bdbf0248c6361032236b97f9611117e","_latest_artifact_path":"wandb-client-artifact://r4rn6kktqo8l0um7ttx1ue77yefblg4bjuul5xh36eo036nw65had7u778c1o0h584duz5spxu93ogp1sydi6ps1f7e3imex3lze4vb4gslw6odv77pvqpc282057p94:latest/mmlu_prox_ar_math_eval_results.table.json","_type":"table-file","log_mode":"IMMUTABLE","artifact_path":"wandb-client-artifact://4o3rezrpees9hoiy123dfot64jhr1avjzbsglu96w3nb6bvqo7r9nea1zpq6zrui21syjx86ytqafiqadkxalsovbu9hbu5xuig9vmqholaskqwf4fwujyo7wtf12a0n/mmlu_prox_ar_math_eval_results.table.json","path":"media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json"},"mmlu_prox_bn_math_eval_results":{"path":"media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json","log_mode":"IMMUTABLE","_type":"table-file","ncols":8,"_latest_artifact_path":"wandb-client-artifact://yrkq6zvhmu4okkq3sfdgrab1lnmdmfhv1de8t3mmak4lzthxfrmi8anklhmkr55kv80kuxxms6g4nss6yhg5y1of7bferet1zu65qvb21hgfr1y9ahpspphz37lf2nmq:latest/mmlu_prox_bn_math_eval_results.table.json","size":55489730,"sha256":"8f734c70b94d2677510f9200822c1810a65f0546ff35668ea1df59d81ec4ba33","nrows":1351,"artifact_path":"wandb-client-artifact://p7hlv4z2fp2097ris2q56ne0zq48hm4dhjzvq5oly1ch8bfxb5vrvg12wvtfxoe6yg7o671ug2r9ukhw2uqyzkdu3s1nsoq7fajb1mv7bgfdc1vimxlhjan1auugg7fx/mmlu_prox_bn_math_eval_results.table.json"},"mmlu_prox_ar_math/exact_match_stderr,custom-extract":"N/A","mmlu_prox_bn_math/alias":"math","mmlu_prox_ar_math/exact_match,custom-extract":0.27905255366395265,"_timestamp":1.7584548197219381e+09,"mmlu_prox_bn_math/exact_match,custom-extract":0.14433752775721687,"evaluation/eval_results":{"_latest_artifact_path":"wandb-client-artifact://fs7vvo359n7zetw0n93n6no3jnok4v4xr41uurzhi3oe51ox6y2umo5v79g77afo64weve9g6v7lxxe7k19dji47nxjd5fz1r4mva1ftvcpnlezc70b60s027i8r67ze:latest/evaluation/eval_results.table.json","ncols":7,"artifac
t_path":"wandb-client-artifact://nkoxm3fcoh17kqp4tvv4avy26i5fq3eaju412t711emnjajv88orrsf3ri9qsynz54066d4ze25cbi9v5x7avwdh7scv36ttkgoid25eqaq6y0z4c6ltc088wifvn50e/evaluation/eval_results.table.json","log_mode":"IMMUTABLE","sha256":"2a7ec9e10306569eae6efb589dd0cd352624b8846fd793982cce71b425f2b2c3","path":"media/table/evaluation/eval_results_1_2a7ec9e10306569eae6e.table.json","size":285,"nrows":2,"_type":"table-file"},"mmlu_prox_ar_math/alias":"math","mmlu_prox_bn_math/exact_match_stderr,custom-extract":"N/A","_wandb":{"runtime":4818}}
wandb/run-20250921_062002-cothceaw/logs/debug-core.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-21T06:20:02.49713505-04:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoo93japy/port-1320321.txt","pid":1320321,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-21T06:20:02.497479399-04:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1320321}
3
+ {"time":"2025-09-21T06:20:02.497469852-04:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1320321-1320505-1610173251/socket","Net":"unix"}}
4
+ {"time":"2025-09-21T06:20:02.683932338-04:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-21T06:20:02.688959867-04:00","level":"INFO","msg":"handleInformInit: received","streamId":"cothceaw","id":"1(@)"}
6
+ {"time":"2025-09-21T06:20:02.919780696-04:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"cothceaw","id":"1(@)"}
7
+ {"time":"2025-09-21T07:40:27.042657191-04:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"cothceaw","id":"1(@)"}
8
+ {"time":"2025-09-21T07:40:27.058852194-04:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"cothceaw","id":"1(@)"}
9
+ {"time":"2025-09-21T07:40:28.042788818-04:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
10
+ {"time":"2025-09-21T07:40:28.042813831-04:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
11
+ {"time":"2025-09-21T07:40:28.042819045-04:00","level":"INFO","msg":"server is shutting down"}
12
+ {"time":"2025-09-21T07:40:28.04282347-04:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
13
+ {"time":"2025-09-21T07:40:28.042871233-04:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
14
+ {"time":"2025-09-21T07:40:28.042885088-04:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
15
+ {"time":"2025-09-21T07:40:28.042898748-04:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1320321-1320505-1610173251/socket","Net":"unix"}}
16
+ {"time":"2025-09-21T07:40:28.042918708-04:00","level":"INFO","msg":"server is closed"}
wandb/run-20250921_062002-cothceaw/logs/debug-internal.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-21T06:20:02.689611613-04:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-09-21T06:20:02.91974477-04:00","level":"INFO","msg":"stream: created new stream","id":"cothceaw"}
3
+ {"time":"2025-09-21T06:20:02.919776235-04:00","level":"INFO","msg":"stream: started","id":"cothceaw"}
4
+ {"time":"2025-09-21T06:20:02.919783396-04:00","level":"INFO","msg":"handler: started","stream_id":"cothceaw"}
5
+ {"time":"2025-09-21T06:20:02.919795631-04:00","level":"INFO","msg":"writer: Do: started","stream_id":"cothceaw"}
6
+ {"time":"2025-09-21T06:20:02.919791628-04:00","level":"INFO","msg":"sender: started","stream_id":"cothceaw"}
7
+ {"time":"2025-09-21T06:20:03.320481778-04:00","level":"WARN","msg":"handleCodeSave: program relative path is empty"}
8
+ {"time":"2025-09-21T06:20:03.32057814-04:00","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
9
+ {"time":"2025-09-21T07:40:22.552065372-04:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading artifact run-cothceaw-mmlu_prox_ar_math_eval_results","runtime_seconds":4.866579888,"subtasks":[{"desc":"mmlu_prox_ar_math_eval_results.table.json","runtime_seconds":4.47880473,"progress":"36.1MB/85.3MB"}]},{"desc":"uploading media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json","runtime_seconds":4.303737975,"progress":"67.8MB/85.3MB"},{"desc":"uploading artifact mmlu_prox_ar_math","runtime_seconds":3.824002162,"subtasks":[{"desc":"mmlu_prox_ar_math_eval_samples.json","runtime_seconds":3.315562372,"progress":"21.4MB/37.3MB"}]},{"desc":"uploading artifact run-cothceaw-mmlu_prox_bn_math_eval_results","runtime_seconds":3.051542787,"subtasks":[{"desc":"mmlu_prox_bn_math_eval_results.table.json","runtime_seconds":2.70140314,"progress":"16.1MB/52.9MB"}]},{"desc":"uploading media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json","runtime_seconds":2.7112040950000003,"progress":"31.2MB/52.9MB"},{"desc":"uploading artifact mmlu_prox_bn_math","runtime_seconds":2.359250196,"subtasks":[{"desc":"mmlu_prox_bn_math_eval_samples.json","runtime_seconds":1.890807991,"progress":"9.9MB/31.3MB"}]}],"total_operations":6}}
10
+ {"time":"2025-09-21T07:40:26.842250567-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
11
+ {"time":"2025-09-21T07:40:27.042881025-04:00","level":"INFO","msg":"stream: closing","id":"cothceaw"}
12
+ {"time":"2025-09-21T07:40:27.042891927-04:00","level":"INFO","msg":"handler: closed","stream_id":"cothceaw"}
13
+ {"time":"2025-09-21T07:40:27.042901472-04:00","level":"INFO","msg":"sender: closed","stream_id":"cothceaw"}
14
+ {"time":"2025-09-21T07:40:27.042898277-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":"cothceaw"}
15
+ {"time":"2025-09-21T07:40:27.043173368-04:00","level":"INFO","msg":"stream: closed","id":"cothceaw"}
wandb/run-20250921_062002-cothceaw/logs/debug.log ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-09-21 06:20:02,469 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-09-21 06:20:02,469 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Configure stats pid to 1320321
3
+ 2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Loading settings from /home/jdhwang/.config/wandb/settings
4
+ 2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Loading settings from /orcd/home/002/jdhwang/BRIDGE/wandb/settings
5
+ 2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:setup_run_log_directory():703] Logging user logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250921_062002-cothceaw/logs/debug.log
7
+ 2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250921_062002-cothceaw/logs/debug-internal.log
8
+ 2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:init():830] calling init triggers
9
+ 2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'_wandb': {}}
11
+ 2025-09-21 06:20:02,470 INFO MainThread:1320321 [wandb_init.py:init():871] starting backend
12
+ 2025-09-21 06:20:02,684 INFO MainThread:1320321 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-09-21 06:20:02,686 INFO MainThread:1320321 [wandb_init.py:init():882] backend started and connected
14
+ 2025-09-21 06:20:02,688 INFO MainThread:1320321 [wandb_init.py:init():953] updated telemetry
15
+ 2025-09-21 06:20:02,711 INFO MainThread:1320321 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-09-21 06:20:03,319 INFO MainThread:1320321 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-09-21 06:20:03,673 INFO MainThread:1320321 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-09-21 06:20:03,673 INFO MainThread:1320321 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-09-21 06:20:03,673 INFO MainThread:1320321 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-09-21 06:20:03,673 INFO MainThread:1320321 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-09-21 06:20:03,676 INFO MainThread:1320321 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-09-21 07:40:15,859 INFO MainThread:1320321 [wandb_run.py:_config_callback():1363] config_cb None None {'task_configs': {'mmlu_prox_ar_math': {'task': 'mmlu_prox_ar_math', 'task_alias': 'math', 'dataset_path': 'li-lab/MMLU-ProX', 'dataset_name': 'ar', 'test_split': 'test', 'fewshot_split': 'validation', 'process_docs': "functools.partial(<function process_docs at 0x14d51fcc0860>, subject='math')", 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fcc0b80>, including_answer=False)', 'doc_to_target': 'answer', 'description': "فيما يلي أسئلة اختيار من متعدد (مع إجابات) حول الرياضيات. فكر خطوة بخطوة ثم أنهِ إجابتك بـ 'الإجابة هي (X)' حيث X هو حرف الخيار الصحيح.\n", 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'fewshot_config': {'sampler': 'first_n', 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fcc0e00>, including_answer=True)', 'doc_to_target': ''}, 'num_fewshot': 5, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'until': ['</s>', 'Q:', 'سؤال:', '<|im_end|>'], 'do_sample': False, 'temperature': 0.0, 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'name': 'custom-extract', 'filter': [{'function': 'regex', 'regex_pattern': 'الإجابة هي \\(?([ABCDEFGHIJ])\\)?'}, {'function': 'take_first'}]}], 'should_decontaminate': False, 'metadata': {'version': 0.0}}, 'mmlu_prox_bn_math': {'task': 'mmlu_prox_bn_math', 'task_alias': 'math', 'dataset_path': 'li-lab/MMLU-ProX', 'dataset_name': 'bn', 'test_split': 'test', 'fewshot_split': 'validation', 'process_docs': "functools.partial(<function process_docs at 0x14d51fb79e40>, subject='math')", 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fb7a200>, including_answer=False)', 'doc_to_target': 'answer', 'description': 'নিম্নলিখিত গণিত সম্পর্কে বহুনির্বাচনী প্রশ্ন (উত্তরসহ)। ধাপে ধাপে চিন্তা করুন 
এবং তারপর আপনার উত্তর "উত্তর হল (X)" দিয়ে শেষ করুন যেখানে X হল সঠিক বিকল্পের অক্ষর।\n', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'fewshot_config': {'sampler': 'first_n', 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fb7b740>, including_answer=True)', 'doc_to_target': ''}, 'num_fewshot': 5, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'until': ['</s>', 'Q:', 'প্রশ্ন:', '<|im_end|>'], 'do_sample': False, 'temperature': 0.0, 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'name': 'custom-extract', 'filter': [{'function': 'regex', 'regex_pattern': 'উত্তর হল \\(?([ABCDEFGHIJ])\\)?'}, {'function': 'take_first'}]}], 'should_decontaminate': False, 'metadata': {'version': 0.0}}}, 'cli_configs': {'model': 'vllm', 'model_args': 'pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000', 'batch_size': 'auto', 'batch_sizes': [], 'device': None, 'use_cache': None, 'limit': None, 'bootstrap_iters': 0, 'gen_kwargs': {'max_gen_toks': 20000}, 'random_seed': 0, 'numpy_seed': 1234, 'torch_seed': 1234, 'fewshot_seed': 1234}}
23
+ 2025-09-21 07:40:21,458 INFO MainThread:1320321 [wandb_run.py:_finish():2224] finishing run jdhwang/MRPO_eval/cothceaw
24
+ 2025-09-21 07:40:21,458 INFO MainThread:1320321 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
25
+ 2025-09-21 07:40:21,550 INFO MainThread:1320321 [wandb_run.py:_restore():2405] restore
26
+ 2025-09-21 07:40:21,550 INFO MainThread:1320321 [wandb_run.py:_restore():2411] restore done
27
+ 2025-09-21 07:40:27,041 INFO MainThread:1320321 [wandb_run.py:_footer_history_summary_info():3903] rendering history
28
+ 2025-09-21 07:40:27,042 INFO MainThread:1320321 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
29
+ 2025-09-21 07:40:27,042 INFO MainThread:1320321 [wandb_run.py:_footer_sync_info():3864] logging synced files