jdhwang committed on Sep 23, 2025

Commit

d7753f7

verified ·

1 Parent(s): fe29901

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +22 -0
added_tokens.json +24 -0
config.json +28 -0
generation_config.json +14 -0
merges.txt +0 -0
model-00001-of-00004.safetensors +3 -0
model-00002-of-00004.safetensors +3 -0
model-00003-of-00004.safetensors +3 -0
model-00004-of-00004.safetensors +3 -0
model.safetensors.index.json +346 -0
runs/Sep19_17-13-37_node1803/events.out.tfevents.1758316441.node1803.939342.0 +3 -0
special_tokens_map.json +25 -0
tokenizer.json +3 -0
tokenizer_config.json +208 -0
training_args.bin +3 -0
vocab.json +0 -0
wandb/debug-internal.log +14 -0
wandb/debug.log +24 -0
wandb/run-20250920_081121-2oxex54w/files/config.yaml +644 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json +1 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json +3 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_de_eval_results_3_fabaf33255f24add59c1.table.json +0 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_en_eval_results_4_82791ccc59d2c61a093c.table.json +0 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_es_eval_results_5_db5ed66b097e218c4da6.table.json +0 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_fr_eval_results_6_214cb1a5a2cdee9f330e.table.json +0 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json +3 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json +3 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json +3 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json +3 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_th_eval_results_11_baa86a95f601e3c721a0.table.json +0 -0
wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_zh_eval_results_12_2882185f7e816bbcc2fe.table.json +0 -0
wandb/run-20250920_081121-2oxex54w/files/output.log +212 -0
wandb/run-20250920_081121-2oxex54w/files/requirements.txt +378 -0
wandb/run-20250920_081121-2oxex54w/files/wandb-metadata.json +110 -0
wandb/run-20250920_081121-2oxex54w/files/wandb-summary.json +1 -0
wandb/run-20250920_081121-2oxex54w/logs/debug-core.log +16 -0
wandb/run-20250920_081121-2oxex54w/logs/debug-internal.log +15 -0
wandb/run-20250920_081121-2oxex54w/logs/debug.log +29 -0
wandb/run-20250920_081121-2oxex54w/run-2oxex54w.wandb +3 -0
wandb/run-20250921_062002-cothceaw/files/config.yaml +252 -0
wandb/run-20250921_062002-cothceaw/files/media/table/evaluation/eval_results_1_2a7ec9e10306569eae6e.table.json +1 -0
wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json +3 -0
wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json +3 -0
wandb/run-20250921_062002-cothceaw/files/output.log +81 -0
wandb/run-20250921_062002-cothceaw/files/requirements.txt +378 -0
wandb/run-20250921_062002-cothceaw/files/wandb-metadata.json +110 -0
wandb/run-20250921_062002-cothceaw/files/wandb-summary.json +1 -0
wandb/run-20250921_062002-cothceaw/logs/debug-core.log +16 -0
wandb/run-20250921_062002-cothceaw/logs/debug-internal.log +15 -0
wandb/run-20250921_062002-cothceaw/logs/debug.log +29 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,25 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250920_081121-2oxex54w/run-2oxex54w.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_062002-cothceaw/run-cothceaw.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_081515-hrm6dwvu/files/media/table/mmlu_prox_de_math_eval_results_2_6011ed1b84fa34f7a465.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_081515-hrm6dwvu/files/media/table/mmlu_prox_en_math_eval_results_3_574b6ec5b263ae2e258e.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_081515-hrm6dwvu/run-hrm6dwvu.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_090332-hrm6dwvu/files/media/table/mmlu_prox_es_math_eval_results_2_f5b5ddbcd29f48f36acc.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_090332-hrm6dwvu/files/media/table/mmlu_prox_fr_math_eval_results_3_ea21893417450a1c19c1.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_090332-hrm6dwvu/run-hrm6dwvu.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_092606-ztqaaqj5/files/media/table/mmlu_prox_ko_math_eval_results_2_0960ca8c88e3af630287.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_092606-ztqaaqj5/files/media/table/mmlu_prox_pt_math_eval_results_3_14279190f4728eaf809a.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_092606-ztqaaqj5/run-ztqaaqj5.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_123322-c7t8flvu/run-c7t8flvu.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_140327-c7t8flvu/files/media/table/mmlu_prox_zh_math_eval_results_2_9045c6c9481d0396b399.table.json filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250921_140327-c7t8flvu/run-c7t8flvu.wandb filter=lfs diff=lfs merge=lfs -text

added_tokens.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "</tool_call>": 151658,
+  "<tool_call>": 151657,
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|file_sep|>": 151664,
+  "<|fim_middle|>": 151660,
+  "<|fim_pad|>": 151662,
+  "<|fim_prefix|>": 151659,
+  "<|fim_suffix|>": 151661,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|repo_name|>": 151663,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 18944,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen2",
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 152064
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "repetition_penalty": 1.05,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.51.3"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model-00001-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9b62bbfc2c3c82f51429dab8f74f685996c2a31dd1d2b7ea6ba47768ea4e1ab
+size 4877660776

model-00002-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:421b39cbbc81933e028ad678f3252ec9cdabe893fcc96b4a75836d9fa7e58be3
+size 4932751008

model-00003-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f63898433cd50eb366bf80588df88478eb1a1dba660aa086a457cd33cb44c50
+size 4330865200

model-00004-of-00004.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:383305d92fbc2fbb3397d929ab45247196d7289e0fbbcd75f7239dea84d7916e
+size 1089994880

model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,346 @@

+{
+  "metadata": {
+    "total_size": 15231233024
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00004-of-00004.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
+    "model.norm.weight": "model-00003-of-00004.safetensors"
+  }
+}

runs/Sep19_17-13-37_node1803/events.out.tfevents.1758316441.node1803.939342.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da1334be7669bb7582bf0cf1d5dd15b646bd1693505aa3419b94c9497a9e867b
+size 1327067

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "eos_token": {
+    "content": "<|im_end|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|fim_pad|>"
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
+size 11421896

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,208 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "151643": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151644": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151645": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151646": {
+      "content": "<|object_ref_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151647": {
+      "content": "<|object_ref_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151648": {
+      "content": "<|box_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151649": {
+      "content": "<|box_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151650": {
+      "content": "<|quad_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151651": {
+      "content": "<|quad_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151652": {
+      "content": "<|vision_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151653": {
+      "content": "<|vision_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151654": {
+      "content": "<|vision_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151655": {
+      "content": "<|image_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151656": {
+      "content": "<|video_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "151657": {
+      "content": "<tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151658": {
+      "content": "</tool_call>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151659": {
+      "content": "<|fim_prefix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151660": {
+      "content": "<|fim_middle|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151661": {
+      "content": "<|fim_suffix|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151662": {
+      "content": "<|fim_pad|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151663": {
+      "content": "<|repo_name|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "151664": {
+      "content": "<|file_sep|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "additional_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "bos_token": null,
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
+  "pad_token": "<|fim_pad|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2be420de1f77067e60447c82f1dc47b174daa0d938a57517afb744395df404c
+size 7096

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-09-23T13:02:54.941896669-04:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-09-23T13:02:55.63168835-04:00","level":"INFO","msg":"stream: created new stream","id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:55.631723283-04:00","level":"INFO","msg":"stream: started","id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:55.63173562-04:00","level":"INFO","msg":"handler: started","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:55.631745751-04:00","level":"INFO","msg":"sender: started","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:55.631746765-04:00","level":"INFO","msg":"writer: Do: started","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:02:56.136638253-04:00","level":"WARN","msg":"handleCodeSave: program relative path is empty"}
+{"time":"2025-09-23T13:02:56.136911276-04:00","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
+{"time":"2025-09-23T13:03:13.563836157-04:00","level":"INFO","msg":"stream: closing","id":"j5jpgoah"}
+{"time":"2025-09-23T13:03:14.427323175-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-09-23T13:03:14.567379829-04:00","level":"INFO","msg":"handler: closed","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:03:14.567413727-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:03:14.567456299-04:00","level":"INFO","msg":"sender: closed","stream_id":"j5jpgoah"}
+{"time":"2025-09-23T13:03:14.668731827-04:00","level":"INFO","msg":"stream: closed","id":"j5jpgoah"}

wandb/debug.log ADDED Viewed

	@@ -0,0 +1,24 @@

+2025-09-23 13:02:54,151 INFO    MainThread:382253 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_setup.py:_flush():80] Configure stats pid to 382253
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_setup.py:_flush():80] Loading settings from /home/jdhwang/.config/wandb/settings
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_setup.py:_flush():80] Loading settings from /orcd/home/002/jdhwang/BRIDGE/wandb/settings
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_setup.py:_flush():80] loaded run ID from ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/wandb-resume.json
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_init.py:setup_run_log_directory():703] Logging user logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250923_130250-j5jpgoah/logs/debug.log
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250923_130250-j5jpgoah/logs/debug-internal.log
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_init.py:init():830] calling init triggers
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-09-23 13:02:54,152 INFO    MainThread:382253 [wandb_init.py:init():871] starting backend
+2025-09-23 13:02:54,499 INFO    MainThread:382253 [wandb_init.py:init():874] sending inform_init request
+2025-09-23 13:02:54,503 INFO    MainThread:382253 [wandb_init.py:init():882] backend started and connected
+2025-09-23 13:02:54,504 INFO    MainThread:382253 [wandb_init.py:init():953] updated telemetry
+2025-09-23 13:02:54,665 INFO    MainThread:382253 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-09-23 13:02:56,135 INFO    MainThread:382253 [wandb_init.py:init():1024] run resumed
+2025-09-23 13:02:56,135 INFO    MainThread:382253 [wandb_init.py:init():1029] starting run threads in backend
+2025-09-23 13:02:56,501 INFO    MainThread:382253 [wandb_run.py:_console_start():2458] atexit reg
+2025-09-23 13:02:56,518 INFO    MainThread:382253 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-09-23 13:02:56,518 INFO    MainThread:382253 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-09-23 13:02:56,518 INFO    MainThread:382253 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-09-23 13:02:56,521 INFO    MainThread:382253 [wandb_init.py:init():1075] run started, returning control to user process
+2025-09-23 13:03:13,563 INFO    MsgRouterThr:382253 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.

wandb/run-20250920_081121-2oxex54w/files/config.yaml ADDED Viewed

	@@ -0,0 +1,644 @@

+_wandb:
+    value:
+        cli_version: 0.21.0
+        e:
+            bdsaggp24nt8kfc8qjgq21gi927g7e3o:
+                args:
+                    - --model
+                    - vllm
+                    - --model_args
+                    - pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
+                    - --tasks
+                    - mgsm_native_cot_ja,mgsm_native_cot_en,mgsm_native_cot_th,mgsm_native_cot_es,mgsm_native_cot_bn,mgsm_native_cot_te,mgsm_native_cot_fr,mgsm_native_cot_zh,mgsm_native_cot_sw,mgsm_native_cot_de,mgsm_native_cot_ru
+                    - --batch_size
+                    - auto
+                    - --apply_chat_template
+                    - --output_path
+                    - ckpts/rerun
+                    - --log_samples
+                    - --gen_kwargs
+                    - max_gen_toks=20000
+                    - --wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto
+                cpu_count: 64
+                cpu_count_logical: 128
+                cudaVersion: "12.4"
+                disk:
+                    /:
+                        total: "464506159104"
+                        used: "12268101632"
+                email: [email protected]
+                executable: /orcd/home/001/jdhwang/.conda/envs/llm/bin/python
+                git:
+                    commit: bb8b2be1f7420f9c6a3d65f0eaf3072732d73123
+                    remote: [email protected]:jd730/BRIDGE-private.git
+                gpu: NVIDIA H100 80GB HBM3
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Hopper
+                      cudaCores: 16896
+                      memoryTotal: "85520809984"
+                      name: NVIDIA H100 80GB HBM3
+                      uuid: GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b
+                    - architecture: Hopper
+                      cudaCores: 16896
+                      memoryTotal: "85520809984"
+                      name: NVIDIA H100 80GB HBM3
+                      uuid: GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b
+                host: node1803
+                memory:
+                    total: "2163473002496"
+                os: Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28
+                program: /home/jdhwang/.conda/envs/llm/bin/lm_eval
+                python: CPython 3.11.11
+                root: ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309
+                slurm:
+                    array_job_id: "4452191"
+                    array_task_count: "1"
+                    array_task_id: "0"
+                    array_task_max: "0"
+                    array_task_min: "0"
+                    array_task_step: "1"
+                    cluster_name: eofe7
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "16"
+                    cpus_per_task: "16"
+                    gpus_on_node: "2"
+                    gtids: "0"
+                    job_account: mit_general
+                    job_cpus_per_node: "16"
+                    job_end_time: "1758413466"
+                    job_gid: "209655"
+                    job_gpus: 1,2
+                    job_id: "4452191"
+                    job_name: eval.sh
+                    job_nodelist: node1803
+                    job_num_nodes: "1"
+                    job_partition: ou_bcs_normal
+                    job_qos: normal
+                    job_start_time: "1758370266"
+                    job_uid: "209655"
+                    job_user: jdhwang
+                    jobid: "4452191"
+                    localid: "0"
+                    mem_per_node: "131072"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: node1803
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    script_context: prolog_task
+                    submit_dir: /orcd/home/002/jdhwang/BRIDGE
+                    submit_host: orcd-login003.mit.edu
+                    task_pid: "1143610"
+                    tasks_per_node: "1"
+                    topology_addr: node1803
+                    topology_addr_pattern: node
+                    tres_per_task: cpu=16
+                startedAt: "2025-09-20T12:11:21.301942Z"
+                writerId: bdsaggp24nt8kfc8qjgq21gi927g7e3o
+        m: []
+        python_version: 3.11.11
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 30
+                - 41
+                - 49
+                - 51
+                - 53
+                - 71
+                - 95
+                - 98
+                - 100
+                - 105
+            "2":
+                - 1
+                - 5
+                - 11
+                - 30
+                - 41
+                - 49
+                - 51
+                - 53
+                - 71
+                - 95
+                - 98
+                - 100
+                - 105
+            "3":
+                - 2
+                - 13
+                - 62
+            "4": 3.11.11
+            "5": 0.21.0
+            "6": 4.51.3
+            "12": 0.21.0
+            "13": linux-x86_64
+cli_configs:
+    value:
+        batch_size: auto
+        batch_sizes: []
+        bootstrap_iters: 0
+        device: null
+        fewshot_seed: 1234
+        gen_kwargs:
+            max_gen_toks: 20000
+        limit: null
+        model: vllm
+        model_args: pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
+        numpy_seed: 1234
+        random_seed: 0
+        torch_seed: 1234
+        use_cache: null
+task_configs:
+    value:
+        mgsm_native_cot_bn:
+            dataset_name: bn
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nধাপে ধাপে উত্তর:"}}{% else %}{{"প্রশ্ন: "+question+"\nধাপে ধাপে উত্তর:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: The answer is (\-?[0-9\.\,]+)
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 'প্রশ্ন:'
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_bn
+            test_split: test
+            training_split: train
+        mgsm_native_cot_de:
+            dataset_name: de
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nSchritt-für-Schritt-Antwort:"}}{% else %}{{"Frage: "+question+"\nSchritt-für-Schritt-Antwort:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: Die Antwort lautet (\-?[0-9\.\,]+)
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 'Frage:'
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_de
+            test_split: test
+            training_split: train
+        mgsm_native_cot_en:
+            dataset_name: en
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: The answer is (\-?[0-9\.\,]+)
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 'Question:'
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_en
+            test_split: test
+            training_split: train
+        mgsm_native_cot_es:
+            dataset_name: es
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[23:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nRespuesta paso a paso:"}}{% else %}{{"Pregunta: "+question+"\nRespuesta paso a paso:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: La respuesta es (\-?[0-9\.\,]+)
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 'Pregunta:'
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_es
+            test_split: test
+            training_split: train
+        mgsm_native_cot_fr:
+            dataset_name: fr
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[26:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nRéponse étape par étape :"}}{% else %}{{"Question : "+question+"\nRéponse étape par étape :"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: La réponse est (\-?[0-9\.\,]+)
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 'Question :'
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_fr
+            test_split: test
+            training_split: train
+        mgsm_native_cot_ja:
+            dataset_name: ja
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nステップごとの答え:"}}{% else %}{{"問題： "+question+"\nステップごとの答え:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: 答えは(\-?[0-9\.\,]+)です。
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 問題：
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_ja
+            test_split: test
+            training_split: train
+        mgsm_native_cot_ru:
+            dataset_name: ru
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nПошаговоерешение:"}}{% else %}{{"Задача: "+question+"\nПошаговоерешение:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: Ответ — (\-?[0-9\.\,]+)
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 'Задача:'
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_ru
+            test_split: test
+            training_split: train
+        mgsm_native_cot_sw:
+            dataset_name: sw
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[25:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nJibu la Hatua kwa Hatua:"}}{% else %}{{"Swali: "+question+"\nJibu la Hatua kwa Hatua:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: Jibu ni (\-?[0-9\.\,]+)
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 'Swali:'
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_sw
+            test_split: test
+            training_split: train
+        mgsm_native_cot_te:
+            dataset_name: te
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[19:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nదశలవారీగా సమాధానం:"}}{% else %}{{"ప్రశ్న: "+question+"\nదశలవారీగా సమాధానం:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: సమాధానం (\-?[0-9\.\,]+)
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 'ప్రశ్న:'
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_te
+            test_split: test
+            training_split: train
+        mgsm_native_cot_th:
+            dataset_name: th
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\nคำตอบทีละขั้นตอน:"}}{% else %}{{"โจทย์: "+question+"\nคำตอบทีละขั้นตอน:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: คำตอบคือ (\-?[0-9\.\,]+)
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 'โจทย์:'
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_th
+            test_split: test
+            training_split: train
+        mgsm_native_cot_zh:
+            dataset_name: zh
+            dataset_path: juletxara/mgsm
+            description: ""
+            doc_to_target: '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}'
+            doc_to_text: '{% if answer is not none %}{{question+"\n逐步解答:"}}{% else %}{{"问题： "+question+"\n逐步解答:"}}{% endif %}'
+            fewshot_delimiter: |4+
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: 答案是 (\-?[0-9\.\,]+)。
+                    - function: take_first
+                  name: strict-match
+                - filter:
+                    - function: regex
+                      group_select: -1
+                      regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
+                    - function: take_first
+                  name: flexible-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                until:
+                    - 问题：
+                    - </s>
+                    - <|im_end|>
+            metadata:
+                version: 4
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 0
+            output_type: generate_until
+            repeats: 1
+            should_decontaminate: false
+            tag: mgsm_cot_native
+            target_delimiter: ' '
+            task: mgsm_native_cot_zh
+            test_split: test
+            training_split: train

wandb/run-20250920_081121-2oxex54w/files/media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"columns": ["Tasks", "Version", "Filter", "num_fewshot", "Metric", "Value", "Stderr"], "data": [["mgsm_native_cot_bn", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_bn", 4.0, "flexible-extract", 0, "exact_match", "0.528", "N/A"], ["mgsm_native_cot_de", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_de", 4.0, "flexible-extract", 0, "exact_match", "0.74", "N/A"], ["mgsm_native_cot_en", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_en", 4.0, "flexible-extract", 0, "exact_match", "0.872", "N/A"], ["mgsm_native_cot_es", 4.0, "strict-match", 0, "exact_match", "0.008", "N/A"], ["mgsm_native_cot_es", 4.0, "flexible-extract", 0, "exact_match", "0.78", "N/A"], ["mgsm_native_cot_fr", 4.0, "strict-match", 0, "exact_match", "0.004", "N/A"], ["mgsm_native_cot_fr", 4.0, "flexible-extract", 0, "exact_match", "0.76", "N/A"], ["mgsm_native_cot_ja", 4.0, "strict-match", 0, "exact_match", "0.008", "N/A"], ["mgsm_native_cot_ja", 4.0, "flexible-extract", 0, "exact_match", "0.644", "N/A"], ["mgsm_native_cot_ru", 4.0, "strict-match", 0, "exact_match", "0.004", "N/A"], ["mgsm_native_cot_ru", 4.0, "flexible-extract", 0, "exact_match", "0.832", "N/A"], ["mgsm_native_cot_sw", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_sw", 4.0, "flexible-extract", 0, "exact_match", "0.008", "N/A"], ["mgsm_native_cot_te", 4.0, "strict-match", 0, "exact_match", "0.02", "N/A"], ["mgsm_native_cot_te", 4.0, "flexible-extract", 0, "exact_match", "0.12", "N/A"], ["mgsm_native_cot_th", 4.0, "strict-match", 0, "exact_match", "0.26", "N/A"], ["mgsm_native_cot_th", 4.0, "flexible-extract", 0, "exact_match", "0.652", "N/A"], ["mgsm_native_cot_zh", 4.0, "strict-match", 0, "exact_match", "0.0", "N/A"], ["mgsm_native_cot_zh", 4.0, "flexible-extract", 0, "exact_match", "0.784", "N/A"]]}

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5e72dc1f8666e2a7b2dd420223eda667dbec11d2c10570f946fcc38e857476a
+size 16518181

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_de_eval_results_3_fabaf33255f24add59c1.table.json ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_en_eval_results_4_82791ccc59d2c61a093c.table.json ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_es_eval_results_5_db5ed66b097e218c4da6.table.json ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_fr_eval_results_6_214cb1a5a2cdee9f330e.table.json ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:098df869a17abf277e8cf3b5d4966de124ba0fed82b267159ef0c1d32c7f38a2
+size 19900362

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:19e62e412810312c2375525d388b308579f502daf4537e08ea3e97b90b3ef016
+size 13109180

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d79a57dcd801f08109f8b083cce4884bf51b225d8127bc78429cf19d3df17aad
+size 22151199

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0fc0c81b8662e04cc0e8fc30ac6053adcf13bd384f959eee54fbeb5039dbc01
+size 23468672

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_th_eval_results_11_baa86a95f601e3c721a0.table.json ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20250920_081121-2oxex54w/files/media/table/mgsm_native_cot_zh_eval_results_12_2882185f7e816bbcc2fe.table.json ADDED Viewed

The diff for this file is too large to render. See raw diff

wandb/run-20250920_081121-2oxex54w/files/output.log ADDED Viewed

	@@ -0,0 +1,212 @@

+2025-09-20:08:11:22,626 INFO     [__main__.py:291] Verbosity set to INFO
+2025-09-20:08:11:57,895 INFO     [__init__.py:464] The tag pawsx is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
+2025-09-20:08:11:59,107 INFO     [__main__.py:388] Selected Tasks: ['mgsm_native_cot_bn', 'mgsm_native_cot_de', 'mgsm_native_cot_en', 'mgsm_native_cot_es', 'mgsm_native_cot_fr', 'mgsm_native_cot_ja', 'mgsm_native_cot_ru', 'mgsm_native_cot_sw', 'mgsm_native_cot_te', 'mgsm_native_cot_th', 'mgsm_native_cot_zh']
+2025-09-20:08:11:59,119 INFO     [evaluator.py:161] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
+2025-09-20:08:11:59,119 WARNING  [evaluator.py:172] generation_kwargs specified through cli, these settings will update set parameters in yaml tasks. Ensure 'do_sample=True' for non-greedy decoding!
+2025-09-20:08:11:59,119 INFO     [evaluator.py:198] Initializing vllm model, with arguments: {'pretrained': 'ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', 'dtype': 'auto', 'tensor_parallel_size': 2, 'max_model_len': 20000}
+INFO 09-20 08:12:05 config.py:350] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.
+INFO 09-20 08:12:05 config.py:1020] Defaulting to use mp for distributed inference
+INFO 09-20 08:12:05 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', speculative_config=None, tokenizer='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=20000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=1234, served_model_name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, chat_template_text_format=string, mm_processor_kwargs=None, pooler_config=None)
+WARNING 09-20 08:12:06 multiproc_gpu_executor.py:130] CUDA was previously initialized. We must use the `spawn` multiprocessing start method. Setting VLLM_WORKER_MULTIPROC_METHOD to 'spawn'.
+WARNING 09-20 08:12:06 multiproc_gpu_executor.py:56] Reducing Torch parallelism from 16 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
+INFO 09-20 08:12:06 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
+INFO 09-20 08:12:06 selector.py:135] Using Flash Attention backend.
+INFO 09-20 08:12:14 utils.py:961] Found nccl from library libnccl.so.2
+INFO 09-20 08:12:14 pynccl.py:69] vLLM is using nccl==2.21.5
+INFO 09-20 08:12:15 custom_all_reduce_utils.py:242] reading GPU P2P access cache from /home/jdhwang/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
+INFO 09-20 08:12:15 shm_broadcast.py:236] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x145452a92810>, local_subscribe_port=53811, remote_subscribe_port=None)
+INFO 09-20 08:12:15 model_runner.py:1072] Starting to load model ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309...
+Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]
+Loading safetensors checkpoint shards:  25% Completed | 1/4 [00:43<02:11, 43.77s/it]
+Loading safetensors checkpoint shards:  50% Completed | 2/4 [01:16<01:14, 37.12s/it]
+Loading safetensors checkpoint shards:  75% Completed | 3/4 [01:49<00:35, 35.23s/it]
+Loading safetensors checkpoint shards: 100% Completed | 4/4 [02:44<00:00, 43.11s/it]
+Loading safetensors checkpoint shards: 100% Completed | 4/4 [02:44<00:00, 41.10s/it]
+INFO 09-20 08:15:00 model_runner.py:1077] Loading model weights took 7.1216 GB
+INFO 09-20 08:15:02 worker.py:232] Memory profiling results: total_gpu_memory=79.10GiB initial_memory_usage=8.22GiB peak_torch_memory=8.75GiB memory_usage_post_profile=8.60GiB non_torch_memory=1.44GiB kv_cache_size=60.99GiB gpu_memory_utilization=0.90
+INFO 09-20 08:15:02 distributed_gpu_executor.py:57] # GPU blocks: 142750, # CPU blocks: 9362
+INFO 09-20 08:15:02 distributed_gpu_executor.py:61] Maximum concurrency for 20000 tokens per request: 114.20x
+INFO 09-20 08:15:05 model_runner.py:1400] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
+INFO 09-20 08:15:05 model_runner.py:1404] If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
+INFO 09-20 08:15:20 custom_all_reduce.py:224] Registering 1995 cuda graph addresses
+INFO 09-20 08:15:20 model_runner.py:1518] Graph capturing finished in 16 secs, took 0.25 GiB
+2025-09-20:08:15:28,125 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,126 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-20:08:15:28,127 INFO     [task.py:430] Building contexts for mgsm_native_cot_zh on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1459.14it/s]
+2025-09-20:08:15:28,305 INFO     [task.py:430] Building contexts for mgsm_native_cot_th on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1507.22it/s]
+2025-09-20:08:15:28,476 INFO     [task.py:430] Building contexts for mgsm_native_cot_te on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1510.62it/s]
+2025-09-20:08:15:28,647 INFO     [task.py:430] Building contexts for mgsm_native_cot_sw on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1538.05it/s]
+2025-09-20:08:15:28,816 INFO     [task.py:430] Building contexts for mgsm_native_cot_ru on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1407.80it/s]
+2025-09-20:08:15:28,999 INFO     [task.py:430] Building contexts for mgsm_native_cot_ja on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1480.79it/s]
+2025-09-20:08:15:29,174 INFO     [task.py:430] Building contexts for mgsm_native_cot_fr on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1487.75it/s]
+2025-09-20:08:15:29,353 INFO     [task.py:430] Building contexts for mgsm_native_cot_es on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1544.75it/s]
+2025-09-20:08:15:29,520 INFO     [task.py:430] Building contexts for mgsm_native_cot_en on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1546.41it/s]
+2025-09-20:08:15:29,687 INFO     [task.py:430] Building contexts for mgsm_native_cot_de on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1523.06it/s]
+2025-09-20:08:15:29,857 INFO     [task.py:430] Building contexts for mgsm_native_cot_bn on rank 0...
+100%|██████████| 250/250 [00:00<00:00, 1512.86it/s]
+2025-09-20:08:15:30,028 INFO     [evaluator.py:495] Running generate_until requests
+Processed prompts: 100%|██████████| 250/250 [03:23<00:00,  1.23it/s, est. speed input: 124.59 toks/s, output: 2248.44 toks/s]
+final processing: 250it [00:00, 298229.81it/s]  | 1/2750 [03:23<155:02:20, 203.03s/it] 124.59 toks/s, output: 2248.44 toks/s]
+Processed prompts: 100%|██████████| 250/250 [02:56<00:00,  1.42it/s, est. speed input: 247.19 toks/s, output: 1630.51 toks/s]
+final processing: 250it [00:00, 286888.10it/s]  | 251/2750 [06:19<52:55,  1.27s/it]    247.19 toks/s, output: 1630.51 toks/s]
+Processed prompts: 100%|██████████| 250/250 [14:58<00:00,  3.59s/it, est. speed input: 125.62 toks/s, output: 3596.00 toks/s]
+final processing: 250it [00:00, 337923.30it/s]  | 501/2750 [21:17<1:38:47,  2.64s/it]: 125.62 toks/s, output: 3596.00 toks/s]
+WARNING 09-20 08:30:23 scheduler.py:1481] Sequence group 749 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=1
+Processed prompts: 100%|██████████| 250/250 [27:49<00:00,  6.68s/it, est. speed input: 22.15 toks/s, output: 2927.12 toks/s]
+final processing: 250it [00:00, 259163.62it/s]  | 751/2750 [49:07<2:29:15,  4.48s/it]: 22.15 toks/s, output: 2927.12 toks/s]
+WARNING 09-20 08:43:05 scheduler.py:1481] Sequence group 987 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=51
+WARNING 09-20 08:46:25 scheduler.py:1481] Sequence group 936 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=101
+WARNING 09-20 08:52:13 scheduler.py:1481] Sequence group 885 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=151
+Processed prompts: 100%|██████████| 250/250 [03:26<00:00,  1.21it/s, est. speed input: 160.95 toks/s, output: 2481.49 toks/s]
+final processing: 250it [00:00, 271440.85it/s]  | 1001/2750 [52:34<1:28:34,  3.04s/it] 160.39 toks/s, output: 2385.40 toks/s]
+Processed prompts: 100%|██████████| 250/250 [06:36<00:00,  1.58s/it, est. speed input: 75.03 toks/s, output: 3118.80 toks/s]
+final processing: 250it [00:00, 440393.11it/s]  | 1251/2750 [59:10<1:02:49,  2.51s/it] 74.36 toks/s, output: 2968.35 toks/s]
+Processed prompts: 100%|██████████| 250/250 [03:18<00:00,  1.26it/s, est. speed input: 147.77 toks/s, output: 2350.89 toks/s]
+final processing: 250it [00:00, 228846.79it/s]  | 1501/2750 [1:02:28<40:09,  1.93s/it] 147.77 toks/s, output: 2350.89 toks/s]
+Processed prompts: 100%|██████████| 250/250 [06:00<00:00,  1.44s/it, est. speed input: 80.18 toks/s, output: 3303.78 toks/s]
+final processing: 250it [00:00, 304730.02it/s]  | 1751/2750 [1:08:28<29:28,  1.77s/it] 79.75 toks/s, output: 3193.63 toks/s]
+Processed prompts: 100%|██████████| 250/250 [03:54<00:00,  1.07it/s, est. speed input: 104.28 toks/s, output: 3089.34 toks/s]
+final processing: 250it [00:00, 289422.03it/s]  | 2001/2750 [1:12:23<18:47,  1.51s/it] 103.64 toks/s, output: 2920.65 toks/s]
+Processed prompts: 100%|██████████| 250/250 [03:22<00:00,  1.23it/s, est. speed input: 153.49 toks/s, output: 2475.48 toks/s]
+final processing: 250it [00:00, 379094.72it/s]▏ | 2251/2750 [1:15:46<10:42,  1.29s/it] 153.16 toks/s, output: 2378.31 toks/s]
+Processed prompts: 100%|██████████| 250/250 [07:54<00:00,  1.90s/it, est. speed input: 163.92 toks/s, output: 3619.06 toks/s]
+final processing: 250it [00:00, 384093.77it/s]█ | 2501/2750 [1:23:40<06:07,  1.48s/it] 163.92 toks/s, output: 3619.06 toks/s]
+Running generate_until requests: 100%|██████████| 2750/2750 [1:23:40<00:00,  1.83s/it]
+INFO 09-20 09:39:27 multiproc_worker_utils.py:133] Terminating local vLLM worker processes
+wandb: WARNING Serializing object of type str that is 135596 bytes
+wandb: WARNING Serializing object of type str that is 106518 bytes
+wandb: WARNING Serializing object of type str that is 104812 bytes
+wandb: WARNING Serializing object of type str that is 110652 bytes
+wandb: WARNING Serializing object of type str that is 113566 bytes
+wandb: WARNING Serializing object of type str that is 115434 bytes
+wandb: WARNING Serializing object of type str that is 135596 bytes
+wandb: WARNING Serializing object of type str that is 106518 bytes
+wandb: WARNING Serializing object of type str that is 104812 bytes
+wandb: WARNING Serializing object of type str that is 110652 bytes
+wandb: WARNING Serializing object of type str that is 113566 bytes
+wandb: WARNING Serializing object of type str that is 115434 bytes
+wandb: WARNING Serializing object of type str that is 133206 bytes
+wandb: WARNING Serializing object of type str that is 145832 bytes
+wandb: WARNING Serializing object of type str that is 144918 bytes
+wandb: WARNING Serializing object of type str that is 127702 bytes
+wandb: WARNING Serializing object of type str that is 111602 bytes
+wandb: WARNING Serializing object of type str that is 168368 bytes
+wandb: WARNING Serializing object of type str that is 149048 bytes
+wandb: WARNING Serializing object of type str that is 147662 bytes
+wandb: WARNING Serializing object of type str that is 136456 bytes
+wandb: WARNING Serializing object of type str that is 128568 bytes
+wandb: WARNING Serializing object of type str that is 133206 bytes
+wandb: WARNING Serializing object of type str that is 145832 bytes
+wandb: WARNING Serializing object of type str that is 144918 bytes
+wandb: WARNING Serializing object of type str that is 127702 bytes
+wandb: WARNING Serializing object of type str that is 111602 bytes
+wandb: WARNING Serializing object of type str that is 168368 bytes
+wandb: WARNING Serializing object of type str that is 149048 bytes
+wandb: WARNING Serializing object of type str that is 147662 bytes
+wandb: WARNING Serializing object of type str that is 136456 bytes
+wandb: WARNING Serializing object of type str that is 128568 bytes
+wandb: WARNING Serializing object of type str that is 111938 bytes
+wandb: WARNING Serializing object of type str that is 114530 bytes
+wandb: WARNING Serializing object of type str that is 141662 bytes
+wandb: WARNING Serializing object of type str that is 139800 bytes
+wandb: WARNING Serializing object of type str that is 118212 bytes
+wandb: WARNING Serializing object of type str that is 104678 bytes
+wandb: WARNING Serializing object of type str that is 140896 bytes
+wandb: WARNING Serializing object of type str that is 130060 bytes
+wandb: WARNING Serializing object of type str that is 129634 bytes
+wandb: WARNING Serializing object of type str that is 124454 bytes
+wandb: WARNING Serializing object of type str that is 111938 bytes
+wandb: WARNING Serializing object of type str that is 114530 bytes
+wandb: WARNING Serializing object of type str that is 141662 bytes
+wandb: WARNING Serializing object of type str that is 139800 bytes
+wandb: WARNING Serializing object of type str that is 118212 bytes
+wandb: WARNING Serializing object of type str that is 104678 bytes
+wandb: WARNING Serializing object of type str that is 140896 bytes
+wandb: WARNING Serializing object of type str that is 130060 bytes
+wandb: WARNING Serializing object of type str that is 129634 bytes
+wandb: WARNING Serializing object of type str that is 124454 bytes
+wandb: WARNING Serializing object of type str that is 109374 bytes
+wandb: WARNING Serializing object of type str that is 121236 bytes
+wandb: WARNING Serializing object of type str that is 113208 bytes
+wandb: WARNING Serializing object of type str that is 109374 bytes
+wandb: WARNING Serializing object of type str that is 121236 bytes
+wandb: WARNING Serializing object of type str that is 113208 bytes
+wandb: WARNING Serializing object of type str that is 142386 bytes
+wandb: WARNING Serializing object of type str that is 110008 bytes
+wandb: WARNING Serializing object of type str that is 113878 bytes
+wandb: WARNING Serializing object of type str that is 105530 bytes
+wandb: WARNING Serializing object of type str that is 122158 bytes
+wandb: WARNING Serializing object of type str that is 108502 bytes
+wandb: WARNING Serializing object of type str that is 113968 bytes
+wandb: WARNING Serializing object of type str that is 107180 bytes
+wandb: WARNING Serializing object of type str that is 121728 bytes
+wandb: WARNING Serializing object of type str that is 106106 bytes
+wandb: WARNING Serializing object of type str that is 117514 bytes
+wandb: WARNING Serializing object of type str that is 142386 bytes
+wandb: WARNING Serializing object of type str that is 110008 bytes
+wandb: WARNING Serializing object of type str that is 113878 bytes
+wandb: WARNING Serializing object of type str that is 105530 bytes
+wandb: WARNING Serializing object of type str that is 122158 bytes
+wandb: WARNING Serializing object of type str that is 108502 bytes
+wandb: WARNING Serializing object of type str that is 113968 bytes
+wandb: WARNING Serializing object of type str that is 107180 bytes
+wandb: WARNING Serializing object of type str that is 121728 bytes
+wandb: WARNING Serializing object of type str that is 106106 bytes
+wandb: WARNING Serializing object of type str that is 117514 bytes
+2025-09-20:09:39:36,339 INFO     [evaluation_tracker.py:207] Saving results aggregated
+2025-09-20:09:39:36,444 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_bn
+2025-09-20:09:39:36,612 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_de
+2025-09-20:09:39:36,770 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_en
+2025-09-20:09:39:36,926 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_es
+2025-09-20:09:39:37,090 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_fr
+2025-09-20:09:39:37,237 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_ja
+2025-09-20:09:39:37,383 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_ru
+2025-09-20:09:39:37,536 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_sw
+2025-09-20:09:39:37,773 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_te
+2025-09-20:09:39:37,940 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_th
+2025-09-20:09:39:38,087 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mgsm_native_cot_zh
+vllm (pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000), gen_kwargs: (max_gen_toks=20000), limit: None, num_fewshot: None, batch_size: auto
+|      Tasks       |Version|     Filter     |n-shot|  Metric   |   |Value|   |Stderr|
+|------------------|------:|----------------|-----:|-----------|---|----:|---|------|
+|mgsm_native_cot_bn|      4|flexible-extract|     0|exact_match|↑  |0.528|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.000|±  |   N/A|
+|mgsm_native_cot_de|      4|flexible-extract|     0|exact_match|↑  |0.740|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.000|±  |   N/A|
+|mgsm_native_cot_en|      4|flexible-extract|     0|exact_match|↑  |0.872|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.000|±  |   N/A|
+|mgsm_native_cot_es|      4|flexible-extract|     0|exact_match|↑  |0.780|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.008|±  |   N/A|
+|mgsm_native_cot_fr|      4|flexible-extract|     0|exact_match|↑  |0.760|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.004|±  |   N/A|
+|mgsm_native_cot_ja|      4|flexible-extract|     0|exact_match|↑  |0.644|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.008|±  |   N/A|
+|mgsm_native_cot_ru|      4|flexible-extract|     0|exact_match|↑  |0.832|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.004|±  |   N/A|
+|mgsm_native_cot_sw|      4|flexible-extract|     0|exact_match|↑  |0.008|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.000|±  |   N/A|
+|mgsm_native_cot_te|      4|flexible-extract|     0|exact_match|↑  |0.120|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.020|±  |   N/A|
+|mgsm_native_cot_th|      4|flexible-extract|     0|exact_match|↑  |0.652|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.260|±  |   N/A|
+|mgsm_native_cot_zh|      4|flexible-extract|     0|exact_match|↑  |0.784|±  |   N/A|
+|                  |       |strict-match    |     0|exact_match|↑  |0.000|±  |   N/A|

wandb/run-20250920_081121-2oxex54w/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,378 @@

+setproctitle==1.2.2
+colorama==0.4.6
+psutil==7.0.0
+GitPython==3.1.43
+docker-pycreds==0.4.0
+gitdb==4.0.11
+opencv-python==4.10.0.84
+sentry-sdk==2.19.0
+setproctitle==1.3.4
+smmap==5.0.1
+hjson==3.1.0
+deepspeed==0.16.7
+transformers==4.46.1
+transformers==4.54.1
+protobuf==6.31.1
+accelerate==1.6.0
+docopt==0.6.2
+gguf==0.10.0
+webencodings==0.5.1
+pickleshare==0.7.5
+fastjsonschema==2.21.1
+backcall==0.2.0
+tinycss2==1.4.0
+soupsieve==2.7
+pandocfilters==1.5.1
+mistune==3.1.3
+jupyterlab_pygments==0.3.0
+defusedxml==0.7.1
+bleach==6.2.0
+yarg==0.1.9
+ipython==8.12.3
+beautifulsoup4==4.13.4
+nbformat==5.10.4
+nbclient==0.10.2
+nbconvert==7.16.6
+pipreqs==0.5.0
+wandb==0.21.0
+trl==0.17.0
+lm_eval==0.4.4
+langid==1.1.6
+annotated-types==0.7.0
+vllm==0.6.4.post1
+typing-inspection==0.4.1
+xformers==0.0.28.post3
+pydantic_core==2.33.2
+outlines==0.0.46
+pydantic==2.11.7
+compressed-tensors==0.8.0
+click==8.2.1
+lightning-utilities==0.15.0
+torchmetrics==1.8.0
+nvidia-ml-py==13.580.65
+blessed==1.21.0
+gpustat==1.1.1
+nvidia-cufile-cu12==1.13.1.3
+nvidia-cusparselt-cu12==0.6.2
+mpmath==1.3.0
+typing_extensions==4.12.2
+sympy==1.13.1
+pillow==11.0.0
+nvidia-nvtx-cu12==12.4.127
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nccl-cu12==2.21.5
+numpy==2.1.2
+nvidia-curand-cu12==10.3.5.147
+networkx==3.3
+nvidia-cufft-cu12==11.2.1.3
+nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cublas-cu12==12.4.5.8
+MarkupSafe==2.1.5
+setuptools==80.9.0
+certifi==2025.8.3
+einops==0.8.1
+fsspec==2024.6.1
+Jinja2==3.1.4
+nvidia-cusolver-cu12==11.6.1.9
+urllib3==2.5.0
+tqdm==4.67.1
+safetensors==0.6.2
+regex==2025.7.34
+PyYAML==6.0.2
+packaging==25.0
+idna==3.10
+filelock==3.13.1
+hf-xet==1.1.8
+torch==2.6.0+cu124
+charset-normalizer==3.4.3
+requests==2.32.5
+huggingface-hub==0.34.4
+torchaudio==2.6.0+cu124
+liger_kernel==0.6.2
+nvidia-cusparse-cu12==12.3.1.170
+nvidia-cudnn-cu12==9.1.0.70
+torchvision==0.21.0+cu124
+blingfire==0.1.8
+triton==3.1.0
+sentence-transformers==5.1.0
+tabledata==1.3.4
+lxml==5.3.1
+accelerate==1.5.2
+absl-py==2.1.0
+Markdown==3.7
+uvicorn==0.34.0
+ruff==0.11.0
+nvidia-cuda-runtime-cu12==12.4.127
+airportsdata==20250224
+nvidia-cusolver-cu12==11.6.1.9
+astor==0.8.1
+DataProperty==1.1.0
+lm-format-enforcer==0.10.11
+mdurl==0.1.2
+nvidia-nccl-cu12==2.21.5
+tabulate==0.9.0
+python-dotenv==1.0.1
+h5py==3.13.0
+chardet==5.2.0
+cupy-cuda12x==13.4.0
+tiktoken==0.9.0
+jiter==0.8.2
+Pygments==2.19.1
+typing_extensions==4.12.2
+datasets==3.1.0
+zipp==3.21.0
+more-itertools==10.6.0
+MarkupSafe==2.1.5
+comm==0.2.2
+pycountry==24.6.1
+partial-json-parser==0.2.1.1.post5
+gradio==4.44.0
+prometheus_client==0.21.1
+six==1.17.0
+pytz==2025.1
+unsloth_zoo==2025.3.12
+starlette==0.46.0
+llvmlite==0.44.0
+peft==0.14.0
+aiohttp==3.11.13
+aiofiles==23.2.1
+importlib_resources==6.5.2
+nvidia-nvjitlink-cu12==12.4.127
+semantic-version==2.10.0
+decorator==5.2.1
+nvidia-cublas-cu12==12.4.5.8
+contourpy==1.3.1
+torch==2.6.0
+pytest==8.3.5
+fastapi==0.115.11
+seaborn==0.13.2
+sympy==1.13.1
+threadpoolctl==3.6.0
+networkx==3.4.2
+python-dateutil==2.9.0.post0
+depyf==0.18.0
+nvidia-ml-py==12.570.86
+jedi==0.19.2
+joblib==1.4.2
+referencing==0.36.2
+diskcache==5.6.3
+httpcore==1.0.7
+httpx==0.28.1
+pyairports==2.1.1
+protobuf==3.20.3
+portalocker==3.1.1
+nvidia-cudnn-cu12==9.1.0.70
+Pebble==5.1.0
+fsspec==2024.9.0
+hf_transfer==0.1.9
+ptyprocess==0.7.0
+pexpect==4.9.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+scipy==1.15.2
+sentencepiece==0.2.0
+cycler==0.12.1
+packaging==24.2
+openai==1.56.1
+frozenlist==1.5.0
+lark==1.2.2
+filelock==3.17.0
+opentelemetry-exporter-otlp==1.26.0
+yarl==1.18.3
+rouge_score==0.1.2
+grpcio==1.70.0
+googleapis-common-protos==1.70.0
+aiohappyeyeballs==2.4.6
+multiprocess==0.70.16
+tornado==6.4.2
+numpy==1.26.4
+nltk==3.9.1
+pip==25.0
+charset-normalizer==3.3.2
+prometheus-fastapi-instrumentator==7.0.2
+numexpr==2.10.2
+pyarrow==19.0.1
+attrs==25.1.0
+lm_eval==0.4.4
+urllib3==2.3.0
+mkl_random==1.2.8
+httptools==0.6.4
+gpustat==1.1.1
+pluggy==1.5.0
+huggingface-hub==0.30.2
+triton==3.1.0
+idna==3.7
+ipython==8.20.0
+pyparsing==3.2.1
+rich-toolkit==0.13.2
+googletrans==4.0.2
+jupyter_core==5.7.2
+zstandard==0.23.0
+aiosignal==1.3.2
+tyro==0.9.17
+traitlets==5.14.3
+h11==0.14.0
+outlines==0.1.11
+jupyter_client==8.6.3
+loralib==0.1.2
+kiwisolver==1.4.8
+blake3==1.0.4
+nvidia-cusparselt-cu12==0.6.2
+rich==13.9.4
+hf-xet==1.0.2
+certifi==2025.1.31
+wheel==0.45.1
+pybind11==2.13.6
+regex==2024.11.6
+mpmath==1.3.0
+transformers==4.51.3
+flash_attn==2.7.4.post1
+nvidia-curand-cu12==10.3.5.147
+PySocks==1.7.1
+gmpy2==2.2.1
+iniconfig==2.0.0
+pandas==2.2.3
+Jinja2==3.1.5
+msgpack==1.1.0
+gguf==0.16.2
+email_validator==2.2.0
+tzdata==2025.1
+cut-cross-entropy==25.1.1
+tensorboard==2.19.0
+matplotlib==3.10.1
+jsonschema-specifications==2024.10.1
+unsloth==2025.3.14
+Werkzeug==3.1.3
+opentelemetry-proto==1.26.0
+fastrlock==0.8.3
+dnspython==2.7.0
+typeguard==4.4.2
+opentelemetry-api==1.26.0
+platformdirs==4.3.6
+importlib_metadata==8.0.0
+opentelemetry-semantic-conventions==0.47b0
+sniffio==1.3.1
+nvidia-cuda-cupti-cu12==12.4.127
+scikit-learn==1.6.1
+hpack==4.1.0
+parso==0.8.4
+torchaudio==2.6.0
+xgrammar==0.1.18
+executing==2.2.0
+mkl_fft==1.3.11
+vllm==0.8.4
+word2number==1.1
+pure_eval==0.2.3
+watchfiles==1.0.4
+pydub==0.25.1
+mbstrdecoder==1.1.4
+markdown-it-py==3.0.0
+jsonschema==4.23.0
+msgspec==0.19.0
+rpds-py==0.23.1
+wandb==0.19.9
+matplotlib-inline==0.1.7
+requests==2.32.3
+interegular==0.3.3
+pytablewriter==1.2.1
+orjson==3.10.15
+xformers==0.0.29.post2
+fastapi-cli==0.0.7
+mkl-service==2.4.0
+opencv-python-headless==4.11.0.86
+prompt_toolkit==3.0.50
+trl==0.16.1
+debugpy==1.8.13
+pydantic==2.10.6
+stack-data==0.6.3
+tqdm-multiprocess==0.0.11
+gradio_client==1.3.0
+dill==0.3.8
+evaluate==0.4.3
+nvidia-cufft-cu12==11.2.1.3
+nest-asyncio==1.6.0
+pyzmq==26.2.1
+tensorboard-data-server==0.7.2
+docstring_parser==0.16
+click==8.1.8
+psutil==7.0.0
+annotated-types==0.7.0
+ninja==1.11.1.4
+pillow==10.4.0
+tcolorpy==0.1.7
+einops==0.8.1
+wcwidth==0.2.13
+typer==0.15.2
+tqdm==4.67.1
+tomlkit==0.12.0
+ipykernel==6.28.0
+diffusers==0.32.2
+mistral_common==1.5.4
+setuptools==75.8.0
+h2==4.2.0
+cachetools==5.5.2
+wrapt==1.17.2
+pydantic_core==2.27.2
+ffmpy==0.5.0
+sacrebleu==2.5.1
+outlines_core==0.1.26
+jsonlines==4.0.0
+fonttools==4.56.0
+nvidia-nvtx-cu12==12.4.127
+safetensors==0.5.3
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+bitsandbytes==0.45.3
+nanobind==2.6.1
+tokenizers==0.21.1
+propcache==0.3.0
+distro==1.9.0
+python-multipart==0.0.20
+uvloop==0.21.0
+liger_kernel==0.5.5
+python-json-logger==3.3.0
+multidict==6.1.0
+ray==2.43.0
+opentelemetry-exporter-otlp-proto-http==1.26.0
+typepy==1.3.4
+torchvision==0.21.0
+PyYAML==6.0.2
+xxhash==3.5.0
+anthropic==0.49.0
+py-cpuinfo==9.0.0
+compressed-tensors==0.9.3
+opentelemetry-exporter-otlp-proto-common==1.26.0
+opentelemetry-sdk==1.26.0
+shtab==1.7.1
+websockets==12.0
+numba==0.61.2
+llguidance==0.7.13
+hyperframe==6.1.0
+anyio==4.8.0
+asttokens==3.0.0
+blessed==1.20.0
+colorama==0.4.6
+shellingham==1.5.4
+Brotli==1.0.9
+sqlitedict==2.1.0
+nvidia-cusparse-cu12==12.3.1.170
+Deprecated==1.2.18
+cloudpickle==3.1.1
+pathvalidate==3.2.3
+opentelemetry-semantic-conventions-ai==0.4.3
+platformdirs==4.2.2
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/run-20250920_081121-2oxex54w/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,110 @@

+{
+  "os":  "Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28",
+  "python":  "CPython 3.11.11",
+  "startedAt":  "2025-09-20T12:11:21.301942Z",
+  "args":  [
+    "--model",
+    "vllm",
+    "--model_args",
+    "pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000",
+    "--tasks",
+    "mgsm_native_cot_ja,mgsm_native_cot_en,mgsm_native_cot_th,mgsm_native_cot_es,mgsm_native_cot_bn,mgsm_native_cot_te,mgsm_native_cot_fr,mgsm_native_cot_zh,mgsm_native_cot_sw,mgsm_native_cot_de,mgsm_native_cot_ru",
+    "--batch_size",
+    "auto",
+    "--apply_chat_template",
+    "--output_path",
+    "ckpts/rerun",
+    "--log_samples",
+    "--gen_kwargs",
+    "max_gen_toks=20000",
+    "--wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto"
+  ],
+  "program":  "/home/jdhwang/.conda/envs/llm/bin/lm_eval",
+  "git":  {
+    "remote":  "[email protected]:jd730/BRIDGE-private.git",
+    "commit":  "bb8b2be1f7420f9c6a3d65f0eaf3072732d73123"
+  },
+  "email":  "[email protected]",
+  "root":  "ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309",
+  "host":  "node1803",
+  "executable":  "/orcd/home/001/jdhwang/.conda/envs/llm/bin/python",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "464506159104",
+      "used":  "12268101632"
+    }
+  },
+  "memory":  {
+    "total":  "2163473002496"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper",
+      "uuid":  "GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b"
+    },
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper",
+      "uuid":  "GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b"
+    }
+  ],
+  "cudaVersion":  "12.4",
+  "slurm":  {
+    "array_job_id":  "4452191",
+    "array_task_count":  "1",
+    "array_task_id":  "0",
+    "array_task_max":  "0",
+    "array_task_min":  "0",
+    "array_task_step":  "1",
+    "cluster_name":  "eofe7",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "2",
+    "gtids":  "0",
+    "job_account":  "mit_general",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1758413466",
+    "job_gid":  "209655",
+    "job_gpus":  "1,2",
+    "job_id":  "4452191",
+    "job_name":  "eval.sh",
+    "job_nodelist":  "node1803",
+    "job_num_nodes":  "1",
+    "job_partition":  "ou_bcs_normal",
+    "job_qos":  "normal",
+    "job_start_time":  "1758370266",
+    "job_uid":  "209655",
+    "job_user":  "jdhwang",
+    "jobid":  "4452191",
+    "localid":  "0",
+    "mem_per_node":  "131072",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "node1803",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "script_context":  "prolog_task",
+    "submit_dir":  "/orcd/home/002/jdhwang/BRIDGE",
+    "submit_host":  "orcd-login003.mit.edu",
+    "task_pid":  "1143610",
+    "tasks_per_node":  "1",
+    "topology_addr":  "node1803",
+    "topology_addr_pattern":  "node",
+    "tres_per_task":  "cpu=16"
+  },
+  "writerId":  "bdsaggp24nt8kfc8qjgq21gi927g7e3o"
+}

wandb/run-20250920_081121-2oxex54w/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"mgsm_native_cot_zh_eval_results":{"path":"media/table/mgsm_native_cot_zh_eval_results_12_2882185f7e816bbcc2fe.table.json","artifact_path":"wandb-client-artifact://nkp1984crhkeh7851auhm8cmiz0c966uge9yj76ipqritrecvc5bfb9c38ag3q5n9lxlxv0a592qujt5hv20d5ou72uzs0da2311ong59bzu3eu2j7sjwoi1ham5i3wi/mgsm_native_cot_zh_eval_results.table.json","_type":"table-file","nrows":500,"size":6816454,"sha256":"2882185f7e816bbcc2fe7db847cf400946e1eecb9c136b26ba49d658f74f6aa1","log_mode":"IMMUTABLE","ncols":8,"_latest_artifact_path":"wandb-client-artifact://kdq56ygszyonuw3iawyllv3myb0yvhb1lelksg4a7blhqf51vl8fzo7pfqegv64u4r6ndaz7pbste9hbc68sir2hvdemy03qgvhjn4ol3q87z73n0lb6yikdy3o1ujb8:latest/mgsm_native_cot_zh_eval_results.table.json"},"mgsm_native_cot_th_eval_results":{"_type":"table-file","sha256":"baa86a95f601e3c721a0fbcf0ee7623b3d2011e2c01da8b0c3cb2d98b16684ac","size":5007472,"path":"media/table/mgsm_native_cot_th_eval_results_11_baa86a95f601e3c721a0.table.json","ncols":8,"_latest_artifact_path":"wandb-client-artifact://9yj2698oonww08k7xlrlqosfnpi8bf8whg3mu81535kr3f8sv1lrk187fyckresyat8nemrzjmbs8y8f5xkb6594i7jjen2yaigaf6909w8ptgr8i6hb1efl3y4culsb:latest/mgsm_native_cot_th_eval_results.table.json","log_mode":"IMMUTABLE","nrows":500,"artifact_path":"wandb-client-artifact://gja5by4u29qqcgmr6sb6dwapnkradcp3asz7dfpbn8wh8omw28idhrqljkjewim9009pew6vmw82s1xubz6lr0x7z2ounjv209iivpf93x9ymx5x83ckxj48a44i7xn0/mgsm_native_cot_th_eval_results.table.json"},"mgsm_native_cot_en_eval_results":{"_latest_artifact_path":"wandb-client-artifact://g1upq5u3jz3pag4sh1m0vjv5rmwplu9a9a9abfqhq83nl6kkyphd4joh0hg9zbe5l3ra4qjxygdt7x7m15gnuguznxif28ptxvea75zhbjlmr8m3vfe265a8dttuhpb1:latest/mgsm_native_cot_en_eval_results.table.json","size":5089950,"artifact_path":"wandb-client-artifact://7qqodhesd67ke2alwfwf9m6q5zw8oj0stuwgoszb386l13ofees3uxxwcly1gij11waas038xcn059krmog8cskfh8ywl5r5qc1o06i1ytq6g709pdq5bik1vgggxm8c/mgsm_native_cot_en_eval_results.table.json","nrows":500,"log_mode":"IMMUTABLE","path":"media/table/m
gsm_native_cot_en_eval_results_4_82791ccc59d2c61a093c.table.json","_type":"table-file","ncols":8,"sha256":"82791ccc59d2c61a093cbdd818ae3ff870ab3d55a931b224bfbca4751c1765bf"},"mgsm_native_cot_sw/alias":"mgsm_native_cot_sw","mgsm_native_cot_fr/exact_match,strict-match":0.004,"mgsm_native_cot_ja/alias":"mgsm_native_cot_ja","mgsm_native_cot_sw_eval_results":{"nrows":500,"ncols":8,"size":22151199,"sha256":"d79a57dcd801f08109f8b083cce4884bf51b225d8127bc78429cf19d3df17aad","_type":"table-file","_latest_artifact_path":"wandb-client-artifact://uwdhtiin3cgey08x46sll6rbkjr6kz632eelzimhsh1pxqn6vbrkwczotv3aj0qfan0xj5kffsltbfe2sbqpljvcyl9dqefjwmggqf9tx47zzzeqwdnfw4lkruafdn07:latest/mgsm_native_cot_sw_eval_results.table.json","path":"media/table/mgsm_native_cot_sw_eval_results_9_d79a57dcd801f08109f8.table.json","artifact_path":"wandb-client-artifact://bbvbahu84etz52vzztm3c6g9ifdlaksmuq7b0ck0nhzo9x19db225j1npqxx09xekaa48m3yu9rsyl247urothkf4emwf6087de2miq7cb7i5z5fba29f6b6vcio5iaw/mgsm_native_cot_sw_eval_results.table.json","log_mode":"IMMUTABLE"},"mgsm_native_cot_te/exact_match,flexible-extract":0.12,"mgsm_native_cot_ja_eval_results":{"_type":"table-file","artifact_path":"wandb-client-artifact://iwu92mws1e8f263xazaxilfbyhxdxe7hpkir0numy6cug9tf3r48relnykckfq0j3aii4hbodiln44j4iqo7fexjvnhuano4k9rh1t048hfrg7eh4lo7p6bc5x5ec8l9/mgsm_native_cot_ja_eval_results.table.json","ncols":8,"path":"media/table/mgsm_native_cot_ja_eval_results_7_098df869a17abf277e8c.table.json","log_mode":"IMMUTABLE","_latest_artifact_path":"wandb-client-artifact://f5tg80y2zzbku7qkoirlmlhz7d02n91o6hzsfj91cws7t60gt015qockdoscq97c4gcpn4sm3ggetskv44zqylmqai9i2j5f1no6s2eanygyakr7z04e9g4pqbwq82v2:latest/mgsm_native_cot_ja_eval_results.table.json","size":19900362,"nrows":500,"sha256":"098df869a17abf277e8cf3b5d4966de124ba0fed82b267159ef0c1d32c7f38a2"},"mgsm_native_cot_sw/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_th/exact_match_stderr,strict-mat
ch":"N/A","mgsm_native_cot_de/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_th/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_te/alias":"mgsm_native_cot_te","mgsm_native_cot_zh/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ja/exact_match,strict-match":0.008,"mgsm_native_cot_zh/alias":"mgsm_native_cot_zh","mgsm_native_cot_es/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ru_eval_results":{"_type":"table-file","sha256":"19e62e412810312c2375525d388b308579f502daf4537e08ea3e97b90b3ef016","_latest_artifact_path":"wandb-client-artifact://ssphf9mv1o6gb0j85z3gfrhwargykdb0luavoyk0pha4wg7ovjkne5oln8tfzygwi6tfmuvgu0fdjl2noewx4kvqtjovdj87tt3vf6tnucvvookj2m9skxydcb1q3ikr:latest/mgsm_native_cot_ru_eval_results.table.json","nrows":500,"log_mode":"IMMUTABLE","artifact_path":"wandb-client-artifact://nju8zlsx5likjjdiq9ix9kpfbd128ffhidfei5n1o9ns883akydn3oe0edbq588qwtck6y53yar6vqg4ghqytfvb2udoxiaj3jqdspbze43bveqr2untjk3r5h8kov6v/mgsm_native_cot_ru_eval_results.table.json","ncols":8,"path":"media/table/mgsm_native_cot_ru_eval_results_8_19e62e412810312c2375.table.json","size":13109180},"mgsm_native_cot_th/alias":"mgsm_native_cot_th","mgsm_native_cot_te/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr/alias":"mgsm_native_cot_fr","mgsm_native_cot_ru/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ja/exact_match,flexible-extract":0.644,"_timestamp":1.7583755761531694e+09,"mgsm_native_cot_es/exact_match,flexible-extract":0.78,"mgsm_native_cot_bn_eval_results":{"size":16518181,"sha256":"f5e72dc1f8666e2a7b2dd420223eda667dbec11d2c10570f946fcc38e857476a","_latest_artifact_path":"wandb-client-artifact://yr3i8r2cgym32wmpmzdoi89agvfbqf8rkmcw5czgif8twgbpd2ifhdzum1b0720p6gh46uxwok7zj56ycgodgqetjrxiin5dio9y4ubjuzh8wprfviboyo88it4fowsi:latest/mgsm_native_cot_bn_eval_results.table.json","path":"media/table/mgsm_native_cot_bn_eval_results_2_f5e72dc1f8666e2a7b2d.table.json","nrows":500,"_type":"table-file","ncols":8,"artifact_path":"wandb-
client-artifact://7pfp3stdhg0ptnttd5vi0di3j2rpm43qn1obywl8v754gx2yqso9s3qk71no2ddq1zu4czdztrn8shcqof2idvturotswrgh2ohkliv477lbym5tnu7mg2eksb5hgniw/mgsm_native_cot_bn_eval_results.table.json","log_mode":"IMMUTABLE"},"_step":12,"mgsm_native_cot_sw/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_en/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_sw/exact_match,strict-match":0,"mgsm_native_cot_bn/exact_match,flexible-extract":0.528,"mgsm_native_cot_zh/exact_match,flexible-extract":0.784,"mgsm_native_cot_es_eval_results":{"nrows":500,"log_mode":"IMMUTABLE","sha256":"db5ed66b097e218c4da6ade4820a3b57500460332d23695a9d082453f9eaef4e","path":"media/table/mgsm_native_cot_es_eval_results_5_db5ed66b097e218c4da6.table.json","size":8588068,"_type":"table-file","_latest_artifact_path":"wandb-client-artifact://ux5vlh8wacyfuof7pc4t55agsyg3nwzac4w6xfmjhi2j5z63gc767hzmqva63i1z2shq9549vn7ircit5e7xf1heikgjioppjtr6mic5petyfizupkh85owvqmlahyde:latest/mgsm_native_cot_es_eval_results.table.json","artifact_path":"wandb-client-artifact://3ez40i75t7j3zaj3xnzyhfqq39henad90lipliml5f3bcb1h4b9yreifkue9u5zfzv360fvrug7fiotercqa4ik31tivrzppgrqr86mm1isvaggoe1mlmchqhpmtl8k9/mgsm_native_cot_es_eval_results.table.json","ncols":8},"mgsm_native_cot_zh/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_te_eval_results":{"_type":"table-file","_latest_artifact_path":"wandb-client-artifact://3wvd0feekbllytg5c07f216mnn6ugilhacj5j353flytf0xzchfj577v6n6n3wms0wdkeamvey7fukilst8v5zvf7jzzof8snmqjy0ikvysqf8nvaelhir90xv3xfvxq:latest/mgsm_native_cot_te_eval_results.table.json","path":"media/table/mgsm_native_cot_te_eval_results_10_c0fc0c81b8662e04cc0e.table.json","artifact_path":"wandb-client-artifact://wu0kyoyi54m51v0k8rl2digz31m6mfp2r40fyzjzjr1ruo0fp8df41iqiin5ieeayd14bj5s3ete22cd5qg0jjmve9et5xjxw6z05pltmxvmlam7rmr9o6y8p7epty38/mgsm_native_cot_te_eval_results.table.json","log_mode":"IMMUTABLE","size":23468672,"ncols":8,"sha256":"c0fc0c81b8662e04cc0e8fc30ac6053adcf13bd384f959eee54fbeb5039dbc01
","nrows":500},"mgsm_native_cot_th/exact_match,flexible-extract":0.652,"mgsm_native_cot_de/alias":"mgsm_native_cot_de","mgsm_native_cot_ru/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_ru/exact_match,strict-match":0.004,"mgsm_native_cot_en/exact_match,strict-match":0,"mgsm_native_cot_de_eval_results":{"_latest_artifact_path":"wandb-client-artifact://l92nyplmx9l83jo3sw80tinrvnfoko7v1kc02phiy9h46b3v5ls9e1gbvy8xjjmgs8eczamzmsr9v54g7s4m2b5olhevlttcz98hb61gq72gk4fj9bv71ge8da91qxkn:latest/mgsm_native_cot_de_eval_results.table.json","_type":"table-file","artifact_path":"wandb-client-artifact://sy5rdhvvobn3pshimnyuxyo8gj1riw214tz1yc5vnrt1lnown73yh4fjl37mecgdhlv3g91dmcbxb7sk4hicr06s8aowfno29bxcj0vxl4wbym0kdtrmpa3zwjkev27b/mgsm_native_cot_de_eval_results.table.json","sha256":"fabaf33255f24add59c137ef77a33afd9e00d9d17b451c92e556f9ab861bb2ad","log_mode":"IMMUTABLE","ncols":8,"size":3328757,"path":"media/table/mgsm_native_cot_de_eval_results_3_fabaf33255f24add59c1.table.json","nrows":500},"mgsm_native_cot_te/exact_match,strict-match":0.02,"mgsm_native_cot_en/exact_match,flexible-extract":0.872,"mgsm_native_cot_ru/exact_match,flexible-extract":0.832,"mgsm_native_cot_te/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ja/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr_eval_results":{"size":3273008,"nrows":500,"log_mode":"IMMUTABLE","_latest_artifact_path":"wandb-client-artifact://klcjxv56dck9r5v7yixxzhs1vkalztbzpxi2g32kee1ieaqwpp5xzehy3b0it5z25qexpkvrivqefau0xsyre0j1hrw6lcuqz15w18v3t2ogsykczvkqev3ogwsrwa6k:latest/mgsm_native_cot_fr_eval_results.table.json","artifact_path":"wandb-client-artifact://digdeal9nyp9dh9ge1fe04twhqx7zqfin73cuhsx7f5x1c3sg1hvv27v5bv7kap7hntudd9ptp6i9d1laldsfty5xyf64eed1mj07e1cq9ekpkbt59fjzsgj5qdn5kqf/mgsm_native_cot_fr_eval_results.table.json","ncols":8,"sha256":"214cb1a5a2cdee9f330e1982a6ee87d274f796f5fae1bfd02596f69cf3108b1c","_type":"table-file","path":"media/table/mgsm_native_cot_fr_eval_results_6_214cb1a5a2cdee9f330e.
table.json"},"mgsm_native_cot_ja/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_bn/exact_match,strict-match":0,"mgsm_native_cot_th/exact_match,strict-match":0.26,"mgsm_native_cot_es/alias":"mgsm_native_cot_es","mgsm_native_cot_de/exact_match,flexible-extract":0.74,"mgsm_native_cot_en/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_fr/exact_match,flexible-extract":0.76,"mgsm_native_cot_es/exact_match,strict-match":0.008,"mgsm_native_cot_fr/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_de/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_ru/alias":"mgsm_native_cot_ru","_wandb":{"runtime":5296},"_runtime":5296,"mgsm_native_cot_sw/exact_match,flexible-extract":0.008,"evaluation/eval_results":{"sha256":"817b26b9b7489391f4e7629070c960eaee51c5e2fd819c827f94ee7a6945c1cf","_type":"table-file","_latest_artifact_path":"wandb-client-artifact://5qlehutp8s8du57s7goeq5l74fpjy27nr9584dwoqfdcv3y64mc6zenbt2nh17yqiysjti7pjfebr8yi9e9ueixtoy0so8e4qul4atn4ypuw7rhmbefsvqoqtk4aezed:latest/evaluation/eval_results.table.json","size":1863,"artifact_path":"wandb-client-artifact://uo3la8vmc83mzok4j4viu8j3yxbp5ygjzktfg1w124h49nvkq4fj5puf329sx4e79ecqrd2pqncuh050bnum14gysm4ets9wq7om8dk0c57etth40kwl2wquof82a1o5/evaluation/eval_results.table.json","log_mode":"IMMUTABLE","path":"media/table/evaluation/eval_results_1_817b26b9b7489391f4e7.table.json","nrows":22,"ncols":7},"mgsm_native_cot_bn/exact_match_stderr,strict-match":"N/A","mgsm_native_cot_bn/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_de/exact_match,strict-match":0,"mgsm_native_cot_zh/exact_match,strict-match":0,"mgsm_native_cot_bn/alias":"mgsm_native_cot_bn","mgsm_native_cot_es/exact_match_stderr,flexible-extract":"N/A","mgsm_native_cot_en/alias":"mgsm_native_cot_en"}

wandb/run-20250920_081121-2oxex54w/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,16 @@

+{"time":"2025-09-20T08:11:21.452750753-04:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp3lkn2ocs/port-1143625.txt","pid":1143625,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-09-20T08:11:21.453095559-04:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1143625}
+{"time":"2025-09-20T08:11:21.453077825-04:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1143625-1143815-3992152877/socket","Net":"unix"}}
+{"time":"2025-09-20T08:11:21.634813033-04:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-09-20T08:11:21.640098784-04:00","level":"INFO","msg":"handleInformInit: received","streamId":"2oxex54w","id":"1(@)"}
+{"time":"2025-09-20T08:11:21.866468885-04:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"2oxex54w","id":"1(@)"}
+{"time":"2025-09-20T09:39:39.549058154-04:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"2oxex54w","id":"1(@)"}
+{"time":"2025-09-20T09:39:39.567640542-04:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"2oxex54w","id":"1(@)"}
+{"time":"2025-09-20T09:39:40.549091059-04:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-09-20T09:39:40.549121192-04:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-09-20T09:39:40.549126164-04:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-09-20T09:39:40.549132677-04:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-09-20T09:39:40.549192283-04:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1143625-1143815-3992152877/socket","Net":"unix"}}
+{"time":"2025-09-20T09:39:40.54921864-04:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-09-20T09:39:40.549221165-04:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-09-20T09:39:40.549224135-04:00","level":"INFO","msg":"server is closed"}

wandb/run-20250920_081121-2oxex54w/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,15 @@

+{"time":"2025-09-20T08:11:21.640622676-04:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-09-20T08:11:21.866445386-04:00","level":"INFO","msg":"stream: created new stream","id":"2oxex54w"}
+{"time":"2025-09-20T08:11:21.866465564-04:00","level":"INFO","msg":"stream: started","id":"2oxex54w"}
+{"time":"2025-09-20T08:11:21.866482696-04:00","level":"INFO","msg":"writer: Do: started","stream_id":"2oxex54w"}
+{"time":"2025-09-20T08:11:21.866483998-04:00","level":"INFO","msg":"handler: started","stream_id":"2oxex54w"}
+{"time":"2025-09-20T08:11:21.866482679-04:00","level":"INFO","msg":"sender: started","stream_id":"2oxex54w"}
+{"time":"2025-09-20T08:11:22.258337567-04:00","level":"WARN","msg":"handleCodeSave: program relative path is empty"}
+{"time":"2025-09-20T08:11:22.258646248-04:00","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
+{"time":"2025-09-20T09:39:39.399281107-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-09-20T09:39:39.487606624-04:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading summary, console lines 184-211","runtime_seconds":0.088083458}],"total_operations":1}}
+{"time":"2025-09-20T09:39:39.549286458-04:00","level":"INFO","msg":"stream: closing","id":"2oxex54w"}
+{"time":"2025-09-20T09:39:39.549294944-04:00","level":"INFO","msg":"handler: closed","stream_id":"2oxex54w"}
+{"time":"2025-09-20T09:39:39.549300294-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":"2oxex54w"}
+{"time":"2025-09-20T09:39:39.549303318-04:00","level":"INFO","msg":"sender: closed","stream_id":"2oxex54w"}
+{"time":"2025-09-20T09:39:39.549556658-04:00","level":"INFO","msg":"stream: closed","id":"2oxex54w"}

wandb/run-20250920_081121-2oxex54w/logs/debug.log ADDED Viewed

	@@ -0,0 +1,29 @@

+2025-09-20 08:11:21,421 INFO    MainThread:1143625 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-09-20 08:11:21,422 INFO    MainThread:1143625 [wandb_setup.py:_flush():80] Configure stats pid to 1143625
+2025-09-20 08:11:21,422 INFO    MainThread:1143625 [wandb_setup.py:_flush():80] Loading settings from /home/jdhwang/.config/wandb/settings
+2025-09-20 08:11:21,422 INFO    MainThread:1143625 [wandb_setup.py:_flush():80] Loading settings from /orcd/home/002/jdhwang/BRIDGE/wandb/settings
+2025-09-20 08:11:21,422 INFO    MainThread:1143625 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-09-20 08:11:21,422 INFO    MainThread:1143625 [wandb_init.py:setup_run_log_directory():703] Logging user logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250920_081121-2oxex54w/logs/debug.log
+2025-09-20 08:11:21,422 INFO    MainThread:1143625 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250920_081121-2oxex54w/logs/debug-internal.log
+2025-09-20 08:11:21,422 INFO    MainThread:1143625 [wandb_init.py:init():830] calling init triggers
+2025-09-20 08:11:21,422 INFO    MainThread:1143625 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-09-20 08:11:21,422 INFO    MainThread:1143625 [wandb_init.py:init():871] starting backend
+2025-09-20 08:11:21,634 INFO    MainThread:1143625 [wandb_init.py:init():874] sending inform_init request
+2025-09-20 08:11:21,638 INFO    MainThread:1143625 [wandb_init.py:init():882] backend started and connected
+2025-09-20 08:11:21,639 INFO    MainThread:1143625 [wandb_init.py:init():953] updated telemetry
+2025-09-20 08:11:21,655 INFO    MainThread:1143625 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-09-20 08:11:22,257 INFO    MainThread:1143625 [wandb_init.py:init():1029] starting run threads in backend
+2025-09-20 08:11:22,623 INFO    MainThread:1143625 [wandb_run.py:_console_start():2458] atexit reg
+2025-09-20 08:11:22,623 INFO    MainThread:1143625 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-09-20 08:11:22,623 INFO    MainThread:1143625 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-09-20 08:11:22,623 INFO    MainThread:1143625 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-09-20 08:11:22,625 INFO    MainThread:1143625 [wandb_init.py:init():1075] run started, returning control to user process
+2025-09-20 09:39:29,566 INFO    MainThread:1143625 [wandb_run.py:_config_callback():1363] config_cb None None {'task_configs': {'mgsm_native_cot_bn': {'task': 'mgsm_native_cot_bn', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'bn', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nধাপে ধাপে উত্তর:"}}{% else %}{{"প্রশ্ন: "+question+"\\nধাপে ধাপে উত্তর:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[17:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['প্রশ্ন:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'The answer is (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_de': {'task': 'mgsm_native_cot_de', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'de', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nSchritt-für-Schritt-Antwort:"}}{% else %}{{"Frage: "+question+"\\nSchritt-für-Schritt-Antwort:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[29:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': 
True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Frage:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'Die Antwort lautet (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_en': {'task': 'mgsm_native_cot_en', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'en', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\\nStep-by-Step Answer:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Question:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'The answer is (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_es': {'task': 'mgsm_native_cot_es', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'es', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': 
'{% if answer is not none %}{{question+"\\nRespuesta paso a paso:"}}{% else %}{{"Pregunta: "+question+"\\nRespuesta paso a paso:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[23:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Pregunta:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'La respuesta es (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_fr': {'task': 'mgsm_native_cot_fr', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'fr', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nRéponse étape par étape :"}}{% else %}{{"Question : "+question+"\\nRéponse étape par étape :"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[26:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Question :', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'La réponse est (\\-?[0-9\\.\\,]+)'}, {'function': 
'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_ja': {'task': 'mgsm_native_cot_ja', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'ja', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nステップごとの答え:"}}{% else %}{{"問題： "+question+"\\nステップごとの答え:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[11:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['問題：', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': '答えは(\\-?[0-9\\.\\,]+)です。'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_ru': {'task': 'mgsm_native_cot_ru', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'ru', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nПошаговоерешение:"}}{% else %}{{"Задача: "+question+"\\nПошаговоерешение:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': 
[{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Задача:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'Ответ — (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_sw': {'task': 'mgsm_native_cot_sw', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'sw', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nJibu la Hatua kwa Hatua:"}}{% else %}{{"Swali: "+question+"\\nJibu la Hatua kwa Hatua:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[25:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['Swali:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'Jibu ni (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_te': {'task': 'mgsm_native_cot_te', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 
'dataset_name': 'te', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nదశలవారీగా సమాధానం:"}}{% else %}{{"ప్రశ్న: "+question+"\\nదశలవారీగా సమాధానం:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[19:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['ప్రశ్న:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'సమాధానం (\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_th': {'task': 'mgsm_native_cot_th', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'th', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\nคำตอบทีละขั้นตอน:"}}{% else %}{{"โจทย์: "+question+"\\nคำตอบทีละขั้นตอน:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[18:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['โจทย์:', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': 'คำตอบคือ 
(\\-?[0-9\\.\\,]+)'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}, 'mgsm_native_cot_zh': {'task': 'mgsm_native_cot_zh', 'tag': 'mgsm_cot_native', 'dataset_path': 'juletxara/mgsm', 'dataset_name': 'zh', 'training_split': 'train', 'test_split': 'test', 'doc_to_text': '{% if answer is not none %}{{question+"\\n逐步解答:"}}{% else %}{{"问题： "+question+"\\n逐步解答:"}}{% endif %}', 'doc_to_target': '{% if answer is not none %}{{answer[6:]}}{% else %}{{answer_number|string}}{% endif %}', 'description': '', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'num_fewshot': 0, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'do_sample': False, 'until': ['问题：', '</s>', '<|im_end|>'], 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'filter': [{'function': 'regex', 'regex_pattern': '答案是 (\\-?[0-9\\.\\,]+)。'}, {'function': 'take_first'}], 'name': 'strict-match'}, {'filter': [{'function': 'regex', 'group_select': -1, 'regex_pattern': '(-?[$0-9.,]{2,})|(-?[0-9]+)'}, {'function': 'take_first'}], 'name': 'flexible-extract'}], 'should_decontaminate': False, 'metadata': {'version': 4.0}}}, 'cli_configs': {'model': 'vllm', 'model_args': 'pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000', 'batch_size': 'auto', 'batch_sizes': [], 'device': None, 'use_cache': None, 'limit': None, 'bootstrap_iters': 0, 'gen_kwargs': {'max_gen_toks': 20000}, 'random_seed': 0, 'numpy_seed': 1234, 'torch_seed': 1234, 'fewshot_seed': 1234}}
+2025-09-20 09:39:38,409 INFO    MainThread:1143625 [wandb_run.py:_finish():2224] finishing run jdhwang/MRPO_eval/2oxex54w
+2025-09-20 09:39:38,409 INFO    MainThread:1143625 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
+2025-09-20 09:39:38,486 INFO    MainThread:1143625 [wandb_run.py:_restore():2405] restore
+2025-09-20 09:39:38,486 INFO    MainThread:1143625 [wandb_run.py:_restore():2411] restore done
+2025-09-20 09:39:39,547 INFO    MainThread:1143625 [wandb_run.py:_footer_history_summary_info():3903] rendering history
+2025-09-20 09:39:39,548 INFO    MainThread:1143625 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
+2025-09-20 09:39:39,548 INFO    MainThread:1143625 [wandb_run.py:_footer_sync_info():3864] logging synced files

wandb/run-20250920_081121-2oxex54w/run-2oxex54w.wandb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07a3dd0ca1cf928a27f52210df217c77c140eaef367b9cde8b60d69e33e2857d
+size 929156

wandb/run-20250921_062002-cothceaw/files/config.yaml ADDED Viewed

	@@ -0,0 +1,252 @@

+_wandb:
+    value:
+        cli_version: 0.21.0
+        e:
+            b4tnkkvikp4ud9d1f7t2v1f37ei0r2ow:
+                args:
+                    - --model
+                    - vllm
+                    - --model_args
+                    - pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
+                    - --tasks
+                    - mmlu_prox_ar_math,mmlu_prox_bn_math
+                    - --batch_size
+                    - auto
+                    - --apply_chat_template
+                    - --output_path
+                    - ckpts/rerun
+                    - --log_samples
+                    - --gen_kwargs
+                    - max_gen_toks=20000
+                    - --wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto
+                cpu_count: 64
+                cpu_count_logical: 128
+                cudaVersion: "12.4"
+                disk:
+                    /:
+                        total: "464506159104"
+                        used: "12265783296"
+                email: [email protected]
+                executable: /orcd/home/001/jdhwang/.conda/envs/llm/bin/python
+                git:
+                    commit: 57228cdf2aa2656e94d94dc3f5530986c0f48545
+                    remote: [email protected]:jd730/BRIDGE-private.git
+                gpu: NVIDIA H100 80GB HBM3
+                gpu_count: 2
+                gpu_nvidia:
+                    - architecture: Hopper
+                      cudaCores: 16896
+                      memoryTotal: "85520809984"
+                      name: NVIDIA H100 80GB HBM3
+                      uuid: GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b
+                    - architecture: Hopper
+                      cudaCores: 16896
+                      memoryTotal: "85520809984"
+                      name: NVIDIA H100 80GB HBM3
+                      uuid: GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b
+                host: node1803
+                memory:
+                    total: "2163473002496"
+                os: Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28
+                program: /home/jdhwang/.conda/envs/llm/bin/lm_eval
+                python: CPython 3.11.11
+                root: ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309
+                slurm:
+                    array_job_id: "4507342"
+                    array_task_count: "8"
+                    array_task_id: "0"
+                    array_task_max: "7"
+                    array_task_min: "0"
+                    array_task_step: "1"
+                    cluster_name: eofe7
+                    conf: /etc/slurm/slurm.conf
+                    cpus_on_node: "16"
+                    cpus_per_task: "16"
+                    gpus_on_node: "2"
+                    gtids: "0"
+                    job_account: mit_general
+                    job_cpus_per_node: "16"
+                    job_end_time: "1758493186"
+                    job_gid: "209655"
+                    job_gpus: 1,2
+                    job_id: "4535465"
+                    job_name: mmlu_prox.sh
+                    job_nodelist: node1803
+                    job_num_nodes: "1"
+                    job_partition: ou_bcs_low
+                    job_qos: normal
+                    job_start_time: "1758449986"
+                    job_uid: "209655"
+                    job_user: jdhwang
+                    jobid: "4535465"
+                    localid: "0"
+                    mem_per_node: "131072"
+                    nnodes: "1"
+                    nodeid: "0"
+                    nodelist: node1803
+                    nprocs: "1"
+                    ntasks: "1"
+                    ntasks_per_node: "1"
+                    oom_kill_step: "0"
+                    prio_process: "0"
+                    procid: "0"
+                    script_context: prolog_task
+                    submit_dir: /orcd/home/002/jdhwang/BRIDGE
+                    submit_host: orcd-login003.mit.edu
+                    task_pid: "1320304"
+                    tasks_per_node: "1"
+                    topology_addr: node1803
+                    topology_addr_pattern: node
+                    tres_per_task: cpu=16
+                startedAt: "2025-09-21T10:20:02.294501Z"
+                writerId: b4tnkkvikp4ud9d1f7t2v1f37ei0r2ow
+        m: []
+        python_version: 3.11.11
+        t:
+            "1":
+                - 1
+                - 5
+                - 11
+                - 30
+                - 41
+                - 49
+                - 51
+                - 53
+                - 71
+                - 95
+                - 98
+                - 100
+                - 105
+            "2":
+                - 1
+                - 5
+                - 11
+                - 30
+                - 41
+                - 49
+                - 51
+                - 53
+                - 71
+                - 95
+                - 98
+                - 100
+                - 105
+            "3":
+                - 2
+                - 13
+                - 62
+            "4": 3.11.11
+            "5": 0.21.0
+            "6": 4.51.3
+            "12": 0.21.0
+            "13": linux-x86_64
+cli_configs:
+    value:
+        batch_size: auto
+        batch_sizes: []
+        bootstrap_iters: 0
+        device: null
+        fewshot_seed: 1234
+        gen_kwargs:
+            max_gen_toks: 20000
+        limit: null
+        model: vllm
+        model_args: pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000
+        numpy_seed: 1234
+        random_seed: 0
+        torch_seed: 1234
+        use_cache: null
+task_configs:
+    value:
+        mmlu_prox_ar_math:
+            dataset_name: ar
+            dataset_path: li-lab/MMLU-ProX
+            description: |
+                فيما يلي أسئلة اختيار من متعدد (مع إجابات) حول الرياضيات. فكر خطوة بخطوة ثم أنهِ إجابتك بـ 'الإجابة هي (X)' حيث X هو حرف الخيار الصحيح.
+            doc_to_target: answer
+            doc_to_text: functools.partial(<function format_cot_example at 0x14d51fcc0b80>, including_answer=False)
+            fewshot_config:
+                doc_to_target: ""
+                doc_to_text: functools.partial(<function format_cot_example at 0x14d51fcc0e00>, including_answer=True)
+                sampler: first_n
+            fewshot_delimiter: |4+
+            fewshot_split: validation
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: الإجابة هي \(?([ABCDEFGHIJ])\)?
+                    - function: take_first
+                  name: custom-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                temperature: 0
+                until:
+                    - </s>
+                    - 'Q:'
+                    - 'سؤال:'
+                    - <|im_end|>
+            metadata:
+                version: 0
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 5
+            output_type: generate_until
+            process_docs: functools.partial(<function process_docs at 0x14d51fcc0860>, subject='math')
+            repeats: 1
+            should_decontaminate: false
+            target_delimiter: ' '
+            task: mmlu_prox_ar_math
+            task_alias: math
+            test_split: test
+        mmlu_prox_bn_math:
+            dataset_name: bn
+            dataset_path: li-lab/MMLU-ProX
+            description: |
+                নিম্নলিখিত গণিত সম্পর্কে বহুনির্বাচনী প্রশ্ন (উত্তরসহ)। ধাপে ধাপে চিন্তা করুন এবং তারপর আপনার উত্তর "উত্তর হল (X)" দিয়ে শেষ করুন যেখানে X হল সঠিক বিকল্পের অক্ষর।
+            doc_to_target: answer
+            doc_to_text: functools.partial(<function format_cot_example at 0x14d51fb7a200>, including_answer=False)
+            fewshot_config:
+                doc_to_target: ""
+                doc_to_text: functools.partial(<function format_cot_example at 0x14d51fb7b740>, including_answer=True)
+                sampler: first_n
+            fewshot_delimiter: |4+
+            fewshot_split: validation
+            filter_list:
+                - filter:
+                    - function: regex
+                      regex_pattern: উত্তর হল \(?([ABCDEFGHIJ])\)?
+                    - function: take_first
+                  name: custom-extract
+            generation_kwargs:
+                do_sample: false
+                max_gen_toks: 20000
+                temperature: 0
+                until:
+                    - </s>
+                    - 'Q:'
+                    - 'প্রশ্ন:'
+                    - <|im_end|>
+            metadata:
+                version: 0
+            metric_list:
+                - aggregation: mean
+                  higher_is_better: true
+                  ignore_case: true
+                  ignore_punctuation: true
+                  metric: exact_match
+            num_fewshot: 5
+            output_type: generate_until
+            process_docs: functools.partial(<function process_docs at 0x14d51fb79e40>, subject='math')
+            repeats: 1
+            should_decontaminate: false
+            target_delimiter: ' '
+            task: mmlu_prox_bn_math
+            task_alias: math
+            test_split: test

wandb/run-20250921_062002-cothceaw/files/media/table/evaluation/eval_results_1_2a7ec9e10306569eae6e.table.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"columns": ["Tasks", "Version", "Filter", "num_fewshot", "Metric", "Value", "Stderr"], "data": [["mmlu_prox_ar_math", 0.0, "custom-extract", 5, "exact_match", "0.27905255366395265", "N/A"], ["mmlu_prox_bn_math", 0.0, "custom-extract", 5, "exact_match", "0.14433752775721687", "N/A"]]}

wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d425cdaeb765848e725c89a34944d791bdbf0248c6361032236b97f9611117e
+size 89484352

wandb/run-20250921_062002-cothceaw/files/media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f734c70b94d2677510f9200822c1810a65f0546ff35668ea1df59d81ec4ba33
+size 55489730

wandb/run-20250921_062002-cothceaw/files/output.log ADDED Viewed

	@@ -0,0 +1,81 @@

+2025-09-21:06:20:03,677 INFO     [__main__.py:291] Verbosity set to INFO
+2025-09-21:06:20:37,183 INFO     [__init__.py:464] The tag pawsx is already registered as a group, this tag will not be registered. This may affect tasks you want to call.
+2025-09-21:06:20:38,310 INFO     [__main__.py:388] Selected Tasks: ['mmlu_prox_ar_math', 'mmlu_prox_bn_math']
+2025-09-21:06:20:38,320 INFO     [evaluator.py:161] Setting random seed to 0 | Setting numpy seed to 1234 | Setting torch manual seed to 1234
+2025-09-21:06:20:38,320 WARNING  [evaluator.py:172] generation_kwargs specified through cli, these settings will update set parameters in yaml tasks. Ensure 'do_sample=True' for non-greedy decoding!
+2025-09-21:06:20:38,321 INFO     [evaluator.py:198] Initializing vllm model, with arguments: {'pretrained': 'ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', 'dtype': 'auto', 'tensor_parallel_size': 2, 'max_model_len': 20000}
+INFO 09-21 06:20:45 config.py:350] This model supports multiple tasks: {'generate', 'embedding'}. Defaulting to 'generate'.
+INFO 09-21 06:20:45 config.py:1020] Defaulting to use mp for distributed inference
+INFO 09-21 06:20:45 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', speculative_config=None, tokenizer='ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=20000, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=2, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=1234, served_model_name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309, num_scheduler_steps=1, chunked_prefill_enabled=False multi_step_stream_outputs=True, enable_prefix_caching=False, use_async_output_proc=True, use_cached_outputs=False, chat_template_text_format=string, mm_processor_kwargs=None, pooler_config=None)
+WARNING 09-21 06:20:45 multiproc_gpu_executor.py:56] Reducing Torch parallelism from 16 threads to 1 to avoid unnecessary CPU contention. Set OMP_NUM_THREADS in the external environment to tune this value as needed.
+INFO 09-21 06:20:45 custom_cache_manager.py:17] Setting Triton cache manager to: vllm.triton_utils.custom_cache_manager:CustomCacheManager
+INFO 09-21 06:20:45 selector.py:135] Using Flash Attention backend.
+INFO 09-21 06:20:54 utils.py:961] Found nccl from library libnccl.so.2
+INFO 09-21 06:20:54 pynccl.py:69] vLLM is using nccl==2.21.5
+INFO 09-21 06:20:55 custom_all_reduce_utils.py:242] reading GPU P2P access cache from /home/jdhwang/.cache/vllm/gpu_p2p_access_cache_for_0,1.json
+INFO 09-21 06:20:55 shm_broadcast.py:236] vLLM message queue communication handle: Handle(connect_ip='127.0.0.1', local_reader_ranks=[1], buffer=<vllm.distributed.device_communicators.shm_broadcast.ShmRingBuffer object at 0x14d81f47a010>, local_subscribe_port=46667, remote_subscribe_port=None)
+INFO 09-21 06:20:55 model_runner.py:1072] Starting to load model ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309...
+Loading safetensors checkpoint shards:   0% Completed | 0/4 [00:00<?, ?it/s]
+Loading safetensors checkpoint shards:  25% Completed | 1/4 [00:09<00:28,  9.34s/it]
+Loading safetensors checkpoint shards:  50% Completed | 2/4 [00:09<00:07,  3.97s/it]
+Loading safetensors checkpoint shards:  75% Completed | 3/4 [00:10<00:02,  2.41s/it]
+Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00,  1.72s/it]
+Loading safetensors checkpoint shards: 100% Completed | 4/4 [00:10<00:00,  2.69s/it]
+INFO 09-21 06:21:06 model_runner.py:1077] Loading model weights took 7.1216 GB
+INFO 09-21 06:21:08 worker.py:232] Memory profiling results: total_gpu_memory=79.10GiB initial_memory_usage=8.22GiB peak_torch_memory=8.75GiB memory_usage_post_profile=8.60GiB non_torch_memory=1.44GiB kv_cache_size=60.99GiB gpu_memory_utilization=0.90
+INFO 09-21 06:21:09 distributed_gpu_executor.py:57] # GPU blocks: 142750, # CPU blocks: 9362
+INFO 09-21 06:21:09 distributed_gpu_executor.py:61] Maximum concurrency for 20000 tokens per request: 114.20x
+INFO 09-21 06:21:11 model_runner.py:1400] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI.
+INFO 09-21 06:21:11 model_runner.py:1404] If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.
+INFO 09-21 06:21:26 custom_all_reduce.py:224] Registering 1995 cuda graph addresses
+INFO 09-21 06:21:27 model_runner.py:1518] Graph capturing finished in 16 secs, took 0.25 GiB
+Generating validation split: 100%|██████████| 70/70 [00:00<00:00, 2041.89 examples/s]
+Generating test split: 100%|██████████| 11759/11759 [00:00<00:00, 196190.89 examples/s]
+Filter: 100%|██████████| 70/70 [00:00<00:00, 6775.31 examples/s]
+Filter: 100%|██████████| 11759/11759 [00:00<00:00, 78762.47 examples/s]
+Generating validation split: 100%|██████████| 70/70 [00:00<00:00, 20925.19 examples/s]
+Generating test split: 100%|██████████| 11759/11759 [00:00<00:00, 144140.86 examples/s]
+Filter: 100%|██████████| 70/70 [00:00<00:00, 17085.74 examples/s]
+Filter: 100%|██████████| 11759/11759 [00:00<00:00, 72996.18 examples/s]
+2025-09-21:06:21:32,266 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-21:06:21:32,266 INFO     [evaluator.py:279] Setting fewshot random generator seed to 1234
+2025-09-21:06:21:32,267 INFO     [task.py:430] Building contexts for mmlu_prox_bn_math on rank 0...
+100%|██████████| 1351/1351 [00:01<00:00, 1268.23it/s]
+2025-09-21:06:21:33,474 INFO     [task.py:430] Building contexts for mmlu_prox_ar_math on rank 0...
+100%|██████████| 1351/1351 [00:01<00:00, 1264.36it/s]
+2025-09-21:06:21:34,673 INFO     [evaluator.py:495] Running generate_until requests
+Processed prompts: 100%|██████████| 1351/1351 [34:54<00:00,  1.55s/it, est. speed input: 2878.49 toks/s, output: 3301.25 toks/s]
+final processing: 1351it [00:00, 213146.69it/s] | 1/2702 [34:55<1571:56:02, 2095.14s/it] 2874.66 toks/s, output: 3286.44 toks/s]
+WARNING 09-21 06:26:49 scheduler.py:1481] Sequence group 627 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=1
+WARNING 09-21 06:28:23 scheduler.py:1481] Sequence group 523 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=51
+WARNING 09-21 06:31:28 scheduler.py:1481] Sequence group 393 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=101
+WARNING 09-21 06:41:11 scheduler.py:1481] Sequence group 908 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=151
+Processed prompts: 100%|██████████| 1351/1351 [43:26<00:00,  1.93s/it, est. speed input: 1088.08 toks/s, output: 2998.64 toks/s]
+final processing: 1351it [00:00, 191818.31it/s] | 1352/2702 [1:18:25<1:07:51,  3.02s/it] 1088.08 toks/s, output: 2998.64 toks/s]
+WARNING 09-21 07:04:02 scheduler.py:1481] Sequence group 2127 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=201
+WARNING 09-21 07:06:42 scheduler.py:1481] Sequence group 1934 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=251
+WARNING 09-21 07:12:57 scheduler.py:1481] Sequence group 1761 is preempted by PreemptionMode.RECOMPUTE mode because there is not enough KV cache space. This can affect the end-to-end performance. Increase gpu_memory_utilization or tensor_parallel_size to provide more KV cache memory. total_num_cumulative_preemption=301
+Running generate_until requests: 100%|██████████| 2702/2702 [1:18:25<00:00,  1.74s/it]
+INFO 09-21 07:40:13 multiproc_worker_utils.py:133] Terminating local vLLM worker processes
+wandb: WARNING Serializing object of type str that is 104466 bytes
+wandb: WARNING Serializing object of type str that is 102542 bytes
+wandb: WARNING Serializing object of type str that is 107944 bytes
+wandb: WARNING Serializing object of type str that is 103572 bytes
+wandb: WARNING Serializing object of type str that is 105466 bytes
+wandb: WARNING Serializing object of type str that is 100810 bytes
+wandb: WARNING Serializing object of type str that is 100048 bytes
+wandb: WARNING Serializing object of type str that is 106760 bytes
+wandb: WARNING Serializing object of type str that is 100584 bytes
+wandb: WARNING Serializing object of type str that is 103056 bytes
+wandb: WARNING Serializing object of type str that is 102142 bytes
+wandb: WARNING Serializing object of type str that is 101654 bytes
+wandb: WARNING Serializing object of type str that is 105388 bytes
+2025-09-21:07:40:20,195 INFO     [evaluation_tracker.py:207] Saving results aggregated
+2025-09-21:07:40:20,372 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mmlu_prox_ar_math
+2025-09-21:07:40:20,897 INFO     [evaluation_tracker.py:291] Saving per-sample results for: mmlu_prox_bn_math
+vllm (pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000), gen_kwargs: (max_gen_toks=20000), limit: None, num_fewshot: None, batch_size: auto
+|Tasks|Version|    Filter    |n-shot|  Metric   |   |Value |   |Stderr|
+|-----|------:|--------------|-----:|-----------|---|-----:|---|------|
+|math |      0|custom-extract|     5|exact_match|↑  |0.2791|±  |   N/A|
+|math |      0|custom-extract|     5|exact_match|↑  |0.1443|±  |   N/A|

wandb/run-20250921_062002-cothceaw/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,378 @@

+setproctitle==1.2.2
+colorama==0.4.6
+psutil==7.0.0
+GitPython==3.1.43
+docker-pycreds==0.4.0
+gitdb==4.0.11
+opencv-python==4.10.0.84
+sentry-sdk==2.19.0
+setproctitle==1.3.4
+smmap==5.0.1
+hjson==3.1.0
+deepspeed==0.16.7
+transformers==4.46.1
+transformers==4.54.1
+protobuf==6.31.1
+accelerate==1.6.0
+docopt==0.6.2
+gguf==0.10.0
+webencodings==0.5.1
+pickleshare==0.7.5
+fastjsonschema==2.21.1
+backcall==0.2.0
+tinycss2==1.4.0
+soupsieve==2.7
+pandocfilters==1.5.1
+mistune==3.1.3
+jupyterlab_pygments==0.3.0
+defusedxml==0.7.1
+bleach==6.2.0
+yarg==0.1.9
+ipython==8.12.3
+beautifulsoup4==4.13.4
+nbformat==5.10.4
+nbclient==0.10.2
+nbconvert==7.16.6
+pipreqs==0.5.0
+wandb==0.21.0
+trl==0.17.0
+lm_eval==0.4.4
+langid==1.1.6
+annotated-types==0.7.0
+vllm==0.6.4.post1
+typing-inspection==0.4.1
+xformers==0.0.28.post3
+pydantic_core==2.33.2
+outlines==0.0.46
+pydantic==2.11.7
+compressed-tensors==0.8.0
+click==8.2.1
+lightning-utilities==0.15.0
+torchmetrics==1.8.0
+nvidia-ml-py==13.580.65
+blessed==1.21.0
+gpustat==1.1.1
+nvidia-cufile-cu12==1.13.1.3
+nvidia-cusparselt-cu12==0.6.2
+mpmath==1.3.0
+typing_extensions==4.12.2
+sympy==1.13.1
+pillow==11.0.0
+nvidia-nvtx-cu12==12.4.127
+nvidia-nvjitlink-cu12==12.4.127
+nvidia-nccl-cu12==2.21.5
+numpy==2.1.2
+nvidia-curand-cu12==10.3.5.147
+networkx==3.3
+nvidia-cufft-cu12==11.2.1.3
+nvidia-cuda-runtime-cu12==12.4.127
+nvidia-cuda-nvrtc-cu12==12.4.127
+nvidia-cuda-cupti-cu12==12.4.127
+nvidia-cublas-cu12==12.4.5.8
+MarkupSafe==2.1.5
+setuptools==80.9.0
+certifi==2025.8.3
+einops==0.8.1
+fsspec==2024.6.1
+Jinja2==3.1.4
+nvidia-cusolver-cu12==11.6.1.9
+urllib3==2.5.0
+tqdm==4.67.1
+safetensors==0.6.2
+regex==2025.7.34
+PyYAML==6.0.2
+packaging==25.0
+idna==3.10
+filelock==3.13.1
+hf-xet==1.1.8
+torch==2.6.0+cu124
+charset-normalizer==3.4.3
+requests==2.32.5
+huggingface-hub==0.34.4
+torchaudio==2.6.0+cu124
+liger_kernel==0.6.2
+nvidia-cusparse-cu12==12.3.1.170
+nvidia-cudnn-cu12==9.1.0.70
+torchvision==0.21.0+cu124
+blingfire==0.1.8
+triton==3.1.0
+sentence-transformers==5.1.0
+tabledata==1.3.4
+lxml==5.3.1
+accelerate==1.5.2
+absl-py==2.1.0
+Markdown==3.7
+uvicorn==0.34.0
+ruff==0.11.0
+nvidia-cuda-runtime-cu12==12.4.127
+airportsdata==20250224
+nvidia-cusolver-cu12==11.6.1.9
+astor==0.8.1
+DataProperty==1.1.0
+lm-format-enforcer==0.10.11
+mdurl==0.1.2
+nvidia-nccl-cu12==2.21.5
+tabulate==0.9.0
+python-dotenv==1.0.1
+h5py==3.13.0
+chardet==5.2.0
+cupy-cuda12x==13.4.0
+tiktoken==0.9.0
+jiter==0.8.2
+Pygments==2.19.1
+typing_extensions==4.12.2
+datasets==3.1.0
+zipp==3.21.0
+more-itertools==10.6.0
+MarkupSafe==2.1.5
+comm==0.2.2
+pycountry==24.6.1
+partial-json-parser==0.2.1.1.post5
+gradio==4.44.0
+prometheus_client==0.21.1
+six==1.17.0
+pytz==2025.1
+unsloth_zoo==2025.3.12
+starlette==0.46.0
+llvmlite==0.44.0
+peft==0.14.0
+aiohttp==3.11.13
+aiofiles==23.2.1
+importlib_resources==6.5.2
+nvidia-nvjitlink-cu12==12.4.127
+semantic-version==2.10.0
+decorator==5.2.1
+nvidia-cublas-cu12==12.4.5.8
+contourpy==1.3.1
+torch==2.6.0
+pytest==8.3.5
+fastapi==0.115.11
+seaborn==0.13.2
+sympy==1.13.1
+threadpoolctl==3.6.0
+networkx==3.4.2
+python-dateutil==2.9.0.post0
+depyf==0.18.0
+nvidia-ml-py==12.570.86
+jedi==0.19.2
+joblib==1.4.2
+referencing==0.36.2
+diskcache==5.6.3
+httpcore==1.0.7
+httpx==0.28.1
+pyairports==2.1.1
+protobuf==3.20.3
+portalocker==3.1.1
+nvidia-cudnn-cu12==9.1.0.70
+Pebble==5.1.0
+fsspec==2024.9.0
+hf_transfer==0.1.9
+ptyprocess==0.7.0
+pexpect==4.9.0
+nvidia-cuda-nvrtc-cu12==12.4.127
+scipy==1.15.2
+sentencepiece==0.2.0
+cycler==0.12.1
+packaging==24.2
+openai==1.56.1
+frozenlist==1.5.0
+lark==1.2.2
+filelock==3.17.0
+opentelemetry-exporter-otlp==1.26.0
+yarl==1.18.3
+rouge_score==0.1.2
+grpcio==1.70.0
+googleapis-common-protos==1.70.0
+aiohappyeyeballs==2.4.6
+multiprocess==0.70.16
+tornado==6.4.2
+numpy==1.26.4
+nltk==3.9.1
+pip==25.0
+charset-normalizer==3.3.2
+prometheus-fastapi-instrumentator==7.0.2
+numexpr==2.10.2
+pyarrow==19.0.1
+attrs==25.1.0
+lm_eval==0.4.4
+urllib3==2.3.0
+mkl_random==1.2.8
+httptools==0.6.4
+gpustat==1.1.1
+pluggy==1.5.0
+huggingface-hub==0.30.2
+triton==3.1.0
+idna==3.7
+ipython==8.20.0
+pyparsing==3.2.1
+rich-toolkit==0.13.2
+googletrans==4.0.2
+jupyter_core==5.7.2
+zstandard==0.23.0
+aiosignal==1.3.2
+tyro==0.9.17
+traitlets==5.14.3
+h11==0.14.0
+outlines==0.1.11
+jupyter_client==8.6.3
+loralib==0.1.2
+kiwisolver==1.4.8
+blake3==1.0.4
+nvidia-cusparselt-cu12==0.6.2
+rich==13.9.4
+hf-xet==1.0.2
+certifi==2025.1.31
+wheel==0.45.1
+pybind11==2.13.6
+regex==2024.11.6
+mpmath==1.3.0
+transformers==4.51.3
+flash_attn==2.7.4.post1
+nvidia-curand-cu12==10.3.5.147
+PySocks==1.7.1
+gmpy2==2.2.1
+iniconfig==2.0.0
+pandas==2.2.3
+Jinja2==3.1.5
+msgpack==1.1.0
+gguf==0.16.2
+email_validator==2.2.0
+tzdata==2025.1
+cut-cross-entropy==25.1.1
+tensorboard==2.19.0
+matplotlib==3.10.1
+jsonschema-specifications==2024.10.1
+unsloth==2025.3.14
+Werkzeug==3.1.3
+opentelemetry-proto==1.26.0
+fastrlock==0.8.3
+dnspython==2.7.0
+typeguard==4.4.2
+opentelemetry-api==1.26.0
+platformdirs==4.3.6
+importlib_metadata==8.0.0
+opentelemetry-semantic-conventions==0.47b0
+sniffio==1.3.1
+nvidia-cuda-cupti-cu12==12.4.127
+scikit-learn==1.6.1
+hpack==4.1.0
+parso==0.8.4
+torchaudio==2.6.0
+xgrammar==0.1.18
+executing==2.2.0
+mkl_fft==1.3.11
+vllm==0.8.4
+word2number==1.1
+pure_eval==0.2.3
+watchfiles==1.0.4
+pydub==0.25.1
+mbstrdecoder==1.1.4
+markdown-it-py==3.0.0
+jsonschema==4.23.0
+msgspec==0.19.0
+rpds-py==0.23.1
+wandb==0.19.9
+matplotlib-inline==0.1.7
+requests==2.32.3
+interegular==0.3.3
+pytablewriter==1.2.1
+orjson==3.10.15
+xformers==0.0.29.post2
+fastapi-cli==0.0.7
+mkl-service==2.4.0
+opencv-python-headless==4.11.0.86
+prompt_toolkit==3.0.50
+trl==0.16.1
+debugpy==1.8.13
+pydantic==2.10.6
+stack-data==0.6.3
+tqdm-multiprocess==0.0.11
+gradio_client==1.3.0
+dill==0.3.8
+evaluate==0.4.3
+nvidia-cufft-cu12==11.2.1.3
+nest-asyncio==1.6.0
+pyzmq==26.2.1
+tensorboard-data-server==0.7.2
+docstring_parser==0.16
+click==8.1.8
+psutil==7.0.0
+annotated-types==0.7.0
+ninja==1.11.1.4
+pillow==10.4.0
+tcolorpy==0.1.7
+einops==0.8.1
+wcwidth==0.2.13
+typer==0.15.2
+tqdm==4.67.1
+tomlkit==0.12.0
+ipykernel==6.28.0
+diffusers==0.32.2
+mistral_common==1.5.4
+setuptools==75.8.0
+h2==4.2.0
+cachetools==5.5.2
+wrapt==1.17.2
+pydantic_core==2.27.2
+ffmpy==0.5.0
+sacrebleu==2.5.1
+outlines_core==0.1.26
+jsonlines==4.0.0
+fonttools==4.56.0
+nvidia-nvtx-cu12==12.4.127
+safetensors==0.5.3
+opentelemetry-exporter-otlp-proto-grpc==1.26.0
+bitsandbytes==0.45.3
+nanobind==2.6.1
+tokenizers==0.21.1
+propcache==0.3.0
+distro==1.9.0
+python-multipart==0.0.20
+uvloop==0.21.0
+liger_kernel==0.5.5
+python-json-logger==3.3.0
+multidict==6.1.0
+ray==2.43.0
+opentelemetry-exporter-otlp-proto-http==1.26.0
+typepy==1.3.4
+torchvision==0.21.0
+PyYAML==6.0.2
+xxhash==3.5.0
+anthropic==0.49.0
+py-cpuinfo==9.0.0
+compressed-tensors==0.9.3
+opentelemetry-exporter-otlp-proto-common==1.26.0
+opentelemetry-sdk==1.26.0
+shtab==1.7.1
+websockets==12.0
+numba==0.61.2
+llguidance==0.7.13
+hyperframe==6.1.0
+anyio==4.8.0
+asttokens==3.0.0
+blessed==1.20.0
+colorama==0.4.6
+shellingham==1.5.4
+Brotli==1.0.9
+sqlitedict==2.1.0
+nvidia-cusparse-cu12==12.3.1.170
+Deprecated==1.2.18
+cloudpickle==3.1.1
+pathvalidate==3.2.3
+opentelemetry-semantic-conventions-ai==0.4.3
+platformdirs==4.2.2
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/run-20250921_062002-cothceaw/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,110 @@

+{
+  "os":  "Linux-4.18.0-372.9.1.el8.x86_64-x86_64-with-glibc2.28",
+  "python":  "CPython 3.11.11",
+  "startedAt":  "2025-09-21T10:20:02.294501Z",
+  "args":  [
+    "--model",
+    "vllm",
+    "--model_args",
+    "pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000",
+    "--tasks",
+    "mmlu_prox_ar_math,mmlu_prox_bn_math",
+    "--batch_size",
+    "auto",
+    "--apply_chat_template",
+    "--output_path",
+    "ckpts/rerun",
+    "--log_samples",
+    "--gen_kwargs",
+    "max_gen_toks=20000",
+    "--wandb_args=project=MRPO_eval,name=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dir=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,resume=auto"
+  ],
+  "program":  "/home/jdhwang/.conda/envs/llm/bin/lm_eval",
+  "git":  {
+    "remote":  "[email protected]:jd730/BRIDGE-private.git",
+    "commit":  "57228cdf2aa2656e94d94dc3f5530986c0f48545"
+  },
+  "email":  "[email protected]",
+  "root":  "ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309",
+  "host":  "node1803",
+  "executable":  "/orcd/home/001/jdhwang/.conda/envs/llm/bin/python",
+  "cpu_count":  64,
+  "cpu_count_logical":  128,
+  "gpu":  "NVIDIA H100 80GB HBM3",
+  "gpu_count":  2,
+  "disk":  {
+    "/":  {
+      "total":  "464506159104",
+      "used":  "12265783296"
+    }
+  },
+  "memory":  {
+    "total":  "2163473002496"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper",
+      "uuid":  "GPU-9a8a7398-f810-a936-a36d-5b9c0b64a09b"
+    },
+    {
+      "name":  "NVIDIA H100 80GB HBM3",
+      "memoryTotal":  "85520809984",
+      "cudaCores":  16896,
+      "architecture":  "Hopper",
+      "uuid":  "GPU-73b0efed-d92d-d0e8-3c5e-a3122d6e7c1b"
+    }
+  ],
+  "cudaVersion":  "12.4",
+  "slurm":  {
+    "array_job_id":  "4507342",
+    "array_task_count":  "8",
+    "array_task_id":  "0",
+    "array_task_max":  "7",
+    "array_task_min":  "0",
+    "array_task_step":  "1",
+    "cluster_name":  "eofe7",
+    "conf":  "/etc/slurm/slurm.conf",
+    "cpus_on_node":  "16",
+    "cpus_per_task":  "16",
+    "gpus_on_node":  "2",
+    "gtids":  "0",
+    "job_account":  "mit_general",
+    "job_cpus_per_node":  "16",
+    "job_end_time":  "1758493186",
+    "job_gid":  "209655",
+    "job_gpus":  "1,2",
+    "job_id":  "4535465",
+    "job_name":  "mmlu_prox.sh",
+    "job_nodelist":  "node1803",
+    "job_num_nodes":  "1",
+    "job_partition":  "ou_bcs_low",
+    "job_qos":  "normal",
+    "job_start_time":  "1758449986",
+    "job_uid":  "209655",
+    "job_user":  "jdhwang",
+    "jobid":  "4535465",
+    "localid":  "0",
+    "mem_per_node":  "131072",
+    "nnodes":  "1",
+    "nodeid":  "0",
+    "nodelist":  "node1803",
+    "nprocs":  "1",
+    "ntasks":  "1",
+    "ntasks_per_node":  "1",
+    "oom_kill_step":  "0",
+    "prio_process":  "0",
+    "procid":  "0",
+    "script_context":  "prolog_task",
+    "submit_dir":  "/orcd/home/002/jdhwang/BRIDGE",
+    "submit_host":  "orcd-login003.mit.edu",
+    "task_pid":  "1320304",
+    "tasks_per_node":  "1",
+    "topology_addr":  "node1803",
+    "topology_addr_pattern":  "node",
+    "tres_per_task":  "cpu=16"
+  },
+  "writerId":  "b4tnkkvikp4ud9d1f7t2v1f37ei0r2ow"
+}

wandb/run-20250921_062002-cothceaw/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"_runtime":4818,"_step":3,"mmlu_prox_ar_math_eval_results":{"nrows":1351,"ncols":8,"size":89484352,"sha256":"1d425cdaeb765848e725c89a34944d791bdbf0248c6361032236b97f9611117e","_latest_artifact_path":"wandb-client-artifact://r4rn6kktqo8l0um7ttx1ue77yefblg4bjuul5xh36eo036nw65had7u778c1o0h584duz5spxu93ogp1sydi6ps1f7e3imex3lze4vb4gslw6odv77pvqpc282057p94:latest/mmlu_prox_ar_math_eval_results.table.json","_type":"table-file","log_mode":"IMMUTABLE","artifact_path":"wandb-client-artifact://4o3rezrpees9hoiy123dfot64jhr1avjzbsglu96w3nb6bvqo7r9nea1zpq6zrui21syjx86ytqafiqadkxalsovbu9hbu5xuig9vmqholaskqwf4fwujyo7wtf12a0n/mmlu_prox_ar_math_eval_results.table.json","path":"media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json"},"mmlu_prox_bn_math_eval_results":{"path":"media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json","log_mode":"IMMUTABLE","_type":"table-file","ncols":8,"_latest_artifact_path":"wandb-client-artifact://yrkq6zvhmu4okkq3sfdgrab1lnmdmfhv1de8t3mmak4lzthxfrmi8anklhmkr55kv80kuxxms6g4nss6yhg5y1of7bferet1zu65qvb21hgfr1y9ahpspphz37lf2nmq:latest/mmlu_prox_bn_math_eval_results.table.json","size":55489730,"sha256":"8f734c70b94d2677510f9200822c1810a65f0546ff35668ea1df59d81ec4ba33","nrows":1351,"artifact_path":"wandb-client-artifact://p7hlv4z2fp2097ris2q56ne0zq48hm4dhjzvq5oly1ch8bfxb5vrvg12wvtfxoe6yg7o671ug2r9ukhw2uqyzkdu3s1nsoq7fajb1mv7bgfdc1vimxlhjan1auugg7fx/mmlu_prox_bn_math_eval_results.table.json"},"mmlu_prox_ar_math/exact_match_stderr,custom-extract":"N/A","mmlu_prox_bn_math/alias":"math","mmlu_prox_ar_math/exact_match,custom-extract":0.27905255366395265,"_timestamp":1.7584548197219381e+09,"mmlu_prox_bn_math/exact_match,custom-extract":0.14433752775721687,"evaluation/eval_results":{"_latest_artifact_path":"wandb-client-artifact://fs7vvo359n7zetw0n93n6no3jnok4v4xr41uurzhi3oe51ox6y2umo5v79g77afo64weve9g6v7lxxe7k19dji47nxjd5fz1r4mva1ftvcpnlezc70b60s027i8r67ze:latest/evaluation/eval_results.table.json","ncols":7,"artifac
t_path":"wandb-client-artifact://nkoxm3fcoh17kqp4tvv4avy26i5fq3eaju412t711emnjajv88orrsf3ri9qsynz54066d4ze25cbi9v5x7avwdh7scv36ttkgoid25eqaq6y0z4c6ltc088wifvn50e/evaluation/eval_results.table.json","log_mode":"IMMUTABLE","sha256":"2a7ec9e10306569eae6efb589dd0cd352624b8846fd793982cce71b425f2b2c3","path":"media/table/evaluation/eval_results_1_2a7ec9e10306569eae6e.table.json","size":285,"nrows":2,"_type":"table-file"},"mmlu_prox_ar_math/alias":"math","mmlu_prox_bn_math/exact_match_stderr,custom-extract":"N/A","_wandb":{"runtime":4818}}

wandb/run-20250921_062002-cothceaw/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,16 @@

+{"time":"2025-09-21T06:20:02.49713505-04:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpoo93japy/port-1320321.txt","pid":1320321,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-09-21T06:20:02.497479399-04:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":1320321}
+{"time":"2025-09-21T06:20:02.497469852-04:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1320321-1320505-1610173251/socket","Net":"unix"}}
+{"time":"2025-09-21T06:20:02.683932338-04:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-09-21T06:20:02.688959867-04:00","level":"INFO","msg":"handleInformInit: received","streamId":"cothceaw","id":"1(@)"}
+{"time":"2025-09-21T06:20:02.919780696-04:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"cothceaw","id":"1(@)"}
+{"time":"2025-09-21T07:40:27.042657191-04:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"cothceaw","id":"1(@)"}
+{"time":"2025-09-21T07:40:27.058852194-04:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"cothceaw","id":"1(@)"}
+{"time":"2025-09-21T07:40:28.042788818-04:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-09-21T07:40:28.042813831-04:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-09-21T07:40:28.042819045-04:00","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-09-21T07:40:28.04282347-04:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-09-21T07:40:28.042871233-04:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-09-21T07:40:28.042885088-04:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-09-21T07:40:28.042898748-04:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1320321-1320505-1610173251/socket","Net":"unix"}}
+{"time":"2025-09-21T07:40:28.042918708-04:00","level":"INFO","msg":"server is closed"}

wandb/run-20250921_062002-cothceaw/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,15 @@

+{"time":"2025-09-21T06:20:02.689611613-04:00","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-09-21T06:20:02.91974477-04:00","level":"INFO","msg":"stream: created new stream","id":"cothceaw"}
+{"time":"2025-09-21T06:20:02.919776235-04:00","level":"INFO","msg":"stream: started","id":"cothceaw"}
+{"time":"2025-09-21T06:20:02.919783396-04:00","level":"INFO","msg":"handler: started","stream_id":"cothceaw"}
+{"time":"2025-09-21T06:20:02.919795631-04:00","level":"INFO","msg":"writer: Do: started","stream_id":"cothceaw"}
+{"time":"2025-09-21T06:20:02.919791628-04:00","level":"INFO","msg":"sender: started","stream_id":"cothceaw"}
+{"time":"2025-09-21T06:20:03.320481778-04:00","level":"WARN","msg":"handleCodeSave: program relative path is empty"}
+{"time":"2025-09-21T06:20:03.32057814-04:00","level":"ERROR","msg":"git repo not found","error":"repository does not exist"}
+{"time":"2025-09-21T07:40:22.552065372-04:00","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"uploading artifact run-cothceaw-mmlu_prox_ar_math_eval_results","runtime_seconds":4.866579888,"subtasks":[{"desc":"mmlu_prox_ar_math_eval_results.table.json","runtime_seconds":4.47880473,"progress":"36.1MB/85.3MB"}]},{"desc":"uploading media/table/mmlu_prox_ar_math_eval_results_2_1d425cdaeb765848e725.table.json","runtime_seconds":4.303737975,"progress":"67.8MB/85.3MB"},{"desc":"uploading artifact mmlu_prox_ar_math","runtime_seconds":3.824002162,"subtasks":[{"desc":"mmlu_prox_ar_math_eval_samples.json","runtime_seconds":3.315562372,"progress":"21.4MB/37.3MB"}]},{"desc":"uploading artifact run-cothceaw-mmlu_prox_bn_math_eval_results","runtime_seconds":3.051542787,"subtasks":[{"desc":"mmlu_prox_bn_math_eval_results.table.json","runtime_seconds":2.70140314,"progress":"16.1MB/52.9MB"}]},{"desc":"uploading media/table/mmlu_prox_bn_math_eval_results_3_8f734c70b94d2677510f.table.json","runtime_seconds":2.7112040950000003,"progress":"31.2MB/52.9MB"},{"desc":"uploading artifact mmlu_prox_bn_math","runtime_seconds":2.359250196,"subtasks":[{"desc":"mmlu_prox_bn_math_eval_samples.json","runtime_seconds":1.890807991,"progress":"9.9MB/31.3MB"}]}],"total_operations":6}}
+{"time":"2025-09-21T07:40:26.842250567-04:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-09-21T07:40:27.042881025-04:00","level":"INFO","msg":"stream: closing","id":"cothceaw"}
+{"time":"2025-09-21T07:40:27.042891927-04:00","level":"INFO","msg":"handler: closed","stream_id":"cothceaw"}
+{"time":"2025-09-21T07:40:27.042901472-04:00","level":"INFO","msg":"sender: closed","stream_id":"cothceaw"}
+{"time":"2025-09-21T07:40:27.042898277-04:00","level":"INFO","msg":"writer: Close: closed","stream_id":"cothceaw"}
+{"time":"2025-09-21T07:40:27.043173368-04:00","level":"INFO","msg":"stream: closed","id":"cothceaw"}

wandb/run-20250921_062002-cothceaw/logs/debug.log ADDED Viewed

	@@ -0,0 +1,29 @@

+2025-09-21 06:20:02,469 INFO    MainThread:1320321 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-09-21 06:20:02,469 INFO    MainThread:1320321 [wandb_setup.py:_flush():80] Configure stats pid to 1320321
+2025-09-21 06:20:02,470 INFO    MainThread:1320321 [wandb_setup.py:_flush():80] Loading settings from /home/jdhwang/.config/wandb/settings
+2025-09-21 06:20:02,470 INFO    MainThread:1320321 [wandb_setup.py:_flush():80] Loading settings from /orcd/home/002/jdhwang/BRIDGE/wandb/settings
+2025-09-21 06:20:02,470 INFO    MainThread:1320321 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-09-21 06:20:02,470 INFO    MainThread:1320321 [wandb_init.py:setup_run_log_directory():703] Logging user logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250921_062002-cothceaw/logs/debug.log
+2025-09-21 06:20:02,470 INFO    MainThread:1320321 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309/wandb/run-20250921_062002-cothceaw/logs/debug-internal.log
+2025-09-21 06:20:02,470 INFO    MainThread:1320321 [wandb_init.py:init():830] calling init triggers
+2025-09-21 06:20:02,470 INFO    MainThread:1320321 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'_wandb': {}}
+2025-09-21 06:20:02,470 INFO    MainThread:1320321 [wandb_init.py:init():871] starting backend
+2025-09-21 06:20:02,684 INFO    MainThread:1320321 [wandb_init.py:init():874] sending inform_init request
+2025-09-21 06:20:02,686 INFO    MainThread:1320321 [wandb_init.py:init():882] backend started and connected
+2025-09-21 06:20:02,688 INFO    MainThread:1320321 [wandb_init.py:init():953] updated telemetry
+2025-09-21 06:20:02,711 INFO    MainThread:1320321 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-09-21 06:20:03,319 INFO    MainThread:1320321 [wandb_init.py:init():1029] starting run threads in backend
+2025-09-21 06:20:03,673 INFO    MainThread:1320321 [wandb_run.py:_console_start():2458] atexit reg
+2025-09-21 06:20:03,673 INFO    MainThread:1320321 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-09-21 06:20:03,673 INFO    MainThread:1320321 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-09-21 06:20:03,673 INFO    MainThread:1320321 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-09-21 06:20:03,676 INFO    MainThread:1320321 [wandb_init.py:init():1075] run started, returning control to user process
+2025-09-21 07:40:15,859 INFO    MainThread:1320321 [wandb_run.py:_config_callback():1363] config_cb None None {'task_configs': {'mmlu_prox_ar_math': {'task': 'mmlu_prox_ar_math', 'task_alias': 'math', 'dataset_path': 'li-lab/MMLU-ProX', 'dataset_name': 'ar', 'test_split': 'test', 'fewshot_split': 'validation', 'process_docs': "functools.partial(<function process_docs at 0x14d51fcc0860>, subject='math')", 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fcc0b80>, including_answer=False)', 'doc_to_target': 'answer', 'description': "فيما يلي أسئلة اختيار من متعدد (مع إجابات) حول الرياضيات. فكر خطوة بخطوة ثم أنهِ إجابتك بـ 'الإجابة هي (X)' حيث X هو حرف الخيار الصحيح.\n", 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'fewshot_config': {'sampler': 'first_n', 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fcc0e00>, including_answer=True)', 'doc_to_target': ''}, 'num_fewshot': 5, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'until': ['</s>', 'Q:', 'سؤال:', '<|im_end|>'], 'do_sample': False, 'temperature': 0.0, 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'name': 'custom-extract', 'filter': [{'function': 'regex', 'regex_pattern': 'الإجابة هي \\(?([ABCDEFGHIJ])\\)?'}, {'function': 'take_first'}]}], 'should_decontaminate': False, 'metadata': {'version': 0.0}}, 'mmlu_prox_bn_math': {'task': 'mmlu_prox_bn_math', 'task_alias': 'math', 'dataset_path': 'li-lab/MMLU-ProX', 'dataset_name': 'bn', 'test_split': 'test', 'fewshot_split': 'validation', 'process_docs': "functools.partial(<function process_docs at 0x14d51fb79e40>, subject='math')", 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fb7a200>, including_answer=False)', 'doc_to_target': 'answer', 'description': 'নিম্নলিখিত গণিত সম্পর্কে বহুনির্বাচনী প্রশ্ন (উত্তরসহ)। ধাপে ধাপে চিন্তা 
করুন এবং তারপর আপনার উত্তর "উত্তর হল (X)" দিয়ে শেষ করুন যেখানে X হল সঠিক বিকল্পের অক্ষর।\n', 'target_delimiter': ' ', 'fewshot_delimiter': '\n\n', 'fewshot_config': {'sampler': 'first_n', 'doc_to_text': 'functools.partial(<function format_cot_example at 0x14d51fb7b740>, including_answer=True)', 'doc_to_target': ''}, 'num_fewshot': 5, 'metric_list': [{'metric': 'exact_match', 'aggregation': 'mean', 'higher_is_better': True, 'ignore_case': True, 'ignore_punctuation': True}], 'output_type': 'generate_until', 'generation_kwargs': {'until': ['</s>', 'Q:', 'প্রশ্ন:', '<|im_end|>'], 'do_sample': False, 'temperature': 0.0, 'max_gen_toks': 20000}, 'repeats': 1, 'filter_list': [{'name': 'custom-extract', 'filter': [{'function': 'regex', 'regex_pattern': 'উত্তর হল \\(?([ABCDEFGHIJ])\\)?'}, {'function': 'take_first'}]}], 'should_decontaminate': False, 'metadata': {'version': 0.0}}}, 'cli_configs': {'model': 'vllm', 'model_args': 'pretrained=ckpts/MTMathBRIDGE_7B-1randomNegCosmean+randomNegCosCoeff0.01Seed0-E5-20250919_171309,dtype=auto,tensor_parallel_size=2,max_model_len=20000', 'batch_size': 'auto', 'batch_sizes': [], 'device': None, 'use_cache': None, 'limit': None, 'bootstrap_iters': 0, 'gen_kwargs': {'max_gen_toks': 20000}, 'random_seed': 0, 'numpy_seed': 1234, 'torch_seed': 1234, 'fewshot_seed': 1234}}
+2025-09-21 07:40:21,458 INFO    MainThread:1320321 [wandb_run.py:_finish():2224] finishing run jdhwang/MRPO_eval/cothceaw
+2025-09-21 07:40:21,458 INFO    MainThread:1320321 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
+2025-09-21 07:40:21,550 INFO    MainThread:1320321 [wandb_run.py:_restore():2405] restore
+2025-09-21 07:40:21,550 INFO    MainThread:1320321 [wandb_run.py:_restore():2411] restore done
+2025-09-21 07:40:27,041 INFO    MainThread:1320321 [wandb_run.py:_footer_history_summary_info():3903] rendering history
+2025-09-21 07:40:27,042 INFO    MainThread:1320321 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
+2025-09-21 07:40:27,042 INFO    MainThread:1320321 [wandb_run.py:_footer_sync_info():3864] logging synced files