diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3600fb0e5b7e2bcd8741136b54aff66a5ce6cae6 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48aa5e8ed4b2f6a4e8e6edaeb92a31eebe7d69a2cda8c60df06ea94097590c4f +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3648f30d02cb6d8249d367d18f04ee12a81e336 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba42986e6d80b4361bfffc2181825ba6685c455f6231b3a1836673271889260a +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d4edb4794f698cb4a9e6dbdaec9abe4208cfd62 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcc8ad56c134b057a3e0d4a6b5ec32d412efb85a113f5414f83c3e0e5e846e39 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..636e8049561703f2f50cd5ad9aeead55b1cc892c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fdf46c4012fa38fb25060153c71cb258f8f815ee3763fe9f7702c87f08d07d2 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..df45443a5344935bbc7bf987b2c229d4b8858e1d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55061c3027843c09b9a081304b40a3808c27e15486a201fffbb3019a86cb00ee +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..eeed25611e91afdd922a0157d792210c673ac191 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65038fb659e62b80c297884c7ce8589c5c8575c34d5413456f6046fd358a886b +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..4837f5876529eddf5b5bb3d742c3fd061c22f53b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93ba96461284acb608e69954576c5ddc9c383f7cbcd3fd91e60a939e18b5b9f1 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2b20de9cfb9e68e7d23cfae37d7b60bf6129acf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b39aa87552d590887d7db1f7a9589896d60cfd6aac74cd386e158436871cc53 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..466834aa9b9aacea29b7c008e6d3f40356595838 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b21a85c857ea936ab2d50092e4525bd22d85c50788f9833aa546ab9c9766807 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..972b5ac56d6e8f004ba004474555469615715809 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375d3f599ec1fdeca7a1b575418dc273dd46475d662389d67a5e0e8ceb5f6ad5 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf4f38cfd5981243bdbed684ac5277fcf29df1e5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38218bcd6d2f47f0f4005ef28067dc52af779d3b9c99a17678d5eb5b44ae8341 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..3595a9a832991842a1cb8ee201b8ce070368b583 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791b0bdb15379c161eab0d2540a4cd123449317cadafdb19d2d9987381e350c5 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d04f653b8af13c1c9c271a4f0903fb34cc120cf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3638338acdc0a53a53f6d84e09ea1c2d87ff9818537adac032accfecdb27ce6 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..906ca2701b7085a25626c8a1a36b674574fe82b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8913d5adc2f1fb08122e411bbe3ec09149adb5b97c5a3148475651cce71c2f +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1de527035d5d0f4d2983912efea45b2d381f47cc --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0446525a4dc05beb6e7e1b4f37552e6bfe7856095c1af2b4051d4ffd0b59f7 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8148b86ca9f9b4138da9c4c8d9dba765e12688f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b54f1c72edc7aa5044b5515c4bea342e076b2dc48e5db919e435297a8a7c9fe +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1a2c3f45418e3ae458bc1d7fe062ce7226ec6b2 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecd05df70478c3f1d2dc4c875234382fefa77be324a9c92cad423a716a125ec6 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..9089b7d4f88c197cabdf1e7abf04c053489f6375 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b75a9eb020eddcb56a49298d5804317f8b58618137ae1fa4b90f45e1865be84 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..efceb245b1c32c18dd18661dd27261fb84712e71 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04e50bb514aa21e34c23102a1ce5a4369d65fd1bd36c039e0a7782f65845a9d3 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..f81b68ceb5e55358b1a6418873516f8729aa9d90 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946399c4b23a5b18f1c710f3b70c977760dac2d7e5dd42edcf67901628805c63 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..714371eb41338dca9be1e7581a1805623771e422 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2420848b2b409e1bed521d63c969a73dbc3a6d74633539d64dd6d0bcf47386a2 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2306c67b448d6de8a1100071be872c3b88cf875c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebc42032f2150475e35ecc08cd101efd92e16a18a95ed38fb396070deb5794ed +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..718854da07776e6d02ea8dc32140a05d98e986d4 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11f2c764eb51b66c31ab3bc221db12e2e7abd8721db2db4b9b9e8758435dd125 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..ade39604e63cfe278dd44c77469810bcc1256f40 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e408b7dc8730b83ebee93159848b41cde9484ca11be09d9571a4108f1a4b952 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..b078fd24bf6e28dc699c75344a70be02f2fb0b4f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0d7f474d7205966134325f63f492989a2ae4a796dcfb5a9bc169ee925cbef13 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fb845c2e92b617a71ae9d46f8e6d1780d309016 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec0c209a5fb678ca6c1d5b30787742bf1979bd14cbd9f0a604d627755d59e084 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..80c5a0bcf8eee63d9823df30c59252811fb2a9c0 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3d66d2e780f48fe3b965980f5c5064bca7462bdeac7750279cfdd094e96fd6c +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..78059c06ae8b349f150b86005e01419468059a40 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db804060a738ea83a2ff13a2f2a7caa961e4b1692bdcc84c5d45e933074d5673 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..775b0336ae9b08c1e783c31d9a90de25ed245b1b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0693cb6c51a8bb3806e57e110b732d87e56dbcf98d07e8a0b1c5ecf23b1d955 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0521b11ab1f6ec9e4f3efaecd0529b9b0702fc9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8073d7a36a2ddcd9c45ce1d70847603546ccdb46a02d3b4b34fbd3bd9d26891b +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f3d5a5567835ab495504cab922bd65725bb8487 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8a32a081da9079beeedd874cec1f1ef0c3102f146d1edb201f6915ea6655d8 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a5d0d63008f286d53edf60127a575dd14e9c737 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank16/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b010932a3dca26532c3c4251f17bc47de879bee59f6bbf7a8933f660139b5299 +size 60527100 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..39c3c9a1c3c67da8cef1614e0a425d0e98a985cd --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd71d559cf12ad081a4b593acb207ee2aec523d36a144f74aeb2a71919f655f +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e6fca7997e9d59c505e82b2752c209e3a898c9c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf43081dd8a6dd77bc7e2f1da0dcffef9fee67ae9f6893405f47cb347980445 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c7d43f29eca5467304816f4a981cf2d0fef2102 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfe9fe9b8fc89b4f44bf5252176d8c3c67fb66ad48a30c0b068471d4d3d5b05 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..a950ac99a8515cff5f382fa043f58cce7a8d67cf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0f1f7ffcc681155bb3817515a4738588079f47b574b657b7ab4076b3469b9be +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9f1224acec7f2bc34cb29b6eacb1443c8b1294d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837ed09d8198bc4ce137faaa688d3e4e18112a8864b92b9a29362f0a2d3cc7c9 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..443c4e9ff895d5e26a4f52f2ac3893f4a1893346 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9332ac7512ee20ebb7576d93ed0e96db4a45135e3752a6df46ccb4ede6d9efe +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..5df1f94dfa3ad0bc8855220a194e2920bea22c1a --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f516bd806950c0332346534b62c5a2e69129c163f56291a14516685058452af6 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ea797f2faa04da5f3204b953423eeabfeeaaa26 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca08e39f61bc4c85c2cb1544251a1b67f41d9bca10798a6cbb831f9aa64d52b7 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..326e48b084dd64cbe549ebf2e5b17b027f93b7a6 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09730b37fc1f025d67e89f4b54dae467d8cd14a3fef1aed6fae2e5629d531bb6 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..16010305028c60b6a4cba1148bbd3d1b1a050295 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e09324c798b0480653a83191ffeca5096efb71c95d72221eed34b3b8d34df22b +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5ecb643207c9ba11df5e2c97aba6baddb2f3a6d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b52827fe180df8139bfd8e0144f34eef8e56daac996dd52aab0c0706b546fa +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7c4eba1b8e0ae4ab5debdb1f2f0d545f8ccd2c4 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2387a31a3bbc508b7a030214266bb7e199f49f64933b353d427d1d624c9419a +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..f17e3888cbae65a8974d374b9cc6d4a356bb18cb --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a1cea69bad38b32f078ea47e5475464d19a9d8741a5d497e3bd287787f2a25 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0884d04f9d202c09f7f300ba567e1feec060e8b2 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3acd092f24d6d42a804e8a2d5a0889c3135b7fc13d35b336bd577f9dd91ff65e +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..b79c75f8f77678045c03342bf459d06ffb5613fa --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:444e9e52217b8ae761263353f535102cef429ea4c44954d37e1726bb3280e9fd +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f2b4156143bfea01155df6a9e9b04f487288bd6 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c946ae3620b45159a27427bd31e7c4bab3dd4efceb87dbfd844e86178b20f2 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..34ee03b24157717e1064f22cedcf08fcdbee81c8 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd54ddef40c1c076dad7e52bbd5d810c1368fbaa7257c5e18f08c8ea875993fd +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..94ba98882c65a5212a71e6b1bb8a48dafd8da5d2 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:495839099d15f3e0647bbedeb5ef913b937bc67a54a2bdadd9c06b639c7dc6b1 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..db9e3085e5b3a14501e1aa5c10a7985c2e48c21d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e31df076c1f1bf3f57548af137ec946fbe716d5ef063954fffca9a7c74df14bd +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..186bcd9161e4a1cfe55ece979c1ca435c181a35e --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:558c8c7181af6577da64f28b7991b5b46ad120c37d25b488af6b8ccbb278f72c +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..1412f75490e9b40bc6bac078d4a3145e6d78fd1e --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71fcff0643360074328655ebdeacbb493691f297829a76ac38025d11e298e7bc +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..629721c60044aba82299decda8fba330313b995c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ccb66bb53564f8ca720f1220045038c479781c68b44b4403691488c40e17679 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..c14f42f2cdb3f32692f6a1eca721725cf064b22c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4522a5ee70e89952a7d9dde6e52e296be6a69e25373e26d257da3c2b2f45582 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..872cfe5852b881b2a4d3c9614f1439f7b393d35d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:795a5cc9a5c25cafaac8babd537de8d15e7f72746980ffe7c0416996364cdebb +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..76d69153c08ac40ab498b078ee9c507b8b6efe62 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996ddc9dad7f3d5e720c1a02a9c64943834763ad1c6419866d0b1e820586dad6 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a31261b576fb24c66fb8a88de9c7fcfdce99b7d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00e513cf8680599f94134252aae5434afc1e23c48d06ceb4fca29be166285530 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..40482cfd20991d9f217a86cb4a7858b103093147 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8095b44676237ec78abcf05e242448f25bad3d4c43ee7151bcfd30f88bda4aeb +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..34faafccc34d829bebc2807e46e665e930a5c379 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd26824cb2bd1c7b43ae32ef6590c660c7eccf715f8b8242864ab2f8ab6577fd +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e7c660ed6bd9b9cd66eaebe38668406fafc9b15 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f529a5ac4a1a4dba3e83f0831997ccb3fb68c3137f5e410ae891f766a59f086 +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3b1daaedec9ee2feb9e684a2c9bf54dfaa08df5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12ad5693018bcea216f1002acf38b1dc8086f168fc26e80944dda309eeb43a6c +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..5be6c89a80adef6998227c05e1ae454092c11b62 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c70c07d2dd989906c2dda2dbd2c4dc3255e57800183a0c1094d2ac1ef034817c +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..6594920ad0b74fb78fce88aff62cfcc2cb02864b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank32/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee70ab702bf2fc171741f4251c0598bd0708e7a8e64dbd760cba63ad45ec2aaf +size 120394620 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..83f0177af6bdf888d64b5d0683936d350b957dab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c555213aee4a7c7066dbbf1916ba1a83d348161942099634c196b18e0f7f1b99 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7e357497472428df5d0b6c5b07be16387aea3d8 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99c7edffba32cdee80219cd97e16e152e45b2e67a89657a6cbff7c6db116648d +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8bd84de6f81a60337d3815bd06173e147af98f7 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e70e8974d05c6038d72fb6a773f6c1f11fcae9fe989133c51dfe97820c492083 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8be494bb2f4e34bca65766bb63896dfafb14a4a --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1406a503734c07db9e735d4f68f7a0b6311b8b6d37312effa6cdb8e7b7b9b2df +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbe66c71af3842905b1585674ed1d5155f080099 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5832ffefbc6360295313732377be86f2b3875dcc1594f0ec2bd88832ed8a4ab +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2196f7ab343ba85630d54f555fd8581f7f5b72d2 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d152a0d3cdda44a287367e855116227b62dd4718906eb9ad68e6d0875c29347 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..a25671d80eefef05f3b9b47a8bd52e12dbcf3cff --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cec7a2e53f7b657631fa42461dbebced0caa9d30d20b9b765ee0ce4f36c27f3f +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..36bb8a9efa23cc34d826925a9004cbcb1189bf0d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aadf7be47c5e9dd98cba3a701421e931064c0d0539696710fb002d64becb64d4 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a9e8bf83b3e93f1b580f963dfc0ed7be30c839d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23fef62e3e9a2ceb0f67c9da48d0423d5436101c81248a394ad830385bb0c79 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d319e153f28600d99956e9a714596f8f31393ba --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c16bcdbcca0deefc65920b049002aa9f69fa920a39aebf74a496304317656a4 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e746e43f0d3282a9b8dc422d7db5349090bb325 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da0ed7a1d3028aba14be8c04f8cb6430ea465e8fde07bb53fb8164cc302f008 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a77f3ae85cdb7adc4ff32a1705eb8879e41466f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b8d09235de0ed1e253be4de5b9b442930e51c355d3474a9629127a14350c355 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a38387c249dcc98fc3a46485d2a9109f9069cf8 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b06fe71b3b5020f7376737e50f072f1efc72f77a49a66012dce73ad30dcce3 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6e32f04076f64c1428ea80300fde3540bbe340c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b64c9afe3ae503753e8892472848a04d49115065879fb06fc065c116bc107d +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0c154402d51215babef95f1a8e9c166b635b26b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72cab8a6c29b7df2fc2ec613b19ea4156764a3201efcbb9813fb0a9071f956f2 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b83c08d66c347d0665b5a7c6ff6e68bf6ec5e655 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17c8cc22bf51a92961764c75959795b17699555a5b6bf31fc9db75e5705c497b +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..62d604e83ecbc6481968d4d61360268d7c5d4979 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20164a5ae3d4434dc61f993212e16cb46c19257bef88ed35b7f2c8f367804ac9 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3b219e0a05fbf2caa12fd711eb9089486f6565f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bd415d2678ec10f27efa298b3055046114d8e9045a7d2bae3bb86a45cebace +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca5c85579d041237423edbbb7522882b4050f813 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:458e6864b2bec30c23da22cb9e13c3b0b01d22fd3543ac0dbcd65482edeff2df +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..69229a586b720974748837d131340fe38e05e4a6 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7a50942eadb76285f7e8b63078c795e13a74c3eec4f0ade0c67de815c8610b +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bedcabd99a8778a088310ab22a8ab9f8567b4e4 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ced1610c082dda805f52b2fa6ab048a143740b7bf80c1f42584edcbe62a1ea +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f6ba7032e2c5f821df42677576736009c404915 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff1fbc3c12a1a8a40ca07a57a2f461e07e6e7dc8b09862846d877028473217ef +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..55e801532aae5be33848922202957ce2d1748eb4 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ead8e63fee3ea5d5daecf410c61c51ff71838e37f2209386eb07ed346db38a +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bddc9dde38d26d52f1ef6eaf9ea1c85229fc315 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49e1cde016deb883b9f6dc49dd3557ddfa2e10e89b5e61abd6c5c886f18f7d45 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b1bf21640e617c85f1557777cd3b732ccd37d39 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c3aebb88465cf3e38193c3aa3df045e7ef82534b249bd98332e799be4bfc407 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..75967d6277c96fca5d138c39ed33531130415f3f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7546ce81eb0e4454a2ab45c8385af7891f1eb7789d99f4e505df69b2fae53324 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e971ae17f936591a9be04d6eab775e1ce4ef8fb --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6724a2e1f0442f0865e1bebee63392b4c9ce6d9398c22f3acd121cd4f36516 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..636cfa5eed354d75e9a27ebdb978ee430b3d159f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44ddfcce0b0e2b86947174a89a60e89fe5db2ec3d608e2d54c3ca7eb8ec9b287 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..a837d45c8392f5f009efc3ec1d30051146ee4f03 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae242dc03444e29dc076c8f83e5e3b7bb7bd05f2f3bb0d9e5f962b2f105ea4a +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ceb0ef9559f0aa2d91d7242d00e8b88868d2651 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b65da67d533b0fead44968790e75d09fda42cbed04c7e2def98353eaa4d0ea00 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fba84d4073d465c551982d3e23bbbc10ffab5f5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23ce063269d298383f37d3ceb57c703ef584376e4ecf217ea2b59f1bc6eec2f4 +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..59c16604d727ece577f188198003591f404abf27 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank64/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9476ee8be9b00d8ed200527a6048f1f6f7173e76317ffe35a99f1d843c1baa0a +size 240128892 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..6806c8faa895db5ff539614812332b104771973a --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff5e5b5bd6ae1826d37bad37b9e11dd1289373df97671fc065cb5d81fef5937 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb3c384f8982430687144799bf1f0a20b9855aa5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f951e4882d678f350b7027cfbf28e86002f8e51100d58bf9bdd79c6f08437a3c +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..98331a1a0a9afd73bd16c2721f8c7b66a53a9411 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3da3677f03d7093f1e1627bcb4c087bcc9b8dfac266ff149f3db78fcdafc9520 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..67efff71625a08f7b2a818d0a52e58f746539f13 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b4c56ad0f81055f282c890f678020ca1ed34479c12e6ef7c107d9f2e3510d1f +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a5bd7736153c5b241dcd7dc1d9863ce2207de46 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65aa3a189959c75d1a5546dab9160a2feee7832770ba19e2044b8d93adb3fe62 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..f53c17757c2b38f7f5f195928a5df03b35e784cb --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98ca3e338ef40b2e5879fc75fd0d3d3d616043f937d197acf2d9571c256565cd +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd049b96a754625c46259396d1c0aef29c0f0a0c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0054d5d149e9b8e3872e36f1c648fbd43f6580a60dcad8d0451ca6393c962687 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..051775410dd6be6f6063c948c4479569eeb31215 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d54f98f26a041817af0af3e8ac9c9123f76c92e3312af84c43502e69cd2e6d77 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ee1fdf55023f89f03a16e609713fec1992ce08a --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20836b1ad35a57a4c50c7b65012b1253bd2d09a20605e18978436d5a1c38639e +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0a1712c6a3c631c91ef89dc7f2665afd1311430 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:299f45771af7aaafb110311aab5bbaa30e31ae70ab3a210668bec32f56c40819 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..980e040090334fa614564f0fb21c98d2960e35d8 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ddffe4ee29a9a54cef5aeb43388cce7fe5c094203128850ac98d012b248e718 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b8f9e0a7d8a9e79fb4dfc57e13085b4de5e0980 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0648be099e137bca994118500cd809a2830542fc8f822a6ea6921eee1c2bef3f +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc12c47a795c1306b60edec58554d501478051f8 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f2a4b8cf98bbe8d4a38e1a3af8abcdaaa5bc0c43a1f28e7794993d07964818 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..330aa48749a00ab8e56a8150d4a31c2c3fe91887 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef6e3e963efac825cd58dc02686f73d5572f430f7315f0840ef8e33f722b5fc +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e1cd0eeb8a8421e8a0a2e17f79aaa6add45a4c9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f761e12bba56c8514e7391d36a3ef44882d36e74974775834a4e116332cbf4c4 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..ace6045e1d2bca2c9705951c162df7047af4829e --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46c74bc4d8a5cc5aca4632402cc9d6e5ccdf6a7d236cbf9dacd903196b7a1e9f +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0dd90a26c60bd1d5c9cf06e18060126e458a8a8 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba4a3e1ce46dc2a5c00e45ee982509baa1c518c917e9d6c618f77098fa3d0d1 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..5209b72894b1138295efec14c81923f64d93c557 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef88165d15a293a7297568c2b5f7b6334e7f0c594b107e7da50202c88dfa3e1a +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fa95a0ffe3ec2f315573c8258a284eb06d85aaa --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:094b8dc5b6f600aee267931d8283f48b59bd92b2cc25290f206746f56b4358dc +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..f76dfb865cc8bc1fb2eda01b730c8cb72221becd --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:556ed6e92e86282e3d41df096c95e5fa8c97588d008793363dcac9e7bd1f0d27 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8aaefd1afeb212dc06bc402afe7864694f00ca4 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc574a261b92874835eb0f2c04998c60991fef2f7af8392a97544b266075e1f5 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..cffa5d01d8bf517881685c595f460b37b41759bb --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf763c02b9d227877829503c6e1983a1737bf04d3b15608c59e441031be4a7a1 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..f66a0e190b35c07a3f7f10b8c0cf06ff5e50b304 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:703cae26ae7d3228f61d2353a5e45d147047b133191ef6f479155a381cf4b620 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f938aa5796e3e9e954fb16da3c64e2076966a80 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:580dcb55f07820a753dba9efd418f29cd470599dedb132d3158da11920cb87b1 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5f5b59d9e9f1b9f437b1aaf83bb4871e553cd9c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8fccc4d16f10edbe269056576ba6bc328aecbcec4dba90154f816aad3d89305 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b0701aa0c63d9f70424d4e1ef86c457eeebe901 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62be375d4c995985e5a642d7bb60394e6c1328751e18430afb68de6b2463f361 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0a86dd4621988c71e60307a45d8803686ba7a11 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6951d38d62c656fb8c82584bc852b21c5ccc0547c0f79b18cce9b883f23131d +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..f496b0f6cf8fa69e706dbc9eba736c10b9f6dda6 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf7027017b75cab2f7fa8d33583a53322ceb20c8086bc6db31d849ae41540241 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..17994875d4a7584921700c2d8208abbc4d18f8c0 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f0c95811961b3a8667f2385ba43a8b2c0f0ef23815f241d4823554a1b6837a5 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..a75880010d8e078ef2b319f83aad2f83bf1eedce --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ba4f907587c8636307d3caa26d36f2f8a45fb2e1be6142056f6fe17c8e9d02 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..4605b43e921f0144d38267689ed7bfcffd7636b6 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e13bafb0c036984ab2bb5df59dfe3660adb138fae05167d5a64f6061eabe937 +size 30593532 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28b0c60ee321e7a59677b8940e2d1472531e5fb6 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-LoRA-rank8/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dbc87e817482a5a30bcc608ad61f84bc50c4f48d9007f7eb7efd1302d1f0152 +size 30593532