bge-small-rrf-v3 / training_meta.json
Stffens's picture
add training_meta.json (arm_vol hyperparameters)
0cf06aa verified
{
"base_model": "BAAI/bge-small-en-v1.5",
"n_pairs": 39852,
"epochs": 2,
"batch_size": 32,
"lr": 3e-06,
"use_amp": true,
"max_seq_length": 256,
"seed": 42,
"training_time_s": 409.6,
"multi_eval": {
"per_dataset_baseline": {
"scifact": {
"ndcg_at_10": 0.7332589505448117,
"mrr": 0.6985621693121693,
"hit_at_10": 0.8666666666666667,
"n_queries": 300,
"ndcg_at_3": 0.6864461358824794,
"recall_at_100": 0.9633333333333334
},
"nfcorpus": {
"ndcg_at_10": 0.3537822502011714,
"mrr": 0.5585421888053467,
"hit_at_10": 0.718266253869969,
"n_queries": 323,
"ndcg_at_3": 0.42597599502093686,
"recall_at_100": 0.31346252170704986
},
"fiqa": {
"ndcg_at_10": 0.3916405282437908,
"mrr": 0.4678749020184205,
"hit_at_10": 0.6790123456790124,
"n_queries": 648,
"ndcg_at_3": 0.34901243718670777,
"recall_at_100": 0.692446185501741
}
},
"per_dataset_final": {
"scifact": {
"ndcg_at_10": 0.781752459899092,
"mrr": 0.7422301587301587,
"hit_at_10": 0.93,
"n_queries": 300,
"ndcg_at_3": 0.7335167753839059,
"recall_at_100": 0.99
},
"nfcorpus": {
"ndcg_at_10": 0.37574420884042387,
"mrr": 0.5812288073123986,
"hit_at_10": 0.7306501547987616,
"n_queries": 323,
"ndcg_at_3": 0.4442297150425726,
"recall_at_100": 0.3516690054451889
},
"fiqa": {
"ndcg_at_10": 0.48181104416454384,
"mrr": 0.554401822457378,
"hit_at_10": 0.7901234567901234,
"n_queries": 648,
"ndcg_at_3": 0.4284406049379992,
"recall_at_100": 0.8457548067501771
}
},
"per_dataset_pairs": {
"scifact": 4291,
"nfcorpus": 9713,
"fiqa": 25848
},
"per_dataset_budget": {
"scifact": 11601,
"nfcorpus": 9713,
"fiqa": 38686
},
"per_dataset_sizes": {
"scifact": 5183,
"nfcorpus": 3633,
"fiqa": 57638
},
"macro_baseline_ndcg": 0.49289,
"macro_final_ndcg": 0.54644,
"macro_delta_ndcg": 0.05354,
"sampling": "temperature",
"temperature": 0.5,
"total_triples_target": 60000,
"gated_out": false,
"per_dataset_gate": false,
"min_gain": -1.0,
"seed": 42
}
}