llm-eval-dashboard / data /mmlu /response_rec.csv
Blair Yang
done api
5264831
raw
history blame contribute delete
840 Bytes
sub_topic,model_name,card_idx,no_responses_human,no_correct_human,no_responses_llm,no_correct_llm,oracle_acc
high_school_physics,Mixtral-8x7B-Instruct-v0.1,-1,10,8,10,7,0.68
high_school_physics,Mixtral-8x7B-Instruct-v0.1,0,6,4,6,3,0.66
high_school_physics,Mixtral-8x7B-Instruct-v0.1,1,4,4,4,4,0.7
high_school_physics,Mistral-7B-Instruct-v0.2,-1,0,0,0,0,0
high_school_physics,Mistral-7B-Instruct-v0.2,0,0,0,0,0,0
high_school_physics,Mistral-7B-Instruct-v0.2,1,0,0,0,0,0
high_school_biology,Mixtral-8x7B-Instruct-v0.1,-1,10,8,10,7,0.68
high_school_biology,Mixtral-8x7B-Instruct-v0.1,0,6,4,6,3,0.66
high_school_biology,Mixtral-8x7B-Instruct-v0.1,1,4,4,4,4,0.7
high_school_biology,Mistral-7B-Instruct-v0.2,-1,0,0,0,0,0
high_school_biology,Mistral-7B-Instruct-v0.2,0,0,0,0,0,0
high_school_biology,Mistral-7B-Instruct-v0.2,1,0,0,0,0,0