> **Note:** this model card has empty or missing YAML metadata (see the [model-card metadata docs](https://huggingface.co/docs/hub/model-cards#model-card-metadata)).
Training run (Weights & Biases): https://wandb.ai/alexwortega/tiny_llama/runs/2k4g018j?workspace=user-alexwortega
hf-causal (pretrained=tiny_3ep_freeeze,dtype=float16), limit: None, provide_description: False, num_fewshot: 0, batch_size: 16
| Task | Version | Metric | Value |    | Stderr |
|------|---------|--------|-------|----|--------|
| danetqa | 1 | acc | 0.5201 | ± | 0.0174 |
| hendrycksTest-abstract_algebra | 1 | acc | 0.2900 | ± | 0.0456 |
|  |  | acc_norm | 0.2900 | ± | 0.0456 |
| hendrycksTest-anatomy | 1 | acc | 0.2593 | ± | 0.0379 |
|  |  | acc_norm | 0.2593 | ± | 0.0379 |
| hendrycksTest-astronomy | 1 | acc | 0.1447 | ± | 0.0286 |
|  |  | acc_norm | 0.1447 | ± | 0.0286 |
| hendrycksTest-business_ethics | 1 | acc | 0.2100 | ± | 0.0409 |
|  |  | acc_norm | 0.2100 | ± | 0.0409 |
| hendrycksTest-clinical_knowledge | 1 | acc | 0.2566 | ± | 0.0269 |
|  |  | acc_norm | 0.2566 | ± | 0.0269 |
| hendrycksTest-college_biology | 1 | acc | 0.2500 | ± | 0.0362 |
|  |  | acc_norm | 0.2500 | ± | 0.0362 |
| hendrycksTest-college_chemistry | 1 | acc | 0.2100 | ± | 0.0409 |
|  |  | acc_norm | 0.2100 | ± | 0.0409 |
| hendrycksTest-college_computer_science | 1 | acc | 0.3200 | ± | 0.0469 |
|  |  | acc_norm | 0.3200 | ± | 0.0469 |
| hendrycksTest-college_mathematics | 1 | acc | 0.2100 | ± | 0.0409 |
|  |  | acc_norm | 0.2100 | ± | 0.0409 |
| hendrycksTest-college_medicine | 1 | acc | 0.2428 | ± | 0.0327 |
|  |  | acc_norm | 0.2428 | ± | 0.0327 |
| hendrycksTest-college_physics | 1 | acc | 0.2549 | ± | 0.0434 |
|  |  | acc_norm | 0.2549 | ± | 0.0434 |
| hendrycksTest-computer_security | 1 | acc | 0.3100 | ± | 0.0465 |
|  |  | acc_norm | 0.3100 | ± | 0.0465 |
| hendrycksTest-conceptual_physics | 1 | acc | 0.2809 | ± | 0.0294 |
|  |  | acc_norm | 0.2809 | ± | 0.0294 |
| hendrycksTest-econometrics | 1 | acc | 0.2018 | ± | 0.0378 |
|  |  | acc_norm | 0.2018 | ± | 0.0378 |
| hendrycksTest-electrical_engineering | 1 | acc | 0.2483 | ± | 0.0360 |
|  |  | acc_norm | 0.2483 | ± | 0.0360 |
| hendrycksTest-elementary_mathematics | 1 | acc | 0.2513 | ± | 0.0223 |
|  |  | acc_norm | 0.2513 | ± | 0.0223 |
| hendrycksTest-formal_logic | 1 | acc | 0.2778 | ± | 0.0401 |
|  |  | acc_norm | 0.2778 | ± | 0.0401 |
| hendrycksTest-global_facts | 1 | acc | 0.3000 | ± | 0.0461 |
|  |  | acc_norm | 0.3000 | ± | 0.0461 |
| hendrycksTest-high_school_biology | 1 | acc | 0.2032 | ± | 0.0229 |
|  |  | acc_norm | 0.2032 | ± | 0.0229 |
| hendrycksTest-high_school_chemistry | 1 | acc | 0.1823 | ± | 0.0272 |
|  |  | acc_norm | 0.1823 | ± | 0.0272 |
| hendrycksTest-high_school_computer_science | 1 | acc | 0.3600 | ± | 0.0482 |
|  |  | acc_norm | 0.3600 | ± | 0.0482 |
| hendrycksTest-high_school_european_history | 1 | acc | 0.2364 | ± | 0.0332 |
|  |  | acc_norm | 0.2364 | ± | 0.0332 |
| hendrycksTest-high_school_geography | 1 | acc | 0.2222 | ± | 0.0296 |
|  |  | acc_norm | 0.2222 | ± | 0.0296 |
| hendrycksTest-high_school_government_and_politics | 1 | acc | 0.1917 | ± | 0.0284 |
|  |  | acc_norm | 0.1917 | ± | 0.0284 |
| hendrycksTest-high_school_macroeconomics | 1 | acc | 0.2179 | ± | 0.0209 |
|  |  | acc_norm | 0.2179 | ± | 0.0209 |
| hendrycksTest-high_school_mathematics | 1 | acc | 0.2704 | ± | 0.0271 |
|  |  | acc_norm | 0.2704 | ± | 0.0271 |
| hendrycksTest-high_school_microeconomics | 1 | acc | 0.2143 | ± | 0.0267 |
|  |  | acc_norm | 0.2143 | ± | 0.0267 |
| hendrycksTest-high_school_physics | 1 | acc | 0.2450 | ± | 0.0351 |
|  |  | acc_norm | 0.2450 | ± | 0.0351 |
| hendrycksTest-high_school_psychology | 1 | acc | 0.2349 | ± | 0.0182 |
|  |  | acc_norm | 0.2349 | ± | 0.0182 |
| hendrycksTest-high_school_statistics | 1 | acc | 0.2037 | ± | 0.0275 |
|  |  | acc_norm | 0.2037 | ± | 0.0275 |
| hendrycksTest-high_school_us_history | 1 | acc | 0.2500 | ± | 0.0304 |
|  |  | acc_norm | 0.2500 | ± | 0.0304 |
| hendrycksTest-high_school_world_history | 1 | acc | 0.2827 | ± | 0.0293 |
|  |  | acc_norm | 0.2827 | ± | 0.0293 |
| hendrycksTest-human_aging | 1 | acc | 0.3094 | ± | 0.0310 |
|  |  | acc_norm | 0.3094 | ± | 0.0310 |
| hendrycksTest-human_sexuality | 1 | acc | 0.2519 | ± | 0.0381 |
|  |  | acc_norm | 0.2519 | ± | 0.0381 |
| hendrycksTest-international_law | 1 | acc | 0.2397 | ± | 0.0390 |
|  |  | acc_norm | 0.2397 | ± | 0.0390 |
| hendrycksTest-jurisprudence | 1 | acc | 0.3426 | ± | 0.0459 |
|  |  | acc_norm | 0.3426 | ± | 0.0459 |
| hendrycksTest-logical_fallacies | 1 | acc | 0.2638 | ± | 0.0346 |
|  |  | acc_norm | 0.2638 | ± | 0.0346 |
| hendrycksTest-machine_learning | 1 | acc | 0.1875 | ± | 0.0370 |
|  |  | acc_norm | 0.1875 | ± | 0.0370 |
| hendrycksTest-management | 1 | acc | 0.2039 | ± | 0.0399 |
|  |  | acc_norm | 0.2039 | ± | 0.0399 |
| hendrycksTest-marketing | 1 | acc | 0.2735 | ± | 0.0292 |
|  |  | acc_norm | 0.2735 | ± | 0.0292 |
| hendrycksTest-medical_genetics | 1 | acc | 0.3600 | ± | 0.0482 |
|  |  | acc_norm | 0.3600 | ± | 0.0482 |
| hendrycksTest-miscellaneous | 1 | acc | 0.2580 | ± | 0.0156 |
|  |  | acc_norm | 0.2580 | ± | 0.0156 |
| hendrycksTest-moral_disputes | 1 | acc | 0.2630 | ± | 0.0237 |
|  |  | acc_norm | 0.2630 | ± | 0.0237 |
| hendrycksTest-moral_scenarios | 1 | acc | 0.2291 | ± | 0.0141 |
|  |  | acc_norm | 0.2291 | ± | 0.0141 |
| hendrycksTest-nutrition | 1 | acc | 0.2418 | ± | 0.0245 |
|  |  | acc_norm | 0.2418 | ± | 0.0245 |
| hendrycksTest-philosophy | 1 | acc | 0.2283 | ± | 0.0238 |
|  |  | acc_norm | 0.2283 | ± | 0.0238 |
| hendrycksTest-prehistory | 1 | acc | 0.2716 | ± | 0.0247 |
|  |  | acc_norm | 0.2716 | ± | 0.0247 |
| hendrycksTest-professional_accounting | 1 | acc | 0.2270 | ± | 0.0250 |
|  |  | acc_norm | 0.2270 | ± | 0.0250 |
| hendrycksTest-professional_law | 1 | acc | 0.2445 | ± | 0.0110 |
|  |  | acc_norm | 0.2445 | ± | 0.0110 |
| hendrycksTest-professional_medicine | 1 | acc | 0.1765 | ± | 0.0232 |
|  |  | acc_norm | 0.1765 | ± | 0.0232 |
| hendrycksTest-professional_psychology | 1 | acc | 0.2663 | ± | 0.0179 |
|  |  | acc_norm | 0.2663 | ± | 0.0179 |
| hendrycksTest-public_relations | 1 | acc | 0.3364 | ± | 0.0453 |
|  |  | acc_norm | 0.3364 | ± | 0.0453 |
| hendrycksTest-security_studies | 1 | acc | 0.2041 | ± | 0.0258 |
|  |  | acc_norm | 0.2041 | ± | 0.0258 |
| hendrycksTest-sociology | 1 | acc | 0.2090 | ± | 0.0287 |
|  |  | acc_norm | 0.2090 | ± | 0.0287 |
| hendrycksTest-us_foreign_policy | 1 | acc | 0.2800 | ± | 0.0451 |
|  |  | acc_norm | 0.2800 | ± | 0.0451 |
| hendrycksTest-virology | 1 | acc | 0.2892 | ± | 0.0353 |
|  |  | acc_norm | 0.2892 | ± | 0.0353 |
| hendrycksTest-world_religions | 1 | acc | 0.3158 | ± | 0.0357 |
|  |  | acc_norm | 0.3158 | ± | 0.0357 |
| hendrycksTestRu-abstract_algebra | 1 | acc | 0.2300 | ± | 0.0423 |
|  |  | acc_norm | 0.2300 | ± | 0.0423 |
| hendrycksTestRu-anatomy | 1 | acc | 0.1852 | ± | 0.0336 |
|  |  | acc_norm | 0.1852 | ± | 0.0336 |
| hendrycksTestRu-astronomy | 1 | acc | 0.1645 | ± | 0.0302 |
|  |  | acc_norm | 0.1645 | ± | 0.0302 |
| hendrycksTestRu-business_ethics | 1 | acc | 0.2000 | ± | 0.0402 |
|  |  | acc_norm | 0.2000 | ± | 0.0402 |
| hendrycksTestRu-clinical_knowledge | 1 | acc | 0.2113 | ± | 0.0251 |
|  |  | acc_norm | 0.2113 | ± | 0.0251 |
| hendrycksTestRu-college_biology | 1 | acc | 0.2569 | ± | 0.0365 |
|  |  | acc_norm | 0.2569 | ± | 0.0365 |
| hendrycksTestRu-college_chemistry | 1 | acc | 0.2300 | ± | 0.0423 |
|  |  | acc_norm | 0.2300 | ± | 0.0423 |
| hendrycksTestRu-college_computer_science | 1 | acc | 0.2200 | ± | 0.0416 |
|  |  | acc_norm | 0.2200 | ± | 0.0416 |
| hendrycksTestRu-college_mathematics | 1 | acc | 0.2000 | ± | 0.0402 |
|  |  | acc_norm | 0.2000 | ± | 0.0402 |
| hendrycksTestRu-college_medicine | 1 | acc | 0.1965 | ± | 0.0303 |
|  |  | acc_norm | 0.1965 | ± | 0.0303 |
| hendrycksTestRu-college_physics | 1 | acc | 0.2059 | ± | 0.0402 |
|  |  | acc_norm | 0.2059 | ± | 0.0402 |
| hendrycksTestRu-computer_security | 1 | acc | 0.2900 | ± | 0.0456 |
|  |  | acc_norm | 0.2900 | ± | 0.0456 |
| hendrycksTestRu-conceptual_physics | 1 | acc | 0.2638 | ± | 0.0288 |
|  |  | acc_norm | 0.2638 | ± | 0.0288 |
| hendrycksTestRu-econometrics | 1 | acc | 0.2281 | ± | 0.0395 |
|  |  | acc_norm | 0.2281 | ± | 0.0395 |
| hendrycksTestRu-electrical_engineering | 1 | acc | 0.2621 | ± | 0.0366 |
|  |  | acc_norm | 0.2621 | ± | 0.0366 |
| hendrycksTestRu-elementary_mathematics | 1 | acc | 0.2381 | ± | 0.0219 |
|  |  | acc_norm | 0.2381 | ± | 0.0219 |
| hendrycksTestRu-formal_logic | 1 | acc | 0.2937 | ± | 0.0407 |
|  |  | acc_norm | 0.2937 | ± | 0.0407 |
| hendrycksTestRu-global_facts | 1 | acc | 0.2100 | ± | 0.0409 |
|  |  | acc_norm | 0.2100 | ± | 0.0409 |
| hendrycksTestRu-high_school_biology | 1 | acc | 0.1903 | ± | 0.0223 |
|  |  | acc_norm | 0.1903 | ± | 0.0223 |
| hendrycksTestRu-high_school_chemistry | 1 | acc | 0.1872 | ± | 0.0274 |
|  |  | acc_norm | 0.1872 | ± | 0.0274 |
| hendrycksTestRu-high_school_computer_science | 1 | acc | 0.2800 | ± | 0.0451 |
|  |  | acc_norm | 0.2800 | ± | 0.0451 |
| hendrycksTestRu-high_school_european_history | 1 | acc | 0.2303 | ± | 0.0329 |
|  |  | acc_norm | 0.2303 | ± | 0.0329 |
| hendrycksTestRu-high_school_geography | 1 | acc | 0.1869 | ± | 0.0278 |
|  |  | acc_norm | 0.1869 | ± | 0.0278 |
| hendrycksTestRu-high_school_government_and_politics | 1 | acc | 0.1865 | ± | 0.0281 |
|  |  | acc_norm | 0.1865 | ± | 0.0281 |
| hendrycksTestRu-high_school_macroeconomics | 1 | acc | 0.2179 | ± | 0.0209 |
|  |  | acc_norm | 0.2179 | ± | 0.0209 |
| hendrycksTestRu-high_school_mathematics | 1 | acc | 0.2444 | ± | 0.0262 |
|  |  | acc_norm | 0.2444 | ± | 0.0262 |
| hendrycksTestRu-high_school_microeconomics | 1 | acc | 0.2227 | ± | 0.0270 |
|  |  | acc_norm | 0.2227 | ± | 0.0270 |
| hendrycksTestRu-high_school_physics | 1 | acc | 0.2450 | ± | 0.0351 |
|  |  | acc_norm | 0.2450 | ± | 0.0351 |
| hendrycksTestRu-high_school_psychology | 1 | acc | 0.2275 | ± | 0.0180 |
|  |  | acc_norm | 0.2275 | ± | 0.0180 |
| hendrycksTestRu-high_school_statistics | 1 | acc | 0.1806 | ± | 0.0262 |
|  |  | acc_norm | 0.1806 | ± | 0.0262 |
| hendrycksTestRu-high_school_us_history | 1 | acc | 0.2549 | ± | 0.0306 |
|  |  | acc_norm | 0.2549 | ± | 0.0306 |
| hendrycksTestRu-high_school_world_history | 1 | acc | 0.2321 | ± | 0.0275 |
|  |  | acc_norm | 0.2321 | ± | 0.0275 |
| hendrycksTestRu-human_aging | 1 | acc | 0.3094 | ± | 0.0310 |
|  |  | acc_norm | 0.3094 | ± | 0.0310 |
| hendrycksTestRu-human_sexuality | 1 | acc | 0.2443 | ± | 0.0377 |
|  |  | acc_norm | 0.2443 | ± | 0.0377 |
| hendrycksTestRu-international_law | 1 | acc | 0.2479 | ± | 0.0394 |
|  |  | acc_norm | 0.2479 | ± | 0.0394 |
| hendrycksTestRu-jurisprudence | 1 | acc | 0.2778 | ± | 0.0433 |
|  |  | acc_norm | 0.2778 | ± | 0.0433 |
| hendrycksTestRu-logical_fallacies | 1 | acc | 0.2025 | ± | 0.0316 |
|  |  | acc_norm | 0.2025 | ± | 0.0316 |
| hendrycksTestRu-machine_learning | 1 | acc | 0.2500 | ± | 0.0411 |
|  |  | acc_norm | 0.2500 | ± | 0.0411 |
| hendrycksTestRu-management | 1 | acc | 0.1845 | ± | 0.0384 |
|  |  | acc_norm | 0.1845 | ± | 0.0384 |
| hendrycksTestRu-marketing | 1 | acc | 0.2863 | ± | 0.0296 |
|  |  | acc_norm | 0.2863 | ± | 0.0296 |
| hendrycksTestRu-medical_genetics | 1 | acc | 0.2800 | ± | 0.0451 |
|  |  | acc_norm | 0.2800 | ± | 0.0451 |
| hendrycksTestRu-miscellaneous | 1 | acc | 0.2350 | ± | 0.0152 |
|  |  | acc_norm | 0.2350 | ± | 0.0152 |
| hendrycksTestRu-moral_disputes | 1 | acc | 0.2399 | ± | 0.0230 |
|  |  | acc_norm | 0.2399 | ± | 0.0230 |
| hendrycksTestRu-moral_scenarios | 1 | acc | 0.2380 | ± | 0.0142 |
|  |  | acc_norm | 0.2380 | ± | 0.0142 |
| hendrycksTestRu-nutrition | 1 | acc | 0.2320 | ± | 0.0242 |
|  |  | acc_norm | 0.2320 | ± | 0.0242 |
| hendrycksTestRu-philosophy | 1 | acc | 0.1929 | ± | 0.0224 |
|  |  | acc_norm | 0.1929 | ± | 0.0224 |
| hendrycksTestRu-prehistory | 1 | acc | 0.2377 | ± | 0.0237 |
|  |  | acc_norm | 0.2377 | ± | 0.0237 |
| hendrycksTestRu-professional_accounting | 1 | acc | 0.2163 | ± | 0.0246 |
|  |  | acc_norm | 0.2163 | ± | 0.0246 |
| hendrycksTestRu-professional_law | 1 | acc | 0.2445 | ± | 0.0110 |
|  |  | acc_norm | 0.2445 | ± | 0.0110 |
| hendrycksTestRu-professional_medicine | 1 | acc | 0.3015 | ± | 0.0279 |
|  |  | acc_norm | 0.3015 | ± | 0.0279 |
| hendrycksTestRu-professional_psychology | 1 | acc | 0.2467 | ± | 0.0174 |
|  |  | acc_norm | 0.2467 | ± | 0.0174 |
| hendrycksTestRu-public_relations | 1 | acc | 0.2545 | ± | 0.0417 |
|  |  | acc_norm | 0.2545 | ± | 0.0417 |
| hendrycksTestRu-security_studies | 1 | acc | 0.1959 | ± | 0.0254 |
|  |  | acc_norm | 0.1959 | ± | 0.0254 |
| hendrycksTestRu-sociology | 1 | acc | 0.2289 | ± | 0.0297 |
|  |  | acc_norm | 0.2289 | ± | 0.0297 |
| hendrycksTestRu-us_foreign_policy | 1 | acc | 0.3000 | ± | 0.0461 |
|  |  | acc_norm | 0.3000 | ± | 0.0461 |
| hendrycksTestRu-virology | 1 | acc | 0.2590 | ± | 0.0341 |
|  |  | acc_norm | 0.2590 | ± | 0.0341 |
| hendrycksTestRu-world_religions | 1 | acc | 0.3158 | ± | 0.0357 |
|  |  | acc_norm | 0.3158 | ± | 0.0357 |
| muserc | 1 | acc | 0.0340 | ± | 0.0079 |
| parus | 0 | acc | 0.6100 | ± | 0.0490 |
| rcb | 1 | acc | 0.5273 | ± | 0.0337 |
|  |  | f1 | 0.2302 |  |  |
| rucos | 0 | f1 | 0.4231 | ± | 0.0056 |
|  |  | em | 0.4114 | ± | 0.0057 |
| russe | 0 | acc | 0.3877 | ± | 0.0053 |
| ruterra | 1 | acc | 0.5049 | ± | 0.0286 |
|  |  | f1 | 0.2666 |  |  |
| rwsd | 0 | acc | 0.4363 | ± | 0.0348 |
| xwinograd_ru | 0 | acc | 0.5238 | ± | 0.0282 |
| xnli_ru | 0 | acc | 0.3611 | ± | 0.0068 |
Downloads last month: –