{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":290909192,"defaultBranch":"main","name":"lm-evaluation-harness","ownerLogin":"EleutherAI","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2020-08-28T00:09:15.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/68924597?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1715099657.0","currentOid":""},"activityList":{"items":[{"before":"fe9fef4ece9f0df8c1c88c2c7e037ec52dd94096","after":"b24ac4b8eb7b32e30f45c16a5be78670dcb25f47","ref":"refs/heads/main","pushedAt":"2024-05-13T16:21:54.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"interface doc update (#1807)","shortMessageHtmlLink":"interface doc update (#1807)"}},{"before":"1980a13c9d7bcdc6e2a19228c203f9f7834ac9b8","after":"fe9fef4ece9f0df8c1c88c2c7e037ec52dd94096","ref":"refs/heads/main","pushedAt":"2024-05-13T14:51:31.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Adding tinyBenchmarks datasets (#1545)\n\n* Add tinyBenchmarks\r\n\r\n* Add acknowledgements\r\n\r\n* Add ordering of outputs for data-parallel\r\n\r\n* Run pre-commit\r\n\r\n* Add few_shot specifications\r\n\r\n* Add tinyBenchmarks post-processing\r\n\r\n* add conditional import ; fix task names\r\n\r\n---------\r\n\r\nCo-authored-by: haileyschoelkopf ","shortMessageHtmlLink":"Adding tinyBenchmarks datasets (#1545)"}},{"before":"1b64d9cf811133a672f9221d5e682d58fbf0914c","after":"9aab1ba84bddcf50f9082a208685656b192e98e5","ref":"refs/heads/1715-nostderr-typerror","pushedAt":"2024-05-12T16:31:45.000Z","pushType":"push","commitsCount":11,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Merge branch 'main' into 1715-nostderr-typerror","shortMessageHtmlLink":"Merge branch 'main' into 1715-nostderr-typerror"}},{"before":"41e64b2ec587e3abb18a709f693197173b6f3cce","after":"79ce346d96b8ee62452e1d2914129c9b64419a71","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-11T11:13:35.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"Update truthfulqa_mc2.yaml","shortMessageHtmlLink":"Update truthfulqa_mc2.yaml"}},{"before":"f4d2e6e0d588229d43f25c51aec248b4592a833d","after":"41e64b2ec587e3abb18a709f693197173b6f3cce","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-11T11:12:11.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"new group config parameter `tag_to_task`","shortMessageHtmlLink":"new group config parameter tag_to_task"}},{"before":"78c3f7d3acdf7d01b9a2a16124fdf1541d8626a2","after":"f4d2e6e0d588229d43f25c51aec248b4592a833d","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-11T04:43:01.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"add task_id for python tasks as well","shortMessageHtmlLink":"add task_id for python tasks as well"}},{"before":"0905615ff76b613c0e1e8eb3fd898731fbd7dc5a","after":"78c3f7d3acdf7d01b9a2a16124fdf1541d8626a2","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-11T04:40:15.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"add task_id for python tasks as well","shortMessageHtmlLink":"add task_id for python tasks as well"}},{"before":"39c40277eac61a1dfa673a803264c8ea9d4651bf","after":"0905615ff76b613c0e1e8eb3fd898731fbd7dc5a","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-10T16:59:50.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"reformat","shortMessageHtmlLink":"reformat"}},{"before":"13203943b86e8420c7d4ebf272259a3f456e7b0d","after":"39c40277eac61a1dfa673a803264c8ea9d4651bf","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-10T16:57:54.000Z","pushType":"push","commitsCount":7,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"use task_id","shortMessageHtmlLink":"use task_id"}},{"before":"9f06432c98c3518dbb274c0c36ed5bdc3a5b3619","after":"13203943b86e8420c7d4ebf272259a3f456e7b0d","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-10T13:08:20.000Z","pushType":"push","commitsCount":7,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"reformat with pre-commit","shortMessageHtmlLink":"reformat with pre-commit"}},{"before":"d32ce5cf11608d59602e498d51f38d7d01f9d0ff","after":"1980a13c9d7bcdc6e2a19228c203f9f7834ac9b8","ref":"refs/heads/main","pushedAt":"2024-05-09T17:23:46.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"Copal task (#1803)\n\n* add copal\r\n\r\n* change name to copal id for clarity and the task name\r\n\r\n* remove `copal_id...` to yaml to make it work\r\n\r\n* checkmark on README\r\n\r\n* change group name to `copal_id`","shortMessageHtmlLink":"Copal task (#1803)"}},{"before":"9097ad3e2857c7c30b4d37e9b90d4ac2929bc120","after":"d32ce5cf11608d59602e498d51f38d7d01f9d0ff","ref":"refs/heads/main","pushedAt":"2024-05-08T13:05:19.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Update flag `--hf_hub_log_args` in interface documentation (#1806)\n\n* update interface documentation with flag --hf_hub_logs_arg\r\n\r\n* update interface documentation with flag --hf_hub_logs_arg 2","shortMessageHtmlLink":"Update flag --hf_hub_log_args in interface documentation (#1806)"}},{"before":"885f48d62cb41589da4ab5aa9d0b6ace3cffb878","after":"9097ad3e2857c7c30b4d37e9b90d4ac2929bc120","ref":"refs/heads/main","pushedAt":"2024-05-08T12:37:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"add task for mmlu evaluation in arc multiple choice format (#1745)\n\n* add mmlu arc style evaluation\r\n\r\n* rename arc_style to continuation\r\n\r\n---------\r\n\r\nCo-authored-by: Jonathan Burdge \r\nCo-authored-by: Jonathan Burdge ","shortMessageHtmlLink":"add task for mmlu evaluation in arc multiple choice format (#1745)"}},{"before":"5637397876a572ad5a6bebf0ffad4f35bf221988","after":"9f06432c98c3518dbb274c0c36ed5bdc3a5b3619","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-08T09:25:55.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"update truthfulqa","shortMessageHtmlLink":"update truthfulqa"}},{"before":"d4a913c473ff2d33d30eae27e5627968a2e9b6bb","after":"885f48d62cb41589da4ab5aa9d0b6ace3cffb878","ref":"refs/heads/main","pushedAt":"2024-05-07T21:55:52.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Initial integration of the Unitxt to LM eval harness (#1615)\n\n* Initial support for Unitxt datasets in LM Eval Harness\r\n\r\nSee https://github.com/IBM/unitxt\r\n\r\nThe script 'generate_yamls.py' creates LM Eval Harness yaml files corresponding to Unitxt datasets specified in the 'unitxt_datasets' file.\r\n\r\nThe glue code required to register Unitxt metrics is in 'unitxt_wrapper.py'.\r\n\r\n* Added dataset loading check to generate_yaml\r\n\r\nImproved error messages.\r\n\r\n* Speed up generate_yaml\r\n\r\nAdded printouts and improved error message\r\n\r\n* Added output printout\r\n\r\n* Simplified integration of unitxt datasets\r\n\r\nStore all the common yaml configuration in a yaml include shared by all datasets of the same task.\r\n\r\n* Post code review comments - part 1\r\n\r\n1. Made sure include files don't end wth 'yaml' so they won't be marked as tasks\r\n2. Added more datasets and tasks (NER, GEC)\r\n3. Added README\r\n\r\n* Post code review comments - part 2\r\n\r\n1. Added install unitxt install option in pyproject.toml:\r\npip install 'lm_eval[unitxt]'\r\n2. Added a check that unitxt is installed and print a clear error message if not\r\n\r\n* Commited missing pyproject change\r\n\r\n* Added documentation on adding datasets\r\n\r\n* More doc changes\r\n\r\n* add unitxt extra to readme\r\n\r\n* run precommit\r\n\r\n---------\r\n\r\nCo-authored-by: haileyschoelkopf ","shortMessageHtmlLink":"Initial integration of the Unitxt to LM eval harness (#1615)"}},{"before":null,"after":"1646d799211668824b66e87418e85f0fe27b96ff","ref":"refs/heads/haileyschoelkopf-patch-1","pushedAt":"2024-05-07T16:34:17.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Update vllm_causallms.py","shortMessageHtmlLink":"Update vllm_causallms.py"}},{"before":"75dfac4358d85e4d57f066b9eed1253fb5df3502","after":"5637397876a572ad5a6bebf0ffad4f35bf221988","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-07T15:48:05.000Z","pushType":"push","commitsCount":25,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"reversed task list","shortMessageHtmlLink":"reversed task list"}},{"before":"ad70d206866bd9bc4f09908a589986bb8c901b7a","after":"75dfac4358d85e4d57f066b9eed1253fb5df3502","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-07T14:23:55.000Z","pushType":"push","commitsCount":2,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"readd files","shortMessageHtmlLink":"readd files"}},{"before":"dd9c9846659be69a65e3c7ac2a0284efd9431e1b","after":null,"ref":"refs/heads/1774-alphabetize-table-prints","pushedAt":"2024-05-07T13:21:05.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"}},{"before":"d42a3e4482fad0c2587672254c24ad62afaa5702","after":"d4a913c473ff2d33d30eae27e5627968a2e9b6bb","ref":"refs/heads/main","pushedAt":"2024-05-07T13:21:04.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Logging Updates (Alphabetize table printouts, fix eval tracker bug) (#1774) (#1791)\n\n* fix auto-batch size bug for seq2seq models\r\n\r\n* alphabetize task + group tables ; fix eval tracker bug\r\n\r\n* fix eval tracker bug","shortMessageHtmlLink":"Logging Updates (Alphabetize table printouts, fix eval tracker bug) (#…"}},{"before":"6a7f97525422336ebbf39da94b90cb8977d7a223","after":null,"ref":"refs/heads/1644-hendrycks-math-variant","pushedAt":"2024-05-07T13:20:48.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"}},{"before":"20be169b35891d214ed4ede4f26bbff461b124d8","after":"d42a3e4482fad0c2587672254c24ad62afaa5702","ref":"refs/heads/main","pushedAt":"2024-05-07T13:20:48.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Re-add Hendrycks MATH (no sympy checking, no Minerva hardcoded prompt) variant (#1793)\n\n* add Hendrycks MATH (no sympy checking) variant\r\n\r\n* add readmes for MATH tasks","shortMessageHtmlLink":"Re-add Hendrycks MATH (no sympy checking, no Minerva hardcoded prompt…"}},{"before":"7fe2b93cc68ea895a1eb1ab57f14da7ac8de6139","after":"20be169b35891d214ed4ede4f26bbff461b124d8","ref":"refs/heads/main","pushedAt":"2024-05-07T13:09:24.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"link to the example output on the hub (#1798)","shortMessageHtmlLink":"link to the example output on the hub (#1798)"}},{"before":"3f810fa7672ee27e2096e145443918dd4638cfac","after":"dd9c9846659be69a65e3c7ac2a0284efd9431e1b","ref":"refs/heads/1774-alphabetize-table-prints","pushedAt":"2024-05-07T13:07:58.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Merge branch 'main' into 1774-alphabetize-table-prints","shortMessageHtmlLink":"Merge branch 'main' into 1774-alphabetize-table-prints"}},{"before":"4abefd1ed4b9bdc868def8b8c5417a319e9b1228","after":"6a7f97525422336ebbf39da94b90cb8977d7a223","ref":"refs/heads/1644-hendrycks-math-variant","pushedAt":"2024-05-07T13:07:34.000Z","pushType":"push","commitsCount":4,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Merge branch 'main' into 1644-hendrycks-math-variant","shortMessageHtmlLink":"Merge branch 'main' into 1644-hendrycks-math-variant"}},{"before":"0579b304130b306da6a5834a374e41f0fffe4be6","after":"ad70d206866bd9bc4f09908a589986bb8c901b7a","ref":"refs/heads/group-agg-rework","pushedAt":"2024-05-07T12:28:58.000Z","pushType":"push","commitsCount":9,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"update to work with new group and task configuration","shortMessageHtmlLink":"update to work with new group and task configuration"}},{"before":"66cf07ef1e04d96ae593ce9bea5f22ca02061385","after":"7fe2b93cc68ea895a1eb1ab57f14da7ac8de6139","ref":"refs/heads/main","pushedAt":"2024-05-07T10:29:20.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"},"commit":{"message":"Fix Caching Tests ; Remove `pretrained=gpt2` default (#1775)","shortMessageHtmlLink":"Fix Caching Tests ; Remove pretrained=gpt2 default (#1775)"}},{"before":"c606d7330bfef21b594d170a09a732d1e12f1433","after":null,"ref":"refs/heads/fix-tests","pushedAt":"2024-05-07T10:29:20.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"lintangsutawika","name":"Lintang Sutawika","path":"/lintangsutawika","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/5774558?s=80&v=4"}},{"before":null,"after":"83c717e8684a0f460c789b30d12d6bec18638931","ref":"refs/heads/1795-write-out","pushedAt":"2024-05-07T00:02:49.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"make write_out.py explicitly error if no splits match","shortMessageHtmlLink":"make write_out.py explicitly error if no splits match"}},{"before":"ae72cebc3e5e709165687d5fdb56f1a8d8a635e7","after":"66cf07ef1e04d96ae593ce9bea5f22ca02061385","ref":"refs/heads/main","pushedAt":"2024-05-06T18:49:44.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"haileyschoelkopf","name":"Hailey Schoelkopf","path":"/haileyschoelkopf","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/65563625?s=80&v=4"},"commit":{"message":"Update `--tasks list` option in interface documentation (#1792)","shortMessageHtmlLink":"Update --tasks list option in interface documentation (#1792)"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAESJztiwA","startCursor":null,"endCursor":null}},"title":"Activity · EleutherAI/lm-evaluation-harness"}