{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":601100396,"defaultBranch":"main","name":"awesome-RLHF","ownerLogin":"opendilab","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-02-13T11:19:23.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/86840398?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1684232926.093004","currentOid":""},"activityList":{"items":[{"before":"8471374626d7976d717a04f99faf86f56afd7b9c","after":"41131cf75c7350cbd1fd636858b688e9cde35014","ref":"refs/heads/main","pushedAt":"2024-05-10T06:04:38.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"doc(cy): add some papers (#49)\n\n* visitors counter update\r\n\r\n* add some paper\r\n\r\n* Update README.md","shortMessageHtmlLink":"doc(cy): add some papers (#49)"}},{"before":"070ffe56576851e2b3566b38ac222e6010071736","after":"8471374626d7976d717a04f99faf86f56afd7b9c","ref":"refs/heads/main","pushedAt":"2024-04-18T03:29:01.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(rlhf deciphered): add one survey and analysis paper about RLHF (#48)","shortMessageHtmlLink":"add(rlhf deciphered): add one survey and analysis paper about RLHF (#48)"}},{"before":"970b2a2b7e27caa0f400fd4b343ae758d35fbcd2","after":"070ffe56576851e2b3566b38ac222e6010071736","ref":"refs/heads/main","pushedAt":"2024-04-07T03:02:14.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(ziyizhang): add one paper about reward overoptimization in diffusion model alignment (#47)","shortMessageHtmlLink":"add(ziyizhang): add one paper about reward overoptimization in diffus…"}},{"before":"2b8e10c3112626a1e1bf98d431be25fd08861137","after":"970b2a2b7e27caa0f400fd4b343ae758d35fbcd2","ref":"refs/heads/main","pushedAt":"2024-03-26T00:28:47.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"doc(hijkzzz): update OpenRLHF link and introduction (#46)","shortMessageHtmlLink":"doc(hijkzzz): update OpenRLHF link and introduction (#46)"}},{"before":"fc73798df4a5e6cb58cbf409d4bd896f6dae11d4","after":"2b8e10c3112626a1e1bf98d431be25fd08861137","ref":"refs/heads/main","pushedAt":"2024-03-22T11:16:09.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"doc(cy): visitors counter update (#45)","shortMessageHtmlLink":"doc(cy): visitors counter update (#45)"}},{"before":"a27240e2a09b75faa00a6362d0f8ceedf206932f","after":"fc73798df4a5e6cb58cbf409d4bd896f6dae11d4","ref":"refs/heads/main","pushedAt":"2024-03-20T04:46:37.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(dunzeng): add one paper about LLM alignment on diversified preferences (#44)\n\n* add(dunzeng): add one paper about LLM alignment on diversified preferences","shortMessageHtmlLink":"add(dunzeng): add one paper about LLM alignment on diversified prefer…"}},{"before":"0d42a7e0b5418873dd24b0c58c70443c7de7b930","after":"a27240e2a09b75faa00a6362d0f8ceedf206932f","ref":"refs/heads/main","pushedAt":"2024-03-19T04:25:14.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(liujie): add the repo of an paper about multi-objective DPO; add the repo of an paper about LLM inference-time attack (#43)","shortMessageHtmlLink":"add(liujie): add the repo of an paper about multi-objective DPO; add …"}},{"before":"b11f02f4e797da6775bb8f47d3821660d51f867e","after":"0d42a7e0b5418873dd24b0c58c70443c7de7b930","ref":"refs/heads/main","pushedAt":"2024-03-04T11:39:38.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(dx): add one paper about RLHF with distributional human preferences (#42)","shortMessageHtmlLink":"add(dx): add one paper about RLHF with distributional human preferenc…"}},{"before":"e3c5feb4010fe2c5a2be3e40e466947ca9f23a00","after":"b11f02f4e797da6775bb8f47d3821660d51f867e","ref":"refs/heads/main","pushedAt":"2024-03-03T10:30:24.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(lj): add one paper about multi-objective RLHF without reward modeling; add one paper about LLM inference-time attack (#41)","shortMessageHtmlLink":"add(lj): add one paper about multi-objective RLHF without reward mode…"}},{"before":"97b4bc02bd930b8b8f0eae00b2897862a0708c10","after":"e3c5feb4010fe2c5a2be3e40e466947ca9f23a00","ref":"refs/heads/main","pushedAt":"2024-02-18T02:00:34.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(WeiXiongUST): add two papers on RLHF; update the keyword of an existing paper (#40)","shortMessageHtmlLink":"add(WeiXiongUST): add two papers on RLHF; update the keyword of an ex…"}},{"before":"601e090b9b3f6e636104f8be95f01d07cde1ddb1","after":"97b4bc02bd930b8b8f0eae00b2897862a0708c10","ref":"refs/heads/main","pushedAt":"2024-02-04T16:06:07.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"doc(cy): add some papers (#39)","shortMessageHtmlLink":"doc(cy): add some papers (#39)"}},{"before":"c851a630be94177699263407b8681b20e48a36e9","after":"601e090b9b3f6e636104f8be95f01d07cde1ddb1","ref":"refs/heads/main","pushedAt":"2024-01-31T05:38:22.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(WeiXiongUST): update existing paper information (#38)","shortMessageHtmlLink":"add(WeiXiongUST): update existing paper information (#38)"}},{"before":"9dfcce51af89fd28ae90fbc8bce6e0ae0496e74f","after":"c851a630be94177699263407b8681b20e48a36e9","ref":"refs/heads/main","pushedAt":"2024-01-08T04:42:13.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(Linear95): add one paper about RLHF via adversarial games (#36)\n\n* Update README.md\r\n\r\n* add code of apo","shortMessageHtmlLink":"add(Linear95): add one paper about RLHF via adversarial games (#36)"}},{"before":"03fc94c12cd0db211ec6c9eb4bdc897713f6e5f6","after":"9dfcce51af89fd28ae90fbc8bce6e0ae0496e74f","ref":"refs/heads/main","pushedAt":"2023-12-21T08:28:06.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(WeiXiongUST): add one paper on the mathematical foundation (#34)\n\nCo-authored-by: weixiong ","shortMessageHtmlLink":"add(WeiXiongUST): add one paper on the mathematical foundation (#34)"}},{"before":"e694d2066bde0ad5b57525c8586f077cd8b4f561","after":"03fc94c12cd0db211ec6c9eb4bdc897713f6e5f6","ref":"refs/heads/main","pushedAt":"2023-12-07T03:35:20.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(Encord blog): Guide to Reinforcement Learning from Human Feedback (RLHF) for CV (#33)","shortMessageHtmlLink":"add(Encord blog): Guide to Reinforcement Learning from Human Feedback…"}},{"before":"0b972c34147981354491635c94b46bf18391b682","after":"e694d2066bde0ad5b57525c8586f077cd8b4f561","ref":"refs/heads/main","pushedAt":"2023-12-06T04:52:31.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"doc(cy): add some papers (#32)","shortMessageHtmlLink":"doc(cy): add some papers (#32)"}},{"before":"7739beaa5f0e3c6844610dba3628aaf10efffc02","after":"0b972c34147981354491635c94b46bf18391b682","ref":"refs/heads/main","pushedAt":"2023-10-30T06:23:57.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(dssrgu): add one paper about RLHF on NeurIPS 2023 (#30)","shortMessageHtmlLink":"add(dssrgu): add one paper about RLHF on NeurIPS 2023 (#30)"}},{"before":"252749e3e322a5875bc771afab6976d3d7b993d4","after":"7739beaa5f0e3c6844610dba3628aaf10efffc02","ref":"refs/heads/main","pushedAt":"2023-10-23T03:15:23.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":" doc(cy): add some new papers about Human Feedback (#29)\n\n* add one paper\r\n\r\n* add some papers\r\n\r\n* remove duplicates\r\n\r\n* doc(cy): add some new papers about Human Feedback","shortMessageHtmlLink":" doc(cy): add some new papers about Human Feedback (#29)"}},{"before":"90bcf0aeeecff5a0bf035315de0e802b31fd5d40","after":"252749e3e322a5875bc771afab6976d3d7b993d4","ref":"refs/heads/main","pushedAt":"2023-09-21T03:57:45.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"doc(dy): Zhihu article on adding PPO algorithm practice based on Chinese hh-rlhf data. (#28)","shortMessageHtmlLink":"doc(dy): Zhihu article on adding PPO algorithm practice based on Chin…"}},{"before":"3b8b45d2d3ffc591f4ddacb465c4fdfe7e0985e1","after":"90bcf0aeeecff5a0bf035315de0e802b31fd5d40","ref":"refs/heads/main","pushedAt":"2023-09-05T04:10:29.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"doc(cy): add some new papers about Human Feedback (#25)\n\n* add one paper\r\n\r\n* add some papers\r\n\r\n* remove duplicates","shortMessageHtmlLink":"doc(cy): add some new papers about Human Feedback (#25)"}},{"before":"2f705500e9fb8394665d116a71d4d497a572736e","after":"3b8b45d2d3ffc591f4ddacb465c4fdfe7e0985e1","ref":"refs/heads/main","pushedAt":"2023-08-25T09:28:29.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"feature(hijkzzz): add openllama2 codebase (#24)\n\nCo-authored-by: jianh ","shortMessageHtmlLink":"feature(hijkzzz): add openllama2 codebase (#24)"}},{"before":"e50679ad21a61b92173544e9a32e854aa28fd059","after":"2f705500e9fb8394665d116a71d4d497a572736e","ref":"refs/heads/main","pushedAt":"2023-08-25T09:28:03.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"feature(hijkzzz): add decision transformer alignment (#23)\n\nCo-authored-by: jianh ","shortMessageHtmlLink":"feature(hijkzzz): add decision transformer alignment (#23)"}},{"before":"81ae1ad0d0874f64770dbaf32c749b779fb52844","after":"e50679ad21a61b92173544e9a32e854aa28fd059","ref":"refs/heads/main","pushedAt":"2023-07-24T07:32:02.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"doc(F2-song): add one new paper about Human Feedback","shortMessageHtmlLink":"doc(F2-song): add one new paper about Human Feedback"}},{"before":"3f5b2161174e4c6f19fd786c44e7a147849d8a4d","after":"81ae1ad0d0874f64770dbaf32c749b779fb52844","ref":"refs/heads/main","pushedAt":"2023-07-06T02:45:43.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"feature(gry): add 6 datasets && 3 codebases (#21)","shortMessageHtmlLink":"feature(gry): add 6 datasets && 3 codebases (#21)"}},{"before":"c16be1045b7804dcfe92ac0a43bdb775b22e8e37","after":"3f5b2161174e4c6f19fd786c44e7a147849d8a4d","ref":"refs/heads/main","pushedAt":"2023-05-31T03:27:13.363Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"feature(gry): split dataset and codebase (#19)\n\n* feature(gry): split dataset and codebase","shortMessageHtmlLink":"feature(gry): split dataset and codebase (#19)"}},{"before":"3e2186e4652a609fd13413ebbf1207bb891dcdd9","after":"c16be1045b7804dcfe92ac0a43bdb775b22e8e37","ref":"refs/heads/main","pushedAt":"2023-05-22T11:04:24.380Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"add(publiusau): add one blog/podcast of OpenAI researchers on RLHF (#18)\n\nAdding a blog interview/podcast with Instruct GPT Authors and AI Pub on RLHF","shortMessageHtmlLink":"add(publiusau): add one blog/podcast of OpenAI researchers on RLHF (#18)"}},{"before":"92808e14ccbdec8fc79b11ad25755257c2dd285f","after":null,"ref":"refs/heads/doc-by-chatgpt","pushedAt":"2023-05-16T10:28:46.093Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"}},{"before":"7b62683111a3f1d3674f2168b0275ef1f3659522","after":"3e2186e4652a609fd13413ebbf1207bb891dcdd9","ref":"refs/heads/main","pushedAt":"2023-05-15T09:12:37.317Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"ruoyuGao","name":"Aron751","path":"/ruoyuGao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/41601003?s=80&v=4"},"commit":{"message":"feature(tbx): Add a survey in 2023 (#17)","shortMessageHtmlLink":"feature(tbx): Add a survey in 2023 (#17)"}},{"before":"15f16f4059a3445e93dd53bd38d3eb96b24f72db","after":"7b62683111a3f1d3674f2168b0275ef1f3659522","ref":"refs/heads/main","pushedAt":"2023-05-05T07:44:06.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"PaParaZz1","name":"Swain","path":"/PaParaZz1","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/33195032?s=80&v=4"},"commit":{"message":"feature(gry): add 1 codebase and several badges (#16)\n\n* feature(gry): add 1 codebase and several badges","shortMessageHtmlLink":"feature(gry): add 1 codebase and several badges (#16)"}},{"before":"4b13287d6ea32b5527d52a6e7319b85edaa9c0f3","after":"15f16f4059a3445e93dd53bd38d3eb96b24f72db","ref":"refs/heads/main","pushedAt":"2023-04-27T04:05:50.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"ruoyuGao","name":"Aron751","path":"/ruoyuGao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/41601003?s=80&v=4"},"commit":{"message":"feature(kevin): add Turkish support (#15)\n\n* Update README.md\r\n\r\n* Update README.md\r\n\r\n* Update CONTRIBUTING.md\r\n\r\n* Update README.md\r\n\r\n* Update README.md\r\n\r\n* Update README.md\r\n\r\n* feature(kevin): add turkish support\r\n\r\n* feature(kevin): add turkish support\r\n\r\n* fix typo\r\n\r\n---------\r\n\r\nCo-authored-by: 01Kevin01 <131346373+01Kevin01@users.noreply.github.com>","shortMessageHtmlLink":"feature(kevin): add Turkish support (#15)"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAERkQVNAA","startCursor":null,"endCursor":null}},"title":"Activity · opendilab/awesome-RLHF"}