Skip to content

Commit

Permalink
verified Yi (#309)
Browse files Browse the repository at this point in the history
  • Loading branch information
YannDubs committed May 12, 2024
1 parent 96508b7 commit d2dbc22
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
@@ -1,12 +1,12 @@
,win_rate,standard_error,n_wins,n_wins_base,n_draws,n_total,discrete_win_rate,mode,avg_length,length_controlled_winrate
gpt-4-turbo-2024-04-09,46.11526538763708,1.474073957743638,370,426,9,805,46.52173913043478,minimal,1802,55.01530093647852
yi-large-preview,57.743214871913054,1.4136557115488118,467,334,4,805,58.26086956521739,community,2317,52.641094572374044
yi-large-preview,57.46724251946292,1.4305696667082746,463,338,4,805,57.7639751552795,verified,2335,51.894415134099546
Storm-7B-num-beams-10,55.39223031175099,1.4432354650537405,451,354,0,805,56.024844720496894,community,2582,51.76986749912786
gpt4_1106_preview_verbose,64.30360147101865,1.3348590089025316,525,268,12,805,65.96273291925466,dev,2402,51.57500797967598
gpt4_1106_preview,50.0,0.0,0,0,805,805,50.0,minimal,2049,50.0
Storm-7B,52.47113499955521,1.4665272219232597,431,374,0,805,53.54037267080746,community,2788,48.90648220146071
Nanbeige-Plus-Chat-v0.1,56.70300973017392,1.482841874951873,456,347,2,805,56.77018633540373,community,2587,44.45966240337981
Qwen1.5-110B-Chat,33.77709527565118,1.3776163153661627,255,545,5,805,31.987577639751553,community,1631,43.905552210786915
Qwen1.5-110B-Chat,33.77709527565118,1.3776163153661627,255,545,5,805,31.987577639751557,community,1631,43.90555221078692
gpt4_1106_preview_concise,22.92019444047205,1.232517714329424,172,622,11,805,22.049689440993788,dev,1136,41.896601591245386
aligner-2b_claude-3-opus-20240229,34.46337362321739,1.314666526302454,225,475,105,805,34.47204968944099,community,1669,41.823071715247664
claude-3-opus-20240229,29.10526953334248,1.3941539442369442,223,579,3,805,27.888198757763977,minimal,1388,40.5095080124761
Expand Down Expand Up @@ -37,7 +37,7 @@ internlm2-chat-20b-ExPO,46.185367468861,1.4638315245977938,375,430,0,805,46.5838
Yi-34B-Chat,29.65994671879504,1.3225712597906096,219,582,4,805,27.45341614906832,verified,2123,27.19054787762733
Starling-LM-7B-beta-ExPO,29.600851847906423,1.3252049542916096,225,580,0,805,27.95031055900621,community,2215,26.411156713811028
Snorkel-Mistral-PairRM-DPO,30.220052700671644,1.3328273012530358,231,572,1,804,28.79353233830846,community,2736,26.39144645733206
tulu-2-dpo-70b-ExPO,22.980619706104974,1.3591734082562228,184,620,1,805,22.919254658385093,community,1738,25.72330817134933
tulu-2-dpo-70b-ExPO,22.98061970610497,1.3591734082562228,184,620,1,805,22.919254658385093,community,1738,25.72330817134933
claude-instant-1.2,16.12739962159006,1.1341036838301686,120,682,3,805,15.093167701863356,community,1112,25.61225902543337
dbrx-instruct,18.44834898407453,1.255388020324377,150,655,0,805,18.633540372670808,verified,1450,25.37544974044448
claude-2.1,15.733506736409938,1.120315865445773,115,688,2,805,14.409937888198757,verified,1096,25.251943886133027
Expand Down
Expand Up @@ -155,4 +155,5 @@ Starling-LM-7B-alpha-ExPO,-1.1551552913433458,0.5427299165644314,-1.568123322810
Starling-LM-7B-beta-ExPO,-0.9995849824567026,0.8173555243885808,-1.2278737751496258
tulu-2-dpo-7b-ExPO,-1.2867594242188669,0.6986013668741516,-2.3831041176798933
tulu-2-dpo-13b-ExPO,-1.6247537554410800,0.6431373083501301,-1.7734311638958129
tulu-2-dpo-70b-ExPO,-1.2584665006823457,0.4518829275713181,-1.1294862478814247
tulu-2-dpo-70b-ExPO,-1.2584665006823457,0.4518829275713181,-1.1294862478814247
yi-large-preview,-1.4636444959266164,0.8454473244820504,0.2120258323902696

0 comments on commit d2dbc22

Please sign in to comment.