Experimental browser for the Atmosphere
The tasks in RE-Bench aim to cover a wide variety of skills required for AI R&D and enable apples-to-apples comparisons between humans and AI agents, while also being feasible for human experts given ≤8 hours and reasonable amounts of compute.
Nov 25, 2024, 7:42 PM
{ "uri": "at://did:plc:dll3hepzq76nymel5c3yt6nk/app.bsky.feed.post/3lbsbrrjfbc2b", "cid": "bafyreie4xnq36ka4jnux2poaxzs6aoivtxyam7nkzzztfccscskjvuagwa", "value": { "text": "The tasks in RE-Bench aim to cover a wide variety of skills required for AI R&D and enable apples-to-apples comparisons between humans and AI agents, while also being feasible for human experts given ≤8 hours and reasonable amounts of compute.", "$type": "app.bsky.feed.post", "embed": { "$type": "app.bsky.embed.images", "images": [ { "alt": "", "image": { "$type": "blob", "ref": { "$link": "bafkreifg55th2es6okxfgfwrnaplqlsgs6alvbnri3oba6kn2xjl25lksi" }, "mimeType": "image/jpeg", "size": 476366 }, "aspectRatio": { "width": 1164, "height": 1202 } } ] }, "langs": [ "en" ], "reply": { "root": { "cid": "bafyreiewghwpltsxrvzxb4pehqb2a4prnn5wee34pxbfkj3xmdrccvdyau", "uri": "at://did:plc:dll3hepzq76nymel5c3yt6nk/app.bsky.feed.post/3lbsbrpmg3s2b" }, "parent": { "cid": "bafyreic5uu5vf57bfigz2g2cfrsfico5wy6acycwt2iu7bkj3v6okju5ta", "uri": "at://did:plc:dll3hepzq76nymel5c3yt6nk/app.bsky.feed.post/3lbsbrq52d22b" } }, "createdAt": "2024-11-25T19:42:38.036Z" } }