Experimental browser for the Atmosphere
Many have highlighted automation of AI R&D by AI agents as a key capability to monitor for when scaling/deploying frontier ML systems. However, existing evals tend to focus on short, narrow tasks and lack direct comparisons with human experts.
Nov 25, 2024, 7:42 PM
{ "uri": "at://did:plc:dll3hepzq76nymel5c3yt6nk/app.bsky.feed.post/3lbsbrq52d22b", "cid": "bafyreic5uu5vf57bfigz2g2cfrsfico5wy6acycwt2iu7bkj3v6okju5ta", "value": { "text": "Many have highlighted automation of AI R&D by AI agents as a key capability to monitor for when scaling/deploying frontier ML systems. However, existing evals tend to focus on short, narrow tasks and lack direct comparisons with human experts.", "$type": "app.bsky.feed.post", "embed": { "$type": "app.bsky.embed.images", "images": [ { "alt": "", "image": { "$type": "blob", "ref": { "$link": "bafkreiacsfm7tfmg7xbiw5rd3dqhfagobmhgobwvedigm4d35fbfvvc6ny" }, "mimeType": "image/jpeg", "size": 148723 }, "aspectRatio": { "width": 1394, "height": 546 } } ] }, "langs": [ "en" ], "reply": { "root": { "cid": "bafyreiewghwpltsxrvzxb4pehqb2a4prnn5wee34pxbfkj3xmdrccvdyau", "uri": "at://did:plc:dll3hepzq76nymel5c3yt6nk/app.bsky.feed.post/3lbsbrpmg3s2b" }, "parent": { "cid": "bafyreiewghwpltsxrvzxb4pehqb2a4prnn5wee34pxbfkj3xmdrccvdyau", "uri": "at://did:plc:dll3hepzq76nymel5c3yt6nk/app.bsky.feed.post/3lbsbrpmg3s2b" } }, "createdAt": "2024-11-25T19:42:38.035Z" } }