Experimental browser for the Atmosphere
Main result #3. Insufficiency of training-time interventions. We show that the use of inference-time compute above is fundamental, in the sense that purely training-time interventions (e.g., modifications to DPO) that produce proper policies cannot achieve similar guarantees in polynomial time. 15/
Mar 27, 2025, 5:28 PM
{ "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5ubujf2c", "cid": "bafyreiahice3ju7iv3q2uqg3ys6w6eddb3dfuiesesicpqgssqldnbq2iu", "value": { "text": "Main result #3. Insufficiency of training-time interventions. We show that the use of inference-time compute above is fundamental, in the sense that purely training time interventions (e.g., modifications to DPO) that produce proper policies cannot achieve similar guarantees in polynomial time.\n\n15/", "$type": "app.bsky.feed.post", "embed": { "$type": "app.bsky.embed.images", "images": [ { "alt": "", "image": { "$type": "blob", "ref": { "$link": "bafkreihanps7vvryfe36yilzrouplo3nvtcrhbakjwtc4uecmckt5qk2ye" }, "mimeType": "image/jpeg", "size": 182016 }, "aspectRatio": { "width": 1160, "height": 250 } } ] }, "langs": [ "en" ], "reply": { "root": { "cid": "bafyreidzv7o2gllewwfqqi4ixremjopxmuyntc5gstydqektrxkcjni4pi", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5p66ac2c" }, "parent": { "cid": "bafyreigqmwhzfk5nz62ncjh7rpgjdwxgmtcwld6ozms624nbmsdzuwx5ym", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5ubskv2c" } }, "createdAt": "2025-03-27T17:28:13.784Z" } }