Experimental browser for the Atmosphere
Inference-time computation can boost language model performance, but scaling naively—like with Best-of-N sampling (selecting the top-scoring response from multiple candidates with a reward model)—can actually degrade results through reward hacking (eg, this fig from arxiv.org/abs/2210.10760). 2/11
May 3, 2025, 5:40 PM
{ "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3lobv4ecsl22d", "cid": "bafyreidtqsd3leeprtf22li2xtwmtenzimovupduyixcicvsq3ksofhs44", "value": { "text": "Inference-time computation can boost language model performance, but scaling naively—like with Best-of-N sampling (selecting the top-scoring response from multiple candidates with a reward model)—can actually degrade results through reward hacking (eg, this fig from arxiv.org/abs/2210.10760).\n\n2/11", "$type": "app.bsky.feed.post", "embed": { "$type": "app.bsky.embed.images", "images": [ { "alt": "", "image": { "$type": "blob", "ref": { "$link": "bafkreifvajumenlmrurnytp6br4lqbthlnbm43udhg3wawkuvohgflq4dm" }, "mimeType": "image/jpeg", "size": 251422 }, "aspectRatio": { "width": 984, "height": 710 } } ] }, "langs": [ "en" ], "reply": { "root": { "cid": "bafyreih6mvnxmoz4bgad7vcbv2fl63qhwwggtht5lpckbzj7yexbnz2qie", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3lobv4byuec2d" }, "parent": { "cid": "bafyreih6mvnxmoz4bgad7vcbv2fl63qhwwggtht5lpckbzj7yexbnz2qie", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3lobv4byuec2d" } }, "facets": [ { "index": { "byteEnd": 295, "byteStart": 271 }, "features": [ { "uri": "https://arxiv.org/abs/2210.10760", "$type": "app.bsky.richtext.facet#link" } ] } ], "createdAt": "2025-05-03T17:40:49.560Z" } }