Experimental browser for the Atmosphere
Main result #2: Inference-time exploration. We introduce a new RL algorithm, SpannerSampling, which uses active exploration to achieve optimal data efficiency irrespective of whether the base model has good coverage. 13/
Mar 27, 2025, 5:28 PM
{ "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5tijxv2c", "cid": "bafyreigvmern4j6kusio433p62rmtud3ipeec4pwrxirigsxeshizyinl4", "value": { "text": "Main result #2: Inference-time exploration. We introduce a new RL algorithm, SpannerSampling, which uses active exploration to achieve optimal data efficiency irrespective of whether the base model has good coverage. \n\n13/", "$type": "app.bsky.feed.post", "embed": { "$type": "app.bsky.embed.images", "images": [ { "alt": "", "image": { "$type": "blob", "ref": { "$link": "bafkreihlw524k3y7pkbuk2pic6ivjsucoy3toygbprgjppjqxt5fcfckxu" }, "mimeType": "image/jpeg", "size": 145122 }, "aspectRatio": { "width": 1254, "height": 246 } } ] }, "langs": [ "en" ], "reply": { "root": { "cid": "bafyreidzv7o2gllewwfqqi4ixremjopxmuyntc5gstydqektrxkcjni4pi", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5p66ac2c" }, "parent": { "cid": "bafyreigb3z73k5zfsetrzgbpi4hyjiurv3ddejmf5btoiy4mpqtfnop7hu", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5st6ev2c" } }, "createdAt": "2025-03-27T17:28:13.782Z" } }