ATProto Browser

ATProto Browser

Experimental browser for the Atmosphere

Post

Main result #2: Inference-time exploration. We introduce a new RL algorithm, SpannerSampling, which uses active exploration to achieve optimal data efficiency irrespective of whether the base model has good coverage. 13/

Mar 27, 2025, 5:28 PM

{
  "text": "Main result #2: Inference-time exploration. We introduce a new RL algorithm, SpannerSampling, which uses active exploration to achieve optimal data efficiency irrespective of whether the base model has good coverage. \n\n13/",
  "$type": "app.bsky.feed.post",
  "embed": {
    "$type": "app.bsky.embed.images",
    "images": [
      {
        "alt": "",
        "image": {
          "$type": "blob",
          "ref": {
            "$link": "bafkreihlw524k3y7pkbuk2pic6ivjsucoy3toygbprgjppjqxt5fcfckxu"
          },
          "mimeType": "image/jpeg",
          "size": 145122
        },
        "aspectRatio": {
          "width": 1254,
          "height": 246
        }
      }
    ]
  },
  "langs": [
    "en"
  ],
  "reply": {
    "root": {
      "cid": "bafyreidzv7o2gllewwfqqi4ixremjopxmuyntc5gstydqektrxkcjni4pi",
      "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5p66ac2c"
    },
    "parent": {
      "cid": "bafyreigb3z73k5zfsetrzgbpi4hyjiurv3ddejmf5btoiy4mpqtfnop7hu",
      "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5st6ev2c"
    }
  },
  "createdAt": "2025-03-27T17:28:13.782Z"
}