Experimental browser for the Atmosphere
ITP implements the principle of pessimism in the face of uncertainty through a rejection sampling scheme, and can be viewed as a purely inference-time counterpart to our χPO work here: arxiv.org/abs/2407.13399. 7/11
May 3, 2025, 5:40 PM
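For intuition only, here is a minimal sketch of what "pessimism via rejection sampling" at inference time could look like. The function names, the ensemble-disagreement penalty, and the exponential acceptance rule are illustrative assumptions, not details taken from the ITP or χPO papers.

    import math
    import random

    def pessimistic_reward(y, reward_models, penalty=1.0):
        """Ensemble mean reward minus a disagreement penalty (assumed form of pessimism)."""
        scores = [rm(y) for rm in reward_models]
        mean = sum(scores) / len(scores)
        spread = max(scores) - min(scores)  # crude uncertainty proxy
        return mean - penalty * spread

    def sample_with_pessimism(prompt, ref_policy, reward_models,
                              beta=1.0, r_max=1.0, max_tries=128):
        """Rejection sampling against the reference policy, reweighted by a
        pessimistic reward; falls back to the last draw if nothing is accepted."""
        y = None
        for _ in range(max_tries):
            y = ref_policy(prompt)                      # draw a candidate response
            r = pessimistic_reward(y, reward_models)
            accept_prob = math.exp((r - r_max) / beta)  # <= 1 whenever r <= r_max
            if random.random() < min(1.0, accept_prob):
                return y
        return y

The sketch keeps the base model untouched and only filters its samples, which is the sense in which such a scheme is purely an inference-time counterpart to training-time regularization methods like χPO.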
{ "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3lobv4g2sks2d", "cid": "bafyreiguqwkqbexp5hkayqbacusyerepe7fzs5u7oy7qidc2mt37t5cscy", "value": { "text": "ITP implements the principle of pessimism in the face of uncertainty through a rejection sampling scheme, and can be viewed as a purely inference-time counterpart to our χPO work here: arxiv.org/abs/2407.13399.\n\n7/11", "$type": "app.bsky.feed.post", "embed": { "$type": "app.bsky.embed.external", "external": { "uri": "https://arxiv.org/abs/2407.13399", "thumb": { "$type": "blob", "ref": { "$link": "bafkreieihf6p54kf7b4suuiy5kov6yxwlvjtrvyvynqbgqzn3jfcqy2b6q" }, "mimeType": "image/jpeg", "size": 146959 }, "title": "Correcting the Mythos of KL-Regularization: Direct Alignment without Overoptimization via Chi-Squared Preference Optimization", "description": "Language model alignment methods such as reinforcement learning from human feedback (RLHF) have led to impressive advances in language model capabilities, but are limited by a widely observed phenomen..." } }, "langs": [ "en" ], "reply": { "root": { "cid": "bafyreih6mvnxmoz4bgad7vcbv2fl63qhwwggtht5lpckbzj7yexbnz2qie", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3lobv4byuec2d" }, "parent": { "cid": "bafyreic4egovi7uuzdkg3bifql6kkbujfzq5wbun2ukxzkhkw3fki2bbau", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3lobv4g2qmc2d" } }, "facets": [ { "index": { "byteEnd": 210, "byteStart": 186 }, "features": [ { "uri": "https://arxiv.org/abs/2407.13399", "$type": "app.bsky.richtext.facet#link" } ] } ], "createdAt": "2025-05-03T17:40:49.565Z" } }