ATProto Browser

Record data

{
  "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5rprks2c",
  "cid": "bafyreifj4k4kbrds6km4y6j53lew2qc3v25k7alsawjobfujsexxf7gp4u",
  "value": {
    "text": "PPO/GRPO/etc have no explicit mechanism to promote novelty or diversity, and there is evidence that current approaches may only be amplifying/reinforcing behavior already present in the base model (see e.g., arxiv.org/abs/2503.01307).\n\n4/",
    "$type": "app.bsky.feed.post",
    "embed": {
      "$type": "app.bsky.embed.external",
      "external": {
        "uri": "https://arxiv.org/abs/2503.01307",
        "thumb": {
          "$type": "blob",
          "ref": {
            "$link": "bafkreieihf6p54kf7b4suuiy5kov6yxwlvjtrvyvynqbgqzn3jfcqy2b6q"
          },
          "mimeType": "image/jpeg",
          "size": 146959
        },
        "title": "Cognitive Behaviors that Enable Self-Improving Reasoners, or, Four Habits of Highly Effective STaRs",
        "description": "Test-time inference has emerged as a powerful paradigm for enabling language models to ``think'' longer and more carefully about complex challenges, much like skilled human experts. While reinforcemen..."
      }
    },
    "langs": [
      "en"
    ],
    "reply": {
      "root": {
        "cid": "bafyreidzv7o2gllewwfqqi4ixremjopxmuyntc5gstydqektrxkcjni4pi",
        "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5p66ac2c"
      },
      "parent": {
        "cid": "bafyreic75xgy7hzxqmgdsmuqe4fdbiw43jqgr72l3vkbpupww6ecnpgvha",
        "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3llet5rpqlk2c"
      }
    },
    "facets": [
      {
        "index": {
          "byteEnd": 232,
          "byteStart": 208
        },
        "features": [
          {
            "uri": "https://arxiv.org/abs/2503.01307",
            "$type": "app.bsky.richtext.facet#link"
          }
        ]
      }
    ],
    "createdAt": "2025-03-27T17:28:13.773Z"
  }
}
Post

In reply to 3llet5rpqlk2c

Record data