ATProto Browser

{
  "text": "FWIW, this line of research seems to have largely been shown to be ineffective for model steering in practice! \n\narxiv.org/abs/2501.17148 from @aryaman.io is my reference but several others have shown similar results!",
  "$type": "app.bsky.feed.post",
  "embed": {
    "$type": "app.bsky.embed.external",
    "external": {
      "uri": "https://arxiv.org/abs/2501.17148",
      "thumb": {
        "$type": "blob",
        "ref": {
          "$link": "bafkreibxbhlzoexmt54v26ybqxwpolrmfdomekb44ll33q3vegy6bzvzfa"
        },
        "mimeType": "image/jpeg",
        "size": 146927
      },
      "title": "AxBench: Steering LLMs? Even Simple Baselines Outperform Sparse Autoencoders",
      "description": "Fine-grained steering of language model outputs is essential for safety and reliability. Prompting and finetuning are widely used to achieve these goals, but interpretability researchers have proposed..."
    }
  },
  "langs": [
    "en"
  ],
  "reply": {
    "root": {
      "cid": "bafyreidn7a7oxe7zz3jvhnejadleof3dtxup2qxbtu2y2j6vislf2olm7e",
      "uri": "at://did:plc:565ebob5f6hw33hjdkxty6qj/app.bsky.feed.post/3logv43nbuk2v"
    },
    "parent": {
      "cid": "bafyreihdyi5rndtpnhdd5swbm2qnnzfy4mssyv2nk42bn5onfh7xpiex54",
      "uri": "at://did:plc:565ebob5f6hw33hjdkxty6qj/app.bsky.feed.post/3logv453w2k2v"
    }
  },
  "facets": [
    {
      "index": {
        "byteEnd": 137,
        "byteStart": 113
      },
      "features": [
        {
          "uri": "https://arxiv.org/abs/2501.17148",
          "$type": "app.bsky.richtext.facet#link"
        }
      ]
    },
    {
      "$type": "app.bsky.richtext.facet",
      "index": {
        "byteEnd": 154,
        "byteStart": 143
      },
      "features": [
        {
          "did": "did:plc:x2zf77xvxo6ppl73mhhk3ihm",
          "$type": "app.bsky.richtext.facet#mention"
        }
      ]
    }
  ],
  "createdAt": "2025-05-05T18:15:19.083Z"
}
Post

In reply to 3logv453w2k2v