ATProto Browser

ATProto Browser

Experimental browser for the Atmosphere

Post

We scaled training data attribution (TDA) methods ~1000x to find influential pretraining examples for thousands of queries in an 8B-parameter LLM over the entire 160B-token C4 corpus! [medium.com/people-ai-re...](https://medium.com/people-ai-research/scaling-training-data-attribution-f7d1eddd85da)

Dec 13, 2024, 6:57 PM

Record data

{
  "uri": "at://did:plc:u4swdfntdfizkswnejceoi2y/app.bsky.feed.post/3ld7hmptt2s2a",
  "cid": "bafyreiclbc63wx6wfj2mnxra2deepcdlvsrvlsrow3rygn2miwgvdd6xqu",
  "value": {
    "text": "We scaled training data attribution (TDA) methods ~1000x to find influential pretraining examples for thousands of queries in an 8B-parameter LLM over the entire 160B-token C4 corpus!\nmedium.com/people-ai-re...",
    "$type": "app.bsky.feed.post",
    "embed": {
      "$type": "app.bsky.embed.images",
      "images": [
        {
          "alt": "",
          "image": {
            "$type": "blob",
            "ref": {
              "$link": "bafkreics6ujprixgffniysm36dxj7utqtkwyqt4hyhnieep62rwj6hlve4"
            },
            "mimeType": "image/jpeg",
            "size": 213260
          },
          "aspectRatio": {
            "width": 1180,
            "height": 574
          }
        }
      ]
    },
    "langs": [
      "en"
    ],
    "facets": [
      {
        "index": {
          "byteEnd": 210,
          "byteStart": 184
        },
        "features": [
          {
            "uri": "https://medium.com/people-ai-research/scaling-training-data-attribution-f7d1eddd85da",
            "$type": "app.bsky.richtext.facet#link"
          }
        ]
      }
    ],
    "createdAt": "2024-12-13T18:57:01.182Z"
  }
}