ATProto Browser

ATProto Browser

Experimental browser for the Atmosphere

Post

4. Efficiently Scaling Transformer Inference (Pope et al., 2022) An amazing paper about inference optimization for transformers. It provides a guideline to optimize for different aspects, e.g. lowest possible latency, highest possible throughput, or longest context length: arxiv.org/abs/2211.05102

Dec 13, 2024, 4:05 AM

Record data

{
  "uri": "at://did:plc:xdnifsedajxdcxn7wui6nxev/app.bsky.feed.post/3ld5vtdbm5c2w",
  "cid": "bafyreifghrwc64xdsjqwgojra3k63rssinmowrks3hds2ahm4tbmvmarsi",
  "value": {
    "text": "4. Efficiently Scaling Transformer Inference (Pope et al., 2022)\n\nAn amazing paper about inference optimization for transformers. It provides a guideline to optimize for different aspects, e.g. lowest possible latency, highest possible throughput, or longest context length: arxiv.org/abs/2211.05102",
    "$type": "app.bsky.feed.post",
    "embed": {
      "$type": "app.bsky.embed.external",
      "external": {
        "uri": "https://arxiv.org/abs/2211.05102",
        "thumb": {
          "$type": "blob",
          "ref": {
            "$link": "bafkreibdnoxxy325kte7feolq77arkvjvgsnmsmzwmsr4spghh3jug5cpy"
          },
          "mimeType": "image/jpeg",
          "size": 146963
        },
        "title": "Efficiently Scaling Transformer Inference",
        "description": "We study the problem of efficient generative inference for Transformer models, in one of its most challenging settings: large deep models, with tight latency targets and long sequence lengths. Better ..."
      }
    },
    "langs": [
      "en"
    ],
    "reply": {
      "root": {
        "cid": "bafyreiboicjtmoshf6b7i536sckhbylx3qronji5udz5woj3nwoqne76dy",
        "uri": "at://did:plc:xdnifsedajxdcxn7wui6nxev/app.bsky.feed.post/3ld5vjdpcwk2w"
      },
      "parent": {
        "cid": "bafyreiddssda44pszffilcf2bf3qefkd7l7bgfvur4upthwbv46f67w2we",
        "uri": "at://did:plc:xdnifsedajxdcxn7wui6nxev/app.bsky.feed.post/3ld5vtctuos2w"
      }
    },
    "facets": [
      {
        "index": {
          "byteEnd": 299,
          "byteStart": 275
        },
        "features": [
          {
            "uri": "https://arxiv.org/abs/2211.05102",
            "$type": "app.bsky.richtext.facet#link"
          }
        ]
      }
    ],
    "createdAt": "2024-12-13T04:05:53.196Z"
  }
}