Experimental browser for the Atmosphere
Next-token prediction with the logarithmic loss is the basis of language model pre-training, but a popular criticism is that it can suffer from error amplification, where generation quality degrades as sequence length $H$ increases due to compounding errors. 2/11
Feb 23, 2025, 9:00 PM
{ "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3liuq3lhrwk2e", "cid": "bafyreicgwb7ryagtjwbrsijvia6g7xbevbaoz4tr6r2hqcqdp2rvvv7smq", "value": { "text": "Next-token prediction with the logarithmic loss is the basis of language model pre-training, but a popular criticism is that it can suffer from error amplification, where generation quality degrades as sequence length $H$ increases due to compounding errors. \n\n2/11", "$type": "app.bsky.feed.post", "embed": { "$type": "app.bsky.embed.images", "images": [ { "alt": "", "image": { "$type": "blob", "ref": { "$link": "bafkreibca7utenbabgipwaug7rl3k47omlhgfnj2o37ptgged5mqdn4c3e" }, "mimeType": "image/jpeg", "size": 220694 }, "aspectRatio": { "width": 800, "height": 457 } }, { "alt": "", "image": { "$type": "blob", "ref": { "$link": "bafkreib3kg3425vuvwwsjy7vbxrfisken5y2oeudro4tx6m7a56ogbiybe" }, "mimeType": "image/jpeg", "size": 314688 }, "aspectRatio": { "width": 1999, "height": 470 } } ] }, "langs": [ "en" ], "reply": { "root": { "cid": "bafyreids3t5hbin5gzohv5ftcospo6v4izvleqfb2uzygkbc7dok3yncuq", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3liuq3jtnxk2e" }, "parent": { "cid": "bafyreids3t5hbin5gzohv5ftcospo6v4izvleqfb2uzygkbc7dok3yncuq", "uri": "at://did:plc:x2a3inabvfsn4wntrlbbndrv/app.bsky.feed.post/3liuq3jtnxk2e" } }, "createdAt": "2025-02-23T21:00:20.781Z" } }