Experimental browser for the Atmosphere
my takeaway from Vighnesh's writeup: google's main argument about under-training/lack of pre-training isn't super convincing. convergence of train loss is not always desirable. early stopping can help, and their own paper shows mixed results on importance of pre-training
Dec 1, 2024, 9:03 AM
{ "uri": "at://did:plc:aesb2dqqqmodtvf24fs75y4r/app.bsky.feed.post/3lcaauro7wk2s", "cid": "bafyreiaqewpmmx2dcyt4sjstdmsgzyozrhtgd3dntwjxjhmtetoluwh72y", "value": { "text": "my takeaway from Vighnesh's writeup: google's main argument about under-training/lack of pre-training isn't super convincing. convergence of train loss is not always desirable. early stopping can help, and their own paper shows mixed results on importance of pre-training", "$type": "app.bsky.feed.post", "embed": { "$type": "app.bsky.embed.images", "images": [ { "alt": "", "image": { "$type": "blob", "ref": { "$link": "bafkreiak4lvojaoqukvo4xrn2rbkxddv5gikood2rw3fubexm22bv2rjja" }, "mimeType": "image/jpeg", "size": 579869 }, "aspectRatio": { "width": 2000, "height": 994 } } ] }, "langs": [ "en" ], "reply": { "root": { "cid": "bafyreicawsyrlpev7l7gaok4qjk5bleuqur3lw2y2ofasi4cjr4o6vrt2q", "uri": "at://did:plc:aesb2dqqqmodtvf24fs75y4r/app.bsky.feed.post/3lca7ue5qvc2s" }, "parent": { "cid": "bafyreicawsyrlpev7l7gaok4qjk5bleuqur3lw2y2ofasi4cjr4o6vrt2q", "uri": "at://did:plc:aesb2dqqqmodtvf24fs75y4r/app.bsky.feed.post/3lca7ue5qvc2s" } }, "createdAt": "2024-12-01T09:03:43.446Z" } }