{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.73972602739726,
  "eval_steps": 500,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "debug/num_lat_loss": 1603.0,
      "debug/num_lat_total": 2294.0,
      "debug/num_tok_loss": 1603.0,
      "debug/num_tok_total": 2294.0,
      "epoch": 0,
      "step": 0
    },
    {
      "epoch": 0,
      "step": 0,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.5052313208580017
    },
    {
      "epoch": 0,
      "step": 0,
      "train/learning_rate_real": 0.0
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2404.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2404.0,
      "epoch": 0,
      "step": 0
    },
    {
      "epoch": 0,
      "step": 0,
      "train/ce_loss": 23.375,
      "train/diffusion_loss": 0.5152708888053894
    },
    {
      "epoch": 0,
      "step": 0,
      "train/learning_rate_real": 0.0
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2651.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2651.0,
      "epoch": 0,
      "step": 0
    },
    {
      "epoch": 0,
      "step": 0,
      "train/ce_loss": 23.75,
      "train/diffusion_loss": 0.49939244985580444
    },
    {
      "epoch": 0,
      "step": 0,
      "train/learning_rate_real": 0.0
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2892.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2892.0,
      "epoch": 0,
      "step": 0
    },
    {
      "epoch": 0,
      "step": 0,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.4265933632850647
    },
    {
      "epoch": 0,
      "step": 0,
      "train/learning_rate_real": 0.0
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 3242.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 3242.0,
      "epoch": 0.0136986301369863,
      "step": 1
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.35353660583496094
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/learning_rate_real": 1.1363636363636364e-06
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2013.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2013.0,
      "epoch": 0.0136986301369863,
      "step": 1
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.5851874947547913
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/learning_rate_real": 1.1363636363636364e-06
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 0.0136986301369863,
      "step": 1
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.49107739329338074
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/learning_rate_real": 1.1363636363636364e-06
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2911.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2911.0,
      "epoch": 0.0136986301369863,
      "step": 1
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.461406946182251
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/learning_rate_real": 1.1363636363636364e-06
    },
    {
      "debug/num_lat_loss": 1642.0,
      "debug/num_lat_total": 2128.0,
      "debug/num_tok_loss": 1642.0,
      "debug/num_tok_total": 2128.0,
      "epoch": 0.0273972602739726,
      "step": 2
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/ce_loss": 23.375,
      "train/diffusion_loss": 0.5282515287399292
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/learning_rate_real": 2.2727272727272728e-06
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2405.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2405.0,
      "epoch": 0.0273972602739726,
      "step": 2
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.5025462508201599
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/learning_rate_real": 2.2727272727272728e-06
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2867.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2867.0,
      "epoch": 0.0273972602739726,
      "step": 2
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/ce_loss": 23.125,
      "train/diffusion_loss": 0.42426061630249023
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/learning_rate_real": 2.2727272727272728e-06
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2234.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2234.0,
      "epoch": 0.0273972602739726,
      "step": 2
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/ce_loss": 24.125,
      "train/diffusion_loss": 0.5727062225341797
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/learning_rate_real": 2.2727272727272728e-06
    },
    {
      "debug/num_lat_loss": 1648.0,
      "debug/num_lat_total": 3015.0,
      "debug/num_tok_loss": 1648.0,
      "debug/num_tok_total": 3015.0,
      "epoch": 0.0410958904109589,
      "step": 3
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.3512691855430603
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/learning_rate_real": 3.409090909090909e-06
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 0.0410958904109589,
      "step": 3
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/ce_loss": 23.875,
      "train/diffusion_loss": 0.48421576619148254
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/learning_rate_real": 3.409090909090909e-06
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2825.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2825.0,
      "epoch": 0.0410958904109589,
      "step": 3
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/ce_loss": 23.75,
      "train/diffusion_loss": 0.4260019361972809
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/learning_rate_real": 3.409090909090909e-06
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2866.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2866.0,
      "epoch": 0.0410958904109589,
      "step": 3
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.42226922512054443
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/learning_rate_real": 3.409090909090909e-06
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2354.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2354.0,
      "epoch": 0.0547945205479452,
      "step": 4
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4710257947444916
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/learning_rate_real": 4.5454545454545455e-06
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2670.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2670.0,
      "epoch": 0.0547945205479452,
      "step": 4
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/ce_loss": 24.0,
      "train/diffusion_loss": 0.4669632911682129
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/learning_rate_real": 4.5454545454545455e-06
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2660.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2660.0,
      "epoch": 0.0547945205479452,
      "step": 4
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/ce_loss": 23.75,
      "train/diffusion_loss": 0.4852781295776367
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/learning_rate_real": 4.5454545454545455e-06
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2247.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2247.0,
      "epoch": 0.0547945205479452,
      "step": 4
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.5597466826438904
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/learning_rate_real": 4.5454545454545455e-06
    },
    {
      "debug/num_lat_loss": 1755.0,
      "debug/num_lat_total": 2583.0,
      "debug/num_tok_loss": 1755.0,
      "debug/num_tok_total": 2583.0,
      "epoch": 0.0684931506849315,
      "step": 5
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.48908230662345886
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/learning_rate_real": 5.681818181818182e-06
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2606.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2606.0,
      "epoch": 0.0684931506849315,
      "step": 5
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/ce_loss": 23.375,
      "train/diffusion_loss": 0.4757000505924225
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/learning_rate_real": 5.681818181818182e-06
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 0.0684931506849315,
      "step": 5
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/ce_loss": 23.75,
      "train/diffusion_loss": 0.5041497945785522
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/learning_rate_real": 5.681818181818182e-06
    },
    {
      "debug/num_lat_loss": 1617.0,
      "debug/num_lat_total": 1823.0,
      "debug/num_tok_loss": 1617.0,
      "debug/num_tok_total": 1823.0,
      "epoch": 0.0684931506849315,
      "step": 5
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.5805519223213196
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/learning_rate_real": 5.681818181818182e-06
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2836.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2836.0,
      "epoch": 0.0821917808219178,
      "step": 6
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4666789472103119
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/learning_rate_real": 6.818181818181818e-06
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 0.0821917808219178,
      "step": 6
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.5358713865280151
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/learning_rate_real": 6.818181818181818e-06
    },
    {
      "debug/num_lat_loss": 1634.0,
      "debug/num_lat_total": 2344.0,
      "debug/num_tok_loss": 1634.0,
      "debug/num_tok_total": 2344.0,
      "epoch": 0.0821917808219178,
      "step": 6
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4909515976905823
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/learning_rate_real": 6.818181818181818e-06
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 3088.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 3088.0,
      "epoch": 0.0821917808219178,
      "step": 6
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.3803665339946747
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/learning_rate_real": 6.818181818181818e-06
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2623.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2623.0,
      "epoch": 0.0958904109589041,
      "step": 7
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4644874334335327
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/learning_rate_real": 7.954545454545455e-06
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2829.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2829.0,
      "epoch": 0.0958904109589041,
      "step": 7
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/ce_loss": 23.375,
      "train/diffusion_loss": 0.42349711060523987
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/learning_rate_real": 7.954545454545455e-06
    },
    {
      "debug/num_lat_loss": 1600.0,
      "debug/num_lat_total": 2279.0,
      "debug/num_tok_loss": 1600.0,
      "debug/num_tok_total": 2279.0,
      "epoch": 0.0958904109589041,
      "step": 7
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/ce_loss": 23.0,
      "train/diffusion_loss": 0.4753822982311249
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/learning_rate_real": 7.954545454545455e-06
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2679.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2679.0,
      "epoch": 0.0958904109589041,
      "step": 7
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.4810636341571808
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/learning_rate_real": 7.954545454545455e-06
    },
    {
      "debug/num_lat_loss": 1699.0,
      "debug/num_lat_total": 2348.0,
      "debug/num_tok_loss": 1699.0,
      "debug/num_tok_total": 2348.0,
      "epoch": 0.1095890410958904,
      "step": 8
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/ce_loss": 23.0,
      "train/diffusion_loss": 0.5147892832756042
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/learning_rate_real": 9.090909090909091e-06
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3080.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3080.0,
      "epoch": 0.1095890410958904,
      "step": 8
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.3648412227630615
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/learning_rate_real": 9.090909090909091e-06
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 0.1095890410958904,
      "step": 8
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4737395942211151
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/learning_rate_real": 9.090909090909091e-06
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 0.1095890410958904,
      "step": 8
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/ce_loss": 23.125,
      "train/diffusion_loss": 0.4529899060726166
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/learning_rate_real": 9.090909090909091e-06
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 0.1232876712328767,
      "step": 9
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.460472971200943
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/learning_rate_real": 1.0227272727272729e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2845.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2845.0,
      "epoch": 0.1232876712328767,
      "step": 9
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.39518892765045166
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/learning_rate_real": 1.0227272727272729e-05
    },
    {
      "debug/num_lat_loss": 1814.0,
      "debug/num_lat_total": 2689.0,
      "debug/num_tok_loss": 1814.0,
      "debug/num_tok_total": 2689.0,
      "epoch": 0.1232876712328767,
      "step": 9
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.4926188588142395
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/learning_rate_real": 1.0227272727272729e-05
    },
    {
      "debug/num_lat_loss": 1575.0,
      "debug/num_lat_total": 2250.0,
      "debug/num_tok_loss": 1575.0,
      "debug/num_tok_total": 2250.0,
      "epoch": 0.1232876712328767,
      "step": 9
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/ce_loss": 22.625,
      "train/diffusion_loss": 0.48270726203918457
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/learning_rate_real": 1.0227272727272729e-05
    },
    {
      "epoch": 0.136986301369863,
      "grad_norm": 11.971195220947266,
      "learning_rate": 1.0227272727272729e-05,
      "loss": 6.3985,
      "step": 10
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 3053.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 3053.0,
      "epoch": 0.136986301369863,
      "step": 10
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.35791292786598206
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/learning_rate_real": 1.1363636363636365e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3087.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3087.0,
      "epoch": 0.136986301369863,
      "step": 10
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.38981443643569946
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/learning_rate_real": 1.1363636363636365e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2231.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2231.0,
      "epoch": 0.136986301369863,
      "step": 10
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/ce_loss": 22.5,
      "train/diffusion_loss": 0.5622515678405762
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/learning_rate_real": 1.1363636363636365e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2867.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2867.0,
      "epoch": 0.136986301369863,
      "step": 10
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/ce_loss": 22.625,
      "train/diffusion_loss": 0.3926439881324768
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/learning_rate_real": 1.1363636363636365e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 0.1506849315068493,
      "step": 11
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/ce_loss": 22.375,
      "train/diffusion_loss": 0.4806886315345764
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2848.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2848.0,
      "epoch": 0.1506849315068493,
      "step": 11
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/ce_loss": 22.125,
      "train/diffusion_loss": 0.4312594532966614
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2875.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2875.0,
      "epoch": 0.1506849315068493,
      "step": 11
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/ce_loss": 22.125,
      "train/diffusion_loss": 0.4150593876838684
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2418.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2418.0,
      "epoch": 0.1506849315068493,
      "step": 11
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/ce_loss": 22.0,
      "train/diffusion_loss": 0.49140727519989014
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1732.0,
      "debug/num_lat_total": 2536.0,
      "debug/num_tok_loss": 1732.0,
      "debug/num_tok_total": 2536.0,
      "epoch": 0.1643835616438356,
      "step": 12
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/ce_loss": 21.5,
      "train/diffusion_loss": 0.4987460970878601
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/learning_rate_real": 1.3636363636363637e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 0.1643835616438356,
      "step": 12
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/ce_loss": 21.375,
      "train/diffusion_loss": 0.4842108190059662
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/learning_rate_real": 1.3636363636363637e-05
    },
    {
      "debug/num_lat_loss": 1702.0,
      "debug/num_lat_total": 2363.0,
      "debug/num_tok_loss": 1702.0,
      "debug/num_tok_total": 2363.0,
      "epoch": 0.1643835616438356,
      "step": 12
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/ce_loss": 21.5,
      "train/diffusion_loss": 0.4798933267593384
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/learning_rate_real": 1.3636363636363637e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2623.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2623.0,
      "epoch": 0.1643835616438356,
      "step": 12
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/ce_loss": 21.625,
      "train/diffusion_loss": 0.48570314049720764
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/learning_rate_real": 1.3636363636363637e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2404.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2404.0,
      "epoch": 0.1780821917808219,
      "step": 13
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/ce_loss": 20.75,
      "train/diffusion_loss": 0.4996339678764343
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/learning_rate_real": 1.4772727272727274e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2452.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2452.0,
      "epoch": 0.1780821917808219,
      "step": 13
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/ce_loss": 20.75,
      "train/diffusion_loss": 0.5086291432380676
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/learning_rate_real": 1.4772727272727274e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 0.1780821917808219,
      "step": 13
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/ce_loss": 20.875,
      "train/diffusion_loss": 0.4561443328857422
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/learning_rate_real": 1.4772727272727274e-05
    },
    {
      "debug/num_lat_loss": 1655.0,
      "debug/num_lat_total": 3249.0,
      "debug/num_tok_loss": 1655.0,
      "debug/num_tok_total": 3249.0,
      "epoch": 0.1780821917808219,
      "step": 13
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/ce_loss": 21.125,
      "train/diffusion_loss": 0.26086410880088806
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/learning_rate_real": 1.4772727272727274e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2870.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2870.0,
      "epoch": 0.1917808219178082,
      "step": 14
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/ce_loss": 20.0,
      "train/diffusion_loss": 0.408465713262558
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/learning_rate_real": 1.590909090909091e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 2379.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 2379.0,
      "epoch": 0.1917808219178082,
      "step": 14
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/ce_loss": 19.875,
      "train/diffusion_loss": 0.4767815172672272
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/learning_rate_real": 1.590909090909091e-05
    },
    {
      "debug/num_lat_loss": 1728.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1728.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 0.1917808219178082,
      "step": 14
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/ce_loss": 20.0,
      "train/diffusion_loss": 0.45503005385398865
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/learning_rate_real": 1.590909090909091e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2892.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2892.0,
      "epoch": 0.1917808219178082,
      "step": 14
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/ce_loss": 20.125,
      "train/diffusion_loss": 0.4370043873786926
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/learning_rate_real": 1.590909090909091e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 0.2054794520547945,
      "step": 15
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/ce_loss": 18.875,
      "train/diffusion_loss": 0.45287543535232544
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/learning_rate_real": 1.7045454545454546e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2454.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2454.0,
      "epoch": 0.2054794520547945,
      "step": 15
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/ce_loss": 18.875,
      "train/diffusion_loss": 0.5138697028160095
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/learning_rate_real": 1.7045454545454546e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2184.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2184.0,
      "epoch": 0.2054794520547945,
      "step": 15
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/ce_loss": 18.75,
      "train/diffusion_loss": 0.5638265013694763
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/learning_rate_real": 1.7045454545454546e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 1998.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 1998.0,
      "epoch": 0.2054794520547945,
      "step": 15
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/ce_loss": 18.625,
      "train/diffusion_loss": 0.5613153576850891
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/learning_rate_real": 1.7045454545454546e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 0.2191780821917808,
      "step": 16
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/ce_loss": 18.0,
      "train/diffusion_loss": 0.4571579396724701
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/learning_rate_real": 1.8181818181818182e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 0.2191780821917808,
      "step": 16
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/ce_loss": 18.25,
      "train/diffusion_loss": 0.3644043803215027
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/learning_rate_real": 1.8181818181818182e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 0.2191780821917808,
      "step": 16
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/ce_loss": 18.125,
      "train/diffusion_loss": 0.4329890012741089
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/learning_rate_real": 1.8181818181818182e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 0.2191780821917808,
      "step": 16
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/ce_loss": 17.75,
      "train/diffusion_loss": 0.4439554512500763
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/learning_rate_real": 1.8181818181818182e-05
    },
    {
      "debug/num_lat_loss": 1679.0,
      "debug/num_lat_total": 2760.0,
      "debug/num_tok_loss": 1679.0,
      "debug/num_tok_total": 2760.0,
      "epoch": 0.2328767123287671,
      "step": 17
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/ce_loss": 17.125,
      "train/diffusion_loss": 0.41194167733192444
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/learning_rate_real": 1.9318181818181818e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2848.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2848.0,
      "epoch": 0.2328767123287671,
      "step": 17
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/ce_loss": 16.875,
      "train/diffusion_loss": 0.4312850832939148
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/learning_rate_real": 1.9318181818181818e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2575.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2575.0,
      "epoch": 0.2328767123287671,
      "step": 17
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/ce_loss": 16.875,
      "train/diffusion_loss": 0.4552731513977051
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/learning_rate_real": 1.9318181818181818e-05
    },
    {
      "debug/num_lat_loss": 1823.0,
      "debug/num_lat_total": 3136.0,
      "debug/num_tok_loss": 1823.0,
      "debug/num_tok_total": 3136.0,
      "epoch": 0.2328767123287671,
      "step": 17
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/ce_loss": 17.375,
      "train/diffusion_loss": 0.37979450821876526
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/learning_rate_real": 1.9318181818181818e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2678.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2678.0,
      "epoch": 0.2465753424657534,
      "step": 18
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/ce_loss": 15.6875,
      "train/diffusion_loss": 0.45173999667167664
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/learning_rate_real": 2.0454545454545457e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2446.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2446.0,
      "epoch": 0.2465753424657534,
      "step": 18
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/ce_loss": 15.625,
      "train/diffusion_loss": 0.4798762798309326
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/learning_rate_real": 2.0454545454545457e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2654.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2654.0,
      "epoch": 0.2465753424657534,
      "step": 18
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/ce_loss": 15.6875,
      "train/diffusion_loss": 0.4615324139595032
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/learning_rate_real": 2.0454545454545457e-05
    },
    {
      "debug/num_lat_loss": 1628.0,
      "debug/num_lat_total": 2720.0,
      "debug/num_tok_loss": 1628.0,
      "debug/num_tok_total": 2720.0,
      "epoch": 0.2465753424657534,
      "step": 18
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/ce_loss": 16.25,
      "train/diffusion_loss": 0.365399032831192
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/learning_rate_real": 2.0454545454545457e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 0.2602739726027397,
      "step": 19
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/ce_loss": 14.6875,
      "train/diffusion_loss": 0.4907709062099457
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/learning_rate_real": 2.1590909090909093e-05
    },
    {
      "debug/num_lat_loss": 1752.0,
      "debug/num_lat_total": 2186.0,
      "debug/num_tok_loss": 1752.0,
      "debug/num_tok_total": 2186.0,
      "epoch": 0.2602739726027397,
      "step": 19
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/ce_loss": 14.125,
      "train/diffusion_loss": 0.542559027671814
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/learning_rate_real": 2.1590909090909093e-05
    },
    {
      "debug/num_lat_loss": 1687.0,
      "debug/num_lat_total": 2674.0,
      "debug/num_tok_loss": 1687.0,
      "debug/num_tok_total": 2674.0,
      "epoch": 0.2602739726027397,
      "step": 19
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/ce_loss": 15.0625,
      "train/diffusion_loss": 0.38787195086479187
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/learning_rate_real": 2.1590909090909093e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2032.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2032.0,
      "epoch": 0.2602739726027397,
      "step": 19
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/ce_loss": 13.875,
      "train/diffusion_loss": 0.5763474106788635
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/learning_rate_real": 2.1590909090909093e-05
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 8.30610466003418,
      "learning_rate": 2.1590909090909093e-05,
      "loss": 5.6093,
      "step": 20
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 3130.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 3130.0,
      "epoch": 0.273972602739726,
      "step": 20
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/ce_loss": 14.8125,
      "train/diffusion_loss": 0.3510192036628723
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/learning_rate_real": 2.272727272727273e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2193.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2193.0,
      "epoch": 0.273972602739726,
      "step": 20
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/ce_loss": 13.4375,
      "train/diffusion_loss": 0.5544577836990356
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/learning_rate_real": 2.272727272727273e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2394.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2394.0,
      "epoch": 0.273972602739726,
      "step": 20
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/ce_loss": 13.75,
      "train/diffusion_loss": 0.48900994658470154
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/learning_rate_real": 2.272727272727273e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2868.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2868.0,
      "epoch": 0.273972602739726,
      "step": 20
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/ce_loss": 14.25,
      "train/diffusion_loss": 0.3776327073574066
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/learning_rate_real": 2.272727272727273e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2857.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2857.0,
      "epoch": 0.2876712328767123,
      "step": 21
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/ce_loss": 13.875,
      "train/diffusion_loss": 0.39368781447410583
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/learning_rate_real": 2.3863636363636365e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2192.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2192.0,
      "epoch": 0.2876712328767123,
      "step": 21
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/ce_loss": 12.875,
      "train/diffusion_loss": 0.5420176982879639
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/learning_rate_real": 2.3863636363636365e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 3104.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 3104.0,
      "epoch": 0.2876712328767123,
      "step": 21
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/ce_loss": 14.0625,
      "train/diffusion_loss": 0.3666694760322571
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/learning_rate_real": 2.3863636363636365e-05
    },
    {
      "debug/num_lat_loss": 1634.0,
      "debug/num_lat_total": 2789.0,
      "debug/num_tok_loss": 1634.0,
      "debug/num_tok_total": 2789.0,
      "epoch": 0.2876712328767123,
      "step": 21
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/ce_loss": 14.0,
      "train/diffusion_loss": 0.37210342288017273
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/learning_rate_real": 2.3863636363636365e-05
    },
    {
      "debug/num_lat_loss": 1586.0,
      "debug/num_lat_total": 2434.0,
      "debug/num_tok_loss": 1586.0,
      "debug/num_tok_total": 2434.0,
      "epoch": 0.3013698630136986,
      "step": 22
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/ce_loss": 13.1875,
      "train/diffusion_loss": 0.4085391163825989
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/learning_rate_real": 2.5e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2223.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2223.0,
      "epoch": 0.3013698630136986,
      "step": 22
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/ce_loss": 12.4375,
      "train/diffusion_loss": 0.5267496705055237
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/learning_rate_real": 2.5e-05
    },
    {
      "debug/num_lat_loss": 1751.0,
      "debug/num_lat_total": 3025.0,
      "debug/num_tok_loss": 1751.0,
      "debug/num_tok_total": 3025.0,
      "epoch": 0.3013698630136986,
      "step": 22
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/ce_loss": 13.5625,
      "train/diffusion_loss": 0.3490737974643707
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/learning_rate_real": 2.5e-05
    },
    {
      "debug/num_lat_loss": 1596.0,
      "debug/num_lat_total": 2684.0,
      "debug/num_tok_loss": 1596.0,
      "debug/num_tok_total": 2684.0,
      "epoch": 0.3013698630136986,
      "step": 22
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/ce_loss": 13.3125,
      "train/diffusion_loss": 0.3688155710697174
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/learning_rate_real": 2.5e-05
    },
    {
      "debug/num_lat_loss": 1584.0,
      "debug/num_lat_total": 2224.0,
      "debug/num_tok_loss": 1584.0,
      "debug/num_tok_total": 2224.0,
      "epoch": 0.3150684931506849,
      "step": 23
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/ce_loss": 12.5,
      "train/diffusion_loss": 0.4947201907634735
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/learning_rate_real": 2.4999876941239957e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 0.3150684931506849,
      "step": 23
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/ce_loss": 12.6875,
      "train/diffusion_loss": 0.42563173174858093
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/learning_rate_real": 2.4999876941239957e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 0.3150684931506849,
      "step": 23
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/ce_loss": 12.5625,
      "train/diffusion_loss": 0.4141148328781128
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/learning_rate_real": 2.4999876941239957e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 3114.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 3114.0,
      "epoch": 0.3150684931506849,
      "step": 23
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/ce_loss": 13.3125,
      "train/diffusion_loss": 0.3588603138923645
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/learning_rate_real": 2.4999876941239957e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2680.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2680.0,
      "epoch": 0.3287671232876712,
      "step": 24
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/ce_loss": 12.1875,
      "train/diffusion_loss": 0.4522026479244232
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/learning_rate_real": 2.4999507767382776e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2847.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2847.0,
      "epoch": 0.3287671232876712,
      "step": 24
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/ce_loss": 12.3125,
      "train/diffusion_loss": 0.37764832377433777
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/learning_rate_real": 2.4999507767382776e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2842.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2842.0,
      "epoch": 0.3287671232876712,
      "step": 24
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/ce_loss": 12.3125,
      "train/diffusion_loss": 0.4105679392814636
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/learning_rate_real": 2.4999507767382776e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 3031.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 3031.0,
      "epoch": 0.3287671232876712,
      "step": 24
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/ce_loss": 12.4375,
      "train/diffusion_loss": 0.3406731188297272
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/learning_rate_real": 2.4999507767382776e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2465.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2465.0,
      "epoch": 0.3424657534246575,
      "step": 25
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/ce_loss": 11.625,
      "train/diffusion_loss": 0.4853813946247101
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/learning_rate_real": 2.4998892485697274e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 0.3424657534246575,
      "step": 25
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/ce_loss": 11.6875,
      "train/diffusion_loss": 0.44053584337234497
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/learning_rate_real": 2.4998892485697274e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 3020.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 3020.0,
      "epoch": 0.3424657534246575,
      "step": 25
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/ce_loss": 12.125,
      "train/diffusion_loss": 0.3806762397289276
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/learning_rate_real": 2.4998892485697274e-05
    },
    {
      "debug/num_lat_loss": 1819.0,
      "debug/num_lat_total": 2919.0,
      "debug/num_tok_loss": 1819.0,
      "debug/num_tok_total": 2919.0,
      "epoch": 0.3424657534246575,
      "step": 25
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/ce_loss": 11.8125,
      "train/diffusion_loss": 0.4265860915184021
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/learning_rate_real": 2.4998892485697274e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2678.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2678.0,
      "epoch": 0.3561643835616438,
      "step": 26
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/ce_loss": 11.375,
      "train/diffusion_loss": 0.4423990845680237
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/learning_rate_real": 2.4998031108297975e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2840.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2840.0,
      "epoch": 0.3561643835616438,
      "step": 26
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/ce_loss": 11.5,
      "train/diffusion_loss": 0.3932268023490906
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/learning_rate_real": 2.4998031108297975e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3091.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3091.0,
      "epoch": 0.3561643835616438,
      "step": 26
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/ce_loss": 11.625,
      "train/diffusion_loss": 0.3441810607910156
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/learning_rate_real": 2.4998031108297975e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2841.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2841.0,
      "epoch": 0.3561643835616438,
      "step": 26
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/ce_loss": 11.5,
      "train/diffusion_loss": 0.4140407145023346
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/learning_rate_real": 2.4998031108297975e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2690.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2690.0,
      "epoch": 0.3698630136986301,
      "step": 27
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/ce_loss": 11.125,
      "train/diffusion_loss": 0.4142424166202545
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/learning_rate_real": 2.4996923652144887e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2457.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2457.0,
      "epoch": 0.3698630136986301,
      "step": 27
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/ce_loss": 11.125,
      "train/diffusion_loss": 0.4762253761291504
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/learning_rate_real": 2.4996923652144887e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2393.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2393.0,
      "epoch": 0.3698630136986301,
      "step": 27
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/ce_loss": 11.0,
      "train/diffusion_loss": 0.46639570593833923
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/learning_rate_real": 2.4996923652144887e-05
    },
    {
      "debug/num_lat_loss": 1621.0,
      "debug/num_lat_total": 2277.0,
      "debug/num_tok_loss": 1621.0,
      "debug/num_tok_total": 2277.0,
      "epoch": 0.3698630136986301,
      "step": 27
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/ce_loss": 11.0625,
      "train/diffusion_loss": 0.4759414494037628
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/learning_rate_real": 2.4996923652144887e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2847.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2847.0,
      "epoch": 0.3835616438356164,
      "step": 28
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/ce_loss": 11.0,
      "train/diffusion_loss": 0.40890657901763916
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/learning_rate_real": 2.4995570139043158e-05
    },
    {
      "debug/num_lat_loss": 1735.0,
      "debug/num_lat_total": 2790.0,
      "debug/num_tok_loss": 1735.0,
      "debug/num_tok_total": 2790.0,
      "epoch": 0.3835616438356164,
      "step": 28
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/ce_loss": 11.0,
      "train/diffusion_loss": 0.3836289644241333
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/learning_rate_real": 2.4995570139043158e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 3062.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 3062.0,
      "epoch": 0.3835616438356164,
      "step": 28
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/ce_loss": 11.0625,
      "train/diffusion_loss": 0.32098737359046936
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/learning_rate_real": 2.4995570139043158e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 3061.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 3061.0,
      "epoch": 0.3835616438356164,
      "step": 28
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/ce_loss": 11.0,
      "train/diffusion_loss": 0.35937121510505676
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/learning_rate_real": 2.4995570139043158e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2669.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2669.0,
      "epoch": 0.3972602739726027,
      "step": 29
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/ce_loss": 10.75,
      "train/diffusion_loss": 0.4344680905342102
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/learning_rate_real": 2.499397059564265e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 3022.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 3022.0,
      "epoch": 0.3972602739726027,
      "step": 29
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/ce_loss": 10.8125,
      "train/diffusion_loss": 0.34724363684654236
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/learning_rate_real": 2.499397059564265e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2866.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2866.0,
      "epoch": 0.3972602739726027,
      "step": 29
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/ce_loss": 10.75,
      "train/diffusion_loss": 0.39015740156173706
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/learning_rate_real": 2.499397059564265e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2869.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2869.0,
      "epoch": 0.3972602739726027,
      "step": 29
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/ce_loss": 10.75,
      "train/diffusion_loss": 0.37296244502067566
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/learning_rate_real": 2.499397059564265e-05
    },
    {
      "epoch": 0.410958904109589,
      "grad_norm": 3.1832733154296875,
      "learning_rate": 2.499397059564265e-05,
      "loss": 4.2717,
      "step": 30
    },
    {
      "debug/num_lat_loss": 1707.0,
      "debug/num_lat_total": 2707.0,
      "debug/num_tok_loss": 1707.0,
      "debug/num_tok_total": 2707.0,
      "epoch": 0.410958904109589,
      "step": 30
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/ce_loss": 10.625,
      "train/diffusion_loss": 0.38847485184669495
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/learning_rate_real": 2.499212505343742e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 0.410958904109589,
      "step": 30
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/ce_loss": 10.5625,
      "train/diffusion_loss": 0.42114052176475525
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/learning_rate_real": 2.499212505343742e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2236.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2236.0,
      "epoch": 0.410958904109589,
      "step": 30
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/ce_loss": 10.375,
      "train/diffusion_loss": 0.5161031484603882
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/learning_rate_real": 2.499212505343742e-05
    },
    {
      "debug/num_lat_loss": 1548.0,
      "debug/num_lat_total": 2354.0,
      "debug/num_tok_loss": 1548.0,
      "debug/num_tok_total": 2354.0,
      "epoch": 0.410958904109589,
      "step": 30
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/ce_loss": 10.5625,
      "train/diffusion_loss": 0.404449462890625
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/learning_rate_real": 2.499212505343742e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2417.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2417.0,
      "epoch": 0.4246575342465753,
      "step": 31
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/ce_loss": 10.25,
      "train/diffusion_loss": 0.48773378133773804
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/learning_rate_real": 2.4990033548765084e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2422.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2422.0,
      "epoch": 0.4246575342465753,
      "step": 31
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/ce_loss": 10.3125,
      "train/diffusion_loss": 0.4736463129520416
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/learning_rate_real": 2.4990033548765084e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2889.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2889.0,
      "epoch": 0.4246575342465753,
      "step": 31
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/ce_loss": 10.4375,
      "train/diffusion_loss": 0.347330778837204
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/learning_rate_real": 2.4990033548765084e-05
    },
    {
      "debug/num_lat_loss": 1755.0,
      "debug/num_lat_total": 2391.0,
      "debug/num_tok_loss": 1755.0,
      "debug/num_tok_total": 2391.0,
      "epoch": 0.4246575342465753,
      "step": 31
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/ce_loss": 10.25,
      "train/diffusion_loss": 0.48943084478378296
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/learning_rate_real": 2.4990033548765084e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2192.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2192.0,
      "epoch": 0.4383561643835616,
      "step": 32
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/ce_loss": 10.0,
      "train/diffusion_loss": 0.5378103852272034
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/learning_rate_real": 2.4987696122806127e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2235.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2235.0,
      "epoch": 0.4383561643835616,
      "step": 32
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/ce_loss": 10.0,
      "train/diffusion_loss": 0.546953022480011
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/learning_rate_real": 2.4987696122806127e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 3086.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 3086.0,
      "epoch": 0.4383561643835616,
      "step": 32
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/ce_loss": 10.25,
      "train/diffusion_loss": 0.3321462869644165
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/learning_rate_real": 2.4987696122806127e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2842.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2842.0,
      "epoch": 0.4383561643835616,
      "step": 32
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/ce_loss": 10.1875,
      "train/diffusion_loss": 0.39034295082092285
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/learning_rate_real": 2.4987696122806127e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 0.4520547945205479,
      "step": 33
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/ce_loss": 9.9375,
      "train/diffusion_loss": 0.4693682789802551
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/learning_rate_real": 2.498511282158305e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 0.4520547945205479,
      "step": 33
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/ce_loss": 9.9375,
      "train/diffusion_loss": 0.39866846799850464
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/learning_rate_real": 2.498511282158305e-05
    },
    {
      "debug/num_lat_loss": 1599.0,
      "debug/num_lat_total": 2706.0,
      "debug/num_tok_loss": 1599.0,
      "debug/num_tok_total": 2706.0,
      "epoch": 0.4520547945205479,
      "step": 33
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/ce_loss": 10.0,
      "train/diffusion_loss": 0.3576337397098541
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/learning_rate_real": 2.498511282158305e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 1977.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 1977.0,
      "epoch": 0.4520547945205479,
      "step": 33
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/ce_loss": 9.75,
      "train/diffusion_loss": 0.5810636878013611
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/learning_rate_real": 2.498511282158305e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2402.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2402.0,
      "epoch": 0.4657534246575342,
      "step": 34
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/ce_loss": 9.6875,
      "train/diffusion_loss": 0.4539051651954651
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/learning_rate_real": 2.4982283695959525e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2834.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2834.0,
      "epoch": 0.4657534246575342,
      "step": 34
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/ce_loss": 9.6875,
      "train/diffusion_loss": 0.3914901614189148
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/learning_rate_real": 2.4982283695959525e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 3059.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 3059.0,
      "epoch": 0.4657534246575342,
      "step": 34
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/ce_loss": 9.8125,
      "train/diffusion_loss": 0.3305813670158386
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/learning_rate_real": 2.4982283695959525e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2895.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2895.0,
      "epoch": 0.4657534246575342,
      "step": 34
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/ce_loss": 9.75,
      "train/diffusion_loss": 0.3716033399105072
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/learning_rate_real": 2.4982283695959525e-05
    },
    {
      "debug/num_lat_loss": 1817.0,
      "debug/num_lat_total": 3350.0,
      "debug/num_tok_loss": 1817.0,
      "debug/num_tok_total": 3350.0,
      "epoch": 0.4794520547945205,
      "step": 35
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/ce_loss": 9.5625,
      "train/diffusion_loss": 0.27160805463790894
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/learning_rate_real": 2.4979208801639335e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2438.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2438.0,
      "epoch": 0.4794520547945205,
      "step": 35
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/ce_loss": 9.4375,
      "train/diffusion_loss": 0.48116302490234375
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/learning_rate_real": 2.4979208801639335e-05
    },
    {
      "debug/num_lat_loss": 1652.0,
      "debug/num_lat_total": 2077.0,
      "debug/num_tok_loss": 1652.0,
      "debug/num_tok_total": 2077.0,
      "epoch": 0.4794520547945205,
      "step": 35
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/ce_loss": 9.375,
      "train/diffusion_loss": 0.5048086643218994
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/learning_rate_real": 2.4979208801639335e-05
    },
    {
      "debug/num_lat_loss": 1626.0,
      "debug/num_lat_total": 2331.0,
      "debug/num_tok_loss": 1626.0,
      "debug/num_tok_total": 2331.0,
      "epoch": 0.4794520547945205,
      "step": 35
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/ce_loss": 9.5,
      "train/diffusion_loss": 0.43859153985977173
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/learning_rate_real": 2.4979208801639335e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2455.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2455.0,
      "epoch": 0.4931506849315068,
      "step": 36
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/ce_loss": 9.25,
      "train/diffusion_loss": 0.4600470960140228
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/learning_rate_real": 2.497588819916531e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2444.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2444.0,
      "epoch": 0.4931506849315068,
      "step": 36
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/ce_loss": 9.125,
      "train/diffusion_loss": 0.5146046876907349
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/learning_rate_real": 2.497588819916531e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2006.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2006.0,
      "epoch": 0.4931506849315068,
      "step": 36
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/ce_loss": 9.0,
      "train/diffusion_loss": 0.5873913168907166
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/learning_rate_real": 2.497588819916531e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2867.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2867.0,
      "epoch": 0.4931506849315068,
      "step": 36
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/ce_loss": 9.25,
      "train/diffusion_loss": 0.36510205268859863
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/learning_rate_real": 2.497588819916531e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2899.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2899.0,
      "epoch": 0.5068493150684932,
      "step": 37
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/ce_loss": 9.0,
      "train/diffusion_loss": 0.3962245285511017
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/learning_rate_real": 2.4972321953918126e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 0.5068493150684932,
      "step": 37
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/ce_loss": 8.9375,
      "train/diffusion_loss": 0.41625067591667175
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/learning_rate_real": 2.4972321953918126e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2415.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2415.0,
      "epoch": 0.5068493150684932,
      "step": 37
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/ce_loss": 8.875,
      "train/diffusion_loss": 0.45504698157310486
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/learning_rate_real": 2.4972321953918126e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2875.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2875.0,
      "epoch": 0.5068493150684932,
      "step": 37
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/ce_loss": 8.9375,
      "train/diffusion_loss": 0.3914295434951782
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/learning_rate_real": 2.4972321953918126e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2664.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2664.0,
      "epoch": 0.5205479452054794,
      "step": 38
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/ce_loss": 8.625,
      "train/diffusion_loss": 0.4318960905075073
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/learning_rate_real": 2.496851013611502e-05
    },
    {
      "debug/num_lat_loss": 1677.0,
      "debug/num_lat_total": 2958.0,
      "debug/num_tok_loss": 1677.0,
      "debug/num_tok_total": 2958.0,
      "epoch": 0.5205479452054794,
      "step": 38
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/ce_loss": 8.8125,
      "train/diffusion_loss": 0.31698915362358093
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/learning_rate_real": 2.496851013611502e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 0.5205479452054794,
      "step": 38
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/ce_loss": 8.5625,
      "train/diffusion_loss": 0.481392502784729
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/learning_rate_real": 2.496851013611502e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 0.5205479452054794,
      "step": 38
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/ce_loss": 8.6875,
      "train/diffusion_loss": 0.46959739923477173
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/learning_rate_real": 2.496851013611502e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3080.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3080.0,
      "epoch": 0.5342465753424658,
      "step": 39
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/ce_loss": 8.5,
      "train/diffusion_loss": 0.35770803689956665
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/learning_rate_real": 2.4964452820808397e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2442.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2442.0,
      "epoch": 0.5342465753424658,
      "step": 39
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/ce_loss": 8.3125,
      "train/diffusion_loss": 0.4941246509552002
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/learning_rate_real": 2.4964452820808397e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2457.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2457.0,
      "epoch": 0.5342465753424658,
      "step": 39
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/ce_loss": 8.375,
      "train/diffusion_loss": 0.4619571268558502
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/learning_rate_real": 2.4964452820808397e-05
    },
    {
      "debug/num_lat_loss": 1661.0,
      "debug/num_lat_total": 2303.0,
      "debug/num_tok_loss": 1661.0,
      "debug/num_tok_total": 2303.0,
      "epoch": 0.5342465753424658,
      "step": 39
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/ce_loss": 8.375,
      "train/diffusion_loss": 0.4603547155857086
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/learning_rate_real": 2.4964452820808397e-05
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 2.600710391998291,
      "learning_rate": 2.4964452820808397e-05,
      "loss": 3.9661,
      "step": 40
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 0.547945205479452,
      "step": 40
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/ce_loss": 8.125,
      "train/diffusion_loss": 0.37036633491516113
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/learning_rate_real": 2.4960150087884376e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2626.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2626.0,
      "epoch": 0.547945205479452,
      "step": 40
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/ce_loss": 8.125,
      "train/diffusion_loss": 0.4317276179790497
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/learning_rate_real": 2.4960150087884376e-05
    },
    {
      "debug/num_lat_loss": 1606.0,
      "debug/num_lat_total": 2725.0,
      "debug/num_tok_loss": 1606.0,
      "debug/num_tok_total": 2725.0,
      "epoch": 0.547945205479452,
      "step": 40
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/ce_loss": 8.1875,
      "train/diffusion_loss": 0.33192673325538635
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/learning_rate_real": 2.4960150087884376e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2447.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2447.0,
      "epoch": 0.547945205479452,
      "step": 40
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/ce_loss": 8.1875,
      "train/diffusion_loss": 0.4681604206562042
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/learning_rate_real": 2.4960150087884376e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2449.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2449.0,
      "epoch": 0.5616438356164384,
      "step": 41
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/ce_loss": 7.8125,
      "train/diffusion_loss": 0.450018048286438
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/learning_rate_real": 2.495560202206119e-05
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2474.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2474.0,
      "epoch": 0.5616438356164384,
      "step": 41
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/ce_loss": 7.84375,
      "train/diffusion_loss": 0.4612468481063843
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/learning_rate_real": 2.495560202206119e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 0.5616438356164384,
      "step": 41
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/ce_loss": 7.90625,
      "train/diffusion_loss": 0.4192771017551422
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/learning_rate_real": 2.495560202206119e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 0.5616438356164384,
      "step": 41
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/ce_loss": 7.8125,
      "train/diffusion_loss": 0.47766613960266113
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/learning_rate_real": 2.495560202206119e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 0.5753424657534246,
      "step": 42
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/ce_loss": 7.53125,
      "train/diffusion_loss": 0.4023301303386688
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/learning_rate_real": 2.4950808712887533e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 3016.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 3016.0,
      "epoch": 0.5753424657534246,
      "step": 42
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/ce_loss": 7.59375,
      "train/diffusion_loss": 0.360873818397522
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/learning_rate_real": 2.4950808712887533e-05
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2704.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2704.0,
      "epoch": 0.5753424657534246,
      "step": 42
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/ce_loss": 7.53125,
      "train/diffusion_loss": 0.42654749751091003
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/learning_rate_real": 2.4950808712887533e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2207.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2207.0,
      "epoch": 0.5753424657534246,
      "step": 42
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/ce_loss": 7.71875,
      "train/diffusion_loss": 0.5486947894096375
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/learning_rate_real": 2.4950808712887533e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2862.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2862.0,
      "epoch": 0.589041095890411,
      "step": 43
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/ce_loss": 7.65625,
      "train/diffusion_loss": 0.3878563344478607
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/learning_rate_real": 2.4945770254740794e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2436.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2436.0,
      "epoch": 0.589041095890411,
      "step": 43
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/ce_loss": 7.34375,
      "train/diffusion_loss": 0.4670861065387726
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/learning_rate_real": 2.4945770254740794e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2687.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2687.0,
      "epoch": 0.589041095890411,
      "step": 43
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/ce_loss": 7.3125,
      "train/diffusion_loss": 0.4248601496219635
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/learning_rate_real": 2.4945770254740794e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2381.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2381.0,
      "epoch": 0.589041095890411,
      "step": 43
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/ce_loss": 7.28125,
      "train/diffusion_loss": 0.49052563309669495
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/learning_rate_real": 2.4945770254740794e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2239.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2239.0,
      "epoch": 0.6027397260273972,
      "step": 44
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/ce_loss": 7.03125,
      "train/diffusion_loss": 0.5206171870231628
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/learning_rate_real": 2.4940486746825197e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 3057.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 3057.0,
      "epoch": 0.6027397260273972,
      "step": 44
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/ce_loss": 7.21875,
      "train/diffusion_loss": 0.3397883474826813
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/learning_rate_real": 2.4940486746825197e-05
    },
    {
      "debug/num_lat_loss": 1652.0,
      "debug/num_lat_total": 2164.0,
      "debug/num_tok_loss": 1652.0,
      "debug/num_tok_total": 2164.0,
      "epoch": 0.6027397260273972,
      "step": 44
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/ce_loss": 7.09375,
      "train/diffusion_loss": 0.4852142930030823
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/learning_rate_real": 2.4940486746825197e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 0.6027397260273972,
      "step": 44
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/ce_loss": 7.15625,
      "train/diffusion_loss": 0.33311718702316284
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/learning_rate_real": 2.4940486746825197e-05
    },
    {
      "debug/num_lat_loss": 1700.0,
      "debug/num_lat_total": 2680.0,
      "debug/num_tok_loss": 1700.0,
      "debug/num_tok_total": 2680.0,
      "epoch": 0.6164383561643836,
      "step": 45
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/ce_loss": 7.03125,
      "train/diffusion_loss": 0.3979421555995941
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/learning_rate_real": 2.493495829316986e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2441.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2441.0,
      "epoch": 0.6164383561643836,
      "step": 45
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/ce_loss": 6.90625,
      "train/diffusion_loss": 0.47810766100883484
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/learning_rate_real": 2.493495829316986e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 0.6164383561643836,
      "step": 45
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/ce_loss": 6.90625,
      "train/diffusion_loss": 0.4057077169418335
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/learning_rate_real": 2.493495829316986e-05
    },
    {
      "debug/num_lat_loss": 1716.0,
      "debug/num_lat_total": 2506.0,
      "debug/num_tok_loss": 1716.0,
      "debug/num_tok_total": 2506.0,
      "epoch": 0.6164383561643836,
      "step": 45
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/ce_loss": 7.03125,
      "train/diffusion_loss": 0.4449847936630249
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/learning_rate_real": 2.493495829316986e-05
    },
    {
      "debug/num_lat_loss": 1629.0,
      "debug/num_lat_total": 2764.0,
      "debug/num_tok_loss": 1629.0,
      "debug/num_tok_total": 2764.0,
      "epoch": 0.6301369863013698,
      "step": 46
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/ce_loss": 6.84375,
      "train/diffusion_loss": 0.3362199664115906
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/learning_rate_real": 2.4929185002626714e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 2588.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 2588.0,
      "epoch": 0.6301369863013698,
      "step": 46
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/ce_loss": 6.78125,
      "train/diffusion_loss": 0.447072297334671
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/learning_rate_real": 2.4929185002626714e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2666.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2666.0,
      "epoch": 0.6301369863013698,
      "step": 46
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/ce_loss": 6.71875,
      "train/diffusion_loss": 0.4139653742313385
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/learning_rate_real": 2.4929185002626714e-05
    },
    {
      "debug/num_lat_loss": 1716.0,
      "debug/num_lat_total": 2716.0,
      "debug/num_tok_loss": 1716.0,
      "debug/num_tok_total": 2716.0,
      "epoch": 0.6301369863013698,
      "step": 46
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/ce_loss": 6.875,
      "train/diffusion_loss": 0.3880141079425812
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/learning_rate_real": 2.4929185002626714e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2618.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2618.0,
      "epoch": 0.6438356164383562,
      "step": 47
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/ce_loss": 6.5,
      "train/diffusion_loss": 0.4298379421234131
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/learning_rate_real": 2.4923166988868407e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 3106.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 3106.0,
      "epoch": 0.6438356164383562,
      "step": 47
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/ce_loss": 6.59375,
      "train/diffusion_loss": 0.3285239040851593
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/learning_rate_real": 2.4923166988868407e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2233.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2233.0,
      "epoch": 0.6438356164383562,
      "step": 47
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/ce_loss": 6.5625,
      "train/diffusion_loss": 0.5352428555488586
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/learning_rate_real": 2.4923166988868407e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2406.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2406.0,
      "epoch": 0.6438356164383562,
      "step": 47
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/ce_loss": 6.84375,
      "train/diffusion_loss": 0.45154592394828796
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/learning_rate_real": 2.4923166988868407e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2466.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2466.0,
      "epoch": 0.6575342465753424,
      "step": 48
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/ce_loss": 6.4375,
      "train/diffusion_loss": 0.4415627121925354
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/learning_rate_real": 2.491690437038602e-05
    },
    {
      "debug/num_lat_loss": 1604.0,
      "debug/num_lat_total": 3149.0,
      "debug/num_tok_loss": 1604.0,
      "debug/num_tok_total": 3149.0,
      "epoch": 0.6575342465753424,
      "step": 48
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/ce_loss": 6.5625,
      "train/diffusion_loss": 0.24725854396820068
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/learning_rate_real": 2.491690437038602e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2419.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2419.0,
      "epoch": 0.6575342465753424,
      "step": 48
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/ce_loss": 6.40625,
      "train/diffusion_loss": 0.4734286665916443
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/learning_rate_real": 2.491690437038602e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 3086.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 3086.0,
      "epoch": 0.6575342465753424,
      "step": 48
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/ce_loss": 6.46875,
      "train/diffusion_loss": 0.32624179124832153
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/learning_rate_real": 2.491690437038602e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2873.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2873.0,
      "epoch": 0.6712328767123288,
      "step": 49
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/ce_loss": 6.25,
      "train/diffusion_loss": 0.38346582651138306
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/learning_rate_real": 2.491039727048677e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2609.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2609.0,
      "epoch": 0.6712328767123288,
      "step": 49
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/ce_loss": 6.25,
      "train/diffusion_loss": 0.4216737449169159
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/learning_rate_real": 2.491039727048677e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 0.6712328767123288,
      "step": 49
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/ce_loss": 6.25,
      "train/diffusion_loss": 0.4077589511871338
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/learning_rate_real": 2.491039727048677e-05
    },
    {
      "debug/num_lat_loss": 1635.0,
      "debug/num_lat_total": 2331.0,
      "debug/num_tok_loss": 1635.0,
      "debug/num_tok_total": 2331.0,
      "epoch": 0.6712328767123288,
      "step": 49
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/ce_loss": 6.28125,
      "train/diffusion_loss": 0.43508994579315186
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/learning_rate_real": 2.491039727048677e-05
    },
    {
      "epoch": 0.684931506849315,
      "grad_norm": 2.2437052726745605,
      "learning_rate": 2.491039727048677e-05,
      "loss": 3.4905,
      "step": 50
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 3297.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 3297.0,
      "epoch": 0.684931506849315,
      "step": 50
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/ce_loss": 6.15625,
      "train/diffusion_loss": 0.3150237500667572
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/learning_rate_real": 2.490364581729156e-05
    },
    {
      "debug/num_lat_loss": 1631.0,
      "debug/num_lat_total": 2550.0,
      "debug/num_tok_loss": 1631.0,
      "debug/num_tok_total": 2550.0,
      "epoch": 0.684931506849315,
      "step": 50
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/ce_loss": 6.21875,
      "train/diffusion_loss": 0.39398685097694397
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/learning_rate_real": 2.490364581729156e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2623.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2623.0,
      "epoch": 0.684931506849315,
      "step": 50
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/ce_loss": 6.03125,
      "train/diffusion_loss": 0.41709935665130615
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/learning_rate_real": 2.490364581729156e-05
    },
    {
      "debug/num_lat_loss": 1681.0,
      "debug/num_lat_total": 2113.0,
      "debug/num_tok_loss": 1681.0,
      "debug/num_tok_total": 2113.0,
      "epoch": 0.684931506849315,
      "step": 50
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/ce_loss": 6.03125,
      "train/diffusion_loss": 0.5213543772697449
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/learning_rate_real": 2.490364581729156e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 0.6986301369863014,
      "step": 51
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/ce_loss": 5.9375,
      "train/diffusion_loss": 0.4208451509475708
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/learning_rate_real": 2.4896650143732457e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2217.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2217.0,
      "epoch": 0.6986301369863014,
      "step": 51
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/ce_loss": 6.15625,
      "train/diffusion_loss": 0.5214932560920715
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/learning_rate_real": 2.4896650143732457e-05
    },
    {
      "debug/num_lat_loss": 1747.0,
      "debug/num_lat_total": 2379.0,
      "debug/num_tok_loss": 1747.0,
      "debug/num_tok_total": 2379.0,
      "epoch": 0.6986301369863014,
      "step": 51
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/ce_loss": 6.0,
      "train/diffusion_loss": 0.4774201214313507
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/learning_rate_real": 2.4896650143732457e-05
    },
    {
      "debug/num_lat_loss": 1621.0,
      "debug/num_lat_total": 2274.0,
      "debug/num_tok_loss": 1621.0,
      "debug/num_tok_total": 2274.0,
      "epoch": 0.6986301369863014,
      "step": 51
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/ce_loss": 6.09375,
      "train/diffusion_loss": 0.44967830181121826
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/learning_rate_real": 2.4896650143732457e-05
    },
    {
      "debug/num_lat_loss": 1820.0,
      "debug/num_lat_total": 2915.0,
      "debug/num_tok_loss": 1820.0,
      "debug/num_tok_total": 2915.0,
      "epoch": 0.7123287671232876,
      "step": 52
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/ce_loss": 5.8125,
      "train/diffusion_loss": 0.35934463143348694
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/learning_rate_real": 2.4889410387550093e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 0.7123287671232876,
      "step": 52
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/ce_loss": 5.75,
      "train/diffusion_loss": 0.4354902505874634
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/learning_rate_real": 2.4889410387550093e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2865.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2865.0,
      "epoch": 0.7123287671232876,
      "step": 52
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/ce_loss": 5.78125,
      "train/diffusion_loss": 0.3834609389305115
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/learning_rate_real": 2.4889410387550093e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2840.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2840.0,
      "epoch": 0.7123287671232876,
      "step": 52
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/ce_loss": 5.90625,
      "train/diffusion_loss": 0.3739621043205261
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/learning_rate_real": 2.4889410387550093e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2414.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2414.0,
      "epoch": 0.726027397260274,
      "step": 53
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/ce_loss": 5.96875,
      "train/diffusion_loss": 0.4601896107196808
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/learning_rate_real": 2.488192669129093e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2902.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2902.0,
      "epoch": 0.726027397260274,
      "step": 53
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/ce_loss": 6.0,
      "train/diffusion_loss": 0.3668011426925659
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/learning_rate_real": 2.488192669129093e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 3089.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 3089.0,
      "epoch": 0.726027397260274,
      "step": 53
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/ce_loss": 5.78125,
      "train/diffusion_loss": 0.3401413857936859
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/learning_rate_real": 2.488192669129093e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 0.726027397260274,
      "step": 53
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/ce_loss": 6.03125,
      "train/diffusion_loss": 0.33927619457244873
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/learning_rate_real": 2.488192669129093e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2812.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2812.0,
      "epoch": 0.7397260273972602,
      "step": 54
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/ce_loss": 5.90625,
      "train/diffusion_loss": 0.39091378450393677
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/learning_rate_real": 2.4874199202304475e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 0.7397260273972602,
      "step": 54
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/ce_loss": 5.90625,
      "train/diffusion_loss": 0.4684540927410126
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/learning_rate_real": 2.4874199202304475e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2662.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2662.0,
      "epoch": 0.7397260273972602,
      "step": 54
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/ce_loss": 5.875,
      "train/diffusion_loss": 0.43764838576316833
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/learning_rate_real": 2.4874199202304475e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2662.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2662.0,
      "epoch": 0.7397260273972602,
      "step": 54
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/ce_loss": 5.9375,
      "train/diffusion_loss": 0.4463954269886017
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/learning_rate_real": 2.4874199202304475e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2869.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2869.0,
      "epoch": 0.7534246575342466,
      "step": 55
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/ce_loss": 5.65625,
      "train/diffusion_loss": 0.3489157557487488
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/learning_rate_real": 2.486622807274036e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2442.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2442.0,
      "epoch": 0.7534246575342466,
      "step": 55
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/ce_loss": 5.9375,
      "train/diffusion_loss": 0.4745232164859772
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/learning_rate_real": 2.486622807274036e-05
    },
    {
      "debug/num_lat_loss": 1695.0,
      "debug/num_lat_total": 2697.0,
      "debug/num_tok_loss": 1695.0,
      "debug/num_tok_total": 2697.0,
      "epoch": 0.7534246575342466,
      "step": 55
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/ce_loss": 5.53125,
      "train/diffusion_loss": 0.3809465765953064
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/learning_rate_real": 2.486622807274036e-05
    },
    {
      "debug/num_lat_loss": 1561.0,
      "debug/num_lat_total": 1777.0,
      "debug/num_tok_loss": 1561.0,
      "debug/num_tok_total": 1777.0,
      "epoch": 0.7534246575342466,
      "step": 55
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/ce_loss": 5.6875,
      "train/diffusion_loss": 0.5590707063674927
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/learning_rate_real": 2.486622807274036e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 3064.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 3064.0,
      "epoch": 0.7671232876712328,
      "step": 56
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/ce_loss": 5.59375,
      "train/diffusion_loss": 0.3314301371574402
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/learning_rate_real": 2.4858013459545352e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 0.7671232876712328,
      "step": 56
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/ce_loss": 5.4375,
      "train/diffusion_loss": 0.3836974501609802
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/learning_rate_real": 2.4858013459545352e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2845.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2845.0,
      "epoch": 0.7671232876712328,
      "step": 56
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.382179856300354
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/learning_rate_real": 2.4858013459545352e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 3123.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 3123.0,
      "epoch": 0.7671232876712328,
      "step": 56
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.34347909688949585
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/learning_rate_real": 2.4858013459545352e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2622.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2622.0,
      "epoch": 0.7808219178082192,
      "step": 57
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/ce_loss": 5.9375,
      "train/diffusion_loss": 0.4226444661617279
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/learning_rate_real": 2.4849555524460277e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 3032.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 3032.0,
      "epoch": 0.7808219178082192,
      "step": 57
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/ce_loss": 5.5625,
      "train/diffusion_loss": 0.31934428215026855
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/learning_rate_real": 2.4849555524460277e-05
    },
    {
      "debug/num_lat_loss": 1709.0,
      "debug/num_lat_total": 2503.0,
      "debug/num_tok_loss": 1709.0,
      "debug/num_tok_total": 2503.0,
      "epoch": 0.7808219178082192,
      "step": 57
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.4243476390838623
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/learning_rate_real": 2.4849555524460277e-05
    },
    {
      "debug/num_lat_loss": 1664.0,
      "debug/num_lat_total": 2298.0,
      "debug/num_tok_loss": 1664.0,
      "debug/num_tok_total": 2298.0,
      "epoch": 0.7808219178082192,
      "step": 57
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/ce_loss": 5.4375,
      "train/diffusion_loss": 0.46369606256484985
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/learning_rate_real": 2.4849555524460277e-05
    },
    {
      "debug/num_lat_loss": 1589.0,
      "debug/num_lat_total": 2694.0,
      "debug/num_tok_loss": 1589.0,
      "debug/num_tok_total": 2694.0,
      "epoch": 0.7945205479452054,
      "step": 58
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/ce_loss": 5.71875,
      "train/diffusion_loss": 0.33193787932395935
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/learning_rate_real": 2.4840854434016808e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2214.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2214.0,
      "epoch": 0.7945205479452054,
      "step": 58
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/ce_loss": 5.40625,
      "train/diffusion_loss": 0.4997442066669464
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/learning_rate_real": 2.4840854434016808e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2435.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2435.0,
      "epoch": 0.7945205479452054,
      "step": 58
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/ce_loss": 5.5,
      "train/diffusion_loss": 0.44154196977615356
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/learning_rate_real": 2.4840854434016808e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 0.7945205479452054,
      "step": 58
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.42629536986351013
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/learning_rate_real": 2.4840854434016808e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2627.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2627.0,
      "epoch": 0.8082191780821918,
      "step": 59
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/ce_loss": 5.46875,
      "train/diffusion_loss": 0.4058604836463928
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/learning_rate_real": 2.4831910359534216e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2643.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2643.0,
      "epoch": 0.8082191780821918,
      "step": 59
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/ce_loss": 5.53125,
      "train/diffusion_loss": 0.42272061109542847
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/learning_rate_real": 2.4831910359534216e-05
    },
    {
      "debug/num_lat_loss": 1585.0,
      "debug/num_lat_total": 2473.0,
      "debug/num_tok_loss": 1585.0,
      "debug/num_tok_total": 2473.0,
      "epoch": 0.8082191780821918,
      "step": 59
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/ce_loss": 5.71875,
      "train/diffusion_loss": 0.40059351921081543
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/learning_rate_real": 2.4831910359534216e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 0.8082191780821918,
      "step": 59
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.4610413908958435
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/learning_rate_real": 2.4831910359534216e-05
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 1.708295464515686,
      "learning_rate": 2.4831910359534216e-05,
      "loss": 3.2409,
      "step": 60
    },
    {
      "debug/num_lat_loss": 1725.0,
      "debug/num_lat_total": 2527.0,
      "debug/num_tok_loss": 1725.0,
      "debug/num_tok_total": 2527.0,
      "epoch": 0.821917808219178,
      "step": 60
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/ce_loss": 5.5625,
      "train/diffusion_loss": 0.3908693790435791
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/learning_rate_real": 2.4822723477115968e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 0.821917808219178,
      "step": 60
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/ce_loss": 5.53125,
      "train/diffusion_loss": 0.42371171712875366
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/learning_rate_real": 2.4822723477115968e-05
    },
    {
      "debug/num_lat_loss": 1650.0,
      "debug/num_lat_total": 2581.0,
      "debug/num_tok_loss": 1650.0,
      "debug/num_tok_total": 2581.0,
      "epoch": 0.821917808219178,
      "step": 60
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.40264788269996643
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/learning_rate_real": 2.4822723477115968e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2817.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2817.0,
      "epoch": 0.821917808219178,
      "step": 60
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/ce_loss": 5.46875,
      "train/diffusion_loss": 0.356502503156662
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/learning_rate_real": 2.4822723477115968e-05
    },
    {
      "debug/num_lat_loss": 1814.0,
      "debug/num_lat_total": 3138.0,
      "debug/num_tok_loss": 1814.0,
      "debug/num_tok_total": 3138.0,
      "epoch": 0.8356164383561644,
      "step": 61
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/ce_loss": 5.4375,
      "train/diffusion_loss": 0.3347739279270172
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/learning_rate_real": 2.481329396764629e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2611.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2611.0,
      "epoch": 0.8356164383561644,
      "step": 61
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/ce_loss": 5.40625,
      "train/diffusion_loss": 0.4167521595954895
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/learning_rate_real": 2.481329396764629e-05
    },
    {
      "debug/num_lat_loss": 1663.0,
      "debug/num_lat_total": 2322.0,
      "debug/num_tok_loss": 1663.0,
      "debug/num_tok_total": 2322.0,
      "epoch": 0.8356164383561644,
      "step": 61
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/ce_loss": 5.375,
      "train/diffusion_loss": 0.4386259913444519
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/learning_rate_real": 2.481329396764629e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 0.8356164383561644,
      "step": 61
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.4437485337257385
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/learning_rate_real": 2.481329396764629e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2430.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2430.0,
      "epoch": 0.8493150684931506,
      "step": 62
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/ce_loss": 5.375,
      "train/diffusion_loss": 0.44565480947494507
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/learning_rate_real": 2.4803622016786578e-05
    },
    {
      "debug/num_lat_loss": 1702.0,
      "debug/num_lat_total": 2550.0,
      "debug/num_tok_loss": 1702.0,
      "debug/num_tok_total": 2550.0,
      "epoch": 0.8493150684931506,
      "step": 62
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.39768069982528687
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/learning_rate_real": 2.4803622016786578e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 0.8493150684931506,
      "step": 62
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/ce_loss": 5.8125,
      "train/diffusion_loss": 0.4438689053058624
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/learning_rate_real": 2.4803622016786578e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2835.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2835.0,
      "epoch": 0.8493150684931506,
      "step": 62
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.4113655686378479
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/learning_rate_real": 2.4803622016786578e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2392.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2392.0,
      "epoch": 0.863013698630137,
      "step": 63
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.4595162272453308
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/learning_rate_real": 2.479370781497175e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2799.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2799.0,
      "epoch": 0.863013698630137,
      "step": 63
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/ce_loss": 5.375,
      "train/diffusion_loss": 0.38731205463409424
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/learning_rate_real": 2.479370781497175e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2842.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2842.0,
      "epoch": 0.863013698630137,
      "step": 63
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/ce_loss": 5.21875,
      "train/diffusion_loss": 0.3685234487056732
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/learning_rate_real": 2.479370781497175e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 3082.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 3082.0,
      "epoch": 0.863013698630137,
      "step": 63
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/ce_loss": 5.375,
      "train/diffusion_loss": 0.3305037319660187
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/learning_rate_real": 2.479370781497175e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 0.8767123287671232,
      "step": 64
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/ce_loss": 5.21875,
      "train/diffusion_loss": 0.41264399886131287
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/learning_rate_real": 2.4783551557406515e-05
    },
    {
      "debug/num_lat_loss": 1687.0,
      "debug/num_lat_total": 2663.0,
      "debug/num_tok_loss": 1687.0,
      "debug/num_tok_total": 2663.0,
      "epoch": 0.8767123287671232,
      "step": 64
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/ce_loss": 5.21875,
      "train/diffusion_loss": 0.36778780817985535
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/learning_rate_real": 2.4783551557406515e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 0.8767123287671232,
      "step": 64
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.45238378643989563
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/learning_rate_real": 2.4783551557406515e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2450.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2450.0,
      "epoch": 0.8767123287671232,
      "step": 64
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.46885961294174194
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/learning_rate_real": 2.4783551557406515e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2385.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2385.0,
      "epoch": 0.8904109589041096,
      "step": 65
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.45050710439682007
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/learning_rate_real": 2.4773153444061502e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2855.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2855.0,
      "epoch": 0.8904109589041096,
      "step": 65
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/ce_loss": 5.15625,
      "train/diffusion_loss": 0.38604146242141724
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/learning_rate_real": 2.4773153444061502e-05
    },
    {
      "debug/num_lat_loss": 1548.0,
      "debug/num_lat_total": 2369.0,
      "debug/num_tok_loss": 1548.0,
      "debug/num_tok_total": 2369.0,
      "epoch": 0.8904109589041096,
      "step": 65
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/ce_loss": 5.65625,
      "train/diffusion_loss": 0.4263710379600525
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/learning_rate_real": 2.4773153444061502e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 3074.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 3074.0,
      "epoch": 0.8904109589041096,
      "step": 65
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.31766679883003235
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/learning_rate_real": 2.4773153444061502e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 0.9041095890410958,
      "step": 66
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/ce_loss": 5.1875,
      "train/diffusion_loss": 0.40880653262138367
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/learning_rate_real": 2.4762513679669348e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 0.9041095890410958,
      "step": 66
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/ce_loss": 5.15625,
      "train/diffusion_loss": 0.39718738198280334
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/learning_rate_real": 2.4762513679669348e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 0.9041095890410958,
      "step": 66
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.45373260974884033
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/learning_rate_real": 2.4762513679669348e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2414.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2414.0,
      "epoch": 0.9041095890410958,
      "step": 66
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/ce_loss": 5.53125,
      "train/diffusion_loss": 0.4466804265975952
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/learning_rate_real": 2.4762513679669348e-05
    },
    {
      "debug/num_lat_loss": 1745.0,
      "debug/num_lat_total": 3007.0,
      "debug/num_tok_loss": 1745.0,
      "debug/num_tok_total": 3007.0,
      "epoch": 0.9178082191780822,
      "step": 67
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.28842657804489136
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/learning_rate_real": 2.4751632473720638e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2411.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2411.0,
      "epoch": 0.9178082191780822,
      "step": 67
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/ce_loss": 5.15625,
      "train/diffusion_loss": 0.4682995080947876
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/learning_rate_real": 2.4751632473720638e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 0.9178082191780822,
      "step": 67
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/ce_loss": 5.25,
      "train/diffusion_loss": 0.48682138323783875
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/learning_rate_real": 2.4751632473720638e-05
    },
    {
      "debug/num_lat_loss": 1610.0,
      "debug/num_lat_total": 2531.0,
      "debug/num_tok_loss": 1610.0,
      "debug/num_tok_total": 2531.0,
      "epoch": 0.9178082191780822,
      "step": 67
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/ce_loss": 5.1875,
      "train/diffusion_loss": 0.41317009925842285
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/learning_rate_real": 2.4751632473720638e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2605.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2605.0,
      "epoch": 0.9315068493150684,
      "step": 68
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/ce_loss": 5.28125,
      "train/diffusion_loss": 0.4040594696998596
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/learning_rate_real": 2.474051004045981e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2861.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2861.0,
      "epoch": 0.9315068493150684,
      "step": 68
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.3841204047203064
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/learning_rate_real": 2.474051004045981e-05
    },
    {
      "debug/num_lat_loss": 1610.0,
      "debug/num_lat_total": 2451.0,
      "debug/num_tok_loss": 1610.0,
      "debug/num_tok_total": 2451.0,
      "epoch": 0.9315068493150684,
      "step": 68
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/ce_loss": 5.0625,
      "train/diffusion_loss": 0.3975687026977539
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/learning_rate_real": 2.474051004045981e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2410.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2410.0,
      "epoch": 0.9315068493150684,
      "step": 68
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.4634510278701782
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/learning_rate_real": 2.474051004045981e-05
    },
    {
      "debug/num_lat_loss": 1617.0,
      "debug/num_lat_total": 2693.0,
      "debug/num_tok_loss": 1617.0,
      "debug/num_tok_total": 2693.0,
      "epoch": 0.9452054794520548,
      "step": 69
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/ce_loss": 5.25,
      "train/diffusion_loss": 0.35224559903144836
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/learning_rate_real": 2.472914659888092e-05
    },
    {
      "debug/num_lat_loss": 1657.0,
      "debug/num_lat_total": 2292.0,
      "debug/num_tok_loss": 1657.0,
      "debug/num_tok_total": 2292.0,
      "epoch": 0.9452054794520548,
      "step": 69
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.4498156011104584
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/learning_rate_real": 2.472914659888092e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2024.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2024.0,
      "epoch": 0.9452054794520548,
      "step": 69
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.5396973490715027
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/learning_rate_real": 2.472914659888092e-05
    },
    {
      "debug/num_lat_loss": 1592.0,
      "debug/num_lat_total": 2018.0,
      "debug/num_tok_loss": 1592.0,
      "debug/num_tok_total": 2018.0,
      "epoch": 0.9452054794520548,
      "step": 69
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.4777624309062958
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/learning_rate_real": 2.472914659888092e-05
    },
    {
      "epoch": 0.958904109589041,
      "grad_norm": 1.7449594736099243,
      "learning_rate": 2.472914659888092e-05,
      "loss": 3.1653,
      "step": 70
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2412.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2412.0,
      "epoch": 0.958904109589041,
      "step": 70
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.4851177930831909
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/learning_rate_real": 2.4717542372723333e-05
    },
    {
      "debug/num_lat_loss": 1670.0,
      "debug/num_lat_total": 1879.0,
      "debug/num_tok_loss": 1670.0,
      "debug/num_tok_total": 1879.0,
      "epoch": 0.958904109589041,
      "step": 70
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/ce_loss": 5.1875,
      "train/diffusion_loss": 0.5450473427772522
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/learning_rate_real": 2.4717542372723333e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 0.958904109589041,
      "step": 70
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/ce_loss": 5.15625,
      "train/diffusion_loss": 0.40747514367103577
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/learning_rate_real": 2.4717542372723333e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 0.958904109589041,
      "step": 70
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.41900011897087097
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/learning_rate_real": 2.4717542372723333e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2627.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2627.0,
      "epoch": 0.9726027397260274,
      "step": 71
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.40764084458351135
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/learning_rate_real": 2.470569759046732e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2681.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2681.0,
      "epoch": 0.9726027397260274,
      "step": 71
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/ce_loss": 5.03125,
      "train/diffusion_loss": 0.4096241891384125
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/learning_rate_real": 2.470569759046732e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2827.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2827.0,
      "epoch": 0.9726027397260274,
      "step": 71
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.3634895980358124
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/learning_rate_real": 2.470569759046732e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 0.9726027397260274,
      "step": 71
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.3556757867336273
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/learning_rate_real": 2.470569759046732e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2877.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2877.0,
      "epoch": 0.9863013698630136,
      "step": 72
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/ce_loss": 5.0625,
      "train/diffusion_loss": 0.39283737540245056
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/learning_rate_real": 2.469361248532955e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 3048.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 3048.0,
      "epoch": 0.9863013698630136,
      "step": 72
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.33008840680122375
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/learning_rate_real": 2.469361248532955e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2879.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2879.0,
      "epoch": 0.9863013698630136,
      "step": 72
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.37761390209198
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/learning_rate_real": 2.469361248532955e-05
    },
    {
      "debug/num_lat_loss": 445.0,
      "debug/num_lat_total": 662.0,
      "debug/num_tok_loss": 445.0,
      "debug/num_tok_total": 662.0,
      "epoch": 0.9863013698630136,
      "step": 72
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/ce_loss": 7.09375,
      "train/diffusion_loss": 0.5194283723831177
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/learning_rate_real": 2.469361248532955e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2690.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2690.0,
      "epoch": 1.0,
      "step": 73
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.4245181083679199
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/learning_rate_real": 2.4681287295258516e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 1.0,
      "step": 73
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.4211677014827728
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/learning_rate_real": 2.4681287295258516e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2018.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2018.0,
      "epoch": 1.0,
      "step": 73
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.5452603697776794
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/learning_rate_real": 2.4681287295258516e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.0,
      "step": 73
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.39598238468170166
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/learning_rate_real": 2.4681287295258516e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2624.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2624.0,
      "epoch": 1.0136986301369864,
      "step": 74
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.41032910346984863
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/learning_rate_real": 2.466872226292983e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2220.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2220.0,
      "epoch": 1.0136986301369864,
      "step": 74
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/ce_loss": 4.875,
      "train/diffusion_loss": 0.5221177935600281
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/learning_rate_real": 2.466872226292983e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 1.0136986301369864,
      "step": 74
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.4044923782348633
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/learning_rate_real": 2.466872226292983e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.0136986301369864,
      "step": 74
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/ce_loss": 5.4375,
      "train/diffusion_loss": 0.421188086271286
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/learning_rate_real": 2.466872226292983e-05
    },
    {
      "debug/num_lat_loss": 1657.0,
      "debug/num_lat_total": 3166.0,
      "debug/num_tok_loss": 1657.0,
      "debug/num_tok_total": 3166.0,
      "epoch": 1.0273972602739727,
      "step": 75
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.2653091251850128
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/learning_rate_real": 2.4655917635741472e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2254.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2254.0,
      "epoch": 1.0273972602739727,
      "step": 75
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.48585981130599976
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/learning_rate_real": 2.4655917635741472e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2675.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2675.0,
      "epoch": 1.0273972602739727,
      "step": 75
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.44348567724227905
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/learning_rate_real": 2.4655917635741472e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2207.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2207.0,
      "epoch": 1.0273972602739727,
      "step": 75
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/ce_loss": 5.03125,
      "train/diffusion_loss": 0.4878855049610138
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/learning_rate_real": 2.4655917635741472e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2690.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2690.0,
      "epoch": 1.0410958904109588,
      "step": 76
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.4145924746990204
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/learning_rate_real": 2.4642873665808873e-05
    },
    {
      "debug/num_lat_loss": 1747.0,
      "debug/num_lat_total": 2383.0,
      "debug/num_tok_loss": 1747.0,
      "debug/num_tok_total": 2383.0,
      "epoch": 1.0410958904109588,
      "step": 76
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.49800702929496765
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/learning_rate_real": 2.4642873665808873e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2667.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2667.0,
      "epoch": 1.0410958904109588,
      "step": 76
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.4373202919960022
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/learning_rate_real": 2.4642873665808873e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2857.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2857.0,
      "epoch": 1.0410958904109588,
      "step": 76
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.3920494318008423
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/learning_rate_real": 2.4642873665808873e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 1.0547945205479452,
      "step": 77
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.39454302191734314
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/learning_rate_real": 2.462959060996001e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 3066.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 3066.0,
      "epoch": 1.0547945205479452,
      "step": 77
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/ce_loss": 5.21875,
      "train/diffusion_loss": 0.3013768494129181
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/learning_rate_real": 2.462959060996001e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2032.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2032.0,
      "epoch": 1.0547945205479452,
      "step": 77
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.5480638146400452
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/learning_rate_real": 2.462959060996001e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2650.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2650.0,
      "epoch": 1.0547945205479452,
      "step": 77
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4419059753417969
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/learning_rate_real": 2.462959060996001e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 3096.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 3096.0,
      "epoch": 1.0684931506849316,
      "step": 78
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.3151288628578186
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/learning_rate_real": 2.4616068729730294e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2860.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2860.0,
      "epoch": 1.0684931506849316,
      "step": 78
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.3643770217895508
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/learning_rate_real": 2.4616068729730294e-05
    },
    {
      "debug/num_lat_loss": 1610.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1610.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 1.0684931506849316,
      "step": 78
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.3915770947933197
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/learning_rate_real": 2.4616068729730294e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2852.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2852.0,
      "epoch": 1.0684931506849316,
      "step": 78
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.39558202028274536
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/learning_rate_real": 2.4616068729730294e-05
    },
    {
      "debug/num_lat_loss": 1709.0,
      "debug/num_lat_total": 2137.0,
      "debug/num_tok_loss": 1709.0,
      "debug/num_tok_total": 2137.0,
      "epoch": 1.0821917808219177,
      "step": 79
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/ce_loss": 4.875,
      "train/diffusion_loss": 0.4808519780635834
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/learning_rate_real": 2.460230829135746e-05
    },
    {
      "debug/num_lat_loss": 1698.0,
      "debug/num_lat_total": 2234.0,
      "debug/num_tok_loss": 1698.0,
      "debug/num_tok_total": 2234.0,
      "epoch": 1.0821917808219177,
      "step": 79
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.487990140914917
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/learning_rate_real": 2.460230829135746e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2467.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2467.0,
      "epoch": 1.0821917808219177,
      "step": 79
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.4461616575717926
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/learning_rate_real": 2.460230829135746e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2677.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2677.0,
      "epoch": 1.0821917808219177,
      "step": 79
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.42311185598373413
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/learning_rate_real": 2.460230829135746e-05
    },
    {
      "epoch": 1.095890410958904,
      "grad_norm": 1.6366914510726929,
      "learning_rate": 2.460230829135746e-05,
      "loss": 3.1839,
      "step": 80
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 1.095890410958904,
      "step": 80
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.4312282204627991
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/learning_rate_real": 2.4588309565776302e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2454.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2454.0,
      "epoch": 1.095890410958904,
      "step": 80
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.4743603467941284
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/learning_rate_real": 2.4588309565776302e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2443.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2443.0,
      "epoch": 1.095890410958904,
      "step": 80
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.48455047607421875
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/learning_rate_real": 2.4588309565776302e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2669.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2669.0,
      "epoch": 1.095890410958904,
      "step": 80
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.4093216359615326
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/learning_rate_real": 2.4588309565776302e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2668.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2668.0,
      "epoch": 1.1095890410958904,
      "step": 81
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.4128156304359436
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/learning_rate_real": 2.4574072828613354e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2876.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2876.0,
      "epoch": 1.1095890410958904,
      "step": 81
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/ce_loss": 5.0625,
      "train/diffusion_loss": 0.35143572092056274
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/learning_rate_real": 2.4574072828613354e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 1.1095890410958904,
      "step": 81
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.4372323751449585
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/learning_rate_real": 2.4574072828613354e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 3108.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 3108.0,
      "epoch": 1.1095890410958904,
      "step": 81
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.3405665457248688
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/learning_rate_real": 2.4574072828613354e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 1.1232876712328768,
      "step": 82
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.40083348751068115
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/learning_rate_real": 2.455959836018145e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 3062.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 3062.0,
      "epoch": 1.1232876712328768,
      "step": 82
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/ce_loss": 4.875,
      "train/diffusion_loss": 0.2902744710445404
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/learning_rate_real": 2.455959836018145e-05
    },
    {
      "debug/num_lat_loss": 1699.0,
      "debug/num_lat_total": 2280.0,
      "debug/num_tok_loss": 1699.0,
      "debug/num_tok_total": 2280.0,
      "epoch": 1.1232876712328768,
      "step": 82
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.48062434792518616
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/learning_rate_real": 2.455959836018145e-05
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2765.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2765.0,
      "epoch": 1.1232876712328768,
      "step": 82
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.32621175050735474
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/learning_rate_real": 2.455959836018145e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2657.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2657.0,
      "epoch": 1.1369863013698631,
      "step": 83
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.41724833846092224
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/learning_rate_real": 2.4544886445474215e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3088.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3088.0,
      "epoch": 1.1369863013698631,
      "step": 83
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.3163226842880249
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/learning_rate_real": 2.4544886445474215e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2423.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2423.0,
      "epoch": 1.1369863013698631,
      "step": 83
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.45068359375
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/learning_rate_real": 2.4544886445474215e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 3037.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 3037.0,
      "epoch": 1.1369863013698631,
      "step": 83
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.3273552358150482
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/learning_rate_real": 2.4544886445474215e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2643.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2643.0,
      "epoch": 1.1506849315068493,
      "step": 84
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.41567105054855347
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/learning_rate_real": 2.4529937374160438e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2448.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2448.0,
      "epoch": 1.1506849315068493,
      "step": 84
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.4494771957397461
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/learning_rate_real": 2.4529937374160438e-05
    },
    {
      "debug/num_lat_loss": 1751.0,
      "debug/num_lat_total": 2386.0,
      "debug/num_tok_loss": 1751.0,
      "debug/num_tok_total": 2386.0,
      "epoch": 1.1506849315068493,
      "step": 84
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.45726028084754944
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/learning_rate_real": 2.4529937374160438e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2621.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2621.0,
      "epoch": 1.1506849315068493,
      "step": 84
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/ce_loss": 5.03125,
      "train/diffusion_loss": 0.4082919955253601
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/learning_rate_real": 2.4529937374160438e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2445.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2445.0,
      "epoch": 1.1643835616438356,
      "step": 85
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/ce_loss": 4.65625,
      "train/diffusion_loss": 0.45018747448921204
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/learning_rate_real": 2.4514751440578394e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2833.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2833.0,
      "epoch": 1.1643835616438356,
      "step": 85
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.36236572265625
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/learning_rate_real": 2.4514751440578394e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2428.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2428.0,
      "epoch": 1.1643835616438356,
      "step": 85
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.4757889211177826
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/learning_rate_real": 2.4514751440578394e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2668.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2668.0,
      "epoch": 1.1643835616438356,
      "step": 85
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.40141940116882324
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/learning_rate_real": 2.4514751440578394e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2393.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2393.0,
      "epoch": 1.178082191780822,
      "step": 86
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.48776713013648987
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/learning_rate_real": 2.4499328943730026e-05
    },
    {
      "debug/num_lat_loss": 1699.0,
      "debug/num_lat_total": 2692.0,
      "debug/num_tok_loss": 1699.0,
      "debug/num_tok_total": 2692.0,
      "epoch": 1.178082191780822,
      "step": 86
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.3635094165802002
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/learning_rate_real": 2.4499328943730026e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2856.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2856.0,
      "epoch": 1.178082191780822,
      "step": 86
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.38390570878982544
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/learning_rate_real": 2.4499328943730026e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2422.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2422.0,
      "epoch": 1.178082191780822,
      "step": 86
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.4719712436199188
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/learning_rate_real": 2.4499328943730026e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 1.191780821917808,
      "step": 87
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.4493198096752167
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/learning_rate_real": 2.448367018727507e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2464.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2464.0,
      "epoch": 1.191780821917808,
      "step": 87
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.44079920649528503
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/learning_rate_real": 2.448367018727507e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2218.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2218.0,
      "epoch": 1.191780821917808,
      "step": 87
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.5176950097084045
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/learning_rate_real": 2.448367018727507e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2677.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2677.0,
      "epoch": 1.191780821917808,
      "step": 87
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4029632806777954
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/learning_rate_real": 2.448367018727507e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2404.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2404.0,
      "epoch": 1.2054794520547945,
      "step": 88
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.4859849512577057
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/learning_rate_real": 2.446777547952507e-05
    },
    {
      "debug/num_lat_loss": 1625.0,
      "debug/num_lat_total": 2067.0,
      "debug/num_tok_loss": 1625.0,
      "debug/num_tok_total": 2067.0,
      "epoch": 1.2054794520547945,
      "step": 88
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.5068560838699341
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/learning_rate_real": 2.446777547952507e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3051.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3051.0,
      "epoch": 1.2054794520547945,
      "step": 88
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.3534332513809204
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/learning_rate_real": 2.446777547952507e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2410.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2410.0,
      "epoch": 1.2054794520547945,
      "step": 88
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.45841628313064575
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/learning_rate_real": 2.446777547952507e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 1.2191780821917808,
      "step": 89
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/ce_loss": 4.625,
      "train/diffusion_loss": 0.4496181309223175
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/learning_rate_real": 2.445164513343731e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2210.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2210.0,
      "epoch": 1.2191780821917808,
      "step": 89
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/ce_loss": 4.65625,
      "train/diffusion_loss": 0.5095124840736389
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/learning_rate_real": 2.445164513343731e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2244.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2244.0,
      "epoch": 1.2191780821917808,
      "step": 89
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.5054518580436707
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/learning_rate_real": 2.445164513343731e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 3260.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 3260.0,
      "epoch": 1.2191780821917808,
      "step": 89
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/ce_loss": 4.65625,
      "train/diffusion_loss": 0.2607570290565491
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/learning_rate_real": 2.445164513343731e-05
    },
    {
      "epoch": 1.2328767123287672,
      "grad_norm": 1.9624649286270142,
      "learning_rate": 2.445164513343731e-05,
      "loss": 3.1204,
      "step": 90
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 1.2328767123287672,
      "step": 90
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4628406763076782
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/learning_rate_real": 2.443527946660865e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 3073.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 3073.0,
      "epoch": 1.2328767123287672,
      "step": 90
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.2992222309112549
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/learning_rate_real": 2.443527946660865e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2897.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2897.0,
      "epoch": 1.2328767123287672,
      "step": 90
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.36971840262413025
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/learning_rate_real": 2.443527946660865e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2889.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2889.0,
      "epoch": 1.2328767123287672,
      "step": 90
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/ce_loss": 4.5625,
      "train/diffusion_loss": 0.3495144248008728
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/learning_rate_real": 2.443527946660865e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 3326.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 3326.0,
      "epoch": 1.2465753424657535,
      "step": 91
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/ce_loss": 4.5625,
      "train/diffusion_loss": 0.27441421151161194
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/learning_rate_real": 2.441867880126928e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2192.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2192.0,
      "epoch": 1.2465753424657535,
      "step": 91
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.5229299068450928
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/learning_rate_real": 2.441867880126928e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 1.2465753424657535,
      "step": 91
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4148879945278168
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/learning_rate_real": 2.441867880126928e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3067.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3067.0,
      "epoch": 1.2465753424657535,
      "step": 91
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.30536091327667236
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/learning_rate_real": 2.441867880126928e-05
    },
    {
      "debug/num_lat_loss": 1677.0,
      "debug/num_lat_total": 2094.0,
      "debug/num_tok_loss": 1677.0,
      "debug/num_tok_total": 2094.0,
      "epoch": 1.2602739726027397,
      "step": 92
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.4853772521018982
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/learning_rate_real": 2.4401843464276368e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 3220.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 3220.0,
      "epoch": 1.2602739726027397,
      "step": 92
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/ce_loss": 4.625,
      "train/diffusion_loss": 0.2949419319629669
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/learning_rate_real": 2.4401843464276368e-05
    },
    {
      "debug/num_lat_loss": 1653.0,
      "debug/num_lat_total": 2483.0,
      "debug/num_tok_loss": 1653.0,
      "debug/num_tok_total": 2483.0,
      "epoch": 1.2602739726027397,
      "step": 92
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.3998708128929138
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/learning_rate_real": 2.4401843464276368e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 3071.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 3071.0,
      "epoch": 1.2602739726027397,
      "step": 92
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/ce_loss": 4.875,
      "train/diffusion_loss": 0.28526005148887634
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/learning_rate_real": 2.4401843464276368e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2826.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2826.0,
      "epoch": 1.273972602739726,
      "step": 93
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.36494237184524536
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/learning_rate_real": 2.4384773787107616e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.273972602739726,
      "step": 93
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.4222411513328552
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/learning_rate_real": 2.4384773787107616e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2240.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2240.0,
      "epoch": 1.273972602739726,
      "step": 93
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.4870891571044922
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/learning_rate_real": 2.4384773787107616e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 1.273972602739726,
      "step": 93
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.3951069116592407
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/learning_rate_real": 2.4384773787107616e-05
    },
    {
      "debug/num_lat_loss": 1698.0,
      "debug/num_lat_total": 2271.0,
      "debug/num_tok_loss": 1698.0,
      "debug/num_tok_total": 2271.0,
      "epoch": 1.2876712328767124,
      "step": 94
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.4701569974422455
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/learning_rate_real": 2.4367470105854766e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2868.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2868.0,
      "epoch": 1.2876712328767124,
      "step": 94
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.34901419281959534
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/learning_rate_real": 2.4367470105854766e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2200.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2200.0,
      "epoch": 1.2876712328767124,
      "step": 94
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.5008375644683838
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/learning_rate_real": 2.4367470105854766e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2420.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2420.0,
      "epoch": 1.2876712328767124,
      "step": 94
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4832378625869751
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/learning_rate_real": 2.4367470105854766e-05
    },
    {
      "debug/num_lat_loss": 1728.0,
      "debug/num_lat_total": 2578.0,
      "debug/num_tok_loss": 1728.0,
      "debug/num_tok_total": 2578.0,
      "epoch": 1.3013698630136985,
      "step": 95
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.4089829623699188
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/learning_rate_real": 2.4349932761216942e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2026.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2026.0,
      "epoch": 1.3013698630136985,
      "step": 95
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.5384713411331177
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/learning_rate_real": 2.4349932761216942e-05
    },
    {
      "debug/num_lat_loss": 1482.0,
      "debug/num_lat_total": 2137.0,
      "debug/num_tok_loss": 1482.0,
      "debug/num_tok_total": 2137.0,
      "epoch": 1.3013698630136985,
      "step": 95
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.4377511143684387
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/learning_rate_real": 2.4349932761216942e-05
    },
    {
      "debug/num_lat_loss": 1757.0,
      "debug/num_lat_total": 2183.0,
      "debug/num_tok_loss": 1757.0,
      "debug/num_tok_total": 2183.0,
      "epoch": 1.3013698630136985,
      "step": 95
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/ce_loss": 4.65625,
      "train/diffusion_loss": 0.5004580020904541
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/learning_rate_real": 2.4349932761216942e-05
    },
    {
      "debug/num_lat_loss": 1659.0,
      "debug/num_lat_total": 2826.0,
      "debug/num_tok_loss": 1659.0,
      "debug/num_tok_total": 2826.0,
      "epoch": 1.3150684931506849,
      "step": 96
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.3254546821117401
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/learning_rate_real": 2.433216209849396e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2857.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2857.0,
      "epoch": 1.3150684931506849,
      "step": 96
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.33290404081344604
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/learning_rate_real": 2.433216209849396e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 3070.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 3070.0,
      "epoch": 1.3150684931506849,
      "step": 96
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/ce_loss": 5.03125,
      "train/diffusion_loss": 0.3361597955226898
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/learning_rate_real": 2.433216209849396e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2858.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2858.0,
      "epoch": 1.3150684931506849,
      "step": 96
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.39105093479156494
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/learning_rate_real": 2.433216209849396e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2608.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2608.0,
      "epoch": 1.3287671232876712,
      "step": 97
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/ce_loss": 4.375,
      "train/diffusion_loss": 0.4111551344394684
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/learning_rate_real": 2.4314158467579544e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2663.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2663.0,
      "epoch": 1.3287671232876712,
      "step": 97
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/ce_loss": 4.625,
      "train/diffusion_loss": 0.4159858524799347
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/learning_rate_real": 2.4314158467579544e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2891.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2891.0,
      "epoch": 1.3287671232876712,
      "step": 97
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.3672471344470978
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/learning_rate_real": 2.4314158467579544e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2221.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2221.0,
      "epoch": 1.3287671232876712,
      "step": 97
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/ce_loss": 4.40625,
      "train/diffusion_loss": 0.5292556881904602
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/learning_rate_real": 2.4314158467579544e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 1.3424657534246576,
      "step": 98
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.43493154644966125
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/learning_rate_real": 2.429592222295441e-05
    },
    {
      "debug/num_lat_loss": 1622.0,
      "debug/num_lat_total": 2475.0,
      "debug/num_tok_loss": 1622.0,
      "debug/num_tok_total": 2475.0,
      "epoch": 1.3424657534246576,
      "step": 98
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.37283438444137573
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/learning_rate_real": 2.429592222295441e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 1.3424657534246576,
      "step": 98
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.4417295753955841
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/learning_rate_real": 2.429592222295441e-05
    },
    {
      "debug/num_lat_loss": 1644.0,
      "debug/num_lat_total": 2727.0,
      "debug/num_tok_loss": 1644.0,
      "debug/num_tok_total": 2727.0,
      "epoch": 1.3424657534246576,
      "step": 98
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/ce_loss": 4.40625,
      "train/diffusion_loss": 0.3511442244052887
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/learning_rate_real": 2.429592222295441e-05
    },
    {
      "debug/num_lat_loss": 1737.0,
      "debug/num_lat_total": 2841.0,
      "debug/num_tok_loss": 1737.0,
      "debug/num_tok_total": 2841.0,
      "epoch": 1.356164383561644,
      "step": 99
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.36495158076286316
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/learning_rate_real": 2.42774537236793e-05
    },
    {
      "debug/num_lat_loss": 1436.0,
      "debug/num_lat_total": 2005.0,
      "debug/num_tok_loss": 1436.0,
      "debug/num_tok_total": 2005.0,
      "epoch": 1.356164383561644,
      "step": 99
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/ce_loss": 4.625,
      "train/diffusion_loss": 0.4353146255016327
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/learning_rate_real": 2.42774537236793e-05
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2501.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2501.0,
      "epoch": 1.356164383561644,
      "step": 99
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/ce_loss": 4.34375,
      "train/diffusion_loss": 0.3748423457145691
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/learning_rate_real": 2.42774537236793e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2891.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2891.0,
      "epoch": 1.356164383561644,
      "step": 99
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/ce_loss": 4.375,
      "train/diffusion_loss": 0.3602873682975769
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/learning_rate_real": 2.42774537236793e-05
    },
    {
      "epoch": 1.36986301369863,
      "grad_norm": 2.010390281677246,
      "learning_rate": 2.42774537236793e-05,
      "loss": 2.983,
      "step": 100
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2860.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2860.0,
      "epoch": 1.36986301369863,
      "step": 100
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.3802020847797394
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/learning_rate_real": 2.425875333338792e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 1.36986301369863,
      "step": 100
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4197426736354828
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/learning_rate_real": 2.425875333338792e-05
    },
    {
      "debug/num_lat_loss": 1716.0,
      "debug/num_lat_total": 2512.0,
      "debug/num_tok_loss": 1716.0,
      "debug/num_tok_total": 2512.0,
      "epoch": 1.36986301369863,
      "step": 100
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.4053674042224884
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/learning_rate_real": 2.425875333338792e-05
    },
    {
      "debug/num_lat_loss": 1746.0,
      "debug/num_lat_total": 2184.0,
      "debug/num_tok_loss": 1746.0,
      "debug/num_tok_total": 2184.0,
      "epoch": 1.36986301369863,
      "step": 100
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.4971557855606079
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/learning_rate_real": 2.425875333338792e-05
    },
    {
      "debug/num_lat_loss": 1599.0,
      "debug/num_lat_total": 2669.0,
      "debug/num_tok_loss": 1599.0,
      "debug/num_tok_total": 2669.0,
      "epoch": 1.3835616438356164,
      "step": 101
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.34701186418533325
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/learning_rate_real": 2.4239821420279753e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2862.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2862.0,
      "epoch": 1.3835616438356164,
      "step": 101
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.3729672431945801
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/learning_rate_real": 2.4239821420279753e-05
    },
    {
      "debug/num_lat_loss": 1629.0,
      "debug/num_lat_total": 2978.0,
      "debug/num_tok_loss": 1629.0,
      "debug/num_tok_total": 2978.0,
      "epoch": 1.3835616438356164,
      "step": 101
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.2844712734222412
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/learning_rate_real": 2.4239821420279753e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 1.3835616438356164,
      "step": 101
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/ce_loss": 4.375,
      "train/diffusion_loss": 0.40753600001335144
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/learning_rate_real": 2.4239821420279753e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 3038.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 3038.0,
      "epoch": 1.3972602739726028,
      "step": 102
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.3289642035961151
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/learning_rate_real": 2.422065835711285e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2665.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2665.0,
      "epoch": 1.3972602739726028,
      "step": 102
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/ce_loss": 4.40625,
      "train/diffusion_loss": 0.3820474445819855
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/learning_rate_real": 2.422065835711285e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2671.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2671.0,
      "epoch": 1.3972602739726028,
      "step": 102
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.4218079447746277
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/learning_rate_real": 2.422065835711285e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 1.3972602739726028,
      "step": 102
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.4562211334705353
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/learning_rate_real": 2.422065835711285e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2874.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2874.0,
      "epoch": 1.410958904109589,
      "step": 103
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/ce_loss": 4.5625,
      "train/diffusion_loss": 0.3860173523426056
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/learning_rate_real": 2.4201264521196455e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2442.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2442.0,
      "epoch": 1.410958904109589,
      "step": 103
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.45044654607772827
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/learning_rate_real": 2.4201264521196455e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2619.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2619.0,
      "epoch": 1.410958904109589,
      "step": 103
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.4299888014793396
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/learning_rate_real": 2.4201264521196455e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2638.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2638.0,
      "epoch": 1.410958904109589,
      "step": 103
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.3971606194972992
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/learning_rate_real": 2.4201264521196455e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2223.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2223.0,
      "epoch": 1.4246575342465753,
      "step": 104
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/ce_loss": 4.15625,
      "train/diffusion_loss": 0.48428550362586975
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/learning_rate_real": 2.4181640294383585e-05
    },
    {
      "debug/num_lat_loss": 1674.0,
      "debug/num_lat_total": 2539.0,
      "debug/num_tok_loss": 1674.0,
      "debug/num_tok_total": 2539.0,
      "epoch": 1.4246575342465753,
      "step": 104
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/ce_loss": 4.40625,
      "train/diffusion_loss": 0.3819606602191925
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/learning_rate_real": 2.4181640294383585e-05
    },
    {
      "debug/num_lat_loss": 1690.0,
      "debug/num_lat_total": 2253.0,
      "debug/num_tok_loss": 1690.0,
      "debug/num_tok_total": 2253.0,
      "epoch": 1.4246575342465753,
      "step": 104
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.4630298614501953
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/learning_rate_real": 2.4181640294383585e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2633.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2633.0,
      "epoch": 1.4246575342465753,
      "step": 104
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/ce_loss": 4.34375,
      "train/diffusion_loss": 0.3922845125198364
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/learning_rate_real": 2.4181640294383585e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2897.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2897.0,
      "epoch": 1.4383561643835616,
      "step": 105
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/ce_loss": 4.34375,
      "train/diffusion_loss": 0.37782275676727295
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/learning_rate_real": 2.4161786063063536e-05
    },
    {
      "debug/num_lat_loss": 1712.0,
      "debug/num_lat_total": 2358.0,
      "debug/num_tok_loss": 1712.0,
      "debug/num_tok_total": 2358.0,
      "epoch": 1.4383561643835616,
      "step": 105
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/ce_loss": 4.1875,
      "train/diffusion_loss": 0.4383047819137573
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/learning_rate_real": 2.4161786063063536e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2836.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2836.0,
      "epoch": 1.4383561643835616,
      "step": 105
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.3715423047542572
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/learning_rate_real": 2.4161786063063536e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 1.4383561643835616,
      "step": 105
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.4179925322532654
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/learning_rate_real": 2.4161786063063536e-05
    },
    {
      "debug/num_lat_loss": 1625.0,
      "debug/num_lat_total": 2265.0,
      "debug/num_tok_loss": 1625.0,
      "debug/num_tok_total": 2265.0,
      "epoch": 1.452054794520548,
      "step": 106
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/ce_loss": 4.28125,
      "train/diffusion_loss": 0.4229045808315277
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/learning_rate_real": 2.4141702218154232e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2211.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2211.0,
      "epoch": 1.452054794520548,
      "step": 106
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.49752187728881836
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/learning_rate_real": 2.4141702218154232e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2611.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2611.0,
      "epoch": 1.452054794520548,
      "step": 106
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/ce_loss": 4.1875,
      "train/diffusion_loss": 0.4122409224510193
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/learning_rate_real": 2.4141702218154232e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2434.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2434.0,
      "epoch": 1.452054794520548,
      "step": 106
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.48291388154029846
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/learning_rate_real": 2.4141702218154232e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2204.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2204.0,
      "epoch": 1.4657534246575343,
      "step": 107
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/ce_loss": 4.28125,
      "train/diffusion_loss": 0.48381802439689636
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/learning_rate_real": 2.4121389155094564e-05
    },
    {
      "debug/num_lat_loss": 1645.0,
      "debug/num_lat_total": 2371.0,
      "debug/num_tok_loss": 1645.0,
      "debug/num_tok_total": 2371.0,
      "epoch": 1.4657534246575343,
      "step": 107
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.4406833052635193
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/learning_rate_real": 2.4121389155094564e-05
    },
    {
      "debug/num_lat_loss": 1723.0,
      "debug/num_lat_total": 2371.0,
      "debug/num_tok_loss": 1723.0,
      "debug/num_tok_total": 2371.0,
      "epoch": 1.4657534246575343,
      "step": 107
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.4781853258609772
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/learning_rate_real": 2.4121389155094564e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 1.4657534246575343,
      "step": 107
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.3677610754966736
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/learning_rate_real": 2.4121389155094564e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2864.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2864.0,
      "epoch": 1.4794520547945205,
      "step": 108
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.35337358713150024
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/learning_rate_real": 2.410084727383659e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2456.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2456.0,
      "epoch": 1.4794520547945205,
      "step": 108
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/ce_loss": 4.125,
      "train/diffusion_loss": 0.47408199310302734
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/learning_rate_real": 2.410084727383659e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2409.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2409.0,
      "epoch": 1.4794520547945205,
      "step": 108
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.460326611995697
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/learning_rate_real": 2.410084727383659e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 3324.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 3324.0,
      "epoch": 1.4794520547945205,
      "step": 108
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.26926982402801514
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/learning_rate_real": 2.410084727383659e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 1.4931506849315068,
      "step": 109
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/ce_loss": 4.1875,
      "train/diffusion_loss": 0.44318896532058716
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/learning_rate_real": 2.4080076978837658e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2879.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2879.0,
      "epoch": 1.4931506849315068,
      "step": 109
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/ce_loss": 4.15625,
      "train/diffusion_loss": 0.3627161979675293
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/learning_rate_real": 2.4080076978837658e-05
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2070.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2070.0,
      "epoch": 1.4931506849315068,
      "step": 109
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.47383245825767517
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/learning_rate_real": 2.4080076978837658e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2443.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2443.0,
      "epoch": 1.4931506849315068,
      "step": 109
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.4685288071632385
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/learning_rate_real": 2.4080076978837658e-05
    },
    {
      "epoch": 1.5068493150684932,
      "grad_norm": 1.5252450704574585,
      "learning_rate": 2.4080076978837658e-05,
      "loss": 3.0191,
      "step": 110
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2442.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2442.0,
      "epoch": 1.5068493150684932,
      "step": 110
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.45642104744911194
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/learning_rate_real": 2.405907867905245e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 1.5068493150684932,
      "step": 110
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/ce_loss": 4.1875,
      "train/diffusion_loss": 0.48576435446739197
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/learning_rate_real": 2.405907867905245e-05
    },
    {
      "debug/num_lat_loss": 1679.0,
      "debug/num_lat_total": 2746.0,
      "debug/num_tok_loss": 1679.0,
      "debug/num_tok_total": 2746.0,
      "epoch": 1.5068493150684932,
      "step": 110
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/ce_loss": 4.125,
      "train/diffusion_loss": 0.3542007803916931
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/learning_rate_real": 2.405907867905245e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2609.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2609.0,
      "epoch": 1.5068493150684932,
      "step": 110
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.4068741798400879
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/learning_rate_real": 2.405907867905245e-05
    },
    {
      "debug/num_lat_loss": 1738.0,
      "debug/num_lat_total": 2577.0,
      "debug/num_tok_loss": 1738.0,
      "debug/num_tok_total": 2577.0,
      "epoch": 1.5205479452054793,
      "step": 111
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.41163885593414307
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/learning_rate_real": 2.403785278792492e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2848.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2848.0,
      "epoch": 1.5205479452054793,
      "step": 111
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.36420491337776184
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/learning_rate_real": 2.403785278792492e-05
    },
    {
      "debug/num_lat_loss": 1747.0,
      "debug/num_lat_total": 2596.0,
      "debug/num_tok_loss": 1747.0,
      "debug/num_tok_total": 2596.0,
      "epoch": 1.5205479452054793,
      "step": 111
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.3811115622520447
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/learning_rate_real": 2.403785278792492e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 3021.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 3021.0,
      "epoch": 1.5205479452054793,
      "step": 111
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/ce_loss": 4.28125,
      "train/diffusion_loss": 0.33210745453834534
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/learning_rate_real": 2.403785278792492e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2830.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2830.0,
      "epoch": 1.5342465753424657,
      "step": 112
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/ce_loss": 4.0625,
      "train/diffusion_loss": 0.35382625460624695
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/learning_rate_real": 2.401639972338016e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2603.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2603.0,
      "epoch": 1.5342465753424657,
      "step": 112
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.4346335232257843
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/learning_rate_real": 2.401639972338016e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 1.5342465753424657,
      "step": 112
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.331741064786911
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/learning_rate_real": 2.401639972338016e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 3063.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 3063.0,
      "epoch": 1.5342465753424657,
      "step": 112
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.3286246955394745
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/learning_rate_real": 2.401639972338016e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2411.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2411.0,
      "epoch": 1.547945205479452,
      "step": 113
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/ce_loss": 3.84375,
      "train/diffusion_loss": 0.44130316376686096
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/learning_rate_real": 2.399471990781619e-05
    },
    {
      "debug/num_lat_loss": 1744.0,
      "debug/num_lat_total": 2801.0,
      "debug/num_tok_loss": 1744.0,
      "debug/num_tok_total": 2801.0,
      "epoch": 1.547945205479452,
      "step": 113
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/ce_loss": 4.03125,
      "train/diffusion_loss": 0.3459753096103668
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/learning_rate_real": 2.399471990781619e-05
    },
    {
      "debug/num_lat_loss": 1744.0,
      "debug/num_lat_total": 2598.0,
      "debug/num_tok_loss": 1744.0,
      "debug/num_tok_total": 2598.0,
      "epoch": 1.547945205479452,
      "step": 113
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.4222409129142761
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/learning_rate_real": 2.399471990781619e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2874.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2874.0,
      "epoch": 1.547945205479452,
      "step": 113
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/ce_loss": 4.0625,
      "train/diffusion_loss": 0.35831475257873535
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/learning_rate_real": 2.399471990781619e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 3056.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 3056.0,
      "epoch": 1.5616438356164384,
      "step": 114
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/ce_loss": 4.15625,
      "train/diffusion_loss": 0.2948022186756134
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/learning_rate_real": 2.3972813768095583e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 1.5616438356164384,
      "step": 114
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.4711344838142395
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/learning_rate_real": 2.3972813768095583e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 3543.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 3543.0,
      "epoch": 1.5616438356164384,
      "step": 114
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/ce_loss": 4.0625,
      "train/diffusion_loss": 0.23170864582061768
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/learning_rate_real": 2.3972813768095583e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2682.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2682.0,
      "epoch": 1.5616438356164384,
      "step": 114
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/ce_loss": 4.0625,
      "train/diffusion_loss": 0.40863677859306335
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/learning_rate_real": 2.3972813768095583e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 1.5753424657534247,
      "step": 115
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/ce_loss": 3.71875,
      "train/diffusion_loss": 0.4591188132762909
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/learning_rate_real": 2.3950681735537135e-05
    },
    {
      "debug/num_lat_loss": 1639.0,
      "debug/num_lat_total": 2499.0,
      "debug/num_tok_loss": 1639.0,
      "debug/num_tok_total": 2499.0,
      "epoch": 1.5753424657534247,
      "step": 115
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/ce_loss": 3.953125,
      "train/diffusion_loss": 0.39648905396461487
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/learning_rate_real": 2.3950681735537135e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2213.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2213.0,
      "epoch": 1.5753424657534247,
      "step": 115
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/ce_loss": 3.890625,
      "train/diffusion_loss": 0.49079185724258423
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/learning_rate_real": 2.3950681735537135e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2230.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2230.0,
      "epoch": 1.5753424657534247,
      "step": 115
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/ce_loss": 4.03125,
      "train/diffusion_loss": 0.49304887652397156
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/learning_rate_real": 2.3950681735537135e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 3094.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 3094.0,
      "epoch": 1.589041095890411,
      "step": 116
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/ce_loss": 3.859375,
      "train/diffusion_loss": 0.3350035548210144
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/learning_rate_real": 2.3928324245907324e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3081.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3081.0,
      "epoch": 1.589041095890411,
      "step": 116
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/ce_loss": 3.921875,
      "train/diffusion_loss": 0.30515187978744507
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/learning_rate_real": 2.3928324245907324e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2870.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2870.0,
      "epoch": 1.589041095890411,
      "step": 116
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/ce_loss": 3.953125,
      "train/diffusion_loss": 0.3786502778530121
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/learning_rate_real": 2.3928324245907324e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 1.589041095890411,
      "step": 116
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/ce_loss": 3.8125,
      "train/diffusion_loss": 0.415124773979187
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/learning_rate_real": 2.3928324245907324e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 1.6027397260273972,
      "step": 117
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.4104914665222168
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/learning_rate_real": 2.3905741739411735e-05
    },
    {
      "debug/num_lat_loss": 1679.0,
      "debug/num_lat_total": 3078.0,
      "debug/num_tok_loss": 1679.0,
      "debug/num_tok_total": 3078.0,
      "epoch": 1.6027397260273972,
      "step": 117
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/ce_loss": 3.921875,
      "train/diffusion_loss": 0.26384884119033813
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/learning_rate_real": 2.3905741739411735e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 1.6027397260273972,
      "step": 117
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.4527377486228943
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/learning_rate_real": 2.3905741739411735e-05
    },
    {
      "debug/num_lat_loss": 1643.0,
      "debug/num_lat_total": 2517.0,
      "debug/num_tok_loss": 1643.0,
      "debug/num_tok_total": 2517.0,
      "epoch": 1.6027397260273972,
      "step": 117
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/ce_loss": 4.03125,
      "train/diffusion_loss": 0.4090951979160309
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/learning_rate_real": 2.3905741739411735e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3067.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3067.0,
      "epoch": 1.6164383561643836,
      "step": 118
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.30969923734664917
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/learning_rate_real": 2.3882934660686418e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2414.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2414.0,
      "epoch": 1.6164383561643836,
      "step": 118
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.46044453978538513
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/learning_rate_real": 2.3882934660686418e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2839.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2839.0,
      "epoch": 1.6164383561643836,
      "step": 118
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.4008499085903168
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/learning_rate_real": 2.3882934660686418e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2868.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2868.0,
      "epoch": 1.6164383561643836,
      "step": 118
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.33701223134994507
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/learning_rate_real": 2.3882934660686418e-05
    },
    {
      "debug/num_lat_loss": 1727.0,
      "debug/num_lat_total": 2748.0,
      "debug/num_tok_loss": 1727.0,
      "debug/num_tok_total": 2748.0,
      "epoch": 1.6301369863013697,
      "step": 119
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/ce_loss": 3.84375,
      "train/diffusion_loss": 0.36649128794670105
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/learning_rate_real": 2.3859903458789094e-05
    },
    {
      "debug/num_lat_loss": 1707.0,
      "debug/num_lat_total": 2493.0,
      "debug/num_tok_loss": 1707.0,
      "debug/num_tok_total": 2493.0,
      "epoch": 1.6301369863013697,
      "step": 119
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.4311346709728241
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/learning_rate_real": 2.3859903458789094e-05
    },
    {
      "debug/num_lat_loss": 1697.0,
      "debug/num_lat_total": 2782.0,
      "debug/num_tok_loss": 1697.0,
      "debug/num_tok_total": 2782.0,
      "epoch": 1.6301369863013697,
      "step": 119
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.3630007207393646
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/learning_rate_real": 2.3859903458789094e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2421.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2421.0,
      "epoch": 1.6301369863013697,
      "step": 119
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/ce_loss": 3.8125,
      "train/diffusion_loss": 0.432765394449234
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/learning_rate_real": 2.3859903458789094e-05
    },
    {
      "epoch": 1.643835616438356,
      "grad_norm": 1.4388368129730225,
      "learning_rate": 2.3859903458789094e-05,
      "loss": 2.8148,
      "step": 120
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 1.643835616438356,
      "step": 120
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/ce_loss": 3.953125,
      "train/diffusion_loss": 0.4270760118961334
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/learning_rate_real": 2.3836648587190354e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2220.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2220.0,
      "epoch": 1.643835616438356,
      "step": 120
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.5018504858016968
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/learning_rate_real": 2.3836648587190354e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2422.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2422.0,
      "epoch": 1.643835616438356,
      "step": 120
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.47332513332366943
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/learning_rate_real": 2.3836648587190354e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2864.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2864.0,
      "epoch": 1.643835616438356,
      "step": 120
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.3780655264854431
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/learning_rate_real": 2.3836648587190354e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 1.6575342465753424,
      "step": 121
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/ce_loss": 4.125,
      "train/diffusion_loss": 0.44194358587265015
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/learning_rate_real": 2.38131705037647e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2855.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2855.0,
      "epoch": 1.6575342465753424,
      "step": 121
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/ce_loss": 3.703125,
      "train/diffusion_loss": 0.3793834149837494
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/learning_rate_real": 2.38131705037647e-05
    },
    {
      "debug/num_lat_loss": 1745.0,
      "debug/num_lat_total": 2801.0,
      "debug/num_tok_loss": 1745.0,
      "debug/num_tok_total": 2801.0,
      "epoch": 1.6575342465753424,
      "step": 121
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.36281681060791016
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/learning_rate_real": 2.38131705037647e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2887.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2887.0,
      "epoch": 1.6575342465753424,
      "step": 121
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/ce_loss": 3.84375,
      "train/diffusion_loss": 0.3547899127006531
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/learning_rate_real": 2.38131705037647e-05
    },
    {
      "debug/num_lat_loss": 1712.0,
      "debug/num_lat_total": 2153.0,
      "debug/num_tok_loss": 1712.0,
      "debug/num_tok_total": 2153.0,
      "epoch": 1.6712328767123288,
      "step": 122
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/ce_loss": 3.828125,
      "train/diffusion_loss": 0.49153730273246765
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/learning_rate_real": 2.3789469670781547e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2218.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2218.0,
      "epoch": 1.6712328767123288,
      "step": 122
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/ce_loss": 3.65625,
      "train/diffusion_loss": 0.5030884742736816
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/learning_rate_real": 2.3789469670781547e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2865.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2865.0,
      "epoch": 1.6712328767123288,
      "step": 122
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/ce_loss": 4.03125,
      "train/diffusion_loss": 0.3352532982826233
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/learning_rate_real": 2.3789469670781547e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 3069.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 3069.0,
      "epoch": 1.6712328767123288,
      "step": 122
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/ce_loss": 3.828125,
      "train/diffusion_loss": 0.32163891196250916
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/learning_rate_real": 2.3789469670781547e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2383.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2383.0,
      "epoch": 1.6849315068493151,
      "step": 123
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.4190615117549896
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/learning_rate_real": 2.376554655489612e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2214.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2214.0,
      "epoch": 1.6849315068493151,
      "step": 123
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/ce_loss": 3.609375,
      "train/diffusion_loss": 0.5074918866157532
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/learning_rate_real": 2.376554655489612e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 1.6849315068493151,
      "step": 123
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/ce_loss": 3.671875,
      "train/diffusion_loss": 0.4351557791233063
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/learning_rate_real": 2.376554655489612e-05
    },
    {
      "debug/num_lat_loss": 1642.0,
      "debug/num_lat_total": 2306.0,
      "debug/num_tok_loss": 1642.0,
      "debug/num_tok_total": 2306.0,
      "epoch": 1.6849315068493151,
      "step": 123
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/ce_loss": 3.703125,
      "train/diffusion_loss": 0.4449380934238434
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/learning_rate_real": 2.376554655489612e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2881.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2881.0,
      "epoch": 1.6986301369863015,
      "step": 124
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/ce_loss": 3.703125,
      "train/diffusion_loss": 0.36390984058380127
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/learning_rate_real": 2.3741401627140242e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2810.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2810.0,
      "epoch": 1.6986301369863015,
      "step": 124
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.3646058440208435
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/learning_rate_real": 2.3741401627140242e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2642.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2642.0,
      "epoch": 1.6986301369863015,
      "step": 124
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.40764036774635315
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/learning_rate_real": 2.3741401627140242e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2195.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2195.0,
      "epoch": 1.6986301369863015,
      "step": 124
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/ce_loss": 4.09375,
      "train/diffusion_loss": 0.47080162167549133
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/learning_rate_real": 2.3741401627140242e-05
    },
    {
      "debug/num_lat_loss": 1679.0,
      "debug/num_lat_total": 2225.0,
      "debug/num_tok_loss": 1679.0,
      "debug/num_tok_total": 2225.0,
      "epoch": 1.7123287671232876,
      "step": 125
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/ce_loss": 3.546875,
      "train/diffusion_loss": 0.464213103055954
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/learning_rate_real": 2.3717035362913105e-05
    },
    {
      "debug/num_lat_loss": 1592.0,
      "debug/num_lat_total": 2696.0,
      "debug/num_tok_loss": 1592.0,
      "debug/num_tok_total": 2696.0,
      "epoch": 1.7123287671232876,
      "step": 125
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.3316189646720886
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/learning_rate_real": 2.3717035362913105e-05
    },
    {
      "debug/num_lat_loss": 1578.0,
      "debug/num_lat_total": 2460.0,
      "debug/num_tok_loss": 1578.0,
      "debug/num_tok_total": 2460.0,
      "epoch": 1.7123287671232876,
      "step": 125
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.39441150426864624
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/learning_rate_real": 2.3717035362913105e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2464.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2464.0,
      "epoch": 1.7123287671232876,
      "step": 125
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/ce_loss": 3.625,
      "train/diffusion_loss": 0.4529780447483063
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/learning_rate_real": 2.3717035362913105e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.726027397260274,
      "step": 126
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/ce_loss": 3.71875,
      "train/diffusion_loss": 0.41818350553512573
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/learning_rate_real": 2.3692448241971866e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 1.726027397260274,
      "step": 126
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/ce_loss": 3.828125,
      "train/diffusion_loss": 0.4160309433937073
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/learning_rate_real": 2.3692448241971866e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2413.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2413.0,
      "epoch": 1.726027397260274,
      "step": 126
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.4794480800628662
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/learning_rate_real": 2.3692448241971866e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2695.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2695.0,
      "epoch": 1.726027397260274,
      "step": 126
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/ce_loss": 3.765625,
      "train/diffusion_loss": 0.4259827435016632
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/learning_rate_real": 2.3692448241971866e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2427.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2427.0,
      "epoch": 1.7397260273972601,
      "step": 127
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.4588565230369568
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/learning_rate_real": 2.366764074842222e-05
    },
    {
      "debug/num_lat_loss": 1683.0,
      "debug/num_lat_total": 2430.0,
      "debug/num_tok_loss": 1683.0,
      "debug/num_tok_total": 2430.0,
      "epoch": 1.7397260273972601,
      "step": 127
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/ce_loss": 3.765625,
      "train/diffusion_loss": 0.42775484919548035
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/learning_rate_real": 2.366764074842222e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2229.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2229.0,
      "epoch": 1.7397260273972601,
      "step": 127
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/ce_loss": 3.65625,
      "train/diffusion_loss": 0.4776081442832947
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/learning_rate_real": 2.366764074842222e-05
    },
    {
      "debug/num_lat_loss": 1598.0,
      "debug/num_lat_total": 2917.0,
      "debug/num_tok_loss": 1598.0,
      "debug/num_tok_total": 2917.0,
      "epoch": 1.7397260273972601,
      "step": 127
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.2688691020011902
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/learning_rate_real": 2.366764074842222e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 1.7534246575342465,
      "step": 128
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/ce_loss": 3.609375,
      "train/diffusion_loss": 0.41486573219299316
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/learning_rate_real": 2.364261337070888e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2188.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2188.0,
      "epoch": 1.7534246575342465,
      "step": 128
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/ce_loss": 3.625,
      "train/diffusion_loss": 0.5014437437057495
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/learning_rate_real": 2.364261337070888e-05
    },
    {
      "debug/num_lat_loss": 1660.0,
      "debug/num_lat_total": 2525.0,
      "debug/num_tok_loss": 1660.0,
      "debug/num_tok_total": 2525.0,
      "epoch": 1.7534246575342465,
      "step": 128
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/ce_loss": 3.484375,
      "train/diffusion_loss": 0.40325120091438293
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/learning_rate_real": 2.364261337070888e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2832.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2832.0,
      "epoch": 1.7534246575342465,
      "step": 128
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/ce_loss": 4.09375,
      "train/diffusion_loss": 0.3543040454387665
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/learning_rate_real": 2.364261337070888e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2445.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2445.0,
      "epoch": 1.7671232876712328,
      "step": 129
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/ce_loss": 3.859375,
      "train/diffusion_loss": 0.472688764333725
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/learning_rate_real": 2.361736660160593e-05
    },
    {
      "debug/num_lat_loss": 1816.0,
      "debug/num_lat_total": 3572.0,
      "debug/num_tok_loss": 1816.0,
      "debug/num_tok_total": 3572.0,
      "epoch": 1.7671232876712328,
      "step": 129
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.22201991081237793
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/learning_rate_real": 2.361736660160593e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2410.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2410.0,
      "epoch": 1.7671232876712328,
      "step": 129
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.44313621520996094
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/learning_rate_real": 2.361736660160593e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2825.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2825.0,
      "epoch": 1.7671232876712328,
      "step": 129
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/ce_loss": 3.625,
      "train/diffusion_loss": 0.3336658179759979
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/learning_rate_real": 2.361736660160593e-05
    },
    {
      "epoch": 1.7808219178082192,
      "grad_norm": 1.576515793800354,
      "learning_rate": 2.361736660160593e-05,
      "loss": 2.9194,
      "step": 130
    },
    {
      "debug/num_lat_loss": 1646.0,
      "debug/num_lat_total": 2308.0,
      "debug/num_tok_loss": 1646.0,
      "debug/num_tok_total": 2308.0,
      "epoch": 1.7808219178082192,
      "step": 130
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.43932580947875977
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/learning_rate_real": 2.3591900938207147e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2868.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2868.0,
      "epoch": 1.7808219178082192,
      "step": 130
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/ce_loss": 3.6875,
      "train/diffusion_loss": 0.3732472360134125
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/learning_rate_real": 2.3591900938207147e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 1.7808219178082192,
      "step": 130
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/ce_loss": 3.515625,
      "train/diffusion_loss": 0.45174187421798706
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/learning_rate_real": 2.3591900938207147e-05
    },
    {
      "debug/num_lat_loss": 1692.0,
      "debug/num_lat_total": 2887.0,
      "debug/num_tok_loss": 1692.0,
      "debug/num_tok_total": 2887.0,
      "epoch": 1.7808219178082192,
      "step": 130
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.32698309421539307
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/learning_rate_real": 2.3591900938207147e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2840.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2840.0,
      "epoch": 1.7945205479452055,
      "step": 131
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/ce_loss": 3.5,
      "train/diffusion_loss": 0.34751012921333313
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/learning_rate_real": 2.3566216881916202e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.7945205479452055,
      "step": 131
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.4357163906097412
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/learning_rate_real": 2.3566216881916202e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 2584.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 2584.0,
      "epoch": 1.7945205479452055,
      "step": 131
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/ce_loss": 3.546875,
      "train/diffusion_loss": 0.41815292835235596
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/learning_rate_real": 2.3566216881916202e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 1.7945205479452055,
      "step": 131
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/ce_loss": 3.765625,
      "train/diffusion_loss": 0.36779430508613586
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/learning_rate_real": 2.3566216881916202e-05
    },
    {
      "debug/num_lat_loss": 1757.0,
      "debug/num_lat_total": 2171.0,
      "debug/num_tok_loss": 1757.0,
      "debug/num_tok_total": 2171.0,
      "epoch": 1.808219178082192,
      "step": 132
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.47450587153434753
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/learning_rate_real": 2.35403149384368e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2014.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2014.0,
      "epoch": 1.808219178082192,
      "step": 132
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/ce_loss": 3.4375,
      "train/diffusion_loss": 0.5649107694625854
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/learning_rate_real": 2.35403149384368e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2633.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2633.0,
      "epoch": 1.808219178082192,
      "step": 132
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/ce_loss": 3.5625,
      "train/diffusion_loss": 0.4057047963142395
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/learning_rate_real": 2.35403149384368e-05
    },
    {
      "debug/num_lat_loss": 1647.0,
      "debug/num_lat_total": 2930.0,
      "debug/num_tok_loss": 1647.0,
      "debug/num_tok_total": 2930.0,
      "epoch": 1.808219178082192,
      "step": 132
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/ce_loss": 3.625,
      "train/diffusion_loss": 0.28367847204208374
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/learning_rate_real": 2.35403149384368e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 1.821917808219178,
      "step": 133
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.44793954491615295
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/learning_rate_real": 2.3514195617762706e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 1.821917808219178,
      "step": 133
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/ce_loss": 3.46875,
      "train/diffusion_loss": 0.4012427031993866
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/learning_rate_real": 2.3514195617762706e-05
    },
    {
      "debug/num_lat_loss": 1748.0,
      "debug/num_lat_total": 2788.0,
      "debug/num_tok_loss": 1748.0,
      "debug/num_tok_total": 2788.0,
      "epoch": 1.821917808219178,
      "step": 133
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/ce_loss": 3.484375,
      "train/diffusion_loss": 0.3418940603733063
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/learning_rate_real": 2.3514195617762706e-05
    },
    {
      "debug/num_lat_loss": 1572.0,
      "debug/num_lat_total": 2204.0,
      "debug/num_tok_loss": 1572.0,
      "debug/num_tok_total": 2204.0,
      "epoch": 1.821917808219178,
      "step": 133
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.4106413424015045
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/learning_rate_real": 2.3514195617762706e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2190.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2190.0,
      "epoch": 1.8356164383561644,
      "step": 134
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/ce_loss": 3.9375,
      "train/diffusion_loss": 0.5045044422149658
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/learning_rate_real": 2.348785943416772e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2641.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2641.0,
      "epoch": 1.8356164383561644,
      "step": 134
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.38860100507736206
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/learning_rate_real": 2.348785943416772e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 1.8356164383561644,
      "step": 134
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.393936425447464
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/learning_rate_real": 2.348785943416772e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 1.8356164383561644,
      "step": 134
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/ce_loss": 3.5,
      "train/diffusion_loss": 0.44742295145988464
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/learning_rate_real": 2.348785943416772e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2402.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2402.0,
      "epoch": 1.8493150684931505,
      "step": 135
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.43890005350112915
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/learning_rate_real": 2.3461306906195524e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2634.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2634.0,
      "epoch": 1.8493150684931505,
      "step": 135
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.4111637473106384
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/learning_rate_real": 2.3461306906195524e-05
    },
    {
      "debug/num_lat_loss": 1638.0,
      "debug/num_lat_total": 2712.0,
      "debug/num_tok_loss": 1638.0,
      "debug/num_tok_total": 2712.0,
      "epoch": 1.8493150684931505,
      "step": 135
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/ce_loss": 3.484375,
      "train/diffusion_loss": 0.3159174621105194
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/learning_rate_real": 2.3461306906195524e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3102.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3102.0,
      "epoch": 1.8493150684931505,
      "step": 135
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/ce_loss": 3.4375,
      "train/diffusion_loss": 0.30301010608673096
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/learning_rate_real": 2.3461306906195524e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2190.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2190.0,
      "epoch": 1.8630136986301369,
      "step": 136
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/ce_loss": 3.515625,
      "train/diffusion_loss": 0.5018863081932068
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/learning_rate_real": 2.3434538556649515e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2203.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2203.0,
      "epoch": 1.8630136986301369,
      "step": 136
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.5082259774208069
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/learning_rate_real": 2.3434538556649515e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 2184.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 2184.0,
      "epoch": 1.8630136986301369,
      "step": 136
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/ce_loss": 3.640625,
      "train/diffusion_loss": 0.4796072542667389
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/learning_rate_real": 2.3434538556649515e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 1.8630136986301369,
      "step": 136
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/ce_loss": 3.671875,
      "train/diffusion_loss": 0.4622892439365387
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/learning_rate_real": 2.3434538556649515e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2692.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2692.0,
      "epoch": 1.8767123287671232,
      "step": 137
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/ce_loss": 3.359375,
      "train/diffusion_loss": 0.39755386114120483
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/learning_rate_real": 2.340755491258248e-05
    },
    {
      "debug/num_lat_loss": 1689.0,
      "debug/num_lat_total": 2460.0,
      "debug/num_tok_loss": 1689.0,
      "debug/num_tok_total": 2460.0,
      "epoch": 1.8767123287671232,
      "step": 137
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/ce_loss": 3.5,
      "train/diffusion_loss": 0.4218902289867401
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/learning_rate_real": 2.340755491258248e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2195.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2195.0,
      "epoch": 1.8767123287671232,
      "step": 137
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.5013386607170105
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/learning_rate_real": 2.340755491258248e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2611.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2611.0,
      "epoch": 1.8767123287671232,
      "step": 137
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.4383437931537628
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/learning_rate_real": 2.340755491258248e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 1.8904109589041096,
      "step": 138
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/ce_loss": 3.359375,
      "train/diffusion_loss": 0.3976190388202667
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/learning_rate_real": 2.3380356505286216e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2621.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2621.0,
      "epoch": 1.8904109589041096,
      "step": 138
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/ce_loss": 3.71875,
      "train/diffusion_loss": 0.40847277641296387
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/learning_rate_real": 2.3380356505286216e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2660.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2660.0,
      "epoch": 1.8904109589041096,
      "step": 138
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/ce_loss": 3.375,
      "train/diffusion_loss": 0.42175373435020447
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/learning_rate_real": 2.3380356505286216e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 1.8904109589041096,
      "step": 138
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/ce_loss": 3.671875,
      "train/diffusion_loss": 0.38470953702926636
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/learning_rate_real": 2.3380356505286216e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2231.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2231.0,
      "epoch": 1.904109589041096,
      "step": 139
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/ce_loss": 3.46875,
      "train/diffusion_loss": 0.5015687346458435
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/learning_rate_real": 2.335294387028109e-05
    },
    {
      "debug/num_lat_loss": 1723.0,
      "debug/num_lat_total": 2364.0,
      "debug/num_tok_loss": 1723.0,
      "debug/num_tok_total": 2364.0,
      "epoch": 1.904109589041096,
      "step": 139
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.46811532974243164
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/learning_rate_real": 2.335294387028109e-05
    },
    {
      "debug/num_lat_loss": 1680.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1680.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 1.904109589041096,
      "step": 139
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.3847237229347229
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/learning_rate_real": 2.335294387028109e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 3114.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 3114.0,
      "epoch": 1.904109589041096,
      "step": 139
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/ce_loss": 3.515625,
      "train/diffusion_loss": 0.32092833518981934
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/learning_rate_real": 2.335294387028109e-05
    },
    {
      "epoch": 1.9178082191780823,
      "grad_norm": 1.6461002826690674,
      "learning_rate": 2.335294387028109e-05,
      "loss": 2.9031,
      "step": 140
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2182.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2182.0,
      "epoch": 1.9178082191780823,
      "step": 140
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.5311415195465088
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/learning_rate_real": 2.3325317547305485e-05
    },
    {
      "debug/num_lat_loss": 1607.0,
      "debug/num_lat_total": 2197.0,
      "debug/num_tok_loss": 1607.0,
      "debug/num_tok_total": 2197.0,
      "epoch": 1.9178082191780823,
      "step": 140
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/ce_loss": 3.375,
      "train/diffusion_loss": 0.45163846015930176
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/learning_rate_real": 2.3325317547305485e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2208.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2208.0,
      "epoch": 1.9178082191780823,
      "step": 140
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.4920024275779724
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/learning_rate_real": 2.3325317547305485e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 1.9178082191780823,
      "step": 140
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.43166494369506836
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/learning_rate_real": 2.3325317547305485e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 2813.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 2813.0,
      "epoch": 1.9315068493150684,
      "step": 141
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.3822256922721863
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/learning_rate_real": 2.3297478080305162e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 1.9315068493150684,
      "step": 141
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.3977663516998291
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/learning_rate_real": 2.3297478080305162e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2903.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2903.0,
      "epoch": 1.9315068493150684,
      "step": 141
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.3696616291999817
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/learning_rate_real": 2.3297478080305162e-05
    },
    {
      "debug/num_lat_loss": 1604.0,
      "debug/num_lat_total": 2076.0,
      "debug/num_tok_loss": 1604.0,
      "debug/num_tok_total": 2076.0,
      "epoch": 1.9315068493150684,
      "step": 141
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.4954376816749573
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/learning_rate_real": 2.3297478080305162e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2856.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2856.0,
      "epoch": 1.9452054794520548,
      "step": 142
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.3527717590332031
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/learning_rate_real": 2.3269426017422576e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2470.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2470.0,
      "epoch": 1.9452054794520548,
      "step": 142
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.4418691396713257
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/learning_rate_real": 2.3269426017422576e-05
    },
    {
      "debug/num_lat_loss": 1747.0,
      "debug/num_lat_total": 2610.0,
      "debug/num_tok_loss": 1747.0,
      "debug/num_tok_total": 2610.0,
      "epoch": 1.9452054794520548,
      "step": 142
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/ce_loss": 3.59375,
      "train/diffusion_loss": 0.41341838240623474
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/learning_rate_real": 2.3269426017422576e-05
    },
    {
      "debug/num_lat_loss": 1718.0,
      "debug/num_lat_total": 2343.0,
      "debug/num_tok_loss": 1718.0,
      "debug/num_tok_total": 2343.0,
      "epoch": 1.9452054794520548,
      "step": 142
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/ce_loss": 3.5,
      "train/diffusion_loss": 0.476648211479187
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/learning_rate_real": 2.3269426017422576e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2221.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2221.0,
      "epoch": 1.958904109589041,
      "step": 143
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/ce_loss": 3.34375,
      "train/diffusion_loss": 0.503341794013977
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/learning_rate_real": 2.3241161910986053e-05
    },
    {
      "debug/num_lat_loss": 1712.0,
      "debug/num_lat_total": 2588.0,
      "debug/num_tok_loss": 1712.0,
      "debug/num_tok_total": 2588.0,
      "epoch": 1.958904109589041,
      "step": 143
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.39334893226623535
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/learning_rate_real": 2.3241161910986053e-05
    },
    {
      "debug/num_lat_loss": 1819.0,
      "debug/num_lat_total": 2928.0,
      "debug/num_tok_loss": 1819.0,
      "debug/num_tok_total": 2928.0,
      "epoch": 1.958904109589041,
      "step": 143
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/ce_loss": 3.484375,
      "train/diffusion_loss": 0.3769533932209015
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/learning_rate_real": 2.3241161910986053e-05
    },
    {
      "debug/num_lat_loss": 1741.0,
      "debug/num_lat_total": 2599.0,
      "debug/num_tok_loss": 1741.0,
      "debug/num_tok_total": 2599.0,
      "epoch": 1.958904109589041,
      "step": 143
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/ce_loss": 3.328125,
      "train/diffusion_loss": 0.41423624753952026
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/learning_rate_real": 2.3241161910986053e-05
    },
    {
      "debug/num_lat_loss": 1381.0,
      "debug/num_lat_total": 2174.0,
      "debug/num_tok_loss": 1381.0,
      "debug/num_tok_total": 2174.0,
      "epoch": 1.9726027397260273,
      "step": 144
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/ce_loss": 3.46875,
      "train/diffusion_loss": 0.3866157829761505
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/learning_rate_real": 2.321268631749894e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2196.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2196.0,
      "epoch": 1.9726027397260273,
      "step": 144
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.5139296054840088
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/learning_rate_real": 2.321268631749894e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 3262.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 3262.0,
      "epoch": 1.9726027397260273,
      "step": 144
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.26967543363571167
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/learning_rate_real": 2.321268631749894e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2860.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2860.0,
      "epoch": 1.9726027397260273,
      "step": 144
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.37319138646125793
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/learning_rate_real": 2.321268631749894e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2839.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2839.0,
      "epoch": 1.9863013698630136,
      "step": 145
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/ce_loss": 3.265625,
      "train/diffusion_loss": 0.3564004898071289
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/learning_rate_real": 2.318399979762863e-05
    },
    {
      "debug/num_lat_loss": 1600.0,
      "debug/num_lat_total": 2254.0,
      "debug/num_tok_loss": 1600.0,
      "debug/num_tok_total": 2254.0,
      "epoch": 1.9863013698630136,
      "step": 145
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/ce_loss": 3.4375,
      "train/diffusion_loss": 0.46334365010261536
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/learning_rate_real": 2.318399979762863e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2466.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2466.0,
      "epoch": 1.9863013698630136,
      "step": 145
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.47243303060531616
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/learning_rate_real": 2.318399979762863e-05
    },
    {
      "debug/num_lat_loss": 451.0,
      "debug/num_lat_total": 451.0,
      "debug/num_tok_loss": 451.0,
      "debug/num_tok_total": 451.0,
      "epoch": 1.9863013698630136,
      "step": 145
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/ce_loss": 3.25,
      "train/diffusion_loss": 0.5414097309112549
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/learning_rate_real": 2.318399979762863e-05
    },
    {
      "debug/num_lat_loss": 1711.0,
      "debug/num_lat_total": 2300.0,
      "debug/num_tok_loss": 1711.0,
      "debug/num_tok_total": 2300.0,
      "epoch": 2.0,
      "step": 146
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/ce_loss": 3.34375,
      "train/diffusion_loss": 0.4503061771392822
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/learning_rate_real": 2.3155102916195535e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.0,
      "step": 146
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.42279499769210815
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/learning_rate_real": 2.3155102916195535e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2395.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2395.0,
      "epoch": 2.0,
      "step": 146
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.47049665451049805
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/learning_rate_real": 2.3155102916195535e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2638.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2638.0,
      "epoch": 2.0,
      "step": 146
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/ce_loss": 3.203125,
      "train/diffusion_loss": 0.37802591919898987
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/learning_rate_real": 2.3155102916195535e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 2.0136986301369864,
      "step": 147
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/ce_loss": 3.25,
      "train/diffusion_loss": 0.45496639609336853
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/learning_rate_real": 2.3125996242161953e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2851.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2851.0,
      "epoch": 2.0136986301369864,
      "step": 147
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/ce_loss": 3.375,
      "train/diffusion_loss": 0.37154674530029297
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/learning_rate_real": 2.3125996242161953e-05
    },
    {
      "debug/num_lat_loss": 1617.0,
      "debug/num_lat_total": 2051.0,
      "debug/num_tok_loss": 1617.0,
      "debug/num_tok_total": 2051.0,
      "epoch": 2.0136986301369864,
      "step": 147
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.46421128511428833
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/learning_rate_real": 2.3125996242161953e-05
    },
    {
      "debug/num_lat_loss": 1585.0,
      "debug/num_lat_total": 2429.0,
      "debug/num_tok_loss": 1585.0,
      "debug/num_tok_total": 2429.0,
      "epoch": 2.0136986301369864,
      "step": 147
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.3745909631252289
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/learning_rate_real": 2.3125996242161953e-05
    },
    {
      "debug/num_lat_loss": 1664.0,
      "debug/num_lat_total": 2199.0,
      "debug/num_tok_loss": 1664.0,
      "debug/num_tok_total": 2199.0,
      "epoch": 2.0273972602739727,
      "step": 148
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/ce_loss": 3.296875,
      "train/diffusion_loss": 0.4661211371421814
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/learning_rate_real": 2.3096680348620886e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2827.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2827.0,
      "epoch": 2.0273972602739727,
      "step": 148
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/ce_loss": 3.265625,
      "train/diffusion_loss": 0.3533343970775604
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/learning_rate_real": 2.3096680348620886e-05
    },
    {
      "debug/num_lat_loss": 1817.0,
      "debug/num_lat_total": 2686.0,
      "debug/num_tok_loss": 1817.0,
      "debug/num_tok_total": 2686.0,
      "epoch": 2.0273972602739727,
      "step": 148
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.41130325198173523
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/learning_rate_real": 2.3096680348620886e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2227.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2227.0,
      "epoch": 2.0273972602739727,
      "step": 148
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/ce_loss": 3.265625,
      "train/diffusion_loss": 0.5083057880401611
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/learning_rate_real": 2.3096680348620886e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2877.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2877.0,
      "epoch": 2.041095890410959,
      "step": 149
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/ce_loss": 3.28125,
      "train/diffusion_loss": 0.37254709005355835
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/learning_rate_real": 2.3067155812784734e-05
    },
    {
      "debug/num_lat_loss": 1629.0,
      "debug/num_lat_total": 2709.0,
      "debug/num_tok_loss": 1629.0,
      "debug/num_tok_total": 2709.0,
      "epoch": 2.041095890410959,
      "step": 149
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/ce_loss": 3.46875,
      "train/diffusion_loss": 0.3475020229816437
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/learning_rate_real": 2.3067155812784734e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2671.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2671.0,
      "epoch": 2.041095890410959,
      "step": 149
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.4053395688533783
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/learning_rate_real": 2.3067155812784734e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 2.041095890410959,
      "step": 149
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.39869940280914307
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/learning_rate_real": 2.3067155812784734e-05
    },
    {
      "epoch": 2.0547945205479454,
      "grad_norm": 1.5437519550323486,
      "learning_rate": 2.3067155812784734e-05,
      "loss": 2.9196,
      "step": 150
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3083.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3083.0,
      "epoch": 2.0547945205479454,
      "step": 150
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.3080335855484009
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/learning_rate_real": 2.303742321597394e-05
    },
    {
      "debug/num_lat_loss": 1731.0,
      "debug/num_lat_total": 2548.0,
      "debug/num_tok_loss": 1731.0,
      "debug/num_tok_total": 2548.0,
      "epoch": 2.0547945205479454,
      "step": 150
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.4144803285598755
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/learning_rate_real": 2.303742321597394e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2803.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2803.0,
      "epoch": 2.0547945205479454,
      "step": 150
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/ce_loss": 3.34375,
      "train/diffusion_loss": 0.39247575402259827
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/learning_rate_real": 2.303742321597394e-05
    },
    {
      "debug/num_lat_loss": 1755.0,
      "debug/num_lat_total": 2817.0,
      "debug/num_tok_loss": 1755.0,
      "debug/num_tok_total": 2817.0,
      "epoch": 2.0547945205479454,
      "step": 150
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/ce_loss": 3.1875,
      "train/diffusion_loss": 0.3341597020626068
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/learning_rate_real": 2.303742321597394e-05
    },
    {
      "debug/num_lat_loss": 1751.0,
      "debug/num_lat_total": 2384.0,
      "debug/num_tok_loss": 1751.0,
      "debug/num_tok_total": 2384.0,
      "epoch": 2.0684931506849313,
      "step": 151
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/ce_loss": 3.1875,
      "train/diffusion_loss": 0.40776801109313965
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/learning_rate_real": 2.3007483143605542e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 2.0684931506849313,
      "step": 151
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.41128188371658325
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/learning_rate_real": 2.3007483143605542e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 2.0684931506849313,
      "step": 151
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/ce_loss": 3.203125,
      "train/diffusion_loss": 0.41530677676200867
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/learning_rate_real": 2.3007483143605542e-05
    },
    {
      "debug/num_lat_loss": 1642.0,
      "debug/num_lat_total": 2288.0,
      "debug/num_tok_loss": 1642.0,
      "debug/num_tok_total": 2288.0,
      "epoch": 2.0684931506849313,
      "step": 151
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.42171525955200195
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/learning_rate_real": 2.3007483143605542e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 2.0821917808219177,
      "step": 152
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.419938862323761
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/learning_rate_real": 2.2977336185181646e-05
    },
    {
      "debug/num_lat_loss": 1746.0,
      "debug/num_lat_total": 3008.0,
      "debug/num_tok_loss": 1746.0,
      "debug/num_tok_total": 3008.0,
      "epoch": 2.0821917808219177,
      "step": 152
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.29541534185409546
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/learning_rate_real": 2.2977336185181646e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2838.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2838.0,
      "epoch": 2.0821917808219177,
      "step": 152
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/ce_loss": 3.171875,
      "train/diffusion_loss": 0.3496735095977783
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/learning_rate_real": 2.2977336185181646e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2391.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2391.0,
      "epoch": 2.0821917808219177,
      "step": 152
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.4392234981060028
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/learning_rate_real": 2.2977336185181646e-05
    },
    {
      "debug/num_lat_loss": 1757.0,
      "debug/num_lat_total": 2835.0,
      "debug/num_tok_loss": 1757.0,
      "debug/num_tok_total": 2835.0,
      "epoch": 2.095890410958904,
      "step": 153
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.3793810307979584
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/learning_rate_real": 2.2946982934277832e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2184.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2184.0,
      "epoch": 2.095890410958904,
      "step": 153
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/ce_loss": 3.09375,
      "train/diffusion_loss": 0.4782491624355316
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/learning_rate_real": 2.2946982934277832e-05
    },
    {
      "debug/num_lat_loss": 1646.0,
      "debug/num_lat_total": 2350.0,
      "debug/num_tok_loss": 1646.0,
      "debug/num_tok_total": 2350.0,
      "epoch": 2.095890410958904,
      "step": 153
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/ce_loss": 3.359375,
      "train/diffusion_loss": 0.49244463443756104
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/learning_rate_real": 2.2946982934277832e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 2.095890410958904,
      "step": 153
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.41750162839889526
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/learning_rate_real": 2.2946982934277832e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 3111.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 3111.0,
      "epoch": 2.1095890410958904,
      "step": 154
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/ce_loss": 3.171875,
      "train/diffusion_loss": 0.32178986072540283
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/learning_rate_real": 2.2916423988531437e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2198.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2198.0,
      "epoch": 2.1095890410958904,
      "step": 154
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.510726273059845
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/learning_rate_real": 2.2916423988531437e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2662.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2662.0,
      "epoch": 2.1095890410958904,
      "step": 154
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.4098641872406006
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/learning_rate_real": 2.2916423988531437e-05
    },
    {
      "debug/num_lat_loss": 1669.0,
      "debug/num_lat_total": 2528.0,
      "debug/num_tok_loss": 1669.0,
      "debug/num_tok_total": 2528.0,
      "epoch": 2.1095890410958904,
      "step": 154
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/ce_loss": 3.234375,
      "train/diffusion_loss": 0.4132782518863678
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/learning_rate_real": 2.2916423988531437e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2887.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2887.0,
      "epoch": 2.1232876712328768,
      "step": 155
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.3646351993083954
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/learning_rate_real": 2.2885659949629828e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2654.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2654.0,
      "epoch": 2.1232876712328768,
      "step": 155
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.38885483145713806
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/learning_rate_real": 2.2885659949629828e-05
    },
    {
      "debug/num_lat_loss": 1831.0,
      "debug/num_lat_total": 2937.0,
      "debug/num_tok_loss": 1831.0,
      "debug/num_tok_total": 2937.0,
      "epoch": 2.1232876712328768,
      "step": 155
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.3445654809474945
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/learning_rate_real": 2.2885659949629828e-05
    },
    {
      "debug/num_lat_loss": 1690.0,
      "debug/num_lat_total": 1897.0,
      "debug/num_tok_loss": 1690.0,
      "debug/num_tok_total": 1897.0,
      "epoch": 2.1232876712328768,
      "step": 155
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.5193563103675842
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/learning_rate_real": 2.2885659949629828e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3090.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3090.0,
      "epoch": 2.136986301369863,
      "step": 156
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/ce_loss": 3.0625,
      "train/diffusion_loss": 0.31100159883499146
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/learning_rate_real": 2.285469142329851e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 3100.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 3100.0,
      "epoch": 2.136986301369863,
      "step": 156
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/ce_loss": 3.0,
      "train/diffusion_loss": 0.3313130736351013
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/learning_rate_real": 2.285469142329851e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2852.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2852.0,
      "epoch": 2.136986301369863,
      "step": 156
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/ce_loss": 3.234375,
      "train/diffusion_loss": 0.34941917657852173
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/learning_rate_real": 2.285469142329851e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 2.136986301369863,
      "step": 156
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/ce_loss": 3.1875,
      "train/diffusion_loss": 0.3860790729522705
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/learning_rate_real": 2.285469142329851e-05
    },
    {
      "debug/num_lat_loss": 1717.0,
      "debug/num_lat_total": 2786.0,
      "debug/num_tok_loss": 1717.0,
      "debug/num_tok_total": 2786.0,
      "epoch": 2.1506849315068495,
      "step": 157
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/ce_loss": 3.171875,
      "train/diffusion_loss": 0.34918954968452454
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/learning_rate_real": 2.2823519019289245e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2235.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2235.0,
      "epoch": 2.1506849315068495,
      "step": 157
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/ce_loss": 3.140625,
      "train/diffusion_loss": 0.48824694752693176
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/learning_rate_real": 2.2823519019289245e-05
    },
    {
      "debug/num_lat_loss": 1625.0,
      "debug/num_lat_total": 2260.0,
      "debug/num_tok_loss": 1625.0,
      "debug/num_tok_total": 2260.0,
      "epoch": 2.1506849315068495,
      "step": 157
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/ce_loss": 3.359375,
      "train/diffusion_loss": 0.4429050385951996
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/learning_rate_real": 2.2823519019289245e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 3258.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 3258.0,
      "epoch": 2.1506849315068495,
      "step": 157
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/ce_loss": 3.140625,
      "train/diffusion_loss": 0.2725350856781006
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/learning_rate_real": 2.2823519019289245e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 2.1643835616438354,
      "step": 158
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/ce_loss": 3.0625,
      "train/diffusion_loss": 0.4037143290042877
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/learning_rate_real": 2.2792143351368005e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2872.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2872.0,
      "epoch": 2.1643835616438354,
      "step": 158
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/ce_loss": 3.234375,
      "train/diffusion_loss": 0.343654602766037
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/learning_rate_real": 2.2792143351368005e-05
    },
    {
      "debug/num_lat_loss": 1694.0,
      "debug/num_lat_total": 2905.0,
      "debug/num_tok_loss": 1694.0,
      "debug/num_tok_total": 2905.0,
      "epoch": 2.1643835616438354,
      "step": 158
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/ce_loss": 3.125,
      "train/diffusion_loss": 0.2816028594970703
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/learning_rate_real": 2.2792143351368005e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2201.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2201.0,
      "epoch": 2.1643835616438354,
      "step": 158
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.525771975517273
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/learning_rate_real": 2.2792143351368005e-05
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2697.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2697.0,
      "epoch": 2.1780821917808217,
      "step": 159
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/ce_loss": 3.046875,
      "train/diffusion_loss": 0.40741604566574097
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/learning_rate_real": 2.276056503730293e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2861.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2861.0,
      "epoch": 2.1780821917808217,
      "step": 159
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.37161630392074585
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/learning_rate_real": 2.276056503730293e-05
    },
    {
      "debug/num_lat_loss": 1737.0,
      "debug/num_lat_total": 2767.0,
      "debug/num_tok_loss": 1737.0,
      "debug/num_tok_total": 2767.0,
      "epoch": 2.1780821917808217,
      "step": 159
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.38080403208732605
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/learning_rate_real": 2.276056503730293e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2836.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2836.0,
      "epoch": 2.1780821917808217,
      "step": 159
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.3861440420150757
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/learning_rate_real": 2.276056503730293e-05
    },
    {
      "epoch": 2.191780821917808,
      "grad_norm": 1.617277979850769,
      "learning_rate": 2.276056503730293e-05,
      "loss": 2.7086,
      "step": 160
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2700.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2700.0,
      "epoch": 2.191780821917808,
      "step": 160
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.3891316056251526
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/learning_rate_real": 2.2728784698852114e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 1777.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 1777.0,
      "epoch": 2.191780821917808,
      "step": 160
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/ce_loss": 3.140625,
      "train/diffusion_loss": 0.5425366759300232
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/learning_rate_real": 2.2728784698852114e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2411.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2411.0,
      "epoch": 2.191780821917808,
      "step": 160
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/ce_loss": 3.125,
      "train/diffusion_loss": 0.44423243403434753
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/learning_rate_real": 2.2728784698852114e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2207.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2207.0,
      "epoch": 2.191780821917808,
      "step": 160
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/ce_loss": 3.09375,
      "train/diffusion_loss": 0.4857548177242279
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/learning_rate_real": 2.2728784698852114e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.2054794520547945,
      "step": 161
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/ce_loss": 3.140625,
      "train/diffusion_loss": 0.4045424461364746
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/learning_rate_real": 2.269680296175141e-05
    },
    {
      "debug/num_lat_loss": 1702.0,
      "debug/num_lat_total": 3137.0,
      "debug/num_tok_loss": 1702.0,
      "debug/num_tok_total": 3137.0,
      "epoch": 2.2054794520547945,
      "step": 161
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/ce_loss": 2.9375,
      "train/diffusion_loss": 0.26028120517730713
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/learning_rate_real": 2.269680296175141e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 2.2054794520547945,
      "step": 161
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/ce_loss": 3.171875,
      "train/diffusion_loss": 0.4297439455986023
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/learning_rate_real": 2.269680296175141e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 2.2054794520547945,
      "step": 161
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/ce_loss": 3.09375,
      "train/diffusion_loss": 0.4013395309448242
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/learning_rate_real": 2.269680296175141e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2443.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2443.0,
      "epoch": 2.219178082191781,
      "step": 162
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.45490705966949463
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/learning_rate_real": 2.2664620455702084e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 2.219178082191781,
      "step": 162
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.420023649930954
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/learning_rate_real": 2.2664620455702084e-05
    },
    {
      "debug/num_lat_loss": 1687.0,
      "debug/num_lat_total": 2893.0,
      "debug/num_tok_loss": 1687.0,
      "debug/num_tok_total": 2893.0,
      "epoch": 2.219178082191781,
      "step": 162
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.3170675039291382
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/learning_rate_real": 2.2664620455702084e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2852.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2852.0,
      "epoch": 2.219178082191781,
      "step": 162
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/ce_loss": 3.078125,
      "train/diffusion_loss": 0.3600369989871979
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/learning_rate_real": 2.2664620455702084e-05
    },
    {
      "debug/num_lat_loss": 1700.0,
      "debug/num_lat_total": 2555.0,
      "debug/num_tok_loss": 1700.0,
      "debug/num_tok_total": 2555.0,
      "epoch": 2.232876712328767,
      "step": 163
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/ce_loss": 3.125,
      "train/diffusion_loss": 0.3976871967315674
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/learning_rate_real": 2.2632237814358425e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2824.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2824.0,
      "epoch": 2.232876712328767,
      "step": 163
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.36776605248451233
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/learning_rate_real": 2.2632237814358425e-05
    },
    {
      "debug/num_lat_loss": 1729.0,
      "debug/num_lat_total": 2770.0,
      "debug/num_tok_loss": 1729.0,
      "debug/num_tok_total": 2770.0,
      "epoch": 2.232876712328767,
      "step": 163
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.3672686219215393
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/learning_rate_real": 2.2632237814358425e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2433.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2433.0,
      "epoch": 2.232876712328767,
      "step": 163
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.4243660271167755
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/learning_rate_real": 2.2632237814358425e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2607.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2607.0,
      "epoch": 2.2465753424657535,
      "step": 164
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4174022078514099
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/learning_rate_real": 2.2599655675315256e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2235.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2235.0,
      "epoch": 2.2465753424657535,
      "step": 164
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.5091489553451538
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/learning_rate_real": 2.2599655675315256e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2889.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2889.0,
      "epoch": 2.2465753424657535,
      "step": 164
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.36695384979248047
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/learning_rate_real": 2.2599655675315256e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2242.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2242.0,
      "epoch": 2.2465753424657535,
      "step": 164
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/ce_loss": 3.078125,
      "train/diffusion_loss": 0.4847750663757324
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/learning_rate_real": 2.2599655675315256e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2627.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2627.0,
      "epoch": 2.26027397260274,
      "step": 165
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/ce_loss": 2.96875,
      "train/diffusion_loss": 0.3915446400642395
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/learning_rate_real": 2.2566874680095403e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2390.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2390.0,
      "epoch": 2.26027397260274,
      "step": 165
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.45723363757133484
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/learning_rate_real": 2.2566874680095403e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2692.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2692.0,
      "epoch": 2.26027397260274,
      "step": 165
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4079940617084503
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/learning_rate_real": 2.2566874680095403e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2602.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2602.0,
      "epoch": 2.26027397260274,
      "step": 165
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4096970856189728
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/learning_rate_real": 2.2566874680095403e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2875.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2875.0,
      "epoch": 2.2739726027397262,
      "step": 166
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/ce_loss": 3.0625,
      "train/diffusion_loss": 0.34252914786338806
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/learning_rate_real": 2.2533895474137047e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2829.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2829.0,
      "epoch": 2.2739726027397262,
      "step": 166
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.3555743098258972
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/learning_rate_real": 2.2533895474137047e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2697.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2697.0,
      "epoch": 2.2739726027397262,
      "step": 166
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4044460952281952
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/learning_rate_real": 2.2533895474137047e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2482.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2482.0,
      "epoch": 2.2739726027397262,
      "step": 166
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.4411238431930542
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/learning_rate_real": 2.2533895474137047e-05
    },
    {
      "debug/num_lat_loss": 1717.0,
      "debug/num_lat_total": 2951.0,
      "debug/num_tok_loss": 1717.0,
      "debug/num_tok_total": 2951.0,
      "epoch": 2.287671232876712,
      "step": 167
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/ce_loss": 2.8125,
      "train/diffusion_loss": 0.28139710426330566
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/learning_rate_real": 2.2500718706781014e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2426.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2426.0,
      "epoch": 2.287671232876712,
      "step": 167
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.44409722089767456
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/learning_rate_real": 2.2500718706781014e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 2.287671232876712,
      "step": 167
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/ce_loss": 3.03125,
      "train/diffusion_loss": 0.4660814702510834
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/learning_rate_real": 2.2500718706781014e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2417.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2417.0,
      "epoch": 2.287671232876712,
      "step": 167
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.46190908551216125
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/learning_rate_real": 2.2500718706781014e-05
    },
    {
      "debug/num_lat_loss": 1653.0,
      "debug/num_lat_total": 2146.0,
      "debug/num_tok_loss": 1653.0,
      "debug/num_tok_total": 2146.0,
      "epoch": 2.3013698630136985,
      "step": 168
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.4429917335510254
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/learning_rate_real": 2.2467345031258007e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2650.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2650.0,
      "epoch": 2.3013698630136985,
      "step": 168
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/ce_loss": 3.046875,
      "train/diffusion_loss": 0.4134480059146881
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/learning_rate_real": 2.2467345031258007e-05
    },
    {
      "debug/num_lat_loss": 1615.0,
      "debug/num_lat_total": 2248.0,
      "debug/num_tok_loss": 1615.0,
      "debug/num_tok_total": 2248.0,
      "epoch": 2.3013698630136985,
      "step": 168
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4329485595226288
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/learning_rate_real": 2.2467345031258007e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2625.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2625.0,
      "epoch": 2.3013698630136985,
      "step": 168
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4059813618659973
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/learning_rate_real": 2.2467345031258007e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 2.315068493150685,
      "step": 169
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.33460959792137146
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/learning_rate_real": 2.243377510467572e-05
    },
    {
      "debug/num_lat_loss": 1748.0,
      "debug/num_lat_total": 2592.0,
      "debug/num_tok_loss": 1748.0,
      "debug/num_tok_total": 2592.0,
      "epoch": 2.315068493150685,
      "step": 169
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.37922653555870056
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/learning_rate_real": 2.243377510467572e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2911.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2911.0,
      "epoch": 2.315068493150685,
      "step": 169
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/ce_loss": 3.03125,
      "train/diffusion_loss": 0.3680286109447479
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/learning_rate_real": 2.243377510467572e-05
    },
    {
      "debug/num_lat_loss": 1678.0,
      "debug/num_lat_total": 2198.0,
      "debug/num_tok_loss": 1678.0,
      "debug/num_tok_total": 2198.0,
      "epoch": 2.315068493150685,
      "step": 169
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/ce_loss": 3.25,
      "train/diffusion_loss": 0.46388769149780273
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/learning_rate_real": 2.243377510467572e-05
    },
    {
      "epoch": 2.328767123287671,
      "grad_norm": 1.618893027305603,
      "learning_rate": 2.243377510467572e-05,
      "loss": 2.7758,
      "step": 170
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2429.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2429.0,
      "epoch": 2.328767123287671,
      "step": 170
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/ce_loss": 2.96875,
      "train/diffusion_loss": 0.453939825296402
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/learning_rate_real": 2.2400009588005925e-05
    },
    {
      "debug/num_lat_loss": 1824.0,
      "debug/num_lat_total": 3149.0,
      "debug/num_tok_loss": 1824.0,
      "debug/num_tok_total": 3149.0,
      "epoch": 2.328767123287671,
      "step": 170
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/ce_loss": 3.0,
      "train/diffusion_loss": 0.3223628103733063
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/learning_rate_real": 2.2400009588005925e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 3055.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 3055.0,
      "epoch": 2.328767123287671,
      "step": 170
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.325356125831604
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/learning_rate_real": 2.2400009588005925e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2391.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2391.0,
      "epoch": 2.328767123287671,
      "step": 170
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/ce_loss": 3.03125,
      "train/diffusion_loss": 0.4393155872821808
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/learning_rate_real": 2.2400009588005925e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2237.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2237.0,
      "epoch": 2.3424657534246576,
      "step": 171
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4934636950492859
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/learning_rate_real": 2.236604914607143e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2625.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2625.0,
      "epoch": 2.3424657534246576,
      "step": 171
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/ce_loss": 2.90625,
      "train/diffusion_loss": 0.41351816058158875
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/learning_rate_real": 2.236604914607143e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 2.3424657534246576,
      "step": 171
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.3951073884963989
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/learning_rate_real": 2.236604914607143e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3076.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3076.0,
      "epoch": 2.3424657534246576,
      "step": 171
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.30464741587638855
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/learning_rate_real": 2.236604914607143e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 3094.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 3094.0,
      "epoch": 2.356164383561644,
      "step": 172
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/ce_loss": 3.234375,
      "train/diffusion_loss": 0.31877073645591736
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/learning_rate_real": 2.2331894447533035e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2888.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2888.0,
      "epoch": 2.356164383561644,
      "step": 172
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.35470494627952576
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/learning_rate_real": 2.2331894447533035e-05
    },
    {
      "debug/num_lat_loss": 1816.0,
      "debug/num_lat_total": 2699.0,
      "debug/num_tok_loss": 1816.0,
      "debug/num_tok_total": 2699.0,
      "epoch": 2.356164383561644,
      "step": 172
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.4036940038204193
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/learning_rate_real": 2.2331894447533035e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 2.356164383561644,
      "step": 172
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/ce_loss": 2.96875,
      "train/diffusion_loss": 0.4525442123413086
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/learning_rate_real": 2.2331894447533035e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2609.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2609.0,
      "epoch": 2.3698630136986303,
      "step": 173
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.40328508615493774
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/learning_rate_real": 2.2297546164876292e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.3698630136986303,
      "step": 173
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.3900800347328186
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/learning_rate_real": 2.2297546164876292e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 2.3698630136986303,
      "step": 173
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/ce_loss": 2.9375,
      "train/diffusion_loss": 0.39137205481529236
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/learning_rate_real": 2.2297546164876292e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 2.3698630136986303,
      "step": 173
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4043900966644287
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/learning_rate_real": 2.2297546164876292e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2389.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2389.0,
      "epoch": 2.383561643835616,
      "step": 174
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.45027777552604675
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/learning_rate_real": 2.2263004974398352e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 2.383561643835616,
      "step": 174
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/ce_loss": 2.828125,
      "train/diffusion_loss": 0.45082005858421326
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/learning_rate_real": 2.2263004974398352e-05
    },
    {
      "debug/num_lat_loss": 1755.0,
      "debug/num_lat_total": 2808.0,
      "debug/num_tok_loss": 1755.0,
      "debug/num_tok_total": 2808.0,
      "epoch": 2.383561643835616,
      "step": 174
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.36461949348449707
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/learning_rate_real": 2.2263004974398352e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3082.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3082.0,
      "epoch": 2.383561643835616,
      "step": 174
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.32322970032691956
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/learning_rate_real": 2.2263004974398352e-05
    },
    {
      "debug/num_lat_loss": 1579.0,
      "debug/num_lat_total": 2219.0,
      "debug/num_tok_loss": 1579.0,
      "debug/num_tok_total": 2219.0,
      "epoch": 2.3972602739726026,
      "step": 175
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.44200700521469116
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/learning_rate_real": 2.2228271556194575e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 2.3972602739726026,
      "step": 175
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4691259562969208
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/learning_rate_real": 2.2228271556194575e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2876.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2876.0,
      "epoch": 2.3972602739726026,
      "step": 175
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/ce_loss": 3.375,
      "train/diffusion_loss": 0.34737053513526917
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/learning_rate_real": 2.2228271556194575e-05
    },
    {
      "debug/num_lat_loss": 1725.0,
      "debug/num_lat_total": 2589.0,
      "debug/num_tok_loss": 1725.0,
      "debug/num_tok_total": 2589.0,
      "epoch": 2.3972602739726026,
      "step": 175
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.43164265155792236
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/learning_rate_real": 2.2228271556194575e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 2.410958904109589,
      "step": 176
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/ce_loss": 3.0,
      "train/diffusion_loss": 0.429606169462204
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/learning_rate_real": 2.2193346594145188e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2658.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2658.0,
      "epoch": 2.410958904109589,
      "step": 176
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.3894621431827545
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/learning_rate_real": 2.2193346594145188e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2654.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2654.0,
      "epoch": 2.410958904109589,
      "step": 176
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.4027283191680908
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/learning_rate_real": 2.2193346594145188e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2690.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2690.0,
      "epoch": 2.410958904109589,
      "step": 176
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/ce_loss": 2.6875,
      "train/diffusion_loss": 0.38133347034454346
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/learning_rate_real": 2.2193346594145188e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2837.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2837.0,
      "epoch": 2.4246575342465753,
      "step": 177
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.3693162500858307
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/learning_rate_real": 2.215823077590179e-05
    },
    {
      "debug/num_lat_loss": 1619.0,
      "debug/num_lat_total": 2252.0,
      "debug/num_tok_loss": 1619.0,
      "debug/num_tok_total": 2252.0,
      "epoch": 2.4246575342465753,
      "step": 177
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/ce_loss": 2.796875,
      "train/diffusion_loss": 0.4304879307746887
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/learning_rate_real": 2.215823077590179e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 2.4246575342465753,
      "step": 177
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.4411391615867615
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/learning_rate_real": 2.215823077590179e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 2.4246575342465753,
      "step": 177
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.47264569997787476
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/learning_rate_real": 2.215823077590179e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2395.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2395.0,
      "epoch": 2.4383561643835616,
      "step": 178
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.453601598739624
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/learning_rate_real": 2.2122924792873827e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3096.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3096.0,
      "epoch": 2.4383561643835616,
      "step": 178
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/ce_loss": 2.9375,
      "train/diffusion_loss": 0.31741419434547424
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/learning_rate_real": 2.2122924792873827e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 3100.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 3100.0,
      "epoch": 2.4383561643835616,
      "step": 178
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/ce_loss": 2.78125,
      "train/diffusion_loss": 0.2818582057952881
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/learning_rate_real": 2.2122924792873827e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 2.4383561643835616,
      "step": 178
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/ce_loss": 2.8125,
      "train/diffusion_loss": 0.4282552897930145
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/learning_rate_real": 2.2122924792873827e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2639.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2639.0,
      "epoch": 2.452054794520548,
      "step": 179
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/ce_loss": 2.90625,
      "train/diffusion_loss": 0.40325939655303955
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/learning_rate_real": 2.208742934021499e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 2.452054794520548,
      "step": 179
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.4070988595485687
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/learning_rate_real": 2.208742934021499e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.452054794520548,
      "step": 179
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.40981626510620117
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/learning_rate_real": 2.208742934021499e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2441.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2441.0,
      "epoch": 2.452054794520548,
      "step": 179
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/ce_loss": 2.9375,
      "train/diffusion_loss": 0.43094131350517273
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/learning_rate_real": 2.208742934021499e-05
    },
    {
      "epoch": 2.4657534246575343,
      "grad_norm": 1.5310542583465576,
      "learning_rate": 2.208742934021499e-05,
      "loss": 2.7061,
      "step": 180
    },
    {
      "debug/num_lat_loss": 1620.0,
      "debug/num_lat_total": 2117.0,
      "debug/num_tok_loss": 1620.0,
      "debug/num_tok_total": 2117.0,
      "epoch": 2.4657534246575343,
      "step": 180
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.4778527617454529
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/learning_rate_real": 2.205174511680949e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 3109.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 3109.0,
      "epoch": 2.4657534246575343,
      "step": 180
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/ce_loss": 2.734375,
      "train/diffusion_loss": 0.3286422789096832
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/learning_rate_real": 2.205174511680949e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2616.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2616.0,
      "epoch": 2.4657534246575343,
      "step": 180
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4315715730190277
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/learning_rate_real": 2.205174511680949e-05
    },
    {
      "debug/num_lat_loss": 1606.0,
      "debug/num_lat_total": 2088.0,
      "debug/num_tok_loss": 1606.0,
      "debug/num_tok_total": 2088.0,
      "epoch": 2.4657534246575343,
      "step": 180
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.47049593925476074
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/learning_rate_real": 2.205174511680949e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2638.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2638.0,
      "epoch": 2.4794520547945207,
      "step": 181
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.38567018508911133
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/learning_rate_real": 2.2015872825258333e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2402.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2402.0,
      "epoch": 2.4794520547945207,
      "step": 181
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.44906651973724365
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/learning_rate_real": 2.2015872825258333e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 3334.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 3334.0,
      "epoch": 2.4794520547945207,
      "step": 181
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.29875418543815613
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/learning_rate_real": 2.2015872825258333e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2429.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2429.0,
      "epoch": 2.4794520547945207,
      "step": 181
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.441359281539917
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/learning_rate_real": 2.2015872825258333e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2816.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2816.0,
      "epoch": 2.493150684931507,
      "step": 182
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.3667384386062622
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/learning_rate_real": 2.1979813171865483e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2450.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2450.0,
      "epoch": 2.493150684931507,
      "step": 182
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/ce_loss": 3.0,
      "train/diffusion_loss": 0.4418801963329315
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/learning_rate_real": 2.1979813171865483e-05
    },
    {
      "debug/num_lat_loss": 1727.0,
      "debug/num_lat_total": 2594.0,
      "debug/num_tok_loss": 1727.0,
      "debug/num_tok_total": 2594.0,
      "epoch": 2.493150684931507,
      "step": 182
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/ce_loss": 2.8125,
      "train/diffusion_loss": 0.444356769323349
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/learning_rate_real": 2.1979813171865483e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2457.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2457.0,
      "epoch": 2.493150684931507,
      "step": 182
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4595908522605896
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/learning_rate_real": 2.1979813171865483e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2448.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2448.0,
      "epoch": 2.506849315068493,
      "step": 183
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/ce_loss": 2.78125,
      "train/diffusion_loss": 0.43447527289390564
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/learning_rate_real": 2.1943566866623926e-05
    },
    {
      "debug/num_lat_loss": 1668.0,
      "debug/num_lat_total": 2625.0,
      "debug/num_tok_loss": 1668.0,
      "debug/num_tok_total": 2625.0,
      "epoch": 2.506849315068493,
      "step": 183
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.3590381145477295
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/learning_rate_real": 2.1943566866623926e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 1989.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 1989.0,
      "epoch": 2.506849315068493,
      "step": 183
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.5137954354286194
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/learning_rate_real": 2.1943566866623926e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2186.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2186.0,
      "epoch": 2.506849315068493,
      "step": 183
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4624958336353302
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/learning_rate_real": 2.1943566866623926e-05
    },
    {
      "debug/num_lat_loss": 1720.0,
      "debug/num_lat_total": 2318.0,
      "debug/num_tok_loss": 1720.0,
      "debug/num_tok_total": 2318.0,
      "epoch": 2.5205479452054793,
      "step": 184
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/ce_loss": 2.78125,
      "train/diffusion_loss": 0.4255053699016571
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/learning_rate_real": 2.190713462320173e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2850.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2850.0,
      "epoch": 2.5205479452054793,
      "step": 184
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.34381103515625
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/learning_rate_real": 2.190713462320173e-05
    },
    {
      "debug/num_lat_loss": 1647.0,
      "debug/num_lat_total": 2360.0,
      "debug/num_tok_loss": 1647.0,
      "debug/num_tok_total": 2360.0,
      "epoch": 2.5205479452054793,
      "step": 184
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.4182201325893402
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/learning_rate_real": 2.190713462320173e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 2.5205479452054793,
      "step": 184
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/ce_loss": 2.71875,
      "train/diffusion_loss": 0.39211803674697876
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/learning_rate_real": 2.190713462320173e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2405.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2405.0,
      "epoch": 2.5342465753424657,
      "step": 185
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4424284100532532
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/learning_rate_real": 2.187051715892797e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2670.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2670.0,
      "epoch": 2.5342465753424657,
      "step": 185
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/ce_loss": 2.640625,
      "train/diffusion_loss": 0.3992789387702942
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/learning_rate_real": 2.187051715892797e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2434.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2434.0,
      "epoch": 2.5342465753424657,
      "step": 185
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.43806523084640503
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/learning_rate_real": 2.187051715892797e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2858.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2858.0,
      "epoch": 2.5342465753424657,
      "step": 185
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.3659980595111847
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/learning_rate_real": 2.187051715892797e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 2.547945205479452,
      "step": 186
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.3946780860424042
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/learning_rate_real": 2.18337151947786e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 2.547945205479452,
      "step": 186
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.3429841101169586
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/learning_rate_real": 2.18337151947786e-05
    },
    {
      "debug/num_lat_loss": 1579.0,
      "debug/num_lat_total": 2101.0,
      "debug/num_tok_loss": 1579.0,
      "debug/num_tok_total": 2101.0,
      "epoch": 2.547945205479452,
      "step": 186
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.4572349488735199
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/learning_rate_real": 2.18337151947786e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2377.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2377.0,
      "epoch": 2.547945205479452,
      "step": 186
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/ce_loss": 2.71875,
      "train/diffusion_loss": 0.4656928777694702
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/learning_rate_real": 2.18337151947786e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2465.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2465.0,
      "epoch": 2.5616438356164384,
      "step": 187
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/ce_loss": 2.71875,
      "train/diffusion_loss": 0.46677687764167786
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/learning_rate_real": 2.179672945536227e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 3053.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 3053.0,
      "epoch": 2.5616438356164384,
      "step": 187
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/ce_loss": 2.71875,
      "train/diffusion_loss": 0.32807257771492004
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/learning_rate_real": 2.179672945536227e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2667.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2667.0,
      "epoch": 2.5616438356164384,
      "step": 187
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/ce_loss": 2.734375,
      "train/diffusion_loss": 0.3864240050315857
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/learning_rate_real": 2.179672945536227e-05
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2261.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2261.0,
      "epoch": 2.5616438356164384,
      "step": 187
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.5067983865737915
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/learning_rate_real": 2.179672945536227e-05
    },
    {
      "debug/num_lat_loss": 1743.0,
      "debug/num_lat_total": 2997.0,
      "debug/num_tok_loss": 1743.0,
      "debug/num_tok_total": 2997.0,
      "epoch": 2.5753424657534247,
      "step": 188
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.3241000175476074
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/learning_rate_real": 2.1759560668906064e-05
    },
    {
      "debug/num_lat_loss": 1668.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1668.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 2.5753424657534247,
      "step": 188
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/ce_loss": 2.625,
      "train/diffusion_loss": 0.28809213638305664
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/learning_rate_real": 2.1759560668906064e-05
    },
    {
      "debug/num_lat_loss": 1701.0,
      "debug/num_lat_total": 2237.0,
      "debug/num_tok_loss": 1701.0,
      "debug/num_tok_total": 2237.0,
      "epoch": 2.5753424657534247,
      "step": 188
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/ce_loss": 2.59375,
      "train/diffusion_loss": 0.45437008142471313
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/learning_rate_real": 2.1759560668906064e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2898.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2898.0,
      "epoch": 2.5753424657534247,
      "step": 188
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.3584519326686859
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/learning_rate_real": 2.1759560668906064e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.589041095890411,
      "step": 189
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.4072858393192291
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/learning_rate_real": 2.172220956724114e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2878.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2878.0,
      "epoch": 2.589041095890411,
      "step": 189
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.3507998287677765
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/learning_rate_real": 2.172220956724114e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2216.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2216.0,
      "epoch": 2.589041095890411,
      "step": 189
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/ce_loss": 2.59375,
      "train/diffusion_loss": 0.4703916013240814
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/learning_rate_real": 2.172220956724114e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 2.589041095890411,
      "step": 189
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.40979763865470886
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/learning_rate_real": 2.172220956724114e-05
    },
    {
      "epoch": 2.602739726027397,
      "grad_norm": 1.401570200920105,
      "learning_rate": 2.172220956724114e-05,
      "loss": 2.733,
      "step": 190
    },
    {
      "debug/num_lat_loss": 1683.0,
      "debug/num_lat_total": 2553.0,
      "debug/num_tok_loss": 1683.0,
      "debug/num_tok_total": 2553.0,
      "epoch": 2.602739726027397,
      "step": 190
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.41486406326293945
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/learning_rate_real": 2.168467688578834e-05
    },
    {
      "debug/num_lat_loss": 1746.0,
      "debug/num_lat_total": 2580.0,
      "debug/num_tok_loss": 1746.0,
      "debug/num_tok_total": 2580.0,
      "epoch": 2.602739726027397,
      "step": 190
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.40670496225357056
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/learning_rate_real": 2.168467688578834e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2869.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2869.0,
      "epoch": 2.602739726027397,
      "step": 190
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.33270514011383057
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/learning_rate_real": 2.168467688578834e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2411.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2411.0,
      "epoch": 2.602739726027397,
      "step": 190
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/ce_loss": 2.59375,
      "train/diffusion_loss": 0.45577895641326904
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/learning_rate_real": 2.168467688578834e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2683.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2683.0,
      "epoch": 2.616438356164384,
      "step": 191
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.37813183665275574
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/learning_rate_real": 2.164696336354371e-05
    },
    {
      "debug/num_lat_loss": 1640.0,
      "debug/num_lat_total": 1947.0,
      "debug/num_tok_loss": 1640.0,
      "debug/num_tok_total": 1947.0,
      "epoch": 2.616438356164384,
      "step": 191
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.49075964093208313
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/learning_rate_real": 2.164696336354371e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2454.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2454.0,
      "epoch": 2.616438356164384,
      "step": 191
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/ce_loss": 2.625,
      "train/diffusion_loss": 0.4536711573600769
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/learning_rate_real": 2.164696336354371e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2471.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2471.0,
      "epoch": 2.616438356164384,
      "step": 191
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/ce_loss": 2.734375,
      "train/diffusion_loss": 0.4252324104309082
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/learning_rate_real": 2.164696336354371e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 2596.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 2596.0,
      "epoch": 2.6301369863013697,
      "step": 192
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/ce_loss": 2.5,
      "train/diffusion_loss": 0.4330429434776306
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/learning_rate_real": 2.1609069743063927e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2177.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2177.0,
      "epoch": 2.6301369863013697,
      "step": 192
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/ce_loss": 2.640625,
      "train/diffusion_loss": 0.4865707755088806
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/learning_rate_real": 2.1609069743063927e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 2.6301369863013697,
      "step": 192
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.3472568392753601
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/learning_rate_real": 2.1609069743063927e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2822.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2822.0,
      "epoch": 2.6301369863013697,
      "step": 192
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/ce_loss": 2.5,
      "train/diffusion_loss": 0.3912636339664459
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/learning_rate_real": 2.1609069743063927e-05
    },
    {
      "debug/num_lat_loss": 1662.0,
      "debug/num_lat_total": 2314.0,
      "debug/num_tok_loss": 1662.0,
      "debug/num_tok_total": 2314.0,
      "epoch": 2.643835616438356,
      "step": 193
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4403693377971649
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/learning_rate_real": 2.1570996770451702e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 1999.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 1999.0,
      "epoch": 2.643835616438356,
      "step": 193
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/ce_loss": 2.609375,
      "train/diffusion_loss": 0.517785370349884
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/learning_rate_real": 2.1570996770451702e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 2.643835616438356,
      "step": 193
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.3927980065345764
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/learning_rate_real": 2.1570996770451702e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 2.643835616438356,
      "step": 193
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/ce_loss": 2.546875,
      "train/diffusion_loss": 0.41709792613983154
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/learning_rate_real": 2.1570996770451702e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2651.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2651.0,
      "epoch": 2.6575342465753424,
      "step": 194
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.41924428939819336
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/learning_rate_real": 2.1532745195341093e-05
    },
    {
      "debug/num_lat_loss": 1658.0,
      "debug/num_lat_total": 2288.0,
      "debug/num_tok_loss": 1658.0,
      "debug/num_tok_total": 2288.0,
      "epoch": 2.6575342465753424,
      "step": 194
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/ce_loss": 2.640625,
      "train/diffusion_loss": 0.44974976778030396
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/learning_rate_real": 2.1532745195341093e-05
    },
    {
      "debug/num_lat_loss": 1724.0,
      "debug/num_lat_total": 2803.0,
      "debug/num_tok_loss": 1724.0,
      "debug/num_tok_total": 2803.0,
      "epoch": 2.6575342465753424,
      "step": 194
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/ce_loss": 2.796875,
      "train/diffusion_loss": 0.3286336064338684
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/learning_rate_real": 2.1532745195341093e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2682.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2682.0,
      "epoch": 2.6575342465753424,
      "step": 194
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4254552721977234
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/learning_rate_real": 2.1532745195341093e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 2.671232876712329,
      "step": 195
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.4401073455810547
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/learning_rate_real": 2.1494315770882713e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2192.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2192.0,
      "epoch": 2.671232876712329,
      "step": 195
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/ce_loss": 2.78125,
      "train/diffusion_loss": 0.5180473923683167
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/learning_rate_real": 2.1494315770882713e-05
    },
    {
      "debug/num_lat_loss": 1644.0,
      "debug/num_lat_total": 2282.0,
      "debug/num_tok_loss": 1644.0,
      "debug/num_tok_total": 2282.0,
      "epoch": 2.671232876712329,
      "step": 195
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/ce_loss": 2.609375,
      "train/diffusion_loss": 0.4051307737827301
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/learning_rate_real": 2.1494315770882713e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 3111.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 3111.0,
      "epoch": 2.671232876712329,
      "step": 195
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.3186243176460266
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/learning_rate_real": 2.1494315770882713e-05
    },
    {
      "debug/num_lat_loss": 1417.0,
      "debug/num_lat_total": 2097.0,
      "debug/num_tok_loss": 1417.0,
      "debug/num_tok_total": 2097.0,
      "epoch": 2.684931506849315,
      "step": 196
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.4389093816280365
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/learning_rate_real": 2.145570925372894e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2651.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2651.0,
      "epoch": 2.684931506849315,
      "step": 196
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/ce_loss": 2.609375,
      "train/diffusion_loss": 0.3858262300491333
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/learning_rate_real": 2.145570925372894e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2682.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2682.0,
      "epoch": 2.684931506849315,
      "step": 196
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.40185388922691345
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/learning_rate_real": 2.145570925372894e-05
    },
    {
      "debug/num_lat_loss": 1426.0,
      "debug/num_lat_total": 1666.0,
      "debug/num_tok_loss": 1426.0,
      "debug/num_tok_total": 1666.0,
      "epoch": 2.684931506849315,
      "step": 196
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.4992228150367737
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/learning_rate_real": 2.145570925372894e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 2.6986301369863015,
      "step": 197
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.37056055665016174
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/learning_rate_real": 2.1416926404018994e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2850.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2850.0,
      "epoch": 2.6986301369863015,
      "step": 197
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.3587437570095062
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/learning_rate_real": 2.1416926404018994e-05
    },
    {
      "debug/num_lat_loss": 1743.0,
      "debug/num_lat_total": 3000.0,
      "debug/num_tok_loss": 1743.0,
      "debug/num_tok_total": 3000.0,
      "epoch": 2.6986301369863015,
      "step": 197
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/ce_loss": 2.546875,
      "train/diffusion_loss": 0.31014642119407654
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/learning_rate_real": 2.1416926404018994e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2639.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2639.0,
      "epoch": 2.6986301369863015,
      "step": 197
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.41646116971969604
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/learning_rate_real": 2.1416926404018994e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2249.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2249.0,
      "epoch": 2.712328767123288,
      "step": 198
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.5441051721572876
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/learning_rate_real": 2.137796798536398e-05
    },
    {
      "debug/num_lat_loss": 1728.0,
      "debug/num_lat_total": 2345.0,
      "debug/num_tok_loss": 1728.0,
      "debug/num_tok_total": 2345.0,
      "epoch": 2.712328767123288,
      "step": 198
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.4259048104286194
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/learning_rate_real": 2.137796798536398e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2249.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2249.0,
      "epoch": 2.712328767123288,
      "step": 198
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.5017729997634888
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/learning_rate_real": 2.137796798536398e-05
    },
    {
      "debug/num_lat_loss": 1589.0,
      "debug/num_lat_total": 2480.0,
      "debug/num_tok_loss": 1589.0,
      "debug/num_tok_total": 2480.0,
      "epoch": 2.712328767123288,
      "step": 198
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/ce_loss": 2.90625,
      "train/diffusion_loss": 0.3666616976261139
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/learning_rate_real": 2.137796798536398e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 2.7260273972602738,
      "step": 199
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.4006059169769287
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/learning_rate_real": 2.1338834764831845e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 3061.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 3061.0,
      "epoch": 2.7260273972602738,
      "step": 199
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/ce_loss": 2.59375,
      "train/diffusion_loss": 0.322833776473999
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/learning_rate_real": 2.1338834764831845e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2646.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2646.0,
      "epoch": 2.7260273972602738,
      "step": 199
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.4167260527610779
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/learning_rate_real": 2.1338834764831845e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2461.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2461.0,
      "epoch": 2.7260273972602738,
      "step": 199
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.40909343957901
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/learning_rate_real": 2.1338834764831845e-05
    },
    {
      "epoch": 2.73972602739726,
      "grad_norm": 1.5308177471160889,
      "learning_rate": 2.1338834764831845e-05,
      "loss": 2.7536,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 730,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 7.25125837965169e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
