{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 5.47945205479452,
  "eval_steps": 500,
  "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "debug/num_lat_loss": 1603.0,
      "debug/num_lat_total": 2294.0,
      "debug/num_tok_loss": 1603.0,
      "debug/num_tok_total": 2294.0,
      "epoch": 0,
      "step": 0
    },
    {
      "epoch": 0,
      "step": 0,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.5052313208580017
    },
    {
      "epoch": 0,
      "step": 0,
      "train/learning_rate_real": 0.0
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2404.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2404.0,
      "epoch": 0,
      "step": 0
    },
    {
      "epoch": 0,
      "step": 0,
      "train/ce_loss": 23.375,
      "train/diffusion_loss": 0.5152708888053894
    },
    {
      "epoch": 0,
      "step": 0,
      "train/learning_rate_real": 0.0
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2651.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2651.0,
      "epoch": 0,
      "step": 0
    },
    {
      "epoch": 0,
      "step": 0,
      "train/ce_loss": 23.75,
      "train/diffusion_loss": 0.49939244985580444
    },
    {
      "epoch": 0,
      "step": 0,
      "train/learning_rate_real": 0.0
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2892.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2892.0,
      "epoch": 0,
      "step": 0
    },
    {
      "epoch": 0,
      "step": 0,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.4265933632850647
    },
    {
      "epoch": 0,
      "step": 0,
      "train/learning_rate_real": 0.0
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 3242.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 3242.0,
      "epoch": 0.0136986301369863,
      "step": 1
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.35353660583496094
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/learning_rate_real": 1.1363636363636364e-06
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2013.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2013.0,
      "epoch": 0.0136986301369863,
      "step": 1
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.5851874947547913
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/learning_rate_real": 1.1363636363636364e-06
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 0.0136986301369863,
      "step": 1
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.49107739329338074
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/learning_rate_real": 1.1363636363636364e-06
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2911.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2911.0,
      "epoch": 0.0136986301369863,
      "step": 1
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.461406946182251
    },
    {
      "epoch": 0.0136986301369863,
      "step": 1,
      "train/learning_rate_real": 1.1363636363636364e-06
    },
    {
      "debug/num_lat_loss": 1642.0,
      "debug/num_lat_total": 2128.0,
      "debug/num_tok_loss": 1642.0,
      "debug/num_tok_total": 2128.0,
      "epoch": 0.0273972602739726,
      "step": 2
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/ce_loss": 23.375,
      "train/diffusion_loss": 0.5282515287399292
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/learning_rate_real": 2.2727272727272728e-06
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2405.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2405.0,
      "epoch": 0.0273972602739726,
      "step": 2
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.5025462508201599
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/learning_rate_real": 2.2727272727272728e-06
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2867.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2867.0,
      "epoch": 0.0273972602739726,
      "step": 2
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/ce_loss": 23.125,
      "train/diffusion_loss": 0.42426061630249023
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/learning_rate_real": 2.2727272727272728e-06
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2234.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2234.0,
      "epoch": 0.0273972602739726,
      "step": 2
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/ce_loss": 24.125,
      "train/diffusion_loss": 0.5727062225341797
    },
    {
      "epoch": 0.0273972602739726,
      "step": 2,
      "train/learning_rate_real": 2.2727272727272728e-06
    },
    {
      "debug/num_lat_loss": 1648.0,
      "debug/num_lat_total": 3015.0,
      "debug/num_tok_loss": 1648.0,
      "debug/num_tok_total": 3015.0,
      "epoch": 0.0410958904109589,
      "step": 3
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.3512691855430603
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/learning_rate_real": 3.409090909090909e-06
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 0.0410958904109589,
      "step": 3
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/ce_loss": 23.875,
      "train/diffusion_loss": 0.48421576619148254
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/learning_rate_real": 3.409090909090909e-06
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2825.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2825.0,
      "epoch": 0.0410958904109589,
      "step": 3
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/ce_loss": 23.75,
      "train/diffusion_loss": 0.4260019361972809
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/learning_rate_real": 3.409090909090909e-06
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2866.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2866.0,
      "epoch": 0.0410958904109589,
      "step": 3
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.42226922512054443
    },
    {
      "epoch": 0.0410958904109589,
      "step": 3,
      "train/learning_rate_real": 3.409090909090909e-06
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2354.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2354.0,
      "epoch": 0.0547945205479452,
      "step": 4
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4710257947444916
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/learning_rate_real": 4.5454545454545455e-06
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2670.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2670.0,
      "epoch": 0.0547945205479452,
      "step": 4
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/ce_loss": 24.0,
      "train/diffusion_loss": 0.4669632911682129
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/learning_rate_real": 4.5454545454545455e-06
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2660.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2660.0,
      "epoch": 0.0547945205479452,
      "step": 4
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/ce_loss": 23.75,
      "train/diffusion_loss": 0.4852781295776367
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/learning_rate_real": 4.5454545454545455e-06
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2247.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2247.0,
      "epoch": 0.0547945205479452,
      "step": 4
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.5597466826438904
    },
    {
      "epoch": 0.0547945205479452,
      "step": 4,
      "train/learning_rate_real": 4.5454545454545455e-06
    },
    {
      "debug/num_lat_loss": 1755.0,
      "debug/num_lat_total": 2583.0,
      "debug/num_tok_loss": 1755.0,
      "debug/num_tok_total": 2583.0,
      "epoch": 0.0684931506849315,
      "step": 5
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.48908230662345886
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/learning_rate_real": 5.681818181818182e-06
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2606.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2606.0,
      "epoch": 0.0684931506849315,
      "step": 5
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/ce_loss": 23.375,
      "train/diffusion_loss": 0.4757000505924225
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/learning_rate_real": 5.681818181818182e-06
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 0.0684931506849315,
      "step": 5
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/ce_loss": 23.75,
      "train/diffusion_loss": 0.5041497945785522
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/learning_rate_real": 5.681818181818182e-06
    },
    {
      "debug/num_lat_loss": 1617.0,
      "debug/num_lat_total": 1823.0,
      "debug/num_tok_loss": 1617.0,
      "debug/num_tok_total": 1823.0,
      "epoch": 0.0684931506849315,
      "step": 5
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/ce_loss": 23.625,
      "train/diffusion_loss": 0.5805519223213196
    },
    {
      "epoch": 0.0684931506849315,
      "step": 5,
      "train/learning_rate_real": 5.681818181818182e-06
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2836.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2836.0,
      "epoch": 0.0821917808219178,
      "step": 6
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4666789472103119
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/learning_rate_real": 6.818181818181818e-06
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 0.0821917808219178,
      "step": 6
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.5358713865280151
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/learning_rate_real": 6.818181818181818e-06
    },
    {
      "debug/num_lat_loss": 1634.0,
      "debug/num_lat_total": 2344.0,
      "debug/num_tok_loss": 1634.0,
      "debug/num_tok_total": 2344.0,
      "epoch": 0.0821917808219178,
      "step": 6
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4909515976905823
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/learning_rate_real": 6.818181818181818e-06
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 3088.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 3088.0,
      "epoch": 0.0821917808219178,
      "step": 6
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.3803665339946747
    },
    {
      "epoch": 0.0821917808219178,
      "step": 6,
      "train/learning_rate_real": 6.818181818181818e-06
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2623.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2623.0,
      "epoch": 0.0958904109589041,
      "step": 7
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4644874334335327
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/learning_rate_real": 7.954545454545455e-06
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2829.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2829.0,
      "epoch": 0.0958904109589041,
      "step": 7
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/ce_loss": 23.375,
      "train/diffusion_loss": 0.42349711060523987
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/learning_rate_real": 7.954545454545455e-06
    },
    {
      "debug/num_lat_loss": 1600.0,
      "debug/num_lat_total": 2279.0,
      "debug/num_tok_loss": 1600.0,
      "debug/num_tok_total": 2279.0,
      "epoch": 0.0958904109589041,
      "step": 7
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/ce_loss": 23.0,
      "train/diffusion_loss": 0.4753822982311249
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/learning_rate_real": 7.954545454545455e-06
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2679.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2679.0,
      "epoch": 0.0958904109589041,
      "step": 7
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/ce_loss": 23.5,
      "train/diffusion_loss": 0.4810636341571808
    },
    {
      "epoch": 0.0958904109589041,
      "step": 7,
      "train/learning_rate_real": 7.954545454545455e-06
    },
    {
      "debug/num_lat_loss": 1699.0,
      "debug/num_lat_total": 2348.0,
      "debug/num_tok_loss": 1699.0,
      "debug/num_tok_total": 2348.0,
      "epoch": 0.1095890410958904,
      "step": 8
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/ce_loss": 23.0,
      "train/diffusion_loss": 0.5147892832756042
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/learning_rate_real": 9.090909090909091e-06
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3080.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3080.0,
      "epoch": 0.1095890410958904,
      "step": 8
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.3648412227630615
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/learning_rate_real": 9.090909090909091e-06
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 0.1095890410958904,
      "step": 8
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/ce_loss": 23.25,
      "train/diffusion_loss": 0.4737395942211151
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/learning_rate_real": 9.090909090909091e-06
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 0.1095890410958904,
      "step": 8
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/ce_loss": 23.125,
      "train/diffusion_loss": 0.4529899060726166
    },
    {
      "epoch": 0.1095890410958904,
      "step": 8,
      "train/learning_rate_real": 9.090909090909091e-06
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 0.1232876712328767,
      "step": 9
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.460472971200943
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/learning_rate_real": 1.0227272727272729e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2845.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2845.0,
      "epoch": 0.1232876712328767,
      "step": 9
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.39518892765045166
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/learning_rate_real": 1.0227272727272729e-05
    },
    {
      "debug/num_lat_loss": 1814.0,
      "debug/num_lat_total": 2689.0,
      "debug/num_tok_loss": 1814.0,
      "debug/num_tok_total": 2689.0,
      "epoch": 0.1232876712328767,
      "step": 9
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.4926188588142395
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/learning_rate_real": 1.0227272727272729e-05
    },
    {
      "debug/num_lat_loss": 1575.0,
      "debug/num_lat_total": 2250.0,
      "debug/num_tok_loss": 1575.0,
      "debug/num_tok_total": 2250.0,
      "epoch": 0.1232876712328767,
      "step": 9
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/ce_loss": 22.625,
      "train/diffusion_loss": 0.48270726203918457
    },
    {
      "epoch": 0.1232876712328767,
      "step": 9,
      "train/learning_rate_real": 1.0227272727272729e-05
    },
    {
      "epoch": 0.136986301369863,
      "grad_norm": 11.971195220947266,
      "learning_rate": 1.0227272727272729e-05,
      "loss": 6.3985,
      "step": 10
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 3053.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 3053.0,
      "epoch": 0.136986301369863,
      "step": 10
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.35791292786598206
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/learning_rate_real": 1.1363636363636365e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3087.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3087.0,
      "epoch": 0.136986301369863,
      "step": 10
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/ce_loss": 22.875,
      "train/diffusion_loss": 0.38981443643569946
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/learning_rate_real": 1.1363636363636365e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2231.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2231.0,
      "epoch": 0.136986301369863,
      "step": 10
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/ce_loss": 22.5,
      "train/diffusion_loss": 0.5622515678405762
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/learning_rate_real": 1.1363636363636365e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2867.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2867.0,
      "epoch": 0.136986301369863,
      "step": 10
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/ce_loss": 22.625,
      "train/diffusion_loss": 0.3926439881324768
    },
    {
      "epoch": 0.136986301369863,
      "step": 10,
      "train/learning_rate_real": 1.1363636363636365e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 0.1506849315068493,
      "step": 11
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/ce_loss": 22.375,
      "train/diffusion_loss": 0.4806886315345764
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2848.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2848.0,
      "epoch": 0.1506849315068493,
      "step": 11
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/ce_loss": 22.125,
      "train/diffusion_loss": 0.4312594532966614
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2875.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2875.0,
      "epoch": 0.1506849315068493,
      "step": 11
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/ce_loss": 22.125,
      "train/diffusion_loss": 0.4150593876838684
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2418.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2418.0,
      "epoch": 0.1506849315068493,
      "step": 11
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/ce_loss": 22.0,
      "train/diffusion_loss": 0.49140727519989014
    },
    {
      "epoch": 0.1506849315068493,
      "step": 11,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1732.0,
      "debug/num_lat_total": 2536.0,
      "debug/num_tok_loss": 1732.0,
      "debug/num_tok_total": 2536.0,
      "epoch": 0.1643835616438356,
      "step": 12
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/ce_loss": 21.5,
      "train/diffusion_loss": 0.4987460970878601
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/learning_rate_real": 1.3636363636363637e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 0.1643835616438356,
      "step": 12
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/ce_loss": 21.375,
      "train/diffusion_loss": 0.4842108190059662
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/learning_rate_real": 1.3636363636363637e-05
    },
    {
      "debug/num_lat_loss": 1702.0,
      "debug/num_lat_total": 2363.0,
      "debug/num_tok_loss": 1702.0,
      "debug/num_tok_total": 2363.0,
      "epoch": 0.1643835616438356,
      "step": 12
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/ce_loss": 21.5,
      "train/diffusion_loss": 0.4798933267593384
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/learning_rate_real": 1.3636363636363637e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2623.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2623.0,
      "epoch": 0.1643835616438356,
      "step": 12
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/ce_loss": 21.625,
      "train/diffusion_loss": 0.48570314049720764
    },
    {
      "epoch": 0.1643835616438356,
      "step": 12,
      "train/learning_rate_real": 1.3636363636363637e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2404.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2404.0,
      "epoch": 0.1780821917808219,
      "step": 13
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/ce_loss": 20.75,
      "train/diffusion_loss": 0.4996339678764343
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/learning_rate_real": 1.4772727272727274e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2452.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2452.0,
      "epoch": 0.1780821917808219,
      "step": 13
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/ce_loss": 20.75,
      "train/diffusion_loss": 0.5086291432380676
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/learning_rate_real": 1.4772727272727274e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 0.1780821917808219,
      "step": 13
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/ce_loss": 20.875,
      "train/diffusion_loss": 0.4561443328857422
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/learning_rate_real": 1.4772727272727274e-05
    },
    {
      "debug/num_lat_loss": 1655.0,
      "debug/num_lat_total": 3249.0,
      "debug/num_tok_loss": 1655.0,
      "debug/num_tok_total": 3249.0,
      "epoch": 0.1780821917808219,
      "step": 13
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/ce_loss": 21.125,
      "train/diffusion_loss": 0.26086410880088806
    },
    {
      "epoch": 0.1780821917808219,
      "step": 13,
      "train/learning_rate_real": 1.4772727272727274e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2870.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2870.0,
      "epoch": 0.1917808219178082,
      "step": 14
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/ce_loss": 20.0,
      "train/diffusion_loss": 0.408465713262558
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/learning_rate_real": 1.590909090909091e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 2379.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 2379.0,
      "epoch": 0.1917808219178082,
      "step": 14
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/ce_loss": 19.875,
      "train/diffusion_loss": 0.4767815172672272
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/learning_rate_real": 1.590909090909091e-05
    },
    {
      "debug/num_lat_loss": 1728.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1728.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 0.1917808219178082,
      "step": 14
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/ce_loss": 20.0,
      "train/diffusion_loss": 0.45503005385398865
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/learning_rate_real": 1.590909090909091e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2892.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2892.0,
      "epoch": 0.1917808219178082,
      "step": 14
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/ce_loss": 20.125,
      "train/diffusion_loss": 0.4370043873786926
    },
    {
      "epoch": 0.1917808219178082,
      "step": 14,
      "train/learning_rate_real": 1.590909090909091e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 0.2054794520547945,
      "step": 15
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/ce_loss": 18.875,
      "train/diffusion_loss": 0.45287543535232544
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/learning_rate_real": 1.7045454545454546e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2454.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2454.0,
      "epoch": 0.2054794520547945,
      "step": 15
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/ce_loss": 18.875,
      "train/diffusion_loss": 0.5138697028160095
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/learning_rate_real": 1.7045454545454546e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2184.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2184.0,
      "epoch": 0.2054794520547945,
      "step": 15
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/ce_loss": 18.75,
      "train/diffusion_loss": 0.5638265013694763
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/learning_rate_real": 1.7045454545454546e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 1998.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 1998.0,
      "epoch": 0.2054794520547945,
      "step": 15
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/ce_loss": 18.625,
      "train/diffusion_loss": 0.5613153576850891
    },
    {
      "epoch": 0.2054794520547945,
      "step": 15,
      "train/learning_rate_real": 1.7045454545454546e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 0.2191780821917808,
      "step": 16
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/ce_loss": 18.0,
      "train/diffusion_loss": 0.4571579396724701
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/learning_rate_real": 1.8181818181818182e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 0.2191780821917808,
      "step": 16
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/ce_loss": 18.25,
      "train/diffusion_loss": 0.3644043803215027
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/learning_rate_real": 1.8181818181818182e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 0.2191780821917808,
      "step": 16
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/ce_loss": 18.125,
      "train/diffusion_loss": 0.4329890012741089
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/learning_rate_real": 1.8181818181818182e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 0.2191780821917808,
      "step": 16
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/ce_loss": 17.75,
      "train/diffusion_loss": 0.4439554512500763
    },
    {
      "epoch": 0.2191780821917808,
      "step": 16,
      "train/learning_rate_real": 1.8181818181818182e-05
    },
    {
      "debug/num_lat_loss": 1679.0,
      "debug/num_lat_total": 2760.0,
      "debug/num_tok_loss": 1679.0,
      "debug/num_tok_total": 2760.0,
      "epoch": 0.2328767123287671,
      "step": 17
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/ce_loss": 17.125,
      "train/diffusion_loss": 0.41194167733192444
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/learning_rate_real": 1.9318181818181818e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2848.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2848.0,
      "epoch": 0.2328767123287671,
      "step": 17
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/ce_loss": 16.875,
      "train/diffusion_loss": 0.4312850832939148
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/learning_rate_real": 1.9318181818181818e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2575.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2575.0,
      "epoch": 0.2328767123287671,
      "step": 17
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/ce_loss": 16.875,
      "train/diffusion_loss": 0.4552731513977051
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/learning_rate_real": 1.9318181818181818e-05
    },
    {
      "debug/num_lat_loss": 1823.0,
      "debug/num_lat_total": 3136.0,
      "debug/num_tok_loss": 1823.0,
      "debug/num_tok_total": 3136.0,
      "epoch": 0.2328767123287671,
      "step": 17
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/ce_loss": 17.375,
      "train/diffusion_loss": 0.37979450821876526
    },
    {
      "epoch": 0.2328767123287671,
      "step": 17,
      "train/learning_rate_real": 1.9318181818181818e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2678.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2678.0,
      "epoch": 0.2465753424657534,
      "step": 18
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/ce_loss": 15.6875,
      "train/diffusion_loss": 0.45173999667167664
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/learning_rate_real": 2.0454545454545457e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2446.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2446.0,
      "epoch": 0.2465753424657534,
      "step": 18
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/ce_loss": 15.625,
      "train/diffusion_loss": 0.4798762798309326
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/learning_rate_real": 2.0454545454545457e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2654.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2654.0,
      "epoch": 0.2465753424657534,
      "step": 18
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/ce_loss": 15.6875,
      "train/diffusion_loss": 0.4615324139595032
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/learning_rate_real": 2.0454545454545457e-05
    },
    {
      "debug/num_lat_loss": 1628.0,
      "debug/num_lat_total": 2720.0,
      "debug/num_tok_loss": 1628.0,
      "debug/num_tok_total": 2720.0,
      "epoch": 0.2465753424657534,
      "step": 18
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/ce_loss": 16.25,
      "train/diffusion_loss": 0.365399032831192
    },
    {
      "epoch": 0.2465753424657534,
      "step": 18,
      "train/learning_rate_real": 2.0454545454545457e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 0.2602739726027397,
      "step": 19
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/ce_loss": 14.6875,
      "train/diffusion_loss": 0.4907709062099457
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/learning_rate_real": 2.1590909090909093e-05
    },
    {
      "debug/num_lat_loss": 1752.0,
      "debug/num_lat_total": 2186.0,
      "debug/num_tok_loss": 1752.0,
      "debug/num_tok_total": 2186.0,
      "epoch": 0.2602739726027397,
      "step": 19
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/ce_loss": 14.125,
      "train/diffusion_loss": 0.542559027671814
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/learning_rate_real": 2.1590909090909093e-05
    },
    {
      "debug/num_lat_loss": 1687.0,
      "debug/num_lat_total": 2674.0,
      "debug/num_tok_loss": 1687.0,
      "debug/num_tok_total": 2674.0,
      "epoch": 0.2602739726027397,
      "step": 19
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/ce_loss": 15.0625,
      "train/diffusion_loss": 0.38787195086479187
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/learning_rate_real": 2.1590909090909093e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2032.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2032.0,
      "epoch": 0.2602739726027397,
      "step": 19
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/ce_loss": 13.875,
      "train/diffusion_loss": 0.5763474106788635
    },
    {
      "epoch": 0.2602739726027397,
      "step": 19,
      "train/learning_rate_real": 2.1590909090909093e-05
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 8.30610466003418,
      "learning_rate": 2.1590909090909093e-05,
      "loss": 5.6093,
      "step": 20
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 3130.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 3130.0,
      "epoch": 0.273972602739726,
      "step": 20
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/ce_loss": 14.8125,
      "train/diffusion_loss": 0.3510192036628723
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/learning_rate_real": 2.272727272727273e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2193.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2193.0,
      "epoch": 0.273972602739726,
      "step": 20
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/ce_loss": 13.4375,
      "train/diffusion_loss": 0.5544577836990356
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/learning_rate_real": 2.272727272727273e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2394.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2394.0,
      "epoch": 0.273972602739726,
      "step": 20
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/ce_loss": 13.75,
      "train/diffusion_loss": 0.48900994658470154
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/learning_rate_real": 2.272727272727273e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2868.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2868.0,
      "epoch": 0.273972602739726,
      "step": 20
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/ce_loss": 14.25,
      "train/diffusion_loss": 0.3776327073574066
    },
    {
      "epoch": 0.273972602739726,
      "step": 20,
      "train/learning_rate_real": 2.272727272727273e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2857.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2857.0,
      "epoch": 0.2876712328767123,
      "step": 21
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/ce_loss": 13.875,
      "train/diffusion_loss": 0.39368781447410583
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/learning_rate_real": 2.3863636363636365e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2192.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2192.0,
      "epoch": 0.2876712328767123,
      "step": 21
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/ce_loss": 12.875,
      "train/diffusion_loss": 0.5420176982879639
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/learning_rate_real": 2.3863636363636365e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 3104.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 3104.0,
      "epoch": 0.2876712328767123,
      "step": 21
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/ce_loss": 14.0625,
      "train/diffusion_loss": 0.3666694760322571
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/learning_rate_real": 2.3863636363636365e-05
    },
    {
      "debug/num_lat_loss": 1634.0,
      "debug/num_lat_total": 2789.0,
      "debug/num_tok_loss": 1634.0,
      "debug/num_tok_total": 2789.0,
      "epoch": 0.2876712328767123,
      "step": 21
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/ce_loss": 14.0,
      "train/diffusion_loss": 0.37210342288017273
    },
    {
      "epoch": 0.2876712328767123,
      "step": 21,
      "train/learning_rate_real": 2.3863636363636365e-05
    },
    {
      "debug/num_lat_loss": 1586.0,
      "debug/num_lat_total": 2434.0,
      "debug/num_tok_loss": 1586.0,
      "debug/num_tok_total": 2434.0,
      "epoch": 0.3013698630136986,
      "step": 22
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/ce_loss": 13.1875,
      "train/diffusion_loss": 0.4085391163825989
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/learning_rate_real": 2.5e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2223.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2223.0,
      "epoch": 0.3013698630136986,
      "step": 22
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/ce_loss": 12.4375,
      "train/diffusion_loss": 0.5267496705055237
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/learning_rate_real": 2.5e-05
    },
    {
      "debug/num_lat_loss": 1751.0,
      "debug/num_lat_total": 3025.0,
      "debug/num_tok_loss": 1751.0,
      "debug/num_tok_total": 3025.0,
      "epoch": 0.3013698630136986,
      "step": 22
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/ce_loss": 13.5625,
      "train/diffusion_loss": 0.3490737974643707
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/learning_rate_real": 2.5e-05
    },
    {
      "debug/num_lat_loss": 1596.0,
      "debug/num_lat_total": 2684.0,
      "debug/num_tok_loss": 1596.0,
      "debug/num_tok_total": 2684.0,
      "epoch": 0.3013698630136986,
      "step": 22
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/ce_loss": 13.3125,
      "train/diffusion_loss": 0.3688155710697174
    },
    {
      "epoch": 0.3013698630136986,
      "step": 22,
      "train/learning_rate_real": 2.5e-05
    },
    {
      "debug/num_lat_loss": 1584.0,
      "debug/num_lat_total": 2224.0,
      "debug/num_tok_loss": 1584.0,
      "debug/num_tok_total": 2224.0,
      "epoch": 0.3150684931506849,
      "step": 23
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/ce_loss": 12.5,
      "train/diffusion_loss": 0.4947201907634735
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/learning_rate_real": 2.4999876941239957e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 0.3150684931506849,
      "step": 23
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/ce_loss": 12.6875,
      "train/diffusion_loss": 0.42563173174858093
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/learning_rate_real": 2.4999876941239957e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 0.3150684931506849,
      "step": 23
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/ce_loss": 12.5625,
      "train/diffusion_loss": 0.4141148328781128
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/learning_rate_real": 2.4999876941239957e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 3114.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 3114.0,
      "epoch": 0.3150684931506849,
      "step": 23
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/ce_loss": 13.3125,
      "train/diffusion_loss": 0.3588603138923645
    },
    {
      "epoch": 0.3150684931506849,
      "step": 23,
      "train/learning_rate_real": 2.4999876941239957e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2680.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2680.0,
      "epoch": 0.3287671232876712,
      "step": 24
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/ce_loss": 12.1875,
      "train/diffusion_loss": 0.4522026479244232
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/learning_rate_real": 2.4999507767382776e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2847.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2847.0,
      "epoch": 0.3287671232876712,
      "step": 24
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/ce_loss": 12.3125,
      "train/diffusion_loss": 0.37764832377433777
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/learning_rate_real": 2.4999507767382776e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2842.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2842.0,
      "epoch": 0.3287671232876712,
      "step": 24
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/ce_loss": 12.3125,
      "train/diffusion_loss": 0.4105679392814636
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/learning_rate_real": 2.4999507767382776e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 3031.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 3031.0,
      "epoch": 0.3287671232876712,
      "step": 24
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/ce_loss": 12.4375,
      "train/diffusion_loss": 0.3406731188297272
    },
    {
      "epoch": 0.3287671232876712,
      "step": 24,
      "train/learning_rate_real": 2.4999507767382776e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2465.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2465.0,
      "epoch": 0.3424657534246575,
      "step": 25
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/ce_loss": 11.625,
      "train/diffusion_loss": 0.4853813946247101
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/learning_rate_real": 2.4998892485697274e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 0.3424657534246575,
      "step": 25
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/ce_loss": 11.6875,
      "train/diffusion_loss": 0.44053584337234497
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/learning_rate_real": 2.4998892485697274e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 3020.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 3020.0,
      "epoch": 0.3424657534246575,
      "step": 25
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/ce_loss": 12.125,
      "train/diffusion_loss": 0.3806762397289276
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/learning_rate_real": 2.4998892485697274e-05
    },
    {
      "debug/num_lat_loss": 1819.0,
      "debug/num_lat_total": 2919.0,
      "debug/num_tok_loss": 1819.0,
      "debug/num_tok_total": 2919.0,
      "epoch": 0.3424657534246575,
      "step": 25
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/ce_loss": 11.8125,
      "train/diffusion_loss": 0.4265860915184021
    },
    {
      "epoch": 0.3424657534246575,
      "step": 25,
      "train/learning_rate_real": 2.4998892485697274e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2678.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2678.0,
      "epoch": 0.3561643835616438,
      "step": 26
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/ce_loss": 11.375,
      "train/diffusion_loss": 0.4423990845680237
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/learning_rate_real": 2.4998031108297975e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2840.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2840.0,
      "epoch": 0.3561643835616438,
      "step": 26
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/ce_loss": 11.5,
      "train/diffusion_loss": 0.3932268023490906
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/learning_rate_real": 2.4998031108297975e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3091.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3091.0,
      "epoch": 0.3561643835616438,
      "step": 26
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/ce_loss": 11.625,
      "train/diffusion_loss": 0.3441810607910156
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/learning_rate_real": 2.4998031108297975e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2841.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2841.0,
      "epoch": 0.3561643835616438,
      "step": 26
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/ce_loss": 11.5,
      "train/diffusion_loss": 0.4140407145023346
    },
    {
      "epoch": 0.3561643835616438,
      "step": 26,
      "train/learning_rate_real": 2.4998031108297975e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2690.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2690.0,
      "epoch": 0.3698630136986301,
      "step": 27
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/ce_loss": 11.125,
      "train/diffusion_loss": 0.4142424166202545
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/learning_rate_real": 2.4996923652144887e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2457.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2457.0,
      "epoch": 0.3698630136986301,
      "step": 27
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/ce_loss": 11.125,
      "train/diffusion_loss": 0.4762253761291504
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/learning_rate_real": 2.4996923652144887e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2393.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2393.0,
      "epoch": 0.3698630136986301,
      "step": 27
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/ce_loss": 11.0,
      "train/diffusion_loss": 0.46639570593833923
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/learning_rate_real": 2.4996923652144887e-05
    },
    {
      "debug/num_lat_loss": 1621.0,
      "debug/num_lat_total": 2277.0,
      "debug/num_tok_loss": 1621.0,
      "debug/num_tok_total": 2277.0,
      "epoch": 0.3698630136986301,
      "step": 27
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/ce_loss": 11.0625,
      "train/diffusion_loss": 0.4759414494037628
    },
    {
      "epoch": 0.3698630136986301,
      "step": 27,
      "train/learning_rate_real": 2.4996923652144887e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2847.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2847.0,
      "epoch": 0.3835616438356164,
      "step": 28
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/ce_loss": 11.0,
      "train/diffusion_loss": 0.40890657901763916
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/learning_rate_real": 2.4995570139043158e-05
    },
    {
      "debug/num_lat_loss": 1735.0,
      "debug/num_lat_total": 2790.0,
      "debug/num_tok_loss": 1735.0,
      "debug/num_tok_total": 2790.0,
      "epoch": 0.3835616438356164,
      "step": 28
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/ce_loss": 11.0,
      "train/diffusion_loss": 0.3836289644241333
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/learning_rate_real": 2.4995570139043158e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 3062.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 3062.0,
      "epoch": 0.3835616438356164,
      "step": 28
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/ce_loss": 11.0625,
      "train/diffusion_loss": 0.32098737359046936
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/learning_rate_real": 2.4995570139043158e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 3061.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 3061.0,
      "epoch": 0.3835616438356164,
      "step": 28
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/ce_loss": 11.0,
      "train/diffusion_loss": 0.35937121510505676
    },
    {
      "epoch": 0.3835616438356164,
      "step": 28,
      "train/learning_rate_real": 2.4995570139043158e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2669.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2669.0,
      "epoch": 0.3972602739726027,
      "step": 29
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/ce_loss": 10.75,
      "train/diffusion_loss": 0.4344680905342102
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/learning_rate_real": 2.499397059564265e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 3022.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 3022.0,
      "epoch": 0.3972602739726027,
      "step": 29
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/ce_loss": 10.8125,
      "train/diffusion_loss": 0.34724363684654236
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/learning_rate_real": 2.499397059564265e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2866.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2866.0,
      "epoch": 0.3972602739726027,
      "step": 29
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/ce_loss": 10.75,
      "train/diffusion_loss": 0.39015740156173706
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/learning_rate_real": 2.499397059564265e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2869.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2869.0,
      "epoch": 0.3972602739726027,
      "step": 29
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/ce_loss": 10.75,
      "train/diffusion_loss": 0.37296244502067566
    },
    {
      "epoch": 0.3972602739726027,
      "step": 29,
      "train/learning_rate_real": 2.499397059564265e-05
    },
    {
      "epoch": 0.410958904109589,
      "grad_norm": 3.1832733154296875,
      "learning_rate": 2.499397059564265e-05,
      "loss": 4.2717,
      "step": 30
    },
    {
      "debug/num_lat_loss": 1707.0,
      "debug/num_lat_total": 2707.0,
      "debug/num_tok_loss": 1707.0,
      "debug/num_tok_total": 2707.0,
      "epoch": 0.410958904109589,
      "step": 30
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/ce_loss": 10.625,
      "train/diffusion_loss": 0.38847485184669495
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/learning_rate_real": 2.499212505343742e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 0.410958904109589,
      "step": 30
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/ce_loss": 10.5625,
      "train/diffusion_loss": 0.42114052176475525
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/learning_rate_real": 2.499212505343742e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2236.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2236.0,
      "epoch": 0.410958904109589,
      "step": 30
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/ce_loss": 10.375,
      "train/diffusion_loss": 0.5161031484603882
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/learning_rate_real": 2.499212505343742e-05
    },
    {
      "debug/num_lat_loss": 1548.0,
      "debug/num_lat_total": 2354.0,
      "debug/num_tok_loss": 1548.0,
      "debug/num_tok_total": 2354.0,
      "epoch": 0.410958904109589,
      "step": 30
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/ce_loss": 10.5625,
      "train/diffusion_loss": 0.404449462890625
    },
    {
      "epoch": 0.410958904109589,
      "step": 30,
      "train/learning_rate_real": 2.499212505343742e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2417.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2417.0,
      "epoch": 0.4246575342465753,
      "step": 31
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/ce_loss": 10.25,
      "train/diffusion_loss": 0.48773378133773804
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/learning_rate_real": 2.4990033548765084e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2422.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2422.0,
      "epoch": 0.4246575342465753,
      "step": 31
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/ce_loss": 10.3125,
      "train/diffusion_loss": 0.4736463129520416
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/learning_rate_real": 2.4990033548765084e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2889.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2889.0,
      "epoch": 0.4246575342465753,
      "step": 31
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/ce_loss": 10.4375,
      "train/diffusion_loss": 0.347330778837204
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/learning_rate_real": 2.4990033548765084e-05
    },
    {
      "debug/num_lat_loss": 1755.0,
      "debug/num_lat_total": 2391.0,
      "debug/num_tok_loss": 1755.0,
      "debug/num_tok_total": 2391.0,
      "epoch": 0.4246575342465753,
      "step": 31
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/ce_loss": 10.25,
      "train/diffusion_loss": 0.48943084478378296
    },
    {
      "epoch": 0.4246575342465753,
      "step": 31,
      "train/learning_rate_real": 2.4990033548765084e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2192.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2192.0,
      "epoch": 0.4383561643835616,
      "step": 32
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/ce_loss": 10.0,
      "train/diffusion_loss": 0.5378103852272034
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/learning_rate_real": 2.4987696122806127e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2235.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2235.0,
      "epoch": 0.4383561643835616,
      "step": 32
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/ce_loss": 10.0,
      "train/diffusion_loss": 0.546953022480011
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/learning_rate_real": 2.4987696122806127e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 3086.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 3086.0,
      "epoch": 0.4383561643835616,
      "step": 32
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/ce_loss": 10.25,
      "train/diffusion_loss": 0.3321462869644165
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/learning_rate_real": 2.4987696122806127e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2842.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2842.0,
      "epoch": 0.4383561643835616,
      "step": 32
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/ce_loss": 10.1875,
      "train/diffusion_loss": 0.39034295082092285
    },
    {
      "epoch": 0.4383561643835616,
      "step": 32,
      "train/learning_rate_real": 2.4987696122806127e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 0.4520547945205479,
      "step": 33
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/ce_loss": 9.9375,
      "train/diffusion_loss": 0.4693682789802551
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/learning_rate_real": 2.498511282158305e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 0.4520547945205479,
      "step": 33
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/ce_loss": 9.9375,
      "train/diffusion_loss": 0.39866846799850464
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/learning_rate_real": 2.498511282158305e-05
    },
    {
      "debug/num_lat_loss": 1599.0,
      "debug/num_lat_total": 2706.0,
      "debug/num_tok_loss": 1599.0,
      "debug/num_tok_total": 2706.0,
      "epoch": 0.4520547945205479,
      "step": 33
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/ce_loss": 10.0,
      "train/diffusion_loss": 0.3576337397098541
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/learning_rate_real": 2.498511282158305e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 1977.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 1977.0,
      "epoch": 0.4520547945205479,
      "step": 33
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/ce_loss": 9.75,
      "train/diffusion_loss": 0.5810636878013611
    },
    {
      "epoch": 0.4520547945205479,
      "step": 33,
      "train/learning_rate_real": 2.498511282158305e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2402.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2402.0,
      "epoch": 0.4657534246575342,
      "step": 34
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/ce_loss": 9.6875,
      "train/diffusion_loss": 0.4539051651954651
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/learning_rate_real": 2.4982283695959525e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2834.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2834.0,
      "epoch": 0.4657534246575342,
      "step": 34
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/ce_loss": 9.6875,
      "train/diffusion_loss": 0.3914901614189148
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/learning_rate_real": 2.4982283695959525e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 3059.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 3059.0,
      "epoch": 0.4657534246575342,
      "step": 34
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/ce_loss": 9.8125,
      "train/diffusion_loss": 0.3305813670158386
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/learning_rate_real": 2.4982283695959525e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2895.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2895.0,
      "epoch": 0.4657534246575342,
      "step": 34
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/ce_loss": 9.75,
      "train/diffusion_loss": 0.3716033399105072
    },
    {
      "epoch": 0.4657534246575342,
      "step": 34,
      "train/learning_rate_real": 2.4982283695959525e-05
    },
    {
      "debug/num_lat_loss": 1817.0,
      "debug/num_lat_total": 3350.0,
      "debug/num_tok_loss": 1817.0,
      "debug/num_tok_total": 3350.0,
      "epoch": 0.4794520547945205,
      "step": 35
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/ce_loss": 9.5625,
      "train/diffusion_loss": 0.27160805463790894
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/learning_rate_real": 2.4979208801639335e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2438.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2438.0,
      "epoch": 0.4794520547945205,
      "step": 35
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/ce_loss": 9.4375,
      "train/diffusion_loss": 0.48116302490234375
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/learning_rate_real": 2.4979208801639335e-05
    },
    {
      "debug/num_lat_loss": 1652.0,
      "debug/num_lat_total": 2077.0,
      "debug/num_tok_loss": 1652.0,
      "debug/num_tok_total": 2077.0,
      "epoch": 0.4794520547945205,
      "step": 35
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/ce_loss": 9.375,
      "train/diffusion_loss": 0.5048086643218994
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/learning_rate_real": 2.4979208801639335e-05
    },
    {
      "debug/num_lat_loss": 1626.0,
      "debug/num_lat_total": 2331.0,
      "debug/num_tok_loss": 1626.0,
      "debug/num_tok_total": 2331.0,
      "epoch": 0.4794520547945205,
      "step": 35
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/ce_loss": 9.5,
      "train/diffusion_loss": 0.43859153985977173
    },
    {
      "epoch": 0.4794520547945205,
      "step": 35,
      "train/learning_rate_real": 2.4979208801639335e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2455.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2455.0,
      "epoch": 0.4931506849315068,
      "step": 36
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/ce_loss": 9.25,
      "train/diffusion_loss": 0.4600470960140228
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/learning_rate_real": 2.497588819916531e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2444.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2444.0,
      "epoch": 0.4931506849315068,
      "step": 36
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/ce_loss": 9.125,
      "train/diffusion_loss": 0.5146046876907349
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/learning_rate_real": 2.497588819916531e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2006.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2006.0,
      "epoch": 0.4931506849315068,
      "step": 36
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/ce_loss": 9.0,
      "train/diffusion_loss": 0.5873913168907166
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/learning_rate_real": 2.497588819916531e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2867.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2867.0,
      "epoch": 0.4931506849315068,
      "step": 36
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/ce_loss": 9.25,
      "train/diffusion_loss": 0.36510205268859863
    },
    {
      "epoch": 0.4931506849315068,
      "step": 36,
      "train/learning_rate_real": 2.497588819916531e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2899.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2899.0,
      "epoch": 0.5068493150684932,
      "step": 37
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/ce_loss": 9.0,
      "train/diffusion_loss": 0.3962245285511017
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/learning_rate_real": 2.4972321953918126e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 0.5068493150684932,
      "step": 37
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/ce_loss": 8.9375,
      "train/diffusion_loss": 0.41625067591667175
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/learning_rate_real": 2.4972321953918126e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2415.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2415.0,
      "epoch": 0.5068493150684932,
      "step": 37
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/ce_loss": 8.875,
      "train/diffusion_loss": 0.45504698157310486
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/learning_rate_real": 2.4972321953918126e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2875.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2875.0,
      "epoch": 0.5068493150684932,
      "step": 37
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/ce_loss": 8.9375,
      "train/diffusion_loss": 0.3914295434951782
    },
    {
      "epoch": 0.5068493150684932,
      "step": 37,
      "train/learning_rate_real": 2.4972321953918126e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2664.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2664.0,
      "epoch": 0.5205479452054794,
      "step": 38
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/ce_loss": 8.625,
      "train/diffusion_loss": 0.4318960905075073
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/learning_rate_real": 2.496851013611502e-05
    },
    {
      "debug/num_lat_loss": 1677.0,
      "debug/num_lat_total": 2958.0,
      "debug/num_tok_loss": 1677.0,
      "debug/num_tok_total": 2958.0,
      "epoch": 0.5205479452054794,
      "step": 38
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/ce_loss": 8.8125,
      "train/diffusion_loss": 0.31698915362358093
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/learning_rate_real": 2.496851013611502e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 0.5205479452054794,
      "step": 38
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/ce_loss": 8.5625,
      "train/diffusion_loss": 0.481392502784729
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/learning_rate_real": 2.496851013611502e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 0.5205479452054794,
      "step": 38
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/ce_loss": 8.6875,
      "train/diffusion_loss": 0.46959739923477173
    },
    {
      "epoch": 0.5205479452054794,
      "step": 38,
      "train/learning_rate_real": 2.496851013611502e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3080.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3080.0,
      "epoch": 0.5342465753424658,
      "step": 39
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/ce_loss": 8.5,
      "train/diffusion_loss": 0.35770803689956665
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/learning_rate_real": 2.4964452820808397e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2442.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2442.0,
      "epoch": 0.5342465753424658,
      "step": 39
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/ce_loss": 8.3125,
      "train/diffusion_loss": 0.4941246509552002
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/learning_rate_real": 2.4964452820808397e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2457.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2457.0,
      "epoch": 0.5342465753424658,
      "step": 39
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/ce_loss": 8.375,
      "train/diffusion_loss": 0.4619571268558502
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/learning_rate_real": 2.4964452820808397e-05
    },
    {
      "debug/num_lat_loss": 1661.0,
      "debug/num_lat_total": 2303.0,
      "debug/num_tok_loss": 1661.0,
      "debug/num_tok_total": 2303.0,
      "epoch": 0.5342465753424658,
      "step": 39
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/ce_loss": 8.375,
      "train/diffusion_loss": 0.4603547155857086
    },
    {
      "epoch": 0.5342465753424658,
      "step": 39,
      "train/learning_rate_real": 2.4964452820808397e-05
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 2.600710391998291,
      "learning_rate": 2.4964452820808397e-05,
      "loss": 3.9661,
      "step": 40
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 0.547945205479452,
      "step": 40
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/ce_loss": 8.125,
      "train/diffusion_loss": 0.37036633491516113
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/learning_rate_real": 2.4960150087884376e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2626.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2626.0,
      "epoch": 0.547945205479452,
      "step": 40
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/ce_loss": 8.125,
      "train/diffusion_loss": 0.4317276179790497
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/learning_rate_real": 2.4960150087884376e-05
    },
    {
      "debug/num_lat_loss": 1606.0,
      "debug/num_lat_total": 2725.0,
      "debug/num_tok_loss": 1606.0,
      "debug/num_tok_total": 2725.0,
      "epoch": 0.547945205479452,
      "step": 40
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/ce_loss": 8.1875,
      "train/diffusion_loss": 0.33192673325538635
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/learning_rate_real": 2.4960150087884376e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2447.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2447.0,
      "epoch": 0.547945205479452,
      "step": 40
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/ce_loss": 8.1875,
      "train/diffusion_loss": 0.4681604206562042
    },
    {
      "epoch": 0.547945205479452,
      "step": 40,
      "train/learning_rate_real": 2.4960150087884376e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2449.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2449.0,
      "epoch": 0.5616438356164384,
      "step": 41
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/ce_loss": 7.8125,
      "train/diffusion_loss": 0.450018048286438
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/learning_rate_real": 2.495560202206119e-05
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2474.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2474.0,
      "epoch": 0.5616438356164384,
      "step": 41
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/ce_loss": 7.84375,
      "train/diffusion_loss": 0.4612468481063843
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/learning_rate_real": 2.495560202206119e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 0.5616438356164384,
      "step": 41
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/ce_loss": 7.90625,
      "train/diffusion_loss": 0.4192771017551422
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/learning_rate_real": 2.495560202206119e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 0.5616438356164384,
      "step": 41
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/ce_loss": 7.8125,
      "train/diffusion_loss": 0.47766613960266113
    },
    {
      "epoch": 0.5616438356164384,
      "step": 41,
      "train/learning_rate_real": 2.495560202206119e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 0.5753424657534246,
      "step": 42
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/ce_loss": 7.53125,
      "train/diffusion_loss": 0.4023301303386688
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/learning_rate_real": 2.4950808712887533e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 3016.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 3016.0,
      "epoch": 0.5753424657534246,
      "step": 42
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/ce_loss": 7.59375,
      "train/diffusion_loss": 0.360873818397522
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/learning_rate_real": 2.4950808712887533e-05
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2704.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2704.0,
      "epoch": 0.5753424657534246,
      "step": 42
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/ce_loss": 7.53125,
      "train/diffusion_loss": 0.42654749751091003
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/learning_rate_real": 2.4950808712887533e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2207.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2207.0,
      "epoch": 0.5753424657534246,
      "step": 42
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/ce_loss": 7.71875,
      "train/diffusion_loss": 0.5486947894096375
    },
    {
      "epoch": 0.5753424657534246,
      "step": 42,
      "train/learning_rate_real": 2.4950808712887533e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2862.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2862.0,
      "epoch": 0.589041095890411,
      "step": 43
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/ce_loss": 7.65625,
      "train/diffusion_loss": 0.3878563344478607
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/learning_rate_real": 2.4945770254740794e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2436.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2436.0,
      "epoch": 0.589041095890411,
      "step": 43
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/ce_loss": 7.34375,
      "train/diffusion_loss": 0.4670861065387726
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/learning_rate_real": 2.4945770254740794e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2687.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2687.0,
      "epoch": 0.589041095890411,
      "step": 43
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/ce_loss": 7.3125,
      "train/diffusion_loss": 0.4248601496219635
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/learning_rate_real": 2.4945770254740794e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2381.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2381.0,
      "epoch": 0.589041095890411,
      "step": 43
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/ce_loss": 7.28125,
      "train/diffusion_loss": 0.49052563309669495
    },
    {
      "epoch": 0.589041095890411,
      "step": 43,
      "train/learning_rate_real": 2.4945770254740794e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2239.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2239.0,
      "epoch": 0.6027397260273972,
      "step": 44
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/ce_loss": 7.03125,
      "train/diffusion_loss": 0.5206171870231628
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/learning_rate_real": 2.4940486746825197e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 3057.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 3057.0,
      "epoch": 0.6027397260273972,
      "step": 44
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/ce_loss": 7.21875,
      "train/diffusion_loss": 0.3397883474826813
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/learning_rate_real": 2.4940486746825197e-05
    },
    {
      "debug/num_lat_loss": 1652.0,
      "debug/num_lat_total": 2164.0,
      "debug/num_tok_loss": 1652.0,
      "debug/num_tok_total": 2164.0,
      "epoch": 0.6027397260273972,
      "step": 44
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/ce_loss": 7.09375,
      "train/diffusion_loss": 0.4852142930030823
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/learning_rate_real": 2.4940486746825197e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 0.6027397260273972,
      "step": 44
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/ce_loss": 7.15625,
      "train/diffusion_loss": 0.33311718702316284
    },
    {
      "epoch": 0.6027397260273972,
      "step": 44,
      "train/learning_rate_real": 2.4940486746825197e-05
    },
    {
      "debug/num_lat_loss": 1700.0,
      "debug/num_lat_total": 2680.0,
      "debug/num_tok_loss": 1700.0,
      "debug/num_tok_total": 2680.0,
      "epoch": 0.6164383561643836,
      "step": 45
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/ce_loss": 7.03125,
      "train/diffusion_loss": 0.3979421555995941
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/learning_rate_real": 2.493495829316986e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2441.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2441.0,
      "epoch": 0.6164383561643836,
      "step": 45
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/ce_loss": 6.90625,
      "train/diffusion_loss": 0.47810766100883484
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/learning_rate_real": 2.493495829316986e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 0.6164383561643836,
      "step": 45
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/ce_loss": 6.90625,
      "train/diffusion_loss": 0.4057077169418335
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/learning_rate_real": 2.493495829316986e-05
    },
    {
      "debug/num_lat_loss": 1716.0,
      "debug/num_lat_total": 2506.0,
      "debug/num_tok_loss": 1716.0,
      "debug/num_tok_total": 2506.0,
      "epoch": 0.6164383561643836,
      "step": 45
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/ce_loss": 7.03125,
      "train/diffusion_loss": 0.4449847936630249
    },
    {
      "epoch": 0.6164383561643836,
      "step": 45,
      "train/learning_rate_real": 2.493495829316986e-05
    },
    {
      "debug/num_lat_loss": 1629.0,
      "debug/num_lat_total": 2764.0,
      "debug/num_tok_loss": 1629.0,
      "debug/num_tok_total": 2764.0,
      "epoch": 0.6301369863013698,
      "step": 46
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/ce_loss": 6.84375,
      "train/diffusion_loss": 0.3362199664115906
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/learning_rate_real": 2.4929185002626714e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 2588.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 2588.0,
      "epoch": 0.6301369863013698,
      "step": 46
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/ce_loss": 6.78125,
      "train/diffusion_loss": 0.447072297334671
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/learning_rate_real": 2.4929185002626714e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2666.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2666.0,
      "epoch": 0.6301369863013698,
      "step": 46
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/ce_loss": 6.71875,
      "train/diffusion_loss": 0.4139653742313385
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/learning_rate_real": 2.4929185002626714e-05
    },
    {
      "debug/num_lat_loss": 1716.0,
      "debug/num_lat_total": 2716.0,
      "debug/num_tok_loss": 1716.0,
      "debug/num_tok_total": 2716.0,
      "epoch": 0.6301369863013698,
      "step": 46
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/ce_loss": 6.875,
      "train/diffusion_loss": 0.3880141079425812
    },
    {
      "epoch": 0.6301369863013698,
      "step": 46,
      "train/learning_rate_real": 2.4929185002626714e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2618.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2618.0,
      "epoch": 0.6438356164383562,
      "step": 47
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/ce_loss": 6.5,
      "train/diffusion_loss": 0.4298379421234131
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/learning_rate_real": 2.4923166988868407e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 3106.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 3106.0,
      "epoch": 0.6438356164383562,
      "step": 47
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/ce_loss": 6.59375,
      "train/diffusion_loss": 0.3285239040851593
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/learning_rate_real": 2.4923166988868407e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2233.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2233.0,
      "epoch": 0.6438356164383562,
      "step": 47
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/ce_loss": 6.5625,
      "train/diffusion_loss": 0.5352428555488586
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/learning_rate_real": 2.4923166988868407e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2406.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2406.0,
      "epoch": 0.6438356164383562,
      "step": 47
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/ce_loss": 6.84375,
      "train/diffusion_loss": 0.45154592394828796
    },
    {
      "epoch": 0.6438356164383562,
      "step": 47,
      "train/learning_rate_real": 2.4923166988868407e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2466.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2466.0,
      "epoch": 0.6575342465753424,
      "step": 48
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/ce_loss": 6.4375,
      "train/diffusion_loss": 0.4415627121925354
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/learning_rate_real": 2.491690437038602e-05
    },
    {
      "debug/num_lat_loss": 1604.0,
      "debug/num_lat_total": 3149.0,
      "debug/num_tok_loss": 1604.0,
      "debug/num_tok_total": 3149.0,
      "epoch": 0.6575342465753424,
      "step": 48
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/ce_loss": 6.5625,
      "train/diffusion_loss": 0.24725854396820068
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/learning_rate_real": 2.491690437038602e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2419.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2419.0,
      "epoch": 0.6575342465753424,
      "step": 48
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/ce_loss": 6.40625,
      "train/diffusion_loss": 0.4734286665916443
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/learning_rate_real": 2.491690437038602e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 3086.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 3086.0,
      "epoch": 0.6575342465753424,
      "step": 48
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/ce_loss": 6.46875,
      "train/diffusion_loss": 0.32624179124832153
    },
    {
      "epoch": 0.6575342465753424,
      "step": 48,
      "train/learning_rate_real": 2.491690437038602e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2873.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2873.0,
      "epoch": 0.6712328767123288,
      "step": 49
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/ce_loss": 6.25,
      "train/diffusion_loss": 0.38346582651138306
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/learning_rate_real": 2.491039727048677e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2609.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2609.0,
      "epoch": 0.6712328767123288,
      "step": 49
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/ce_loss": 6.25,
      "train/diffusion_loss": 0.4216737449169159
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/learning_rate_real": 2.491039727048677e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 0.6712328767123288,
      "step": 49
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/ce_loss": 6.25,
      "train/diffusion_loss": 0.4077589511871338
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/learning_rate_real": 2.491039727048677e-05
    },
    {
      "debug/num_lat_loss": 1635.0,
      "debug/num_lat_total": 2331.0,
      "debug/num_tok_loss": 1635.0,
      "debug/num_tok_total": 2331.0,
      "epoch": 0.6712328767123288,
      "step": 49
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/ce_loss": 6.28125,
      "train/diffusion_loss": 0.43508994579315186
    },
    {
      "epoch": 0.6712328767123288,
      "step": 49,
      "train/learning_rate_real": 2.491039727048677e-05
    },
    {
      "epoch": 0.684931506849315,
      "grad_norm": 2.2437052726745605,
      "learning_rate": 2.491039727048677e-05,
      "loss": 3.4905,
      "step": 50
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 3297.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 3297.0,
      "epoch": 0.684931506849315,
      "step": 50
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/ce_loss": 6.15625,
      "train/diffusion_loss": 0.3150237500667572
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/learning_rate_real": 2.490364581729156e-05
    },
    {
      "debug/num_lat_loss": 1631.0,
      "debug/num_lat_total": 2550.0,
      "debug/num_tok_loss": 1631.0,
      "debug/num_tok_total": 2550.0,
      "epoch": 0.684931506849315,
      "step": 50
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/ce_loss": 6.21875,
      "train/diffusion_loss": 0.39398685097694397
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/learning_rate_real": 2.490364581729156e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2623.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2623.0,
      "epoch": 0.684931506849315,
      "step": 50
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/ce_loss": 6.03125,
      "train/diffusion_loss": 0.41709935665130615
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/learning_rate_real": 2.490364581729156e-05
    },
    {
      "debug/num_lat_loss": 1681.0,
      "debug/num_lat_total": 2113.0,
      "debug/num_tok_loss": 1681.0,
      "debug/num_tok_total": 2113.0,
      "epoch": 0.684931506849315,
      "step": 50
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/ce_loss": 6.03125,
      "train/diffusion_loss": 0.5213543772697449
    },
    {
      "epoch": 0.684931506849315,
      "step": 50,
      "train/learning_rate_real": 2.490364581729156e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 0.6986301369863014,
      "step": 51
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/ce_loss": 5.9375,
      "train/diffusion_loss": 0.4208451509475708
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/learning_rate_real": 2.4896650143732457e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2217.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2217.0,
      "epoch": 0.6986301369863014,
      "step": 51
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/ce_loss": 6.15625,
      "train/diffusion_loss": 0.5214932560920715
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/learning_rate_real": 2.4896650143732457e-05
    },
    {
      "debug/num_lat_loss": 1747.0,
      "debug/num_lat_total": 2379.0,
      "debug/num_tok_loss": 1747.0,
      "debug/num_tok_total": 2379.0,
      "epoch": 0.6986301369863014,
      "step": 51
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/ce_loss": 6.0,
      "train/diffusion_loss": 0.4774201214313507
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/learning_rate_real": 2.4896650143732457e-05
    },
    {
      "debug/num_lat_loss": 1621.0,
      "debug/num_lat_total": 2274.0,
      "debug/num_tok_loss": 1621.0,
      "debug/num_tok_total": 2274.0,
      "epoch": 0.6986301369863014,
      "step": 51
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/ce_loss": 6.09375,
      "train/diffusion_loss": 0.44967830181121826
    },
    {
      "epoch": 0.6986301369863014,
      "step": 51,
      "train/learning_rate_real": 2.4896650143732457e-05
    },
    {
      "debug/num_lat_loss": 1820.0,
      "debug/num_lat_total": 2915.0,
      "debug/num_tok_loss": 1820.0,
      "debug/num_tok_total": 2915.0,
      "epoch": 0.7123287671232876,
      "step": 52
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/ce_loss": 5.8125,
      "train/diffusion_loss": 0.35934463143348694
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/learning_rate_real": 2.4889410387550093e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 0.7123287671232876,
      "step": 52
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/ce_loss": 5.75,
      "train/diffusion_loss": 0.4354902505874634
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/learning_rate_real": 2.4889410387550093e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2865.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2865.0,
      "epoch": 0.7123287671232876,
      "step": 52
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/ce_loss": 5.78125,
      "train/diffusion_loss": 0.3834609389305115
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/learning_rate_real": 2.4889410387550093e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2840.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2840.0,
      "epoch": 0.7123287671232876,
      "step": 52
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/ce_loss": 5.90625,
      "train/diffusion_loss": 0.3739621043205261
    },
    {
      "epoch": 0.7123287671232876,
      "step": 52,
      "train/learning_rate_real": 2.4889410387550093e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2414.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2414.0,
      "epoch": 0.726027397260274,
      "step": 53
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/ce_loss": 5.96875,
      "train/diffusion_loss": 0.4601896107196808
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/learning_rate_real": 2.488192669129093e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2902.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2902.0,
      "epoch": 0.726027397260274,
      "step": 53
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/ce_loss": 6.0,
      "train/diffusion_loss": 0.3668011426925659
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/learning_rate_real": 2.488192669129093e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 3089.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 3089.0,
      "epoch": 0.726027397260274,
      "step": 53
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/ce_loss": 5.78125,
      "train/diffusion_loss": 0.3401413857936859
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/learning_rate_real": 2.488192669129093e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 0.726027397260274,
      "step": 53
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/ce_loss": 6.03125,
      "train/diffusion_loss": 0.33927619457244873
    },
    {
      "epoch": 0.726027397260274,
      "step": 53,
      "train/learning_rate_real": 2.488192669129093e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2812.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2812.0,
      "epoch": 0.7397260273972602,
      "step": 54
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/ce_loss": 5.90625,
      "train/diffusion_loss": 0.39091378450393677
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/learning_rate_real": 2.4874199202304475e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 0.7397260273972602,
      "step": 54
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/ce_loss": 5.90625,
      "train/diffusion_loss": 0.4684540927410126
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/learning_rate_real": 2.4874199202304475e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2662.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2662.0,
      "epoch": 0.7397260273972602,
      "step": 54
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/ce_loss": 5.875,
      "train/diffusion_loss": 0.43764838576316833
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/learning_rate_real": 2.4874199202304475e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2662.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2662.0,
      "epoch": 0.7397260273972602,
      "step": 54
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/ce_loss": 5.9375,
      "train/diffusion_loss": 0.4463954269886017
    },
    {
      "epoch": 0.7397260273972602,
      "step": 54,
      "train/learning_rate_real": 2.4874199202304475e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2869.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2869.0,
      "epoch": 0.7534246575342466,
      "step": 55
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/ce_loss": 5.65625,
      "train/diffusion_loss": 0.3489157557487488
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/learning_rate_real": 2.486622807274036e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2442.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2442.0,
      "epoch": 0.7534246575342466,
      "step": 55
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/ce_loss": 5.9375,
      "train/diffusion_loss": 0.4745232164859772
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/learning_rate_real": 2.486622807274036e-05
    },
    {
      "debug/num_lat_loss": 1695.0,
      "debug/num_lat_total": 2697.0,
      "debug/num_tok_loss": 1695.0,
      "debug/num_tok_total": 2697.0,
      "epoch": 0.7534246575342466,
      "step": 55
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/ce_loss": 5.53125,
      "train/diffusion_loss": 0.3809465765953064
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/learning_rate_real": 2.486622807274036e-05
    },
    {
      "debug/num_lat_loss": 1561.0,
      "debug/num_lat_total": 1777.0,
      "debug/num_tok_loss": 1561.0,
      "debug/num_tok_total": 1777.0,
      "epoch": 0.7534246575342466,
      "step": 55
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/ce_loss": 5.6875,
      "train/diffusion_loss": 0.5590707063674927
    },
    {
      "epoch": 0.7534246575342466,
      "step": 55,
      "train/learning_rate_real": 2.486622807274036e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 3064.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 3064.0,
      "epoch": 0.7671232876712328,
      "step": 56
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/ce_loss": 5.59375,
      "train/diffusion_loss": 0.3314301371574402
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/learning_rate_real": 2.4858013459545352e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 0.7671232876712328,
      "step": 56
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/ce_loss": 5.4375,
      "train/diffusion_loss": 0.3836974501609802
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/learning_rate_real": 2.4858013459545352e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2845.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2845.0,
      "epoch": 0.7671232876712328,
      "step": 56
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.382179856300354
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/learning_rate_real": 2.4858013459545352e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 3123.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 3123.0,
      "epoch": 0.7671232876712328,
      "step": 56
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.34347909688949585
    },
    {
      "epoch": 0.7671232876712328,
      "step": 56,
      "train/learning_rate_real": 2.4858013459545352e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2622.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2622.0,
      "epoch": 0.7808219178082192,
      "step": 57
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/ce_loss": 5.9375,
      "train/diffusion_loss": 0.4226444661617279
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/learning_rate_real": 2.4849555524460277e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 3032.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 3032.0,
      "epoch": 0.7808219178082192,
      "step": 57
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/ce_loss": 5.5625,
      "train/diffusion_loss": 0.31934428215026855
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/learning_rate_real": 2.4849555524460277e-05
    },
    {
      "debug/num_lat_loss": 1709.0,
      "debug/num_lat_total": 2503.0,
      "debug/num_tok_loss": 1709.0,
      "debug/num_tok_total": 2503.0,
      "epoch": 0.7808219178082192,
      "step": 57
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.4243476390838623
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/learning_rate_real": 2.4849555524460277e-05
    },
    {
      "debug/num_lat_loss": 1664.0,
      "debug/num_lat_total": 2298.0,
      "debug/num_tok_loss": 1664.0,
      "debug/num_tok_total": 2298.0,
      "epoch": 0.7808219178082192,
      "step": 57
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/ce_loss": 5.4375,
      "train/diffusion_loss": 0.46369606256484985
    },
    {
      "epoch": 0.7808219178082192,
      "step": 57,
      "train/learning_rate_real": 2.4849555524460277e-05
    },
    {
      "debug/num_lat_loss": 1589.0,
      "debug/num_lat_total": 2694.0,
      "debug/num_tok_loss": 1589.0,
      "debug/num_tok_total": 2694.0,
      "epoch": 0.7945205479452054,
      "step": 58
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/ce_loss": 5.71875,
      "train/diffusion_loss": 0.33193787932395935
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/learning_rate_real": 2.4840854434016808e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2214.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2214.0,
      "epoch": 0.7945205479452054,
      "step": 58
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/ce_loss": 5.40625,
      "train/diffusion_loss": 0.4997442066669464
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/learning_rate_real": 2.4840854434016808e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2435.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2435.0,
      "epoch": 0.7945205479452054,
      "step": 58
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/ce_loss": 5.5,
      "train/diffusion_loss": 0.44154196977615356
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/learning_rate_real": 2.4840854434016808e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 0.7945205479452054,
      "step": 58
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.42629536986351013
    },
    {
      "epoch": 0.7945205479452054,
      "step": 58,
      "train/learning_rate_real": 2.4840854434016808e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2627.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2627.0,
      "epoch": 0.8082191780821918,
      "step": 59
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/ce_loss": 5.46875,
      "train/diffusion_loss": 0.4058604836463928
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/learning_rate_real": 2.4831910359534216e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2643.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2643.0,
      "epoch": 0.8082191780821918,
      "step": 59
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/ce_loss": 5.53125,
      "train/diffusion_loss": 0.42272061109542847
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/learning_rate_real": 2.4831910359534216e-05
    },
    {
      "debug/num_lat_loss": 1585.0,
      "debug/num_lat_total": 2473.0,
      "debug/num_tok_loss": 1585.0,
      "debug/num_tok_total": 2473.0,
      "epoch": 0.8082191780821918,
      "step": 59
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/ce_loss": 5.71875,
      "train/diffusion_loss": 0.40059351921081543
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/learning_rate_real": 2.4831910359534216e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 0.8082191780821918,
      "step": 59
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.4610413908958435
    },
    {
      "epoch": 0.8082191780821918,
      "step": 59,
      "train/learning_rate_real": 2.4831910359534216e-05
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 1.708295464515686,
      "learning_rate": 2.4831910359534216e-05,
      "loss": 3.2409,
      "step": 60
    },
    {
      "debug/num_lat_loss": 1725.0,
      "debug/num_lat_total": 2527.0,
      "debug/num_tok_loss": 1725.0,
      "debug/num_tok_total": 2527.0,
      "epoch": 0.821917808219178,
      "step": 60
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/ce_loss": 5.5625,
      "train/diffusion_loss": 0.3908693790435791
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/learning_rate_real": 2.4822723477115968e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 0.821917808219178,
      "step": 60
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/ce_loss": 5.53125,
      "train/diffusion_loss": 0.42371171712875366
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/learning_rate_real": 2.4822723477115968e-05
    },
    {
      "debug/num_lat_loss": 1650.0,
      "debug/num_lat_total": 2581.0,
      "debug/num_tok_loss": 1650.0,
      "debug/num_tok_total": 2581.0,
      "epoch": 0.821917808219178,
      "step": 60
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.40264788269996643
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/learning_rate_real": 2.4822723477115968e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2817.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2817.0,
      "epoch": 0.821917808219178,
      "step": 60
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/ce_loss": 5.46875,
      "train/diffusion_loss": 0.356502503156662
    },
    {
      "epoch": 0.821917808219178,
      "step": 60,
      "train/learning_rate_real": 2.4822723477115968e-05
    },
    {
      "debug/num_lat_loss": 1814.0,
      "debug/num_lat_total": 3138.0,
      "debug/num_tok_loss": 1814.0,
      "debug/num_tok_total": 3138.0,
      "epoch": 0.8356164383561644,
      "step": 61
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/ce_loss": 5.4375,
      "train/diffusion_loss": 0.3347739279270172
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/learning_rate_real": 2.481329396764629e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2611.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2611.0,
      "epoch": 0.8356164383561644,
      "step": 61
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/ce_loss": 5.40625,
      "train/diffusion_loss": 0.4167521595954895
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/learning_rate_real": 2.481329396764629e-05
    },
    {
      "debug/num_lat_loss": 1663.0,
      "debug/num_lat_total": 2322.0,
      "debug/num_tok_loss": 1663.0,
      "debug/num_tok_total": 2322.0,
      "epoch": 0.8356164383561644,
      "step": 61
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/ce_loss": 5.375,
      "train/diffusion_loss": 0.4386259913444519
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/learning_rate_real": 2.481329396764629e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 0.8356164383561644,
      "step": 61
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.4437485337257385
    },
    {
      "epoch": 0.8356164383561644,
      "step": 61,
      "train/learning_rate_real": 2.481329396764629e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2430.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2430.0,
      "epoch": 0.8493150684931506,
      "step": 62
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/ce_loss": 5.375,
      "train/diffusion_loss": 0.44565480947494507
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/learning_rate_real": 2.4803622016786578e-05
    },
    {
      "debug/num_lat_loss": 1702.0,
      "debug/num_lat_total": 2550.0,
      "debug/num_tok_loss": 1702.0,
      "debug/num_tok_total": 2550.0,
      "epoch": 0.8493150684931506,
      "step": 62
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.39768069982528687
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/learning_rate_real": 2.4803622016786578e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 0.8493150684931506,
      "step": 62
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/ce_loss": 5.8125,
      "train/diffusion_loss": 0.4438689053058624
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/learning_rate_real": 2.4803622016786578e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2835.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2835.0,
      "epoch": 0.8493150684931506,
      "step": 62
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.4113655686378479
    },
    {
      "epoch": 0.8493150684931506,
      "step": 62,
      "train/learning_rate_real": 2.4803622016786578e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2392.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2392.0,
      "epoch": 0.863013698630137,
      "step": 63
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.4595162272453308
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/learning_rate_real": 2.479370781497175e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2799.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2799.0,
      "epoch": 0.863013698630137,
      "step": 63
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/ce_loss": 5.375,
      "train/diffusion_loss": 0.38731205463409424
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/learning_rate_real": 2.479370781497175e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2842.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2842.0,
      "epoch": 0.863013698630137,
      "step": 63
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/ce_loss": 5.21875,
      "train/diffusion_loss": 0.3685234487056732
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/learning_rate_real": 2.479370781497175e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 3082.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 3082.0,
      "epoch": 0.863013698630137,
      "step": 63
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/ce_loss": 5.375,
      "train/diffusion_loss": 0.3305037319660187
    },
    {
      "epoch": 0.863013698630137,
      "step": 63,
      "train/learning_rate_real": 2.479370781497175e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 0.8767123287671232,
      "step": 64
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/ce_loss": 5.21875,
      "train/diffusion_loss": 0.41264399886131287
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/learning_rate_real": 2.4783551557406515e-05
    },
    {
      "debug/num_lat_loss": 1687.0,
      "debug/num_lat_total": 2663.0,
      "debug/num_tok_loss": 1687.0,
      "debug/num_tok_total": 2663.0,
      "epoch": 0.8767123287671232,
      "step": 64
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/ce_loss": 5.21875,
      "train/diffusion_loss": 0.36778780817985535
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/learning_rate_real": 2.4783551557406515e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 0.8767123287671232,
      "step": 64
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.45238378643989563
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/learning_rate_real": 2.4783551557406515e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2450.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2450.0,
      "epoch": 0.8767123287671232,
      "step": 64
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.46885961294174194
    },
    {
      "epoch": 0.8767123287671232,
      "step": 64,
      "train/learning_rate_real": 2.4783551557406515e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2385.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2385.0,
      "epoch": 0.8904109589041096,
      "step": 65
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.45050710439682007
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/learning_rate_real": 2.4773153444061502e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2855.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2855.0,
      "epoch": 0.8904109589041096,
      "step": 65
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/ce_loss": 5.15625,
      "train/diffusion_loss": 0.38604146242141724
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/learning_rate_real": 2.4773153444061502e-05
    },
    {
      "debug/num_lat_loss": 1548.0,
      "debug/num_lat_total": 2369.0,
      "debug/num_tok_loss": 1548.0,
      "debug/num_tok_total": 2369.0,
      "epoch": 0.8904109589041096,
      "step": 65
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/ce_loss": 5.65625,
      "train/diffusion_loss": 0.4263710379600525
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/learning_rate_real": 2.4773153444061502e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 3074.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 3074.0,
      "epoch": 0.8904109589041096,
      "step": 65
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.31766679883003235
    },
    {
      "epoch": 0.8904109589041096,
      "step": 65,
      "train/learning_rate_real": 2.4773153444061502e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 0.9041095890410958,
      "step": 66
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/ce_loss": 5.1875,
      "train/diffusion_loss": 0.40880653262138367
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/learning_rate_real": 2.4762513679669348e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 0.9041095890410958,
      "step": 66
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/ce_loss": 5.15625,
      "train/diffusion_loss": 0.39718738198280334
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/learning_rate_real": 2.4762513679669348e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 0.9041095890410958,
      "step": 66
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.45373260974884033
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/learning_rate_real": 2.4762513679669348e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2414.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2414.0,
      "epoch": 0.9041095890410958,
      "step": 66
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/ce_loss": 5.53125,
      "train/diffusion_loss": 0.4466804265975952
    },
    {
      "epoch": 0.9041095890410958,
      "step": 66,
      "train/learning_rate_real": 2.4762513679669348e-05
    },
    {
      "debug/num_lat_loss": 1745.0,
      "debug/num_lat_total": 3007.0,
      "debug/num_tok_loss": 1745.0,
      "debug/num_tok_total": 3007.0,
      "epoch": 0.9178082191780822,
      "step": 67
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.28842657804489136
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/learning_rate_real": 2.4751632473720638e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2411.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2411.0,
      "epoch": 0.9178082191780822,
      "step": 67
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/ce_loss": 5.15625,
      "train/diffusion_loss": 0.4682995080947876
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/learning_rate_real": 2.4751632473720638e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 0.9178082191780822,
      "step": 67
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/ce_loss": 5.25,
      "train/diffusion_loss": 0.48682138323783875
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/learning_rate_real": 2.4751632473720638e-05
    },
    {
      "debug/num_lat_loss": 1610.0,
      "debug/num_lat_total": 2531.0,
      "debug/num_tok_loss": 1610.0,
      "debug/num_tok_total": 2531.0,
      "epoch": 0.9178082191780822,
      "step": 67
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/ce_loss": 5.1875,
      "train/diffusion_loss": 0.41317009925842285
    },
    {
      "epoch": 0.9178082191780822,
      "step": 67,
      "train/learning_rate_real": 2.4751632473720638e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2605.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2605.0,
      "epoch": 0.9315068493150684,
      "step": 68
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/ce_loss": 5.28125,
      "train/diffusion_loss": 0.4040594696998596
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/learning_rate_real": 2.474051004045981e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2861.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2861.0,
      "epoch": 0.9315068493150684,
      "step": 68
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/ce_loss": 5.3125,
      "train/diffusion_loss": 0.3841204047203064
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/learning_rate_real": 2.474051004045981e-05
    },
    {
      "debug/num_lat_loss": 1610.0,
      "debug/num_lat_total": 2451.0,
      "debug/num_tok_loss": 1610.0,
      "debug/num_tok_total": 2451.0,
      "epoch": 0.9315068493150684,
      "step": 68
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/ce_loss": 5.0625,
      "train/diffusion_loss": 0.3975687026977539
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/learning_rate_real": 2.474051004045981e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2410.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2410.0,
      "epoch": 0.9315068493150684,
      "step": 68
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.4634510278701782
    },
    {
      "epoch": 0.9315068493150684,
      "step": 68,
      "train/learning_rate_real": 2.474051004045981e-05
    },
    {
      "debug/num_lat_loss": 1617.0,
      "debug/num_lat_total": 2693.0,
      "debug/num_tok_loss": 1617.0,
      "debug/num_tok_total": 2693.0,
      "epoch": 0.9452054794520548,
      "step": 69
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/ce_loss": 5.25,
      "train/diffusion_loss": 0.35224559903144836
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/learning_rate_real": 2.472914659888092e-05
    },
    {
      "debug/num_lat_loss": 1657.0,
      "debug/num_lat_total": 2292.0,
      "debug/num_tok_loss": 1657.0,
      "debug/num_tok_total": 2292.0,
      "epoch": 0.9452054794520548,
      "step": 69
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.4498156011104584
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/learning_rate_real": 2.472914659888092e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2024.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2024.0,
      "epoch": 0.9452054794520548,
      "step": 69
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.5396973490715027
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/learning_rate_real": 2.472914659888092e-05
    },
    {
      "debug/num_lat_loss": 1592.0,
      "debug/num_lat_total": 2018.0,
      "debug/num_tok_loss": 1592.0,
      "debug/num_tok_total": 2018.0,
      "epoch": 0.9452054794520548,
      "step": 69
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.4777624309062958
    },
    {
      "epoch": 0.9452054794520548,
      "step": 69,
      "train/learning_rate_real": 2.472914659888092e-05
    },
    {
      "epoch": 0.958904109589041,
      "grad_norm": 1.7449594736099243,
      "learning_rate": 2.472914659888092e-05,
      "loss": 3.1653,
      "step": 70
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2412.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2412.0,
      "epoch": 0.958904109589041,
      "step": 70
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.4851177930831909
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/learning_rate_real": 2.4717542372723333e-05
    },
    {
      "debug/num_lat_loss": 1670.0,
      "debug/num_lat_total": 1879.0,
      "debug/num_tok_loss": 1670.0,
      "debug/num_tok_total": 1879.0,
      "epoch": 0.958904109589041,
      "step": 70
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/ce_loss": 5.1875,
      "train/diffusion_loss": 0.5450473427772522
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/learning_rate_real": 2.4717542372723333e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 0.958904109589041,
      "step": 70
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/ce_loss": 5.15625,
      "train/diffusion_loss": 0.40747514367103577
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/learning_rate_real": 2.4717542372723333e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 0.958904109589041,
      "step": 70
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.41900011897087097
    },
    {
      "epoch": 0.958904109589041,
      "step": 70,
      "train/learning_rate_real": 2.4717542372723333e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2627.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2627.0,
      "epoch": 0.9726027397260274,
      "step": 71
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/ce_loss": 5.625,
      "train/diffusion_loss": 0.40764084458351135
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/learning_rate_real": 2.470569759046732e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2681.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2681.0,
      "epoch": 0.9726027397260274,
      "step": 71
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/ce_loss": 5.03125,
      "train/diffusion_loss": 0.4096241891384125
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/learning_rate_real": 2.470569759046732e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2827.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2827.0,
      "epoch": 0.9726027397260274,
      "step": 71
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.3634895980358124
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/learning_rate_real": 2.470569759046732e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 0.9726027397260274,
      "step": 71
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.3556757867336273
    },
    {
      "epoch": 0.9726027397260274,
      "step": 71,
      "train/learning_rate_real": 2.470569759046732e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2877.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2877.0,
      "epoch": 0.9863013698630136,
      "step": 72
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/ce_loss": 5.0625,
      "train/diffusion_loss": 0.39283737540245056
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/learning_rate_real": 2.469361248532955e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 3048.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 3048.0,
      "epoch": 0.9863013698630136,
      "step": 72
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/ce_loss": 5.09375,
      "train/diffusion_loss": 0.33008840680122375
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/learning_rate_real": 2.469361248532955e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2879.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2879.0,
      "epoch": 0.9863013698630136,
      "step": 72
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.37761390209198
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/learning_rate_real": 2.469361248532955e-05
    },
    {
      "debug/num_lat_loss": 445.0,
      "debug/num_lat_total": 662.0,
      "debug/num_tok_loss": 445.0,
      "debug/num_tok_total": 662.0,
      "epoch": 0.9863013698630136,
      "step": 72
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/ce_loss": 7.09375,
      "train/diffusion_loss": 0.5194283723831177
    },
    {
      "epoch": 0.9863013698630136,
      "step": 72,
      "train/learning_rate_real": 2.469361248532955e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2690.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2690.0,
      "epoch": 1.0,
      "step": 73
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.4245181083679199
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/learning_rate_real": 2.4681287295258516e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 1.0,
      "step": 73
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.4211677014827728
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/learning_rate_real": 2.4681287295258516e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2018.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2018.0,
      "epoch": 1.0,
      "step": 73
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.5452603697776794
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/learning_rate_real": 2.4681287295258516e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.0,
      "step": 73
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.39598238468170166
    },
    {
      "epoch": 1.0,
      "step": 73,
      "train/learning_rate_real": 2.4681287295258516e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2624.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2624.0,
      "epoch": 1.0136986301369864,
      "step": 74
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.41032910346984863
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/learning_rate_real": 2.466872226292983e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2220.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2220.0,
      "epoch": 1.0136986301369864,
      "step": 74
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/ce_loss": 4.875,
      "train/diffusion_loss": 0.5221177935600281
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/learning_rate_real": 2.466872226292983e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 1.0136986301369864,
      "step": 74
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.4044923782348633
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/learning_rate_real": 2.466872226292983e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.0136986301369864,
      "step": 74
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/ce_loss": 5.4375,
      "train/diffusion_loss": 0.421188086271286
    },
    {
      "epoch": 1.0136986301369864,
      "step": 74,
      "train/learning_rate_real": 2.466872226292983e-05
    },
    {
      "debug/num_lat_loss": 1657.0,
      "debug/num_lat_total": 3166.0,
      "debug/num_tok_loss": 1657.0,
      "debug/num_tok_total": 3166.0,
      "epoch": 1.0273972602739727,
      "step": 75
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.2653091251850128
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/learning_rate_real": 2.4655917635741472e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2254.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2254.0,
      "epoch": 1.0273972602739727,
      "step": 75
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.48585981130599976
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/learning_rate_real": 2.4655917635741472e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2675.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2675.0,
      "epoch": 1.0273972602739727,
      "step": 75
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.44348567724227905
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/learning_rate_real": 2.4655917635741472e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2207.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2207.0,
      "epoch": 1.0273972602739727,
      "step": 75
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/ce_loss": 5.03125,
      "train/diffusion_loss": 0.4878855049610138
    },
    {
      "epoch": 1.0273972602739727,
      "step": 75,
      "train/learning_rate_real": 2.4655917635741472e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2690.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2690.0,
      "epoch": 1.0410958904109588,
      "step": 76
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.4145924746990204
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/learning_rate_real": 2.4642873665808873e-05
    },
    {
      "debug/num_lat_loss": 1747.0,
      "debug/num_lat_total": 2383.0,
      "debug/num_tok_loss": 1747.0,
      "debug/num_tok_total": 2383.0,
      "epoch": 1.0410958904109588,
      "step": 76
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.49800702929496765
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/learning_rate_real": 2.4642873665808873e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2667.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2667.0,
      "epoch": 1.0410958904109588,
      "step": 76
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.4373202919960022
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/learning_rate_real": 2.4642873665808873e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2857.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2857.0,
      "epoch": 1.0410958904109588,
      "step": 76
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.3920494318008423
    },
    {
      "epoch": 1.0410958904109588,
      "step": 76,
      "train/learning_rate_real": 2.4642873665808873e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 1.0547945205479452,
      "step": 77
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.39454302191734314
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/learning_rate_real": 2.462959060996001e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 3066.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 3066.0,
      "epoch": 1.0547945205479452,
      "step": 77
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/ce_loss": 5.21875,
      "train/diffusion_loss": 0.3013768494129181
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/learning_rate_real": 2.462959060996001e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2032.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2032.0,
      "epoch": 1.0547945205479452,
      "step": 77
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/ce_loss": 5.0,
      "train/diffusion_loss": 0.5480638146400452
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/learning_rate_real": 2.462959060996001e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2650.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2650.0,
      "epoch": 1.0547945205479452,
      "step": 77
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4419059753417969
    },
    {
      "epoch": 1.0547945205479452,
      "step": 77,
      "train/learning_rate_real": 2.462959060996001e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 3096.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 3096.0,
      "epoch": 1.0684931506849316,
      "step": 78
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.3151288628578186
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/learning_rate_real": 2.4616068729730294e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2860.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2860.0,
      "epoch": 1.0684931506849316,
      "step": 78
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.3643770217895508
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/learning_rate_real": 2.4616068729730294e-05
    },
    {
      "debug/num_lat_loss": 1610.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1610.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 1.0684931506849316,
      "step": 78
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.3915770947933197
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/learning_rate_real": 2.4616068729730294e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2852.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2852.0,
      "epoch": 1.0684931506849316,
      "step": 78
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.39558202028274536
    },
    {
      "epoch": 1.0684931506849316,
      "step": 78,
      "train/learning_rate_real": 2.4616068729730294e-05
    },
    {
      "debug/num_lat_loss": 1709.0,
      "debug/num_lat_total": 2137.0,
      "debug/num_tok_loss": 1709.0,
      "debug/num_tok_total": 2137.0,
      "epoch": 1.0821917808219177,
      "step": 79
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/ce_loss": 4.875,
      "train/diffusion_loss": 0.4808519780635834
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/learning_rate_real": 2.460230829135746e-05
    },
    {
      "debug/num_lat_loss": 1698.0,
      "debug/num_lat_total": 2234.0,
      "debug/num_tok_loss": 1698.0,
      "debug/num_tok_total": 2234.0,
      "epoch": 1.0821917808219177,
      "step": 79
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.487990140914917
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/learning_rate_real": 2.460230829135746e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2467.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2467.0,
      "epoch": 1.0821917808219177,
      "step": 79
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.4461616575717926
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/learning_rate_real": 2.460230829135746e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2677.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2677.0,
      "epoch": 1.0821917808219177,
      "step": 79
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.42311185598373413
    },
    {
      "epoch": 1.0821917808219177,
      "step": 79,
      "train/learning_rate_real": 2.460230829135746e-05
    },
    {
      "epoch": 1.095890410958904,
      "grad_norm": 1.6366914510726929,
      "learning_rate": 2.460230829135746e-05,
      "loss": 3.1839,
      "step": 80
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 1.095890410958904,
      "step": 80
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.4312282204627991
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/learning_rate_real": 2.4588309565776302e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2454.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2454.0,
      "epoch": 1.095890410958904,
      "step": 80
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.4743603467941284
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/learning_rate_real": 2.4588309565776302e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2443.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2443.0,
      "epoch": 1.095890410958904,
      "step": 80
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.48455047607421875
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/learning_rate_real": 2.4588309565776302e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2669.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2669.0,
      "epoch": 1.095890410958904,
      "step": 80
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.4093216359615326
    },
    {
      "epoch": 1.095890410958904,
      "step": 80,
      "train/learning_rate_real": 2.4588309565776302e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2668.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2668.0,
      "epoch": 1.1095890410958904,
      "step": 81
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.4128156304359436
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/learning_rate_real": 2.4574072828613354e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2876.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2876.0,
      "epoch": 1.1095890410958904,
      "step": 81
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/ce_loss": 5.0625,
      "train/diffusion_loss": 0.35143572092056274
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/learning_rate_real": 2.4574072828613354e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 1.1095890410958904,
      "step": 81
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.4372323751449585
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/learning_rate_real": 2.4574072828613354e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 3108.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 3108.0,
      "epoch": 1.1095890410958904,
      "step": 81
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.3405665457248688
    },
    {
      "epoch": 1.1095890410958904,
      "step": 81,
      "train/learning_rate_real": 2.4574072828613354e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 1.1232876712328768,
      "step": 82
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.40083348751068115
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/learning_rate_real": 2.455959836018145e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 3062.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 3062.0,
      "epoch": 1.1232876712328768,
      "step": 82
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/ce_loss": 4.875,
      "train/diffusion_loss": 0.2902744710445404
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/learning_rate_real": 2.455959836018145e-05
    },
    {
      "debug/num_lat_loss": 1699.0,
      "debug/num_lat_total": 2280.0,
      "debug/num_tok_loss": 1699.0,
      "debug/num_tok_total": 2280.0,
      "epoch": 1.1232876712328768,
      "step": 82
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.48062434792518616
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/learning_rate_real": 2.455959836018145e-05
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2765.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2765.0,
      "epoch": 1.1232876712328768,
      "step": 82
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/ce_loss": 4.9375,
      "train/diffusion_loss": 0.32621175050735474
    },
    {
      "epoch": 1.1232876712328768,
      "step": 82,
      "train/learning_rate_real": 2.455959836018145e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2657.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2657.0,
      "epoch": 1.1369863013698631,
      "step": 83
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.41724833846092224
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/learning_rate_real": 2.4544886445474215e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3088.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3088.0,
      "epoch": 1.1369863013698631,
      "step": 83
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.3163226842880249
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/learning_rate_real": 2.4544886445474215e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2423.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2423.0,
      "epoch": 1.1369863013698631,
      "step": 83
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.45068359375
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/learning_rate_real": 2.4544886445474215e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 3037.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 3037.0,
      "epoch": 1.1369863013698631,
      "step": 83
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.3273552358150482
    },
    {
      "epoch": 1.1369863013698631,
      "step": 83,
      "train/learning_rate_real": 2.4544886445474215e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2643.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2643.0,
      "epoch": 1.1506849315068493,
      "step": 84
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/ce_loss": 5.125,
      "train/diffusion_loss": 0.41567105054855347
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/learning_rate_real": 2.4529937374160438e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2448.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2448.0,
      "epoch": 1.1506849315068493,
      "step": 84
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/ce_loss": 4.90625,
      "train/diffusion_loss": 0.4494771957397461
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/learning_rate_real": 2.4529937374160438e-05
    },
    {
      "debug/num_lat_loss": 1751.0,
      "debug/num_lat_total": 2386.0,
      "debug/num_tok_loss": 1751.0,
      "debug/num_tok_total": 2386.0,
      "epoch": 1.1506849315068493,
      "step": 84
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.45726028084754944
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/learning_rate_real": 2.4529937374160438e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2621.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2621.0,
      "epoch": 1.1506849315068493,
      "step": 84
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/ce_loss": 5.03125,
      "train/diffusion_loss": 0.4082919955253601
    },
    {
      "epoch": 1.1506849315068493,
      "step": 84,
      "train/learning_rate_real": 2.4529937374160438e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2445.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2445.0,
      "epoch": 1.1643835616438356,
      "step": 85
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/ce_loss": 4.65625,
      "train/diffusion_loss": 0.45018747448921204
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/learning_rate_real": 2.4514751440578394e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2833.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2833.0,
      "epoch": 1.1643835616438356,
      "step": 85
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.36236572265625
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/learning_rate_real": 2.4514751440578394e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2428.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2428.0,
      "epoch": 1.1643835616438356,
      "step": 85
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.4757889211177826
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/learning_rate_real": 2.4514751440578394e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2668.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2668.0,
      "epoch": 1.1643835616438356,
      "step": 85
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.40141940116882324
    },
    {
      "epoch": 1.1643835616438356,
      "step": 85,
      "train/learning_rate_real": 2.4514751440578394e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2393.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2393.0,
      "epoch": 1.178082191780822,
      "step": 86
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.48776713013648987
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/learning_rate_real": 2.4499328943730026e-05
    },
    {
      "debug/num_lat_loss": 1699.0,
      "debug/num_lat_total": 2692.0,
      "debug/num_tok_loss": 1699.0,
      "debug/num_tok_total": 2692.0,
      "epoch": 1.178082191780822,
      "step": 86
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.3635094165802002
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/learning_rate_real": 2.4499328943730026e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2856.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2856.0,
      "epoch": 1.178082191780822,
      "step": 86
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.38390570878982544
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/learning_rate_real": 2.4499328943730026e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2422.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2422.0,
      "epoch": 1.178082191780822,
      "step": 86
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.4719712436199188
    },
    {
      "epoch": 1.178082191780822,
      "step": 86,
      "train/learning_rate_real": 2.4499328943730026e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 1.191780821917808,
      "step": 87
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.4493198096752167
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/learning_rate_real": 2.448367018727507e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2464.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2464.0,
      "epoch": 1.191780821917808,
      "step": 87
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.44079920649528503
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/learning_rate_real": 2.448367018727507e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2218.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2218.0,
      "epoch": 1.191780821917808,
      "step": 87
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/ce_loss": 4.8125,
      "train/diffusion_loss": 0.5176950097084045
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/learning_rate_real": 2.448367018727507e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2677.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2677.0,
      "epoch": 1.191780821917808,
      "step": 87
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4029632806777954
    },
    {
      "epoch": 1.191780821917808,
      "step": 87,
      "train/learning_rate_real": 2.448367018727507e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2404.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2404.0,
      "epoch": 1.2054794520547945,
      "step": 88
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.4859849512577057
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/learning_rate_real": 2.446777547952507e-05
    },
    {
      "debug/num_lat_loss": 1625.0,
      "debug/num_lat_total": 2067.0,
      "debug/num_tok_loss": 1625.0,
      "debug/num_tok_total": 2067.0,
      "epoch": 1.2054794520547945,
      "step": 88
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.5068560838699341
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/learning_rate_real": 2.446777547952507e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3051.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3051.0,
      "epoch": 1.2054794520547945,
      "step": 88
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.3534332513809204
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/learning_rate_real": 2.446777547952507e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2410.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2410.0,
      "epoch": 1.2054794520547945,
      "step": 88
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.45841628313064575
    },
    {
      "epoch": 1.2054794520547945,
      "step": 88,
      "train/learning_rate_real": 2.446777547952507e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 1.2191780821917808,
      "step": 89
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/ce_loss": 4.625,
      "train/diffusion_loss": 0.4496181309223175
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/learning_rate_real": 2.445164513343731e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2210.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2210.0,
      "epoch": 1.2191780821917808,
      "step": 89
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/ce_loss": 4.65625,
      "train/diffusion_loss": 0.5095124840736389
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/learning_rate_real": 2.445164513343731e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2244.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2244.0,
      "epoch": 1.2191780821917808,
      "step": 89
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/ce_loss": 4.84375,
      "train/diffusion_loss": 0.5054518580436707
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/learning_rate_real": 2.445164513343731e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 3260.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 3260.0,
      "epoch": 1.2191780821917808,
      "step": 89
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/ce_loss": 4.65625,
      "train/diffusion_loss": 0.2607570290565491
    },
    {
      "epoch": 1.2191780821917808,
      "step": 89,
      "train/learning_rate_real": 2.445164513343731e-05
    },
    {
      "epoch": 1.2328767123287672,
      "grad_norm": 1.9624649286270142,
      "learning_rate": 2.445164513343731e-05,
      "loss": 3.1204,
      "step": 90
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 1.2328767123287672,
      "step": 90
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4628406763076782
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/learning_rate_real": 2.443527946660865e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 3073.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 3073.0,
      "epoch": 1.2328767123287672,
      "step": 90
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.2992222309112549
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/learning_rate_real": 2.443527946660865e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2897.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2897.0,
      "epoch": 1.2328767123287672,
      "step": 90
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.36971840262413025
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/learning_rate_real": 2.443527946660865e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2889.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2889.0,
      "epoch": 1.2328767123287672,
      "step": 90
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/ce_loss": 4.5625,
      "train/diffusion_loss": 0.3495144248008728
    },
    {
      "epoch": 1.2328767123287672,
      "step": 90,
      "train/learning_rate_real": 2.443527946660865e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 3326.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 3326.0,
      "epoch": 1.2465753424657535,
      "step": 91
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/ce_loss": 4.5625,
      "train/diffusion_loss": 0.27441421151161194
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/learning_rate_real": 2.441867880126928e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2192.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2192.0,
      "epoch": 1.2465753424657535,
      "step": 91
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/ce_loss": 4.75,
      "train/diffusion_loss": 0.5229299068450928
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/learning_rate_real": 2.441867880126928e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 1.2465753424657535,
      "step": 91
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4148879945278168
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/learning_rate_real": 2.441867880126928e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3067.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3067.0,
      "epoch": 1.2465753424657535,
      "step": 91
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.30536091327667236
    },
    {
      "epoch": 1.2465753424657535,
      "step": 91,
      "train/learning_rate_real": 2.441867880126928e-05
    },
    {
      "debug/num_lat_loss": 1677.0,
      "debug/num_lat_total": 2094.0,
      "debug/num_tok_loss": 1677.0,
      "debug/num_tok_total": 2094.0,
      "epoch": 1.2602739726027397,
      "step": 92
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.4853772521018982
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/learning_rate_real": 2.4401843464276368e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 3220.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 3220.0,
      "epoch": 1.2602739726027397,
      "step": 92
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/ce_loss": 4.625,
      "train/diffusion_loss": 0.2949419319629669
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/learning_rate_real": 2.4401843464276368e-05
    },
    {
      "debug/num_lat_loss": 1653.0,
      "debug/num_lat_total": 2483.0,
      "debug/num_tok_loss": 1653.0,
      "debug/num_tok_total": 2483.0,
      "epoch": 1.2602739726027397,
      "step": 92
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/ce_loss": 4.71875,
      "train/diffusion_loss": 0.3998708128929138
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/learning_rate_real": 2.4401843464276368e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 3071.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 3071.0,
      "epoch": 1.2602739726027397,
      "step": 92
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/ce_loss": 4.875,
      "train/diffusion_loss": 0.28526005148887634
    },
    {
      "epoch": 1.2602739726027397,
      "step": 92,
      "train/learning_rate_real": 2.4401843464276368e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2826.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2826.0,
      "epoch": 1.273972602739726,
      "step": 93
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.36494237184524536
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/learning_rate_real": 2.4384773787107616e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.273972602739726,
      "step": 93
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.4222411513328552
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/learning_rate_real": 2.4384773787107616e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2240.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2240.0,
      "epoch": 1.273972602739726,
      "step": 93
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.4870891571044922
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/learning_rate_real": 2.4384773787107616e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 1.273972602739726,
      "step": 93
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.3951069116592407
    },
    {
      "epoch": 1.273972602739726,
      "step": 93,
      "train/learning_rate_real": 2.4384773787107616e-05
    },
    {
      "debug/num_lat_loss": 1698.0,
      "debug/num_lat_total": 2271.0,
      "debug/num_tok_loss": 1698.0,
      "debug/num_tok_total": 2271.0,
      "epoch": 1.2876712328767124,
      "step": 94
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.4701569974422455
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/learning_rate_real": 2.4367470105854766e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2868.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2868.0,
      "epoch": 1.2876712328767124,
      "step": 94
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.34901419281959534
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/learning_rate_real": 2.4367470105854766e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2200.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2200.0,
      "epoch": 1.2876712328767124,
      "step": 94
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.5008375644683838
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/learning_rate_real": 2.4367470105854766e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2420.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2420.0,
      "epoch": 1.2876712328767124,
      "step": 94
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4832378625869751
    },
    {
      "epoch": 1.2876712328767124,
      "step": 94,
      "train/learning_rate_real": 2.4367470105854766e-05
    },
    {
      "debug/num_lat_loss": 1728.0,
      "debug/num_lat_total": 2578.0,
      "debug/num_tok_loss": 1728.0,
      "debug/num_tok_total": 2578.0,
      "epoch": 1.3013698630136985,
      "step": 95
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.4089829623699188
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/learning_rate_real": 2.4349932761216942e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2026.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2026.0,
      "epoch": 1.3013698630136985,
      "step": 95
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.5384713411331177
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/learning_rate_real": 2.4349932761216942e-05
    },
    {
      "debug/num_lat_loss": 1482.0,
      "debug/num_lat_total": 2137.0,
      "debug/num_tok_loss": 1482.0,
      "debug/num_tok_total": 2137.0,
      "epoch": 1.3013698630136985,
      "step": 95
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.4377511143684387
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/learning_rate_real": 2.4349932761216942e-05
    },
    {
      "debug/num_lat_loss": 1757.0,
      "debug/num_lat_total": 2183.0,
      "debug/num_tok_loss": 1757.0,
      "debug/num_tok_total": 2183.0,
      "epoch": 1.3013698630136985,
      "step": 95
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/ce_loss": 4.65625,
      "train/diffusion_loss": 0.5004580020904541
    },
    {
      "epoch": 1.3013698630136985,
      "step": 95,
      "train/learning_rate_real": 2.4349932761216942e-05
    },
    {
      "debug/num_lat_loss": 1659.0,
      "debug/num_lat_total": 2826.0,
      "debug/num_tok_loss": 1659.0,
      "debug/num_tok_total": 2826.0,
      "epoch": 1.3150684931506849,
      "step": 96
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.3254546821117401
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/learning_rate_real": 2.433216209849396e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2857.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2857.0,
      "epoch": 1.3150684931506849,
      "step": 96
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.33290404081344604
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/learning_rate_real": 2.433216209849396e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 3070.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 3070.0,
      "epoch": 1.3150684931506849,
      "step": 96
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/ce_loss": 5.03125,
      "train/diffusion_loss": 0.3361597955226898
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/learning_rate_real": 2.433216209849396e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2858.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2858.0,
      "epoch": 1.3150684931506849,
      "step": 96
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/ce_loss": 4.96875,
      "train/diffusion_loss": 0.39105093479156494
    },
    {
      "epoch": 1.3150684931506849,
      "step": 96,
      "train/learning_rate_real": 2.433216209849396e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2608.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2608.0,
      "epoch": 1.3287671232876712,
      "step": 97
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/ce_loss": 4.375,
      "train/diffusion_loss": 0.4111551344394684
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/learning_rate_real": 2.4314158467579544e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2663.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2663.0,
      "epoch": 1.3287671232876712,
      "step": 97
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/ce_loss": 4.625,
      "train/diffusion_loss": 0.4159858524799347
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/learning_rate_real": 2.4314158467579544e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2891.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2891.0,
      "epoch": 1.3287671232876712,
      "step": 97
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.3672471344470978
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/learning_rate_real": 2.4314158467579544e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2221.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2221.0,
      "epoch": 1.3287671232876712,
      "step": 97
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/ce_loss": 4.40625,
      "train/diffusion_loss": 0.5292556881904602
    },
    {
      "epoch": 1.3287671232876712,
      "step": 97,
      "train/learning_rate_real": 2.4314158467579544e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 1.3424657534246576,
      "step": 98
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.43493154644966125
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/learning_rate_real": 2.429592222295441e-05
    },
    {
      "debug/num_lat_loss": 1622.0,
      "debug/num_lat_total": 2475.0,
      "debug/num_tok_loss": 1622.0,
      "debug/num_tok_total": 2475.0,
      "epoch": 1.3424657534246576,
      "step": 98
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.37283438444137573
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/learning_rate_real": 2.429592222295441e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 1.3424657534246576,
      "step": 98
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.4417295753955841
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/learning_rate_real": 2.429592222295441e-05
    },
    {
      "debug/num_lat_loss": 1644.0,
      "debug/num_lat_total": 2727.0,
      "debug/num_tok_loss": 1644.0,
      "debug/num_tok_total": 2727.0,
      "epoch": 1.3424657534246576,
      "step": 98
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/ce_loss": 4.40625,
      "train/diffusion_loss": 0.3511442244052887
    },
    {
      "epoch": 1.3424657534246576,
      "step": 98,
      "train/learning_rate_real": 2.429592222295441e-05
    },
    {
      "debug/num_lat_loss": 1737.0,
      "debug/num_lat_total": 2841.0,
      "debug/num_tok_loss": 1737.0,
      "debug/num_tok_total": 2841.0,
      "epoch": 1.356164383561644,
      "step": 99
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.36495158076286316
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/learning_rate_real": 2.42774537236793e-05
    },
    {
      "debug/num_lat_loss": 1436.0,
      "debug/num_lat_total": 2005.0,
      "debug/num_tok_loss": 1436.0,
      "debug/num_tok_total": 2005.0,
      "epoch": 1.356164383561644,
      "step": 99
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/ce_loss": 4.625,
      "train/diffusion_loss": 0.4353146255016327
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/learning_rate_real": 2.42774537236793e-05
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2501.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2501.0,
      "epoch": 1.356164383561644,
      "step": 99
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/ce_loss": 4.34375,
      "train/diffusion_loss": 0.3748423457145691
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/learning_rate_real": 2.42774537236793e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2891.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2891.0,
      "epoch": 1.356164383561644,
      "step": 99
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/ce_loss": 4.375,
      "train/diffusion_loss": 0.3602873682975769
    },
    {
      "epoch": 1.356164383561644,
      "step": 99,
      "train/learning_rate_real": 2.42774537236793e-05
    },
    {
      "epoch": 1.36986301369863,
      "grad_norm": 2.010390281677246,
      "learning_rate": 2.42774537236793e-05,
      "loss": 2.983,
      "step": 100
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2860.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2860.0,
      "epoch": 1.36986301369863,
      "step": 100
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/ce_loss": 4.59375,
      "train/diffusion_loss": 0.3802020847797394
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/learning_rate_real": 2.425875333338792e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 1.36986301369863,
      "step": 100
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/ce_loss": 4.6875,
      "train/diffusion_loss": 0.4197426736354828
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/learning_rate_real": 2.425875333338792e-05
    },
    {
      "debug/num_lat_loss": 1716.0,
      "debug/num_lat_total": 2512.0,
      "debug/num_tok_loss": 1716.0,
      "debug/num_tok_total": 2512.0,
      "epoch": 1.36986301369863,
      "step": 100
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.4053674042224884
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/learning_rate_real": 2.425875333338792e-05
    },
    {
      "debug/num_lat_loss": 1746.0,
      "debug/num_lat_total": 2184.0,
      "debug/num_tok_loss": 1746.0,
      "debug/num_tok_total": 2184.0,
      "epoch": 1.36986301369863,
      "step": 100
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.4971557855606079
    },
    {
      "epoch": 1.36986301369863,
      "step": 100,
      "train/learning_rate_real": 2.425875333338792e-05
    },
    {
      "debug/num_lat_loss": 1599.0,
      "debug/num_lat_total": 2669.0,
      "debug/num_tok_loss": 1599.0,
      "debug/num_tok_total": 2669.0,
      "epoch": 1.3835616438356164,
      "step": 101
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.34701186418533325
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/learning_rate_real": 2.4239821420279753e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2862.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2862.0,
      "epoch": 1.3835616438356164,
      "step": 101
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/ce_loss": 4.78125,
      "train/diffusion_loss": 0.3729672431945801
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/learning_rate_real": 2.4239821420279753e-05
    },
    {
      "debug/num_lat_loss": 1629.0,
      "debug/num_lat_total": 2978.0,
      "debug/num_tok_loss": 1629.0,
      "debug/num_tok_total": 2978.0,
      "epoch": 1.3835616438356164,
      "step": 101
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.2844712734222412
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/learning_rate_real": 2.4239821420279753e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 1.3835616438356164,
      "step": 101
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/ce_loss": 4.375,
      "train/diffusion_loss": 0.40753600001335144
    },
    {
      "epoch": 1.3835616438356164,
      "step": 101,
      "train/learning_rate_real": 2.4239821420279753e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 3038.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 3038.0,
      "epoch": 1.3972602739726028,
      "step": 102
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.3289642035961151
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/learning_rate_real": 2.422065835711285e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2665.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2665.0,
      "epoch": 1.3972602739726028,
      "step": 102
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/ce_loss": 4.40625,
      "train/diffusion_loss": 0.3820474445819855
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/learning_rate_real": 2.422065835711285e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2671.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2671.0,
      "epoch": 1.3972602739726028,
      "step": 102
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.4218079447746277
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/learning_rate_real": 2.422065835711285e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 1.3972602739726028,
      "step": 102
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.4562211334705353
    },
    {
      "epoch": 1.3972602739726028,
      "step": 102,
      "train/learning_rate_real": 2.422065835711285e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2874.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2874.0,
      "epoch": 1.410958904109589,
      "step": 103
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/ce_loss": 4.5625,
      "train/diffusion_loss": 0.3860173523426056
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/learning_rate_real": 2.4201264521196455e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2442.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2442.0,
      "epoch": 1.410958904109589,
      "step": 103
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.45044654607772827
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/learning_rate_real": 2.4201264521196455e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2619.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2619.0,
      "epoch": 1.410958904109589,
      "step": 103
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.4299888014793396
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/learning_rate_real": 2.4201264521196455e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2638.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2638.0,
      "epoch": 1.410958904109589,
      "step": 103
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/ce_loss": 4.4375,
      "train/diffusion_loss": 0.3971606194972992
    },
    {
      "epoch": 1.410958904109589,
      "step": 103,
      "train/learning_rate_real": 2.4201264521196455e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2223.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2223.0,
      "epoch": 1.4246575342465753,
      "step": 104
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/ce_loss": 4.15625,
      "train/diffusion_loss": 0.48428550362586975
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/learning_rate_real": 2.4181640294383585e-05
    },
    {
      "debug/num_lat_loss": 1674.0,
      "debug/num_lat_total": 2539.0,
      "debug/num_tok_loss": 1674.0,
      "debug/num_tok_total": 2539.0,
      "epoch": 1.4246575342465753,
      "step": 104
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/ce_loss": 4.40625,
      "train/diffusion_loss": 0.3819606602191925
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/learning_rate_real": 2.4181640294383585e-05
    },
    {
      "debug/num_lat_loss": 1690.0,
      "debug/num_lat_total": 2253.0,
      "debug/num_tok_loss": 1690.0,
      "debug/num_tok_total": 2253.0,
      "epoch": 1.4246575342465753,
      "step": 104
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.4630298614501953
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/learning_rate_real": 2.4181640294383585e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2633.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2633.0,
      "epoch": 1.4246575342465753,
      "step": 104
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/ce_loss": 4.34375,
      "train/diffusion_loss": 0.3922845125198364
    },
    {
      "epoch": 1.4246575342465753,
      "step": 104,
      "train/learning_rate_real": 2.4181640294383585e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2897.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2897.0,
      "epoch": 1.4383561643835616,
      "step": 105
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/ce_loss": 4.34375,
      "train/diffusion_loss": 0.37782275676727295
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/learning_rate_real": 2.4161786063063536e-05
    },
    {
      "debug/num_lat_loss": 1712.0,
      "debug/num_lat_total": 2358.0,
      "debug/num_tok_loss": 1712.0,
      "debug/num_tok_total": 2358.0,
      "epoch": 1.4383561643835616,
      "step": 105
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/ce_loss": 4.1875,
      "train/diffusion_loss": 0.4383047819137573
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/learning_rate_real": 2.4161786063063536e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2836.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2836.0,
      "epoch": 1.4383561643835616,
      "step": 105
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.3715423047542572
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/learning_rate_real": 2.4161786063063536e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 1.4383561643835616,
      "step": 105
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.4179925322532654
    },
    {
      "epoch": 1.4383561643835616,
      "step": 105,
      "train/learning_rate_real": 2.4161786063063536e-05
    },
    {
      "debug/num_lat_loss": 1625.0,
      "debug/num_lat_total": 2265.0,
      "debug/num_tok_loss": 1625.0,
      "debug/num_tok_total": 2265.0,
      "epoch": 1.452054794520548,
      "step": 106
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/ce_loss": 4.28125,
      "train/diffusion_loss": 0.4229045808315277
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/learning_rate_real": 2.4141702218154232e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2211.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2211.0,
      "epoch": 1.452054794520548,
      "step": 106
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.49752187728881836
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/learning_rate_real": 2.4141702218154232e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2611.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2611.0,
      "epoch": 1.452054794520548,
      "step": 106
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/ce_loss": 4.1875,
      "train/diffusion_loss": 0.4122409224510193
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/learning_rate_real": 2.4141702218154232e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2434.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2434.0,
      "epoch": 1.452054794520548,
      "step": 106
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.48291388154029846
    },
    {
      "epoch": 1.452054794520548,
      "step": 106,
      "train/learning_rate_real": 2.4141702218154232e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2204.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2204.0,
      "epoch": 1.4657534246575343,
      "step": 107
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/ce_loss": 4.28125,
      "train/diffusion_loss": 0.48381802439689636
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/learning_rate_real": 2.4121389155094564e-05
    },
    {
      "debug/num_lat_loss": 1645.0,
      "debug/num_lat_total": 2371.0,
      "debug/num_tok_loss": 1645.0,
      "debug/num_tok_total": 2371.0,
      "epoch": 1.4657534246575343,
      "step": 107
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/ce_loss": 4.53125,
      "train/diffusion_loss": 0.4406833052635193
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/learning_rate_real": 2.4121389155094564e-05
    },
    {
      "debug/num_lat_loss": 1723.0,
      "debug/num_lat_total": 2371.0,
      "debug/num_tok_loss": 1723.0,
      "debug/num_tok_total": 2371.0,
      "epoch": 1.4657534246575343,
      "step": 107
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.4781853258609772
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/learning_rate_real": 2.4121389155094564e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 1.4657534246575343,
      "step": 107
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.3677610754966736
    },
    {
      "epoch": 1.4657534246575343,
      "step": 107,
      "train/learning_rate_real": 2.4121389155094564e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2864.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2864.0,
      "epoch": 1.4794520547945205,
      "step": 108
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.35337358713150024
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/learning_rate_real": 2.410084727383659e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2456.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2456.0,
      "epoch": 1.4794520547945205,
      "step": 108
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/ce_loss": 4.125,
      "train/diffusion_loss": 0.47408199310302734
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/learning_rate_real": 2.410084727383659e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2409.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2409.0,
      "epoch": 1.4794520547945205,
      "step": 108
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.460326611995697
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/learning_rate_real": 2.410084727383659e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 3324.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 3324.0,
      "epoch": 1.4794520547945205,
      "step": 108
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/ce_loss": 4.3125,
      "train/diffusion_loss": 0.26926982402801514
    },
    {
      "epoch": 1.4794520547945205,
      "step": 108,
      "train/learning_rate_real": 2.410084727383659e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 1.4931506849315068,
      "step": 109
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/ce_loss": 4.1875,
      "train/diffusion_loss": 0.44318896532058716
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/learning_rate_real": 2.4080076978837658e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2879.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2879.0,
      "epoch": 1.4931506849315068,
      "step": 109
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/ce_loss": 4.15625,
      "train/diffusion_loss": 0.3627161979675293
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/learning_rate_real": 2.4080076978837658e-05
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2070.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2070.0,
      "epoch": 1.4931506849315068,
      "step": 109
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/ce_loss": 4.25,
      "train/diffusion_loss": 0.47383245825767517
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/learning_rate_real": 2.4080076978837658e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2443.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2443.0,
      "epoch": 1.4931506849315068,
      "step": 109
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.4685288071632385
    },
    {
      "epoch": 1.4931506849315068,
      "step": 109,
      "train/learning_rate_real": 2.4080076978837658e-05
    },
    {
      "epoch": 1.5068493150684932,
      "grad_norm": 1.5252450704574585,
      "learning_rate": 2.4080076978837658e-05,
      "loss": 3.0191,
      "step": 110
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2442.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2442.0,
      "epoch": 1.5068493150684932,
      "step": 110
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/ce_loss": 4.5,
      "train/diffusion_loss": 0.45642104744911194
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/learning_rate_real": 2.405907867905245e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 1.5068493150684932,
      "step": 110
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/ce_loss": 4.1875,
      "train/diffusion_loss": 0.48576435446739197
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/learning_rate_real": 2.405907867905245e-05
    },
    {
      "debug/num_lat_loss": 1679.0,
      "debug/num_lat_total": 2746.0,
      "debug/num_tok_loss": 1679.0,
      "debug/num_tok_total": 2746.0,
      "epoch": 1.5068493150684932,
      "step": 110
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/ce_loss": 4.125,
      "train/diffusion_loss": 0.3542007803916931
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/learning_rate_real": 2.405907867905245e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2609.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2609.0,
      "epoch": 1.5068493150684932,
      "step": 110
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.4068741798400879
    },
    {
      "epoch": 1.5068493150684932,
      "step": 110,
      "train/learning_rate_real": 2.405907867905245e-05
    },
    {
      "debug/num_lat_loss": 1738.0,
      "debug/num_lat_total": 2577.0,
      "debug/num_tok_loss": 1738.0,
      "debug/num_tok_total": 2577.0,
      "epoch": 1.5205479452054793,
      "step": 111
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.41163885593414307
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/learning_rate_real": 2.403785278792492e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2848.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2848.0,
      "epoch": 1.5205479452054793,
      "step": 111
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.36420491337776184
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/learning_rate_real": 2.403785278792492e-05
    },
    {
      "debug/num_lat_loss": 1747.0,
      "debug/num_lat_total": 2596.0,
      "debug/num_tok_loss": 1747.0,
      "debug/num_tok_total": 2596.0,
      "epoch": 1.5205479452054793,
      "step": 111
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/ce_loss": 4.46875,
      "train/diffusion_loss": 0.3811115622520447
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/learning_rate_real": 2.403785278792492e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 3021.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 3021.0,
      "epoch": 1.5205479452054793,
      "step": 111
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/ce_loss": 4.28125,
      "train/diffusion_loss": 0.33210745453834534
    },
    {
      "epoch": 1.5205479452054793,
      "step": 111,
      "train/learning_rate_real": 2.403785278792492e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2830.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2830.0,
      "epoch": 1.5342465753424657,
      "step": 112
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/ce_loss": 4.0625,
      "train/diffusion_loss": 0.35382625460624695
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/learning_rate_real": 2.401639972338016e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2603.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2603.0,
      "epoch": 1.5342465753424657,
      "step": 112
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.4346335232257843
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/learning_rate_real": 2.401639972338016e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 1.5342465753424657,
      "step": 112
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.331741064786911
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/learning_rate_real": 2.401639972338016e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 3063.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 3063.0,
      "epoch": 1.5342465753424657,
      "step": 112
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.3286246955394745
    },
    {
      "epoch": 1.5342465753424657,
      "step": 112,
      "train/learning_rate_real": 2.401639972338016e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2411.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2411.0,
      "epoch": 1.547945205479452,
      "step": 113
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/ce_loss": 3.84375,
      "train/diffusion_loss": 0.44130316376686096
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/learning_rate_real": 2.399471990781619e-05
    },
    {
      "debug/num_lat_loss": 1744.0,
      "debug/num_lat_total": 2801.0,
      "debug/num_tok_loss": 1744.0,
      "debug/num_tok_total": 2801.0,
      "epoch": 1.547945205479452,
      "step": 113
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/ce_loss": 4.03125,
      "train/diffusion_loss": 0.3459753096103668
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/learning_rate_real": 2.399471990781619e-05
    },
    {
      "debug/num_lat_loss": 1744.0,
      "debug/num_lat_total": 2598.0,
      "debug/num_tok_loss": 1744.0,
      "debug/num_tok_total": 2598.0,
      "epoch": 1.547945205479452,
      "step": 113
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.4222409129142761
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/learning_rate_real": 2.399471990781619e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2874.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2874.0,
      "epoch": 1.547945205479452,
      "step": 113
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/ce_loss": 4.0625,
      "train/diffusion_loss": 0.35831475257873535
    },
    {
      "epoch": 1.547945205479452,
      "step": 113,
      "train/learning_rate_real": 2.399471990781619e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 3056.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 3056.0,
      "epoch": 1.5616438356164384,
      "step": 114
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/ce_loss": 4.15625,
      "train/diffusion_loss": 0.2948022186756134
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/learning_rate_real": 2.3972813768095583e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 1.5616438356164384,
      "step": 114
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/ce_loss": 4.21875,
      "train/diffusion_loss": 0.4711344838142395
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/learning_rate_real": 2.3972813768095583e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 3543.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 3543.0,
      "epoch": 1.5616438356164384,
      "step": 114
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/ce_loss": 4.0625,
      "train/diffusion_loss": 0.23170864582061768
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/learning_rate_real": 2.3972813768095583e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2682.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2682.0,
      "epoch": 1.5616438356164384,
      "step": 114
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/ce_loss": 4.0625,
      "train/diffusion_loss": 0.40863677859306335
    },
    {
      "epoch": 1.5616438356164384,
      "step": 114,
      "train/learning_rate_real": 2.3972813768095583e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 1.5753424657534247,
      "step": 115
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/ce_loss": 3.71875,
      "train/diffusion_loss": 0.4591188132762909
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/learning_rate_real": 2.3950681735537135e-05
    },
    {
      "debug/num_lat_loss": 1639.0,
      "debug/num_lat_total": 2499.0,
      "debug/num_tok_loss": 1639.0,
      "debug/num_tok_total": 2499.0,
      "epoch": 1.5753424657534247,
      "step": 115
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/ce_loss": 3.953125,
      "train/diffusion_loss": 0.39648905396461487
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/learning_rate_real": 2.3950681735537135e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2213.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2213.0,
      "epoch": 1.5753424657534247,
      "step": 115
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/ce_loss": 3.890625,
      "train/diffusion_loss": 0.49079185724258423
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/learning_rate_real": 2.3950681735537135e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2230.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2230.0,
      "epoch": 1.5753424657534247,
      "step": 115
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/ce_loss": 4.03125,
      "train/diffusion_loss": 0.49304887652397156
    },
    {
      "epoch": 1.5753424657534247,
      "step": 115,
      "train/learning_rate_real": 2.3950681735537135e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 3094.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 3094.0,
      "epoch": 1.589041095890411,
      "step": 116
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/ce_loss": 3.859375,
      "train/diffusion_loss": 0.3350035548210144
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/learning_rate_real": 2.3928324245907324e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3081.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3081.0,
      "epoch": 1.589041095890411,
      "step": 116
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/ce_loss": 3.921875,
      "train/diffusion_loss": 0.30515187978744507
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/learning_rate_real": 2.3928324245907324e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2870.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2870.0,
      "epoch": 1.589041095890411,
      "step": 116
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/ce_loss": 3.953125,
      "train/diffusion_loss": 0.3786502778530121
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/learning_rate_real": 2.3928324245907324e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 1.589041095890411,
      "step": 116
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/ce_loss": 3.8125,
      "train/diffusion_loss": 0.415124773979187
    },
    {
      "epoch": 1.589041095890411,
      "step": 116,
      "train/learning_rate_real": 2.3928324245907324e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 1.6027397260273972,
      "step": 117
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.4104914665222168
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/learning_rate_real": 2.3905741739411735e-05
    },
    {
      "debug/num_lat_loss": 1679.0,
      "debug/num_lat_total": 3078.0,
      "debug/num_tok_loss": 1679.0,
      "debug/num_tok_total": 3078.0,
      "epoch": 1.6027397260273972,
      "step": 117
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/ce_loss": 3.921875,
      "train/diffusion_loss": 0.26384884119033813
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/learning_rate_real": 2.3905741739411735e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 1.6027397260273972,
      "step": 117
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.4527377486228943
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/learning_rate_real": 2.3905741739411735e-05
    },
    {
      "debug/num_lat_loss": 1643.0,
      "debug/num_lat_total": 2517.0,
      "debug/num_tok_loss": 1643.0,
      "debug/num_tok_total": 2517.0,
      "epoch": 1.6027397260273972,
      "step": 117
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/ce_loss": 4.03125,
      "train/diffusion_loss": 0.4090951979160309
    },
    {
      "epoch": 1.6027397260273972,
      "step": 117,
      "train/learning_rate_real": 2.3905741739411735e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3067.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3067.0,
      "epoch": 1.6164383561643836,
      "step": 118
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.30969923734664917
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/learning_rate_real": 2.3882934660686418e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2414.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2414.0,
      "epoch": 1.6164383561643836,
      "step": 118
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.46044453978538513
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/learning_rate_real": 2.3882934660686418e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2839.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2839.0,
      "epoch": 1.6164383561643836,
      "step": 118
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.4008499085903168
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/learning_rate_real": 2.3882934660686418e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2868.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2868.0,
      "epoch": 1.6164383561643836,
      "step": 118
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.33701223134994507
    },
    {
      "epoch": 1.6164383561643836,
      "step": 118,
      "train/learning_rate_real": 2.3882934660686418e-05
    },
    {
      "debug/num_lat_loss": 1727.0,
      "debug/num_lat_total": 2748.0,
      "debug/num_tok_loss": 1727.0,
      "debug/num_tok_total": 2748.0,
      "epoch": 1.6301369863013697,
      "step": 119
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/ce_loss": 3.84375,
      "train/diffusion_loss": 0.36649128794670105
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/learning_rate_real": 2.3859903458789094e-05
    },
    {
      "debug/num_lat_loss": 1707.0,
      "debug/num_lat_total": 2493.0,
      "debug/num_tok_loss": 1707.0,
      "debug/num_tok_total": 2493.0,
      "epoch": 1.6301369863013697,
      "step": 119
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.4311346709728241
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/learning_rate_real": 2.3859903458789094e-05
    },
    {
      "debug/num_lat_loss": 1697.0,
      "debug/num_lat_total": 2782.0,
      "debug/num_tok_loss": 1697.0,
      "debug/num_tok_total": 2782.0,
      "epoch": 1.6301369863013697,
      "step": 119
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.3630007207393646
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/learning_rate_real": 2.3859903458789094e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2421.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2421.0,
      "epoch": 1.6301369863013697,
      "step": 119
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/ce_loss": 3.8125,
      "train/diffusion_loss": 0.432765394449234
    },
    {
      "epoch": 1.6301369863013697,
      "step": 119,
      "train/learning_rate_real": 2.3859903458789094e-05
    },
    {
      "epoch": 1.643835616438356,
      "grad_norm": 1.4388368129730225,
      "learning_rate": 2.3859903458789094e-05,
      "loss": 2.8148,
      "step": 120
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 1.643835616438356,
      "step": 120
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/ce_loss": 3.953125,
      "train/diffusion_loss": 0.4270760118961334
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/learning_rate_real": 2.3836648587190354e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2220.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2220.0,
      "epoch": 1.643835616438356,
      "step": 120
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.5018504858016968
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/learning_rate_real": 2.3836648587190354e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2422.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2422.0,
      "epoch": 1.643835616438356,
      "step": 120
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.47332513332366943
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/learning_rate_real": 2.3836648587190354e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2864.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2864.0,
      "epoch": 1.643835616438356,
      "step": 120
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.3780655264854431
    },
    {
      "epoch": 1.643835616438356,
      "step": 120,
      "train/learning_rate_real": 2.3836648587190354e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 1.6575342465753424,
      "step": 121
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/ce_loss": 4.125,
      "train/diffusion_loss": 0.44194358587265015
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/learning_rate_real": 2.38131705037647e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2855.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2855.0,
      "epoch": 1.6575342465753424,
      "step": 121
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/ce_loss": 3.703125,
      "train/diffusion_loss": 0.3793834149837494
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/learning_rate_real": 2.38131705037647e-05
    },
    {
      "debug/num_lat_loss": 1745.0,
      "debug/num_lat_total": 2801.0,
      "debug/num_tok_loss": 1745.0,
      "debug/num_tok_total": 2801.0,
      "epoch": 1.6575342465753424,
      "step": 121
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.36281681060791016
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/learning_rate_real": 2.38131705037647e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2887.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2887.0,
      "epoch": 1.6575342465753424,
      "step": 121
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/ce_loss": 3.84375,
      "train/diffusion_loss": 0.3547899127006531
    },
    {
      "epoch": 1.6575342465753424,
      "step": 121,
      "train/learning_rate_real": 2.38131705037647e-05
    },
    {
      "debug/num_lat_loss": 1712.0,
      "debug/num_lat_total": 2153.0,
      "debug/num_tok_loss": 1712.0,
      "debug/num_tok_total": 2153.0,
      "epoch": 1.6712328767123288,
      "step": 122
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/ce_loss": 3.828125,
      "train/diffusion_loss": 0.49153730273246765
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/learning_rate_real": 2.3789469670781547e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2218.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2218.0,
      "epoch": 1.6712328767123288,
      "step": 122
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/ce_loss": 3.65625,
      "train/diffusion_loss": 0.5030884742736816
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/learning_rate_real": 2.3789469670781547e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2865.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2865.0,
      "epoch": 1.6712328767123288,
      "step": 122
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/ce_loss": 4.03125,
      "train/diffusion_loss": 0.3352532982826233
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/learning_rate_real": 2.3789469670781547e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 3069.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 3069.0,
      "epoch": 1.6712328767123288,
      "step": 122
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/ce_loss": 3.828125,
      "train/diffusion_loss": 0.32163891196250916
    },
    {
      "epoch": 1.6712328767123288,
      "step": 122,
      "train/learning_rate_real": 2.3789469670781547e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2383.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2383.0,
      "epoch": 1.6849315068493151,
      "step": 123
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.4190615117549896
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/learning_rate_real": 2.376554655489612e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2214.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2214.0,
      "epoch": 1.6849315068493151,
      "step": 123
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/ce_loss": 3.609375,
      "train/diffusion_loss": 0.5074918866157532
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/learning_rate_real": 2.376554655489612e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 1.6849315068493151,
      "step": 123
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/ce_loss": 3.671875,
      "train/diffusion_loss": 0.4351557791233063
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/learning_rate_real": 2.376554655489612e-05
    },
    {
      "debug/num_lat_loss": 1642.0,
      "debug/num_lat_total": 2306.0,
      "debug/num_tok_loss": 1642.0,
      "debug/num_tok_total": 2306.0,
      "epoch": 1.6849315068493151,
      "step": 123
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/ce_loss": 3.703125,
      "train/diffusion_loss": 0.4449380934238434
    },
    {
      "epoch": 1.6849315068493151,
      "step": 123,
      "train/learning_rate_real": 2.376554655489612e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2881.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2881.0,
      "epoch": 1.6986301369863015,
      "step": 124
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/ce_loss": 3.703125,
      "train/diffusion_loss": 0.36390984058380127
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/learning_rate_real": 2.3741401627140242e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2810.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2810.0,
      "epoch": 1.6986301369863015,
      "step": 124
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.3646058440208435
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/learning_rate_real": 2.3741401627140242e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2642.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2642.0,
      "epoch": 1.6986301369863015,
      "step": 124
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.40764036774635315
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/learning_rate_real": 2.3741401627140242e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2195.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2195.0,
      "epoch": 1.6986301369863015,
      "step": 124
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/ce_loss": 4.09375,
      "train/diffusion_loss": 0.47080162167549133
    },
    {
      "epoch": 1.6986301369863015,
      "step": 124,
      "train/learning_rate_real": 2.3741401627140242e-05
    },
    {
      "debug/num_lat_loss": 1679.0,
      "debug/num_lat_total": 2225.0,
      "debug/num_tok_loss": 1679.0,
      "debug/num_tok_total": 2225.0,
      "epoch": 1.7123287671232876,
      "step": 125
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/ce_loss": 3.546875,
      "train/diffusion_loss": 0.464213103055954
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/learning_rate_real": 2.3717035362913105e-05
    },
    {
      "debug/num_lat_loss": 1592.0,
      "debug/num_lat_total": 2696.0,
      "debug/num_tok_loss": 1592.0,
      "debug/num_tok_total": 2696.0,
      "epoch": 1.7123287671232876,
      "step": 125
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/ce_loss": 3.90625,
      "train/diffusion_loss": 0.3316189646720886
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/learning_rate_real": 2.3717035362913105e-05
    },
    {
      "debug/num_lat_loss": 1578.0,
      "debug/num_lat_total": 2460.0,
      "debug/num_tok_loss": 1578.0,
      "debug/num_tok_total": 2460.0,
      "epoch": 1.7123287671232876,
      "step": 125
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/ce_loss": 3.796875,
      "train/diffusion_loss": 0.39441150426864624
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/learning_rate_real": 2.3717035362913105e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2464.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2464.0,
      "epoch": 1.7123287671232876,
      "step": 125
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/ce_loss": 3.625,
      "train/diffusion_loss": 0.4529780447483063
    },
    {
      "epoch": 1.7123287671232876,
      "step": 125,
      "train/learning_rate_real": 2.3717035362913105e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.726027397260274,
      "step": 126
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/ce_loss": 3.71875,
      "train/diffusion_loss": 0.41818350553512573
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/learning_rate_real": 2.3692448241971866e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 1.726027397260274,
      "step": 126
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/ce_loss": 3.828125,
      "train/diffusion_loss": 0.4160309433937073
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/learning_rate_real": 2.3692448241971866e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2413.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2413.0,
      "epoch": 1.726027397260274,
      "step": 126
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.4794480800628662
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/learning_rate_real": 2.3692448241971866e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2695.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2695.0,
      "epoch": 1.726027397260274,
      "step": 126
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/ce_loss": 3.765625,
      "train/diffusion_loss": 0.4259827435016632
    },
    {
      "epoch": 1.726027397260274,
      "step": 126,
      "train/learning_rate_real": 2.3692448241971866e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2427.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2427.0,
      "epoch": 1.7397260273972601,
      "step": 127
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.4588565230369568
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/learning_rate_real": 2.366764074842222e-05
    },
    {
      "debug/num_lat_loss": 1683.0,
      "debug/num_lat_total": 2430.0,
      "debug/num_tok_loss": 1683.0,
      "debug/num_tok_total": 2430.0,
      "epoch": 1.7397260273972601,
      "step": 127
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/ce_loss": 3.765625,
      "train/diffusion_loss": 0.42775484919548035
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/learning_rate_real": 2.366764074842222e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2229.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2229.0,
      "epoch": 1.7397260273972601,
      "step": 127
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/ce_loss": 3.65625,
      "train/diffusion_loss": 0.4776081442832947
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/learning_rate_real": 2.366764074842222e-05
    },
    {
      "debug/num_lat_loss": 1598.0,
      "debug/num_lat_total": 2917.0,
      "debug/num_tok_loss": 1598.0,
      "debug/num_tok_total": 2917.0,
      "epoch": 1.7397260273972601,
      "step": 127
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.2688691020011902
    },
    {
      "epoch": 1.7397260273972601,
      "step": 127,
      "train/learning_rate_real": 2.366764074842222e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 1.7534246575342465,
      "step": 128
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/ce_loss": 3.609375,
      "train/diffusion_loss": 0.41486573219299316
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/learning_rate_real": 2.364261337070888e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2188.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2188.0,
      "epoch": 1.7534246575342465,
      "step": 128
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/ce_loss": 3.625,
      "train/diffusion_loss": 0.5014437437057495
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/learning_rate_real": 2.364261337070888e-05
    },
    {
      "debug/num_lat_loss": 1660.0,
      "debug/num_lat_total": 2525.0,
      "debug/num_tok_loss": 1660.0,
      "debug/num_tok_total": 2525.0,
      "epoch": 1.7534246575342465,
      "step": 128
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/ce_loss": 3.484375,
      "train/diffusion_loss": 0.40325120091438293
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/learning_rate_real": 2.364261337070888e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2832.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2832.0,
      "epoch": 1.7534246575342465,
      "step": 128
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/ce_loss": 4.09375,
      "train/diffusion_loss": 0.3543040454387665
    },
    {
      "epoch": 1.7534246575342465,
      "step": 128,
      "train/learning_rate_real": 2.364261337070888e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2445.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2445.0,
      "epoch": 1.7671232876712328,
      "step": 129
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/ce_loss": 3.859375,
      "train/diffusion_loss": 0.472688764333725
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/learning_rate_real": 2.361736660160593e-05
    },
    {
      "debug/num_lat_loss": 1816.0,
      "debug/num_lat_total": 3572.0,
      "debug/num_tok_loss": 1816.0,
      "debug/num_tok_total": 3572.0,
      "epoch": 1.7671232876712328,
      "step": 129
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.22201991081237793
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/learning_rate_real": 2.361736660160593e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2410.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2410.0,
      "epoch": 1.7671232876712328,
      "step": 129
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.44313621520996094
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/learning_rate_real": 2.361736660160593e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2825.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2825.0,
      "epoch": 1.7671232876712328,
      "step": 129
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/ce_loss": 3.625,
      "train/diffusion_loss": 0.3336658179759979
    },
    {
      "epoch": 1.7671232876712328,
      "step": 129,
      "train/learning_rate_real": 2.361736660160593e-05
    },
    {
      "epoch": 1.7808219178082192,
      "grad_norm": 1.576515793800354,
      "learning_rate": 2.361736660160593e-05,
      "loss": 2.9194,
      "step": 130
    },
    {
      "debug/num_lat_loss": 1646.0,
      "debug/num_lat_total": 2308.0,
      "debug/num_tok_loss": 1646.0,
      "debug/num_tok_total": 2308.0,
      "epoch": 1.7808219178082192,
      "step": 130
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.43932580947875977
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/learning_rate_real": 2.3591900938207147e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2868.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2868.0,
      "epoch": 1.7808219178082192,
      "step": 130
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/ce_loss": 3.6875,
      "train/diffusion_loss": 0.3732472360134125
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/learning_rate_real": 2.3591900938207147e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 1.7808219178082192,
      "step": 130
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/ce_loss": 3.515625,
      "train/diffusion_loss": 0.45174187421798706
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/learning_rate_real": 2.3591900938207147e-05
    },
    {
      "debug/num_lat_loss": 1692.0,
      "debug/num_lat_total": 2887.0,
      "debug/num_tok_loss": 1692.0,
      "debug/num_tok_total": 2887.0,
      "epoch": 1.7808219178082192,
      "step": 130
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/ce_loss": 3.734375,
      "train/diffusion_loss": 0.32698309421539307
    },
    {
      "epoch": 1.7808219178082192,
      "step": 130,
      "train/learning_rate_real": 2.3591900938207147e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2840.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2840.0,
      "epoch": 1.7945205479452055,
      "step": 131
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/ce_loss": 3.5,
      "train/diffusion_loss": 0.34751012921333313
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/learning_rate_real": 2.3566216881916202e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 1.7945205479452055,
      "step": 131
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.4357163906097412
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/learning_rate_real": 2.3566216881916202e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 2584.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 2584.0,
      "epoch": 1.7945205479452055,
      "step": 131
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/ce_loss": 3.546875,
      "train/diffusion_loss": 0.41815292835235596
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/learning_rate_real": 2.3566216881916202e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 1.7945205479452055,
      "step": 131
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/ce_loss": 3.765625,
      "train/diffusion_loss": 0.36779430508613586
    },
    {
      "epoch": 1.7945205479452055,
      "step": 131,
      "train/learning_rate_real": 2.3566216881916202e-05
    },
    {
      "debug/num_lat_loss": 1757.0,
      "debug/num_lat_total": 2171.0,
      "debug/num_tok_loss": 1757.0,
      "debug/num_tok_total": 2171.0,
      "epoch": 1.808219178082192,
      "step": 132
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.47450587153434753
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/learning_rate_real": 2.35403149384368e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2014.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2014.0,
      "epoch": 1.808219178082192,
      "step": 132
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/ce_loss": 3.4375,
      "train/diffusion_loss": 0.5649107694625854
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/learning_rate_real": 2.35403149384368e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2633.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2633.0,
      "epoch": 1.808219178082192,
      "step": 132
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/ce_loss": 3.5625,
      "train/diffusion_loss": 0.4057047963142395
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/learning_rate_real": 2.35403149384368e-05
    },
    {
      "debug/num_lat_loss": 1647.0,
      "debug/num_lat_total": 2930.0,
      "debug/num_tok_loss": 1647.0,
      "debug/num_tok_total": 2930.0,
      "epoch": 1.808219178082192,
      "step": 132
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/ce_loss": 3.625,
      "train/diffusion_loss": 0.28367847204208374
    },
    {
      "epoch": 1.808219178082192,
      "step": 132,
      "train/learning_rate_real": 2.35403149384368e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 1.821917808219178,
      "step": 133
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.44793954491615295
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/learning_rate_real": 2.3514195617762706e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 1.821917808219178,
      "step": 133
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/ce_loss": 3.46875,
      "train/diffusion_loss": 0.4012427031993866
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/learning_rate_real": 2.3514195617762706e-05
    },
    {
      "debug/num_lat_loss": 1748.0,
      "debug/num_lat_total": 2788.0,
      "debug/num_tok_loss": 1748.0,
      "debug/num_tok_total": 2788.0,
      "epoch": 1.821917808219178,
      "step": 133
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/ce_loss": 3.484375,
      "train/diffusion_loss": 0.3418940603733063
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/learning_rate_real": 2.3514195617762706e-05
    },
    {
      "debug/num_lat_loss": 1572.0,
      "debug/num_lat_total": 2204.0,
      "debug/num_tok_loss": 1572.0,
      "debug/num_tok_total": 2204.0,
      "epoch": 1.821917808219178,
      "step": 133
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.4106413424015045
    },
    {
      "epoch": 1.821917808219178,
      "step": 133,
      "train/learning_rate_real": 2.3514195617762706e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2190.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2190.0,
      "epoch": 1.8356164383561644,
      "step": 134
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/ce_loss": 3.9375,
      "train/diffusion_loss": 0.5045044422149658
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/learning_rate_real": 2.348785943416772e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2641.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2641.0,
      "epoch": 1.8356164383561644,
      "step": 134
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.38860100507736206
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/learning_rate_real": 2.348785943416772e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 1.8356164383561644,
      "step": 134
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.393936425447464
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/learning_rate_real": 2.348785943416772e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 1.8356164383561644,
      "step": 134
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/ce_loss": 3.5,
      "train/diffusion_loss": 0.44742295145988464
    },
    {
      "epoch": 1.8356164383561644,
      "step": 134,
      "train/learning_rate_real": 2.348785943416772e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2402.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2402.0,
      "epoch": 1.8493150684931505,
      "step": 135
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.43890005350112915
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/learning_rate_real": 2.3461306906195524e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2634.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2634.0,
      "epoch": 1.8493150684931505,
      "step": 135
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.4111637473106384
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/learning_rate_real": 2.3461306906195524e-05
    },
    {
      "debug/num_lat_loss": 1638.0,
      "debug/num_lat_total": 2712.0,
      "debug/num_tok_loss": 1638.0,
      "debug/num_tok_total": 2712.0,
      "epoch": 1.8493150684931505,
      "step": 135
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/ce_loss": 3.484375,
      "train/diffusion_loss": 0.3159174621105194
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/learning_rate_real": 2.3461306906195524e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3102.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3102.0,
      "epoch": 1.8493150684931505,
      "step": 135
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/ce_loss": 3.4375,
      "train/diffusion_loss": 0.30301010608673096
    },
    {
      "epoch": 1.8493150684931505,
      "step": 135,
      "train/learning_rate_real": 2.3461306906195524e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2190.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2190.0,
      "epoch": 1.8630136986301369,
      "step": 136
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/ce_loss": 3.515625,
      "train/diffusion_loss": 0.5018863081932068
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/learning_rate_real": 2.3434538556649515e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2203.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2203.0,
      "epoch": 1.8630136986301369,
      "step": 136
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.5082259774208069
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/learning_rate_real": 2.3434538556649515e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 2184.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 2184.0,
      "epoch": 1.8630136986301369,
      "step": 136
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/ce_loss": 3.640625,
      "train/diffusion_loss": 0.4796072542667389
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/learning_rate_real": 2.3434538556649515e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 1.8630136986301369,
      "step": 136
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/ce_loss": 3.671875,
      "train/diffusion_loss": 0.4622892439365387
    },
    {
      "epoch": 1.8630136986301369,
      "step": 136,
      "train/learning_rate_real": 2.3434538556649515e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2692.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2692.0,
      "epoch": 1.8767123287671232,
      "step": 137
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/ce_loss": 3.359375,
      "train/diffusion_loss": 0.39755386114120483
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/learning_rate_real": 2.340755491258248e-05
    },
    {
      "debug/num_lat_loss": 1689.0,
      "debug/num_lat_total": 2460.0,
      "debug/num_tok_loss": 1689.0,
      "debug/num_tok_total": 2460.0,
      "epoch": 1.8767123287671232,
      "step": 137
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/ce_loss": 3.5,
      "train/diffusion_loss": 0.4218902289867401
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/learning_rate_real": 2.340755491258248e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2195.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2195.0,
      "epoch": 1.8767123287671232,
      "step": 137
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.5013386607170105
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/learning_rate_real": 2.340755491258248e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2611.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2611.0,
      "epoch": 1.8767123287671232,
      "step": 137
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.4383437931537628
    },
    {
      "epoch": 1.8767123287671232,
      "step": 137,
      "train/learning_rate_real": 2.340755491258248e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 1.8904109589041096,
      "step": 138
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/ce_loss": 3.359375,
      "train/diffusion_loss": 0.3976190388202667
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/learning_rate_real": 2.3380356505286216e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2621.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2621.0,
      "epoch": 1.8904109589041096,
      "step": 138
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/ce_loss": 3.71875,
      "train/diffusion_loss": 0.40847277641296387
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/learning_rate_real": 2.3380356505286216e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2660.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2660.0,
      "epoch": 1.8904109589041096,
      "step": 138
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/ce_loss": 3.375,
      "train/diffusion_loss": 0.42175373435020447
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/learning_rate_real": 2.3380356505286216e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 1.8904109589041096,
      "step": 138
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/ce_loss": 3.671875,
      "train/diffusion_loss": 0.38470953702926636
    },
    {
      "epoch": 1.8904109589041096,
      "step": 138,
      "train/learning_rate_real": 2.3380356505286216e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2231.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2231.0,
      "epoch": 1.904109589041096,
      "step": 139
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/ce_loss": 3.46875,
      "train/diffusion_loss": 0.5015687346458435
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/learning_rate_real": 2.335294387028109e-05
    },
    {
      "debug/num_lat_loss": 1723.0,
      "debug/num_lat_total": 2364.0,
      "debug/num_tok_loss": 1723.0,
      "debug/num_tok_total": 2364.0,
      "epoch": 1.904109589041096,
      "step": 139
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.46811532974243164
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/learning_rate_real": 2.335294387028109e-05
    },
    {
      "debug/num_lat_loss": 1680.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1680.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 1.904109589041096,
      "step": 139
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.3847237229347229
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/learning_rate_real": 2.335294387028109e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 3114.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 3114.0,
      "epoch": 1.904109589041096,
      "step": 139
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/ce_loss": 3.515625,
      "train/diffusion_loss": 0.32092833518981934
    },
    {
      "epoch": 1.904109589041096,
      "step": 139,
      "train/learning_rate_real": 2.335294387028109e-05
    },
    {
      "epoch": 1.9178082191780823,
      "grad_norm": 1.6461002826690674,
      "learning_rate": 2.335294387028109e-05,
      "loss": 2.9031,
      "step": 140
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2182.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2182.0,
      "epoch": 1.9178082191780823,
      "step": 140
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.5311415195465088
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/learning_rate_real": 2.3325317547305485e-05
    },
    {
      "debug/num_lat_loss": 1607.0,
      "debug/num_lat_total": 2197.0,
      "debug/num_tok_loss": 1607.0,
      "debug/num_tok_total": 2197.0,
      "epoch": 1.9178082191780823,
      "step": 140
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/ce_loss": 3.375,
      "train/diffusion_loss": 0.45163846015930176
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/learning_rate_real": 2.3325317547305485e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2208.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2208.0,
      "epoch": 1.9178082191780823,
      "step": 140
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.4920024275779724
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/learning_rate_real": 2.3325317547305485e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 1.9178082191780823,
      "step": 140
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.43166494369506836
    },
    {
      "epoch": 1.9178082191780823,
      "step": 140,
      "train/learning_rate_real": 2.3325317547305485e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 2813.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 2813.0,
      "epoch": 1.9315068493150684,
      "step": 141
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/ce_loss": 3.75,
      "train/diffusion_loss": 0.3822256922721863
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/learning_rate_real": 2.3297478080305162e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 1.9315068493150684,
      "step": 141
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/ce_loss": 4.0,
      "train/diffusion_loss": 0.3977663516998291
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/learning_rate_real": 2.3297478080305162e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2903.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2903.0,
      "epoch": 1.9315068493150684,
      "step": 141
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.3696616291999817
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/learning_rate_real": 2.3297478080305162e-05
    },
    {
      "debug/num_lat_loss": 1604.0,
      "debug/num_lat_total": 2076.0,
      "debug/num_tok_loss": 1604.0,
      "debug/num_tok_total": 2076.0,
      "epoch": 1.9315068493150684,
      "step": 141
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.4954376816749573
    },
    {
      "epoch": 1.9315068493150684,
      "step": 141,
      "train/learning_rate_real": 2.3297478080305162e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2856.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2856.0,
      "epoch": 1.9452054794520548,
      "step": 142
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.3527717590332031
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/learning_rate_real": 2.3269426017422576e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2470.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2470.0,
      "epoch": 1.9452054794520548,
      "step": 142
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.4418691396713257
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/learning_rate_real": 2.3269426017422576e-05
    },
    {
      "debug/num_lat_loss": 1747.0,
      "debug/num_lat_total": 2610.0,
      "debug/num_tok_loss": 1747.0,
      "debug/num_tok_total": 2610.0,
      "epoch": 1.9452054794520548,
      "step": 142
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/ce_loss": 3.59375,
      "train/diffusion_loss": 0.41341838240623474
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/learning_rate_real": 2.3269426017422576e-05
    },
    {
      "debug/num_lat_loss": 1718.0,
      "debug/num_lat_total": 2343.0,
      "debug/num_tok_loss": 1718.0,
      "debug/num_tok_total": 2343.0,
      "epoch": 1.9452054794520548,
      "step": 142
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/ce_loss": 3.5,
      "train/diffusion_loss": 0.476648211479187
    },
    {
      "epoch": 1.9452054794520548,
      "step": 142,
      "train/learning_rate_real": 2.3269426017422576e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2221.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2221.0,
      "epoch": 1.958904109589041,
      "step": 143
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/ce_loss": 3.34375,
      "train/diffusion_loss": 0.503341794013977
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/learning_rate_real": 2.3241161910986053e-05
    },
    {
      "debug/num_lat_loss": 1712.0,
      "debug/num_lat_total": 2588.0,
      "debug/num_tok_loss": 1712.0,
      "debug/num_tok_total": 2588.0,
      "epoch": 1.958904109589041,
      "step": 143
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.39334893226623535
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/learning_rate_real": 2.3241161910986053e-05
    },
    {
      "debug/num_lat_loss": 1819.0,
      "debug/num_lat_total": 2928.0,
      "debug/num_tok_loss": 1819.0,
      "debug/num_tok_total": 2928.0,
      "epoch": 1.958904109589041,
      "step": 143
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/ce_loss": 3.484375,
      "train/diffusion_loss": 0.3769533932209015
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/learning_rate_real": 2.3241161910986053e-05
    },
    {
      "debug/num_lat_loss": 1741.0,
      "debug/num_lat_total": 2599.0,
      "debug/num_tok_loss": 1741.0,
      "debug/num_tok_total": 2599.0,
      "epoch": 1.958904109589041,
      "step": 143
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/ce_loss": 3.328125,
      "train/diffusion_loss": 0.41423624753952026
    },
    {
      "epoch": 1.958904109589041,
      "step": 143,
      "train/learning_rate_real": 2.3241161910986053e-05
    },
    {
      "debug/num_lat_loss": 1381.0,
      "debug/num_lat_total": 2174.0,
      "debug/num_tok_loss": 1381.0,
      "debug/num_tok_total": 2174.0,
      "epoch": 1.9726027397260273,
      "step": 144
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/ce_loss": 3.46875,
      "train/diffusion_loss": 0.3866157829761505
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/learning_rate_real": 2.321268631749894e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2196.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2196.0,
      "epoch": 1.9726027397260273,
      "step": 144
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.5139296054840088
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/learning_rate_real": 2.321268631749894e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 3262.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 3262.0,
      "epoch": 1.9726027397260273,
      "step": 144
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.26967543363571167
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/learning_rate_real": 2.321268631749894e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2860.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2860.0,
      "epoch": 1.9726027397260273,
      "step": 144
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.37319138646125793
    },
    {
      "epoch": 1.9726027397260273,
      "step": 144,
      "train/learning_rate_real": 2.321268631749894e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2839.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2839.0,
      "epoch": 1.9863013698630136,
      "step": 145
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/ce_loss": 3.265625,
      "train/diffusion_loss": 0.3564004898071289
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/learning_rate_real": 2.318399979762863e-05
    },
    {
      "debug/num_lat_loss": 1600.0,
      "debug/num_lat_total": 2254.0,
      "debug/num_tok_loss": 1600.0,
      "debug/num_tok_total": 2254.0,
      "epoch": 1.9863013698630136,
      "step": 145
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/ce_loss": 3.4375,
      "train/diffusion_loss": 0.46334365010261536
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/learning_rate_real": 2.318399979762863e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2466.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2466.0,
      "epoch": 1.9863013698630136,
      "step": 145
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.47243303060531616
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/learning_rate_real": 2.318399979762863e-05
    },
    {
      "debug/num_lat_loss": 451.0,
      "debug/num_lat_total": 451.0,
      "debug/num_tok_loss": 451.0,
      "debug/num_tok_total": 451.0,
      "epoch": 1.9863013698630136,
      "step": 145
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/ce_loss": 3.25,
      "train/diffusion_loss": 0.5414097309112549
    },
    {
      "epoch": 1.9863013698630136,
      "step": 145,
      "train/learning_rate_real": 2.318399979762863e-05
    },
    {
      "debug/num_lat_loss": 1711.0,
      "debug/num_lat_total": 2300.0,
      "debug/num_tok_loss": 1711.0,
      "debug/num_tok_total": 2300.0,
      "epoch": 2.0,
      "step": 146
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/ce_loss": 3.34375,
      "train/diffusion_loss": 0.4503061771392822
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/learning_rate_real": 2.3155102916195535e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.0,
      "step": 146
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.42279499769210815
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/learning_rate_real": 2.3155102916195535e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2395.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2395.0,
      "epoch": 2.0,
      "step": 146
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.47049665451049805
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/learning_rate_real": 2.3155102916195535e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2638.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2638.0,
      "epoch": 2.0,
      "step": 146
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/ce_loss": 3.203125,
      "train/diffusion_loss": 0.37802591919898987
    },
    {
      "epoch": 2.0,
      "step": 146,
      "train/learning_rate_real": 2.3155102916195535e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 2.0136986301369864,
      "step": 147
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/ce_loss": 3.25,
      "train/diffusion_loss": 0.45496639609336853
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/learning_rate_real": 2.3125996242161953e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2851.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2851.0,
      "epoch": 2.0136986301369864,
      "step": 147
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/ce_loss": 3.375,
      "train/diffusion_loss": 0.37154674530029297
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/learning_rate_real": 2.3125996242161953e-05
    },
    {
      "debug/num_lat_loss": 1617.0,
      "debug/num_lat_total": 2051.0,
      "debug/num_tok_loss": 1617.0,
      "debug/num_tok_total": 2051.0,
      "epoch": 2.0136986301369864,
      "step": 147
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.46421128511428833
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/learning_rate_real": 2.3125996242161953e-05
    },
    {
      "debug/num_lat_loss": 1585.0,
      "debug/num_lat_total": 2429.0,
      "debug/num_tok_loss": 1585.0,
      "debug/num_tok_total": 2429.0,
      "epoch": 2.0136986301369864,
      "step": 147
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.3745909631252289
    },
    {
      "epoch": 2.0136986301369864,
      "step": 147,
      "train/learning_rate_real": 2.3125996242161953e-05
    },
    {
      "debug/num_lat_loss": 1664.0,
      "debug/num_lat_total": 2199.0,
      "debug/num_tok_loss": 1664.0,
      "debug/num_tok_total": 2199.0,
      "epoch": 2.0273972602739727,
      "step": 148
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/ce_loss": 3.296875,
      "train/diffusion_loss": 0.4661211371421814
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/learning_rate_real": 2.3096680348620886e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2827.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2827.0,
      "epoch": 2.0273972602739727,
      "step": 148
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/ce_loss": 3.265625,
      "train/diffusion_loss": 0.3533343970775604
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/learning_rate_real": 2.3096680348620886e-05
    },
    {
      "debug/num_lat_loss": 1817.0,
      "debug/num_lat_total": 2686.0,
      "debug/num_tok_loss": 1817.0,
      "debug/num_tok_total": 2686.0,
      "epoch": 2.0273972602739727,
      "step": 148
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/ce_loss": 3.453125,
      "train/diffusion_loss": 0.41130325198173523
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/learning_rate_real": 2.3096680348620886e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2227.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2227.0,
      "epoch": 2.0273972602739727,
      "step": 148
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/ce_loss": 3.265625,
      "train/diffusion_loss": 0.5083057880401611
    },
    {
      "epoch": 2.0273972602739727,
      "step": 148,
      "train/learning_rate_real": 2.3096680348620886e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2877.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2877.0,
      "epoch": 2.041095890410959,
      "step": 149
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/ce_loss": 3.28125,
      "train/diffusion_loss": 0.37254709005355835
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/learning_rate_real": 2.3067155812784734e-05
    },
    {
      "debug/num_lat_loss": 1629.0,
      "debug/num_lat_total": 2709.0,
      "debug/num_tok_loss": 1629.0,
      "debug/num_tok_total": 2709.0,
      "epoch": 2.041095890410959,
      "step": 149
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/ce_loss": 3.46875,
      "train/diffusion_loss": 0.3475020229816437
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/learning_rate_real": 2.3067155812784734e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2671.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2671.0,
      "epoch": 2.041095890410959,
      "step": 149
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.4053395688533783
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/learning_rate_real": 2.3067155812784734e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 2.041095890410959,
      "step": 149
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.39869940280914307
    },
    {
      "epoch": 2.041095890410959,
      "step": 149,
      "train/learning_rate_real": 2.3067155812784734e-05
    },
    {
      "epoch": 2.0547945205479454,
      "grad_norm": 1.5437519550323486,
      "learning_rate": 2.3067155812784734e-05,
      "loss": 2.9196,
      "step": 150
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3083.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3083.0,
      "epoch": 2.0547945205479454,
      "step": 150
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.3080335855484009
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/learning_rate_real": 2.303742321597394e-05
    },
    {
      "debug/num_lat_loss": 1731.0,
      "debug/num_lat_total": 2548.0,
      "debug/num_tok_loss": 1731.0,
      "debug/num_tok_total": 2548.0,
      "epoch": 2.0547945205479454,
      "step": 150
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.4144803285598755
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/learning_rate_real": 2.303742321597394e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2803.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2803.0,
      "epoch": 2.0547945205479454,
      "step": 150
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/ce_loss": 3.34375,
      "train/diffusion_loss": 0.39247575402259827
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/learning_rate_real": 2.303742321597394e-05
    },
    {
      "debug/num_lat_loss": 1755.0,
      "debug/num_lat_total": 2817.0,
      "debug/num_tok_loss": 1755.0,
      "debug/num_tok_total": 2817.0,
      "epoch": 2.0547945205479454,
      "step": 150
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/ce_loss": 3.1875,
      "train/diffusion_loss": 0.3341597020626068
    },
    {
      "epoch": 2.0547945205479454,
      "step": 150,
      "train/learning_rate_real": 2.303742321597394e-05
    },
    {
      "debug/num_lat_loss": 1751.0,
      "debug/num_lat_total": 2384.0,
      "debug/num_tok_loss": 1751.0,
      "debug/num_tok_total": 2384.0,
      "epoch": 2.0684931506849313,
      "step": 151
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/ce_loss": 3.1875,
      "train/diffusion_loss": 0.40776801109313965
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/learning_rate_real": 2.3007483143605542e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 2.0684931506849313,
      "step": 151
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.41128188371658325
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/learning_rate_real": 2.3007483143605542e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 2.0684931506849313,
      "step": 151
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/ce_loss": 3.203125,
      "train/diffusion_loss": 0.41530677676200867
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/learning_rate_real": 2.3007483143605542e-05
    },
    {
      "debug/num_lat_loss": 1642.0,
      "debug/num_lat_total": 2288.0,
      "debug/num_tok_loss": 1642.0,
      "debug/num_tok_total": 2288.0,
      "epoch": 2.0684931506849313,
      "step": 151
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.42171525955200195
    },
    {
      "epoch": 2.0684931506849313,
      "step": 151,
      "train/learning_rate_real": 2.3007483143605542e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 2.0821917808219177,
      "step": 152
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.419938862323761
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/learning_rate_real": 2.2977336185181646e-05
    },
    {
      "debug/num_lat_loss": 1746.0,
      "debug/num_lat_total": 3008.0,
      "debug/num_tok_loss": 1746.0,
      "debug/num_tok_total": 3008.0,
      "epoch": 2.0821917808219177,
      "step": 152
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.29541534185409546
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/learning_rate_real": 2.2977336185181646e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2838.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2838.0,
      "epoch": 2.0821917808219177,
      "step": 152
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/ce_loss": 3.171875,
      "train/diffusion_loss": 0.3496735095977783
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/learning_rate_real": 2.2977336185181646e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2391.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2391.0,
      "epoch": 2.0821917808219177,
      "step": 152
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/ce_loss": 3.421875,
      "train/diffusion_loss": 0.4392234981060028
    },
    {
      "epoch": 2.0821917808219177,
      "step": 152,
      "train/learning_rate_real": 2.2977336185181646e-05
    },
    {
      "debug/num_lat_loss": 1757.0,
      "debug/num_lat_total": 2835.0,
      "debug/num_tok_loss": 1757.0,
      "debug/num_tok_total": 2835.0,
      "epoch": 2.095890410958904,
      "step": 153
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.3793810307979584
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/learning_rate_real": 2.2946982934277832e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2184.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2184.0,
      "epoch": 2.095890410958904,
      "step": 153
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/ce_loss": 3.09375,
      "train/diffusion_loss": 0.4782491624355316
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/learning_rate_real": 2.2946982934277832e-05
    },
    {
      "debug/num_lat_loss": 1646.0,
      "debug/num_lat_total": 2350.0,
      "debug/num_tok_loss": 1646.0,
      "debug/num_tok_total": 2350.0,
      "epoch": 2.095890410958904,
      "step": 153
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/ce_loss": 3.359375,
      "train/diffusion_loss": 0.49244463443756104
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/learning_rate_real": 2.2946982934277832e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 2.095890410958904,
      "step": 153
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.41750162839889526
    },
    {
      "epoch": 2.095890410958904,
      "step": 153,
      "train/learning_rate_real": 2.2946982934277832e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 3111.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 3111.0,
      "epoch": 2.1095890410958904,
      "step": 154
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/ce_loss": 3.171875,
      "train/diffusion_loss": 0.32178986072540283
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/learning_rate_real": 2.2916423988531437e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2198.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2198.0,
      "epoch": 2.1095890410958904,
      "step": 154
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.510726273059845
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/learning_rate_real": 2.2916423988531437e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2662.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2662.0,
      "epoch": 2.1095890410958904,
      "step": 154
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.4098641872406006
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/learning_rate_real": 2.2916423988531437e-05
    },
    {
      "debug/num_lat_loss": 1669.0,
      "debug/num_lat_total": 2528.0,
      "debug/num_tok_loss": 1669.0,
      "debug/num_tok_total": 2528.0,
      "epoch": 2.1095890410958904,
      "step": 154
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/ce_loss": 3.234375,
      "train/diffusion_loss": 0.4132782518863678
    },
    {
      "epoch": 2.1095890410958904,
      "step": 154,
      "train/learning_rate_real": 2.2916423988531437e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2887.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2887.0,
      "epoch": 2.1232876712328768,
      "step": 155
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/ce_loss": 3.578125,
      "train/diffusion_loss": 0.3646351993083954
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/learning_rate_real": 2.2885659949629828e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2654.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2654.0,
      "epoch": 2.1232876712328768,
      "step": 155
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.38885483145713806
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/learning_rate_real": 2.2885659949629828e-05
    },
    {
      "debug/num_lat_loss": 1831.0,
      "debug/num_lat_total": 2937.0,
      "debug/num_tok_loss": 1831.0,
      "debug/num_tok_total": 2937.0,
      "epoch": 2.1232876712328768,
      "step": 155
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.3445654809474945
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/learning_rate_real": 2.2885659949629828e-05
    },
    {
      "debug/num_lat_loss": 1690.0,
      "debug/num_lat_total": 1897.0,
      "debug/num_tok_loss": 1690.0,
      "debug/num_tok_total": 1897.0,
      "epoch": 2.1232876712328768,
      "step": 155
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.5193563103675842
    },
    {
      "epoch": 2.1232876712328768,
      "step": 155,
      "train/learning_rate_real": 2.2885659949629828e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3090.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3090.0,
      "epoch": 2.136986301369863,
      "step": 156
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/ce_loss": 3.0625,
      "train/diffusion_loss": 0.31100159883499146
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/learning_rate_real": 2.285469142329851e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 3100.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 3100.0,
      "epoch": 2.136986301369863,
      "step": 156
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/ce_loss": 3.0,
      "train/diffusion_loss": 0.3313130736351013
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/learning_rate_real": 2.285469142329851e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2852.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2852.0,
      "epoch": 2.136986301369863,
      "step": 156
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/ce_loss": 3.234375,
      "train/diffusion_loss": 0.34941917657852173
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/learning_rate_real": 2.285469142329851e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 2.136986301369863,
      "step": 156
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/ce_loss": 3.1875,
      "train/diffusion_loss": 0.3860790729522705
    },
    {
      "epoch": 2.136986301369863,
      "step": 156,
      "train/learning_rate_real": 2.285469142329851e-05
    },
    {
      "debug/num_lat_loss": 1717.0,
      "debug/num_lat_total": 2786.0,
      "debug/num_tok_loss": 1717.0,
      "debug/num_tok_total": 2786.0,
      "epoch": 2.1506849315068495,
      "step": 157
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/ce_loss": 3.171875,
      "train/diffusion_loss": 0.34918954968452454
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/learning_rate_real": 2.2823519019289245e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2235.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2235.0,
      "epoch": 2.1506849315068495,
      "step": 157
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/ce_loss": 3.140625,
      "train/diffusion_loss": 0.48824694752693176
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/learning_rate_real": 2.2823519019289245e-05
    },
    {
      "debug/num_lat_loss": 1625.0,
      "debug/num_lat_total": 2260.0,
      "debug/num_tok_loss": 1625.0,
      "debug/num_tok_total": 2260.0,
      "epoch": 2.1506849315068495,
      "step": 157
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/ce_loss": 3.359375,
      "train/diffusion_loss": 0.4429050385951996
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/learning_rate_real": 2.2823519019289245e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 3258.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 3258.0,
      "epoch": 2.1506849315068495,
      "step": 157
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/ce_loss": 3.140625,
      "train/diffusion_loss": 0.2725350856781006
    },
    {
      "epoch": 2.1506849315068495,
      "step": 157,
      "train/learning_rate_real": 2.2823519019289245e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 2.1643835616438354,
      "step": 158
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/ce_loss": 3.0625,
      "train/diffusion_loss": 0.4037143290042877
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/learning_rate_real": 2.2792143351368005e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2872.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2872.0,
      "epoch": 2.1643835616438354,
      "step": 158
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/ce_loss": 3.234375,
      "train/diffusion_loss": 0.343654602766037
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/learning_rate_real": 2.2792143351368005e-05
    },
    {
      "debug/num_lat_loss": 1694.0,
      "debug/num_lat_total": 2905.0,
      "debug/num_tok_loss": 1694.0,
      "debug/num_tok_total": 2905.0,
      "epoch": 2.1643835616438354,
      "step": 158
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/ce_loss": 3.125,
      "train/diffusion_loss": 0.2816028594970703
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/learning_rate_real": 2.2792143351368005e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2201.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2201.0,
      "epoch": 2.1643835616438354,
      "step": 158
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/ce_loss": 3.21875,
      "train/diffusion_loss": 0.525771975517273
    },
    {
      "epoch": 2.1643835616438354,
      "step": 158,
      "train/learning_rate_real": 2.2792143351368005e-05
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2697.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2697.0,
      "epoch": 2.1780821917808217,
      "step": 159
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/ce_loss": 3.046875,
      "train/diffusion_loss": 0.40741604566574097
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/learning_rate_real": 2.276056503730293e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2861.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2861.0,
      "epoch": 2.1780821917808217,
      "step": 159
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/ce_loss": 3.53125,
      "train/diffusion_loss": 0.37161630392074585
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/learning_rate_real": 2.276056503730293e-05
    },
    {
      "debug/num_lat_loss": 1737.0,
      "debug/num_lat_total": 2767.0,
      "debug/num_tok_loss": 1737.0,
      "debug/num_tok_total": 2767.0,
      "epoch": 2.1780821917808217,
      "step": 159
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.38080403208732605
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/learning_rate_real": 2.276056503730293e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2836.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2836.0,
      "epoch": 2.1780821917808217,
      "step": 159
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.3861440420150757
    },
    {
      "epoch": 2.1780821917808217,
      "step": 159,
      "train/learning_rate_real": 2.276056503730293e-05
    },
    {
      "epoch": 2.191780821917808,
      "grad_norm": 1.617277979850769,
      "learning_rate": 2.276056503730293e-05,
      "loss": 2.7086,
      "step": 160
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2700.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2700.0,
      "epoch": 2.191780821917808,
      "step": 160
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/ce_loss": 3.3125,
      "train/diffusion_loss": 0.3891316056251526
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/learning_rate_real": 2.2728784698852114e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 1777.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 1777.0,
      "epoch": 2.191780821917808,
      "step": 160
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/ce_loss": 3.140625,
      "train/diffusion_loss": 0.5425366759300232
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/learning_rate_real": 2.2728784698852114e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2411.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2411.0,
      "epoch": 2.191780821917808,
      "step": 160
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/ce_loss": 3.125,
      "train/diffusion_loss": 0.44423243403434753
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/learning_rate_real": 2.2728784698852114e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2207.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2207.0,
      "epoch": 2.191780821917808,
      "step": 160
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/ce_loss": 3.09375,
      "train/diffusion_loss": 0.4857548177242279
    },
    {
      "epoch": 2.191780821917808,
      "step": 160,
      "train/learning_rate_real": 2.2728784698852114e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.2054794520547945,
      "step": 161
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/ce_loss": 3.140625,
      "train/diffusion_loss": 0.4045424461364746
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/learning_rate_real": 2.269680296175141e-05
    },
    {
      "debug/num_lat_loss": 1702.0,
      "debug/num_lat_total": 3137.0,
      "debug/num_tok_loss": 1702.0,
      "debug/num_tok_total": 3137.0,
      "epoch": 2.2054794520547945,
      "step": 161
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/ce_loss": 2.9375,
      "train/diffusion_loss": 0.26028120517730713
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/learning_rate_real": 2.269680296175141e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 2.2054794520547945,
      "step": 161
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/ce_loss": 3.171875,
      "train/diffusion_loss": 0.4297439455986023
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/learning_rate_real": 2.269680296175141e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 2.2054794520547945,
      "step": 161
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/ce_loss": 3.09375,
      "train/diffusion_loss": 0.4013395309448242
    },
    {
      "epoch": 2.2054794520547945,
      "step": 161,
      "train/learning_rate_real": 2.269680296175141e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2443.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2443.0,
      "epoch": 2.219178082191781,
      "step": 162
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/ce_loss": 3.40625,
      "train/diffusion_loss": 0.45490705966949463
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/learning_rate_real": 2.2664620455702084e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 2.219178082191781,
      "step": 162
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.420023649930954
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/learning_rate_real": 2.2664620455702084e-05
    },
    {
      "debug/num_lat_loss": 1687.0,
      "debug/num_lat_total": 2893.0,
      "debug/num_tok_loss": 1687.0,
      "debug/num_tok_total": 2893.0,
      "epoch": 2.219178082191781,
      "step": 162
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.3170675039291382
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/learning_rate_real": 2.2664620455702084e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2852.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2852.0,
      "epoch": 2.219178082191781,
      "step": 162
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/ce_loss": 3.078125,
      "train/diffusion_loss": 0.3600369989871979
    },
    {
      "epoch": 2.219178082191781,
      "step": 162,
      "train/learning_rate_real": 2.2664620455702084e-05
    },
    {
      "debug/num_lat_loss": 1700.0,
      "debug/num_lat_total": 2555.0,
      "debug/num_tok_loss": 1700.0,
      "debug/num_tok_total": 2555.0,
      "epoch": 2.232876712328767,
      "step": 163
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/ce_loss": 3.125,
      "train/diffusion_loss": 0.3976871967315674
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/learning_rate_real": 2.2632237814358425e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2824.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2824.0,
      "epoch": 2.232876712328767,
      "step": 163
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.36776605248451233
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/learning_rate_real": 2.2632237814358425e-05
    },
    {
      "debug/num_lat_loss": 1729.0,
      "debug/num_lat_total": 2770.0,
      "debug/num_tok_loss": 1729.0,
      "debug/num_tok_total": 2770.0,
      "epoch": 2.232876712328767,
      "step": 163
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/ce_loss": 3.109375,
      "train/diffusion_loss": 0.3672686219215393
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/learning_rate_real": 2.2632237814358425e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2433.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2433.0,
      "epoch": 2.232876712328767,
      "step": 163
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/ce_loss": 3.390625,
      "train/diffusion_loss": 0.4243660271167755
    },
    {
      "epoch": 2.232876712328767,
      "step": 163,
      "train/learning_rate_real": 2.2632237814358425e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2607.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2607.0,
      "epoch": 2.2465753424657535,
      "step": 164
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4174022078514099
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/learning_rate_real": 2.2599655675315256e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2235.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2235.0,
      "epoch": 2.2465753424657535,
      "step": 164
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.5091489553451538
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/learning_rate_real": 2.2599655675315256e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2889.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2889.0,
      "epoch": 2.2465753424657535,
      "step": 164
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.36695384979248047
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/learning_rate_real": 2.2599655675315256e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2242.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2242.0,
      "epoch": 2.2465753424657535,
      "step": 164
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/ce_loss": 3.078125,
      "train/diffusion_loss": 0.4847750663757324
    },
    {
      "epoch": 2.2465753424657535,
      "step": 164,
      "train/learning_rate_real": 2.2599655675315256e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2627.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2627.0,
      "epoch": 2.26027397260274,
      "step": 165
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/ce_loss": 2.96875,
      "train/diffusion_loss": 0.3915446400642395
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/learning_rate_real": 2.2566874680095403e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2390.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2390.0,
      "epoch": 2.26027397260274,
      "step": 165
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.45723363757133484
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/learning_rate_real": 2.2566874680095403e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2692.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2692.0,
      "epoch": 2.26027397260274,
      "step": 165
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4079940617084503
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/learning_rate_real": 2.2566874680095403e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2602.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2602.0,
      "epoch": 2.26027397260274,
      "step": 165
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4096970856189728
    },
    {
      "epoch": 2.26027397260274,
      "step": 165,
      "train/learning_rate_real": 2.2566874680095403e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2875.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2875.0,
      "epoch": 2.2739726027397262,
      "step": 166
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/ce_loss": 3.0625,
      "train/diffusion_loss": 0.34252914786338806
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/learning_rate_real": 2.2533895474137047e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2829.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2829.0,
      "epoch": 2.2739726027397262,
      "step": 166
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.3555743098258972
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/learning_rate_real": 2.2533895474137047e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2697.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2697.0,
      "epoch": 2.2739726027397262,
      "step": 166
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4044460952281952
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/learning_rate_real": 2.2533895474137047e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2482.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2482.0,
      "epoch": 2.2739726027397262,
      "step": 166
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.4411238431930542
    },
    {
      "epoch": 2.2739726027397262,
      "step": 166,
      "train/learning_rate_real": 2.2533895474137047e-05
    },
    {
      "debug/num_lat_loss": 1717.0,
      "debug/num_lat_total": 2951.0,
      "debug/num_tok_loss": 1717.0,
      "debug/num_tok_total": 2951.0,
      "epoch": 2.287671232876712,
      "step": 167
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/ce_loss": 2.8125,
      "train/diffusion_loss": 0.28139710426330566
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/learning_rate_real": 2.2500718706781014e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2426.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2426.0,
      "epoch": 2.287671232876712,
      "step": 167
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.44409722089767456
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/learning_rate_real": 2.2500718706781014e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 2.287671232876712,
      "step": 167
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/ce_loss": 3.03125,
      "train/diffusion_loss": 0.4660814702510834
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/learning_rate_real": 2.2500718706781014e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2417.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2417.0,
      "epoch": 2.287671232876712,
      "step": 167
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.46190908551216125
    },
    {
      "epoch": 2.287671232876712,
      "step": 167,
      "train/learning_rate_real": 2.2500718706781014e-05
    },
    {
      "debug/num_lat_loss": 1653.0,
      "debug/num_lat_total": 2146.0,
      "debug/num_tok_loss": 1653.0,
      "debug/num_tok_total": 2146.0,
      "epoch": 2.3013698630136985,
      "step": 168
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.4429917335510254
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/learning_rate_real": 2.2467345031258007e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2650.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2650.0,
      "epoch": 2.3013698630136985,
      "step": 168
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/ce_loss": 3.046875,
      "train/diffusion_loss": 0.4134480059146881
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/learning_rate_real": 2.2467345031258007e-05
    },
    {
      "debug/num_lat_loss": 1615.0,
      "debug/num_lat_total": 2248.0,
      "debug/num_tok_loss": 1615.0,
      "debug/num_tok_total": 2248.0,
      "epoch": 2.3013698630136985,
      "step": 168
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4329485595226288
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/learning_rate_real": 2.2467345031258007e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2625.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2625.0,
      "epoch": 2.3013698630136985,
      "step": 168
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4059813618659973
    },
    {
      "epoch": 2.3013698630136985,
      "step": 168,
      "train/learning_rate_real": 2.2467345031258007e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 2.315068493150685,
      "step": 169
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.33460959792137146
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/learning_rate_real": 2.243377510467572e-05
    },
    {
      "debug/num_lat_loss": 1748.0,
      "debug/num_lat_total": 2592.0,
      "debug/num_tok_loss": 1748.0,
      "debug/num_tok_total": 2592.0,
      "epoch": 2.315068493150685,
      "step": 169
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.37922653555870056
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/learning_rate_real": 2.243377510467572e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2911.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2911.0,
      "epoch": 2.315068493150685,
      "step": 169
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/ce_loss": 3.03125,
      "train/diffusion_loss": 0.3680286109447479
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/learning_rate_real": 2.243377510467572e-05
    },
    {
      "debug/num_lat_loss": 1678.0,
      "debug/num_lat_total": 2198.0,
      "debug/num_tok_loss": 1678.0,
      "debug/num_tok_total": 2198.0,
      "epoch": 2.315068493150685,
      "step": 169
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/ce_loss": 3.25,
      "train/diffusion_loss": 0.46388769149780273
    },
    {
      "epoch": 2.315068493150685,
      "step": 169,
      "train/learning_rate_real": 2.243377510467572e-05
    },
    {
      "epoch": 2.328767123287671,
      "grad_norm": 1.618893027305603,
      "learning_rate": 2.243377510467572e-05,
      "loss": 2.7758,
      "step": 170
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2429.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2429.0,
      "epoch": 2.328767123287671,
      "step": 170
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/ce_loss": 2.96875,
      "train/diffusion_loss": 0.453939825296402
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/learning_rate_real": 2.2400009588005925e-05
    },
    {
      "debug/num_lat_loss": 1824.0,
      "debug/num_lat_total": 3149.0,
      "debug/num_tok_loss": 1824.0,
      "debug/num_tok_total": 3149.0,
      "epoch": 2.328767123287671,
      "step": 170
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/ce_loss": 3.0,
      "train/diffusion_loss": 0.3223628103733063
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/learning_rate_real": 2.2400009588005925e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 3055.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 3055.0,
      "epoch": 2.328767123287671,
      "step": 170
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/ce_loss": 2.984375,
      "train/diffusion_loss": 0.325356125831604
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/learning_rate_real": 2.2400009588005925e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2391.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2391.0,
      "epoch": 2.328767123287671,
      "step": 170
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/ce_loss": 3.03125,
      "train/diffusion_loss": 0.4393155872821808
    },
    {
      "epoch": 2.328767123287671,
      "step": 170,
      "train/learning_rate_real": 2.2400009588005925e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2237.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2237.0,
      "epoch": 2.3424657534246576,
      "step": 171
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4934636950492859
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/learning_rate_real": 2.236604914607143e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2625.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2625.0,
      "epoch": 2.3424657534246576,
      "step": 171
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/ce_loss": 2.90625,
      "train/diffusion_loss": 0.41351816058158875
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/learning_rate_real": 2.236604914607143e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2635.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2635.0,
      "epoch": 2.3424657534246576,
      "step": 171
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.3951073884963989
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/learning_rate_real": 2.236604914607143e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3076.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3076.0,
      "epoch": 2.3424657534246576,
      "step": 171
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.30464741587638855
    },
    {
      "epoch": 2.3424657534246576,
      "step": 171,
      "train/learning_rate_real": 2.236604914607143e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 3094.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 3094.0,
      "epoch": 2.356164383561644,
      "step": 172
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/ce_loss": 3.234375,
      "train/diffusion_loss": 0.31877073645591736
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/learning_rate_real": 2.2331894447533035e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2888.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2888.0,
      "epoch": 2.356164383561644,
      "step": 172
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.35470494627952576
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/learning_rate_real": 2.2331894447533035e-05
    },
    {
      "debug/num_lat_loss": 1816.0,
      "debug/num_lat_total": 2699.0,
      "debug/num_tok_loss": 1816.0,
      "debug/num_tok_total": 2699.0,
      "epoch": 2.356164383561644,
      "step": 172
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.4036940038204193
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/learning_rate_real": 2.2331894447533035e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2632.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2632.0,
      "epoch": 2.356164383561644,
      "step": 172
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/ce_loss": 2.96875,
      "train/diffusion_loss": 0.4525442123413086
    },
    {
      "epoch": 2.356164383561644,
      "step": 172,
      "train/learning_rate_real": 2.2331894447533035e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2609.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2609.0,
      "epoch": 2.3698630136986303,
      "step": 173
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.40328508615493774
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/learning_rate_real": 2.2297546164876292e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.3698630136986303,
      "step": 173
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.3900800347328186
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/learning_rate_real": 2.2297546164876292e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 2.3698630136986303,
      "step": 173
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/ce_loss": 2.9375,
      "train/diffusion_loss": 0.39137205481529236
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/learning_rate_real": 2.2297546164876292e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 2.3698630136986303,
      "step": 173
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.4043900966644287
    },
    {
      "epoch": 2.3698630136986303,
      "step": 173,
      "train/learning_rate_real": 2.2297546164876292e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2389.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2389.0,
      "epoch": 2.383561643835616,
      "step": 174
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.45027777552604675
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/learning_rate_real": 2.2263004974398352e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 2.383561643835616,
      "step": 174
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/ce_loss": 2.828125,
      "train/diffusion_loss": 0.45082005858421326
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/learning_rate_real": 2.2263004974398352e-05
    },
    {
      "debug/num_lat_loss": 1755.0,
      "debug/num_lat_total": 2808.0,
      "debug/num_tok_loss": 1755.0,
      "debug/num_tok_total": 2808.0,
      "epoch": 2.383561643835616,
      "step": 174
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.36461949348449707
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/learning_rate_real": 2.2263004974398352e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3082.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3082.0,
      "epoch": 2.383561643835616,
      "step": 174
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.32322970032691956
    },
    {
      "epoch": 2.383561643835616,
      "step": 174,
      "train/learning_rate_real": 2.2263004974398352e-05
    },
    {
      "debug/num_lat_loss": 1579.0,
      "debug/num_lat_total": 2219.0,
      "debug/num_tok_loss": 1579.0,
      "debug/num_tok_total": 2219.0,
      "epoch": 2.3972602739726026,
      "step": 175
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.44200700521469116
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/learning_rate_real": 2.2228271556194575e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 2.3972602739726026,
      "step": 175
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4691259562969208
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/learning_rate_real": 2.2228271556194575e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2876.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2876.0,
      "epoch": 2.3972602739726026,
      "step": 175
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/ce_loss": 3.375,
      "train/diffusion_loss": 0.34737053513526917
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/learning_rate_real": 2.2228271556194575e-05
    },
    {
      "debug/num_lat_loss": 1725.0,
      "debug/num_lat_total": 2589.0,
      "debug/num_tok_loss": 1725.0,
      "debug/num_tok_total": 2589.0,
      "epoch": 2.3972602739726026,
      "step": 175
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.43164265155792236
    },
    {
      "epoch": 2.3972602739726026,
      "step": 175,
      "train/learning_rate_real": 2.2228271556194575e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 2.410958904109589,
      "step": 176
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/ce_loss": 3.0,
      "train/diffusion_loss": 0.429606169462204
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/learning_rate_real": 2.2193346594145188e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2658.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2658.0,
      "epoch": 2.410958904109589,
      "step": 176
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.3894621431827545
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/learning_rate_real": 2.2193346594145188e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2654.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2654.0,
      "epoch": 2.410958904109589,
      "step": 176
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.4027283191680908
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/learning_rate_real": 2.2193346594145188e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2690.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2690.0,
      "epoch": 2.410958904109589,
      "step": 176
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/ce_loss": 2.6875,
      "train/diffusion_loss": 0.38133347034454346
    },
    {
      "epoch": 2.410958904109589,
      "step": 176,
      "train/learning_rate_real": 2.2193346594145188e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2837.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2837.0,
      "epoch": 2.4246575342465753,
      "step": 177
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/ce_loss": 2.859375,
      "train/diffusion_loss": 0.3693162500858307
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/learning_rate_real": 2.215823077590179e-05
    },
    {
      "debug/num_lat_loss": 1619.0,
      "debug/num_lat_total": 2252.0,
      "debug/num_tok_loss": 1619.0,
      "debug/num_tok_total": 2252.0,
      "epoch": 2.4246575342465753,
      "step": 177
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/ce_loss": 2.796875,
      "train/diffusion_loss": 0.4304879307746887
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/learning_rate_real": 2.215823077590179e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 2.4246575342465753,
      "step": 177
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.4411391615867615
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/learning_rate_real": 2.215823077590179e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 2.4246575342465753,
      "step": 177
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/ce_loss": 3.15625,
      "train/diffusion_loss": 0.47264569997787476
    },
    {
      "epoch": 2.4246575342465753,
      "step": 177,
      "train/learning_rate_real": 2.215823077590179e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2395.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2395.0,
      "epoch": 2.4383561643835616,
      "step": 178
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.453601598739624
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/learning_rate_real": 2.2122924792873827e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3096.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3096.0,
      "epoch": 2.4383561643835616,
      "step": 178
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/ce_loss": 2.9375,
      "train/diffusion_loss": 0.31741419434547424
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/learning_rate_real": 2.2122924792873827e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 3100.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 3100.0,
      "epoch": 2.4383561643835616,
      "step": 178
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/ce_loss": 2.78125,
      "train/diffusion_loss": 0.2818582057952881
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/learning_rate_real": 2.2122924792873827e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 2.4383561643835616,
      "step": 178
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/ce_loss": 2.8125,
      "train/diffusion_loss": 0.4282552897930145
    },
    {
      "epoch": 2.4383561643835616,
      "step": 178,
      "train/learning_rate_real": 2.2122924792873827e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2639.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2639.0,
      "epoch": 2.452054794520548,
      "step": 179
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/ce_loss": 2.90625,
      "train/diffusion_loss": 0.40325939655303955
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/learning_rate_real": 2.208742934021499e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 2.452054794520548,
      "step": 179
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.4070988595485687
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/learning_rate_real": 2.208742934021499e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.452054794520548,
      "step": 179
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.40981626510620117
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/learning_rate_real": 2.208742934021499e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2441.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2441.0,
      "epoch": 2.452054794520548,
      "step": 179
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/ce_loss": 2.9375,
      "train/diffusion_loss": 0.43094131350517273
    },
    {
      "epoch": 2.452054794520548,
      "step": 179,
      "train/learning_rate_real": 2.208742934021499e-05
    },
    {
      "epoch": 2.4657534246575343,
      "grad_norm": 1.5310542583465576,
      "learning_rate": 2.208742934021499e-05,
      "loss": 2.7061,
      "step": 180
    },
    {
      "debug/num_lat_loss": 1620.0,
      "debug/num_lat_total": 2117.0,
      "debug/num_tok_loss": 1620.0,
      "debug/num_tok_total": 2117.0,
      "epoch": 2.4657534246575343,
      "step": 180
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/ce_loss": 2.921875,
      "train/diffusion_loss": 0.4778527617454529
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/learning_rate_real": 2.205174511680949e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 3109.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 3109.0,
      "epoch": 2.4657534246575343,
      "step": 180
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/ce_loss": 2.734375,
      "train/diffusion_loss": 0.3286422789096832
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/learning_rate_real": 2.205174511680949e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2616.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2616.0,
      "epoch": 2.4657534246575343,
      "step": 180
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4315715730190277
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/learning_rate_real": 2.205174511680949e-05
    },
    {
      "debug/num_lat_loss": 1606.0,
      "debug/num_lat_total": 2088.0,
      "debug/num_tok_loss": 1606.0,
      "debug/num_tok_total": 2088.0,
      "epoch": 2.4657534246575343,
      "step": 180
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.47049593925476074
    },
    {
      "epoch": 2.4657534246575343,
      "step": 180,
      "train/learning_rate_real": 2.205174511680949e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2638.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2638.0,
      "epoch": 2.4794520547945207,
      "step": 181
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.38567018508911133
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/learning_rate_real": 2.2015872825258333e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2402.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2402.0,
      "epoch": 2.4794520547945207,
      "step": 181
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.44906651973724365
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/learning_rate_real": 2.2015872825258333e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 3334.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 3334.0,
      "epoch": 2.4794520547945207,
      "step": 181
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/ce_loss": 2.890625,
      "train/diffusion_loss": 0.29875418543815613
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/learning_rate_real": 2.2015872825258333e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2429.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2429.0,
      "epoch": 2.4794520547945207,
      "step": 181
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.441359281539917
    },
    {
      "epoch": 2.4794520547945207,
      "step": 181,
      "train/learning_rate_real": 2.2015872825258333e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2816.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2816.0,
      "epoch": 2.493150684931507,
      "step": 182
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.3667384386062622
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/learning_rate_real": 2.1979813171865483e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2450.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2450.0,
      "epoch": 2.493150684931507,
      "step": 182
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/ce_loss": 3.0,
      "train/diffusion_loss": 0.4418801963329315
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/learning_rate_real": 2.1979813171865483e-05
    },
    {
      "debug/num_lat_loss": 1727.0,
      "debug/num_lat_total": 2594.0,
      "debug/num_tok_loss": 1727.0,
      "debug/num_tok_total": 2594.0,
      "epoch": 2.493150684931507,
      "step": 182
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/ce_loss": 2.8125,
      "train/diffusion_loss": 0.444356769323349
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/learning_rate_real": 2.1979813171865483e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2457.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2457.0,
      "epoch": 2.493150684931507,
      "step": 182
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/ce_loss": 3.015625,
      "train/diffusion_loss": 0.4595908522605896
    },
    {
      "epoch": 2.493150684931507,
      "step": 182,
      "train/learning_rate_real": 2.1979813171865483e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2448.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2448.0,
      "epoch": 2.506849315068493,
      "step": 183
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/ce_loss": 2.78125,
      "train/diffusion_loss": 0.43447527289390564
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/learning_rate_real": 2.1943566866623926e-05
    },
    {
      "debug/num_lat_loss": 1668.0,
      "debug/num_lat_total": 2625.0,
      "debug/num_tok_loss": 1668.0,
      "debug/num_tok_total": 2625.0,
      "epoch": 2.506849315068493,
      "step": 183
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.3590381145477295
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/learning_rate_real": 2.1943566866623926e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 1989.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 1989.0,
      "epoch": 2.506849315068493,
      "step": 183
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.5137954354286194
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/learning_rate_real": 2.1943566866623926e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2186.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2186.0,
      "epoch": 2.506849315068493,
      "step": 183
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4624958336353302
    },
    {
      "epoch": 2.506849315068493,
      "step": 183,
      "train/learning_rate_real": 2.1943566866623926e-05
    },
    {
      "debug/num_lat_loss": 1720.0,
      "debug/num_lat_total": 2318.0,
      "debug/num_tok_loss": 1720.0,
      "debug/num_tok_total": 2318.0,
      "epoch": 2.5205479452054793,
      "step": 184
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/ce_loss": 2.78125,
      "train/diffusion_loss": 0.4255053699016571
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/learning_rate_real": 2.190713462320173e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2850.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2850.0,
      "epoch": 2.5205479452054793,
      "step": 184
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/ce_loss": 2.953125,
      "train/diffusion_loss": 0.34381103515625
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/learning_rate_real": 2.190713462320173e-05
    },
    {
      "debug/num_lat_loss": 1647.0,
      "debug/num_lat_total": 2360.0,
      "debug/num_tok_loss": 1647.0,
      "debug/num_tok_total": 2360.0,
      "epoch": 2.5205479452054793,
      "step": 184
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.4182201325893402
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/learning_rate_real": 2.190713462320173e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 2.5205479452054793,
      "step": 184
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/ce_loss": 2.71875,
      "train/diffusion_loss": 0.39211803674697876
    },
    {
      "epoch": 2.5205479452054793,
      "step": 184,
      "train/learning_rate_real": 2.190713462320173e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2405.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2405.0,
      "epoch": 2.5342465753424657,
      "step": 185
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4424284100532532
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/learning_rate_real": 2.187051715892797e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2670.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2670.0,
      "epoch": 2.5342465753424657,
      "step": 185
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/ce_loss": 2.640625,
      "train/diffusion_loss": 0.3992789387702942
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/learning_rate_real": 2.187051715892797e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2434.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2434.0,
      "epoch": 2.5342465753424657,
      "step": 185
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.43806523084640503
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/learning_rate_real": 2.187051715892797e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2858.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2858.0,
      "epoch": 2.5342465753424657,
      "step": 185
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.3659980595111847
    },
    {
      "epoch": 2.5342465753424657,
      "step": 185,
      "train/learning_rate_real": 2.187051715892797e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 2.547945205479452,
      "step": 186
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.3946780860424042
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/learning_rate_real": 2.18337151947786e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 2.547945205479452,
      "step": 186
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.3429841101169586
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/learning_rate_real": 2.18337151947786e-05
    },
    {
      "debug/num_lat_loss": 1579.0,
      "debug/num_lat_total": 2101.0,
      "debug/num_tok_loss": 1579.0,
      "debug/num_tok_total": 2101.0,
      "epoch": 2.547945205479452,
      "step": 186
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.4572349488735199
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/learning_rate_real": 2.18337151947786e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2377.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2377.0,
      "epoch": 2.547945205479452,
      "step": 186
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/ce_loss": 2.71875,
      "train/diffusion_loss": 0.4656928777694702
    },
    {
      "epoch": 2.547945205479452,
      "step": 186,
      "train/learning_rate_real": 2.18337151947786e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2465.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2465.0,
      "epoch": 2.5616438356164384,
      "step": 187
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/ce_loss": 2.71875,
      "train/diffusion_loss": 0.46677687764167786
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/learning_rate_real": 2.179672945536227e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 3053.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 3053.0,
      "epoch": 2.5616438356164384,
      "step": 187
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/ce_loss": 2.71875,
      "train/diffusion_loss": 0.32807257771492004
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/learning_rate_real": 2.179672945536227e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2667.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2667.0,
      "epoch": 2.5616438356164384,
      "step": 187
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/ce_loss": 2.734375,
      "train/diffusion_loss": 0.3864240050315857
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/learning_rate_real": 2.179672945536227e-05
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2261.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2261.0,
      "epoch": 2.5616438356164384,
      "step": 187
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.5067983865737915
    },
    {
      "epoch": 2.5616438356164384,
      "step": 187,
      "train/learning_rate_real": 2.179672945536227e-05
    },
    {
      "debug/num_lat_loss": 1743.0,
      "debug/num_lat_total": 2997.0,
      "debug/num_tok_loss": 1743.0,
      "debug/num_tok_total": 2997.0,
      "epoch": 2.5753424657534247,
      "step": 188
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.3241000175476074
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/learning_rate_real": 2.1759560668906064e-05
    },
    {
      "debug/num_lat_loss": 1668.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1668.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 2.5753424657534247,
      "step": 188
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/ce_loss": 2.625,
      "train/diffusion_loss": 0.28809213638305664
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/learning_rate_real": 2.1759560668906064e-05
    },
    {
      "debug/num_lat_loss": 1701.0,
      "debug/num_lat_total": 2237.0,
      "debug/num_tok_loss": 1701.0,
      "debug/num_tok_total": 2237.0,
      "epoch": 2.5753424657534247,
      "step": 188
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/ce_loss": 2.59375,
      "train/diffusion_loss": 0.45437008142471313
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/learning_rate_real": 2.1759560668906064e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2898.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2898.0,
      "epoch": 2.5753424657534247,
      "step": 188
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.3584519326686859
    },
    {
      "epoch": 2.5753424657534247,
      "step": 188,
      "train/learning_rate_real": 2.1759560668906064e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.589041095890411,
      "step": 189
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.4072858393192291
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/learning_rate_real": 2.172220956724114e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2878.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2878.0,
      "epoch": 2.589041095890411,
      "step": 189
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.3507998287677765
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/learning_rate_real": 2.172220956724114e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2216.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2216.0,
      "epoch": 2.589041095890411,
      "step": 189
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/ce_loss": 2.59375,
      "train/diffusion_loss": 0.4703916013240814
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/learning_rate_real": 2.172220956724114e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 2.589041095890411,
      "step": 189
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.40979763865470886
    },
    {
      "epoch": 2.589041095890411,
      "step": 189,
      "train/learning_rate_real": 2.172220956724114e-05
    },
    {
      "epoch": 2.602739726027397,
      "grad_norm": 1.401570200920105,
      "learning_rate": 2.172220956724114e-05,
      "loss": 2.733,
      "step": 190
    },
    {
      "debug/num_lat_loss": 1683.0,
      "debug/num_lat_total": 2553.0,
      "debug/num_tok_loss": 1683.0,
      "debug/num_tok_total": 2553.0,
      "epoch": 2.602739726027397,
      "step": 190
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.41486406326293945
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/learning_rate_real": 2.168467688578834e-05
    },
    {
      "debug/num_lat_loss": 1746.0,
      "debug/num_lat_total": 2580.0,
      "debug/num_tok_loss": 1746.0,
      "debug/num_tok_total": 2580.0,
      "epoch": 2.602739726027397,
      "step": 190
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.40670496225357056
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/learning_rate_real": 2.168467688578834e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2869.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2869.0,
      "epoch": 2.602739726027397,
      "step": 190
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.33270514011383057
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/learning_rate_real": 2.168467688578834e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2411.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2411.0,
      "epoch": 2.602739726027397,
      "step": 190
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/ce_loss": 2.59375,
      "train/diffusion_loss": 0.45577895641326904
    },
    {
      "epoch": 2.602739726027397,
      "step": 190,
      "train/learning_rate_real": 2.168467688578834e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2683.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2683.0,
      "epoch": 2.616438356164384,
      "step": 191
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.37813183665275574
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/learning_rate_real": 2.164696336354371e-05
    },
    {
      "debug/num_lat_loss": 1640.0,
      "debug/num_lat_total": 1947.0,
      "debug/num_tok_loss": 1640.0,
      "debug/num_tok_total": 1947.0,
      "epoch": 2.616438356164384,
      "step": 191
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.49075964093208313
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/learning_rate_real": 2.164696336354371e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2454.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2454.0,
      "epoch": 2.616438356164384,
      "step": 191
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/ce_loss": 2.625,
      "train/diffusion_loss": 0.4536711573600769
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/learning_rate_real": 2.164696336354371e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2471.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2471.0,
      "epoch": 2.616438356164384,
      "step": 191
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/ce_loss": 2.734375,
      "train/diffusion_loss": 0.4252324104309082
    },
    {
      "epoch": 2.616438356164384,
      "step": 191,
      "train/learning_rate_real": 2.164696336354371e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 2596.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 2596.0,
      "epoch": 2.6301369863013697,
      "step": 192
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/ce_loss": 2.5,
      "train/diffusion_loss": 0.4330429434776306
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/learning_rate_real": 2.1609069743063927e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2177.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2177.0,
      "epoch": 2.6301369863013697,
      "step": 192
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/ce_loss": 2.640625,
      "train/diffusion_loss": 0.4865707755088806
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/learning_rate_real": 2.1609069743063927e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 2.6301369863013697,
      "step": 192
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.3472568392753601
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/learning_rate_real": 2.1609069743063927e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2822.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2822.0,
      "epoch": 2.6301369863013697,
      "step": 192
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/ce_loss": 2.5,
      "train/diffusion_loss": 0.3912636339664459
    },
    {
      "epoch": 2.6301369863013697,
      "step": 192,
      "train/learning_rate_real": 2.1609069743063927e-05
    },
    {
      "debug/num_lat_loss": 1662.0,
      "debug/num_lat_total": 2314.0,
      "debug/num_tok_loss": 1662.0,
      "debug/num_tok_total": 2314.0,
      "epoch": 2.643835616438356,
      "step": 193
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4403693377971649
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/learning_rate_real": 2.1570996770451702e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 1999.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 1999.0,
      "epoch": 2.643835616438356,
      "step": 193
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/ce_loss": 2.609375,
      "train/diffusion_loss": 0.517785370349884
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/learning_rate_real": 2.1570996770451702e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 2.643835616438356,
      "step": 193
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/ce_loss": 2.75,
      "train/diffusion_loss": 0.3927980065345764
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/learning_rate_real": 2.1570996770451702e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 2.643835616438356,
      "step": 193
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/ce_loss": 2.546875,
      "train/diffusion_loss": 0.41709792613983154
    },
    {
      "epoch": 2.643835616438356,
      "step": 193,
      "train/learning_rate_real": 2.1570996770451702e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2651.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2651.0,
      "epoch": 2.6575342465753424,
      "step": 194
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.41924428939819336
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/learning_rate_real": 2.1532745195341093e-05
    },
    {
      "debug/num_lat_loss": 1658.0,
      "debug/num_lat_total": 2288.0,
      "debug/num_tok_loss": 1658.0,
      "debug/num_tok_total": 2288.0,
      "epoch": 2.6575342465753424,
      "step": 194
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/ce_loss": 2.640625,
      "train/diffusion_loss": 0.44974976778030396
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/learning_rate_real": 2.1532745195341093e-05
    },
    {
      "debug/num_lat_loss": 1724.0,
      "debug/num_lat_total": 2803.0,
      "debug/num_tok_loss": 1724.0,
      "debug/num_tok_total": 2803.0,
      "epoch": 2.6575342465753424,
      "step": 194
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/ce_loss": 2.796875,
      "train/diffusion_loss": 0.3286336064338684
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/learning_rate_real": 2.1532745195341093e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2682.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2682.0,
      "epoch": 2.6575342465753424,
      "step": 194
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.4254552721977234
    },
    {
      "epoch": 2.6575342465753424,
      "step": 194,
      "train/learning_rate_real": 2.1532745195341093e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 2.671232876712329,
      "step": 195
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.4401073455810547
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/learning_rate_real": 2.1494315770882713e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2192.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2192.0,
      "epoch": 2.671232876712329,
      "step": 195
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/ce_loss": 2.78125,
      "train/diffusion_loss": 0.5180473923683167
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/learning_rate_real": 2.1494315770882713e-05
    },
    {
      "debug/num_lat_loss": 1644.0,
      "debug/num_lat_total": 2282.0,
      "debug/num_tok_loss": 1644.0,
      "debug/num_tok_total": 2282.0,
      "epoch": 2.671232876712329,
      "step": 195
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/ce_loss": 2.609375,
      "train/diffusion_loss": 0.4051307737827301
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/learning_rate_real": 2.1494315770882713e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 3111.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 3111.0,
      "epoch": 2.671232876712329,
      "step": 195
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.3186243176460266
    },
    {
      "epoch": 2.671232876712329,
      "step": 195,
      "train/learning_rate_real": 2.1494315770882713e-05
    },
    {
      "debug/num_lat_loss": 1417.0,
      "debug/num_lat_total": 2097.0,
      "debug/num_tok_loss": 1417.0,
      "debug/num_tok_total": 2097.0,
      "epoch": 2.684931506849315,
      "step": 196
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.4389093816280365
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/learning_rate_real": 2.145570925372894e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2651.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2651.0,
      "epoch": 2.684931506849315,
      "step": 196
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/ce_loss": 2.609375,
      "train/diffusion_loss": 0.3858262300491333
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/learning_rate_real": 2.145570925372894e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2682.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2682.0,
      "epoch": 2.684931506849315,
      "step": 196
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/ce_loss": 2.875,
      "train/diffusion_loss": 0.40185388922691345
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/learning_rate_real": 2.145570925372894e-05
    },
    {
      "debug/num_lat_loss": 1426.0,
      "debug/num_lat_total": 1666.0,
      "debug/num_tok_loss": 1426.0,
      "debug/num_tok_total": 1666.0,
      "epoch": 2.684931506849315,
      "step": 196
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.4992228150367737
    },
    {
      "epoch": 2.684931506849315,
      "step": 196,
      "train/learning_rate_real": 2.145570925372894e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 2.6986301369863015,
      "step": 197
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.37056055665016174
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/learning_rate_real": 2.1416926404018994e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2850.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2850.0,
      "epoch": 2.6986301369863015,
      "step": 197
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.3587437570095062
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/learning_rate_real": 2.1416926404018994e-05
    },
    {
      "debug/num_lat_loss": 1743.0,
      "debug/num_lat_total": 3000.0,
      "debug/num_tok_loss": 1743.0,
      "debug/num_tok_total": 3000.0,
      "epoch": 2.6986301369863015,
      "step": 197
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/ce_loss": 2.546875,
      "train/diffusion_loss": 0.31014642119407654
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/learning_rate_real": 2.1416926404018994e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2639.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2639.0,
      "epoch": 2.6986301369863015,
      "step": 197
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/ce_loss": 2.65625,
      "train/diffusion_loss": 0.41646116971969604
    },
    {
      "epoch": 2.6986301369863015,
      "step": 197,
      "train/learning_rate_real": 2.1416926404018994e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2249.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2249.0,
      "epoch": 2.712328767123288,
      "step": 198
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.5441051721572876
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/learning_rate_real": 2.137796798536398e-05
    },
    {
      "debug/num_lat_loss": 1728.0,
      "debug/num_lat_total": 2345.0,
      "debug/num_tok_loss": 1728.0,
      "debug/num_tok_total": 2345.0,
      "epoch": 2.712328767123288,
      "step": 198
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.4259048104286194
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/learning_rate_real": 2.137796798536398e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2249.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2249.0,
      "epoch": 2.712328767123288,
      "step": 198
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.5017729997634888
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/learning_rate_real": 2.137796798536398e-05
    },
    {
      "debug/num_lat_loss": 1589.0,
      "debug/num_lat_total": 2480.0,
      "debug/num_tok_loss": 1589.0,
      "debug/num_tok_total": 2480.0,
      "epoch": 2.712328767123288,
      "step": 198
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/ce_loss": 2.90625,
      "train/diffusion_loss": 0.3666616976261139
    },
    {
      "epoch": 2.712328767123288,
      "step": 198,
      "train/learning_rate_real": 2.137796798536398e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 2.7260273972602738,
      "step": 199
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.4006059169769287
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/learning_rate_real": 2.1338834764831845e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 3061.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 3061.0,
      "epoch": 2.7260273972602738,
      "step": 199
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/ce_loss": 2.59375,
      "train/diffusion_loss": 0.322833776473999
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/learning_rate_real": 2.1338834764831845e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2646.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2646.0,
      "epoch": 2.7260273972602738,
      "step": 199
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.4167260527610779
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/learning_rate_real": 2.1338834764831845e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2461.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2461.0,
      "epoch": 2.7260273972602738,
      "step": 199
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.40909343957901
    },
    {
      "epoch": 2.7260273972602738,
      "step": 199,
      "train/learning_rate_real": 2.1338834764831845e-05
    },
    {
      "epoch": 2.73972602739726,
      "grad_norm": 1.5308177471160889,
      "learning_rate": 2.1338834764831845e-05,
      "loss": 2.7536,
      "step": 200
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2228.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2228.0,
      "epoch": 2.73972602739726,
      "step": 200
    },
    {
      "epoch": 2.73972602739726,
      "step": 200,
      "train/ce_loss": 2.546875,
      "train/diffusion_loss": 0.5127332806587219
    },
    {
      "epoch": 2.73972602739726,
      "step": 200,
      "train/learning_rate_real": 2.129952751293229e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3063.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3063.0,
      "epoch": 2.73972602739726,
      "step": 200
    },
    {
      "epoch": 2.73972602739726,
      "step": 200,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.3455106317996979
    },
    {
      "epoch": 2.73972602739726,
      "step": 200,
      "train/learning_rate_real": 2.129952751293229e-05
    },
    {
      "debug/num_lat_loss": 1611.0,
      "debug/num_lat_total": 2945.0,
      "debug/num_tok_loss": 1611.0,
      "debug/num_tok_total": 2945.0,
      "epoch": 2.73972602739726,
      "step": 200
    },
    {
      "epoch": 2.73972602739726,
      "step": 200,
      "train/ce_loss": 2.71875,
      "train/diffusion_loss": 0.3095567524433136
    },
    {
      "epoch": 2.73972602739726,
      "step": 200,
      "train/learning_rate_real": 2.129952751293229e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2855.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2855.0,
      "epoch": 2.73972602739726,
      "step": 200
    },
    {
      "epoch": 2.73972602739726,
      "step": 200,
      "train/ce_loss": 2.609375,
      "train/diffusion_loss": 0.3999537229537964
    },
    {
      "epoch": 2.73972602739726,
      "step": 200,
      "train/learning_rate_real": 2.129952751293229e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2412.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2412.0,
      "epoch": 2.7534246575342465,
      "step": 201
    },
    {
      "epoch": 2.7534246575342465,
      "step": 201,
      "train/ce_loss": 2.515625,
      "train/diffusion_loss": 0.46536168456077576
    },
    {
      "epoch": 2.7534246575342465,
      "step": 201,
      "train/learning_rate_real": 2.1260047003601576e-05
    },
    {
      "debug/num_lat_loss": 1718.0,
      "debug/num_lat_total": 2739.0,
      "debug/num_tok_loss": 1718.0,
      "debug/num_tok_total": 2739.0,
      "epoch": 2.7534246575342465,
      "step": 201
    },
    {
      "epoch": 2.7534246575342465,
      "step": 201,
      "train/ce_loss": 2.5625,
      "train/diffusion_loss": 0.39696988463401794
    },
    {
      "epoch": 2.7534246575342465,
      "step": 201,
      "train/learning_rate_real": 2.1260047003601576e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 3074.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 3074.0,
      "epoch": 2.7534246575342465,
      "step": 201
    },
    {
      "epoch": 2.7534246575342465,
      "step": 201,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.3201371431350708
    },
    {
      "epoch": 2.7534246575342465,
      "step": 201,
      "train/learning_rate_real": 2.1260047003601576e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3307.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3307.0,
      "epoch": 2.7534246575342465,
      "step": 201
    },
    {
      "epoch": 2.7534246575342465,
      "step": 201,
      "train/ce_loss": 2.671875,
      "train/diffusion_loss": 0.3175411522388458
    },
    {
      "epoch": 2.7534246575342465,
      "step": 201,
      "train/learning_rate_real": 2.1260047003601576e-05
    },
    {
      "debug/num_lat_loss": 1746.0,
      "debug/num_lat_total": 2385.0,
      "debug/num_tok_loss": 1746.0,
      "debug/num_tok_total": 2385.0,
      "epoch": 2.767123287671233,
      "step": 202
    },
    {
      "epoch": 2.767123287671233,
      "step": 202,
      "train/ce_loss": 2.5,
      "train/diffusion_loss": 0.4627111554145813
    },
    {
      "epoch": 2.767123287671233,
      "step": 202,
      "train/learning_rate_real": 2.1220394014187312e-05
    },
    {
      "debug/num_lat_loss": 1813.0,
      "debug/num_lat_total": 2919.0,
      "debug/num_tok_loss": 1813.0,
      "debug/num_tok_total": 2919.0,
      "epoch": 2.767123287671233,
      "step": 202
    },
    {
      "epoch": 2.767123287671233,
      "step": 202,
      "train/ce_loss": 2.5625,
      "train/diffusion_loss": 0.3691011071205139
    },
    {
      "epoch": 2.767123287671233,
      "step": 202,
      "train/learning_rate_real": 2.1220394014187312e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2869.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2869.0,
      "epoch": 2.767123287671233,
      "step": 202
    },
    {
      "epoch": 2.767123287671233,
      "step": 202,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.3681058883666992
    },
    {
      "epoch": 2.767123287671233,
      "step": 202,
      "train/learning_rate_real": 2.1220394014187312e-05
    },
    {
      "debug/num_lat_loss": 1742.0,
      "debug/num_lat_total": 2761.0,
      "debug/num_tok_loss": 1742.0,
      "debug/num_tok_total": 2761.0,
      "epoch": 2.767123287671233,
      "step": 202
    },
    {
      "epoch": 2.767123287671233,
      "step": 202,
      "train/ce_loss": 2.625,
      "train/diffusion_loss": 0.4040772616863251
    },
    {
      "epoch": 2.767123287671233,
      "step": 202,
      "train/learning_rate_real": 2.1220394014187312e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 3058.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 3058.0,
      "epoch": 2.780821917808219,
      "step": 203
    },
    {
      "epoch": 2.780821917808219,
      "step": 203,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.3349795341491699
    },
    {
      "epoch": 2.780821917808219,
      "step": 203,
      "train/learning_rate_real": 2.1180569325433132e-05
    },
    {
      "debug/num_lat_loss": 1821.0,
      "debug/num_lat_total": 2268.0,
      "debug/num_tok_loss": 1821.0,
      "debug/num_tok_total": 2268.0,
      "epoch": 2.780821917808219,
      "step": 203
    },
    {
      "epoch": 2.780821917808219,
      "step": 203,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.4954097867012024
    },
    {
      "epoch": 2.780821917808219,
      "step": 203,
      "train/learning_rate_real": 2.1180569325433132e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 2.780821917808219,
      "step": 203
    },
    {
      "epoch": 2.780821917808219,
      "step": 203,
      "train/ce_loss": 2.515625,
      "train/diffusion_loss": 0.44348451495170593
    },
    {
      "epoch": 2.780821917808219,
      "step": 203,
      "train/learning_rate_real": 2.1180569325433132e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2685.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2685.0,
      "epoch": 2.780821917808219,
      "step": 203
    },
    {
      "epoch": 2.780821917808219,
      "step": 203,
      "train/ce_loss": 2.46875,
      "train/diffusion_loss": 0.3852037191390991
    },
    {
      "epoch": 2.780821917808219,
      "step": 203,
      "train/learning_rate_real": 2.1180569325433132e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2232.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2232.0,
      "epoch": 2.7945205479452055,
      "step": 204
    },
    {
      "epoch": 2.7945205479452055,
      "step": 204,
      "train/ce_loss": 2.5625,
      "train/diffusion_loss": 0.49917471408843994
    },
    {
      "epoch": 2.7945205479452055,
      "step": 204,
      "train/learning_rate_real": 2.114057372146332e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2410.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2410.0,
      "epoch": 2.7945205479452055,
      "step": 204
    },
    {
      "epoch": 2.7945205479452055,
      "step": 204,
      "train/ce_loss": 2.640625,
      "train/diffusion_loss": 0.502190351486206
    },
    {
      "epoch": 2.7945205479452055,
      "step": 204,
      "train/learning_rate_real": 2.114057372146332e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2865.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2865.0,
      "epoch": 2.7945205479452055,
      "step": 204
    },
    {
      "epoch": 2.7945205479452055,
      "step": 204,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.40779879689216614
    },
    {
      "epoch": 2.7945205479452055,
      "step": 204,
      "train/learning_rate_real": 2.114057372146332e-05
    },
    {
      "debug/num_lat_loss": 1628.0,
      "debug/num_lat_total": 2773.0,
      "debug/num_tok_loss": 1628.0,
      "debug/num_tok_total": 2773.0,
      "epoch": 2.7945205479452055,
      "step": 204
    },
    {
      "epoch": 2.7945205479452055,
      "step": 204,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.3184314966201782
    },
    {
      "epoch": 2.7945205479452055,
      "step": 204,
      "train/learning_rate_real": 2.114057372146332e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2456.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2456.0,
      "epoch": 2.808219178082192,
      "step": 205
    },
    {
      "epoch": 2.808219178082192,
      "step": 205,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.43674784898757935
    },
    {
      "epoch": 2.808219178082192,
      "step": 205,
      "train/learning_rate_real": 2.1100407989767395e-05
    },
    {
      "debug/num_lat_loss": 1706.0,
      "debug/num_lat_total": 2690.0,
      "debug/num_tok_loss": 1706.0,
      "debug/num_tok_total": 2690.0,
      "epoch": 2.808219178082192,
      "step": 205
    },
    {
      "epoch": 2.808219178082192,
      "step": 205,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.3507964015007019
    },
    {
      "epoch": 2.808219178082192,
      "step": 205,
      "train/learning_rate_real": 2.1100407989767395e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2607.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2607.0,
      "epoch": 2.808219178082192,
      "step": 205
    },
    {
      "epoch": 2.808219178082192,
      "step": 205,
      "train/ce_loss": 2.5,
      "train/diffusion_loss": 0.43347081542015076
    },
    {
      "epoch": 2.808219178082192,
      "step": 205,
      "train/learning_rate_real": 2.1100407989767395e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 2.808219178082192,
      "step": 205
    },
    {
      "epoch": 2.808219178082192,
      "step": 205,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.42806491255760193
    },
    {
      "epoch": 2.808219178082192,
      "step": 205,
      "train/learning_rate_real": 2.1100407989767395e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2845.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2845.0,
      "epoch": 2.821917808219178,
      "step": 206
    },
    {
      "epoch": 2.821917808219178,
      "step": 206,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.3783215582370758
    },
    {
      "epoch": 2.821917808219178,
      "step": 206,
      "train/learning_rate_real": 2.106007292118457e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2449.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2449.0,
      "epoch": 2.821917808219178,
      "step": 206
    },
    {
      "epoch": 2.821917808219178,
      "step": 206,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.4463353157043457
    },
    {
      "epoch": 2.821917808219178,
      "step": 206,
      "train/learning_rate_real": 2.106007292118457e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2400.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2400.0,
      "epoch": 2.821917808219178,
      "step": 206
    },
    {
      "epoch": 2.821917808219178,
      "step": 206,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.44486525654792786
    },
    {
      "epoch": 2.821917808219178,
      "step": 206,
      "train/learning_rate_real": 2.106007292118457e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2861.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2861.0,
      "epoch": 2.821917808219178,
      "step": 206
    },
    {
      "epoch": 2.821917808219178,
      "step": 206,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.37948623299598694
    },
    {
      "epoch": 2.821917808219178,
      "step": 206,
      "train/learning_rate_real": 2.106007292118457e-05
    },
    {
      "debug/num_lat_loss": 1727.0,
      "debug/num_lat_total": 2969.0,
      "debug/num_tok_loss": 1727.0,
      "debug/num_tok_total": 2969.0,
      "epoch": 2.8356164383561646,
      "step": 207
    },
    {
      "epoch": 2.8356164383561646,
      "step": 207,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.3458055555820465
    },
    {
      "epoch": 2.8356164383561646,
      "step": 207,
      "train/learning_rate_real": 2.1019569309888216e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2899.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2899.0,
      "epoch": 2.8356164383561646,
      "step": 207
    },
    {
      "epoch": 2.8356164383561646,
      "step": 207,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.35966601967811584
    },
    {
      "epoch": 2.8356164383561646,
      "step": 207,
      "train/learning_rate_real": 2.1019569309888216e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2866.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2866.0,
      "epoch": 2.8356164383561646,
      "step": 207
    },
    {
      "epoch": 2.8356164383561646,
      "step": 207,
      "train/ce_loss": 2.515625,
      "train/diffusion_loss": 0.3660614788532257
    },
    {
      "epoch": 2.8356164383561646,
      "step": 207,
      "train/learning_rate_real": 2.1019569309888216e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 3065.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 3065.0,
      "epoch": 2.8356164383561646,
      "step": 207
    },
    {
      "epoch": 2.8356164383561646,
      "step": 207,
      "train/ce_loss": 2.515625,
      "train/diffusion_loss": 0.3202878534793854
    },
    {
      "epoch": 2.8356164383561646,
      "step": 207,
      "train/learning_rate_real": 2.1019569309888216e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2190.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2190.0,
      "epoch": 2.8493150684931505,
      "step": 208
    },
    {
      "epoch": 2.8493150684931505,
      "step": 208,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.5103146433830261
    },
    {
      "epoch": 2.8493150684931505,
      "step": 208,
      "train/learning_rate_real": 2.0978897953370204e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2629.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2629.0,
      "epoch": 2.8493150684931505,
      "step": 208
    },
    {
      "epoch": 2.8493150684931505,
      "step": 208,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.3646072745323181
    },
    {
      "epoch": 2.8493150684931505,
      "step": 208,
      "train/learning_rate_real": 2.0978897953370204e-05
    },
    {
      "debug/num_lat_loss": 1679.0,
      "debug/num_lat_total": 2529.0,
      "debug/num_tok_loss": 1679.0,
      "debug/num_tok_total": 2529.0,
      "epoch": 2.8493150684931505,
      "step": 208
    },
    {
      "epoch": 2.8493150684931505,
      "step": 208,
      "train/ce_loss": 2.703125,
      "train/diffusion_loss": 0.4177684485912323
    },
    {
      "epoch": 2.8493150684931505,
      "step": 208,
      "train/learning_rate_real": 2.0978897953370204e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2852.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2852.0,
      "epoch": 2.8493150684931505,
      "step": 208
    },
    {
      "epoch": 2.8493150684931505,
      "step": 208,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.3632550835609436
    },
    {
      "epoch": 2.8493150684931505,
      "step": 208,
      "train/learning_rate_real": 2.0978897953370204e-05
    },
    {
      "debug/num_lat_loss": 1707.0,
      "debug/num_lat_total": 2360.0,
      "debug/num_tok_loss": 1707.0,
      "debug/num_tok_total": 2360.0,
      "epoch": 2.863013698630137,
      "step": 209
    },
    {
      "epoch": 2.863013698630137,
      "step": 209,
      "train/ce_loss": 2.765625,
      "train/diffusion_loss": 0.45615288615226746
    },
    {
      "epoch": 2.863013698630137,
      "step": 209,
      "train/learning_rate_real": 2.0938059652425196e-05
    },
    {
      "debug/num_lat_loss": 1817.0,
      "debug/num_lat_total": 2475.0,
      "debug/num_tok_loss": 1817.0,
      "debug/num_tok_total": 2475.0,
      "epoch": 2.863013698630137,
      "step": 209
    },
    {
      "epoch": 2.863013698630137,
      "step": 209,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.4588419795036316
    },
    {
      "epoch": 2.863013698630137,
      "step": 209,
      "train/learning_rate_real": 2.0938059652425196e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2627.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2627.0,
      "epoch": 2.863013698630137,
      "step": 209
    },
    {
      "epoch": 2.863013698630137,
      "step": 209,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.40409165620803833
    },
    {
      "epoch": 2.863013698630137,
      "step": 209,
      "train/learning_rate_real": 2.0938059652425196e-05
    },
    {
      "debug/num_lat_loss": 1648.0,
      "debug/num_lat_total": 1866.0,
      "debug/num_tok_loss": 1648.0,
      "debug/num_tok_total": 1866.0,
      "epoch": 2.863013698630137,
      "step": 209
    },
    {
      "epoch": 2.863013698630137,
      "step": 209,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.5563381910324097
    },
    {
      "epoch": 2.863013698630137,
      "step": 209,
      "train/learning_rate_real": 2.0938059652425196e-05
    },
    {
      "epoch": 2.8767123287671232,
      "grad_norm": 1.6836766004562378,
      "learning_rate": 2.0938059652425196e-05,
      "loss": 2.6799,
      "step": 210
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 2.8767123287671232,
      "step": 210
    },
    {
      "epoch": 2.8767123287671232,
      "step": 210,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.478886216878891
    },
    {
      "epoch": 2.8767123287671232,
      "step": 210,
      "train/learning_rate_real": 2.0897055211134912e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2417.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2417.0,
      "epoch": 2.8767123287671232,
      "step": 210
    },
    {
      "epoch": 2.8767123287671232,
      "step": 210,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.48459577560424805
    },
    {
      "epoch": 2.8767123287671232,
      "step": 210,
      "train/learning_rate_real": 2.0897055211134912e-05
    },
    {
      "debug/num_lat_loss": 1696.0,
      "debug/num_lat_total": 2684.0,
      "debug/num_tok_loss": 1696.0,
      "debug/num_tok_total": 2684.0,
      "epoch": 2.8767123287671232,
      "step": 210
    },
    {
      "epoch": 2.8767123287671232,
      "step": 210,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.37361788749694824
    },
    {
      "epoch": 2.8767123287671232,
      "step": 210,
      "train/learning_rate_real": 2.0897055211134912e-05
    },
    {
      "debug/num_lat_loss": 1734.0,
      "debug/num_lat_total": 2366.0,
      "debug/num_tok_loss": 1734.0,
      "debug/num_tok_total": 2366.0,
      "epoch": 2.8767123287671232,
      "step": 210
    },
    {
      "epoch": 2.8767123287671232,
      "step": 210,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.45766681432724
    },
    {
      "epoch": 2.8767123287671232,
      "step": 210,
      "train/learning_rate_real": 2.0897055211134912e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 3123.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 3123.0,
      "epoch": 2.8904109589041096,
      "step": 211
    },
    {
      "epoch": 2.8904109589041096,
      "step": 211,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.343945175409317
    },
    {
      "epoch": 2.8904109589041096,
      "step": 211,
      "train/learning_rate_real": 2.0855885436852256e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2211.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2211.0,
      "epoch": 2.8904109589041096,
      "step": 211
    },
    {
      "epoch": 2.8904109589041096,
      "step": 211,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.5252296924591064
    },
    {
      "epoch": 2.8904109589041096,
      "step": 211,
      "train/learning_rate_real": 2.0855885436852256e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2873.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2873.0,
      "epoch": 2.8904109589041096,
      "step": 211
    },
    {
      "epoch": 2.8904109589041096,
      "step": 211,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.3848054111003876
    },
    {
      "epoch": 2.8904109589041096,
      "step": 211,
      "train/learning_rate_real": 2.0855885436852256e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2198.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2198.0,
      "epoch": 2.8904109589041096,
      "step": 211
    },
    {
      "epoch": 2.8904109589041096,
      "step": 211,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.49421730637550354
    },
    {
      "epoch": 2.8904109589041096,
      "step": 211,
      "train/learning_rate_real": 2.0855885436852256e-05
    },
    {
      "debug/num_lat_loss": 1744.0,
      "debug/num_lat_total": 2587.0,
      "debug/num_tok_loss": 1744.0,
      "debug/num_tok_total": 2587.0,
      "epoch": 2.904109589041096,
      "step": 212
    },
    {
      "epoch": 2.904109589041096,
      "step": 212,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.40978991985321045
    },
    {
      "epoch": 2.904109589041096,
      "step": 212,
      "train/learning_rate_real": 2.0814551140185452e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3320.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3320.0,
      "epoch": 2.904109589041096,
      "step": 212
    },
    {
      "epoch": 2.904109589041096,
      "step": 212,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.2829601466655731
    },
    {
      "epoch": 2.904109589041096,
      "step": 212,
      "train/learning_rate_real": 2.0814551140185452e-05
    },
    {
      "debug/num_lat_loss": 1819.0,
      "debug/num_lat_total": 2921.0,
      "debug/num_tok_loss": 1819.0,
      "debug/num_tok_total": 2921.0,
      "epoch": 2.904109589041096,
      "step": 212
    },
    {
      "epoch": 2.904109589041096,
      "step": 212,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.4063016176223755
    },
    {
      "epoch": 2.904109589041096,
      "step": 212,
      "train/learning_rate_real": 2.0814551140185452e-05
    },
    {
      "debug/num_lat_loss": 1653.0,
      "debug/num_lat_total": 1976.0,
      "debug/num_tok_loss": 1653.0,
      "debug/num_tok_total": 1976.0,
      "epoch": 2.904109589041096,
      "step": 212
    },
    {
      "epoch": 2.904109589041096,
      "step": 212,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.48956847190856934
    },
    {
      "epoch": 2.904109589041096,
      "step": 212,
      "train/learning_rate_real": 2.0814551140185452e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2225.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2225.0,
      "epoch": 2.9178082191780823,
      "step": 213
    },
    {
      "epoch": 2.9178082191780823,
      "step": 213,
      "train/ce_loss": 2.34375,
      "train/diffusion_loss": 0.49811357259750366
    },
    {
      "epoch": 2.9178082191780823,
      "step": 213,
      "train/learning_rate_real": 2.0773053134982063e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 2.9178082191780823,
      "step": 213
    },
    {
      "epoch": 2.9178082191780823,
      "step": 213,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.43500933051109314
    },
    {
      "epoch": 2.9178082191780823,
      "step": 213,
      "train/learning_rate_real": 2.0773053134982063e-05
    },
    {
      "debug/num_lat_loss": 1544.0,
      "debug/num_lat_total": 3028.0,
      "debug/num_tok_loss": 1544.0,
      "debug/num_tok_total": 3028.0,
      "epoch": 2.9178082191780823,
      "step": 213
    },
    {
      "epoch": 2.9178082191780823,
      "step": 213,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.23581035435199738
    },
    {
      "epoch": 2.9178082191780823,
      "step": 213,
      "train/learning_rate_real": 2.0773053134982063e-05
    },
    {
      "debug/num_lat_loss": 1698.0,
      "debug/num_lat_total": 2457.0,
      "debug/num_tok_loss": 1698.0,
      "debug/num_tok_total": 2457.0,
      "epoch": 2.9178082191780823,
      "step": 213
    },
    {
      "epoch": 2.9178082191780823,
      "step": 213,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.423569917678833
    },
    {
      "epoch": 2.9178082191780823,
      "step": 213,
      "train/learning_rate_real": 2.0773053134982063e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2201.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2201.0,
      "epoch": 2.9315068493150687,
      "step": 214
    },
    {
      "epoch": 2.9315068493150687,
      "step": 214,
      "train/ce_loss": 2.53125,
      "train/diffusion_loss": 0.514683723449707
    },
    {
      "epoch": 2.9315068493150687,
      "step": 214,
      "train/learning_rate_real": 2.0731392238312985e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2424.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2424.0,
      "epoch": 2.9315068493150687,
      "step": 214
    },
    {
      "epoch": 2.9315068493150687,
      "step": 214,
      "train/ce_loss": 2.609375,
      "train/diffusion_loss": 0.4812524616718292
    },
    {
      "epoch": 2.9315068493150687,
      "step": 214,
      "train/learning_rate_real": 2.0731392238312985e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 1998.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 1998.0,
      "epoch": 2.9315068493150687,
      "step": 214
    },
    {
      "epoch": 2.9315068493150687,
      "step": 214,
      "train/ce_loss": 2.5,
      "train/diffusion_loss": 0.5399637222290039
    },
    {
      "epoch": 2.9315068493150687,
      "step": 214,
      "train/learning_rate_real": 2.0731392238312985e-05
    },
    {
      "debug/num_lat_loss": 1683.0,
      "debug/num_lat_total": 2877.0,
      "debug/num_tok_loss": 1683.0,
      "debug/num_tok_total": 2877.0,
      "epoch": 2.9315068493150687,
      "step": 214
    },
    {
      "epoch": 2.9315068493150687,
      "step": 214,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.3324914872646332
    },
    {
      "epoch": 2.9315068493150687,
      "step": 214,
      "train/learning_rate_real": 2.0731392238312985e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2618.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2618.0,
      "epoch": 2.9452054794520546,
      "step": 215
    },
    {
      "epoch": 2.9452054794520546,
      "step": 215,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.4014064073562622
    },
    {
      "epoch": 2.9452054794520546,
      "step": 215,
      "train/learning_rate_real": 2.0689569270456337e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2896.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2896.0,
      "epoch": 2.9452054794520546,
      "step": 215
    },
    {
      "epoch": 2.9452054794520546,
      "step": 215,
      "train/ce_loss": 2.59375,
      "train/diffusion_loss": 0.37228548526763916
    },
    {
      "epoch": 2.9452054794520546,
      "step": 215,
      "train/learning_rate_real": 2.0689569270456337e-05
    },
    {
      "debug/num_lat_loss": 1748.0,
      "debug/num_lat_total": 2388.0,
      "debug/num_tok_loss": 1748.0,
      "debug/num_tok_total": 2388.0,
      "epoch": 2.9452054794520546,
      "step": 215
    },
    {
      "epoch": 2.9452054794520546,
      "step": 215,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.4462081789970398
    },
    {
      "epoch": 2.9452054794520546,
      "step": 215,
      "train/learning_rate_real": 2.0689569270456337e-05
    },
    {
      "debug/num_lat_loss": 1757.0,
      "debug/num_lat_total": 2409.0,
      "debug/num_tok_loss": 1757.0,
      "debug/num_tok_total": 2409.0,
      "epoch": 2.9452054794520546,
      "step": 215
    },
    {
      "epoch": 2.9452054794520546,
      "step": 215,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.4493180215358734
    },
    {
      "epoch": 2.9452054794520546,
      "step": 215,
      "train/learning_rate_real": 2.0689569270456337e-05
    },
    {
      "debug/num_lat_loss": 1700.0,
      "debug/num_lat_total": 2334.0,
      "debug/num_tok_loss": 1700.0,
      "debug/num_tok_total": 2334.0,
      "epoch": 2.958904109589041,
      "step": 216
    },
    {
      "epoch": 2.958904109589041,
      "step": 216,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.4480825364589691
    },
    {
      "epoch": 2.958904109589041,
      "step": 216,
      "train/learning_rate_real": 2.0647585054881343e-05
    },
    {
      "debug/num_lat_loss": 1813.0,
      "debug/num_lat_total": 2465.0,
      "debug/num_tok_loss": 1813.0,
      "debug/num_tok_total": 2465.0,
      "epoch": 2.958904109589041,
      "step": 216
    },
    {
      "epoch": 2.958904109589041,
      "step": 216,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.4440514147281647
    },
    {
      "epoch": 2.958904109589041,
      "step": 216,
      "train/learning_rate_real": 2.0647585054881343e-05
    },
    {
      "debug/num_lat_loss": 1624.0,
      "debug/num_lat_total": 2321.0,
      "debug/num_tok_loss": 1624.0,
      "debug/num_tok_total": 2321.0,
      "epoch": 2.958904109589041,
      "step": 216
    },
    {
      "epoch": 2.958904109589041,
      "step": 216,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.4189290702342987
    },
    {
      "epoch": 2.958904109589041,
      "step": 216,
      "train/learning_rate_real": 2.0647585054881343e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 3103.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 3103.0,
      "epoch": 2.958904109589041,
      "step": 216
    },
    {
      "epoch": 2.958904109589041,
      "step": 216,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.339295357465744
    },
    {
      "epoch": 2.958904109589041,
      "step": 216,
      "train/learning_rate_real": 2.0647585054881343e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 2.9726027397260273,
      "step": 217
    },
    {
      "epoch": 2.9726027397260273,
      "step": 217,
      "train/ce_loss": 2.5625,
      "train/diffusion_loss": 0.4040837287902832
    },
    {
      "epoch": 2.9726027397260273,
      "step": 217,
      "train/learning_rate_real": 2.0605440418232066e-05
    },
    {
      "debug/num_lat_loss": 1579.0,
      "debug/num_lat_total": 2672.0,
      "debug/num_tok_loss": 1579.0,
      "debug/num_tok_total": 2672.0,
      "epoch": 2.9726027397260273,
      "step": 217
    },
    {
      "epoch": 2.9726027397260273,
      "step": 217,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.3396430015563965
    },
    {
      "epoch": 2.9726027397260273,
      "step": 217,
      "train/learning_rate_real": 2.0605440418232066e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2432.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2432.0,
      "epoch": 2.9726027397260273,
      "step": 217
    },
    {
      "epoch": 2.9726027397260273,
      "step": 217,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.4572865664958954
    },
    {
      "epoch": 2.9726027397260273,
      "step": 217,
      "train/learning_rate_real": 2.0605440418232066e-05
    },
    {
      "debug/num_lat_loss": 1648.0,
      "debug/num_lat_total": 2584.0,
      "debug/num_tok_loss": 1648.0,
      "debug/num_tok_total": 2584.0,
      "epoch": 2.9726027397260273,
      "step": 217
    },
    {
      "epoch": 2.9726027397260273,
      "step": 217,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.3818599283695221
    },
    {
      "epoch": 2.9726027397260273,
      "step": 217,
      "train/learning_rate_real": 2.0605440418232066e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2894.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2894.0,
      "epoch": 2.9863013698630136,
      "step": 218
    },
    {
      "epoch": 2.9863013698630136,
      "step": 218,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.35479843616485596
    },
    {
      "epoch": 2.9863013698630136,
      "step": 218,
      "train/learning_rate_real": 2.0563136190311195e-05
    },
    {
      "debug/num_lat_loss": 1386.0,
      "debug/num_lat_total": 1839.0,
      "debug/num_tok_loss": 1386.0,
      "debug/num_tok_total": 1839.0,
      "epoch": 2.9863013698630136,
      "step": 218
    },
    {
      "epoch": 2.9863013698630136,
      "step": 218,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.4515104293823242
    },
    {
      "epoch": 2.9863013698630136,
      "step": 218,
      "train/learning_rate_real": 2.0563136190311195e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2863.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2863.0,
      "epoch": 2.9863013698630136,
      "step": 218
    },
    {
      "epoch": 2.9863013698630136,
      "step": 218,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.37860241532325745
    },
    {
      "epoch": 2.9863013698630136,
      "step": 218,
      "train/learning_rate_real": 2.0563136190311195e-05
    },
    {
      "debug/num_lat_loss": 440.0,
      "debug/num_lat_total": 864.0,
      "debug/num_tok_loss": 440.0,
      "debug/num_tok_total": 864.0,
      "epoch": 2.9863013698630136,
      "step": 218
    },
    {
      "epoch": 2.9863013698630136,
      "step": 218,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.2369404137134552
    },
    {
      "epoch": 2.9863013698630136,
      "step": 218,
      "train/learning_rate_real": 2.0563136190311195e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 3075.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 3075.0,
      "epoch": 3.0,
      "step": 219
    },
    {
      "epoch": 3.0,
      "step": 219,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.2987498342990875
    },
    {
      "epoch": 3.0,
      "step": 219,
      "train/learning_rate_real": 2.0520673204063662e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2680.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2680.0,
      "epoch": 3.0,
      "step": 219
    },
    {
      "epoch": 3.0,
      "step": 219,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.39434877038002014
    },
    {
      "epoch": 3.0,
      "step": 219,
      "train/learning_rate_real": 2.0520673204063662e-05
    },
    {
      "debug/num_lat_loss": 1591.0,
      "debug/num_lat_total": 2230.0,
      "debug/num_tok_loss": 1591.0,
      "debug/num_tok_total": 2230.0,
      "epoch": 3.0,
      "step": 219
    },
    {
      "epoch": 3.0,
      "step": 219,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.462157279253006
    },
    {
      "epoch": 3.0,
      "step": 219,
      "train/learning_rate_real": 2.0520673204063662e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2439.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2439.0,
      "epoch": 3.0,
      "step": 219
    },
    {
      "epoch": 3.0,
      "step": 219,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.46685951948165894
    },
    {
      "epoch": 3.0,
      "step": 219,
      "train/learning_rate_real": 2.0520673204063662e-05
    },
    {
      "epoch": 3.0136986301369864,
      "grad_norm": 1.6216073036193848,
      "learning_rate": 2.0520673204063662e-05,
      "loss": 2.7037,
      "step": 220
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2494.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2494.0,
      "epoch": 3.0136986301369864,
      "step": 220
    },
    {
      "epoch": 3.0136986301369864,
      "step": 220,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.4172014892101288
    },
    {
      "epoch": 3.0136986301369864,
      "step": 220,
      "train/learning_rate_real": 2.0478052295560253e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2664.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2664.0,
      "epoch": 3.0136986301369864,
      "step": 220
    },
    {
      "epoch": 3.0136986301369864,
      "step": 220,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.39119380712509155
    },
    {
      "epoch": 3.0136986301369864,
      "step": 220,
      "train/learning_rate_real": 2.0478052295560253e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2433.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2433.0,
      "epoch": 3.0136986301369864,
      "step": 220
    },
    {
      "epoch": 3.0136986301369864,
      "step": 220,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.4629015624523163
    },
    {
      "epoch": 3.0136986301369864,
      "step": 220,
      "train/learning_rate_real": 2.0478052295560253e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 3.0136986301369864,
      "step": 220
    },
    {
      "epoch": 3.0136986301369864,
      "step": 220,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.40112146735191345
    },
    {
      "epoch": 3.0136986301369864,
      "step": 220,
      "train/learning_rate_real": 2.0478052295560253e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2418.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2418.0,
      "epoch": 3.0273972602739727,
      "step": 221
    },
    {
      "epoch": 3.0273972602739727,
      "step": 221,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.4611798822879791
    },
    {
      "epoch": 3.0273972602739727,
      "step": 221,
      "train/learning_rate_real": 2.0435274303981154e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 3345.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 3345.0,
      "epoch": 3.0273972602739727,
      "step": 221
    },
    {
      "epoch": 3.0273972602739727,
      "step": 221,
      "train/ce_loss": 2.578125,
      "train/diffusion_loss": 0.2868610918521881
    },
    {
      "epoch": 3.0273972602739727,
      "step": 221,
      "train/learning_rate_real": 2.0435274303981154e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2405.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2405.0,
      "epoch": 3.0273972602739727,
      "step": 221
    },
    {
      "epoch": 3.0273972602739727,
      "step": 221,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.4683093726634979
    },
    {
      "epoch": 3.0273972602739727,
      "step": 221,
      "train/learning_rate_real": 2.0435274303981154e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 3055.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 3055.0,
      "epoch": 3.0273972602739727,
      "step": 221
    },
    {
      "epoch": 3.0273972602739727,
      "step": 221,
      "train/ce_loss": 2.515625,
      "train/diffusion_loss": 0.3245576322078705
    },
    {
      "epoch": 3.0273972602739727,
      "step": 221,
      "train/learning_rate_real": 2.0435274303981154e-05
    },
    {
      "debug/num_lat_loss": 1693.0,
      "debug/num_lat_total": 2910.0,
      "debug/num_tok_loss": 1693.0,
      "debug/num_tok_total": 2910.0,
      "epoch": 3.041095890410959,
      "step": 222
    },
    {
      "epoch": 3.041095890410959,
      "step": 222,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.32742902636528015
    },
    {
      "epoch": 3.041095890410959,
      "step": 222,
      "train/learning_rate_real": 2.0392340071599418e-05
    },
    {
      "debug/num_lat_loss": 1656.0,
      "debug/num_lat_total": 2383.0,
      "debug/num_tok_loss": 1656.0,
      "debug/num_tok_total": 2383.0,
      "epoch": 3.041095890410959,
      "step": 222
    },
    {
      "epoch": 3.041095890410959,
      "step": 222,
      "train/ce_loss": 2.515625,
      "train/diffusion_loss": 0.4179707467556
    },
    {
      "epoch": 3.041095890410959,
      "step": 222,
      "train/learning_rate_real": 2.0392340071599418e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2634.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2634.0,
      "epoch": 3.041095890410959,
      "step": 222
    },
    {
      "epoch": 3.041095890410959,
      "step": 222,
      "train/ce_loss": 2.46875,
      "train/diffusion_loss": 0.40202271938323975
    },
    {
      "epoch": 3.041095890410959,
      "step": 222,
      "train/learning_rate_real": 2.0392340071599418e-05
    },
    {
      "debug/num_lat_loss": 1826.0,
      "debug/num_lat_total": 2481.0,
      "debug/num_tok_loss": 1826.0,
      "debug/num_tok_total": 2481.0,
      "epoch": 3.041095890410959,
      "step": 222
    },
    {
      "epoch": 3.041095890410959,
      "step": 222,
      "train/ce_loss": 2.640625,
      "train/diffusion_loss": 0.4611271023750305
    },
    {
      "epoch": 3.041095890410959,
      "step": 222,
      "train/learning_rate_real": 2.0392340071599418e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2004.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2004.0,
      "epoch": 3.0547945205479454,
      "step": 223
    },
    {
      "epoch": 3.0547945205479454,
      "step": 223,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.5542777180671692
    },
    {
      "epoch": 3.0547945205479454,
      "step": 223,
      "train/learning_rate_real": 2.03492504437644e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2015.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2015.0,
      "epoch": 3.0547945205479454,
      "step": 223
    },
    {
      "epoch": 3.0547945205479454,
      "step": 223,
      "train/ce_loss": 2.484375,
      "train/diffusion_loss": 0.5250970125198364
    },
    {
      "epoch": 3.0547945205479454,
      "step": 223,
      "train/learning_rate_real": 2.03492504437644e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 1998.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 1998.0,
      "epoch": 3.0547945205479454,
      "step": 223
    },
    {
      "epoch": 3.0547945205479454,
      "step": 223,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.5293837785720825
    },
    {
      "epoch": 3.0547945205479454,
      "step": 223,
      "train/learning_rate_real": 2.03492504437644e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2439.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2439.0,
      "epoch": 3.0547945205479454,
      "step": 223
    },
    {
      "epoch": 3.0547945205479454,
      "step": 223,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.43268001079559326
    },
    {
      "epoch": 3.0547945205479454,
      "step": 223,
      "train/learning_rate_real": 2.03492504437644e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2434.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2434.0,
      "epoch": 3.0684931506849313,
      "step": 224
    },
    {
      "epoch": 3.0684931506849313,
      "step": 224,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.4283905625343323
    },
    {
      "epoch": 3.0684931506849313,
      "step": 224,
      "train/learning_rate_real": 2.0306006268885074e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2432.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2432.0,
      "epoch": 3.0684931506849313,
      "step": 224
    },
    {
      "epoch": 3.0684931506849313,
      "step": 224,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.4826100170612335
    },
    {
      "epoch": 3.0684931506849313,
      "step": 224,
      "train/learning_rate_real": 2.0306006268885074e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2845.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2845.0,
      "epoch": 3.0684931506849313,
      "step": 224
    },
    {
      "epoch": 3.0684931506849313,
      "step": 224,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.3821423351764679
    },
    {
      "epoch": 3.0684931506849313,
      "step": 224,
      "train/learning_rate_real": 2.0306006268885074e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2458.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2458.0,
      "epoch": 3.0684931506849313,
      "step": 224
    },
    {
      "epoch": 3.0684931506849313,
      "step": 224,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.4636325240135193
    },
    {
      "epoch": 3.0684931506849313,
      "step": 224,
      "train/learning_rate_real": 2.0306006268885074e-05
    },
    {
      "debug/num_lat_loss": 1724.0,
      "debug/num_lat_total": 2739.0,
      "debug/num_tok_loss": 1724.0,
      "debug/num_tok_total": 2739.0,
      "epoch": 3.0821917808219177,
      "step": 225
    },
    {
      "epoch": 3.0821917808219177,
      "step": 225,
      "train/ce_loss": 2.34375,
      "train/diffusion_loss": 0.3534652590751648
    },
    {
      "epoch": 3.0821917808219177,
      "step": 225,
      "train/learning_rate_real": 2.0262608398413375e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2422.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2422.0,
      "epoch": 3.0821917808219177,
      "step": 225
    },
    {
      "epoch": 3.0821917808219177,
      "step": 225,
      "train/ce_loss": 2.296875,
      "train/diffusion_loss": 0.4696789085865021
    },
    {
      "epoch": 3.0821917808219177,
      "step": 225,
      "train/learning_rate_real": 2.0262608398413375e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2639.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2639.0,
      "epoch": 3.0821917808219177,
      "step": 225
    },
    {
      "epoch": 3.0821917808219177,
      "step": 225,
      "train/ce_loss": 2.296875,
      "train/diffusion_loss": 0.4197196960449219
    },
    {
      "epoch": 3.0821917808219177,
      "step": 225,
      "train/learning_rate_real": 2.0262608398413375e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 3.0821917808219177,
      "step": 225
    },
    {
      "epoch": 3.0821917808219177,
      "step": 225,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.45736926794052124
    },
    {
      "epoch": 3.0821917808219177,
      "step": 225,
      "train/learning_rate_real": 2.0262608398413375e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 3.095890410958904,
      "step": 226
    },
    {
      "epoch": 3.095890410958904,
      "step": 226,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.34873902797698975
    },
    {
      "epoch": 3.095890410958904,
      "step": 226,
      "train/learning_rate_real": 2.02190576868274e-05
    },
    {
      "debug/num_lat_loss": 1384.0,
      "debug/num_lat_total": 2466.0,
      "debug/num_tok_loss": 1384.0,
      "debug/num_tok_total": 2466.0,
      "epoch": 3.095890410958904,
      "step": 226
    },
    {
      "epoch": 3.095890410958904,
      "step": 226,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.2928074300289154
    },
    {
      "epoch": 3.095890410958904,
      "step": 226,
      "train/learning_rate_real": 2.02190576868274e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2903.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2903.0,
      "epoch": 3.095890410958904,
      "step": 226
    },
    {
      "epoch": 3.095890410958904,
      "step": 226,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.3605611324310303
    },
    {
      "epoch": 3.095890410958904,
      "step": 226,
      "train/learning_rate_real": 2.02190576868274e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2245.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2245.0,
      "epoch": 3.095890410958904,
      "step": 226
    },
    {
      "epoch": 3.095890410958904,
      "step": 226,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.5095975399017334
    },
    {
      "epoch": 3.095890410958904,
      "step": 226,
      "train/learning_rate_real": 2.02190576868274e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2201.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2201.0,
      "epoch": 3.1095890410958904,
      "step": 227
    },
    {
      "epoch": 3.1095890410958904,
      "step": 227,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.47939595580101013
    },
    {
      "epoch": 3.1095890410958904,
      "step": 227,
      "train/learning_rate_real": 2.01753549916146e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2665.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2665.0,
      "epoch": 3.1095890410958904,
      "step": 227
    },
    {
      "epoch": 3.1095890410958904,
      "step": 227,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.4275529086589813
    },
    {
      "epoch": 3.1095890410958904,
      "step": 227,
      "train/learning_rate_real": 2.01753549916146e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 3038.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 3038.0,
      "epoch": 3.1095890410958904,
      "step": 227
    },
    {
      "epoch": 3.1095890410958904,
      "step": 227,
      "train/ce_loss": 2.296875,
      "train/diffusion_loss": 0.30026838183403015
    },
    {
      "epoch": 3.1095890410958904,
      "step": 227,
      "train/learning_rate_real": 2.01753549916146e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 3.1095890410958904,
      "step": 227
    },
    {
      "epoch": 3.1095890410958904,
      "step": 227,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.4443340599536896
    },
    {
      "epoch": 3.1095890410958904,
      "step": 227,
      "train/learning_rate_real": 2.01753549916146e-05
    },
    {
      "debug/num_lat_loss": 1601.0,
      "debug/num_lat_total": 2706.0,
      "debug/num_tok_loss": 1601.0,
      "debug/num_tok_total": 2706.0,
      "epoch": 3.1232876712328768,
      "step": 228
    },
    {
      "epoch": 3.1232876712328768,
      "step": 228,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.2934297025203705
    },
    {
      "epoch": 3.1232876712328768,
      "step": 228,
      "train/learning_rate_real": 2.0131501173254895e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2850.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2850.0,
      "epoch": 3.1232876712328768,
      "step": 228
    },
    {
      "epoch": 3.1232876712328768,
      "step": 228,
      "train/ce_loss": 2.28125,
      "train/diffusion_loss": 0.3723796010017395
    },
    {
      "epoch": 3.1232876712328768,
      "step": 228,
      "train/learning_rate_real": 2.0131501173254895e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2232.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2232.0,
      "epoch": 3.1232876712328768,
      "step": 228
    },
    {
      "epoch": 3.1232876712328768,
      "step": 228,
      "train/ce_loss": 2.34375,
      "train/diffusion_loss": 0.4982549846172333
    },
    {
      "epoch": 3.1232876712328768,
      "step": 228,
      "train/learning_rate_real": 2.0131501173254895e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2419.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2419.0,
      "epoch": 3.1232876712328768,
      "step": 228
    },
    {
      "epoch": 3.1232876712328768,
      "step": 228,
      "train/ce_loss": 2.28125,
      "train/diffusion_loss": 0.433102548122406
    },
    {
      "epoch": 3.1232876712328768,
      "step": 228,
      "train/learning_rate_real": 2.0131501173254895e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2213.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2213.0,
      "epoch": 3.136986301369863,
      "step": 229
    },
    {
      "epoch": 3.136986301369863,
      "step": 229,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.48188430070877075
    },
    {
      "epoch": 3.136986301369863,
      "step": 229,
      "train/learning_rate_real": 2.0087497095203724e-05
    },
    {
      "debug/num_lat_loss": 1725.0,
      "debug/num_lat_total": 2152.0,
      "debug/num_tok_loss": 1725.0,
      "debug/num_tok_total": 2152.0,
      "epoch": 3.136986301369863,
      "step": 229
    },
    {
      "epoch": 3.136986301369863,
      "step": 229,
      "train/ce_loss": 2.515625,
      "train/diffusion_loss": 0.5034520030021667
    },
    {
      "epoch": 3.136986301369863,
      "step": 229,
      "train/learning_rate_real": 2.0087497095203724e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 3262.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 3262.0,
      "epoch": 3.136986301369863,
      "step": 229
    },
    {
      "epoch": 3.136986301369863,
      "step": 229,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.26973801851272583
    },
    {
      "epoch": 3.136986301369863,
      "step": 229,
      "train/learning_rate_real": 2.0087497095203724e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2179.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2179.0,
      "epoch": 3.136986301369863,
      "step": 229
    },
    {
      "epoch": 3.136986301369863,
      "step": 229,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.5100567936897278
    },
    {
      "epoch": 3.136986301369863,
      "step": 229,
      "train/learning_rate_real": 2.0087497095203724e-05
    },
    {
      "epoch": 3.1506849315068495,
      "grad_norm": 1.9824601411819458,
      "learning_rate": 2.0087497095203724e-05,
      "loss": 2.7385,
      "step": 230
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2430.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2430.0,
      "epoch": 3.1506849315068495,
      "step": 230
    },
    {
      "epoch": 3.1506849315068495,
      "step": 230,
      "train/ce_loss": 2.34375,
      "train/diffusion_loss": 0.4837017357349396
    },
    {
      "epoch": 3.1506849315068495,
      "step": 230,
      "train/learning_rate_real": 2.004334362387505e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2625.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2625.0,
      "epoch": 3.1506849315068495,
      "step": 230
    },
    {
      "epoch": 3.1506849315068495,
      "step": 230,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.3880443274974823
    },
    {
      "epoch": 3.1506849315068495,
      "step": 230,
      "train/learning_rate_real": 2.004334362387505e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 3.1506849315068495,
      "step": 230
    },
    {
      "epoch": 3.1506849315068495,
      "step": 230,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.40594106912612915
    },
    {
      "epoch": 3.1506849315068495,
      "step": 230,
      "train/learning_rate_real": 2.004334362387505e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2609.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2609.0,
      "epoch": 3.1506849315068495,
      "step": 230
    },
    {
      "epoch": 3.1506849315068495,
      "step": 230,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.4162349998950958
    },
    {
      "epoch": 3.1506849315068495,
      "step": 230,
      "train/learning_rate_real": 2.004334362387505e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2194.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2194.0,
      "epoch": 3.1643835616438354,
      "step": 231
    },
    {
      "epoch": 3.1643835616438354,
      "step": 231,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.5041800141334534
    },
    {
      "epoch": 3.1643835616438354,
      "step": 231,
      "train/learning_rate_real": 1.9999041628624304e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2626.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2626.0,
      "epoch": 3.1643835616438354,
      "step": 231
    },
    {
      "epoch": 3.1643835616438354,
      "step": 231,
      "train/ce_loss": 2.34375,
      "train/diffusion_loss": 0.4167364835739136
    },
    {
      "epoch": 3.1643835616438354,
      "step": 231,
      "train/learning_rate_real": 1.9999041628624304e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2878.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2878.0,
      "epoch": 3.1643835616438354,
      "step": 231
    },
    {
      "epoch": 3.1643835616438354,
      "step": 231,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.35796892642974854
    },
    {
      "epoch": 3.1643835616438354,
      "step": 231,
      "train/learning_rate_real": 1.9999041628624304e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2670.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2670.0,
      "epoch": 3.1643835616438354,
      "step": 231
    },
    {
      "epoch": 3.1643835616438354,
      "step": 231,
      "train/ce_loss": 2.46875,
      "train/diffusion_loss": 0.42231622338294983
    },
    {
      "epoch": 3.1643835616438354,
      "step": 231,
      "train/learning_rate_real": 1.9999041628624304e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2884.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2884.0,
      "epoch": 3.1780821917808217,
      "step": 232
    },
    {
      "epoch": 3.1780821917808217,
      "step": 232,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.3421187996864319
    },
    {
      "epoch": 3.1780821917808217,
      "step": 232,
      "train/learning_rate_real": 1.995459198173127e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2199.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2199.0,
      "epoch": 3.1780821917808217,
      "step": 232
    },
    {
      "epoch": 3.1780821917808217,
      "step": 232,
      "train/ce_loss": 2.34375,
      "train/diffusion_loss": 0.4964865744113922
    },
    {
      "epoch": 3.1780821917808217,
      "step": 232,
      "train/learning_rate_real": 1.995459198173127e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 3.1780821917808217,
      "step": 232
    },
    {
      "epoch": 3.1780821917808217,
      "step": 232,
      "train/ce_loss": 2.84375,
      "train/diffusion_loss": 0.4140637218952179
    },
    {
      "epoch": 3.1780821917808217,
      "step": 232,
      "train/learning_rate_real": 1.995459198173127e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2224.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2224.0,
      "epoch": 3.1780821917808217,
      "step": 232
    },
    {
      "epoch": 3.1780821917808217,
      "step": 232,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.5294895172119141
    },
    {
      "epoch": 3.1780821917808217,
      "step": 232,
      "train/learning_rate_real": 1.995459198173127e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2461.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2461.0,
      "epoch": 3.191780821917808,
      "step": 233
    },
    {
      "epoch": 3.191780821917808,
      "step": 233,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.4414790868759155
    },
    {
      "epoch": 3.191780821917808,
      "step": 233,
      "train/learning_rate_real": 1.9909995558382886e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2615.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2615.0,
      "epoch": 3.191780821917808,
      "step": 233
    },
    {
      "epoch": 3.191780821917808,
      "step": 233,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.404286652803421
    },
    {
      "epoch": 3.191780821917808,
      "step": 233,
      "train/learning_rate_real": 1.9909995558382886e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2865.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2865.0,
      "epoch": 3.191780821917808,
      "step": 233
    },
    {
      "epoch": 3.191780821917808,
      "step": 233,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.3791257441043854
    },
    {
      "epoch": 3.191780821917808,
      "step": 233,
      "train/learning_rate_real": 1.9909995558382886e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2839.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2839.0,
      "epoch": 3.191780821917808,
      "step": 233
    },
    {
      "epoch": 3.191780821917808,
      "step": 233,
      "train/ce_loss": 2.296875,
      "train/diffusion_loss": 0.357394814491272
    },
    {
      "epoch": 3.191780821917808,
      "step": 233,
      "train/learning_rate_real": 1.9909995558382886e-05
    },
    {
      "debug/num_lat_loss": 1622.0,
      "debug/num_lat_total": 2249.0,
      "debug/num_tok_loss": 1622.0,
      "debug/num_tok_total": 2249.0,
      "epoch": 3.2054794520547945,
      "step": 234
    },
    {
      "epoch": 3.2054794520547945,
      "step": 234,
      "train/ce_loss": 2.4375,
      "train/diffusion_loss": 0.4440324306488037
    },
    {
      "epoch": 3.2054794520547945,
      "step": 234,
      "train/learning_rate_real": 1.9865253236656044e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3095.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3095.0,
      "epoch": 3.2054794520547945,
      "step": 234
    },
    {
      "epoch": 3.2054794520547945,
      "step": 234,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.2975127100944519
    },
    {
      "epoch": 3.2054794520547945,
      "step": 234,
      "train/learning_rate_real": 1.9865253236656044e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2670.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2670.0,
      "epoch": 3.2054794520547945,
      "step": 234
    },
    {
      "epoch": 3.2054794520547945,
      "step": 234,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.4166703522205353
    },
    {
      "epoch": 3.2054794520547945,
      "step": 234,
      "train/learning_rate_real": 1.9865253236656044e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 3.2054794520547945,
      "step": 234
    },
    {
      "epoch": 3.2054794520547945,
      "step": 234,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.3953518271446228
    },
    {
      "epoch": 3.2054794520547945,
      "step": 234,
      "train/learning_rate_real": 1.9865253236656044e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2202.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2202.0,
      "epoch": 3.219178082191781,
      "step": 235
    },
    {
      "epoch": 3.219178082191781,
      "step": 235,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.4948474168777466
    },
    {
      "epoch": 3.219178082191781,
      "step": 235,
      "train/learning_rate_real": 1.9820365897500294e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2849.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2849.0,
      "epoch": 3.219178082191781,
      "step": 235
    },
    {
      "epoch": 3.219178082191781,
      "step": 235,
      "train/ce_loss": 2.34375,
      "train/diffusion_loss": 0.3483217656612396
    },
    {
      "epoch": 3.219178082191781,
      "step": 235,
      "train/learning_rate_real": 1.9820365897500294e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2191.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2191.0,
      "epoch": 3.219178082191781,
      "step": 235
    },
    {
      "epoch": 3.219178082191781,
      "step": 235,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.535424530506134
    },
    {
      "epoch": 3.219178082191781,
      "step": 235,
      "train/learning_rate_real": 1.9820365897500294e-05
    },
    {
      "debug/num_lat_loss": 1715.0,
      "debug/num_lat_total": 2083.0,
      "debug/num_tok_loss": 1715.0,
      "debug/num_tok_total": 2083.0,
      "epoch": 3.219178082191781,
      "step": 235
    },
    {
      "epoch": 3.219178082191781,
      "step": 235,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.5188778042793274
    },
    {
      "epoch": 3.219178082191781,
      "step": 235,
      "train/learning_rate_real": 1.9820365897500294e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2891.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2891.0,
      "epoch": 3.232876712328767,
      "step": 236
    },
    {
      "epoch": 3.232876712328767,
      "step": 236,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.34874579310417175
    },
    {
      "epoch": 3.232876712328767,
      "step": 236,
      "train/learning_rate_real": 1.977533442472047e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2208.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2208.0,
      "epoch": 3.232876712328767,
      "step": 236
    },
    {
      "epoch": 3.232876712328767,
      "step": 236,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.5000365972518921
    },
    {
      "epoch": 3.232876712328767,
      "step": 236,
      "train/learning_rate_real": 1.977533442472047e-05
    },
    {
      "debug/num_lat_loss": 1632.0,
      "debug/num_lat_total": 2935.0,
      "debug/num_tok_loss": 1632.0,
      "debug/num_tok_total": 2935.0,
      "epoch": 3.232876712328767,
      "step": 236
    },
    {
      "epoch": 3.232876712328767,
      "step": 236,
      "train/ce_loss": 2.546875,
      "train/diffusion_loss": 0.2920040786266327
    },
    {
      "epoch": 3.232876712328767,
      "step": 236,
      "train/learning_rate_real": 1.977533442472047e-05
    },
    {
      "debug/num_lat_loss": 1652.0,
      "debug/num_lat_total": 2382.0,
      "debug/num_tok_loss": 1652.0,
      "debug/num_tok_total": 2382.0,
      "epoch": 3.232876712328767,
      "step": 236
    },
    {
      "epoch": 3.232876712328767,
      "step": 236,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.392981618642807
    },
    {
      "epoch": 3.232876712328767,
      "step": 236,
      "train/learning_rate_real": 1.977533442472047e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2626.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2626.0,
      "epoch": 3.2465753424657535,
      "step": 237
    },
    {
      "epoch": 3.2465753424657535,
      "step": 237,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.42288437485694885
    },
    {
      "epoch": 3.2465753424657535,
      "step": 237,
      "train/learning_rate_real": 1.973015970495934e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2887.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2887.0,
      "epoch": 3.2465753424657535,
      "step": 237
    },
    {
      "epoch": 3.2465753424657535,
      "step": 237,
      "train/ce_loss": 2.28125,
      "train/diffusion_loss": 0.3476720452308655
    },
    {
      "epoch": 3.2465753424657535,
      "step": 237,
      "train/learning_rate_real": 1.973015970495934e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 3.2465753424657535,
      "step": 237
    },
    {
      "epoch": 3.2465753424657535,
      "step": 237,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.4026311933994293
    },
    {
      "epoch": 3.2465753424657535,
      "step": 237,
      "train/learning_rate_real": 1.973015970495934e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2657.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2657.0,
      "epoch": 3.2465753424657535,
      "step": 237
    },
    {
      "epoch": 3.2465753424657535,
      "step": 237,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.39144831895828247
    },
    {
      "epoch": 3.2465753424657535,
      "step": 237,
      "train/learning_rate_real": 1.973015970495934e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2621.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2621.0,
      "epoch": 3.26027397260274,
      "step": 238
    },
    {
      "epoch": 3.26027397260274,
      "step": 238,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.3839724361896515
    },
    {
      "epoch": 3.26027397260274,
      "step": 238,
      "train/learning_rate_real": 1.9684842627680088e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 3045.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 3045.0,
      "epoch": 3.26027397260274,
      "step": 238
    },
    {
      "epoch": 3.26027397260274,
      "step": 238,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.31859639286994934
    },
    {
      "epoch": 3.26027397260274,
      "step": 238,
      "train/learning_rate_real": 1.9684842627680088e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 3.26027397260274,
      "step": 238
    },
    {
      "epoch": 3.26027397260274,
      "step": 238,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.4462120532989502
    },
    {
      "epoch": 3.26027397260274,
      "step": 238,
      "train/learning_rate_real": 1.9684842627680088e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2214.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2214.0,
      "epoch": 3.26027397260274,
      "step": 238
    },
    {
      "epoch": 3.26027397260274,
      "step": 238,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.4857374131679535
    },
    {
      "epoch": 3.26027397260274,
      "step": 238,
      "train/learning_rate_real": 1.9684842627680088e-05
    },
    {
      "debug/num_lat_loss": 1612.0,
      "debug/num_lat_total": 2263.0,
      "debug/num_tok_loss": 1612.0,
      "debug/num_tok_total": 2263.0,
      "epoch": 3.2739726027397262,
      "step": 239
    },
    {
      "epoch": 3.2739726027397262,
      "step": 239,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.41768062114715576
    },
    {
      "epoch": 3.2739726027397262,
      "step": 239,
      "train/learning_rate_real": 1.963938408514886e-05
    },
    {
      "debug/num_lat_loss": 1678.0,
      "debug/num_lat_total": 2321.0,
      "debug/num_tok_loss": 1678.0,
      "debug/num_tok_total": 2321.0,
      "epoch": 3.2739726027397262,
      "step": 239
    },
    {
      "epoch": 3.2739726027397262,
      "step": 239,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.4716084599494934
    },
    {
      "epoch": 3.2739726027397262,
      "step": 239,
      "train/learning_rate_real": 1.963938408514886e-05
    },
    {
      "debug/num_lat_loss": 1821.0,
      "debug/num_lat_total": 3149.0,
      "debug/num_tok_loss": 1821.0,
      "debug/num_tok_total": 3149.0,
      "epoch": 3.2739726027397262,
      "step": 239
    },
    {
      "epoch": 3.2739726027397262,
      "step": 239,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.3284153938293457
    },
    {
      "epoch": 3.2739726027397262,
      "step": 239,
      "train/learning_rate_real": 1.963938408514886e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2454.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2454.0,
      "epoch": 3.2739726027397262,
      "step": 239
    },
    {
      "epoch": 3.2739726027397262,
      "step": 239,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.45075204968452454
    },
    {
      "epoch": 3.2739726027397262,
      "step": 239,
      "train/learning_rate_real": 1.963938408514886e-05
    },
    {
      "epoch": 3.287671232876712,
      "grad_norm": 1.4538198709487915,
      "learning_rate": 1.963938408514886e-05,
      "loss": 2.699,
      "step": 240
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2900.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2900.0,
      "epoch": 3.287671232876712,
      "step": 240
    },
    {
      "epoch": 3.287671232876712,
      "step": 240,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.340977281332016
    },
    {
      "epoch": 3.287671232876712,
      "step": 240,
      "train/learning_rate_real": 1.959378497241715e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2838.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2838.0,
      "epoch": 3.287671232876712,
      "step": 240
    },
    {
      "epoch": 3.287671232876712,
      "step": 240,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.38470348715782166
    },
    {
      "epoch": 3.287671232876712,
      "step": 240,
      "train/learning_rate_real": 1.959378497241715e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2607.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2607.0,
      "epoch": 3.287671232876712,
      "step": 240
    },
    {
      "epoch": 3.287671232876712,
      "step": 240,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.3994775712490082
    },
    {
      "epoch": 3.287671232876712,
      "step": 240,
      "train/learning_rate_real": 1.959378497241715e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2628.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2628.0,
      "epoch": 3.287671232876712,
      "step": 240
    },
    {
      "epoch": 3.287671232876712,
      "step": 240,
      "train/ce_loss": 2.5625,
      "train/diffusion_loss": 0.40093958377838135
    },
    {
      "epoch": 3.287671232876712,
      "step": 240,
      "train/learning_rate_real": 1.959378497241715e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2614.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2614.0,
      "epoch": 3.3013698630136985,
      "step": 241
    },
    {
      "epoch": 3.3013698630136985,
      "step": 241,
      "train/ce_loss": 2.296875,
      "train/diffusion_loss": 0.4155711829662323
    },
    {
      "epoch": 3.3013698630136985,
      "step": 241,
      "train/learning_rate_real": 1.95480461873042e-05
    },
    {
      "debug/num_lat_loss": 1814.0,
      "debug/num_lat_total": 2688.0,
      "debug/num_tok_loss": 1814.0,
      "debug/num_tok_total": 2688.0,
      "epoch": 3.3013698630136985,
      "step": 241
    },
    {
      "epoch": 3.3013698630136985,
      "step": 241,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.4016936421394348
    },
    {
      "epoch": 3.3013698630136985,
      "step": 241,
      "train/learning_rate_real": 1.95480461873042e-05
    },
    {
      "debug/num_lat_loss": 1707.0,
      "debug/num_lat_total": 2338.0,
      "debug/num_tok_loss": 1707.0,
      "debug/num_tok_total": 2338.0,
      "epoch": 3.3013698630136985,
      "step": 241
    },
    {
      "epoch": 3.3013698630136985,
      "step": 241,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.4285438656806946
    },
    {
      "epoch": 3.3013698630136985,
      "step": 241,
      "train/learning_rate_real": 1.95480461873042e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 3.3013698630136985,
      "step": 241
    },
    {
      "epoch": 3.3013698630136985,
      "step": 241,
      "train/ce_loss": 2.296875,
      "train/diffusion_loss": 0.4184286594390869
    },
    {
      "epoch": 3.3013698630136985,
      "step": 241,
      "train/learning_rate_real": 1.95480461873042e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2450.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2450.0,
      "epoch": 3.315068493150685,
      "step": 242
    },
    {
      "epoch": 3.315068493150685,
      "step": 242,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.46178358793258667
    },
    {
      "epoch": 3.315068493150685,
      "step": 242,
      "train/learning_rate_real": 1.9502168630379324e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 3.315068493150685,
      "step": 242
    },
    {
      "epoch": 3.315068493150685,
      "step": 242,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.44902271032333374
    },
    {
      "epoch": 3.315068493150685,
      "step": 242,
      "train/learning_rate_real": 1.9502168630379324e-05
    },
    {
      "debug/num_lat_loss": 1701.0,
      "debug/num_lat_total": 2481.0,
      "debug/num_tok_loss": 1701.0,
      "debug/num_tok_total": 2481.0,
      "epoch": 3.315068493150685,
      "step": 242
    },
    {
      "epoch": 3.315068493150685,
      "step": 242,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.43946367502212524
    },
    {
      "epoch": 3.315068493150685,
      "step": 242,
      "train/learning_rate_real": 1.9502168630379324e-05
    },
    {
      "debug/num_lat_loss": 1681.0,
      "debug/num_lat_total": 2530.0,
      "debug/num_tok_loss": 1681.0,
      "debug/num_tok_total": 2530.0,
      "epoch": 3.315068493150685,
      "step": 242
    },
    {
      "epoch": 3.315068493150685,
      "step": 242,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.37286049127578735
    },
    {
      "epoch": 3.315068493150685,
      "step": 242,
      "train/learning_rate_real": 1.9502168630379324e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 3.328767123287671,
      "step": 243
    },
    {
      "epoch": 3.328767123287671,
      "step": 243,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.3862497806549072
    },
    {
      "epoch": 3.328767123287671,
      "step": 243,
      "train/learning_rate_real": 1.9456153204944172e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2199.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2199.0,
      "epoch": 3.328767123287671,
      "step": 243
    },
    {
      "epoch": 3.328767123287671,
      "step": 243,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.4877125918865204
    },
    {
      "epoch": 3.328767123287671,
      "step": 243,
      "train/learning_rate_real": 1.9456153204944172e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 1988.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 1988.0,
      "epoch": 3.328767123287671,
      "step": 243
    },
    {
      "epoch": 3.328767123287671,
      "step": 243,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.5290765166282654
    },
    {
      "epoch": 3.328767123287671,
      "step": 243,
      "train/learning_rate_real": 1.9456153204944172e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2226.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2226.0,
      "epoch": 3.328767123287671,
      "step": 243
    },
    {
      "epoch": 3.328767123287671,
      "step": 243,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.47163859009742737
    },
    {
      "epoch": 3.328767123287671,
      "step": 243,
      "train/learning_rate_real": 1.9456153204944172e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 3078.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 3078.0,
      "epoch": 3.3424657534246576,
      "step": 244
    },
    {
      "epoch": 3.3424657534246576,
      "step": 244,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.31024348735809326
    },
    {
      "epoch": 3.3424657534246576,
      "step": 244,
      "train/learning_rate_real": 1.941000081701492e-05
    },
    {
      "debug/num_lat_loss": 1828.0,
      "debug/num_lat_total": 3372.0,
      "debug/num_tok_loss": 1828.0,
      "debug/num_tok_total": 3372.0,
      "epoch": 3.3424657534246576,
      "step": 244
    },
    {
      "epoch": 3.3424657534246576,
      "step": 244,
      "train/ce_loss": 2.28125,
      "train/diffusion_loss": 0.2503184974193573
    },
    {
      "epoch": 3.3424657534246576,
      "step": 244,
      "train/learning_rate_real": 1.941000081701492e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2654.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2654.0,
      "epoch": 3.3424657534246576,
      "step": 244
    },
    {
      "epoch": 3.3424657534246576,
      "step": 244,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.4020564556121826
    },
    {
      "epoch": 3.3424657534246576,
      "step": 244,
      "train/learning_rate_real": 1.941000081701492e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2814.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2814.0,
      "epoch": 3.3424657534246576,
      "step": 244
    },
    {
      "epoch": 3.3424657534246576,
      "step": 244,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.35802125930786133
    },
    {
      "epoch": 3.3424657534246576,
      "step": 244,
      "train/learning_rate_real": 1.941000081701492e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2435.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2435.0,
      "epoch": 3.356164383561644,
      "step": 245
    },
    {
      "epoch": 3.356164383561644,
      "step": 245,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.45023059844970703
    },
    {
      "epoch": 3.356164383561644,
      "step": 245,
      "train/learning_rate_real": 1.9363712375304478e-05
    },
    {
      "debug/num_lat_loss": 1655.0,
      "debug/num_lat_total": 2583.0,
      "debug/num_tok_loss": 1655.0,
      "debug/num_tok_total": 2583.0,
      "epoch": 3.356164383561644,
      "step": 245
    },
    {
      "epoch": 3.356164383561644,
      "step": 245,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.39521628618240356
    },
    {
      "epoch": 3.356164383561644,
      "step": 245,
      "train/learning_rate_real": 1.9363712375304478e-05
    },
    {
      "debug/num_lat_loss": 1610.0,
      "debug/num_lat_total": 2674.0,
      "debug/num_tok_loss": 1610.0,
      "debug/num_tok_total": 2674.0,
      "epoch": 3.356164383561644,
      "step": 245
    },
    {
      "epoch": 3.356164383561644,
      "step": 245,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.3283180594444275
    },
    {
      "epoch": 3.356164383561644,
      "step": 245,
      "train/learning_rate_real": 1.9363712375304478e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 3026.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 3026.0,
      "epoch": 3.356164383561644,
      "step": 245
    },
    {
      "epoch": 3.356164383561644,
      "step": 245,
      "train/ce_loss": 2.28125,
      "train/diffusion_loss": 0.29643863439559937
    },
    {
      "epoch": 3.356164383561644,
      "step": 245,
      "train/learning_rate_real": 1.9363712375304478e-05
    },
    {
      "debug/num_lat_loss": 1623.0,
      "debug/num_lat_total": 1897.0,
      "debug/num_tok_loss": 1623.0,
      "debug/num_tok_total": 1897.0,
      "epoch": 3.3698630136986303,
      "step": 246
    },
    {
      "epoch": 3.3698630136986303,
      "step": 246,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.5394054055213928
    },
    {
      "epoch": 3.3698630136986303,
      "step": 246,
      "train/learning_rate_real": 1.931728879120456e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2409.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2409.0,
      "epoch": 3.3698630136986303,
      "step": 246
    },
    {
      "epoch": 3.3698630136986303,
      "step": 246,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.4408913254737854
    },
    {
      "epoch": 3.3698630136986303,
      "step": 246,
      "train/learning_rate_real": 1.931728879120456e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2205.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2205.0,
      "epoch": 3.3698630136986303,
      "step": 246
    },
    {
      "epoch": 3.3698630136986303,
      "step": 246,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.4855363368988037
    },
    {
      "epoch": 3.3698630136986303,
      "step": 246,
      "train/learning_rate_real": 1.931728879120456e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2417.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2417.0,
      "epoch": 3.3698630136986303,
      "step": 246
    },
    {
      "epoch": 3.3698630136986303,
      "step": 246,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.4715399444103241
    },
    {
      "epoch": 3.3698630136986303,
      "step": 246,
      "train/learning_rate_real": 1.931728879120456e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3078.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3078.0,
      "epoch": 3.383561643835616,
      "step": 247
    },
    {
      "epoch": 3.383561643835616,
      "step": 247,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.3111913204193115
    },
    {
      "epoch": 3.383561643835616,
      "step": 247,
      "train/learning_rate_real": 1.9270730978767764e-05
    },
    {
      "debug/num_lat_loss": 1818.0,
      "debug/num_lat_total": 2691.0,
      "debug/num_tok_loss": 1818.0,
      "debug/num_tok_total": 2691.0,
      "epoch": 3.383561643835616,
      "step": 247
    },
    {
      "epoch": 3.383561643835616,
      "step": 247,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.3947170674800873
    },
    {
      "epoch": 3.383561643835616,
      "step": 247,
      "train/learning_rate_real": 1.9270730978767764e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 1974.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 1974.0,
      "epoch": 3.383561643835616,
      "step": 247
    },
    {
      "epoch": 3.383561643835616,
      "step": 247,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.5423211455345154
    },
    {
      "epoch": 3.383561643835616,
      "step": 247,
      "train/learning_rate_real": 1.9270730978767764e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2899.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2899.0,
      "epoch": 3.383561643835616,
      "step": 247
    },
    {
      "epoch": 3.383561643835616,
      "step": 247,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.4050007164478302
    },
    {
      "epoch": 3.383561643835616,
      "step": 247,
      "train/learning_rate_real": 1.9270730978767764e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 3.3972602739726026,
      "step": 248
    },
    {
      "epoch": 3.3972602739726026,
      "step": 248,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.4002581834793091
    },
    {
      "epoch": 3.3972602739726026,
      "step": 248,
      "train/learning_rate_real": 1.922403985468955e-05
    },
    {
      "debug/num_lat_loss": 1755.0,
      "debug/num_lat_total": 2821.0,
      "debug/num_tok_loss": 1755.0,
      "debug/num_tok_total": 2821.0,
      "epoch": 3.3972602739726026,
      "step": 248
    },
    {
      "epoch": 3.3972602739726026,
      "step": 248,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.3785499334335327
    },
    {
      "epoch": 3.3972602739726026,
      "step": 248,
      "train/learning_rate_real": 1.922403985468955e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 3077.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 3077.0,
      "epoch": 3.3972602739726026,
      "step": 248
    },
    {
      "epoch": 3.3972602739726026,
      "step": 248,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.33860430121421814
    },
    {
      "epoch": 3.3972602739726026,
      "step": 248,
      "train/learning_rate_real": 1.922403985468955e-05
    },
    {
      "debug/num_lat_loss": 1611.0,
      "debug/num_lat_total": 2688.0,
      "debug/num_tok_loss": 1611.0,
      "debug/num_tok_total": 2688.0,
      "epoch": 3.3972602739726026,
      "step": 248
    },
    {
      "epoch": 3.3972602739726026,
      "step": 248,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.33459946513175964
    },
    {
      "epoch": 3.3972602739726026,
      "step": 248,
      "train/learning_rate_real": 1.922403985468955e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 3101.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 3101.0,
      "epoch": 3.410958904109589,
      "step": 249
    },
    {
      "epoch": 3.410958904109589,
      "step": 249,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.33131060004234314
    },
    {
      "epoch": 3.410958904109589,
      "step": 249,
      "train/learning_rate_real": 1.9177216338290214e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2604.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2604.0,
      "epoch": 3.410958904109589,
      "step": 249
    },
    {
      "epoch": 3.410958904109589,
      "step": 249,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.4083004295825958
    },
    {
      "epoch": 3.410958904109589,
      "step": 249,
      "train/learning_rate_real": 1.9177216338290214e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2679.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2679.0,
      "epoch": 3.410958904109589,
      "step": 249
    },
    {
      "epoch": 3.410958904109589,
      "step": 249,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.3885483145713806
    },
    {
      "epoch": 3.410958904109589,
      "step": 249,
      "train/learning_rate_real": 1.9177216338290214e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2629.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2629.0,
      "epoch": 3.410958904109589,
      "step": 249
    },
    {
      "epoch": 3.410958904109589,
      "step": 249,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.4085504710674286
    },
    {
      "epoch": 3.410958904109589,
      "step": 249,
      "train/learning_rate_real": 1.9177216338290214e-05
    },
    {
      "epoch": 3.4246575342465753,
      "grad_norm": 1.3147482872009277,
      "learning_rate": 1.9177216338290214e-05,
      "loss": 2.6215,
      "step": 250
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2662.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2662.0,
      "epoch": 3.4246575342465753,
      "step": 250
    },
    {
      "epoch": 3.4246575342465753,
      "step": 250,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.3955235183238983
    },
    {
      "epoch": 3.4246575342465753,
      "step": 250,
      "train/learning_rate_real": 1.913026135149678e-05
    },
    {
      "debug/num_lat_loss": 1649.0,
      "debug/num_lat_total": 2162.0,
      "debug/num_tok_loss": 1649.0,
      "debug/num_tok_total": 2162.0,
      "epoch": 3.4246575342465753,
      "step": 250
    },
    {
      "epoch": 3.4246575342465753,
      "step": 250,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.4867064654827118
    },
    {
      "epoch": 3.4246575342465753,
      "step": 250,
      "train/learning_rate_real": 1.913026135149678e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2884.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2884.0,
      "epoch": 3.4246575342465753,
      "step": 250
    },
    {
      "epoch": 3.4246575342465753,
      "step": 250,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.36458444595336914
    },
    {
      "epoch": 3.4246575342465753,
      "step": 250,
      "train/learning_rate_real": 1.913026135149678e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2216.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2216.0,
      "epoch": 3.4246575342465753,
      "step": 250
    },
    {
      "epoch": 3.4246575342465753,
      "step": 250,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.5149307250976562
    },
    {
      "epoch": 3.4246575342465753,
      "step": 250,
      "train/learning_rate_real": 1.913026135149678e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2664.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2664.0,
      "epoch": 3.4383561643835616,
      "step": 251
    },
    {
      "epoch": 3.4383561643835616,
      "step": 251,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.4316481351852417
    },
    {
      "epoch": 3.4383561643835616,
      "step": 251,
      "train/learning_rate_real": 1.9083175818824838e-05
    },
    {
      "debug/num_lat_loss": 1748.0,
      "debug/num_lat_total": 2607.0,
      "debug/num_tok_loss": 1748.0,
      "debug/num_tok_total": 2607.0,
      "epoch": 3.4383561643835616,
      "step": 251
    },
    {
      "epoch": 3.4383561643835616,
      "step": 251,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.4034005403518677
    },
    {
      "epoch": 3.4383561643835616,
      "step": 251,
      "train/learning_rate_real": 1.9083175818824838e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2887.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2887.0,
      "epoch": 3.4383561643835616,
      "step": 251
    },
    {
      "epoch": 3.4383561643835616,
      "step": 251,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.35007497668266296
    },
    {
      "epoch": 3.4383561643835616,
      "step": 251,
      "train/learning_rate_real": 1.9083175818824838e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2418.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2418.0,
      "epoch": 3.4383561643835616,
      "step": 251
    },
    {
      "epoch": 3.4383561643835616,
      "step": 251,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.4340359568595886
    },
    {
      "epoch": 3.4383561643835616,
      "step": 251,
      "train/learning_rate_real": 1.9083175818824838e-05
    },
    {
      "debug/num_lat_loss": 1707.0,
      "debug/num_lat_total": 2995.0,
      "debug/num_tok_loss": 1707.0,
      "debug/num_tok_total": 2995.0,
      "epoch": 3.452054794520548,
      "step": 252
    },
    {
      "epoch": 3.452054794520548,
      "step": 252,
      "train/ce_loss": 2.34375,
      "train/diffusion_loss": 0.2904086709022522
    },
    {
      "epoch": 3.452054794520548,
      "step": 252,
      "train/learning_rate_real": 1.9035960667360348e-05
    },
    {
      "debug/num_lat_loss": 1664.0,
      "debug/num_lat_total": 2293.0,
      "debug/num_tok_loss": 1664.0,
      "debug/num_tok_total": 2293.0,
      "epoch": 3.452054794520548,
      "step": 252
    },
    {
      "epoch": 3.452054794520548,
      "step": 252,
      "train/ce_loss": 2.546875,
      "train/diffusion_loss": 0.4232002794742584
    },
    {
      "epoch": 3.452054794520548,
      "step": 252,
      "train/learning_rate_real": 1.9035960667360348e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2390.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2390.0,
      "epoch": 3.452054794520548,
      "step": 252
    },
    {
      "epoch": 3.452054794520548,
      "step": 252,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.4461905360221863
    },
    {
      "epoch": 3.452054794520548,
      "step": 252,
      "train/learning_rate_real": 1.9035960667360348e-05
    },
    {
      "debug/num_lat_loss": 1717.0,
      "debug/num_lat_total": 2956.0,
      "debug/num_tok_loss": 1717.0,
      "debug/num_tok_total": 2956.0,
      "epoch": 3.452054794520548,
      "step": 252
    },
    {
      "epoch": 3.452054794520548,
      "step": 252,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.32390090823173523
    },
    {
      "epoch": 3.452054794520548,
      "step": 252,
      "train/learning_rate_real": 1.9035960667360348e-05
    },
    {
      "debug/num_lat_loss": 1600.0,
      "debug/num_lat_total": 2489.0,
      "debug/num_tok_loss": 1600.0,
      "debug/num_tok_total": 2489.0,
      "epoch": 3.4657534246575343,
      "step": 253
    },
    {
      "epoch": 3.4657534246575343,
      "step": 253,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.37611258029937744
    },
    {
      "epoch": 3.4657534246575343,
      "step": 253,
      "train/learning_rate_real": 1.8988616826741386e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2665.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2665.0,
      "epoch": 3.4657534246575343,
      "step": 253
    },
    {
      "epoch": 3.4657534246575343,
      "step": 253,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.39329659938812256
    },
    {
      "epoch": 3.4657534246575343,
      "step": 253,
      "train/learning_rate_real": 1.8988616826741386e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2415.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2415.0,
      "epoch": 3.4657534246575343,
      "step": 253
    },
    {
      "epoch": 3.4657534246575343,
      "step": 253,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.43804121017456055
    },
    {
      "epoch": 3.4657534246575343,
      "step": 253,
      "train/learning_rate_real": 1.8988616826741386e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2610.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2610.0,
      "epoch": 3.4657534246575343,
      "step": 253
    },
    {
      "epoch": 3.4657534246575343,
      "step": 253,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.4059012830257416
    },
    {
      "epoch": 3.4657534246575343,
      "step": 253,
      "train/learning_rate_real": 1.8988616826741386e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 3099.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 3099.0,
      "epoch": 3.4794520547945207,
      "step": 254
    },
    {
      "epoch": 3.4794520547945207,
      "step": 254,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.30580952763557434
    },
    {
      "epoch": 3.4794520547945207,
      "step": 254,
      "train/learning_rate_real": 1.894114522913985e-05
    },
    {
      "debug/num_lat_loss": 1745.0,
      "debug/num_lat_total": 3007.0,
      "debug/num_tok_loss": 1745.0,
      "debug/num_tok_total": 3007.0,
      "epoch": 3.4794520547945207,
      "step": 254
    },
    {
      "epoch": 3.4794520547945207,
      "step": 254,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.30429425835609436
    },
    {
      "epoch": 3.4794520547945207,
      "step": 254,
      "train/learning_rate_real": 1.894114522913985e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2658.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2658.0,
      "epoch": 3.4794520547945207,
      "step": 254
    },
    {
      "epoch": 3.4794520547945207,
      "step": 254,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.3724602460861206
    },
    {
      "epoch": 3.4794520547945207,
      "step": 254,
      "train/learning_rate_real": 1.894114522913985e-05
    },
    {
      "debug/num_lat_loss": 1659.0,
      "debug/num_lat_total": 2506.0,
      "debug/num_tok_loss": 1659.0,
      "debug/num_tok_total": 2506.0,
      "epoch": 3.4794520547945207,
      "step": 254
    },
    {
      "epoch": 3.4794520547945207,
      "step": 254,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.3866651952266693
    },
    {
      "epoch": 3.4794520547945207,
      "step": 254,
      "train/learning_rate_real": 1.894114522913985e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 3073.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 3073.0,
      "epoch": 3.493150684931507,
      "step": 255
    },
    {
      "epoch": 3.493150684931507,
      "step": 255,
      "train/ce_loss": 2.28125,
      "train/diffusion_loss": 0.3223687708377838
    },
    {
      "epoch": 3.493150684931507,
      "step": 255,
      "train/learning_rate_real": 1.8893546809243084e-05
    },
    {
      "debug/num_lat_loss": 1517.0,
      "debug/num_lat_total": 2290.0,
      "debug/num_tok_loss": 1517.0,
      "debug/num_tok_total": 2290.0,
      "epoch": 3.493150684931507,
      "step": 255
    },
    {
      "epoch": 3.493150684931507,
      "step": 255,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.40321141481399536
    },
    {
      "epoch": 3.493150684931507,
      "step": 255,
      "train/learning_rate_real": 1.8893546809243084e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2819.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2819.0,
      "epoch": 3.493150684931507,
      "step": 255
    },
    {
      "epoch": 3.493150684931507,
      "step": 255,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.37000900506973267
    },
    {
      "epoch": 3.493150684931507,
      "step": 255,
      "train/learning_rate_real": 1.8893546809243084e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2637.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2637.0,
      "epoch": 3.493150684931507,
      "step": 255
    },
    {
      "epoch": 3.493150684931507,
      "step": 255,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.44469863176345825
    },
    {
      "epoch": 3.493150684931507,
      "step": 255,
      "train/learning_rate_real": 1.8893546809243084e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2870.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2870.0,
      "epoch": 3.506849315068493,
      "step": 256
    },
    {
      "epoch": 3.506849315068493,
      "step": 256,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.3512857258319855
    },
    {
      "epoch": 3.506849315068493,
      "step": 256,
      "train/learning_rate_real": 1.88458225042355e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2643.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2643.0,
      "epoch": 3.506849315068493,
      "step": 256
    },
    {
      "epoch": 3.506849315068493,
      "step": 256,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.4314759075641632
    },
    {
      "epoch": 3.506849315068493,
      "step": 256,
      "train/learning_rate_real": 1.88458225042355e-05
    },
    {
      "debug/num_lat_loss": 1616.0,
      "debug/num_lat_total": 2746.0,
      "debug/num_tok_loss": 1616.0,
      "debug/num_tok_total": 2746.0,
      "epoch": 3.506849315068493,
      "step": 256
    },
    {
      "epoch": 3.506849315068493,
      "step": 256,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.3479451537132263
    },
    {
      "epoch": 3.506849315068493,
      "step": 256,
      "train/learning_rate_real": 1.88458225042355e-05
    },
    {
      "debug/num_lat_loss": 1814.0,
      "debug/num_lat_total": 2696.0,
      "debug/num_tok_loss": 1814.0,
      "debug/num_tok_total": 2696.0,
      "epoch": 3.506849315068493,
      "step": 256
    },
    {
      "epoch": 3.506849315068493,
      "step": 256,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.43738123774528503
    },
    {
      "epoch": 3.506849315068493,
      "step": 256,
      "train/learning_rate_real": 1.88458225042355e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2861.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2861.0,
      "epoch": 3.5205479452054793,
      "step": 257
    },
    {
      "epoch": 3.5205479452054793,
      "step": 257,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.3625929653644562
    },
    {
      "epoch": 3.5205479452054793,
      "step": 257,
      "train/learning_rate_real": 1.8797973253780105e-05
    },
    {
      "debug/num_lat_loss": 1685.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1685.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 3.5205479452054793,
      "step": 257
    },
    {
      "epoch": 3.5205479452054793,
      "step": 257,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.409828782081604
    },
    {
      "epoch": 3.5205479452054793,
      "step": 257,
      "train/learning_rate_real": 1.8797973253780105e-05
    },
    {
      "debug/num_lat_loss": 1641.0,
      "debug/num_lat_total": 2574.0,
      "debug/num_tok_loss": 1641.0,
      "debug/num_tok_total": 2574.0,
      "epoch": 3.5205479452054793,
      "step": 257
    },
    {
      "epoch": 3.5205479452054793,
      "step": 257,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.3821832537651062
    },
    {
      "epoch": 3.5205479452054793,
      "step": 257,
      "train/learning_rate_real": 1.8797973253780105e-05
    },
    {
      "debug/num_lat_loss": 1612.0,
      "debug/num_lat_total": 2733.0,
      "debug/num_tok_loss": 1612.0,
      "debug/num_tok_total": 2733.0,
      "epoch": 3.5205479452054793,
      "step": 257
    },
    {
      "epoch": 3.5205479452054793,
      "step": 257,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.3593869209289551
    },
    {
      "epoch": 3.5205479452054793,
      "step": 257,
      "train/learning_rate_real": 1.8797973253780105e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 3.5342465753424657,
      "step": 258
    },
    {
      "epoch": 3.5342465753424657,
      "step": 258,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.4660320281982422
    },
    {
      "epoch": 3.5342465753424657,
      "step": 258,
      "train/learning_rate_real": 1.8750000000000002e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 3292.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 3292.0,
      "epoch": 3.5342465753424657,
      "step": 258
    },
    {
      "epoch": 3.5342465753424657,
      "step": 258,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.3105265200138092
    },
    {
      "epoch": 3.5342465753424657,
      "step": 258,
      "train/learning_rate_real": 1.8750000000000002e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2452.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2452.0,
      "epoch": 3.5342465753424657,
      "step": 258
    },
    {
      "epoch": 3.5342465753424657,
      "step": 258,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.41989848017692566
    },
    {
      "epoch": 3.5342465753424657,
      "step": 258,
      "train/learning_rate_real": 1.8750000000000002e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 3.5342465753424657,
      "step": 258
    },
    {
      "epoch": 3.5342465753424657,
      "step": 258,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.4183150827884674
    },
    {
      "epoch": 3.5342465753424657,
      "step": 258,
      "train/learning_rate_real": 1.8750000000000002e-05
    },
    {
      "debug/num_lat_loss": 1706.0,
      "debug/num_lat_total": 2475.0,
      "debug/num_tok_loss": 1706.0,
      "debug/num_tok_total": 2475.0,
      "epoch": 3.547945205479452,
      "step": 259
    },
    {
      "epoch": 3.547945205479452,
      "step": 259,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.4180438816547394
    },
    {
      "epoch": 3.547945205479452,
      "step": 259,
      "train/learning_rate_real": 1.8701903687459858e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 3245.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 3245.0,
      "epoch": 3.547945205479452,
      "step": 259
    },
    {
      "epoch": 3.547945205479452,
      "step": 259,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.2874610424041748
    },
    {
      "epoch": 3.547945205479452,
      "step": 259,
      "train/learning_rate_real": 1.8701903687459858e-05
    },
    {
      "debug/num_lat_loss": 1731.0,
      "debug/num_lat_total": 2750.0,
      "debug/num_tok_loss": 1731.0,
      "debug/num_tok_total": 2750.0,
      "epoch": 3.547945205479452,
      "step": 259
    },
    {
      "epoch": 3.547945205479452,
      "step": 259,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.3924647271633148
    },
    {
      "epoch": 3.547945205479452,
      "step": 259,
      "train/learning_rate_real": 1.8701903687459858e-05
    },
    {
      "debug/num_lat_loss": 1752.0,
      "debug/num_lat_total": 2381.0,
      "debug/num_tok_loss": 1752.0,
      "debug/num_tok_total": 2381.0,
      "epoch": 3.547945205479452,
      "step": 259
    },
    {
      "epoch": 3.547945205479452,
      "step": 259,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.4388349950313568
    },
    {
      "epoch": 3.547945205479452,
      "step": 259,
      "train/learning_rate_real": 1.8701903687459858e-05
    },
    {
      "epoch": 3.5616438356164384,
      "grad_norm": 1.548999309539795,
      "learning_rate": 1.8701903687459858e-05,
      "loss": 2.5412,
      "step": 260
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2591.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2591.0,
      "epoch": 3.5616438356164384,
      "step": 260
    },
    {
      "epoch": 3.5616438356164384,
      "step": 260,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.4313473403453827
    },
    {
      "epoch": 3.5616438356164384,
      "step": 260,
      "train/learning_rate_real": 1.8653685263147278e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 3259.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 3259.0,
      "epoch": 3.5616438356164384,
      "step": 260
    },
    {
      "epoch": 3.5616438356164384,
      "step": 260,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.26685136556625366
    },
    {
      "epoch": 3.5616438356164384,
      "step": 260,
      "train/learning_rate_real": 1.8653685263147278e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2464.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2464.0,
      "epoch": 3.5616438356164384,
      "step": 260
    },
    {
      "epoch": 3.5616438356164384,
      "step": 260,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.4555809795856476
    },
    {
      "epoch": 3.5616438356164384,
      "step": 260,
      "train/learning_rate_real": 1.8653685263147278e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 3.5616438356164384,
      "step": 260
    },
    {
      "epoch": 3.5616438356164384,
      "step": 260,
      "train/ce_loss": 2.28125,
      "train/diffusion_loss": 0.4081794023513794
    },
    {
      "epoch": 3.5616438356164384,
      "step": 260,
      "train/learning_rate_real": 1.8653685263147278e-05
    },
    {
      "debug/num_lat_loss": 1617.0,
      "debug/num_lat_total": 1826.0,
      "debug/num_tok_loss": 1617.0,
      "debug/num_tok_total": 1826.0,
      "epoch": 3.5753424657534247,
      "step": 261
    },
    {
      "epoch": 3.5753424657534247,
      "step": 261,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.558996856212616
    },
    {
      "epoch": 3.5753424657534247,
      "step": 261,
      "train/learning_rate_real": 1.860534567645419e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2823.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2823.0,
      "epoch": 3.5753424657534247,
      "step": 261
    },
    {
      "epoch": 3.5753424657534247,
      "step": 261,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.36656317114830017
    },
    {
      "epoch": 3.5753424657534247,
      "step": 261,
      "train/learning_rate_real": 1.860534567645419e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2679.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2679.0,
      "epoch": 3.5753424657534247,
      "step": 261
    },
    {
      "epoch": 3.5753424657534247,
      "step": 261,
      "train/ce_loss": 2.625,
      "train/diffusion_loss": 0.4010484516620636
    },
    {
      "epoch": 3.5753424657534247,
      "step": 261,
      "train/learning_rate_real": 1.860534567645419e-05
    },
    {
      "debug/num_lat_loss": 1717.0,
      "debug/num_lat_total": 2360.0,
      "debug/num_tok_loss": 1717.0,
      "debug/num_tok_total": 2360.0,
      "epoch": 3.5753424657534247,
      "step": 261
    },
    {
      "epoch": 3.5753424657534247,
      "step": 261,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.4341590404510498
    },
    {
      "epoch": 3.5753424657534247,
      "step": 261,
      "train/learning_rate_real": 1.860534567645419e-05
    },
    {
      "debug/num_lat_loss": 1671.0,
      "debug/num_lat_total": 2086.0,
      "debug/num_tok_loss": 1671.0,
      "debug/num_tok_total": 2086.0,
      "epoch": 3.589041095890411,
      "step": 262
    },
    {
      "epoch": 3.589041095890411,
      "step": 262,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.5023366808891296
    },
    {
      "epoch": 3.589041095890411,
      "step": 262,
      "train/learning_rate_real": 1.855688587915813e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 3.589041095890411,
      "step": 262
    },
    {
      "epoch": 3.589041095890411,
      "step": 262,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.3760988116264343
    },
    {
      "epoch": 3.589041095890411,
      "step": 262,
      "train/learning_rate_real": 1.855688587915813e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 3033.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 3033.0,
      "epoch": 3.589041095890411,
      "step": 262
    },
    {
      "epoch": 3.589041095890411,
      "step": 262,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.3116809129714966
    },
    {
      "epoch": 3.589041095890411,
      "step": 262,
      "train/learning_rate_real": 1.855688587915813e-05
    },
    {
      "debug/num_lat_loss": 1649.0,
      "debug/num_lat_total": 2578.0,
      "debug/num_tok_loss": 1649.0,
      "debug/num_tok_total": 2578.0,
      "epoch": 3.589041095890411,
      "step": 262
    },
    {
      "epoch": 3.589041095890411,
      "step": 262,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.388486385345459
    },
    {
      "epoch": 3.589041095890411,
      "step": 262,
      "train/learning_rate_real": 1.855688587915813e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2426.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2426.0,
      "epoch": 3.602739726027397,
      "step": 263
    },
    {
      "epoch": 3.602739726027397,
      "step": 263,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.447339802980423
    },
    {
      "epoch": 3.602739726027397,
      "step": 263,
      "train/learning_rate_real": 1.8508306825403504e-05
    },
    {
      "debug/num_lat_loss": 1590.0,
      "debug/num_lat_total": 2696.0,
      "debug/num_tok_loss": 1590.0,
      "debug/num_tok_total": 2696.0,
      "epoch": 3.602739726027397,
      "step": 263
    },
    {
      "epoch": 3.602739726027397,
      "step": 263,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.3028818666934967
    },
    {
      "epoch": 3.602739726027397,
      "step": 263,
      "train/learning_rate_real": 1.8508306825403504e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2880.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2880.0,
      "epoch": 3.602739726027397,
      "step": 263
    },
    {
      "epoch": 3.602739726027397,
      "step": 263,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.3490007519721985
    },
    {
      "epoch": 3.602739726027397,
      "step": 263,
      "train/learning_rate_real": 1.8508306825403504e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 3.602739726027397,
      "step": 263
    },
    {
      "epoch": 3.602739726027397,
      "step": 263,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.45419830083847046
    },
    {
      "epoch": 3.602739726027397,
      "step": 263,
      "train/learning_rate_real": 1.8508306825403504e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2809.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2809.0,
      "epoch": 3.616438356164384,
      "step": 264
    },
    {
      "epoch": 3.616438356164384,
      "step": 264,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.35602056980133057
    },
    {
      "epoch": 3.616438356164384,
      "step": 264,
      "train/learning_rate_real": 1.8459609471682816e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 3066.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 3066.0,
      "epoch": 3.616438356164384,
      "step": 264
    },
    {
      "epoch": 3.616438356164384,
      "step": 264,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.3372049629688263
    },
    {
      "epoch": 3.616438356164384,
      "step": 264,
      "train/learning_rate_real": 1.8459609471682816e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 3320.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 3320.0,
      "epoch": 3.616438356164384,
      "step": 264
    },
    {
      "epoch": 3.616438356164384,
      "step": 264,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.254958838224411
    },
    {
      "epoch": 3.616438356164384,
      "step": 264,
      "train/learning_rate_real": 1.8459609471682816e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 3562.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 3562.0,
      "epoch": 3.616438356164384,
      "step": 264
    },
    {
      "epoch": 3.616438356164384,
      "step": 264,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.2555168569087982
    },
    {
      "epoch": 3.616438356164384,
      "step": 264,
      "train/learning_rate_real": 1.8459609471682816e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 3.6301369863013697,
      "step": 265
    },
    {
      "epoch": 3.6301369863013697,
      "step": 265,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.3796527087688446
    },
    {
      "epoch": 3.6301369863013697,
      "step": 265,
      "train/learning_rate_real": 1.841079477681782e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2466.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2466.0,
      "epoch": 3.6301369863013697,
      "step": 265
    },
    {
      "epoch": 3.6301369863013697,
      "step": 265,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.43377572298049927
    },
    {
      "epoch": 3.6301369863013697,
      "step": 265,
      "train/learning_rate_real": 1.841079477681782e-05
    },
    {
      "debug/num_lat_loss": 1757.0,
      "debug/num_lat_total": 2607.0,
      "debug/num_tok_loss": 1757.0,
      "debug/num_tok_total": 2607.0,
      "epoch": 3.6301369863013697,
      "step": 265
    },
    {
      "epoch": 3.6301369863013697,
      "step": 265,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.3975103199481964
    },
    {
      "epoch": 3.6301369863013697,
      "step": 265,
      "train/learning_rate_real": 1.841079477681782e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2435.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2435.0,
      "epoch": 3.6301369863013697,
      "step": 265
    },
    {
      "epoch": 3.6301369863013697,
      "step": 265,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.46887922286987305
    },
    {
      "epoch": 3.6301369863013697,
      "step": 265,
      "train/learning_rate_real": 1.841079477681782e-05
    },
    {
      "debug/num_lat_loss": 1611.0,
      "debug/num_lat_total": 1820.0,
      "debug/num_tok_loss": 1611.0,
      "debug/num_tok_total": 1820.0,
      "epoch": 3.643835616438356,
      "step": 266
    },
    {
      "epoch": 3.643835616438356,
      "step": 266,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.5174155235290527
    },
    {
      "epoch": 3.643835616438356,
      "step": 266,
      "train/learning_rate_real": 1.8361863701940642e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2195.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2195.0,
      "epoch": 3.643835616438356,
      "step": 266
    },
    {
      "epoch": 3.643835616438356,
      "step": 266,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.49644601345062256
    },
    {
      "epoch": 3.643835616438356,
      "step": 266,
      "train/learning_rate_real": 1.8361863701940642e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2215.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2215.0,
      "epoch": 3.643835616438356,
      "step": 266
    },
    {
      "epoch": 3.643835616438356,
      "step": 266,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.4839715361595154
    },
    {
      "epoch": 3.643835616438356,
      "step": 266,
      "train/learning_rate_real": 1.8361863701940642e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2000.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2000.0,
      "epoch": 3.643835616438356,
      "step": 266
    },
    {
      "epoch": 3.643835616438356,
      "step": 266,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.538507878780365
    },
    {
      "epoch": 3.643835616438356,
      "step": 266,
      "train/learning_rate_real": 1.8361863701940642e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2395.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2395.0,
      "epoch": 3.6575342465753424,
      "step": 267
    },
    {
      "epoch": 3.6575342465753424,
      "step": 267,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.4636920392513275
    },
    {
      "epoch": 3.6575342465753424,
      "step": 267,
      "train/learning_rate_real": 1.831281721047487e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3290.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3290.0,
      "epoch": 3.6575342465753424,
      "step": 267
    },
    {
      "epoch": 3.6575342465753424,
      "step": 267,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.2822487950325012
    },
    {
      "epoch": 3.6575342465753424,
      "step": 267,
      "train/learning_rate_real": 1.831281721047487e-05
    },
    {
      "debug/num_lat_loss": 1706.0,
      "debug/num_lat_total": 2268.0,
      "debug/num_tok_loss": 1706.0,
      "debug/num_tok_total": 2268.0,
      "epoch": 3.6575342465753424,
      "step": 267
    },
    {
      "epoch": 3.6575342465753424,
      "step": 267,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.45802122354507446
    },
    {
      "epoch": 3.6575342465753424,
      "step": 267,
      "train/learning_rate_real": 1.831281721047487e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2398.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2398.0,
      "epoch": 3.6575342465753424,
      "step": 267
    },
    {
      "epoch": 3.6575342465753424,
      "step": 267,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.44514697790145874
    },
    {
      "epoch": 3.6575342465753424,
      "step": 267,
      "train/learning_rate_real": 1.831281721047487e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 3.671232876712329,
      "step": 268
    },
    {
      "epoch": 3.671232876712329,
      "step": 268,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.3616383671760559
    },
    {
      "epoch": 3.671232876712329,
      "step": 268,
      "train/learning_rate_real": 1.8263656268116576e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2850.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2850.0,
      "epoch": 3.671232876712329,
      "step": 268
    },
    {
      "epoch": 3.671232876712329,
      "step": 268,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.37581169605255127
    },
    {
      "epoch": 3.671232876712329,
      "step": 268,
      "train/learning_rate_real": 1.8263656268116576e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2891.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2891.0,
      "epoch": 3.671232876712329,
      "step": 268
    },
    {
      "epoch": 3.671232876712329,
      "step": 268,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.33970674872398376
    },
    {
      "epoch": 3.671232876712329,
      "step": 268,
      "train/learning_rate_real": 1.8263656268116576e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2232.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2232.0,
      "epoch": 3.671232876712329,
      "step": 268
    },
    {
      "epoch": 3.671232876712329,
      "step": 268,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.4902125298976898
    },
    {
      "epoch": 3.671232876712329,
      "step": 268,
      "train/learning_rate_real": 1.8263656268116576e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2194.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2194.0,
      "epoch": 3.684931506849315,
      "step": 269
    },
    {
      "epoch": 3.684931506849315,
      "step": 269,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.46788713335990906
    },
    {
      "epoch": 3.684931506849315,
      "step": 269,
      "train/learning_rate_real": 1.8214381842815293e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2014.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2014.0,
      "epoch": 3.684931506849315,
      "step": 269
    },
    {
      "epoch": 3.684931506849315,
      "step": 269,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.5531131625175476
    },
    {
      "epoch": 3.684931506849315,
      "step": 269,
      "train/learning_rate_real": 1.8214381842815293e-05
    },
    {
      "debug/num_lat_loss": 1646.0,
      "debug/num_lat_total": 2081.0,
      "debug/num_tok_loss": 1646.0,
      "debug/num_tok_total": 2081.0,
      "epoch": 3.684931506849315,
      "step": 269
    },
    {
      "epoch": 3.684931506849315,
      "step": 269,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.4983122944831848
    },
    {
      "epoch": 3.684931506849315,
      "step": 269,
      "train/learning_rate_real": 1.8214381842815293e-05
    },
    {
      "debug/num_lat_loss": 1672.0,
      "debug/num_lat_total": 2674.0,
      "debug/num_tok_loss": 1672.0,
      "debug/num_tok_total": 2674.0,
      "epoch": 3.684931506849315,
      "step": 269
    },
    {
      "epoch": 3.684931506849315,
      "step": 269,
      "train/ce_loss": 2.28125,
      "train/diffusion_loss": 0.3529197573661804
    },
    {
      "epoch": 3.684931506849315,
      "step": 269,
      "train/learning_rate_real": 1.8214381842815293e-05
    },
    {
      "epoch": 3.6986301369863015,
      "grad_norm": 1.421845555305481,
      "learning_rate": 1.8214381842815293e-05,
      "loss": 2.6522,
      "step": 270
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2646.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2646.0,
      "epoch": 3.6986301369863015,
      "step": 270
    },
    {
      "epoch": 3.6986301369863015,
      "step": 270,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.41246217489242554
    },
    {
      "epoch": 3.6986301369863015,
      "step": 270,
      "train/learning_rate_real": 1.8164994904754966e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2858.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2858.0,
      "epoch": 3.6986301369863015,
      "step": 270
    },
    {
      "epoch": 3.6986301369863015,
      "step": 270,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.3775860667228699
    },
    {
      "epoch": 3.6986301369863015,
      "step": 270,
      "train/learning_rate_real": 1.8164994904754966e-05
    },
    {
      "debug/num_lat_loss": 1659.0,
      "debug/num_lat_total": 2525.0,
      "debug/num_tok_loss": 1659.0,
      "debug/num_tok_total": 2525.0,
      "epoch": 3.6986301369863015,
      "step": 270
    },
    {
      "epoch": 3.6986301369863015,
      "step": 270,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.39445722103118896
    },
    {
      "epoch": 3.6986301369863015,
      "step": 270,
      "train/learning_rate_real": 1.8164994904754966e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 3281.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 3281.0,
      "epoch": 3.6986301369863015,
      "step": 270
    },
    {
      "epoch": 3.6986301369863015,
      "step": 270,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.27408111095428467
    },
    {
      "epoch": 3.6986301369863015,
      "step": 270,
      "train/learning_rate_real": 1.8164994904754966e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 3.712328767123288,
      "step": 271
    },
    {
      "epoch": 3.712328767123288,
      "step": 271,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.41432589292526245
    },
    {
      "epoch": 3.712328767123288,
      "step": 271,
      "train/learning_rate_real": 1.811549642633486e-05
    },
    {
      "debug/num_lat_loss": 1651.0,
      "debug/num_lat_total": 2817.0,
      "debug/num_tok_loss": 1651.0,
      "debug/num_tok_total": 2817.0,
      "epoch": 3.712328767123288,
      "step": 271
    },
    {
      "epoch": 3.712328767123288,
      "step": 271,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.311215341091156
    },
    {
      "epoch": 3.712328767123288,
      "step": 271,
      "train/learning_rate_real": 1.811549642633486e-05
    },
    {
      "debug/num_lat_loss": 1673.0,
      "debug/num_lat_total": 2315.0,
      "debug/num_tok_loss": 1673.0,
      "debug/num_tok_total": 2315.0,
      "epoch": 3.712328767123288,
      "step": 271
    },
    {
      "epoch": 3.712328767123288,
      "step": 271,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.41784414649009705
    },
    {
      "epoch": 3.712328767123288,
      "step": 271,
      "train/learning_rate_real": 1.811549642633486e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2860.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2860.0,
      "epoch": 3.712328767123288,
      "step": 271
    },
    {
      "epoch": 3.712328767123288,
      "step": 271,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.37519437074661255
    },
    {
      "epoch": 3.712328767123288,
      "step": 271,
      "train/learning_rate_real": 1.811549642633486e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2240.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2240.0,
      "epoch": 3.7260273972602738,
      "step": 272
    },
    {
      "epoch": 3.7260273972602738,
      "step": 272,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.5138973593711853
    },
    {
      "epoch": 3.7260273972602738,
      "step": 272,
      "train/learning_rate_real": 1.8065887382150394e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2850.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2850.0,
      "epoch": 3.7260273972602738,
      "step": 272
    },
    {
      "epoch": 3.7260273972602738,
      "step": 272,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.36435139179229736
    },
    {
      "epoch": 3.7260273972602738,
      "step": 272,
      "train/learning_rate_real": 1.8065887382150394e-05
    },
    {
      "debug/num_lat_loss": 1602.0,
      "debug/num_lat_total": 2483.0,
      "debug/num_tok_loss": 1602.0,
      "debug/num_tok_total": 2483.0,
      "epoch": 3.7260273972602738,
      "step": 272
    },
    {
      "epoch": 3.7260273972602738,
      "step": 272,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.36120906472206116
    },
    {
      "epoch": 3.7260273972602738,
      "step": 272,
      "train/learning_rate_real": 1.8065887382150394e-05
    },
    {
      "debug/num_lat_loss": 1656.0,
      "debug/num_lat_total": 2507.0,
      "debug/num_tok_loss": 1656.0,
      "debug/num_tok_total": 2507.0,
      "epoch": 3.7260273972602738,
      "step": 272
    },
    {
      "epoch": 3.7260273972602738,
      "step": 272,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.3839026093482971
    },
    {
      "epoch": 3.7260273972602738,
      "step": 272,
      "train/learning_rate_real": 1.8065887382150394e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3089.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3089.0,
      "epoch": 3.73972602739726,
      "step": 273
    },
    {
      "epoch": 3.73972602739726,
      "step": 273,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.28552016615867615
    },
    {
      "epoch": 3.73972602739726,
      "step": 273,
      "train/learning_rate_real": 1.801616874897396e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 3122.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 3122.0,
      "epoch": 3.73972602739726,
      "step": 273
    },
    {
      "epoch": 3.73972602739726,
      "step": 273,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.28701597452163696
    },
    {
      "epoch": 3.73972602739726,
      "step": 273,
      "train/learning_rate_real": 1.801616874897396e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2696.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2696.0,
      "epoch": 3.73972602739726,
      "step": 273
    },
    {
      "epoch": 3.73972602739726,
      "step": 273,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.3732485771179199
    },
    {
      "epoch": 3.73972602739726,
      "step": 273,
      "train/learning_rate_real": 1.801616874897396e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2415.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2415.0,
      "epoch": 3.73972602739726,
      "step": 273
    },
    {
      "epoch": 3.73972602739726,
      "step": 273,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.4355056583881378
    },
    {
      "epoch": 3.73972602739726,
      "step": 273,
      "train/learning_rate_real": 1.801616874897396e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2215.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2215.0,
      "epoch": 3.7534246575342465,
      "step": 274
    },
    {
      "epoch": 3.7534246575342465,
      "step": 274,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.4883570373058319
    },
    {
      "epoch": 3.7534246575342465,
      "step": 274,
      "train/learning_rate_real": 1.7966341505735695e-05
    },
    {
      "debug/num_lat_loss": 1740.0,
      "debug/num_lat_total": 3005.0,
      "debug/num_tok_loss": 1740.0,
      "debug/num_tok_total": 3005.0,
      "epoch": 3.7534246575342465,
      "step": 274
    },
    {
      "epoch": 3.7534246575342465,
      "step": 274,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.2945389747619629
    },
    {
      "epoch": 3.7534246575342465,
      "step": 274,
      "train/learning_rate_real": 1.7966341505735695e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2434.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2434.0,
      "epoch": 3.7534246575342465,
      "step": 274
    },
    {
      "epoch": 3.7534246575342465,
      "step": 274,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.4722411036491394
    },
    {
      "epoch": 3.7534246575342465,
      "step": 274,
      "train/learning_rate_real": 1.7966341505735695e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2676.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2676.0,
      "epoch": 3.7534246575342465,
      "step": 274
    },
    {
      "epoch": 3.7534246575342465,
      "step": 274,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.3966304659843445
    },
    {
      "epoch": 3.7534246575342465,
      "step": 274,
      "train/learning_rate_real": 1.7966341505735695e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2198.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2198.0,
      "epoch": 3.767123287671233,
      "step": 275
    },
    {
      "epoch": 3.767123287671233,
      "step": 275,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.5182521939277649
    },
    {
      "epoch": 3.767123287671233,
      "step": 275,
      "train/learning_rate_real": 1.79164066335042e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 3.767123287671233,
      "step": 275
    },
    {
      "epoch": 3.767123287671233,
      "step": 275,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.35757267475128174
    },
    {
      "epoch": 3.767123287671233,
      "step": 275,
      "train/learning_rate_real": 1.79164066335042e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2456.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2456.0,
      "epoch": 3.767123287671233,
      "step": 275
    },
    {
      "epoch": 3.767123287671233,
      "step": 275,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.43631818890571594
    },
    {
      "epoch": 3.767123287671233,
      "step": 275,
      "train/learning_rate_real": 1.79164066335042e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2419.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2419.0,
      "epoch": 3.767123287671233,
      "step": 275
    },
    {
      "epoch": 3.767123287671233,
      "step": 275,
      "train/ce_loss": 2.359375,
      "train/diffusion_loss": 0.4431777894496918
    },
    {
      "epoch": 3.767123287671233,
      "step": 275,
      "train/learning_rate_real": 1.79164066335042e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 3064.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 3064.0,
      "epoch": 3.780821917808219,
      "step": 276
    },
    {
      "epoch": 3.780821917808219,
      "step": 276,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.29707273840904236
    },
    {
      "epoch": 3.780821917808219,
      "step": 276,
      "train/learning_rate_real": 1.7866365115467233e-05
    },
    {
      "debug/num_lat_loss": 1617.0,
      "debug/num_lat_total": 2684.0,
      "debug/num_tok_loss": 1617.0,
      "debug/num_tok_total": 2684.0,
      "epoch": 3.780821917808219,
      "step": 276
    },
    {
      "epoch": 3.780821917808219,
      "step": 276,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.35491320490837097
    },
    {
      "epoch": 3.780821917808219,
      "step": 276,
      "train/learning_rate_real": 1.7866365115467233e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2858.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2858.0,
      "epoch": 3.780821917808219,
      "step": 276
    },
    {
      "epoch": 3.780821917808219,
      "step": 276,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.340061217546463
    },
    {
      "epoch": 3.780821917808219,
      "step": 276,
      "train/learning_rate_real": 1.7866365115467233e-05
    },
    {
      "debug/num_lat_loss": 1633.0,
      "debug/num_lat_total": 2353.0,
      "debug/num_tok_loss": 1633.0,
      "debug/num_tok_total": 2353.0,
      "epoch": 3.780821917808219,
      "step": 276
    },
    {
      "epoch": 3.780821917808219,
      "step": 276,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.387778639793396
    },
    {
      "epoch": 3.780821917808219,
      "step": 276,
      "train/learning_rate_real": 1.7866365115467233e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 3.7945205479452055,
      "step": 277
    },
    {
      "epoch": 3.7945205479452055,
      "step": 277,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.4345017671585083
    },
    {
      "epoch": 3.7945205479452055,
      "step": 277,
      "train/learning_rate_real": 1.781621793691234e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2424.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2424.0,
      "epoch": 3.7945205479452055,
      "step": 277
    },
    {
      "epoch": 3.7945205479452055,
      "step": 277,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.4521588981151581
    },
    {
      "epoch": 3.7945205479452055,
      "step": 277,
      "train/learning_rate_real": 1.781621793691234e-05
    },
    {
      "debug/num_lat_loss": 1513.0,
      "debug/num_lat_total": 2387.0,
      "debug/num_tok_loss": 1513.0,
      "debug/num_tok_total": 2387.0,
      "epoch": 3.7945205479452055,
      "step": 277
    },
    {
      "epoch": 3.7945205479452055,
      "step": 277,
      "train/ce_loss": 2.328125,
      "train/diffusion_loss": 0.3901711404323578
    },
    {
      "epoch": 3.7945205479452055,
      "step": 277,
      "train/learning_rate_real": 1.781621793691234e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2876.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2876.0,
      "epoch": 3.7945205479452055,
      "step": 277
    },
    {
      "epoch": 3.7945205479452055,
      "step": 277,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.3412567973136902
    },
    {
      "epoch": 3.7945205479452055,
      "step": 277,
      "train/learning_rate_real": 1.781621793691234e-05
    },
    {
      "debug/num_lat_loss": 1662.0,
      "debug/num_lat_total": 2611.0,
      "debug/num_tok_loss": 1662.0,
      "debug/num_tok_total": 2611.0,
      "epoch": 3.808219178082192,
      "step": 278
    },
    {
      "epoch": 3.808219178082192,
      "step": 278,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.3693002462387085
    },
    {
      "epoch": 3.808219178082192,
      "step": 278,
      "train/learning_rate_real": 1.7765966085207448e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2808.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2808.0,
      "epoch": 3.808219178082192,
      "step": 278
    },
    {
      "epoch": 3.808219178082192,
      "step": 278,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.35720962285995483
    },
    {
      "epoch": 3.808219178082192,
      "step": 278,
      "train/learning_rate_real": 1.7765966085207448e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2622.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2622.0,
      "epoch": 3.808219178082192,
      "step": 278
    },
    {
      "epoch": 3.808219178082192,
      "step": 278,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.42649921774864197
    },
    {
      "epoch": 3.808219178082192,
      "step": 278,
      "train/learning_rate_real": 1.7765966085207448e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2686.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2686.0,
      "epoch": 3.808219178082192,
      "step": 278
    },
    {
      "epoch": 3.808219178082192,
      "step": 278,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.37720000743865967
    },
    {
      "epoch": 3.808219178082192,
      "step": 278,
      "train/learning_rate_real": 1.7765966085207448e-05
    },
    {
      "debug/num_lat_loss": 1597.0,
      "debug/num_lat_total": 2464.0,
      "debug/num_tok_loss": 1597.0,
      "debug/num_tok_total": 2464.0,
      "epoch": 3.821917808219178,
      "step": 279
    },
    {
      "epoch": 3.821917808219178,
      "step": 279,
      "train/ce_loss": 2.46875,
      "train/diffusion_loss": 0.40019384026527405
    },
    {
      "epoch": 3.821917808219178,
      "step": 279,
      "train/learning_rate_real": 1.7715610549781457e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2434.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2434.0,
      "epoch": 3.821917808219178,
      "step": 279
    },
    {
      "epoch": 3.821917808219178,
      "step": 279,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.4430074989795685
    },
    {
      "epoch": 3.821917808219178,
      "step": 279,
      "train/learning_rate_real": 1.7715610549781457e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3096.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3096.0,
      "epoch": 3.821917808219178,
      "step": 279
    },
    {
      "epoch": 3.821917808219178,
      "step": 279,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.34121590852737427
    },
    {
      "epoch": 3.821917808219178,
      "step": 279,
      "train/learning_rate_real": 1.7715610549781457e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3084.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3084.0,
      "epoch": 3.821917808219178,
      "step": 279
    },
    {
      "epoch": 3.821917808219178,
      "step": 279,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.35795098543167114
    },
    {
      "epoch": 3.821917808219178,
      "step": 279,
      "train/learning_rate_real": 1.7715610549781457e-05
    },
    {
      "epoch": 3.8356164383561646,
      "grad_norm": 1.58546781539917,
      "learning_rate": 1.7715610549781457e-05,
      "loss": 2.5047,
      "step": 280
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2629.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2629.0,
      "epoch": 3.8356164383561646,
      "step": 280
    },
    {
      "epoch": 3.8356164383561646,
      "step": 280,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.40037307143211365
    },
    {
      "epoch": 3.8356164383561646,
      "step": 280,
      "train/learning_rate_real": 1.766515232210473e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2863.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2863.0,
      "epoch": 3.8356164383561646,
      "step": 280
    },
    {
      "epoch": 3.8356164383561646,
      "step": 280,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.36349400877952576
    },
    {
      "epoch": 3.8356164383561646,
      "step": 280,
      "train/learning_rate_real": 1.766515232210473e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 3.8356164383561646,
      "step": 280
    },
    {
      "epoch": 3.8356164383561646,
      "step": 280,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.3854428827762604
    },
    {
      "epoch": 3.8356164383561646,
      "step": 280,
      "train/learning_rate_real": 1.766515232210473e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2873.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2873.0,
      "epoch": 3.8356164383561646,
      "step": 280
    },
    {
      "epoch": 3.8356164383561646,
      "step": 280,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.34019672870635986
    },
    {
      "epoch": 3.8356164383561646,
      "step": 280,
      "train/learning_rate_real": 1.766515232210473e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2658.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2658.0,
      "epoch": 3.8493150684931505,
      "step": 281
    },
    {
      "epoch": 3.8493150684931505,
      "step": 281,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.4039023518562317
    },
    {
      "epoch": 3.8493150684931505,
      "step": 281,
      "train/learning_rate_real": 1.7614592395669568e-05
    },
    {
      "debug/num_lat_loss": 1734.0,
      "debug/num_lat_total": 2388.0,
      "debug/num_tok_loss": 1734.0,
      "debug/num_tok_total": 2388.0,
      "epoch": 3.8493150684931505,
      "step": 281
    },
    {
      "epoch": 3.8493150684931505,
      "step": 281,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.4264967739582062
    },
    {
      "epoch": 3.8493150684931505,
      "step": 281,
      "train/learning_rate_real": 1.7614592395669568e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 3.8493150684931505,
      "step": 281
    },
    {
      "epoch": 3.8493150684931505,
      "step": 281,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.39664873480796814
    },
    {
      "epoch": 3.8493150684931505,
      "step": 281,
      "train/learning_rate_real": 1.7614592395669568e-05
    },
    {
      "debug/num_lat_loss": 1625.0,
      "debug/num_lat_total": 2494.0,
      "debug/num_tok_loss": 1625.0,
      "debug/num_tok_total": 2494.0,
      "epoch": 3.8493150684931505,
      "step": 281
    },
    {
      "epoch": 3.8493150684931505,
      "step": 281,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.4028310775756836
    },
    {
      "epoch": 3.8493150684931505,
      "step": 281,
      "train/learning_rate_real": 1.7614592395669568e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 3115.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 3115.0,
      "epoch": 3.863013698630137,
      "step": 282
    },
    {
      "epoch": 3.863013698630137,
      "step": 282,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.3257860243320465
    },
    {
      "epoch": 3.863013698630137,
      "step": 282,
      "train/learning_rate_real": 1.756393176597067e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2842.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2842.0,
      "epoch": 3.863013698630137,
      "step": 282
    },
    {
      "epoch": 3.863013698630137,
      "step": 282,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.33505338430404663
    },
    {
      "epoch": 3.863013698630137,
      "step": 282,
      "train/learning_rate_real": 1.756393176597067e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2608.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2608.0,
      "epoch": 3.863013698630137,
      "step": 282
    },
    {
      "epoch": 3.863013698630137,
      "step": 282,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.39998874068260193
    },
    {
      "epoch": 3.863013698630137,
      "step": 282,
      "train/learning_rate_real": 1.756393176597067e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2870.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2870.0,
      "epoch": 3.863013698630137,
      "step": 282
    },
    {
      "epoch": 3.863013698630137,
      "step": 282,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.3786977231502533
    },
    {
      "epoch": 3.863013698630137,
      "step": 282,
      "train/learning_rate_real": 1.756393176597067e-05
    },
    {
      "debug/num_lat_loss": 1739.0,
      "debug/num_lat_total": 2152.0,
      "debug/num_tok_loss": 1739.0,
      "debug/num_tok_total": 2152.0,
      "epoch": 3.8767123287671232,
      "step": 283
    },
    {
      "epoch": 3.8767123287671232,
      "step": 283,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.5135148763656616
    },
    {
      "epoch": 3.8767123287671232,
      "step": 283,
      "train/learning_rate_real": 1.751317143048552e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2412.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2412.0,
      "epoch": 3.8767123287671232,
      "step": 283
    },
    {
      "epoch": 3.8767123287671232,
      "step": 283,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.43617475032806396
    },
    {
      "epoch": 3.8767123287671232,
      "step": 283,
      "train/learning_rate_real": 1.751317143048552e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 3.8767123287671232,
      "step": 283
    },
    {
      "epoch": 3.8767123287671232,
      "step": 283,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.3687136173248291
    },
    {
      "epoch": 3.8767123287671232,
      "step": 283,
      "train/learning_rate_real": 1.751317143048552e-05
    },
    {
      "debug/num_lat_loss": 1731.0,
      "debug/num_lat_total": 2563.0,
      "debug/num_tok_loss": 1731.0,
      "debug/num_tok_total": 2563.0,
      "epoch": 3.8767123287671232,
      "step": 283
    },
    {
      "epoch": 3.8767123287671232,
      "step": 283,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.4157203435897827
    },
    {
      "epoch": 3.8767123287671232,
      "step": 283,
      "train/learning_rate_real": 1.751317143048552e-05
    },
    {
      "debug/num_lat_loss": 1641.0,
      "debug/num_lat_total": 2787.0,
      "debug/num_tok_loss": 1641.0,
      "debug/num_tok_total": 2787.0,
      "epoch": 3.8904109589041096,
      "step": 284
    },
    {
      "epoch": 3.8904109589041096,
      "step": 284,
      "train/ce_loss": 2.453125,
      "train/diffusion_loss": 0.35626301169395447
    },
    {
      "epoch": 3.8904109589041096,
      "step": 284,
      "train/learning_rate_real": 1.7462312388654752e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 1998.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 1998.0,
      "epoch": 3.8904109589041096,
      "step": 284
    },
    {
      "epoch": 3.8904109589041096,
      "step": 284,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.5404520630836487
    },
    {
      "epoch": 3.8904109589041096,
      "step": 284,
      "train/learning_rate_real": 1.7462312388654752e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2832.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2832.0,
      "epoch": 3.8904109589041096,
      "step": 284
    },
    {
      "epoch": 3.8904109589041096,
      "step": 284,
      "train/ce_loss": 2.421875,
      "train/diffusion_loss": 0.3632473945617676
    },
    {
      "epoch": 3.8904109589041096,
      "step": 284,
      "train/learning_rate_real": 1.7462312388654752e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2420.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2420.0,
      "epoch": 3.8904109589041096,
      "step": 284
    },
    {
      "epoch": 3.8904109589041096,
      "step": 284,
      "train/ce_loss": 2.296875,
      "train/diffusion_loss": 0.44463783502578735
    },
    {
      "epoch": 3.8904109589041096,
      "step": 284,
      "train/learning_rate_real": 1.7462312388654752e-05
    },
    {
      "debug/num_lat_loss": 1623.0,
      "debug/num_lat_total": 2039.0,
      "debug/num_tok_loss": 1623.0,
      "debug/num_tok_total": 2039.0,
      "epoch": 3.904109589041096,
      "step": 285
    },
    {
      "epoch": 3.904109589041096,
      "step": 285,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.4851874113082886
    },
    {
      "epoch": 3.904109589041096,
      "step": 285,
      "train/learning_rate_real": 1.7411355641862466e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2418.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2418.0,
      "epoch": 3.904109589041096,
      "step": 285
    },
    {
      "epoch": 3.904109589041096,
      "step": 285,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.4470239281654358
    },
    {
      "epoch": 3.904109589041096,
      "step": 285,
      "train/learning_rate_real": 1.7411355641862466e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2431.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2431.0,
      "epoch": 3.904109589041096,
      "step": 285
    },
    {
      "epoch": 3.904109589041096,
      "step": 285,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.4692850112915039
    },
    {
      "epoch": 3.904109589041096,
      "step": 285,
      "train/learning_rate_real": 1.7411355641862466e-05
    },
    {
      "debug/num_lat_loss": 1823.0,
      "debug/num_lat_total": 2917.0,
      "debug/num_tok_loss": 1823.0,
      "debug/num_tok_total": 2917.0,
      "epoch": 3.904109589041096,
      "step": 285
    },
    {
      "epoch": 3.904109589041096,
      "step": 285,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.34697669744491577
    },
    {
      "epoch": 3.904109589041096,
      "step": 285,
      "train/learning_rate_real": 1.7411355641862466e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2454.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2454.0,
      "epoch": 3.9178082191780823,
      "step": 286
    },
    {
      "epoch": 3.9178082191780823,
      "step": 286,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.43287089467048645
    },
    {
      "epoch": 3.9178082191780823,
      "step": 286,
      "train/learning_rate_real": 1.736030219341651e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 3.9178082191780823,
      "step": 286
    },
    {
      "epoch": 3.9178082191780823,
      "step": 286,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.3962542414665222
    },
    {
      "epoch": 3.9178082191780823,
      "step": 286,
      "train/learning_rate_real": 1.736030219341651e-05
    },
    {
      "debug/num_lat_loss": 1686.0,
      "debug/num_lat_total": 2676.0,
      "debug/num_tok_loss": 1686.0,
      "debug/num_tok_total": 2676.0,
      "epoch": 3.9178082191780823,
      "step": 286
    },
    {
      "epoch": 3.9178082191780823,
      "step": 286,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.37192660570144653
    },
    {
      "epoch": 3.9178082191780823,
      "step": 286,
      "train/learning_rate_real": 1.736030219341651e-05
    },
    {
      "debug/num_lat_loss": 1825.0,
      "debug/num_lat_total": 2491.0,
      "debug/num_tok_loss": 1825.0,
      "debug/num_tok_total": 2491.0,
      "epoch": 3.9178082191780823,
      "step": 286
    },
    {
      "epoch": 3.9178082191780823,
      "step": 286,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.43956512212753296
    },
    {
      "epoch": 3.9178082191780823,
      "step": 286,
      "train/learning_rate_real": 1.736030219341651e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2464.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2464.0,
      "epoch": 3.9315068493150687,
      "step": 287
    },
    {
      "epoch": 3.9315068493150687,
      "step": 287,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.42794516682624817
    },
    {
      "epoch": 3.9315068493150687,
      "step": 287,
      "train/learning_rate_real": 1.7309153048528735e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 3.9315068493150687,
      "step": 287
    },
    {
      "epoch": 3.9315068493150687,
      "step": 287,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.3934583365917206
    },
    {
      "epoch": 3.9315068493150687,
      "step": 287,
      "train/learning_rate_real": 1.7309153048528735e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 3072.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 3072.0,
      "epoch": 3.9315068493150687,
      "step": 287
    },
    {
      "epoch": 3.9315068493150687,
      "step": 287,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.2945156395435333
    },
    {
      "epoch": 3.9315068493150687,
      "step": 287,
      "train/learning_rate_real": 1.7309153048528735e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 3.9315068493150687,
      "step": 287
    },
    {
      "epoch": 3.9315068493150687,
      "step": 287,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.4588899314403534
    },
    {
      "epoch": 3.9315068493150687,
      "step": 287,
      "train/learning_rate_real": 1.7309153048528735e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2689.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2689.0,
      "epoch": 3.9452054794520546,
      "step": 288
    },
    {
      "epoch": 3.9452054794520546,
      "step": 288,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.4178195893764496
    },
    {
      "epoch": 3.9452054794520546,
      "step": 288,
      "train/learning_rate_real": 1.72579092142952e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 3043.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 3043.0,
      "epoch": 3.9452054794520546,
      "step": 288
    },
    {
      "epoch": 3.9452054794520546,
      "step": 288,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.34568458795547485
    },
    {
      "epoch": 3.9452054794520546,
      "step": 288,
      "train/learning_rate_real": 1.72579092142952e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2622.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2622.0,
      "epoch": 3.9452054794520546,
      "step": 288
    },
    {
      "epoch": 3.9452054794520546,
      "step": 288,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.3778199851512909
    },
    {
      "epoch": 3.9452054794520546,
      "step": 288,
      "train/learning_rate_real": 1.72579092142952e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 1749.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 1749.0,
      "epoch": 3.9452054794520546,
      "step": 288
    },
    {
      "epoch": 3.9452054794520546,
      "step": 288,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.5831629037857056
    },
    {
      "epoch": 3.9452054794520546,
      "step": 288,
      "train/learning_rate_real": 1.72579092142952e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2861.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2861.0,
      "epoch": 3.958904109589041,
      "step": 289
    },
    {
      "epoch": 3.958904109589041,
      "step": 289,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.3550923764705658
    },
    {
      "epoch": 3.958904109589041,
      "step": 289,
      "train/learning_rate_real": 1.720657169967633e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2450.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2450.0,
      "epoch": 3.958904109589041,
      "step": 289
    },
    {
      "epoch": 3.958904109589041,
      "step": 289,
      "train/ce_loss": 2.296875,
      "train/diffusion_loss": 0.43373000621795654
    },
    {
      "epoch": 3.958904109589041,
      "step": 289,
      "train/learning_rate_real": 1.720657169967633e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2404.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2404.0,
      "epoch": 3.958904109589041,
      "step": 289
    },
    {
      "epoch": 3.958904109589041,
      "step": 289,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.44662579894065857
    },
    {
      "epoch": 3.958904109589041,
      "step": 289,
      "train/learning_rate_real": 1.720657169967633e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2840.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2840.0,
      "epoch": 3.958904109589041,
      "step": 289
    },
    {
      "epoch": 3.958904109589041,
      "step": 289,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.33653688430786133
    },
    {
      "epoch": 3.958904109589041,
      "step": 289,
      "train/learning_rate_real": 1.720657169967633e-05
    },
    {
      "epoch": 3.9726027397260273,
      "grad_norm": 1.321590781211853,
      "learning_rate": 1.720657169967633e-05,
      "loss": 2.6218,
      "step": 290
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 3055.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 3055.0,
      "epoch": 3.9726027397260273,
      "step": 290
    },
    {
      "epoch": 3.9726027397260273,
      "step": 290,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.3077630400657654
    },
    {
      "epoch": 3.9726027397260273,
      "step": 290,
      "train/learning_rate_real": 1.7155141515477075e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 3049.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 3049.0,
      "epoch": 3.9726027397260273,
      "step": 290
    },
    {
      "epoch": 3.9726027397260273,
      "step": 290,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.32115334272384644
    },
    {
      "epoch": 3.9726027397260273,
      "step": 290,
      "train/learning_rate_real": 1.7155141515477075e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2655.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2655.0,
      "epoch": 3.9726027397260273,
      "step": 290
    },
    {
      "epoch": 3.9726027397260273,
      "step": 290,
      "train/ce_loss": 2.3125,
      "train/diffusion_loss": 0.3869441747665405
    },
    {
      "epoch": 3.9726027397260273,
      "step": 290,
      "train/learning_rate_real": 1.7155141515477075e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2605.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2605.0,
      "epoch": 3.9726027397260273,
      "step": 290
    },
    {
      "epoch": 3.9726027397260273,
      "step": 290,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.4380507469177246
    },
    {
      "epoch": 3.9726027397260273,
      "step": 290,
      "train/learning_rate_real": 1.7155141515477075e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2399.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2399.0,
      "epoch": 3.9863013698630136,
      "step": 291
    },
    {
      "epoch": 3.9863013698630136,
      "step": 291,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.4650937616825104
    },
    {
      "epoch": 3.9863013698630136,
      "step": 291,
      "train/learning_rate_real": 1.7103619674326984e-05
    },
    {
      "debug/num_lat_loss": 1822.0,
      "debug/num_lat_total": 3365.0,
      "debug/num_tok_loss": 1822.0,
      "debug/num_tok_total": 3365.0,
      "epoch": 3.9863013698630136,
      "step": 291
    },
    {
      "epoch": 3.9863013698630136,
      "step": 291,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.2749086022377014
    },
    {
      "epoch": 3.9863013698630136,
      "step": 291,
      "train/learning_rate_real": 1.7103619674326984e-05
    },
    {
      "debug/num_lat_loss": 1824.0,
      "debug/num_lat_total": 2476.0,
      "debug/num_tok_loss": 1824.0,
      "debug/num_tok_total": 2476.0,
      "epoch": 3.9863013698630136,
      "step": 291
    },
    {
      "epoch": 3.9863013698630136,
      "step": 291,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.4461573362350464
    },
    {
      "epoch": 3.9863013698630136,
      "step": 291,
      "train/learning_rate_real": 1.7103619674326984e-05
    },
    {
      "debug/num_lat_loss": 438.0,
      "debug/num_lat_total": 438.0,
      "debug/num_tok_loss": 438.0,
      "debug/num_tok_total": 438.0,
      "epoch": 3.9863013698630136,
      "step": 291
    },
    {
      "epoch": 3.9863013698630136,
      "step": 291,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.6075020432472229
    },
    {
      "epoch": 3.9863013698630136,
      "step": 291,
      "train/learning_rate_real": 1.7103619674326984e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2397.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2397.0,
      "epoch": 4.0,
      "step": 292
    },
    {
      "epoch": 4.0,
      "step": 292,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.43957024812698364
    },
    {
      "epoch": 4.0,
      "step": 292,
      "train/learning_rate_real": 1.705200719066028e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2442.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2442.0,
      "epoch": 4.0,
      "step": 292
    },
    {
      "epoch": 4.0,
      "step": 292,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.43320709466934204
    },
    {
      "epoch": 4.0,
      "step": 292,
      "train/learning_rate_real": 1.705200719066028e-05
    },
    {
      "debug/num_lat_loss": 1608.0,
      "debug/num_lat_total": 2588.0,
      "debug/num_tok_loss": 1608.0,
      "debug/num_tok_total": 2588.0,
      "epoch": 4.0,
      "step": 292
    },
    {
      "epoch": 4.0,
      "step": 292,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.36547115445137024
    },
    {
      "epoch": 4.0,
      "step": 292,
      "train/learning_rate_real": 1.705200719066028e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2672.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2672.0,
      "epoch": 4.0,
      "step": 292
    },
    {
      "epoch": 4.0,
      "step": 292,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.3984234035015106
    },
    {
      "epoch": 4.0,
      "step": 292,
      "train/learning_rate_real": 1.705200719066028e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2219.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2219.0,
      "epoch": 4.013698630136986,
      "step": 293
    },
    {
      "epoch": 4.013698630136986,
      "step": 293,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.47886258363723755
    },
    {
      "epoch": 4.013698630136986,
      "step": 293,
      "train/learning_rate_real": 1.7000305080695878e-05
    },
    {
      "debug/num_lat_loss": 1595.0,
      "debug/num_lat_total": 2270.0,
      "debug/num_tok_loss": 1595.0,
      "debug/num_tok_total": 2270.0,
      "epoch": 4.013698630136986,
      "step": 293
    },
    {
      "epoch": 4.013698630136986,
      "step": 293,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.3977213501930237
    },
    {
      "epoch": 4.013698630136986,
      "step": 293,
      "train/learning_rate_real": 1.7000305080695878e-05
    },
    {
      "debug/num_lat_loss": 1747.0,
      "debug/num_lat_total": 2594.0,
      "debug/num_tok_loss": 1747.0,
      "debug/num_tok_total": 2594.0,
      "epoch": 4.013698630136986,
      "step": 293
    },
    {
      "epoch": 4.013698630136986,
      "step": 293,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.3679512143135071
    },
    {
      "epoch": 4.013698630136986,
      "step": 293,
      "train/learning_rate_real": 1.7000305080695878e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2877.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2877.0,
      "epoch": 4.013698630136986,
      "step": 293
    },
    {
      "epoch": 4.013698630136986,
      "step": 293,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.35806283354759216
    },
    {
      "epoch": 4.013698630136986,
      "step": 293,
      "train/learning_rate_real": 1.7000305080695878e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 4.027397260273973,
      "step": 294
    },
    {
      "epoch": 4.027397260273973,
      "step": 294,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.3923610746860504
    },
    {
      "epoch": 4.027397260273973,
      "step": 294,
      "train/learning_rate_real": 1.6948514362417385e-05
    },
    {
      "debug/num_lat_loss": 1752.0,
      "debug/num_lat_total": 2811.0,
      "debug/num_tok_loss": 1752.0,
      "debug/num_tok_total": 2811.0,
      "epoch": 4.027397260273973,
      "step": 294
    },
    {
      "epoch": 4.027397260273973,
      "step": 294,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.3480333983898163
    },
    {
      "epoch": 4.027397260273973,
      "step": 294,
      "train/learning_rate_real": 1.6948514362417385e-05
    },
    {
      "debug/num_lat_loss": 1577.0,
      "debug/num_lat_total": 2007.0,
      "debug/num_tok_loss": 1577.0,
      "debug/num_tok_total": 2007.0,
      "epoch": 4.027397260273973,
      "step": 294
    },
    {
      "epoch": 4.027397260273973,
      "step": 294,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.4790566563606262
    },
    {
      "epoch": 4.027397260273973,
      "step": 294,
      "train/learning_rate_real": 1.6948514362417385e-05
    },
    {
      "debug/num_lat_loss": 1594.0,
      "debug/num_lat_total": 2240.0,
      "debug/num_tok_loss": 1594.0,
      "debug/num_tok_total": 2240.0,
      "epoch": 4.027397260273973,
      "step": 294
    },
    {
      "epoch": 4.027397260273973,
      "step": 294,
      "train/ce_loss": 2.5,
      "train/diffusion_loss": 0.42749252915382385
    },
    {
      "epoch": 4.027397260273973,
      "step": 294,
      "train/learning_rate_real": 1.6948514362417385e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2838.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2838.0,
      "epoch": 4.041095890410959,
      "step": 295
    },
    {
      "epoch": 4.041095890410959,
      "step": 295,
      "train/ce_loss": 1.84375,
      "train/diffusion_loss": 0.35384467244148254
    },
    {
      "epoch": 4.041095890410959,
      "step": 295,
      "train/learning_rate_real": 1.689663605555306e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2467.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2467.0,
      "epoch": 4.041095890410959,
      "step": 295
    },
    {
      "epoch": 4.041095890410959,
      "step": 295,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.4224318265914917
    },
    {
      "epoch": 4.041095890410959,
      "step": 295,
      "train/learning_rate_real": 1.689663605555306e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2208.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2208.0,
      "epoch": 4.041095890410959,
      "step": 295
    },
    {
      "epoch": 4.041095890410959,
      "step": 295,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.47826892137527466
    },
    {
      "epoch": 4.041095890410959,
      "step": 295,
      "train/learning_rate_real": 1.689663605555306e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 3044.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 3044.0,
      "epoch": 4.041095890410959,
      "step": 295
    },
    {
      "epoch": 4.041095890410959,
      "step": 295,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.3062922954559326
    },
    {
      "epoch": 4.041095890410959,
      "step": 295,
      "train/learning_rate_real": 1.689663605555306e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2660.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2660.0,
      "epoch": 4.054794520547945,
      "step": 296
    },
    {
      "epoch": 4.054794520547945,
      "step": 296,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.393525630235672
    },
    {
      "epoch": 4.054794520547945,
      "step": 296,
      "train/learning_rate_real": 1.6844671181555717e-05
    },
    {
      "debug/num_lat_loss": 1724.0,
      "debug/num_lat_total": 2509.0,
      "debug/num_tok_loss": 1724.0,
      "debug/num_tok_total": 2509.0,
      "epoch": 4.054794520547945,
      "step": 296
    },
    {
      "epoch": 4.054794520547945,
      "step": 296,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.44487008452415466
    },
    {
      "epoch": 4.054794520547945,
      "step": 296,
      "train/learning_rate_real": 1.6844671181555717e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2675.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2675.0,
      "epoch": 4.054794520547945,
      "step": 296
    },
    {
      "epoch": 4.054794520547945,
      "step": 296,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.428099662065506
    },
    {
      "epoch": 4.054794520547945,
      "step": 296,
      "train/learning_rate_real": 1.6844671181555717e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 1990.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 1990.0,
      "epoch": 4.054794520547945,
      "step": 296
    },
    {
      "epoch": 4.054794520547945,
      "step": 296,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.5531203150749207
    },
    {
      "epoch": 4.054794520547945,
      "step": 296,
      "train/learning_rate_real": 1.6844671181555717e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 3108.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 3108.0,
      "epoch": 4.068493150684931,
      "step": 297
    },
    {
      "epoch": 4.068493150684931,
      "step": 297,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.3036717176437378
    },
    {
      "epoch": 4.068493150684931,
      "step": 297,
      "train/learning_rate_real": 1.679262076358263e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2454.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2454.0,
      "epoch": 4.068493150684931,
      "step": 297
    },
    {
      "epoch": 4.068493150684931,
      "step": 297,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.45707595348358154
    },
    {
      "epoch": 4.068493150684931,
      "step": 297,
      "train/learning_rate_real": 1.679262076358263e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2618.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2618.0,
      "epoch": 4.068493150684931,
      "step": 297
    },
    {
      "epoch": 4.068493150684931,
      "step": 297,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.3870525360107422
    },
    {
      "epoch": 4.068493150684931,
      "step": 297,
      "train/learning_rate_real": 1.679262076358263e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2852.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2852.0,
      "epoch": 4.068493150684931,
      "step": 297
    },
    {
      "epoch": 4.068493150684931,
      "step": 297,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.34175747632980347
    },
    {
      "epoch": 4.068493150684931,
      "step": 297,
      "train/learning_rate_real": 1.679262076358263e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2696.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2696.0,
      "epoch": 4.082191780821918,
      "step": 298
    },
    {
      "epoch": 4.082191780821918,
      "step": 298,
      "train/ce_loss": 1.9765625,
      "train/diffusion_loss": 0.41354861855506897
    },
    {
      "epoch": 4.082191780821918,
      "step": 298,
      "train/learning_rate_real": 1.674048582647538e-05
    },
    {
      "debug/num_lat_loss": 1715.0,
      "debug/num_lat_total": 2156.0,
      "debug/num_tok_loss": 1715.0,
      "debug/num_tok_total": 2156.0,
      "epoch": 4.082191780821918,
      "step": 298
    },
    {
      "epoch": 4.082191780821918,
      "step": 298,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.4522693455219269
    },
    {
      "epoch": 4.082191780821918,
      "step": 298,
      "train/learning_rate_real": 1.674048582647538e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2651.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2651.0,
      "epoch": 4.082191780821918,
      "step": 298
    },
    {
      "epoch": 4.082191780821918,
      "step": 298,
      "train/ce_loss": 1.9765625,
      "train/diffusion_loss": 0.40085849165916443
    },
    {
      "epoch": 4.082191780821918,
      "step": 298,
      "train/learning_rate_real": 1.674048582647538e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2849.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2849.0,
      "epoch": 4.082191780821918,
      "step": 298
    },
    {
      "epoch": 4.082191780821918,
      "step": 298,
      "train/ce_loss": 1.921875,
      "train/diffusion_loss": 0.36254966259002686
    },
    {
      "epoch": 4.082191780821918,
      "step": 298,
      "train/learning_rate_real": 1.674048582647538e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2433.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2433.0,
      "epoch": 4.095890410958904,
      "step": 299
    },
    {
      "epoch": 4.095890410958904,
      "step": 299,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.4671931564807892
    },
    {
      "epoch": 4.095890410958904,
      "step": 299,
      "train/learning_rate_real": 1.6688267396739686e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2682.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2682.0,
      "epoch": 4.095890410958904,
      "step": 299
    },
    {
      "epoch": 4.095890410958904,
      "step": 299,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.408421128988266
    },
    {
      "epoch": 4.095890410958904,
      "step": 299,
      "train/learning_rate_real": 1.6688267396739686e-05
    },
    {
      "debug/num_lat_loss": 1753.0,
      "debug/num_lat_total": 2388.0,
      "debug/num_tok_loss": 1753.0,
      "debug/num_tok_total": 2388.0,
      "epoch": 4.095890410958904,
      "step": 299
    },
    {
      "epoch": 4.095890410958904,
      "step": 299,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.43699023127555847
    },
    {
      "epoch": 4.095890410958904,
      "step": 299,
      "train/learning_rate_real": 1.6688267396739686e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2423.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2423.0,
      "epoch": 4.095890410958904,
      "step": 299
    },
    {
      "epoch": 4.095890410958904,
      "step": 299,
      "train/ce_loss": 1.8984375,
      "train/diffusion_loss": 0.4659460484981537
    },
    {
      "epoch": 4.095890410958904,
      "step": 299,
      "train/learning_rate_real": 1.6688267396739686e-05
    },
    {
      "epoch": 4.109589041095891,
      "grad_norm": 1.2555819749832153,
      "learning_rate": 1.6688267396739686e-05,
      "loss": 2.6268,
      "step": 300
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2662.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2662.0,
      "epoch": 4.109589041095891,
      "step": 300
    },
    {
      "epoch": 4.109589041095891,
      "step": 300,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.39393725991249084
    },
    {
      "epoch": 4.109589041095891,
      "step": 300,
      "train/learning_rate_real": 1.6635966502525174e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2202.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2202.0,
      "epoch": 4.109589041095891,
      "step": 300
    },
    {
      "epoch": 4.109589041095891,
      "step": 300,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.5026405453681946
    },
    {
      "epoch": 4.109589041095891,
      "step": 300,
      "train/learning_rate_real": 1.6635966502525174e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2420.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2420.0,
      "epoch": 4.109589041095891,
      "step": 300
    },
    {
      "epoch": 4.109589041095891,
      "step": 300,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.4620644152164459
    },
    {
      "epoch": 4.109589041095891,
      "step": 300,
      "train/learning_rate_real": 1.6635966502525174e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2869.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2869.0,
      "epoch": 4.109589041095891,
      "step": 300
    },
    {
      "epoch": 4.109589041095891,
      "step": 300,
      "train/ce_loss": 2.296875,
      "train/diffusion_loss": 0.3546100854873657
    },
    {
      "epoch": 4.109589041095891,
      "step": 300,
      "train/learning_rate_real": 1.6635966502525174e-05
    },
    {
      "debug/num_lat_loss": 1473.0,
      "debug/num_lat_total": 1691.0,
      "debug/num_tok_loss": 1473.0,
      "debug/num_tok_total": 1691.0,
      "epoch": 4.123287671232877,
      "step": 301
    },
    {
      "epoch": 4.123287671232877,
      "step": 301,
      "train/ce_loss": 1.890625,
      "train/diffusion_loss": 0.5275248289108276
    },
    {
      "epoch": 4.123287671232877,
      "step": 301,
      "train/learning_rate_real": 1.6583584173605164e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2630.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2630.0,
      "epoch": 4.123287671232877,
      "step": 301
    },
    {
      "epoch": 4.123287671232877,
      "step": 301,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.37552115321159363
    },
    {
      "epoch": 4.123287671232877,
      "step": 301,
      "train/learning_rate_real": 1.6583584173605164e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2865.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2865.0,
      "epoch": 4.123287671232877,
      "step": 301
    },
    {
      "epoch": 4.123287671232877,
      "step": 301,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.35189786553382874
    },
    {
      "epoch": 4.123287671232877,
      "step": 301,
      "train/learning_rate_real": 1.6583584173605164e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2456.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2456.0,
      "epoch": 4.123287671232877,
      "step": 301
    },
    {
      "epoch": 4.123287671232877,
      "step": 301,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.44780659675598145
    },
    {
      "epoch": 4.123287671232877,
      "step": 301,
      "train/learning_rate_real": 1.6583584173605164e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2439.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2439.0,
      "epoch": 4.136986301369863,
      "step": 302
    },
    {
      "epoch": 4.136986301369863,
      "step": 302,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.4349138140678406
    },
    {
      "epoch": 4.136986301369863,
      "step": 302,
      "train/learning_rate_real": 1.6531121441356362e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3072.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3072.0,
      "epoch": 4.136986301369863,
      "step": 302
    },
    {
      "epoch": 4.136986301369863,
      "step": 302,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.28650379180908203
    },
    {
      "epoch": 4.136986301369863,
      "step": 302,
      "train/learning_rate_real": 1.6531121441356362e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2813.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2813.0,
      "epoch": 4.136986301369863,
      "step": 302
    },
    {
      "epoch": 4.136986301369863,
      "step": 302,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.33171990513801575
    },
    {
      "epoch": 4.136986301369863,
      "step": 302,
      "train/learning_rate_real": 1.6531121441356362e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 3066.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 3066.0,
      "epoch": 4.136986301369863,
      "step": 302
    },
    {
      "epoch": 4.136986301369863,
      "step": 302,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.33921998739242554
    },
    {
      "epoch": 4.136986301369863,
      "step": 302,
      "train/learning_rate_real": 1.6531121441356362e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2885.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2885.0,
      "epoch": 4.1506849315068495,
      "step": 303
    },
    {
      "epoch": 4.1506849315068495,
      "step": 303,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.3834764361381531
    },
    {
      "epoch": 4.1506849315068495,
      "step": 303,
      "train/learning_rate_real": 1.6478579338738577e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2409.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2409.0,
      "epoch": 4.1506849315068495,
      "step": 303
    },
    {
      "epoch": 4.1506849315068495,
      "step": 303,
      "train/ce_loss": 1.90625,
      "train/diffusion_loss": 0.44248247146606445
    },
    {
      "epoch": 4.1506849315068495,
      "step": 303,
      "train/learning_rate_real": 1.6478579338738577e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2679.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2679.0,
      "epoch": 4.1506849315068495,
      "step": 303
    },
    {
      "epoch": 4.1506849315068495,
      "step": 303,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.42659714818000793
    },
    {
      "epoch": 4.1506849315068495,
      "step": 303,
      "train/learning_rate_real": 1.6478579338738577e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2673.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2673.0,
      "epoch": 4.1506849315068495,
      "step": 303
    },
    {
      "epoch": 4.1506849315068495,
      "step": 303,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.40202435851097107
    },
    {
      "epoch": 4.1506849315068495,
      "step": 303,
      "train/learning_rate_real": 1.6478579338738577e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2186.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2186.0,
      "epoch": 4.164383561643835,
      "step": 304
    },
    {
      "epoch": 4.164383561643835,
      "step": 304,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.4802837073802948
    },
    {
      "epoch": 4.164383561643835,
      "step": 304,
      "train/learning_rate_real": 1.6425958900274362e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2456.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2456.0,
      "epoch": 4.164383561643835,
      "step": 304
    },
    {
      "epoch": 4.164383561643835,
      "step": 304,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.4443361461162567
    },
    {
      "epoch": 4.164383561643835,
      "step": 304,
      "train/learning_rate_real": 1.6425958900274362e-05
    },
    {
      "debug/num_lat_loss": 1623.0,
      "debug/num_lat_total": 2061.0,
      "debug/num_tok_loss": 1623.0,
      "debug/num_tok_total": 2061.0,
      "epoch": 4.164383561643835,
      "step": 304
    },
    {
      "epoch": 4.164383561643835,
      "step": 304,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.46792516112327576
    },
    {
      "epoch": 4.164383561643835,
      "step": 304,
      "train/learning_rate_real": 1.6425958900274362e-05
    },
    {
      "debug/num_lat_loss": 1628.0,
      "debug/num_lat_total": 2282.0,
      "debug/num_tok_loss": 1628.0,
      "debug/num_tok_total": 2282.0,
      "epoch": 4.164383561643835,
      "step": 304
    },
    {
      "epoch": 4.164383561643835,
      "step": 304,
      "train/ce_loss": 1.890625,
      "train/diffusion_loss": 0.4190717339515686
    },
    {
      "epoch": 4.164383561643835,
      "step": 304,
      "train/learning_rate_real": 1.6425958900274362e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2866.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2866.0,
      "epoch": 4.178082191780822,
      "step": 305
    },
    {
      "epoch": 4.178082191780822,
      "step": 305,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.3558429181575775
    },
    {
      "epoch": 4.178082191780822,
      "step": 305,
      "train/learning_rate_real": 1.6373261162028666e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 4.178082191780822,
      "step": 305
    },
    {
      "epoch": 4.178082191780822,
      "step": 305,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.4292551577091217
    },
    {
      "epoch": 4.178082191780822,
      "step": 305,
      "train/learning_rate_real": 1.6373261162028666e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2209.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2209.0,
      "epoch": 4.178082191780822,
      "step": 305
    },
    {
      "epoch": 4.178082191780822,
      "step": 305,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.49365416169166565
    },
    {
      "epoch": 4.178082191780822,
      "step": 305,
      "train/learning_rate_real": 1.6373261162028666e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 4.178082191780822,
      "step": 305
    },
    {
      "epoch": 4.178082191780822,
      "step": 305,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.3843974769115448
    },
    {
      "epoch": 4.178082191780822,
      "step": 305,
      "train/learning_rate_real": 1.6373261162028666e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 4.191780821917808,
      "step": 306
    },
    {
      "epoch": 4.191780821917808,
      "step": 306,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.4051734507083893
    },
    {
      "epoch": 4.191780821917808,
      "step": 306,
      "train/learning_rate_real": 1.6320487161588423e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2429.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2429.0,
      "epoch": 4.191780821917808,
      "step": 306
    },
    {
      "epoch": 4.191780821917808,
      "step": 306,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.4380723536014557
    },
    {
      "epoch": 4.191780821917808,
      "step": 306,
      "train/learning_rate_real": 1.6320487161588423e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2228.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2228.0,
      "epoch": 4.191780821917808,
      "step": 306
    },
    {
      "epoch": 4.191780821917808,
      "step": 306,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.48873797059059143
    },
    {
      "epoch": 4.191780821917808,
      "step": 306,
      "train/learning_rate_real": 1.6320487161588423e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2467.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2467.0,
      "epoch": 4.191780821917808,
      "step": 306
    },
    {
      "epoch": 4.191780821917808,
      "step": 306,
      "train/ce_loss": 2.21875,
      "train/diffusion_loss": 0.45243874192237854
    },
    {
      "epoch": 4.191780821917808,
      "step": 306,
      "train/learning_rate_real": 1.6320487161588423e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2608.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2608.0,
      "epoch": 4.205479452054795,
      "step": 307
    },
    {
      "epoch": 4.205479452054795,
      "step": 307,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.4109704792499542
    },
    {
      "epoch": 4.205479452054795,
      "step": 307,
      "train/learning_rate_real": 1.6267637938042126e-05
    },
    {
      "debug/num_lat_loss": 1709.0,
      "debug/num_lat_total": 2144.0,
      "debug/num_tok_loss": 1709.0,
      "debug/num_tok_total": 2144.0,
      "epoch": 4.205479452054795,
      "step": 307
    },
    {
      "epoch": 4.205479452054795,
      "step": 307,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.47451481223106384
    },
    {
      "epoch": 4.205479452054795,
      "step": 307,
      "train/learning_rate_real": 1.6267637938042126e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2622.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2622.0,
      "epoch": 4.205479452054795,
      "step": 307
    },
    {
      "epoch": 4.205479452054795,
      "step": 307,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.4249735176563263
    },
    {
      "epoch": 4.205479452054795,
      "step": 307,
      "train/learning_rate_real": 1.6267637938042126e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 3106.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 3106.0,
      "epoch": 4.205479452054795,
      "step": 307
    },
    {
      "epoch": 4.205479452054795,
      "step": 307,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.34032824635505676
    },
    {
      "epoch": 4.205479452054795,
      "step": 307,
      "train/learning_rate_real": 1.6267637938042126e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2863.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2863.0,
      "epoch": 4.219178082191781,
      "step": 308
    },
    {
      "epoch": 4.219178082191781,
      "step": 308,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.35815247893333435
    },
    {
      "epoch": 4.219178082191781,
      "step": 308,
      "train/learning_rate_real": 1.6214714531959348e-05
    },
    {
      "debug/num_lat_loss": 1714.0,
      "debug/num_lat_total": 2070.0,
      "debug/num_tok_loss": 1714.0,
      "debug/num_tok_total": 2070.0,
      "epoch": 4.219178082191781,
      "step": 308
    },
    {
      "epoch": 4.219178082191781,
      "step": 308,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.463321715593338
    },
    {
      "epoch": 4.219178082191781,
      "step": 308,
      "train/learning_rate_real": 1.6214714531959348e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 4.219178082191781,
      "step": 308
    },
    {
      "epoch": 4.219178082191781,
      "step": 308,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.4105816185474396
    },
    {
      "epoch": 4.219178082191781,
      "step": 308,
      "train/learning_rate_real": 1.6214714531959348e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2862.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2862.0,
      "epoch": 4.219178082191781,
      "step": 308
    },
    {
      "epoch": 4.219178082191781,
      "step": 308,
      "train/ce_loss": 2.25,
      "train/diffusion_loss": 0.3394912779331207
    },
    {
      "epoch": 4.219178082191781,
      "step": 308,
      "train/learning_rate_real": 1.6214714531959348e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2594.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2594.0,
      "epoch": 4.232876712328767,
      "step": 309
    },
    {
      "epoch": 4.232876712328767,
      "step": 309,
      "train/ce_loss": 1.8828125,
      "train/diffusion_loss": 0.392853707075119
    },
    {
      "epoch": 4.232876712328767,
      "step": 309,
      "train/learning_rate_real": 1.6161717985370302e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2643.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2643.0,
      "epoch": 4.232876712328767,
      "step": 309
    },
    {
      "epoch": 4.232876712328767,
      "step": 309,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.4154187738895416
    },
    {
      "epoch": 4.232876712328767,
      "step": 309,
      "train/learning_rate_real": 1.6161717985370302e-05
    },
    {
      "debug/num_lat_loss": 1815.0,
      "debug/num_lat_total": 3125.0,
      "debug/num_tok_loss": 1815.0,
      "debug/num_tok_total": 3125.0,
      "epoch": 4.232876712328767,
      "step": 309
    },
    {
      "epoch": 4.232876712328767,
      "step": 309,
      "train/ce_loss": 1.9296875,
      "train/diffusion_loss": 0.33852076530456543
    },
    {
      "epoch": 4.232876712328767,
      "step": 309,
      "train/learning_rate_real": 1.6161717985370302e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2405.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2405.0,
      "epoch": 4.232876712328767,
      "step": 309
    },
    {
      "epoch": 4.232876712328767,
      "step": 309,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.4304135739803314
    },
    {
      "epoch": 4.232876712328767,
      "step": 309,
      "train/learning_rate_real": 1.6161717985370302e-05
    },
    {
      "epoch": 4.2465753424657535,
      "grad_norm": 1.5299005508422852,
      "learning_rate": 1.6161717985370302e-05,
      "loss": 2.6415,
      "step": 310
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2648.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2648.0,
      "epoch": 4.2465753424657535,
      "step": 310
    },
    {
      "epoch": 4.2465753424657535,
      "step": 310,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.417819619178772
    },
    {
      "epoch": 4.2465753424657535,
      "step": 310,
      "train/learning_rate_real": 1.6108649341745262e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2682.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2682.0,
      "epoch": 4.2465753424657535,
      "step": 310
    },
    {
      "epoch": 4.2465753424657535,
      "step": 310,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.39688125252723694
    },
    {
      "epoch": 4.2465753424657535,
      "step": 310,
      "train/learning_rate_real": 1.6108649341745262e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 3049.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 3049.0,
      "epoch": 4.2465753424657535,
      "step": 310
    },
    {
      "epoch": 4.2465753424657535,
      "step": 310,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.30879974365234375
    },
    {
      "epoch": 4.2465753424657535,
      "step": 310,
      "train/learning_rate_real": 1.6108649341745262e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2435.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2435.0,
      "epoch": 4.2465753424657535,
      "step": 310
    },
    {
      "epoch": 4.2465753424657535,
      "step": 310,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.4344693720340729
    },
    {
      "epoch": 4.2465753424657535,
      "step": 310,
      "train/learning_rate_real": 1.6108649341745262e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2609.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2609.0,
      "epoch": 4.260273972602739,
      "step": 311
    },
    {
      "epoch": 4.260273972602739,
      "step": 311,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.3929198086261749
    },
    {
      "epoch": 4.260273972602739,
      "step": 311,
      "train/learning_rate_real": 1.6055509645974082e-05
    },
    {
      "debug/num_lat_loss": 1815.0,
      "debug/num_lat_total": 2700.0,
      "debug/num_tok_loss": 1815.0,
      "debug/num_tok_total": 2700.0,
      "epoch": 4.260273972602739,
      "step": 311
    },
    {
      "epoch": 4.260273972602739,
      "step": 311,
      "train/ce_loss": 1.8984375,
      "train/diffusion_loss": 0.4018617272377014
    },
    {
      "epoch": 4.260273972602739,
      "step": 311,
      "train/learning_rate_real": 1.6055509645974082e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2883.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2883.0,
      "epoch": 4.260273972602739,
      "step": 311
    },
    {
      "epoch": 4.260273972602739,
      "step": 311,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.3610696494579315
    },
    {
      "epoch": 4.260273972602739,
      "step": 311,
      "train/learning_rate_real": 1.6055509645974082e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2217.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2217.0,
      "epoch": 4.260273972602739,
      "step": 311
    },
    {
      "epoch": 4.260273972602739,
      "step": 311,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.45695215463638306
    },
    {
      "epoch": 4.260273972602739,
      "step": 311,
      "train/learning_rate_real": 1.6055509645974082e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2225.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2225.0,
      "epoch": 4.273972602739726,
      "step": 312
    },
    {
      "epoch": 4.273972602739726,
      "step": 312,
      "train/ce_loss": 2.640625,
      "train/diffusion_loss": 0.49278274178504944
    },
    {
      "epoch": 4.273972602739726,
      "step": 312,
      "train/learning_rate_real": 1.6002299944345555e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2859.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2859.0,
      "epoch": 4.273972602739726,
      "step": 312
    },
    {
      "epoch": 4.273972602739726,
      "step": 312,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.3408409357070923
    },
    {
      "epoch": 4.273972602739726,
      "step": 312,
      "train/learning_rate_real": 1.6002299944345555e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2893.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2893.0,
      "epoch": 4.273972602739726,
      "step": 312
    },
    {
      "epoch": 4.273972602739726,
      "step": 312,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.3809254467487335
    },
    {
      "epoch": 4.273972602739726,
      "step": 312,
      "train/learning_rate_real": 1.6002299944345555e-05
    },
    {
      "debug/num_lat_loss": 1825.0,
      "debug/num_lat_total": 2274.0,
      "debug/num_tok_loss": 1825.0,
      "debug/num_tok_total": 2274.0,
      "epoch": 4.273972602739726,
      "step": 312
    },
    {
      "epoch": 4.273972602739726,
      "step": 312,
      "train/ce_loss": 1.890625,
      "train/diffusion_loss": 0.49229633808135986
    },
    {
      "epoch": 4.273972602739726,
      "step": 312,
      "train/learning_rate_real": 1.6002299944345555e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2881.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2881.0,
      "epoch": 4.287671232876712,
      "step": 313
    },
    {
      "epoch": 4.287671232876712,
      "step": 313,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.3535464107990265
    },
    {
      "epoch": 4.287671232876712,
      "step": 313,
      "train/learning_rate_real": 1.594902128452688e-05
    },
    {
      "debug/num_lat_loss": 1718.0,
      "debug/num_lat_total": 3162.0,
      "debug/num_tok_loss": 1718.0,
      "debug/num_tok_total": 3162.0,
      "epoch": 4.287671232876712,
      "step": 313
    },
    {
      "epoch": 4.287671232876712,
      "step": 313,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.29265493154525757
    },
    {
      "epoch": 4.287671232876712,
      "step": 313,
      "train/learning_rate_real": 1.594902128452688e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 4.287671232876712,
      "step": 313
    },
    {
      "epoch": 4.287671232876712,
      "step": 313,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.43232548236846924
    },
    {
      "epoch": 4.287671232876712,
      "step": 313,
      "train/learning_rate_real": 1.594902128452688e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2651.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2651.0,
      "epoch": 4.287671232876712,
      "step": 313
    },
    {
      "epoch": 4.287671232876712,
      "step": 313,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.38878774642944336
    },
    {
      "epoch": 4.287671232876712,
      "step": 313,
      "train/learning_rate_real": 1.594902128452688e-05
    },
    {
      "debug/num_lat_loss": 1617.0,
      "debug/num_lat_total": 2966.0,
      "debug/num_tok_loss": 1617.0,
      "debug/num_tok_total": 2966.0,
      "epoch": 4.301369863013699,
      "step": 314
    },
    {
      "epoch": 4.301369863013699,
      "step": 314,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.28403106331825256
    },
    {
      "epoch": 4.301369863013699,
      "step": 314,
      "train/learning_rate_real": 1.5895674715542985e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2427.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2427.0,
      "epoch": 4.301369863013699,
      "step": 314
    },
    {
      "epoch": 4.301369863013699,
      "step": 314,
      "train/ce_loss": 1.828125,
      "train/diffusion_loss": 0.4388461410999298
    },
    {
      "epoch": 4.301369863013699,
      "step": 314,
      "train/learning_rate_real": 1.5895674715542985e-05
    },
    {
      "debug/num_lat_loss": 1614.0,
      "debug/num_lat_total": 2100.0,
      "debug/num_tok_loss": 1614.0,
      "debug/num_tok_total": 2100.0,
      "epoch": 4.301369863013699,
      "step": 314
    },
    {
      "epoch": 4.301369863013699,
      "step": 314,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.4645499289035797
    },
    {
      "epoch": 4.301369863013699,
      "step": 314,
      "train/learning_rate_real": 1.5895674715542985e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2851.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2851.0,
      "epoch": 4.301369863013699,
      "step": 314
    },
    {
      "epoch": 4.301369863013699,
      "step": 314,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.34211722016334534
    },
    {
      "epoch": 4.301369863013699,
      "step": 314,
      "train/learning_rate_real": 1.5895674715542985e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2198.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2198.0,
      "epoch": 4.315068493150685,
      "step": 315
    },
    {
      "epoch": 4.315068493150685,
      "step": 315,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.48268836736679077
    },
    {
      "epoch": 4.315068493150685,
      "step": 315,
      "train/learning_rate_real": 1.584226128775589e-05
    },
    {
      "debug/num_lat_loss": 1704.0,
      "debug/num_lat_total": 2348.0,
      "debug/num_tok_loss": 1704.0,
      "debug/num_tok_total": 2348.0,
      "epoch": 4.315068493150685,
      "step": 315
    },
    {
      "epoch": 4.315068493150685,
      "step": 315,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.4525335729122162
    },
    {
      "epoch": 4.315068493150685,
      "step": 315,
      "train/learning_rate_real": 1.584226128775589e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2857.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2857.0,
      "epoch": 4.315068493150685,
      "step": 315
    },
    {
      "epoch": 4.315068493150685,
      "step": 315,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.33578816056251526
    },
    {
      "epoch": 4.315068493150685,
      "step": 315,
      "train/learning_rate_real": 1.584226128775589e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2849.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2849.0,
      "epoch": 4.315068493150685,
      "step": 315
    },
    {
      "epoch": 4.315068493150685,
      "step": 315,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.35660043358802795
    },
    {
      "epoch": 4.315068493150685,
      "step": 315,
      "train/learning_rate_real": 1.584226128775589e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 4.328767123287671,
      "step": 316
    },
    {
      "epoch": 4.328767123287671,
      "step": 316,
      "train/ce_loss": 1.828125,
      "train/diffusion_loss": 0.4368599057197571
    },
    {
      "epoch": 4.328767123287671,
      "step": 316,
      "train/learning_rate_real": 1.5788782052844015e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2404.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2404.0,
      "epoch": 4.328767123287671,
      "step": 316
    },
    {
      "epoch": 4.328767123287671,
      "step": 316,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.4441825747489929
    },
    {
      "epoch": 4.328767123287671,
      "step": 316,
      "train/learning_rate_real": 1.5788782052844015e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2459.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2459.0,
      "epoch": 4.328767123287671,
      "step": 316
    },
    {
      "epoch": 4.328767123287671,
      "step": 316,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.4697098135948181
    },
    {
      "epoch": 4.328767123287671,
      "step": 316,
      "train/learning_rate_real": 1.5788782052844015e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 1995.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 1995.0,
      "epoch": 4.328767123287671,
      "step": 316
    },
    {
      "epoch": 4.328767123287671,
      "step": 316,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.5420702695846558
    },
    {
      "epoch": 4.328767123287671,
      "step": 316,
      "train/learning_rate_real": 1.5788782052844015e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 3307.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 3307.0,
      "epoch": 4.342465753424658,
      "step": 317
    },
    {
      "epoch": 4.342465753424658,
      "step": 317,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.28982651233673096
    },
    {
      "epoch": 4.342465753424658,
      "step": 317,
      "train/learning_rate_real": 1.573523806378151e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2404.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2404.0,
      "epoch": 4.342465753424658,
      "step": 317
    },
    {
      "epoch": 4.342465753424658,
      "step": 317,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.43960776925086975
    },
    {
      "epoch": 4.342465753424658,
      "step": 317,
      "train/learning_rate_real": 1.573523806378151e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 3084.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 3084.0,
      "epoch": 4.342465753424658,
      "step": 317
    },
    {
      "epoch": 4.342465753424658,
      "step": 317,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.3340285122394562
    },
    {
      "epoch": 4.342465753424658,
      "step": 317,
      "train/learning_rate_real": 1.573523806378151e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2621.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2621.0,
      "epoch": 4.342465753424658,
      "step": 317
    },
    {
      "epoch": 4.342465753424658,
      "step": 317,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.3833756148815155
    },
    {
      "epoch": 4.342465753424658,
      "step": 317,
      "train/learning_rate_real": 1.573523806378151e-05
    },
    {
      "debug/num_lat_loss": 1609.0,
      "debug/num_lat_total": 2106.0,
      "debug/num_tok_loss": 1609.0,
      "debug/num_tok_total": 2106.0,
      "epoch": 4.3561643835616435,
      "step": 318
    },
    {
      "epoch": 4.3561643835616435,
      "step": 318,
      "train/ce_loss": 2.40625,
      "train/diffusion_loss": 0.47296836972236633
    },
    {
      "epoch": 4.3561643835616435,
      "step": 318,
      "train/learning_rate_real": 1.568163037481747e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2826.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2826.0,
      "epoch": 4.3561643835616435,
      "step": 318
    },
    {
      "epoch": 4.3561643835616435,
      "step": 318,
      "train/ce_loss": 1.90625,
      "train/diffusion_loss": 0.34956642985343933
    },
    {
      "epoch": 4.3561643835616435,
      "step": 318,
      "train/learning_rate_real": 1.568163037481747e-05
    },
    {
      "debug/num_lat_loss": 1709.0,
      "debug/num_lat_total": 2050.0,
      "debug/num_tok_loss": 1709.0,
      "debug/num_tok_total": 2050.0,
      "epoch": 4.3561643835616435,
      "step": 318
    },
    {
      "epoch": 4.3561643835616435,
      "step": 318,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.5021321177482605
    },
    {
      "epoch": 4.3561643835616435,
      "step": 318,
      "train/learning_rate_real": 1.568163037481747e-05
    },
    {
      "debug/num_lat_loss": 1737.0,
      "debug/num_lat_total": 2369.0,
      "debug/num_tok_loss": 1737.0,
      "debug/num_tok_total": 2369.0,
      "epoch": 4.3561643835616435,
      "step": 318
    },
    {
      "epoch": 4.3561643835616435,
      "step": 318,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.44898754358291626
    },
    {
      "epoch": 4.3561643835616435,
      "step": 318,
      "train/learning_rate_real": 1.568163037481747e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2398.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2398.0,
      "epoch": 4.36986301369863,
      "step": 319
    },
    {
      "epoch": 4.36986301369863,
      "step": 319,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.41769304871559143
    },
    {
      "epoch": 4.36986301369863,
      "step": 319,
      "train/learning_rate_real": 1.562796004145522e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2821.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2821.0,
      "epoch": 4.36986301369863,
      "step": 319
    },
    {
      "epoch": 4.36986301369863,
      "step": 319,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.3780413866043091
    },
    {
      "epoch": 4.36986301369863,
      "step": 319,
      "train/learning_rate_real": 1.562796004145522e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2626.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2626.0,
      "epoch": 4.36986301369863,
      "step": 319
    },
    {
      "epoch": 4.36986301369863,
      "step": 319,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.42228835821151733
    },
    {
      "epoch": 4.36986301369863,
      "step": 319,
      "train/learning_rate_real": 1.562796004145522e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2233.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2233.0,
      "epoch": 4.36986301369863,
      "step": 319
    },
    {
      "epoch": 4.36986301369863,
      "step": 319,
      "train/ce_loss": 1.9765625,
      "train/diffusion_loss": 0.47614622116088867
    },
    {
      "epoch": 4.36986301369863,
      "step": 319,
      "train/learning_rate_real": 1.562796004145522e-05
    },
    {
      "epoch": 4.383561643835616,
      "grad_norm": 1.3093547821044922,
      "learning_rate": 1.562796004145522e-05,
      "loss": 2.6111,
      "step": 320
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2599.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2599.0,
      "epoch": 4.383561643835616,
      "step": 320
    },
    {
      "epoch": 4.383561643835616,
      "step": 320,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.440812349319458
    },
    {
      "epoch": 4.383561643835616,
      "step": 320,
      "train/learning_rate_real": 1.5574228120431497e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 4.383561643835616,
      "step": 320
    },
    {
      "epoch": 4.383561643835616,
      "step": 320,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.4079349935054779
    },
    {
      "epoch": 4.383561643835616,
      "step": 320,
      "train/learning_rate_real": 1.5574228120431497e-05
    },
    {
      "debug/num_lat_loss": 1641.0,
      "debug/num_lat_total": 2788.0,
      "debug/num_tok_loss": 1641.0,
      "debug/num_tok_total": 2788.0,
      "epoch": 4.383561643835616,
      "step": 320
    },
    {
      "epoch": 4.383561643835616,
      "step": 320,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.33988478779792786
    },
    {
      "epoch": 4.383561643835616,
      "step": 320,
      "train/learning_rate_real": 1.5574228120431497e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2591.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2591.0,
      "epoch": 4.383561643835616,
      "step": 320
    },
    {
      "epoch": 4.383561643835616,
      "step": 320,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.40686947107315063
    },
    {
      "epoch": 4.383561643835616,
      "step": 320,
      "train/learning_rate_real": 1.5574228120431497e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 3035.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 3035.0,
      "epoch": 4.397260273972603,
      "step": 321
    },
    {
      "epoch": 4.397260273972603,
      "step": 321,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.32785743474960327
    },
    {
      "epoch": 4.397260273972603,
      "step": 321,
      "train/learning_rate_real": 1.552043566969568e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2171.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2171.0,
      "epoch": 4.397260273972603,
      "step": 321
    },
    {
      "epoch": 4.397260273972603,
      "step": 321,
      "train/ce_loss": 1.9765625,
      "train/diffusion_loss": 0.47331783175468445
    },
    {
      "epoch": 4.397260273972603,
      "step": 321,
      "train/learning_rate_real": 1.552043566969568e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2424.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2424.0,
      "epoch": 4.397260273972603,
      "step": 321
    },
    {
      "epoch": 4.397260273972603,
      "step": 321,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.47246018052101135
    },
    {
      "epoch": 4.397260273972603,
      "step": 321,
      "train/learning_rate_real": 1.552043566969568e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2851.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2851.0,
      "epoch": 4.397260273972603,
      "step": 321
    },
    {
      "epoch": 4.397260273972603,
      "step": 321,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.3304142653942108
    },
    {
      "epoch": 4.397260273972603,
      "step": 321,
      "train/learning_rate_real": 1.552043566969568e-05
    },
    {
      "debug/num_lat_loss": 1608.0,
      "debug/num_lat_total": 2673.0,
      "debug/num_tok_loss": 1608.0,
      "debug/num_tok_total": 2673.0,
      "epoch": 4.410958904109589,
      "step": 322
    },
    {
      "epoch": 4.410958904109589,
      "step": 322,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.31228986382484436
    },
    {
      "epoch": 4.410958904109589,
      "step": 322,
      "train/learning_rate_real": 1.546658374838894e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2195.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2195.0,
      "epoch": 4.410958904109589,
      "step": 322
    },
    {
      "epoch": 4.410958904109589,
      "step": 322,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.49159204959869385
    },
    {
      "epoch": 4.410958904109589,
      "step": 322,
      "train/learning_rate_real": 1.546658374838894e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 4.410958904109589,
      "step": 322
    },
    {
      "epoch": 4.410958904109589,
      "step": 322,
      "train/ce_loss": 1.9296875,
      "train/diffusion_loss": 0.3542118966579437
    },
    {
      "epoch": 4.410958904109589,
      "step": 322,
      "train/learning_rate_real": 1.546658374838894e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2661.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2661.0,
      "epoch": 4.410958904109589,
      "step": 322
    },
    {
      "epoch": 4.410958904109589,
      "step": 322,
      "train/ce_loss": 1.9765625,
      "train/diffusion_loss": 0.3923184275627136
    },
    {
      "epoch": 4.410958904109589,
      "step": 322,
      "train/learning_rate_real": 1.546658374838894e-05
    },
    {
      "debug/num_lat_loss": 1592.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1592.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 4.424657534246576,
      "step": 323
    },
    {
      "epoch": 4.424657534246576,
      "step": 323,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.34810689091682434
    },
    {
      "epoch": 4.424657534246576,
      "step": 323,
      "train/learning_rate_real": 1.5412673416823367e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 3060.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 3060.0,
      "epoch": 4.424657534246576,
      "step": 323
    },
    {
      "epoch": 4.424657534246576,
      "step": 323,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.3222844898700714
    },
    {
      "epoch": 4.424657534246576,
      "step": 323,
      "train/learning_rate_real": 1.5412673416823367e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2012.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2012.0,
      "epoch": 4.424657534246576,
      "step": 323
    },
    {
      "epoch": 4.424657534246576,
      "step": 323,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.5204927921295166
    },
    {
      "epoch": 4.424657534246576,
      "step": 323,
      "train/learning_rate_real": 1.5412673416823367e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 3037.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 3037.0,
      "epoch": 4.424657534246576,
      "step": 323
    },
    {
      "epoch": 4.424657534246576,
      "step": 323,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.29368212819099426
    },
    {
      "epoch": 4.424657534246576,
      "step": 323,
      "train/learning_rate_real": 1.5412673416823367e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2469.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2469.0,
      "epoch": 4.438356164383562,
      "step": 324
    },
    {
      "epoch": 4.438356164383562,
      "step": 324,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.45224830508232117
    },
    {
      "epoch": 4.438356164383562,
      "step": 324,
      "train/learning_rate_real": 1.5358705736461147e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 4.438356164383562,
      "step": 324
    },
    {
      "epoch": 4.438356164383562,
      "step": 324,
      "train/ce_loss": 1.7578125,
      "train/diffusion_loss": 0.4042223393917084
    },
    {
      "epoch": 4.438356164383562,
      "step": 324,
      "train/learning_rate_real": 1.5358705736461147e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 3032.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 3032.0,
      "epoch": 4.438356164383562,
      "step": 324
    },
    {
      "epoch": 4.438356164383562,
      "step": 324,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.36794137954711914
    },
    {
      "epoch": 4.438356164383562,
      "step": 324,
      "train/learning_rate_real": 1.5358705736461147e-05
    },
    {
      "debug/num_lat_loss": 1660.0,
      "debug/num_lat_total": 2084.0,
      "debug/num_tok_loss": 1660.0,
      "debug/num_tok_total": 2084.0,
      "epoch": 4.438356164383562,
      "step": 324
    },
    {
      "epoch": 4.438356164383562,
      "step": 324,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.464321494102478
    },
    {
      "epoch": 4.438356164383562,
      "step": 324,
      "train/learning_rate_real": 1.5358705736461147e-05
    },
    {
      "debug/num_lat_loss": 1631.0,
      "debug/num_lat_total": 2769.0,
      "debug/num_tok_loss": 1631.0,
      "debug/num_tok_total": 2769.0,
      "epoch": 4.4520547945205475,
      "step": 325
    },
    {
      "epoch": 4.4520547945205475,
      "step": 325,
      "train/ce_loss": 1.75,
      "train/diffusion_loss": 0.2974879741668701
    },
    {
      "epoch": 4.4520547945205475,
      "step": 325,
      "train/learning_rate_real": 1.5304681769893603e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2836.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2836.0,
      "epoch": 4.4520547945205475,
      "step": 325
    },
    {
      "epoch": 4.4520547945205475,
      "step": 325,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.3495127558708191
    },
    {
      "epoch": 4.4520547945205475,
      "step": 325,
      "train/learning_rate_real": 1.5304681769893603e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2821.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2821.0,
      "epoch": 4.4520547945205475,
      "step": 325
    },
    {
      "epoch": 4.4520547945205475,
      "step": 325,
      "train/ce_loss": 1.921875,
      "train/diffusion_loss": 0.33067697286605835
    },
    {
      "epoch": 4.4520547945205475,
      "step": 325,
      "train/learning_rate_real": 1.5304681769893603e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2887.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2887.0,
      "epoch": 4.4520547945205475,
      "step": 325
    },
    {
      "epoch": 4.4520547945205475,
      "step": 325,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.36880069971084595
    },
    {
      "epoch": 4.4520547945205475,
      "step": 325,
      "train/learning_rate_real": 1.5304681769893603e-05
    },
    {
      "debug/num_lat_loss": 1634.0,
      "debug/num_lat_total": 2113.0,
      "debug/num_tok_loss": 1634.0,
      "debug/num_tok_total": 2113.0,
      "epoch": 4.465753424657534,
      "step": 326
    },
    {
      "epoch": 4.465753424657534,
      "step": 326,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.4598163068294525
    },
    {
      "epoch": 4.465753424657534,
      "step": 326,
      "train/learning_rate_real": 1.525060258082031e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 3054.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 3054.0,
      "epoch": 4.465753424657534,
      "step": 326
    },
    {
      "epoch": 4.465753424657534,
      "step": 326,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.32549595832824707
    },
    {
      "epoch": 4.465753424657534,
      "step": 326,
      "train/learning_rate_real": 1.525060258082031e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2874.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2874.0,
      "epoch": 4.465753424657534,
      "step": 326
    },
    {
      "epoch": 4.465753424657534,
      "step": 326,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.3679076135158539
    },
    {
      "epoch": 4.465753424657534,
      "step": 326,
      "train/learning_rate_real": 1.525060258082031e-05
    },
    {
      "debug/num_lat_loss": 1636.0,
      "debug/num_lat_total": 2335.0,
      "debug/num_tok_loss": 1636.0,
      "debug/num_tok_total": 2335.0,
      "epoch": 4.465753424657534,
      "step": 326
    },
    {
      "epoch": 4.465753424657534,
      "step": 326,
      "train/ce_loss": 1.8828125,
      "train/diffusion_loss": 0.4183434247970581
    },
    {
      "epoch": 4.465753424657534,
      "step": 326,
      "train/learning_rate_real": 1.525060258082031e-05
    },
    {
      "debug/num_lat_loss": 1696.0,
      "debug/num_lat_total": 2450.0,
      "debug/num_tok_loss": 1696.0,
      "debug/num_tok_total": 2450.0,
      "epoch": 4.47945205479452,
      "step": 327
    },
    {
      "epoch": 4.47945205479452,
      "step": 327,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.4099813997745514
    },
    {
      "epoch": 4.47945205479452,
      "step": 327,
      "train/learning_rate_real": 1.5196469234028144e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2427.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2427.0,
      "epoch": 4.47945205479452,
      "step": 327
    },
    {
      "epoch": 4.47945205479452,
      "step": 327,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.43626102805137634
    },
    {
      "epoch": 4.47945205479452,
      "step": 327,
      "train/learning_rate_real": 1.5196469234028144e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2230.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2230.0,
      "epoch": 4.47945205479452,
      "step": 327
    },
    {
      "epoch": 4.47945205479452,
      "step": 327,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.49635446071624756
    },
    {
      "epoch": 4.47945205479452,
      "step": 327,
      "train/learning_rate_real": 1.5196469234028144e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2236.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2236.0,
      "epoch": 4.47945205479452,
      "step": 327
    },
    {
      "epoch": 4.47945205479452,
      "step": 327,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.49344027042388916
    },
    {
      "epoch": 4.47945205479452,
      "step": 327,
      "train/learning_rate_real": 1.5196469234028144e-05
    },
    {
      "debug/num_lat_loss": 1763.0,
      "debug/num_lat_total": 2401.0,
      "debug/num_tok_loss": 1763.0,
      "debug/num_tok_total": 2401.0,
      "epoch": 4.493150684931507,
      "step": 328
    },
    {
      "epoch": 4.493150684931507,
      "step": 328,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.4326101243495941
    },
    {
      "epoch": 4.493150684931507,
      "step": 328,
      "train/learning_rate_real": 1.5142282795370305e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2400.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2400.0,
      "epoch": 4.493150684931507,
      "step": 328
    },
    {
      "epoch": 4.493150684931507,
      "step": 328,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.409049391746521
    },
    {
      "epoch": 4.493150684931507,
      "step": 328,
      "train/learning_rate_real": 1.5142282795370305e-05
    },
    {
      "debug/num_lat_loss": 1418.0,
      "debug/num_lat_total": 2125.0,
      "debug/num_tok_loss": 1418.0,
      "debug/num_tok_total": 2125.0,
      "epoch": 4.493150684931507,
      "step": 328
    },
    {
      "epoch": 4.493150684931507,
      "step": 328,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.36707603931427
    },
    {
      "epoch": 4.493150684931507,
      "step": 328,
      "train/learning_rate_real": 1.5142282795370305e-05
    },
    {
      "debug/num_lat_loss": 1766.0,
      "debug/num_lat_total": 2403.0,
      "debug/num_tok_loss": 1766.0,
      "debug/num_tok_total": 2403.0,
      "epoch": 4.493150684931507,
      "step": 328
    },
    {
      "epoch": 4.493150684931507,
      "step": 328,
      "train/ce_loss": 1.90625,
      "train/diffusion_loss": 0.43733519315719604
    },
    {
      "epoch": 4.493150684931507,
      "step": 328,
      "train/learning_rate_real": 1.5142282795370305e-05
    },
    {
      "debug/num_lat_loss": 1629.0,
      "debug/num_lat_total": 2350.0,
      "debug/num_tok_loss": 1629.0,
      "debug/num_tok_total": 2350.0,
      "epoch": 4.506849315068493,
      "step": 329
    },
    {
      "epoch": 4.506849315068493,
      "step": 329,
      "train/ce_loss": 1.859375,
      "train/diffusion_loss": 0.41867128014564514
    },
    {
      "epoch": 4.506849315068493,
      "step": 329,
      "train/learning_rate_real": 1.5088044331745352e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 3094.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 3094.0,
      "epoch": 4.506849315068493,
      "step": 329
    },
    {
      "epoch": 4.506849315068493,
      "step": 329,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.3149670660495758
    },
    {
      "epoch": 4.506849315068493,
      "step": 329,
      "train/learning_rate_real": 1.5088044331745352e-05
    },
    {
      "debug/num_lat_loss": 1673.0,
      "debug/num_lat_total": 2768.0,
      "debug/num_tok_loss": 1673.0,
      "debug/num_tok_total": 2768.0,
      "epoch": 4.506849315068493,
      "step": 329
    },
    {
      "epoch": 4.506849315068493,
      "step": 329,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.31741902232170105
    },
    {
      "epoch": 4.506849315068493,
      "step": 329,
      "train/learning_rate_real": 1.5088044331745352e-05
    },
    {
      "debug/num_lat_loss": 1810.0,
      "debug/num_lat_total": 2689.0,
      "debug/num_tok_loss": 1810.0,
      "debug/num_tok_total": 2689.0,
      "epoch": 4.506849315068493,
      "step": 329
    },
    {
      "epoch": 4.506849315068493,
      "step": 329,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.3984423577785492
    },
    {
      "epoch": 4.506849315068493,
      "step": 329,
      "train/learning_rate_real": 1.5088044331745352e-05
    },
    {
      "epoch": 4.52054794520548,
      "grad_norm": 1.4394961595535278,
      "learning_rate": 1.5088044331745352e-05,
      "loss": 2.5261,
      "step": 330
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 1997.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 1997.0,
      "epoch": 4.52054794520548,
      "step": 330
    },
    {
      "epoch": 4.52054794520548,
      "step": 330,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.5411708950996399
    },
    {
      "epoch": 4.52054794520548,
      "step": 330,
      "train/learning_rate_real": 1.503375491107617e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2628.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2628.0,
      "epoch": 4.52054794520548,
      "step": 330
    },
    {
      "epoch": 4.52054794520548,
      "step": 330,
      "train/ce_loss": 1.921875,
      "train/diffusion_loss": 0.42081862688064575
    },
    {
      "epoch": 4.52054794520548,
      "step": 330,
      "train/learning_rate_real": 1.503375491107617e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2432.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2432.0,
      "epoch": 4.52054794520548,
      "step": 330
    },
    {
      "epoch": 4.52054794520548,
      "step": 330,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.43405744433403015
    },
    {
      "epoch": 4.52054794520548,
      "step": 330,
      "train/learning_rate_real": 1.503375491107617e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2633.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2633.0,
      "epoch": 4.52054794520548,
      "step": 330
    },
    {
      "epoch": 4.52054794520548,
      "step": 330,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.38711363077163696
    },
    {
      "epoch": 4.52054794520548,
      "step": 330,
      "train/learning_rate_real": 1.503375491107617e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2209.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2209.0,
      "epoch": 4.534246575342466,
      "step": 331
    },
    {
      "epoch": 4.534246575342466,
      "step": 331,
      "train/ce_loss": 1.859375,
      "train/diffusion_loss": 0.4632979929447174
    },
    {
      "epoch": 4.534246575342466,
      "step": 331,
      "train/learning_rate_real": 1.4979415602288974e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2621.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2621.0,
      "epoch": 4.534246575342466,
      "step": 331
    },
    {
      "epoch": 4.534246575342466,
      "step": 331,
      "train/ce_loss": 1.8828125,
      "train/diffusion_loss": 0.41623392701148987
    },
    {
      "epoch": 4.534246575342466,
      "step": 331,
      "train/learning_rate_real": 1.4979415602288974e-05
    },
    {
      "debug/num_lat_loss": 1601.0,
      "debug/num_lat_total": 2037.0,
      "debug/num_tok_loss": 1601.0,
      "debug/num_tok_total": 2037.0,
      "epoch": 4.534246575342466,
      "step": 331
    },
    {
      "epoch": 4.534246575342466,
      "step": 331,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.4638037085533142
    },
    {
      "epoch": 4.534246575342466,
      "step": 331,
      "train/learning_rate_real": 1.4979415602288974e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2670.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2670.0,
      "epoch": 4.534246575342466,
      "step": 331
    },
    {
      "epoch": 4.534246575342466,
      "step": 331,
      "train/ce_loss": 1.8671875,
      "train/diffusion_loss": 0.3933698236942291
    },
    {
      "epoch": 4.534246575342466,
      "step": 331,
      "train/learning_rate_real": 1.4979415602288974e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2444.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2444.0,
      "epoch": 4.5479452054794525,
      "step": 332
    },
    {
      "epoch": 4.5479452054794525,
      "step": 332,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.43886104226112366
    },
    {
      "epoch": 4.5479452054794525,
      "step": 332,
      "train/learning_rate_real": 1.492502747529223e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2247.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2247.0,
      "epoch": 4.5479452054794525,
      "step": 332
    },
    {
      "epoch": 4.5479452054794525,
      "step": 332,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.48674336075782776
    },
    {
      "epoch": 4.5479452054794525,
      "step": 332,
      "train/learning_rate_real": 1.492502747529223e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2866.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2866.0,
      "epoch": 4.5479452054794525,
      "step": 332
    },
    {
      "epoch": 4.5479452054794525,
      "step": 332,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.37624648213386536
    },
    {
      "epoch": 4.5479452054794525,
      "step": 332,
      "train/learning_rate_real": 1.492502747529223e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2828.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2828.0,
      "epoch": 4.5479452054794525,
      "step": 332
    },
    {
      "epoch": 4.5479452054794525,
      "step": 332,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.37082526087760925
    },
    {
      "epoch": 4.5479452054794525,
      "step": 332,
      "train/learning_rate_real": 1.492502747529223e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2624.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2624.0,
      "epoch": 4.561643835616438,
      "step": 333
    },
    {
      "epoch": 4.561643835616438,
      "step": 333,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.3982437252998352
    },
    {
      "epoch": 4.561643835616438,
      "step": 333,
      "train/learning_rate_real": 1.4870591600955622e-05
    },
    {
      "debug/num_lat_loss": 1648.0,
      "debug/num_lat_total": 2298.0,
      "debug/num_tok_loss": 1648.0,
      "debug/num_tok_total": 2298.0,
      "epoch": 4.561643835616438,
      "step": 333
    },
    {
      "epoch": 4.561643835616438,
      "step": 333,
      "train/ce_loss": 1.828125,
      "train/diffusion_loss": 0.44113418459892273
    },
    {
      "epoch": 4.561643835616438,
      "step": 333,
      "train/learning_rate_real": 1.4870591600955622e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2641.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2641.0,
      "epoch": 4.561643835616438,
      "step": 333
    },
    {
      "epoch": 4.561643835616438,
      "step": 333,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.38748985528945923
    },
    {
      "epoch": 4.561643835616438,
      "step": 333,
      "train/learning_rate_real": 1.4870591600955622e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2854.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2854.0,
      "epoch": 4.561643835616438,
      "step": 333
    },
    {
      "epoch": 4.561643835616438,
      "step": 333,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.39017578959465027
    },
    {
      "epoch": 4.561643835616438,
      "step": 333,
      "train/learning_rate_real": 1.4870591600955622e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 3040.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 3040.0,
      "epoch": 4.575342465753424,
      "step": 334
    },
    {
      "epoch": 4.575342465753424,
      "step": 334,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.30143100023269653
    },
    {
      "epoch": 4.575342465753424,
      "step": 334,
      "train/learning_rate_real": 1.4816109051088931e-05
    },
    {
      "debug/num_lat_loss": 1735.0,
      "debug/num_lat_total": 2178.0,
      "debug/num_tok_loss": 1735.0,
      "debug/num_tok_total": 2178.0,
      "epoch": 4.575342465753424,
      "step": 334
    },
    {
      "epoch": 4.575342465753424,
      "step": 334,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.48089736700057983
    },
    {
      "epoch": 4.575342465753424,
      "step": 334,
      "train/learning_rate_real": 1.4816109051088931e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2183.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2183.0,
      "epoch": 4.575342465753424,
      "step": 334
    },
    {
      "epoch": 4.575342465753424,
      "step": 334,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.49683213233947754
    },
    {
      "epoch": 4.575342465753424,
      "step": 334,
      "train/learning_rate_real": 1.4816109051088931e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 3103.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 3103.0,
      "epoch": 4.575342465753424,
      "step": 334
    },
    {
      "epoch": 4.575342465753424,
      "step": 334,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.3006209433078766
    },
    {
      "epoch": 4.575342465753424,
      "step": 334,
      "train/learning_rate_real": 1.4816109051088931e-05
    },
    {
      "debug/num_lat_loss": 1743.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1743.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 4.589041095890411,
      "step": 335
    },
    {
      "epoch": 4.589041095890411,
      "step": 335,
      "train/ce_loss": 1.8125,
      "train/diffusion_loss": 0.4070529639720917
    },
    {
      "epoch": 4.589041095890411,
      "step": 335,
      "train/learning_rate_real": 1.4761580898420969e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2884.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2884.0,
      "epoch": 4.589041095890411,
      "step": 335
    },
    {
      "epoch": 4.589041095890411,
      "step": 335,
      "train/ce_loss": 1.84375,
      "train/diffusion_loss": 0.34525975584983826
    },
    {
      "epoch": 4.589041095890411,
      "step": 335,
      "train/learning_rate_real": 1.4761580898420969e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2878.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2878.0,
      "epoch": 4.589041095890411,
      "step": 335
    },
    {
      "epoch": 4.589041095890411,
      "step": 335,
      "train/ce_loss": 1.8828125,
      "train/diffusion_loss": 0.3471052348613739
    },
    {
      "epoch": 4.589041095890411,
      "step": 335,
      "train/learning_rate_real": 1.4761580898420969e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2425.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2425.0,
      "epoch": 4.589041095890411,
      "step": 335
    },
    {
      "epoch": 4.589041095890411,
      "step": 335,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.42148303985595703
    },
    {
      "epoch": 4.589041095890411,
      "step": 335,
      "train/learning_rate_real": 1.4761580898420969e-05
    },
    {
      "debug/num_lat_loss": 1793.0,
      "debug/num_lat_total": 2670.0,
      "debug/num_tok_loss": 1793.0,
      "debug/num_tok_total": 2670.0,
      "epoch": 4.602739726027397,
      "step": 336
    },
    {
      "epoch": 4.602739726027397,
      "step": 336,
      "train/ce_loss": 1.921875,
      "train/diffusion_loss": 0.39624324440956116
    },
    {
      "epoch": 4.602739726027397,
      "step": 336,
      "train/learning_rate_real": 1.470700821657843e-05
    },
    {
      "debug/num_lat_loss": 1713.0,
      "debug/num_lat_total": 2776.0,
      "debug/num_tok_loss": 1713.0,
      "debug/num_tok_total": 2776.0,
      "epoch": 4.602739726027397,
      "step": 336
    },
    {
      "epoch": 4.602739726027397,
      "step": 336,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.3550858199596405
    },
    {
      "epoch": 4.602739726027397,
      "step": 336,
      "train/learning_rate_real": 1.470700821657843e-05
    },
    {
      "debug/num_lat_loss": 1813.0,
      "debug/num_lat_total": 3346.0,
      "debug/num_tok_loss": 1813.0,
      "debug/num_tok_total": 3346.0,
      "epoch": 4.602739726027397,
      "step": 336
    },
    {
      "epoch": 4.602739726027397,
      "step": 336,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.2674638628959656
    },
    {
      "epoch": 4.602739726027397,
      "step": 336,
      "train/learning_rate_real": 1.470700821657843e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 4.602739726027397,
      "step": 336
    },
    {
      "epoch": 4.602739726027397,
      "step": 336,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.4429453909397125
    },
    {
      "epoch": 4.602739726027397,
      "step": 336,
      "train/learning_rate_real": 1.470700821657843e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2649.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2649.0,
      "epoch": 4.616438356164384,
      "step": 337
    },
    {
      "epoch": 4.616438356164384,
      "step": 337,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.4100815951824188
    },
    {
      "epoch": 4.616438356164384,
      "step": 337,
      "train/learning_rate_real": 1.4652392080064766e-05
    },
    {
      "debug/num_lat_loss": 1812.0,
      "debug/num_lat_total": 2258.0,
      "debug/num_tok_loss": 1812.0,
      "debug/num_tok_total": 2258.0,
      "epoch": 4.616438356164384,
      "step": 337
    },
    {
      "epoch": 4.616438356164384,
      "step": 337,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.48984119296073914
    },
    {
      "epoch": 4.616438356164384,
      "step": 337,
      "train/learning_rate_real": 1.4652392080064766e-05
    },
    {
      "debug/num_lat_loss": 1697.0,
      "debug/num_lat_total": 2333.0,
      "debug/num_tok_loss": 1697.0,
      "debug/num_tok_total": 2333.0,
      "epoch": 4.616438356164384,
      "step": 337
    },
    {
      "epoch": 4.616438356164384,
      "step": 337,
      "train/ce_loss": 1.828125,
      "train/diffusion_loss": 0.471801221370697
    },
    {
      "epoch": 4.616438356164384,
      "step": 337,
      "train/learning_rate_real": 1.4652392080064766e-05
    },
    {
      "debug/num_lat_loss": 1653.0,
      "debug/num_lat_total": 2943.0,
      "debug/num_tok_loss": 1653.0,
      "debug/num_tok_total": 2943.0,
      "epoch": 4.616438356164384,
      "step": 337
    },
    {
      "epoch": 4.616438356164384,
      "step": 337,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.34953343868255615
    },
    {
      "epoch": 4.616438356164384,
      "step": 337,
      "train/learning_rate_real": 1.4652392080064766e-05
    },
    {
      "debug/num_lat_loss": 1636.0,
      "debug/num_lat_total": 2744.0,
      "debug/num_tok_loss": 1636.0,
      "debug/num_tok_total": 2744.0,
      "epoch": 4.63013698630137,
      "step": 338
    },
    {
      "epoch": 4.63013698630137,
      "step": 338,
      "train/ce_loss": 1.8828125,
      "train/diffusion_loss": 0.33348938822746277
    },
    {
      "epoch": 4.63013698630137,
      "step": 338,
      "train/learning_rate_real": 1.4597733564239022e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2443.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2443.0,
      "epoch": 4.63013698630137,
      "step": 338
    },
    {
      "epoch": 4.63013698630137,
      "step": 338,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.4484327733516693
    },
    {
      "epoch": 4.63013698630137,
      "step": 338,
      "train/learning_rate_real": 1.4597733564239022e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2840.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2840.0,
      "epoch": 4.63013698630137,
      "step": 338
    },
    {
      "epoch": 4.63013698630137,
      "step": 338,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.37863439321517944
    },
    {
      "epoch": 4.63013698630137,
      "step": 338,
      "train/learning_rate_real": 1.4597733564239022e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 3330.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 3330.0,
      "epoch": 4.63013698630137,
      "step": 338
    },
    {
      "epoch": 4.63013698630137,
      "step": 338,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.26973050832748413
    },
    {
      "epoch": 4.63013698630137,
      "step": 338,
      "train/learning_rate_real": 1.4597733564239022e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2622.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2622.0,
      "epoch": 4.6438356164383565,
      "step": 339
    },
    {
      "epoch": 4.6438356164383565,
      "step": 339,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.41400426626205444
    },
    {
      "epoch": 4.6438356164383565,
      "step": 339,
      "train/learning_rate_real": 1.4543033745294664e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2432.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2432.0,
      "epoch": 4.6438356164383565,
      "step": 339
    },
    {
      "epoch": 4.6438356164383565,
      "step": 339,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.43190518021583557
    },
    {
      "epoch": 4.6438356164383565,
      "step": 339,
      "train/learning_rate_real": 1.4543033745294664e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2633.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2633.0,
      "epoch": 4.6438356164383565,
      "step": 339
    },
    {
      "epoch": 4.6438356164383565,
      "step": 339,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.4079636335372925
    },
    {
      "epoch": 4.6438356164383565,
      "step": 339,
      "train/learning_rate_real": 1.4543033745294664e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 4.6438356164383565,
      "step": 339
    },
    {
      "epoch": 4.6438356164383565,
      "step": 339,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.3774188756942749
    },
    {
      "epoch": 4.6438356164383565,
      "step": 339,
      "train/learning_rate_real": 1.4543033745294664e-05
    },
    {
      "epoch": 4.657534246575342,
      "grad_norm": 1.4297795295715332,
      "learning_rate": 1.4543033745294664e-05,
      "loss": 2.5798,
      "step": 340
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2879.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2879.0,
      "epoch": 4.657534246575342,
      "step": 340
    },
    {
      "epoch": 4.657534246575342,
      "step": 340,
      "train/ce_loss": 1.921875,
      "train/diffusion_loss": 0.35169917345046997
    },
    {
      "epoch": 4.657534246575342,
      "step": 340,
      "train/learning_rate_real": 1.4488293700238401e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2222.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2222.0,
      "epoch": 4.657534246575342,
      "step": 340
    },
    {
      "epoch": 4.657534246575342,
      "step": 340,
      "train/ce_loss": 1.8515625,
      "train/diffusion_loss": 0.5089139938354492
    },
    {
      "epoch": 4.657534246575342,
      "step": 340,
      "train/learning_rate_real": 1.4488293700238401e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2455.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2455.0,
      "epoch": 4.657534246575342,
      "step": 340
    },
    {
      "epoch": 4.657534246575342,
      "step": 340,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.47563648223876953
    },
    {
      "epoch": 4.657534246575342,
      "step": 340,
      "train/learning_rate_real": 1.4488293700238401e-05
    },
    {
      "debug/num_lat_loss": 1598.0,
      "debug/num_lat_total": 2266.0,
      "debug/num_tok_loss": 1598.0,
      "debug/num_tok_total": 2266.0,
      "epoch": 4.657534246575342,
      "step": 340
    },
    {
      "epoch": 4.657534246575342,
      "step": 340,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.4347985088825226
    },
    {
      "epoch": 4.657534246575342,
      "step": 340,
      "train/learning_rate_real": 1.4488293700238401e-05
    },
    {
      "debug/num_lat_loss": 1654.0,
      "debug/num_lat_total": 2809.0,
      "debug/num_tok_loss": 1654.0,
      "debug/num_tok_total": 2809.0,
      "epoch": 4.671232876712329,
      "step": 341
    },
    {
      "epoch": 4.671232876712329,
      "step": 341,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.32905861735343933
    },
    {
      "epoch": 4.671232876712329,
      "step": 341,
      "train/learning_rate_real": 1.443351450686896e-05
    },
    {
      "debug/num_lat_loss": 1739.0,
      "debug/num_lat_total": 2345.0,
      "debug/num_tok_loss": 1739.0,
      "debug/num_tok_total": 2345.0,
      "epoch": 4.671232876712329,
      "step": 341
    },
    {
      "epoch": 4.671232876712329,
      "step": 341,
      "train/ce_loss": 1.859375,
      "train/diffusion_loss": 0.4307333827018738
    },
    {
      "epoch": 4.671232876712329,
      "step": 341,
      "train/learning_rate_real": 1.443351450686896e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2417.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2417.0,
      "epoch": 4.671232876712329,
      "step": 341
    },
    {
      "epoch": 4.671232876712329,
      "step": 341,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.44704023003578186
    },
    {
      "epoch": 4.671232876712329,
      "step": 341,
      "train/learning_rate_real": 1.443351450686896e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2646.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2646.0,
      "epoch": 4.671232876712329,
      "step": 341
    },
    {
      "epoch": 4.671232876712329,
      "step": 341,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.4116402566432953
    },
    {
      "epoch": 4.671232876712329,
      "step": 341,
      "train/learning_rate_real": 1.443351450686896e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2217.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2217.0,
      "epoch": 4.684931506849315,
      "step": 342
    },
    {
      "epoch": 4.684931506849315,
      "step": 342,
      "train/ce_loss": 1.921875,
      "train/diffusion_loss": 0.47604432702064514
    },
    {
      "epoch": 4.684931506849315,
      "step": 342,
      "train/learning_rate_real": 1.437869724375588e-05
    },
    {
      "debug/num_lat_loss": 1720.0,
      "debug/num_lat_total": 2373.0,
      "debug/num_tok_loss": 1720.0,
      "debug/num_tok_total": 2373.0,
      "epoch": 4.684931506849315,
      "step": 342
    },
    {
      "epoch": 4.684931506849315,
      "step": 342,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.46728429198265076
    },
    {
      "epoch": 4.684931506849315,
      "step": 342,
      "train/learning_rate_real": 1.437869724375588e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 3043.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 3043.0,
      "epoch": 4.684931506849315,
      "step": 342
    },
    {
      "epoch": 4.684931506849315,
      "step": 342,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.31619134545326233
    },
    {
      "epoch": 4.684931506849315,
      "step": 342,
      "train/learning_rate_real": 1.437869724375588e-05
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 3012.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 3012.0,
      "epoch": 4.684931506849315,
      "step": 342
    },
    {
      "epoch": 4.684931506849315,
      "step": 342,
      "train/ce_loss": 1.8125,
      "train/diffusion_loss": 0.31996843218803406
    },
    {
      "epoch": 4.684931506849315,
      "step": 342,
      "train/learning_rate_real": 1.437869724375588e-05
    },
    {
      "debug/num_lat_loss": 1602.0,
      "debug/num_lat_total": 2671.0,
      "debug/num_tok_loss": 1602.0,
      "debug/num_tok_total": 2671.0,
      "epoch": 4.698630136986301,
      "step": 343
    },
    {
      "epoch": 4.698630136986301,
      "step": 343,
      "train/ce_loss": 1.859375,
      "train/diffusion_loss": 0.3488011360168457
    },
    {
      "epoch": 4.698630136986301,
      "step": 343,
      "train/learning_rate_real": 1.4323842990218273e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 2028.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 2028.0,
      "epoch": 4.698630136986301,
      "step": 343
    },
    {
      "epoch": 4.698630136986301,
      "step": 343,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.5258029699325562
    },
    {
      "epoch": 4.698630136986301,
      "step": 343,
      "train/learning_rate_real": 1.4323842990218273e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2204.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2204.0,
      "epoch": 4.698630136986301,
      "step": 343
    },
    {
      "epoch": 4.698630136986301,
      "step": 343,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.48519283533096313
    },
    {
      "epoch": 4.698630136986301,
      "step": 343,
      "train/learning_rate_real": 1.4323842990218273e-05
    },
    {
      "debug/num_lat_loss": 1805.0,
      "debug/num_lat_total": 3328.0,
      "debug/num_tok_loss": 1805.0,
      "debug/num_tok_total": 3328.0,
      "epoch": 4.698630136986301,
      "step": 343
    },
    {
      "epoch": 4.698630136986301,
      "step": 343,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.2600420117378235
    },
    {
      "epoch": 4.698630136986301,
      "step": 343,
      "train/learning_rate_real": 1.4323842990218273e-05
    },
    {
      "debug/num_lat_loss": 1620.0,
      "debug/num_lat_total": 2705.0,
      "debug/num_tok_loss": 1620.0,
      "debug/num_tok_total": 2705.0,
      "epoch": 4.712328767123288,
      "step": 344
    },
    {
      "epoch": 4.712328767123288,
      "step": 344,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.3364776372909546
    },
    {
      "epoch": 4.712328767123288,
      "step": 344,
      "train/learning_rate_real": 1.4268952826303561e-05
    },
    {
      "debug/num_lat_loss": 1693.0,
      "debug/num_lat_total": 2331.0,
      "debug/num_tok_loss": 1693.0,
      "debug/num_tok_total": 2331.0,
      "epoch": 4.712328767123288,
      "step": 344
    },
    {
      "epoch": 4.712328767123288,
      "step": 344,
      "train/ce_loss": 1.8984375,
      "train/diffusion_loss": 0.4540991187095642
    },
    {
      "epoch": 4.712328767123288,
      "step": 344,
      "train/learning_rate_real": 1.4268952826303561e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 3066.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 3066.0,
      "epoch": 4.712328767123288,
      "step": 344
    },
    {
      "epoch": 4.712328767123288,
      "step": 344,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.31021925806999207
    },
    {
      "epoch": 4.712328767123288,
      "step": 344,
      "train/learning_rate_real": 1.4268952826303561e-05
    },
    {
      "debug/num_lat_loss": 1809.0,
      "debug/num_lat_total": 2033.0,
      "debug/num_tok_loss": 1809.0,
      "debug/num_tok_total": 2033.0,
      "epoch": 4.712328767123288,
      "step": 344
    },
    {
      "epoch": 4.712328767123288,
      "step": 344,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.5523440837860107
    },
    {
      "epoch": 4.712328767123288,
      "step": 344,
      "train/learning_rate_real": 1.4268952826303561e-05
    },
    {
      "debug/num_lat_loss": 1791.0,
      "debug/num_lat_total": 2638.0,
      "debug/num_tok_loss": 1791.0,
      "debug/num_tok_total": 2638.0,
      "epoch": 4.726027397260274,
      "step": 345
    },
    {
      "epoch": 4.726027397260274,
      "step": 345,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.41924095153808594
    },
    {
      "epoch": 4.726027397260274,
      "step": 345,
      "train/learning_rate_real": 1.4214027832766228e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2437.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2437.0,
      "epoch": 4.726027397260274,
      "step": 345
    },
    {
      "epoch": 4.726027397260274,
      "step": 345,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.4393394887447357
    },
    {
      "epoch": 4.726027397260274,
      "step": 345,
      "train/learning_rate_real": 1.4214027832766228e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2427.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2427.0,
      "epoch": 4.726027397260274,
      "step": 345
    },
    {
      "epoch": 4.726027397260274,
      "step": 345,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.42627352476119995
    },
    {
      "epoch": 4.726027397260274,
      "step": 345,
      "train/learning_rate_real": 1.4214027832766228e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2412.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2412.0,
      "epoch": 4.726027397260274,
      "step": 345
    },
    {
      "epoch": 4.726027397260274,
      "step": 345,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.43422767519950867
    },
    {
      "epoch": 4.726027397260274,
      "step": 345,
      "train/learning_rate_real": 1.4214027832766228e-05
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2828.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2828.0,
      "epoch": 4.739726027397261,
      "step": 346
    },
    {
      "epoch": 4.739726027397261,
      "step": 346,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.3489011526107788
    },
    {
      "epoch": 4.739726027397261,
      "step": 346,
      "train/learning_rate_real": 1.4159069091046526e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2679.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2679.0,
      "epoch": 4.739726027397261,
      "step": 346
    },
    {
      "epoch": 4.739726027397261,
      "step": 346,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.4036855101585388
    },
    {
      "epoch": 4.739726027397261,
      "step": 346,
      "train/learning_rate_real": 1.4159069091046526e-05
    },
    {
      "debug/num_lat_loss": 1677.0,
      "debug/num_lat_total": 2108.0,
      "debug/num_tok_loss": 1677.0,
      "debug/num_tok_total": 2108.0,
      "epoch": 4.739726027397261,
      "step": 346
    },
    {
      "epoch": 4.739726027397261,
      "step": 346,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.4629563093185425
    },
    {
      "epoch": 4.739726027397261,
      "step": 346,
      "train/learning_rate_real": 1.4159069091046526e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2429.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2429.0,
      "epoch": 4.739726027397261,
      "step": 346
    },
    {
      "epoch": 4.739726027397261,
      "step": 346,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.45152392983436584
    },
    {
      "epoch": 4.739726027397261,
      "step": 346,
      "train/learning_rate_real": 1.4159069091046526e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2395.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2395.0,
      "epoch": 4.7534246575342465,
      "step": 347
    },
    {
      "epoch": 4.7534246575342465,
      "step": 347,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.46235424280166626
    },
    {
      "epoch": 4.7534246575342465,
      "step": 347,
      "train/learning_rate_real": 1.4104077683249201e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2855.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2855.0,
      "epoch": 4.7534246575342465,
      "step": 347
    },
    {
      "epoch": 4.7534246575342465,
      "step": 347,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.3713798224925995
    },
    {
      "epoch": 4.7534246575342465,
      "step": 347,
      "train/learning_rate_real": 1.4104077683249201e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2630.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2630.0,
      "epoch": 4.7534246575342465,
      "step": 347
    },
    {
      "epoch": 4.7534246575342465,
      "step": 347,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.38970276713371277
    },
    {
      "epoch": 4.7534246575342465,
      "step": 347,
      "train/learning_rate_real": 1.4104077683249201e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 4.7534246575342465,
      "step": 347
    },
    {
      "epoch": 4.7534246575342465,
      "step": 347,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.4143233001232147
    },
    {
      "epoch": 4.7534246575342465,
      "step": 347,
      "train/learning_rate_real": 1.4104077683249201e-05
    },
    {
      "debug/num_lat_loss": 1698.0,
      "debug/num_lat_total": 2550.0,
      "debug/num_tok_loss": 1698.0,
      "debug/num_tok_total": 2550.0,
      "epoch": 4.767123287671232,
      "step": 348
    },
    {
      "epoch": 4.767123287671232,
      "step": 348,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.41071975231170654
    },
    {
      "epoch": 4.767123287671232,
      "step": 348,
      "train/learning_rate_real": 1.404905469212216e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2453.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2453.0,
      "epoch": 4.767123287671232,
      "step": 348
    },
    {
      "epoch": 4.767123287671232,
      "step": 348,
      "train/ce_loss": 2.109375,
      "train/diffusion_loss": 0.45104700326919556
    },
    {
      "epoch": 4.767123287671232,
      "step": 348,
      "train/learning_rate_real": 1.404905469212216e-05
    },
    {
      "debug/num_lat_loss": 1746.0,
      "debug/num_lat_total": 3005.0,
      "debug/num_tok_loss": 1746.0,
      "debug/num_tok_total": 3005.0,
      "epoch": 4.767123287671232,
      "step": 348
    },
    {
      "epoch": 4.767123287671232,
      "step": 348,
      "train/ce_loss": 1.8125,
      "train/diffusion_loss": 0.290730357170105
    },
    {
      "epoch": 4.767123287671232,
      "step": 348,
      "train/learning_rate_real": 1.404905469212216e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 2211.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 2211.0,
      "epoch": 4.767123287671232,
      "step": 348
    },
    {
      "epoch": 4.767123287671232,
      "step": 348,
      "train/ce_loss": 1.90625,
      "train/diffusion_loss": 0.47499173879623413
    },
    {
      "epoch": 4.767123287671232,
      "step": 348,
      "train/learning_rate_real": 1.404905469212216e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2209.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2209.0,
      "epoch": 4.780821917808219,
      "step": 349
    },
    {
      "epoch": 4.780821917808219,
      "step": 349,
      "train/ce_loss": 1.78125,
      "train/diffusion_loss": 0.48611530661582947
    },
    {
      "epoch": 4.780821917808219,
      "step": 349,
      "train/learning_rate_real": 1.3994001201035174e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2619.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2619.0,
      "epoch": 4.780821917808219,
      "step": 349
    },
    {
      "epoch": 4.780821917808219,
      "step": 349,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.4095076024532318
    },
    {
      "epoch": 4.780821917808219,
      "step": 349,
      "train/learning_rate_real": 1.3994001201035174e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2396.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2396.0,
      "epoch": 4.780821917808219,
      "step": 349
    },
    {
      "epoch": 4.780821917808219,
      "step": 349,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.44954341650009155
    },
    {
      "epoch": 4.780821917808219,
      "step": 349,
      "train/learning_rate_real": 1.3994001201035174e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2835.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2835.0,
      "epoch": 4.780821917808219,
      "step": 349
    },
    {
      "epoch": 4.780821917808219,
      "step": 349,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.3251146972179413
    },
    {
      "epoch": 4.780821917808219,
      "step": 349,
      "train/learning_rate_real": 1.3994001201035174e-05
    },
    {
      "epoch": 4.794520547945205,
      "grad_norm": 1.2736142873764038,
      "learning_rate": 1.3994001201035174e-05,
      "loss": 2.6358,
      "step": 350
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2237.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2237.0,
      "epoch": 4.794520547945205,
      "step": 350
    },
    {
      "epoch": 4.794520547945205,
      "step": 350,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.4950745105743408
    },
    {
      "epoch": 4.794520547945205,
      "step": 350,
      "train/learning_rate_real": 1.3938918293958536e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2622.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2622.0,
      "epoch": 4.794520547945205,
      "step": 350
    },
    {
      "epoch": 4.794520547945205,
      "step": 350,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.3680788576602936
    },
    {
      "epoch": 4.794520547945205,
      "step": 350,
      "train/learning_rate_real": 1.3938918293958536e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2629.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2629.0,
      "epoch": 4.794520547945205,
      "step": 350
    },
    {
      "epoch": 4.794520547945205,
      "step": 350,
      "train/ce_loss": 2.15625,
      "train/diffusion_loss": 0.43818479776382446
    },
    {
      "epoch": 4.794520547945205,
      "step": 350,
      "train/learning_rate_real": 1.3938918293958536e-05
    },
    {
      "debug/num_lat_loss": 1657.0,
      "debug/num_lat_total": 2493.0,
      "debug/num_tok_loss": 1657.0,
      "debug/num_tok_total": 2493.0,
      "epoch": 4.794520547945205,
      "step": 350
    },
    {
      "epoch": 4.794520547945205,
      "step": 350,
      "train/ce_loss": 1.8046875,
      "train/diffusion_loss": 0.38977447152137756
    },
    {
      "epoch": 4.794520547945205,
      "step": 350,
      "train/learning_rate_real": 1.3938918293958536e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 4.808219178082192,
      "step": 351
    },
    {
      "epoch": 4.808219178082192,
      "step": 351,
      "train/ce_loss": 1.796875,
      "train/diffusion_loss": 0.386251300573349
    },
    {
      "epoch": 4.808219178082192,
      "step": 351,
      "train/learning_rate_real": 1.3883807055441733e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2422.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2422.0,
      "epoch": 4.808219178082192,
      "step": 351
    },
    {
      "epoch": 4.808219178082192,
      "step": 351,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.4440755546092987
    },
    {
      "epoch": 4.808219178082192,
      "step": 351,
      "train/learning_rate_real": 1.3883807055441733e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2392.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2392.0,
      "epoch": 4.808219178082192,
      "step": 351
    },
    {
      "epoch": 4.808219178082192,
      "step": 351,
      "train/ce_loss": 1.84375,
      "train/diffusion_loss": 0.452121376991272
    },
    {
      "epoch": 4.808219178082192,
      "step": 351,
      "train/learning_rate_real": 1.3883807055441733e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 3081.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 3081.0,
      "epoch": 4.808219178082192,
      "step": 351
    },
    {
      "epoch": 4.808219178082192,
      "step": 351,
      "train/ce_loss": 1.828125,
      "train/diffusion_loss": 0.3008842468261719
    },
    {
      "epoch": 4.808219178082192,
      "step": 351,
      "train/learning_rate_real": 1.3883807055441733e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2432.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2432.0,
      "epoch": 4.821917808219178,
      "step": 352
    },
    {
      "epoch": 4.821917808219178,
      "step": 352,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.46000051498413086
    },
    {
      "epoch": 4.821917808219178,
      "step": 352,
      "train/learning_rate_real": 1.3828668570592069e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 3041.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 3041.0,
      "epoch": 4.821917808219178,
      "step": 352
    },
    {
      "epoch": 4.821917808219178,
      "step": 352,
      "train/ce_loss": 1.8515625,
      "train/diffusion_loss": 0.2769721448421478
    },
    {
      "epoch": 4.821917808219178,
      "step": 352,
      "train/learning_rate_real": 1.3828668570592069e-05
    },
    {
      "debug/num_lat_loss": 1653.0,
      "debug/num_lat_total": 2514.0,
      "debug/num_tok_loss": 1653.0,
      "debug/num_tok_total": 2514.0,
      "epoch": 4.821917808219178,
      "step": 352
    },
    {
      "epoch": 4.821917808219178,
      "step": 352,
      "train/ce_loss": 2.203125,
      "train/diffusion_loss": 0.3928878903388977
    },
    {
      "epoch": 4.821917808219178,
      "step": 352,
      "train/learning_rate_real": 1.3828668570592069e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2826.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2826.0,
      "epoch": 4.821917808219178,
      "step": 352
    },
    {
      "epoch": 4.821917808219178,
      "step": 352,
      "train/ce_loss": 1.78125,
      "train/diffusion_loss": 0.34956538677215576
    },
    {
      "epoch": 4.821917808219178,
      "step": 352,
      "train/learning_rate_real": 1.3828668570592069e-05
    },
    {
      "debug/num_lat_loss": 1797.0,
      "debug/num_lat_total": 2455.0,
      "debug/num_tok_loss": 1797.0,
      "debug/num_tok_total": 2455.0,
      "epoch": 4.835616438356165,
      "step": 353
    },
    {
      "epoch": 4.835616438356165,
      "step": 353,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.41297292709350586
    },
    {
      "epoch": 4.835616438356165,
      "step": 353,
      "train/learning_rate_real": 1.3773503925053314e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2881.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2881.0,
      "epoch": 4.835616438356165,
      "step": 353
    },
    {
      "epoch": 4.835616438356165,
      "step": 353,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.3616207540035248
    },
    {
      "epoch": 4.835616438356165,
      "step": 353,
      "train/learning_rate_real": 1.3773503925053314e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2675.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2675.0,
      "epoch": 4.835616438356165,
      "step": 353
    },
    {
      "epoch": 4.835616438356165,
      "step": 353,
      "train/ce_loss": 1.8203125,
      "train/diffusion_loss": 0.41012150049209595
    },
    {
      "epoch": 4.835616438356165,
      "step": 353,
      "train/learning_rate_real": 1.3773503925053314e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2827.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2827.0,
      "epoch": 4.835616438356165,
      "step": 353
    },
    {
      "epoch": 4.835616438356165,
      "step": 353,
      "train/ce_loss": 1.8046875,
      "train/diffusion_loss": 0.34873032569885254
    },
    {
      "epoch": 4.835616438356165,
      "step": 353,
      "train/learning_rate_real": 1.3773503925053314e-05
    },
    {
      "debug/num_lat_loss": 1688.0,
      "debug/num_lat_total": 2114.0,
      "debug/num_tok_loss": 1688.0,
      "debug/num_tok_total": 2114.0,
      "epoch": 4.8493150684931505,
      "step": 354
    },
    {
      "epoch": 4.8493150684931505,
      "step": 354,
      "train/ce_loss": 1.7890625,
      "train/diffusion_loss": 0.5083751678466797
    },
    {
      "epoch": 4.8493150684931505,
      "step": 354,
      "train/learning_rate_real": 1.3718314204984332e-05
    },
    {
      "debug/num_lat_loss": 1752.0,
      "debug/num_lat_total": 2605.0,
      "debug/num_tok_loss": 1752.0,
      "debug/num_tok_total": 2605.0,
      "epoch": 4.8493150684931505,
      "step": 354
    },
    {
      "epoch": 4.8493150684931505,
      "step": 354,
      "train/ce_loss": 1.8203125,
      "train/diffusion_loss": 0.38362061977386475
    },
    {
      "epoch": 4.8493150684931505,
      "step": 354,
      "train/learning_rate_real": 1.3718314204984332e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2418.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2418.0,
      "epoch": 4.8493150684931505,
      "step": 354
    },
    {
      "epoch": 4.8493150684931505,
      "step": 354,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.433803528547287
    },
    {
      "epoch": 4.8493150684931505,
      "step": 354,
      "train/learning_rate_real": 1.3718314204984332e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2193.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2193.0,
      "epoch": 4.8493150684931505,
      "step": 354
    },
    {
      "epoch": 4.8493150684931505,
      "step": 354,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.4754595458507538
    },
    {
      "epoch": 4.8493150684931505,
      "step": 354,
      "train/learning_rate_real": 1.3718314204984332e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2642.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2642.0,
      "epoch": 4.863013698630137,
      "step": 355
    },
    {
      "epoch": 4.863013698630137,
      "step": 355,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.3864595293998718
    },
    {
      "epoch": 4.863013698630137,
      "step": 355,
      "train/learning_rate_real": 1.3663100497037681e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 2229.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 2229.0,
      "epoch": 4.863013698630137,
      "step": 355
    },
    {
      "epoch": 4.863013698630137,
      "step": 355,
      "train/ce_loss": 2.390625,
      "train/diffusion_loss": 0.5030604004859924
    },
    {
      "epoch": 4.863013698630137,
      "step": 355,
      "train/learning_rate_real": 1.3663100497037681e-05
    },
    {
      "debug/num_lat_loss": 1479.0,
      "debug/num_lat_total": 2221.0,
      "debug/num_tok_loss": 1479.0,
      "debug/num_tok_total": 2221.0,
      "epoch": 4.863013698630137,
      "step": 355
    },
    {
      "epoch": 4.863013698630137,
      "step": 355,
      "train/ce_loss": 1.9296875,
      "train/diffusion_loss": 0.3796306848526001
    },
    {
      "epoch": 4.863013698630137,
      "step": 355,
      "train/learning_rate_real": 1.3663100497037681e-05
    },
    {
      "debug/num_lat_loss": 1792.0,
      "debug/num_lat_total": 2426.0,
      "debug/num_tok_loss": 1792.0,
      "debug/num_tok_total": 2426.0,
      "epoch": 4.863013698630137,
      "step": 355
    },
    {
      "epoch": 4.863013698630137,
      "step": 355,
      "train/ce_loss": 1.859375,
      "train/diffusion_loss": 0.4208463728427887
    },
    {
      "epoch": 4.863013698630137,
      "step": 355,
      "train/learning_rate_real": 1.3663100497037681e-05
    },
    {
      "debug/num_lat_loss": 1742.0,
      "debug/num_lat_total": 2163.0,
      "debug/num_tok_loss": 1742.0,
      "debug/num_tok_total": 2163.0,
      "epoch": 4.876712328767123,
      "step": 356
    },
    {
      "epoch": 4.876712328767123,
      "step": 356,
      "train/ce_loss": 1.8125,
      "train/diffusion_loss": 0.4776431918144226
    },
    {
      "epoch": 4.876712328767123,
      "step": 356,
      "train/learning_rate_real": 1.360786388833824e-05
    },
    {
      "debug/num_lat_loss": 1803.0,
      "debug/num_lat_total": 3109.0,
      "debug/num_tok_loss": 1803.0,
      "debug/num_tok_total": 3109.0,
      "epoch": 4.876712328767123,
      "step": 356
    },
    {
      "epoch": 4.876712328767123,
      "step": 356,
      "train/ce_loss": 1.7578125,
      "train/diffusion_loss": 0.3375520706176758
    },
    {
      "epoch": 4.876712328767123,
      "step": 356,
      "train/learning_rate_real": 1.360786388833824e-05
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2639.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2639.0,
      "epoch": 4.876712328767123,
      "step": 356
    },
    {
      "epoch": 4.876712328767123,
      "step": 356,
      "train/ce_loss": 1.75,
      "train/diffusion_loss": 0.39082589745521545
    },
    {
      "epoch": 4.876712328767123,
      "step": 356,
      "train/learning_rate_real": 1.360786388833824e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2877.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2877.0,
      "epoch": 4.876712328767123,
      "step": 356
    },
    {
      "epoch": 4.876712328767123,
      "step": 356,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.3445090353488922
    },
    {
      "epoch": 4.876712328767123,
      "step": 356,
      "train/learning_rate_real": 1.360786388833824e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2858.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2858.0,
      "epoch": 4.890410958904109,
      "step": 357
    },
    {
      "epoch": 4.890410958904109,
      "step": 357,
      "train/ce_loss": 1.859375,
      "train/diffusion_loss": 0.34473174810409546
    },
    {
      "epoch": 4.890410958904109,
      "step": 357,
      "train/learning_rate_real": 1.355260546646177e-05
    },
    {
      "debug/num_lat_loss": 1650.0,
      "debug/num_lat_total": 2296.0,
      "debug/num_tok_loss": 1650.0,
      "debug/num_tok_total": 2296.0,
      "epoch": 4.890410958904109,
      "step": 357
    },
    {
      "epoch": 4.890410958904109,
      "step": 357,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.43163198232650757
    },
    {
      "epoch": 4.890410958904109,
      "step": 357,
      "train/learning_rate_real": 1.355260546646177e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2846.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2846.0,
      "epoch": 4.890410958904109,
      "step": 357
    },
    {
      "epoch": 4.890410958904109,
      "step": 357,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.34775587916374207
    },
    {
      "epoch": 4.890410958904109,
      "step": 357,
      "train/learning_rate_real": 1.355260546646177e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2443.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2443.0,
      "epoch": 4.890410958904109,
      "step": 357
    },
    {
      "epoch": 4.890410958904109,
      "step": 357,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.4328345060348511
    },
    {
      "epoch": 4.890410958904109,
      "step": 357,
      "train/learning_rate_real": 1.355260546646177e-05
    },
    {
      "debug/num_lat_loss": 1742.0,
      "debug/num_lat_total": 2802.0,
      "debug/num_tok_loss": 1742.0,
      "debug/num_tok_total": 2802.0,
      "epoch": 4.904109589041096,
      "step": 358
    },
    {
      "epoch": 4.904109589041096,
      "step": 358,
      "train/ce_loss": 1.921875,
      "train/diffusion_loss": 0.3326120972633362
    },
    {
      "epoch": 4.904109589041096,
      "step": 358,
      "train/learning_rate_real": 1.3497326319413539e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2687.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2687.0,
      "epoch": 4.904109589041096,
      "step": 358
    },
    {
      "epoch": 4.904109589041096,
      "step": 358,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.38338905572891235
    },
    {
      "epoch": 4.904109589041096,
      "step": 358,
      "train/learning_rate_real": 1.3497326319413539e-05
    },
    {
      "debug/num_lat_loss": 1646.0,
      "debug/num_lat_total": 2367.0,
      "debug/num_tok_loss": 1646.0,
      "debug/num_tok_total": 2367.0,
      "epoch": 4.904109589041096,
      "step": 358
    },
    {
      "epoch": 4.904109589041096,
      "step": 358,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.4092291593551636
    },
    {
      "epoch": 4.904109589041096,
      "step": 358,
      "train/learning_rate_real": 1.3497326319413539e-05
    },
    {
      "debug/num_lat_loss": 1616.0,
      "debug/num_lat_total": 2325.0,
      "debug/num_tok_loss": 1616.0,
      "debug/num_tok_total": 2325.0,
      "epoch": 4.904109589041096,
      "step": 358
    },
    {
      "epoch": 4.904109589041096,
      "step": 358,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.4156040847301483
    },
    {
      "epoch": 4.904109589041096,
      "step": 358,
      "train/learning_rate_real": 1.3497326319413539e-05
    },
    {
      "debug/num_lat_loss": 1758.0,
      "debug/num_lat_total": 2175.0,
      "debug/num_tok_loss": 1758.0,
      "debug/num_tok_total": 2175.0,
      "epoch": 4.917808219178082,
      "step": 359
    },
    {
      "epoch": 4.917808219178082,
      "step": 359,
      "train/ce_loss": 1.8828125,
      "train/diffusion_loss": 0.4687121510505676
    },
    {
      "epoch": 4.917808219178082,
      "step": 359,
      "train/learning_rate_real": 1.3442027535606871e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 4.917808219178082,
      "step": 359
    },
    {
      "epoch": 4.917808219178082,
      "step": 359,
      "train/ce_loss": 1.8984375,
      "train/diffusion_loss": 0.372743159532547
    },
    {
      "epoch": 4.917808219178082,
      "step": 359,
      "train/learning_rate_real": 1.3442027535606871e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 4.917808219178082,
      "step": 359
    },
    {
      "epoch": 4.917808219178082,
      "step": 359,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.41357186436653137
    },
    {
      "epoch": 4.917808219178082,
      "step": 359,
      "train/learning_rate_real": 1.3442027535606871e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3291.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3291.0,
      "epoch": 4.917808219178082,
      "step": 359
    },
    {
      "epoch": 4.917808219178082,
      "step": 359,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.2430008500814438
    },
    {
      "epoch": 4.917808219178082,
      "step": 359,
      "train/learning_rate_real": 1.3442027535606871e-05
    },
    {
      "epoch": 4.931506849315069,
      "grad_norm": 1.407039999961853,
      "learning_rate": 1.3442027535606871e-05,
      "loss": 2.5371,
      "step": 360
    },
    {
      "debug/num_lat_loss": 1765.0,
      "debug/num_lat_total": 2190.0,
      "debug/num_tok_loss": 1765.0,
      "debug/num_tok_total": 2190.0,
      "epoch": 4.931506849315069,
      "step": 360
    },
    {
      "epoch": 4.931506849315069,
      "step": 360,
      "train/ce_loss": 2.265625,
      "train/diffusion_loss": 0.4956938624382019
    },
    {
      "epoch": 4.931506849315069,
      "step": 360,
      "train/learning_rate_real": 1.3386710203841732e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 4.931506849315069,
      "step": 360
    },
    {
      "epoch": 4.931506849315069,
      "step": 360,
      "train/ce_loss": 1.8828125,
      "train/diffusion_loss": 0.43857550621032715
    },
    {
      "epoch": 4.931506849315069,
      "step": 360,
      "train/learning_rate_real": 1.3386710203841732e-05
    },
    {
      "debug/num_lat_loss": 1751.0,
      "debug/num_lat_total": 2801.0,
      "debug/num_tok_loss": 1751.0,
      "debug/num_tok_total": 2801.0,
      "epoch": 4.931506849315069,
      "step": 360
    },
    {
      "epoch": 4.931506849315069,
      "step": 360,
      "train/ce_loss": 1.796875,
      "train/diffusion_loss": 0.3335510492324829
    },
    {
      "epoch": 4.931506849315069,
      "step": 360,
      "train/learning_rate_real": 1.3386710203841732e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2210.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2210.0,
      "epoch": 4.931506849315069,
      "step": 360
    },
    {
      "epoch": 4.931506849315069,
      "step": 360,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.487785667181015
    },
    {
      "epoch": 4.931506849315069,
      "step": 360,
      "train/learning_rate_real": 1.3386710203841732e-05
    },
    {
      "debug/num_lat_loss": 1626.0,
      "debug/num_lat_total": 2109.0,
      "debug/num_tok_loss": 1626.0,
      "debug/num_tok_total": 2109.0,
      "epoch": 4.945205479452055,
      "step": 361
    },
    {
      "epoch": 4.945205479452055,
      "step": 361,
      "train/ce_loss": 1.8671875,
      "train/diffusion_loss": 0.4661395251750946
    },
    {
      "epoch": 4.945205479452055,
      "step": 361,
      "train/learning_rate_real": 1.3331375413283281e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2848.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2848.0,
      "epoch": 4.945205479452055,
      "step": 361
    },
    {
      "epoch": 4.945205479452055,
      "step": 361,
      "train/ce_loss": 1.8984375,
      "train/diffusion_loss": 0.33667802810668945
    },
    {
      "epoch": 4.945205479452055,
      "step": 361,
      "train/learning_rate_real": 1.3331375413283281e-05
    },
    {
      "debug/num_lat_loss": 1622.0,
      "debug/num_lat_total": 1888.0,
      "debug/num_tok_loss": 1622.0,
      "debug/num_tok_total": 1888.0,
      "epoch": 4.945205479452055,
      "step": 361
    },
    {
      "epoch": 4.945205479452055,
      "step": 361,
      "train/ce_loss": 1.8515625,
      "train/diffusion_loss": 0.47830551862716675
    },
    {
      "epoch": 4.945205479452055,
      "step": 361,
      "train/learning_rate_real": 1.3331375413283281e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 2236.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 2236.0,
      "epoch": 4.945205479452055,
      "step": 361
    },
    {
      "epoch": 4.945205479452055,
      "step": 361,
      "train/ce_loss": 1.84375,
      "train/diffusion_loss": 0.48921123147010803
    },
    {
      "epoch": 4.945205479452055,
      "step": 361,
      "train/learning_rate_real": 1.3331375413283281e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2447.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2447.0,
      "epoch": 4.958904109589041,
      "step": 362
    },
    {
      "epoch": 4.958904109589041,
      "step": 362,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.43193677067756653
    },
    {
      "epoch": 4.958904109589041,
      "step": 362,
      "train/learning_rate_real": 1.3276024253440425e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2867.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2867.0,
      "epoch": 4.958904109589041,
      "step": 362
    },
    {
      "epoch": 4.958904109589041,
      "step": 362,
      "train/ce_loss": 1.84375,
      "train/diffusion_loss": 0.3341422379016876
    },
    {
      "epoch": 4.958904109589041,
      "step": 362,
      "train/learning_rate_real": 1.3276024253440425e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2427.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2427.0,
      "epoch": 4.958904109589041,
      "step": 362
    },
    {
      "epoch": 4.958904109589041,
      "step": 362,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.4473453462123871
    },
    {
      "epoch": 4.958904109589041,
      "step": 362,
      "train/learning_rate_real": 1.3276024253440425e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2219.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2219.0,
      "epoch": 4.958904109589041,
      "step": 362
    },
    {
      "epoch": 4.958904109589041,
      "step": 362,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.4929288327693939
    },
    {
      "epoch": 4.958904109589041,
      "step": 362,
      "train/learning_rate_real": 1.3276024253440425e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2866.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2866.0,
      "epoch": 4.972602739726027,
      "step": 363
    },
    {
      "epoch": 4.972602739726027,
      "step": 363,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.38495710492134094
    },
    {
      "epoch": 4.972602739726027,
      "step": 363,
      "train/learning_rate_real": 1.3220657814144394e-05
    },
    {
      "debug/num_lat_loss": 1604.0,
      "debug/num_lat_total": 3115.0,
      "debug/num_tok_loss": 1604.0,
      "debug/num_tok_total": 3115.0,
      "epoch": 4.972602739726027,
      "step": 363
    },
    {
      "epoch": 4.972602739726027,
      "step": 363,
      "train/ce_loss": 1.84375,
      "train/diffusion_loss": 0.2391890585422516
    },
    {
      "epoch": 4.972602739726027,
      "step": 363,
      "train/learning_rate_real": 1.3220657814144394e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 3247.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 3247.0,
      "epoch": 4.972602739726027,
      "step": 363
    },
    {
      "epoch": 4.972602739726027,
      "step": 363,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.2867518663406372
    },
    {
      "epoch": 4.972602739726027,
      "step": 363,
      "train/learning_rate_real": 1.3220657814144394e-05
    },
    {
      "debug/num_lat_loss": 1759.0,
      "debug/num_lat_total": 2625.0,
      "debug/num_tok_loss": 1759.0,
      "debug/num_tok_total": 2625.0,
      "epoch": 4.972602739726027,
      "step": 363
    },
    {
      "epoch": 4.972602739726027,
      "step": 363,
      "train/ce_loss": 1.7578125,
      "train/diffusion_loss": 0.43952733278274536
    },
    {
      "epoch": 4.972602739726027,
      "step": 363,
      "train/learning_rate_real": 1.3220657814144394e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2640.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2640.0,
      "epoch": 4.986301369863014,
      "step": 364
    },
    {
      "epoch": 4.986301369863014,
      "step": 364,
      "train/ce_loss": 1.8046875,
      "train/diffusion_loss": 0.382199227809906
    },
    {
      "epoch": 4.986301369863014,
      "step": 364,
      "train/learning_rate_real": 1.3165277185527234e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2251.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2251.0,
      "epoch": 4.986301369863014,
      "step": 364
    },
    {
      "epoch": 4.986301369863014,
      "step": 364,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.5001628994941711
    },
    {
      "epoch": 4.986301369863014,
      "step": 364,
      "train/learning_rate_real": 1.3165277185527234e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2839.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2839.0,
      "epoch": 4.986301369863014,
      "step": 364
    },
    {
      "epoch": 4.986301369863014,
      "step": 364,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.3672467768192291
    },
    {
      "epoch": 4.986301369863014,
      "step": 364,
      "train/learning_rate_real": 1.3165277185527234e-05
    },
    {
      "debug/num_lat_loss": 429.0,
      "debug/num_lat_total": 634.0,
      "debug/num_tok_loss": 429.0,
      "debug/num_tok_total": 634.0,
      "epoch": 4.986301369863014,
      "step": 364
    },
    {
      "epoch": 4.986301369863014,
      "step": 364,
      "train/ce_loss": 1.71875,
      "train/diffusion_loss": 0.3789863586425781
    },
    {
      "epoch": 4.986301369863014,
      "step": 364,
      "train/learning_rate_real": 1.3165277185527234e-05
    },
    {
      "debug/num_lat_loss": 1687.0,
      "debug/num_lat_total": 2672.0,
      "debug/num_tok_loss": 1687.0,
      "debug/num_tok_total": 2672.0,
      "epoch": 5.0,
      "step": 365
    },
    {
      "epoch": 5.0,
      "step": 365,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.34250688552856445
    },
    {
      "epoch": 5.0,
      "step": 365,
      "train/learning_rate_real": 1.3109883458000391e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2026.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2026.0,
      "epoch": 5.0,
      "step": 365
    },
    {
      "epoch": 5.0,
      "step": 365,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.5316159129142761
    },
    {
      "epoch": 5.0,
      "step": 365,
      "train/learning_rate_real": 1.3109883458000391e-05
    },
    {
      "debug/num_lat_loss": 1687.0,
      "debug/num_lat_total": 2221.0,
      "debug/num_tok_loss": 1687.0,
      "debug/num_tok_total": 2221.0,
      "epoch": 5.0,
      "step": 365
    },
    {
      "epoch": 5.0,
      "step": 365,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.4441116750240326
    },
    {
      "epoch": 5.0,
      "step": 365,
      "train/learning_rate_real": 1.3109883458000391e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2198.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2198.0,
      "epoch": 5.0,
      "step": 365
    },
    {
      "epoch": 5.0,
      "step": 365,
      "train/ce_loss": 1.8671875,
      "train/diffusion_loss": 0.5039470791816711
    },
    {
      "epoch": 5.0,
      "step": 365,
      "train/learning_rate_real": 1.3109883458000391e-05
    },
    {
      "debug/num_lat_loss": 1817.0,
      "debug/num_lat_total": 2692.0,
      "debug/num_tok_loss": 1817.0,
      "debug/num_tok_total": 2692.0,
      "epoch": 5.013698630136986,
      "step": 366
    },
    {
      "epoch": 5.013698630136986,
      "step": 366,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.3879682719707489
    },
    {
      "epoch": 5.013698630136986,
      "step": 366,
      "train/learning_rate_real": 1.3054477722233206e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 5.013698630136986,
      "step": 366
    },
    {
      "epoch": 5.013698630136986,
      "step": 366,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.42739516496658325
    },
    {
      "epoch": 5.013698630136986,
      "step": 366,
      "train/learning_rate_real": 1.3054477722233206e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2605.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2605.0,
      "epoch": 5.013698630136986,
      "step": 366
    },
    {
      "epoch": 5.013698630136986,
      "step": 366,
      "train/ce_loss": 1.8203125,
      "train/diffusion_loss": 0.39000993967056274
    },
    {
      "epoch": 5.013698630136986,
      "step": 366,
      "train/learning_rate_real": 1.3054477722233206e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 5.013698630136986,
      "step": 366
    },
    {
      "epoch": 5.013698630136986,
      "step": 366,
      "train/ce_loss": 1.9765625,
      "train/diffusion_loss": 0.4404014050960541
    },
    {
      "epoch": 5.013698630136986,
      "step": 366,
      "train/learning_rate_real": 1.3054477722233206e-05
    },
    {
      "debug/num_lat_loss": 1717.0,
      "debug/num_lat_total": 2276.0,
      "debug/num_tok_loss": 1717.0,
      "debug/num_tok_total": 2276.0,
      "epoch": 5.027397260273973,
      "step": 367
    },
    {
      "epoch": 5.027397260273973,
      "step": 367,
      "train/ce_loss": 1.9921875,
      "train/diffusion_loss": 0.4457962214946747
    },
    {
      "epoch": 5.027397260273973,
      "step": 367,
      "train/learning_rate_real": 1.299906106913147e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2860.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2860.0,
      "epoch": 5.027397260273973,
      "step": 367
    },
    {
      "epoch": 5.027397260273973,
      "step": 367,
      "train/ce_loss": 1.9296875,
      "train/diffusion_loss": 0.34024664759635925
    },
    {
      "epoch": 5.027397260273973,
      "step": 367,
      "train/learning_rate_real": 1.299906106913147e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 5.027397260273973,
      "step": 367
    },
    {
      "epoch": 5.027397260273973,
      "step": 367,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.408486008644104
    },
    {
      "epoch": 5.027397260273973,
      "step": 367,
      "train/learning_rate_real": 1.299906106913147e-05
    },
    {
      "debug/num_lat_loss": 1771.0,
      "debug/num_lat_total": 2405.0,
      "debug/num_tok_loss": 1771.0,
      "debug/num_tok_total": 2405.0,
      "epoch": 5.027397260273973,
      "step": 367
    },
    {
      "epoch": 5.027397260273973,
      "step": 367,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.44742128252983093
    },
    {
      "epoch": 5.027397260273973,
      "step": 367,
      "train/learning_rate_real": 1.299906106913147e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2645.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2645.0,
      "epoch": 5.041095890410959,
      "step": 368
    },
    {
      "epoch": 5.041095890410959,
      "step": 368,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.3868914544582367
    },
    {
      "epoch": 5.041095890410959,
      "step": 368,
      "train/learning_rate_real": 1.2943634589815915e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2662.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2662.0,
      "epoch": 5.041095890410959,
      "step": 368
    },
    {
      "epoch": 5.041095890410959,
      "step": 368,
      "train/ce_loss": 1.8046875,
      "train/diffusion_loss": 0.3968140780925751
    },
    {
      "epoch": 5.041095890410959,
      "step": 368,
      "train/learning_rate_real": 1.2943634589815915e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2867.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2867.0,
      "epoch": 5.041095890410959,
      "step": 368
    },
    {
      "epoch": 5.041095890410959,
      "step": 368,
      "train/ce_loss": 1.9296875,
      "train/diffusion_loss": 0.3705860674381256
    },
    {
      "epoch": 5.041095890410959,
      "step": 368,
      "train/learning_rate_real": 1.2943634589815915e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2604.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2604.0,
      "epoch": 5.041095890410959,
      "step": 368
    },
    {
      "epoch": 5.041095890410959,
      "step": 368,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.40020421147346497
    },
    {
      "epoch": 5.041095890410959,
      "step": 368,
      "train/learning_rate_real": 1.2943634589815915e-05
    },
    {
      "debug/num_lat_loss": 1762.0,
      "debug/num_lat_total": 2392.0,
      "debug/num_tok_loss": 1762.0,
      "debug/num_tok_total": 2392.0,
      "epoch": 5.054794520547945,
      "step": 369
    },
    {
      "epoch": 5.054794520547945,
      "step": 369,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.4384942352771759
    },
    {
      "epoch": 5.054794520547945,
      "step": 369,
      "train/learning_rate_real": 1.2888199375600754e-05
    },
    {
      "debug/num_lat_loss": 1728.0,
      "debug/num_lat_total": 2166.0,
      "debug/num_tok_loss": 1728.0,
      "debug/num_tok_total": 2166.0,
      "epoch": 5.054794520547945,
      "step": 369
    },
    {
      "epoch": 5.054794520547945,
      "step": 369,
      "train/ce_loss": 2.1875,
      "train/diffusion_loss": 0.4792060852050781
    },
    {
      "epoch": 5.054794520547945,
      "step": 369,
      "train/learning_rate_real": 1.2888199375600754e-05
    },
    {
      "debug/num_lat_loss": 1785.0,
      "debug/num_lat_total": 3099.0,
      "debug/num_tok_loss": 1785.0,
      "debug/num_tok_total": 3099.0,
      "epoch": 5.054794520547945,
      "step": 369
    },
    {
      "epoch": 5.054794520547945,
      "step": 369,
      "train/ce_loss": 1.8125,
      "train/diffusion_loss": 0.32182613015174866
    },
    {
      "epoch": 5.054794520547945,
      "step": 369,
      "train/learning_rate_real": 1.2888199375600754e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2642.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2642.0,
      "epoch": 5.054794520547945,
      "step": 369
    },
    {
      "epoch": 5.054794520547945,
      "step": 369,
      "train/ce_loss": 1.8515625,
      "train/diffusion_loss": 0.39480388164520264
    },
    {
      "epoch": 5.054794520547945,
      "step": 369,
      "train/learning_rate_real": 1.2888199375600754e-05
    },
    {
      "epoch": 5.068493150684931,
      "grad_norm": 1.216352105140686,
      "learning_rate": 1.2888199375600754e-05,
      "loss": 2.6214,
      "step": 370
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2209.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2209.0,
      "epoch": 5.068493150684931,
      "step": 370
    },
    {
      "epoch": 5.068493150684931,
      "step": 370,
      "train/ce_loss": 1.796875,
      "train/diffusion_loss": 0.4800213873386383
    },
    {
      "epoch": 5.068493150684931,
      "step": 370,
      "train/learning_rate_real": 1.2832756517972185e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2674.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2674.0,
      "epoch": 5.068493150684931,
      "step": 370
    },
    {
      "epoch": 5.068493150684931,
      "step": 370,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.3754417300224304
    },
    {
      "epoch": 5.068493150684931,
      "step": 370,
      "train/learning_rate_real": 1.2832756517972185e-05
    },
    {
      "debug/num_lat_loss": 1680.0,
      "debug/num_lat_total": 2451.0,
      "debug/num_tok_loss": 1680.0,
      "debug/num_tok_total": 2451.0,
      "epoch": 5.068493150684931,
      "step": 370
    },
    {
      "epoch": 5.068493150684931,
      "step": 370,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.4207042455673218
    },
    {
      "epoch": 5.068493150684931,
      "step": 370,
      "train/learning_rate_real": 1.2832756517972185e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2827.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2827.0,
      "epoch": 5.068493150684931,
      "step": 370
    },
    {
      "epoch": 5.068493150684931,
      "step": 370,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.3967849016189575
    },
    {
      "epoch": 5.068493150684931,
      "step": 370,
      "train/learning_rate_real": 1.2832756517972185e-05
    },
    {
      "debug/num_lat_loss": 1817.0,
      "debug/num_lat_total": 2455.0,
      "debug/num_tok_loss": 1817.0,
      "debug/num_tok_total": 2455.0,
      "epoch": 5.082191780821918,
      "step": 371
    },
    {
      "epoch": 5.082191780821918,
      "step": 371,
      "train/ce_loss": 1.90625,
      "train/diffusion_loss": 0.42998573184013367
    },
    {
      "epoch": 5.082191780821918,
      "step": 371,
      "train/learning_rate_real": 1.2777307108566896e-05
    },
    {
      "debug/num_lat_loss": 1750.0,
      "debug/num_lat_total": 2584.0,
      "debug/num_tok_loss": 1750.0,
      "debug/num_tok_total": 2584.0,
      "epoch": 5.082191780821918,
      "step": 371
    },
    {
      "epoch": 5.082191780821918,
      "step": 371,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.39576098322868347
    },
    {
      "epoch": 5.082191780821918,
      "step": 371,
      "train/learning_rate_real": 1.2777307108566896e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 5.082191780821918,
      "step": 371
    },
    {
      "epoch": 5.082191780821918,
      "step": 371,
      "train/ce_loss": 1.9765625,
      "train/diffusion_loss": 0.393177330493927
    },
    {
      "epoch": 5.082191780821918,
      "step": 371,
      "train/learning_rate_real": 1.2777307108566896e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2629.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2629.0,
      "epoch": 5.082191780821918,
      "step": 371
    },
    {
      "epoch": 5.082191780821918,
      "step": 371,
      "train/ce_loss": 1.9296875,
      "train/diffusion_loss": 0.389772891998291
    },
    {
      "epoch": 5.082191780821918,
      "step": 371,
      "train/learning_rate_real": 1.2777307108566896e-05
    },
    {
      "debug/num_lat_loss": 1614.0,
      "debug/num_lat_total": 2686.0,
      "debug/num_tok_loss": 1614.0,
      "debug/num_tok_total": 2686.0,
      "epoch": 5.095890410958904,
      "step": 372
    },
    {
      "epoch": 5.095890410958904,
      "step": 372,
      "train/ce_loss": 1.7890625,
      "train/diffusion_loss": 0.33671796321868896
    },
    {
      "epoch": 5.095890410958904,
      "step": 372,
      "train/learning_rate_real": 1.2721852239150577e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 3071.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 3071.0,
      "epoch": 5.095890410958904,
      "step": 372
    },
    {
      "epoch": 5.095890410958904,
      "step": 372,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.30351942777633667
    },
    {
      "epoch": 5.095890410958904,
      "step": 372,
      "train/learning_rate_real": 1.2721852239150577e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 5.095890410958904,
      "step": 372
    },
    {
      "epoch": 5.095890410958904,
      "step": 372,
      "train/ce_loss": 1.890625,
      "train/diffusion_loss": 0.37298259139060974
    },
    {
      "epoch": 5.095890410958904,
      "step": 372,
      "train/learning_rate_real": 1.2721852239150577e-05
    },
    {
      "debug/num_lat_loss": 1663.0,
      "debug/num_lat_total": 2501.0,
      "debug/num_tok_loss": 1663.0,
      "debug/num_tok_total": 2501.0,
      "epoch": 5.095890410958904,
      "step": 372
    },
    {
      "epoch": 5.095890410958904,
      "step": 372,
      "train/ce_loss": 1.8046875,
      "train/diffusion_loss": 0.375630259513855
    },
    {
      "epoch": 5.095890410958904,
      "step": 372,
      "train/learning_rate_real": 1.2721852239150577e-05
    },
    {
      "debug/num_lat_loss": 1618.0,
      "debug/num_lat_total": 2900.0,
      "debug/num_tok_loss": 1618.0,
      "debug/num_tok_total": 2900.0,
      "epoch": 5.109589041095891,
      "step": 373
    },
    {
      "epoch": 5.109589041095891,
      "step": 373,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.2602494955062866
    },
    {
      "epoch": 5.109589041095891,
      "step": 373,
      "train/learning_rate_real": 1.2666393001596424e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2465.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2465.0,
      "epoch": 5.109589041095891,
      "step": 373
    },
    {
      "epoch": 5.109589041095891,
      "step": 373,
      "train/ce_loss": 1.8125,
      "train/diffusion_loss": 0.4340699315071106
    },
    {
      "epoch": 5.109589041095891,
      "step": 373,
      "train/learning_rate_real": 1.2666393001596424e-05
    },
    {
      "debug/num_lat_loss": 1777.0,
      "debug/num_lat_total": 2623.0,
      "debug/num_tok_loss": 1777.0,
      "debug/num_tok_total": 2623.0,
      "epoch": 5.109589041095891,
      "step": 373
    },
    {
      "epoch": 5.109589041095891,
      "step": 373,
      "train/ce_loss": 2.125,
      "train/diffusion_loss": 0.4099808931350708
    },
    {
      "epoch": 5.109589041095891,
      "step": 373,
      "train/learning_rate_real": 1.2666393001596424e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2411.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2411.0,
      "epoch": 5.109589041095891,
      "step": 373
    },
    {
      "epoch": 5.109589041095891,
      "step": 373,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.44482001662254333
    },
    {
      "epoch": 5.109589041095891,
      "step": 373,
      "train/learning_rate_real": 1.2666393001596424e-05
    },
    {
      "debug/num_lat_loss": 1754.0,
      "debug/num_lat_total": 2376.0,
      "debug/num_tok_loss": 1754.0,
      "debug/num_tok_total": 2376.0,
      "epoch": 5.123287671232877,
      "step": 374
    },
    {
      "epoch": 5.123287671232877,
      "step": 374,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.43012309074401855
    },
    {
      "epoch": 5.123287671232877,
      "step": 374,
      "train/learning_rate_real": 1.2610930487863637e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 3108.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 3108.0,
      "epoch": 5.123287671232877,
      "step": 374
    },
    {
      "epoch": 5.123287671232877,
      "step": 374,
      "train/ce_loss": 1.8203125,
      "train/diffusion_loss": 0.27529823780059814
    },
    {
      "epoch": 5.123287671232877,
      "step": 374,
      "train/learning_rate_real": 1.2610930487863637e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2882.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2882.0,
      "epoch": 5.123287671232877,
      "step": 374
    },
    {
      "epoch": 5.123287671232877,
      "step": 374,
      "train/ce_loss": 1.8671875,
      "train/diffusion_loss": 0.361605703830719
    },
    {
      "epoch": 5.123287671232877,
      "step": 374,
      "train/learning_rate_real": 1.2610930487863637e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2841.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2841.0,
      "epoch": 5.123287671232877,
      "step": 374
    },
    {
      "epoch": 5.123287671232877,
      "step": 374,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.33355703949928284
    },
    {
      "epoch": 5.123287671232877,
      "step": 374,
      "train/learning_rate_real": 1.2610930487863637e-05
    },
    {
      "debug/num_lat_loss": 1639.0,
      "debug/num_lat_total": 2514.0,
      "debug/num_tok_loss": 1639.0,
      "debug/num_tok_total": 2514.0,
      "epoch": 5.136986301369863,
      "step": 375
    },
    {
      "epoch": 5.136986301369863,
      "step": 375,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.3975639343261719
    },
    {
      "epoch": 5.136986301369863,
      "step": 375,
      "train/learning_rate_real": 1.2555465789975927e-05
    },
    {
      "debug/num_lat_loss": 1816.0,
      "debug/num_lat_total": 2899.0,
      "debug/num_tok_loss": 1816.0,
      "debug/num_tok_total": 2899.0,
      "epoch": 5.136986301369863,
      "step": 375
    },
    {
      "epoch": 5.136986301369863,
      "step": 375,
      "train/ce_loss": 2.046875,
      "train/diffusion_loss": 0.36548635363578796
    },
    {
      "epoch": 5.136986301369863,
      "step": 375,
      "train/learning_rate_real": 1.2555465789975927e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 3073.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 3073.0,
      "epoch": 5.136986301369863,
      "step": 375
    },
    {
      "epoch": 5.136986301369863,
      "step": 375,
      "train/ce_loss": 1.8515625,
      "train/diffusion_loss": 0.29560744762420654
    },
    {
      "epoch": 5.136986301369863,
      "step": 375,
      "train/learning_rate_real": 1.2555465789975927e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 5.136986301369863,
      "step": 375
    },
    {
      "epoch": 5.136986301369863,
      "step": 375,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.331356942653656
    },
    {
      "epoch": 5.136986301369863,
      "step": 375,
      "train/learning_rate_real": 1.2555465789975927e-05
    },
    {
      "debug/num_lat_loss": 1712.0,
      "debug/num_lat_total": 2923.0,
      "debug/num_tok_loss": 1712.0,
      "debug/num_tok_total": 2923.0,
      "epoch": 5.1506849315068495,
      "step": 376
    },
    {
      "epoch": 5.1506849315068495,
      "step": 376,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.32675445079803467
    },
    {
      "epoch": 5.1506849315068495,
      "step": 376,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 3063.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 3063.0,
      "epoch": 5.1506849315068495,
      "step": 376
    },
    {
      "epoch": 5.1506849315068495,
      "step": 376,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.3256686329841614
    },
    {
      "epoch": 5.1506849315068495,
      "step": 376,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2843.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2843.0,
      "epoch": 5.1506849315068495,
      "step": 376
    },
    {
      "epoch": 5.1506849315068495,
      "step": 376,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.34669750928878784
    },
    {
      "epoch": 5.1506849315068495,
      "step": 376,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1776.0,
      "debug/num_lat_total": 2408.0,
      "debug/num_tok_loss": 1776.0,
      "debug/num_tok_total": 2408.0,
      "epoch": 5.1506849315068495,
      "step": 376
    },
    {
      "epoch": 5.1506849315068495,
      "step": 376,
      "train/ce_loss": 2.234375,
      "train/diffusion_loss": 0.45397236943244934
    },
    {
      "epoch": 5.1506849315068495,
      "step": 376,
      "train/learning_rate_real": 1.25e-05
    },
    {
      "debug/num_lat_loss": 1619.0,
      "debug/num_lat_total": 2463.0,
      "debug/num_tok_loss": 1619.0,
      "debug/num_tok_total": 2463.0,
      "epoch": 5.164383561643835,
      "step": 377
    },
    {
      "epoch": 5.164383561643835,
      "step": 377,
      "train/ce_loss": 1.84375,
      "train/diffusion_loss": 0.41008567810058594
    },
    {
      "epoch": 5.164383561643835,
      "step": 377,
      "train/learning_rate_real": 1.2444534210024075e-05
    },
    {
      "debug/num_lat_loss": 1677.0,
      "debug/num_lat_total": 2644.0,
      "debug/num_tok_loss": 1677.0,
      "debug/num_tok_total": 2644.0,
      "epoch": 5.164383561643835,
      "step": 377
    },
    {
      "epoch": 5.164383561643835,
      "step": 377,
      "train/ce_loss": 1.8671875,
      "train/diffusion_loss": 0.3623366355895996
    },
    {
      "epoch": 5.164383561643835,
      "step": 377,
      "train/learning_rate_real": 1.2444534210024075e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2409.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2409.0,
      "epoch": 5.164383561643835,
      "step": 377
    },
    {
      "epoch": 5.164383561643835,
      "step": 377,
      "train/ce_loss": 1.890625,
      "train/diffusion_loss": 0.4463941156864166
    },
    {
      "epoch": 5.164383561643835,
      "step": 377,
      "train/learning_rate_real": 1.2444534210024075e-05
    },
    {
      "debug/num_lat_loss": 1622.0,
      "debug/num_lat_total": 2702.0,
      "debug/num_tok_loss": 1622.0,
      "debug/num_tok_total": 2702.0,
      "epoch": 5.164383561643835,
      "step": 377
    },
    {
      "epoch": 5.164383561643835,
      "step": 377,
      "train/ce_loss": 2.375,
      "train/diffusion_loss": 0.333406001329422
    },
    {
      "epoch": 5.164383561643835,
      "step": 377,
      "train/learning_rate_real": 1.2444534210024075e-05
    },
    {
      "debug/num_lat_loss": 1815.0,
      "debug/num_lat_total": 3121.0,
      "debug/num_tok_loss": 1815.0,
      "debug/num_tok_total": 3121.0,
      "epoch": 5.178082191780822,
      "step": 378
    },
    {
      "epoch": 5.178082191780822,
      "step": 378,
      "train/ce_loss": 1.890625,
      "train/diffusion_loss": 0.32326439023017883
    },
    {
      "epoch": 5.178082191780822,
      "step": 378,
      "train/learning_rate_real": 1.2389069512136362e-05
    },
    {
      "debug/num_lat_loss": 1768.0,
      "debug/num_lat_total": 2611.0,
      "debug/num_tok_loss": 1768.0,
      "debug/num_tok_total": 2611.0,
      "epoch": 5.178082191780822,
      "step": 378
    },
    {
      "epoch": 5.178082191780822,
      "step": 378,
      "train/ce_loss": 1.8984375,
      "train/diffusion_loss": 0.45180970430374146
    },
    {
      "epoch": 5.178082191780822,
      "step": 378,
      "train/learning_rate_real": 1.2389069512136362e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2833.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2833.0,
      "epoch": 5.178082191780822,
      "step": 378
    },
    {
      "epoch": 5.178082191780822,
      "step": 378,
      "train/ce_loss": 1.796875,
      "train/diffusion_loss": 0.39444664120674133
    },
    {
      "epoch": 5.178082191780822,
      "step": 378,
      "train/learning_rate_real": 1.2389069512136362e-05
    },
    {
      "debug/num_lat_loss": 1800.0,
      "debug/num_lat_total": 2673.0,
      "debug/num_tok_loss": 1800.0,
      "debug/num_tok_total": 2673.0,
      "epoch": 5.178082191780822,
      "step": 378
    },
    {
      "epoch": 5.178082191780822,
      "step": 378,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.42039254307746887
    },
    {
      "epoch": 5.178082191780822,
      "step": 378,
      "train/learning_rate_real": 1.2389069512136362e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 2181.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 2181.0,
      "epoch": 5.191780821917808,
      "step": 379
    },
    {
      "epoch": 5.191780821917808,
      "step": 379,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.45792582631111145
    },
    {
      "epoch": 5.191780821917808,
      "step": 379,
      "train/learning_rate_real": 1.233360699840358e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 3102.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 3102.0,
      "epoch": 5.191780821917808,
      "step": 379
    },
    {
      "epoch": 5.191780821917808,
      "step": 379,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.3423309624195099
    },
    {
      "epoch": 5.191780821917808,
      "step": 379,
      "train/learning_rate_real": 1.233360699840358e-05
    },
    {
      "debug/num_lat_loss": 1795.0,
      "debug/num_lat_total": 3306.0,
      "debug/num_tok_loss": 1795.0,
      "debug/num_tok_total": 3306.0,
      "epoch": 5.191780821917808,
      "step": 379
    },
    {
      "epoch": 5.191780821917808,
      "step": 379,
      "train/ce_loss": 1.890625,
      "train/diffusion_loss": 0.24859732389450073
    },
    {
      "epoch": 5.191780821917808,
      "step": 379,
      "train/learning_rate_real": 1.233360699840358e-05
    },
    {
      "debug/num_lat_loss": 1814.0,
      "debug/num_lat_total": 2675.0,
      "debug/num_tok_loss": 1814.0,
      "debug/num_tok_total": 2675.0,
      "epoch": 5.191780821917808,
      "step": 379
    },
    {
      "epoch": 5.191780821917808,
      "step": 379,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.3955957889556885
    },
    {
      "epoch": 5.191780821917808,
      "step": 379,
      "train/learning_rate_real": 1.233360699840358e-05
    },
    {
      "epoch": 5.205479452054795,
      "grad_norm": 1.3502992391586304,
      "learning_rate": 1.233360699840358e-05,
      "loss": 2.4153,
      "step": 380
    },
    {
      "debug/num_lat_loss": 1749.0,
      "debug/num_lat_total": 2815.0,
      "debug/num_tok_loss": 1749.0,
      "debug/num_tok_total": 2815.0,
      "epoch": 5.205479452054795,
      "step": 380
    },
    {
      "epoch": 5.205479452054795,
      "step": 380,
      "train/ce_loss": 1.75,
      "train/diffusion_loss": 0.33071979880332947
    },
    {
      "epoch": 5.205479452054795,
      "step": 380,
      "train/learning_rate_real": 1.2278147760849427e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 3330.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 3330.0,
      "epoch": 5.205479452054795,
      "step": 380
    },
    {
      "epoch": 5.205479452054795,
      "step": 380,
      "train/ce_loss": 1.828125,
      "train/diffusion_loss": 0.2971540093421936
    },
    {
      "epoch": 5.205479452054795,
      "step": 380,
      "train/learning_rate_real": 1.2278147760849427e-05
    },
    {
      "debug/num_lat_loss": 1646.0,
      "debug/num_lat_total": 2170.0,
      "debug/num_tok_loss": 1646.0,
      "debug/num_tok_total": 2170.0,
      "epoch": 5.205479452054795,
      "step": 380
    },
    {
      "epoch": 5.205479452054795,
      "step": 380,
      "train/ce_loss": 1.8125,
      "train/diffusion_loss": 0.46348363161087036
    },
    {
      "epoch": 5.205479452054795,
      "step": 380,
      "train/learning_rate_real": 1.2278147760849427e-05
    },
    {
      "debug/num_lat_loss": 1652.0,
      "debug/num_lat_total": 2379.0,
      "debug/num_tok_loss": 1652.0,
      "debug/num_tok_total": 2379.0,
      "epoch": 5.205479452054795,
      "step": 380
    },
    {
      "epoch": 5.205479452054795,
      "step": 380,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.41309061646461487
    },
    {
      "epoch": 5.205479452054795,
      "step": 380,
      "train/learning_rate_real": 1.2278147760849427e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 3326.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 3326.0,
      "epoch": 5.219178082191781,
      "step": 381
    },
    {
      "epoch": 5.219178082191781,
      "step": 381,
      "train/ce_loss": 1.90625,
      "train/diffusion_loss": 0.28062987327575684
    },
    {
      "epoch": 5.219178082191781,
      "step": 381,
      "train/learning_rate_real": 1.2222692891433105e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2901.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2901.0,
      "epoch": 5.219178082191781,
      "step": 381
    },
    {
      "epoch": 5.219178082191781,
      "step": 381,
      "train/ce_loss": 1.7734375,
      "train/diffusion_loss": 0.31966787576675415
    },
    {
      "epoch": 5.219178082191781,
      "step": 381,
      "train/learning_rate_real": 1.2222692891433105e-05
    },
    {
      "debug/num_lat_loss": 1719.0,
      "debug/num_lat_total": 2784.0,
      "debug/num_tok_loss": 1719.0,
      "debug/num_tok_total": 2784.0,
      "epoch": 5.219178082191781,
      "step": 381
    },
    {
      "epoch": 5.219178082191781,
      "step": 381,
      "train/ce_loss": 1.8671875,
      "train/diffusion_loss": 0.34212782979011536
    },
    {
      "epoch": 5.219178082191781,
      "step": 381,
      "train/learning_rate_real": 1.2222692891433105e-05
    },
    {
      "debug/num_lat_loss": 1775.0,
      "debug/num_lat_total": 2858.0,
      "debug/num_tok_loss": 1775.0,
      "debug/num_tok_total": 2858.0,
      "epoch": 5.219178082191781,
      "step": 381
    },
    {
      "epoch": 5.219178082191781,
      "step": 381,
      "train/ce_loss": 2.03125,
      "train/diffusion_loss": 0.36847585439682007
    },
    {
      "epoch": 5.219178082191781,
      "step": 381,
      "train/learning_rate_real": 1.2222692891433105e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2416.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2416.0,
      "epoch": 5.232876712328767,
      "step": 382
    },
    {
      "epoch": 5.232876712328767,
      "step": 382,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.445895254611969
    },
    {
      "epoch": 5.232876712328767,
      "step": 382,
      "train/learning_rate_real": 1.2167243482027816e-05
    },
    {
      "debug/num_lat_loss": 1603.0,
      "debug/num_lat_total": 2486.0,
      "debug/num_tok_loss": 1603.0,
      "debug/num_tok_total": 2486.0,
      "epoch": 5.232876712328767,
      "step": 382
    },
    {
      "epoch": 5.232876712328767,
      "step": 382,
      "train/ce_loss": 2.140625,
      "train/diffusion_loss": 0.36970221996307373
    },
    {
      "epoch": 5.232876712328767,
      "step": 382,
      "train/learning_rate_real": 1.2167243482027816e-05
    },
    {
      "debug/num_lat_loss": 1778.0,
      "debug/num_lat_total": 2218.0,
      "debug/num_tok_loss": 1778.0,
      "debug/num_tok_total": 2218.0,
      "epoch": 5.232876712328767,
      "step": 382
    },
    {
      "epoch": 5.232876712328767,
      "step": 382,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.4734830856323242
    },
    {
      "epoch": 5.232876712328767,
      "step": 382,
      "train/learning_rate_real": 1.2167243482027816e-05
    },
    {
      "debug/num_lat_loss": 1783.0,
      "debug/num_lat_total": 3087.0,
      "debug/num_tok_loss": 1783.0,
      "debug/num_tok_total": 3087.0,
      "epoch": 5.232876712328767,
      "step": 382
    },
    {
      "epoch": 5.232876712328767,
      "step": 382,
      "train/ce_loss": 1.8125,
      "train/diffusion_loss": 0.32064247131347656
    },
    {
      "epoch": 5.232876712328767,
      "step": 382,
      "train/learning_rate_real": 1.2167243482027816e-05
    },
    {
      "debug/num_lat_loss": 1644.0,
      "debug/num_lat_total": 1936.0,
      "debug/num_tok_loss": 1644.0,
      "debug/num_tok_total": 1936.0,
      "epoch": 5.2465753424657535,
      "step": 383
    },
    {
      "epoch": 5.2465753424657535,
      "step": 383,
      "train/ce_loss": 1.8203125,
      "train/diffusion_loss": 0.5105114579200745
    },
    {
      "epoch": 5.2465753424657535,
      "step": 383,
      "train/learning_rate_real": 1.2111800624399244e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 2870.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 2870.0,
      "epoch": 5.2465753424657535,
      "step": 383
    },
    {
      "epoch": 5.2465753424657535,
      "step": 383,
      "train/ce_loss": 1.84375,
      "train/diffusion_loss": 0.3411625623703003
    },
    {
      "epoch": 5.2465753424657535,
      "step": 383,
      "train/learning_rate_real": 1.2111800624399244e-05
    },
    {
      "debug/num_lat_loss": 1821.0,
      "debug/num_lat_total": 3362.0,
      "debug/num_tok_loss": 1821.0,
      "debug/num_tok_total": 3362.0,
      "epoch": 5.2465753424657535,
      "step": 383
    },
    {
      "epoch": 5.2465753424657535,
      "step": 383,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.2604753375053406
    },
    {
      "epoch": 5.2465753424657535,
      "step": 383,
      "train/learning_rate_real": 1.2111800624399244e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 5.2465753424657535,
      "step": 383
    },
    {
      "epoch": 5.2465753424657535,
      "step": 383,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.38157889246940613
    },
    {
      "epoch": 5.2465753424657535,
      "step": 383,
      "train/learning_rate_real": 1.2111800624399244e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2421.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2421.0,
      "epoch": 5.260273972602739,
      "step": 384
    },
    {
      "epoch": 5.260273972602739,
      "step": 384,
      "train/ce_loss": 1.828125,
      "train/diffusion_loss": 0.4495629668235779
    },
    {
      "epoch": 5.260273972602739,
      "step": 384,
      "train/learning_rate_real": 1.2056365410184088e-05
    },
    {
      "debug/num_lat_loss": 1823.0,
      "debug/num_lat_total": 3148.0,
      "debug/num_tok_loss": 1823.0,
      "debug/num_tok_total": 3148.0,
      "epoch": 5.260273972602739,
      "step": 384
    },
    {
      "epoch": 5.260273972602739,
      "step": 384,
      "train/ce_loss": 1.71875,
      "train/diffusion_loss": 0.3212644159793854
    },
    {
      "epoch": 5.260273972602739,
      "step": 384,
      "train/learning_rate_real": 1.2056365410184088e-05
    },
    {
      "debug/num_lat_loss": 1573.0,
      "debug/num_lat_total": 2003.0,
      "debug/num_tok_loss": 1573.0,
      "debug/num_tok_total": 2003.0,
      "epoch": 5.260273972602739,
      "step": 384
    },
    {
      "epoch": 5.260273972602739,
      "step": 384,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.46382641792297363
    },
    {
      "epoch": 5.260273972602739,
      "step": 384,
      "train/learning_rate_real": 1.2056365410184088e-05
    },
    {
      "debug/num_lat_loss": 1779.0,
      "debug/num_lat_total": 3060.0,
      "debug/num_tok_loss": 1779.0,
      "debug/num_tok_total": 3060.0,
      "epoch": 5.260273972602739,
      "step": 384
    },
    {
      "epoch": 5.260273972602739,
      "step": 384,
      "train/ce_loss": 1.984375,
      "train/diffusion_loss": 0.3454790711402893
    },
    {
      "epoch": 5.260273972602739,
      "step": 384,
      "train/learning_rate_real": 1.2056365410184088e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 3061.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 3061.0,
      "epoch": 5.273972602739726,
      "step": 385
    },
    {
      "epoch": 5.273972602739726,
      "step": 385,
      "train/ce_loss": 1.65625,
      "train/diffusion_loss": 0.30337586998939514
    },
    {
      "epoch": 5.273972602739726,
      "step": 385,
      "train/learning_rate_real": 1.2000938930868534e-05
    },
    {
      "debug/num_lat_loss": 1594.0,
      "debug/num_lat_total": 1716.0,
      "debug/num_tok_loss": 1594.0,
      "debug/num_tok_total": 1716.0,
      "epoch": 5.273972602739726,
      "step": 385
    },
    {
      "epoch": 5.273972602739726,
      "step": 385,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.5590569972991943
    },
    {
      "epoch": 5.273972602739726,
      "step": 385,
      "train/learning_rate_real": 1.2000938930868534e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2871.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2871.0,
      "epoch": 5.273972602739726,
      "step": 385
    },
    {
      "epoch": 5.273972602739726,
      "step": 385,
      "train/ce_loss": 1.9296875,
      "train/diffusion_loss": 0.36918506026268005
    },
    {
      "epoch": 5.273972602739726,
      "step": 385,
      "train/learning_rate_real": 1.2000938930868534e-05
    },
    {
      "debug/num_lat_loss": 1781.0,
      "debug/num_lat_total": 3072.0,
      "debug/num_tok_loss": 1781.0,
      "debug/num_tok_total": 3072.0,
      "epoch": 5.273972602739726,
      "step": 385
    },
    {
      "epoch": 5.273972602739726,
      "step": 385,
      "train/ce_loss": 1.71875,
      "train/diffusion_loss": 0.3130257725715637
    },
    {
      "epoch": 5.273972602739726,
      "step": 385,
      "train/learning_rate_real": 1.2000938930868534e-05
    },
    {
      "debug/num_lat_loss": 1782.0,
      "debug/num_lat_total": 2639.0,
      "debug/num_tok_loss": 1782.0,
      "debug/num_tok_total": 2639.0,
      "epoch": 5.287671232876712,
      "step": 386
    },
    {
      "epoch": 5.287671232876712,
      "step": 386,
      "train/ce_loss": 1.859375,
      "train/diffusion_loss": 0.39055678248405457
    },
    {
      "epoch": 5.287671232876712,
      "step": 386,
      "train/learning_rate_real": 1.1945522277766794e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 3075.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 3075.0,
      "epoch": 5.287671232876712,
      "step": 386
    },
    {
      "epoch": 5.287671232876712,
      "step": 386,
      "train/ce_loss": 1.796875,
      "train/diffusion_loss": 0.29906129837036133
    },
    {
      "epoch": 5.287671232876712,
      "step": 386,
      "train/learning_rate_real": 1.1945522277766794e-05
    },
    {
      "debug/num_lat_loss": 1789.0,
      "debug/num_lat_total": 2658.0,
      "debug/num_tok_loss": 1789.0,
      "debug/num_tok_total": 2658.0,
      "epoch": 5.287671232876712,
      "step": 386
    },
    {
      "epoch": 5.287671232876712,
      "step": 386,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.40184664726257324
    },
    {
      "epoch": 5.287671232876712,
      "step": 386,
      "train/learning_rate_real": 1.1945522277766794e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2665.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2665.0,
      "epoch": 5.287671232876712,
      "step": 386
    },
    {
      "epoch": 5.287671232876712,
      "step": 386,
      "train/ce_loss": 1.6796875,
      "train/diffusion_loss": 0.3778179883956909
    },
    {
      "epoch": 5.287671232876712,
      "step": 386,
      "train/learning_rate_real": 1.1945522277766794e-05
    },
    {
      "debug/num_lat_loss": 1808.0,
      "debug/num_lat_total": 2898.0,
      "debug/num_tok_loss": 1808.0,
      "debug/num_tok_total": 2898.0,
      "epoch": 5.301369863013699,
      "step": 387
    },
    {
      "epoch": 5.301369863013699,
      "step": 387,
      "train/ce_loss": 2.078125,
      "train/diffusion_loss": 0.3692452013492584
    },
    {
      "epoch": 5.301369863013699,
      "step": 387,
      "train/learning_rate_real": 1.1890116541999614e-05
    },
    {
      "debug/num_lat_loss": 1636.0,
      "debug/num_lat_total": 2728.0,
      "debug/num_tok_loss": 1636.0,
      "debug/num_tok_total": 2728.0,
      "epoch": 5.301369863013699,
      "step": 387
    },
    {
      "epoch": 5.301369863013699,
      "step": 387,
      "train/ce_loss": 1.7109375,
      "train/diffusion_loss": 0.35613057017326355
    },
    {
      "epoch": 5.301369863013699,
      "step": 387,
      "train/learning_rate_real": 1.1890116541999614e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 2651.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 2651.0,
      "epoch": 5.301369863013699,
      "step": 387
    },
    {
      "epoch": 5.301369863013699,
      "step": 387,
      "train/ce_loss": 1.90625,
      "train/diffusion_loss": 0.3847382962703705
    },
    {
      "epoch": 5.301369863013699,
      "step": 387,
      "train/learning_rate_real": 1.1890116541999614e-05
    },
    {
      "debug/num_lat_loss": 1684.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1684.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 5.301369863013699,
      "step": 387
    },
    {
      "epoch": 5.301369863013699,
      "step": 387,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.3559465706348419
    },
    {
      "epoch": 5.301369863013699,
      "step": 387,
      "train/learning_rate_real": 1.1890116541999614e-05
    },
    {
      "debug/num_lat_loss": 1714.0,
      "debug/num_lat_total": 2295.0,
      "debug/num_tok_loss": 1714.0,
      "debug/num_tok_total": 2295.0,
      "epoch": 5.315068493150685,
      "step": 388
    },
    {
      "epoch": 5.315068493150685,
      "step": 388,
      "train/ce_loss": 1.7421875,
      "train/diffusion_loss": 0.47427451610565186
    },
    {
      "epoch": 5.315068493150685,
      "step": 388,
      "train/learning_rate_real": 1.1834722814472771e-05
    },
    {
      "debug/num_lat_loss": 1794.0,
      "debug/num_lat_total": 2667.0,
      "debug/num_tok_loss": 1794.0,
      "debug/num_tok_total": 2667.0,
      "epoch": 5.315068493150685,
      "step": 388
    },
    {
      "epoch": 5.315068493150685,
      "step": 388,
      "train/ce_loss": 1.765625,
      "train/diffusion_loss": 0.3911558985710144
    },
    {
      "epoch": 5.315068493150685,
      "step": 388,
      "train/learning_rate_real": 1.1834722814472771e-05
    },
    {
      "debug/num_lat_loss": 1774.0,
      "debug/num_lat_total": 2853.0,
      "debug/num_tok_loss": 1774.0,
      "debug/num_tok_total": 2853.0,
      "epoch": 5.315068493150685,
      "step": 388
    },
    {
      "epoch": 5.315068493150685,
      "step": 388,
      "train/ce_loss": 1.8671875,
      "train/diffusion_loss": 0.36137279868125916
    },
    {
      "epoch": 5.315068493150685,
      "step": 388,
      "train/learning_rate_real": 1.1834722814472771e-05
    },
    {
      "debug/num_lat_loss": 1709.0,
      "debug/num_lat_total": 2274.0,
      "debug/num_tok_loss": 1709.0,
      "debug/num_tok_total": 2274.0,
      "epoch": 5.315068493150685,
      "step": 388
    },
    {
      "epoch": 5.315068493150685,
      "step": 388,
      "train/ce_loss": 1.9765625,
      "train/diffusion_loss": 0.4615236818790436
    },
    {
      "epoch": 5.315068493150685,
      "step": 388,
      "train/learning_rate_real": 1.1834722814472771e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2647.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2647.0,
      "epoch": 5.328767123287671,
      "step": 389
    },
    {
      "epoch": 5.328767123287671,
      "step": 389,
      "train/ce_loss": 1.671875,
      "train/diffusion_loss": 0.4180532693862915
    },
    {
      "epoch": 5.328767123287671,
      "step": 389,
      "train/learning_rate_real": 1.1779342185855608e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2653.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2653.0,
      "epoch": 5.328767123287671,
      "step": 389
    },
    {
      "epoch": 5.328767123287671,
      "step": 389,
      "train/ce_loss": 1.96875,
      "train/diffusion_loss": 0.40186429023742676
    },
    {
      "epoch": 5.328767123287671,
      "step": 389,
      "train/learning_rate_real": 1.1779342185855608e-05
    },
    {
      "debug/num_lat_loss": 1760.0,
      "debug/num_lat_total": 1970.0,
      "debug/num_tok_loss": 1760.0,
      "debug/num_tok_total": 1970.0,
      "epoch": 5.328767123287671,
      "step": 389
    },
    {
      "epoch": 5.328767123287671,
      "step": 389,
      "train/ce_loss": 1.90625,
      "train/diffusion_loss": 0.5144637823104858
    },
    {
      "epoch": 5.328767123287671,
      "step": 389,
      "train/learning_rate_real": 1.1779342185855608e-05
    },
    {
      "debug/num_lat_loss": 1606.0,
      "debug/num_lat_total": 2294.0,
      "debug/num_tok_loss": 1606.0,
      "debug/num_tok_total": 2294.0,
      "epoch": 5.328767123287671,
      "step": 389
    },
    {
      "epoch": 5.328767123287671,
      "step": 389,
      "train/ce_loss": 1.8671875,
      "train/diffusion_loss": 0.4028507173061371
    },
    {
      "epoch": 5.328767123287671,
      "step": 389,
      "train/learning_rate_real": 1.1779342185855608e-05
    },
    {
      "epoch": 5.342465753424658,
      "grad_norm": 1.2754102945327759,
      "learning_rate": 1.1779342185855608e-05,
      "loss": 2.4553,
      "step": 390
    },
    {
      "debug/num_lat_loss": 1786.0,
      "debug/num_lat_total": 2866.0,
      "debug/num_tok_loss": 1786.0,
      "debug/num_tok_total": 2866.0,
      "epoch": 5.342465753424658,
      "step": 390
    },
    {
      "epoch": 5.342465753424658,
      "step": 390,
      "train/ce_loss": 1.7578125,
      "train/diffusion_loss": 0.35515695810317993
    },
    {
      "epoch": 5.342465753424658,
      "step": 390,
      "train/learning_rate_real": 1.1723975746559576e-05
    },
    {
      "debug/num_lat_loss": 1770.0,
      "debug/num_lat_total": 2440.0,
      "debug/num_tok_loss": 1770.0,
      "debug/num_tok_total": 2440.0,
      "epoch": 5.342465753424658,
      "step": 390
    },
    {
      "epoch": 5.342465753424658,
      "step": 390,
      "train/ce_loss": 2.015625,
      "train/diffusion_loss": 0.4256676137447357
    },
    {
      "epoch": 5.342465753424658,
      "step": 390,
      "train/learning_rate_real": 1.1723975746559576e-05
    },
    {
      "debug/num_lat_loss": 1736.0,
      "debug/num_lat_total": 2766.0,
      "debug/num_tok_loss": 1736.0,
      "debug/num_tok_total": 2766.0,
      "epoch": 5.342465753424658,
      "step": 390
    },
    {
      "epoch": 5.342465753424658,
      "step": 390,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.3603058457374573
    },
    {
      "epoch": 5.342465753424658,
      "step": 390,
      "train/learning_rate_real": 1.1723975746559576e-05
    },
    {
      "debug/num_lat_loss": 1552.0,
      "debug/num_lat_total": 2494.0,
      "debug/num_tok_loss": 1552.0,
      "debug/num_tok_total": 2494.0,
      "epoch": 5.342465753424658,
      "step": 390
    },
    {
      "epoch": 5.342465753424658,
      "step": 390,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.3582332134246826
    },
    {
      "epoch": 5.342465753424658,
      "step": 390,
      "train/learning_rate_real": 1.1723975746559576e-05
    },
    {
      "debug/num_lat_loss": 1772.0,
      "debug/num_lat_total": 2636.0,
      "debug/num_tok_loss": 1772.0,
      "debug/num_tok_total": 2636.0,
      "epoch": 5.3561643835616435,
      "step": 391
    },
    {
      "epoch": 5.3561643835616435,
      "step": 391,
      "train/ce_loss": 1.7734375,
      "train/diffusion_loss": 0.3876456618309021
    },
    {
      "epoch": 5.3561643835616435,
      "step": 391,
      "train/learning_rate_real": 1.1668624586716723e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2652.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2652.0,
      "epoch": 5.3561643835616435,
      "step": 391
    },
    {
      "epoch": 5.3561643835616435,
      "step": 391,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.40279945731163025
    },
    {
      "epoch": 5.3561643835616435,
      "step": 391,
      "train/learning_rate_real": 1.1668624586716723e-05
    },
    {
      "debug/num_lat_loss": 1761.0,
      "debug/num_lat_total": 2617.0,
      "debug/num_tok_loss": 1761.0,
      "debug/num_tok_total": 2617.0,
      "epoch": 5.3561643835616435,
      "step": 391
    },
    {
      "epoch": 5.3561643835616435,
      "step": 391,
      "train/ce_loss": 1.7578125,
      "train/diffusion_loss": 0.4103446900844574
    },
    {
      "epoch": 5.3561643835616435,
      "step": 391,
      "train/learning_rate_real": 1.1668624586716723e-05
    },
    {
      "debug/num_lat_loss": 1697.0,
      "debug/num_lat_total": 1921.0,
      "debug/num_tok_loss": 1697.0,
      "debug/num_tok_total": 1921.0,
      "epoch": 5.3561643835616435,
      "step": 391
    },
    {
      "epoch": 5.3561643835616435,
      "step": 391,
      "train/ce_loss": 1.7890625,
      "train/diffusion_loss": 0.541977047920227
    },
    {
      "epoch": 5.3561643835616435,
      "step": 391,
      "train/learning_rate_real": 1.1668624586716723e-05
    },
    {
      "debug/num_lat_loss": 1764.0,
      "debug/num_lat_total": 2620.0,
      "debug/num_tok_loss": 1764.0,
      "debug/num_tok_total": 2620.0,
      "epoch": 5.36986301369863,
      "step": 392
    },
    {
      "epoch": 5.36986301369863,
      "step": 392,
      "train/ce_loss": 2.171875,
      "train/diffusion_loss": 0.3768068253993988
    },
    {
      "epoch": 5.36986301369863,
      "step": 392,
      "train/learning_rate_real": 1.1613289796158269e-05
    },
    {
      "debug/num_lat_loss": 1596.0,
      "debug/num_lat_total": 2267.0,
      "debug/num_tok_loss": 1596.0,
      "debug/num_tok_total": 2267.0,
      "epoch": 5.36986301369863,
      "step": 392
    },
    {
      "epoch": 5.36986301369863,
      "step": 392,
      "train/ce_loss": 1.7734375,
      "train/diffusion_loss": 0.3962506353855133
    },
    {
      "epoch": 5.36986301369863,
      "step": 392,
      "train/learning_rate_real": 1.1613289796158269e-05
    },
    {
      "debug/num_lat_loss": 1813.0,
      "debug/num_lat_total": 2892.0,
      "debug/num_tok_loss": 1813.0,
      "debug/num_tok_total": 2892.0,
      "epoch": 5.36986301369863,
      "step": 392
    },
    {
      "epoch": 5.36986301369863,
      "step": 392,
      "train/ce_loss": 2.09375,
      "train/diffusion_loss": 0.38966646790504456
    },
    {
      "epoch": 5.36986301369863,
      "step": 392,
      "train/learning_rate_real": 1.1613289796158269e-05
    },
    {
      "debug/num_lat_loss": 1693.0,
      "debug/num_lat_total": 2233.0,
      "debug/num_tok_loss": 1693.0,
      "debug/num_tok_total": 2233.0,
      "epoch": 5.36986301369863,
      "step": 392
    },
    {
      "epoch": 5.36986301369863,
      "step": 392,
      "train/ce_loss": 1.8203125,
      "train/diffusion_loss": 0.4384276866912842
    },
    {
      "epoch": 5.36986301369863,
      "step": 392,
      "train/learning_rate_real": 1.1613289796158269e-05
    },
    {
      "debug/num_lat_loss": 1622.0,
      "debug/num_lat_total": 2212.0,
      "debug/num_tok_loss": 1622.0,
      "debug/num_tok_total": 2212.0,
      "epoch": 5.383561643835616,
      "step": 393
    },
    {
      "epoch": 5.383561643835616,
      "step": 393,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.4339134395122528
    },
    {
      "epoch": 5.383561643835616,
      "step": 393,
      "train/learning_rate_real": 1.155797246439313e-05
    },
    {
      "debug/num_lat_loss": 1773.0,
      "debug/num_lat_total": 2844.0,
      "debug/num_tok_loss": 1773.0,
      "debug/num_tok_total": 2844.0,
      "epoch": 5.383561643835616,
      "step": 393
    },
    {
      "epoch": 5.383561643835616,
      "step": 393,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.32827895879745483
    },
    {
      "epoch": 5.383561643835616,
      "step": 393,
      "train/learning_rate_real": 1.155797246439313e-05
    },
    {
      "debug/num_lat_loss": 1796.0,
      "debug/num_lat_total": 2872.0,
      "debug/num_tok_loss": 1796.0,
      "debug/num_tok_total": 2872.0,
      "epoch": 5.383561643835616,
      "step": 393
    },
    {
      "epoch": 5.383561643835616,
      "step": 393,
      "train/ce_loss": 1.9609375,
      "train/diffusion_loss": 0.3501279056072235
    },
    {
      "epoch": 5.383561643835616,
      "step": 393,
      "train/learning_rate_real": 1.155797246439313e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2403.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2403.0,
      "epoch": 5.383561643835616,
      "step": 393
    },
    {
      "epoch": 5.383561643835616,
      "step": 393,
      "train/ce_loss": 1.9453125,
      "train/diffusion_loss": 0.4598109722137451
    },
    {
      "epoch": 5.383561643835616,
      "step": 393,
      "train/learning_rate_real": 1.155797246439313e-05
    },
    {
      "debug/num_lat_loss": 1746.0,
      "debug/num_lat_total": 2820.0,
      "debug/num_tok_loss": 1746.0,
      "debug/num_tok_total": 2820.0,
      "epoch": 5.397260273972603,
      "step": 394
    },
    {
      "epoch": 5.397260273972603,
      "step": 394,
      "train/ce_loss": 1.7421875,
      "train/diffusion_loss": 0.3456258177757263
    },
    {
      "epoch": 5.397260273972603,
      "step": 394,
      "train/learning_rate_real": 1.150267368058646e-05
    },
    {
      "debug/num_lat_loss": 1798.0,
      "debug/num_lat_total": 2656.0,
      "debug/num_tok_loss": 1798.0,
      "debug/num_tok_total": 2656.0,
      "epoch": 5.397260273972603,
      "step": 394
    },
    {
      "epoch": 5.397260273972603,
      "step": 394,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.43467411398887634
    },
    {
      "epoch": 5.397260273972603,
      "step": 394,
      "train/learning_rate_real": 1.150267368058646e-05
    },
    {
      "debug/num_lat_loss": 1769.0,
      "debug/num_lat_total": 2838.0,
      "debug/num_tok_loss": 1769.0,
      "debug/num_tok_total": 2838.0,
      "epoch": 5.397260273972603,
      "step": 394
    },
    {
      "epoch": 5.397260273972603,
      "step": 394,
      "train/ce_loss": 1.828125,
      "train/diffusion_loss": 0.365997850894928
    },
    {
      "epoch": 5.397260273972603,
      "step": 394,
      "train/learning_rate_real": 1.150267368058646e-05
    },
    {
      "debug/num_lat_loss": 1801.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1801.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 5.397260273972603,
      "step": 394
    },
    {
      "epoch": 5.397260273972603,
      "step": 394,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.39845091104507446
    },
    {
      "epoch": 5.397260273972603,
      "step": 394,
      "train/learning_rate_real": 1.150267368058646e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 3332.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 3332.0,
      "epoch": 5.410958904109589,
      "step": 395
    },
    {
      "epoch": 5.410958904109589,
      "step": 395,
      "train/ce_loss": 2.0,
      "train/diffusion_loss": 0.28146234154701233
    },
    {
      "epoch": 5.410958904109589,
      "step": 395,
      "train/learning_rate_real": 1.1447394533538234e-05
    },
    {
      "debug/num_lat_loss": 1807.0,
      "debug/num_lat_total": 2464.0,
      "debug/num_tok_loss": 1807.0,
      "debug/num_tok_total": 2464.0,
      "epoch": 5.410958904109589,
      "step": 395
    },
    {
      "epoch": 5.410958904109589,
      "step": 395,
      "train/ce_loss": 1.8359375,
      "train/diffusion_loss": 0.45914003252983093
    },
    {
      "epoch": 5.410958904109589,
      "step": 395,
      "train/learning_rate_real": 1.1447394533538234e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2831.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2831.0,
      "epoch": 5.410958904109589,
      "step": 395
    },
    {
      "epoch": 5.410958904109589,
      "step": 395,
      "train/ce_loss": 1.828125,
      "train/diffusion_loss": 0.34179848432540894
    },
    {
      "epoch": 5.410958904109589,
      "step": 395,
      "train/learning_rate_real": 1.1447394533538234e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2622.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2622.0,
      "epoch": 5.410958904109589,
      "step": 395
    },
    {
      "epoch": 5.410958904109589,
      "step": 395,
      "train/ce_loss": 2.0625,
      "train/diffusion_loss": 0.39029791951179504
    },
    {
      "epoch": 5.410958904109589,
      "step": 395,
      "train/learning_rate_real": 1.1447394533538234e-05
    },
    {
      "debug/num_lat_loss": 1815.0,
      "debug/num_lat_total": 2038.0,
      "debug/num_tok_loss": 1815.0,
      "debug/num_tok_total": 2038.0,
      "epoch": 5.424657534246576,
      "step": 396
    },
    {
      "epoch": 5.424657534246576,
      "step": 396,
      "train/ce_loss": 1.9375,
      "train/diffusion_loss": 0.5618933439254761
    },
    {
      "epoch": 5.424657534246576,
      "step": 396,
      "train/learning_rate_real": 1.1392136111661766e-05
    },
    {
      "debug/num_lat_loss": 1799.0,
      "debug/num_lat_total": 2883.0,
      "debug/num_tok_loss": 1799.0,
      "debug/num_tok_total": 2883.0,
      "epoch": 5.424657534246576,
      "step": 396
    },
    {
      "epoch": 5.424657534246576,
      "step": 396,
      "train/ce_loss": 1.7109375,
      "train/diffusion_loss": 0.36794179677963257
    },
    {
      "epoch": 5.424657534246576,
      "step": 396,
      "train/learning_rate_real": 1.1392136111661766e-05
    },
    {
      "debug/num_lat_loss": 1811.0,
      "debug/num_lat_total": 2683.0,
      "debug/num_tok_loss": 1811.0,
      "debug/num_tok_total": 2683.0,
      "epoch": 5.424657534246576,
      "step": 396
    },
    {
      "epoch": 5.424657534246576,
      "step": 396,
      "train/ce_loss": 1.9140625,
      "train/diffusion_loss": 0.3997327983379364
    },
    {
      "epoch": 5.424657534246576,
      "step": 396,
      "train/learning_rate_real": 1.1392136111661766e-05
    },
    {
      "debug/num_lat_loss": 1804.0,
      "debug/num_lat_total": 2890.0,
      "debug/num_tok_loss": 1804.0,
      "debug/num_tok_total": 2890.0,
      "epoch": 5.424657534246576,
      "step": 396
    },
    {
      "epoch": 5.424657534246576,
      "step": 396,
      "train/ce_loss": 1.796875,
      "train/diffusion_loss": 0.34589481353759766
    },
    {
      "epoch": 5.424657534246576,
      "step": 396,
      "train/learning_rate_real": 1.1392136111661766e-05
    },
    {
      "debug/num_lat_loss": 1780.0,
      "debug/num_lat_total": 2631.0,
      "debug/num_tok_loss": 1780.0,
      "debug/num_tok_total": 2631.0,
      "epoch": 5.438356164383562,
      "step": 397
    },
    {
      "epoch": 5.438356164383562,
      "step": 397,
      "train/ce_loss": 1.953125,
      "train/diffusion_loss": 0.40771564841270447
    },
    {
      "epoch": 5.438356164383562,
      "step": 397,
      "train/learning_rate_real": 1.133689950296232e-05
    },
    {
      "debug/num_lat_loss": 1708.0,
      "debug/num_lat_total": 2290.0,
      "debug/num_tok_loss": 1708.0,
      "debug/num_tok_total": 2290.0,
      "epoch": 5.438356164383562,
      "step": 397
    },
    {
      "epoch": 5.438356164383562,
      "step": 397,
      "train/ce_loss": 1.875,
      "train/diffusion_loss": 0.4897439777851105
    },
    {
      "epoch": 5.438356164383562,
      "step": 397,
      "train/learning_rate_real": 1.133689950296232e-05
    },
    {
      "debug/num_lat_loss": 1787.0,
      "debug/num_lat_total": 3070.0,
      "debug/num_tok_loss": 1787.0,
      "debug/num_tok_total": 3070.0,
      "epoch": 5.438356164383562,
      "step": 397
    },
    {
      "epoch": 5.438356164383562,
      "step": 397,
      "train/ce_loss": 1.9296875,
      "train/diffusion_loss": 0.3240067660808563
    },
    {
      "epoch": 5.438356164383562,
      "step": 397,
      "train/learning_rate_real": 1.133689950296232e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2659.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2659.0,
      "epoch": 5.438356164383562,
      "step": 397
    },
    {
      "epoch": 5.438356164383562,
      "step": 397,
      "train/ce_loss": 1.8125,
      "train/diffusion_loss": 0.43447256088256836
    },
    {
      "epoch": 5.438356164383562,
      "step": 397,
      "train/learning_rate_real": 1.133689950296232e-05
    },
    {
      "debug/num_lat_loss": 1802.0,
      "debug/num_lat_total": 2654.0,
      "debug/num_tok_loss": 1802.0,
      "debug/num_tok_total": 2654.0,
      "epoch": 5.4520547945205475,
      "step": 398
    },
    {
      "epoch": 5.4520547945205475,
      "step": 398,
      "train/ce_loss": 1.7734375,
      "train/diffusion_loss": 0.41866445541381836
    },
    {
      "epoch": 5.4520547945205475,
      "step": 398,
      "train/learning_rate_real": 1.128168579501567e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2206.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2206.0,
      "epoch": 5.4520547945205475,
      "step": 398
    },
    {
      "epoch": 5.4520547945205475,
      "step": 398,
      "train/ce_loss": 1.8984375,
      "train/diffusion_loss": 0.47042030096054077
    },
    {
      "epoch": 5.4520547945205475,
      "step": 398,
      "train/learning_rate_real": 1.128168579501567e-05
    },
    {
      "debug/num_lat_loss": 1784.0,
      "debug/num_lat_total": 2436.0,
      "debug/num_tok_loss": 1784.0,
      "debug/num_tok_total": 2436.0,
      "epoch": 5.4520547945205475,
      "step": 398
    },
    {
      "epoch": 5.4520547945205475,
      "step": 398,
      "train/ce_loss": 1.9765625,
      "train/diffusion_loss": 0.44332772493362427
    },
    {
      "epoch": 5.4520547945205475,
      "step": 398,
      "train/learning_rate_real": 1.128168579501567e-05
    },
    {
      "debug/num_lat_loss": 1767.0,
      "debug/num_lat_total": 2612.0,
      "debug/num_tok_loss": 1767.0,
      "debug/num_tok_total": 2612.0,
      "epoch": 5.4520547945205475,
      "step": 398
    },
    {
      "epoch": 5.4520547945205475,
      "step": 398,
      "train/ce_loss": 1.921875,
      "train/diffusion_loss": 0.44071370363235474
    },
    {
      "epoch": 5.4520547945205475,
      "step": 398,
      "train/learning_rate_real": 1.128168579501567e-05
    },
    {
      "debug/num_lat_loss": 1806.0,
      "debug/num_lat_total": 2898.0,
      "debug/num_tok_loss": 1806.0,
      "debug/num_tok_total": 2898.0,
      "epoch": 5.465753424657534,
      "step": 399
    },
    {
      "epoch": 5.465753424657534,
      "step": 399,
      "train/ce_loss": 1.90625,
      "train/diffusion_loss": 0.3398825526237488
    },
    {
      "epoch": 5.465753424657534,
      "step": 399,
      "train/learning_rate_real": 1.122649607494669e-05
    },
    {
      "debug/num_lat_loss": 1756.0,
      "debug/num_lat_total": 2816.0,
      "debug/num_tok_loss": 1756.0,
      "debug/num_tok_total": 2816.0,
      "epoch": 5.465753424657534,
      "step": 399
    },
    {
      "epoch": 5.465753424657534,
      "step": 399,
      "train/ce_loss": 1.7578125,
      "train/diffusion_loss": 0.3638966679573059
    },
    {
      "epoch": 5.465753424657534,
      "step": 399,
      "train/learning_rate_real": 1.122649607494669e-05
    },
    {
      "debug/num_lat_loss": 1788.0,
      "debug/num_lat_total": 2657.0,
      "debug/num_tok_loss": 1788.0,
      "debug/num_tok_total": 2657.0,
      "epoch": 5.465753424657534,
      "step": 399
    },
    {
      "epoch": 5.465753424657534,
      "step": 399,
      "train/ce_loss": 1.8203125,
      "train/diffusion_loss": 0.4017951786518097
    },
    {
      "epoch": 5.465753424657534,
      "step": 399,
      "train/learning_rate_real": 1.122649607494669e-05
    },
    {
      "debug/num_lat_loss": 1790.0,
      "debug/num_lat_total": 2875.0,
      "debug/num_tok_loss": 1790.0,
      "debug/num_tok_total": 2875.0,
      "epoch": 5.465753424657534,
      "step": 399
    },
    {
      "epoch": 5.465753424657534,
      "step": 399,
      "train/ce_loss": 1.8984375,
      "train/diffusion_loss": 0.3224497437477112
    },
    {
      "epoch": 5.465753424657534,
      "step": 399,
      "train/learning_rate_real": 1.122649607494669e-05
    },
    {
      "epoch": 5.47945205479452,
      "grad_norm": 1.3492817878723145,
      "learning_rate": 1.122649607494669e-05,
      "loss": 2.5371,
      "step": 400
    }
  ],
  "logging_steps": 10,
  "max_steps": 730,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.4486628659807603e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
