{
  "assamese": {
    "n_samples": 500,
    "wer_raw": 32.62,
    "wer_norm": 31.92,
    "wer_numcanon": 31.92,
    "space_norm_wer": 27.26,
    "mer": 15.22,
    "cer_norm": 14.22,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.7,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.66,
      "norm_to_mer": 16.7
    }
  },
  "bengali": {
    "n_samples": 500,
    "wer_raw": 28.49,
    "wer_norm": 27.79,
    "wer_numcanon": 27.79,
    "space_norm_wer": 19.87,
    "mer": 6.95,
    "cer_norm": 6.97,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.7,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 7.92,
      "norm_to_mer": 20.84
    }
  },
  "english": {
    "n_samples": 500,
    "wer_raw": 39.33,
    "wer_norm": 27.32,
    "wer_numcanon": 26.92,
    "space_norm_wer": 17.14,
    "mer": 15.89,
    "cer_norm": 15.77,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 12.01,
      "norm_to_numcanon": 0.4,
      "norm_to_space_norm": 10.18,
      "norm_to_mer": 11.43
    }
  },
  "gujarati": {
    "n_samples": 500,
    "wer_raw": 25.21,
    "wer_norm": 25.18,
    "wer_numcanon": 25.18,
    "space_norm_wer": 19.7,
    "mer": 8.02,
    "cer_norm": 7.75,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.03,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 5.48,
      "norm_to_mer": 17.16
    }
  },
  "hindi": {
    "n_samples": 500,
    "wer_raw": 19.94,
    "wer_norm": 19.9,
    "wer_numcanon": 19.9,
    "space_norm_wer": 15.35,
    "mer": 8.03,
    "cer_norm": 7.48,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.04,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.55,
      "norm_to_mer": 11.87
    }
  },
  "kannada": {
    "n_samples": 500,
    "wer_raw": 36.26,
    "wer_norm": 36.19,
    "wer_numcanon": 36.19,
    "space_norm_wer": 27.66,
    "mer": 8.68,
    "cer_norm": 8.87,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.07,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.53,
      "norm_to_mer": 27.51
    }
  },
  "malayalam": {
    "n_samples": 500,
    "wer_raw": 48.75,
    "wer_norm": 46.14,
    "wer_numcanon": 46.14,
    "space_norm_wer": 36.13,
    "mer": 9.66,
    "cer_norm": 9.62,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 2.61,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.01,
      "norm_to_mer": 36.48
    }
  },
  "marathi": {
    "n_samples": 500,
    "wer_raw": 32.17,
    "wer_norm": 32.15,
    "wer_numcanon": 32.15,
    "space_norm_wer": 24.09,
    "mer": 9.8,
    "cer_norm": 9.76,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.02,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.06,
      "norm_to_mer": 22.35
    }
  },
  "odia": {
    "n_samples": 500,
    "wer_raw": 31.85,
    "wer_norm": 31.63,
    "wer_numcanon": 31.63,
    "space_norm_wer": 22.81,
    "mer": 8.79,
    "cer_norm": 8.69,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.22,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 8.82,
      "norm_to_mer": 22.84
    }
  },
  "punjabi": {
    "n_samples": 500,
    "wer_raw": 25.0,
    "wer_norm": 24.51,
    "wer_numcanon": 24.51,
    "space_norm_wer": 19.52,
    "mer": 8.66,
    "cer_norm": 8.21,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.49,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 4.99,
      "norm_to_mer": 15.85
    }
  },
  "tamil": {
    "n_samples": 500,
    "wer_raw": 42.72,
    "wer_norm": 40.51,
    "wer_numcanon": 40.51,
    "space_norm_wer": 30.73,
    "mer": 9.32,
    "cer_norm": 9.38,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 2.21,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 9.78,
      "norm_to_mer": 31.19
    }
  },
  "telugu": {
    "n_samples": 500,
    "wer_raw": 34.38,
    "wer_norm": 34.26,
    "wer_numcanon": 34.26,
    "space_norm_wer": 23.98,
    "mer": 7.65,
    "cer_norm": 7.88,
    "empty_hypotheses": 0,
    "normalization_delta": {
      "raw_to_norm": 0.12,
      "norm_to_numcanon": 0.0,
      "norm_to_space_norm": 10.28,
      "norm_to_mer": 26.61
    }
  },
  "__overall__": {
    "n_samples": 6000,
    "wer_raw": 32.89,
    "wer_norm": 31.35,
    "wer_numcanon": 31.32,
    "space_norm_wer": 23.71,
    "mer": 9.68,
    "cer_norm": 9.56
  },
  "__macro_avg__": {
    "n_languages": 12,
    "wer_raw": 33.06,
    "wer_norm": 31.46,
    "wer_numcanon": 31.42,
    "space_norm_wer": 23.69,
    "mer": 9.72,
    "cer_norm": 9.55
  },
  "__decode_stats__": {
    "per_language": {
      "assamese": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 25.81,
        "max_output_tokens": 101
      },
      "bengali": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 23.7,
        "max_output_tokens": 52
      },
      "english": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 18.81,
        "max_output_tokens": 136
      },
      "gujarati": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 22.84,
        "max_output_tokens": 46
      },
      "hindi": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 18.73,
        "max_output_tokens": 38
      },
      "kannada": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 29.87,
        "max_output_tokens": 58
      },
      "malayalam": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 30.83,
        "max_output_tokens": 60
      },
      "marathi": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 24.04,
        "max_output_tokens": 63
      },
      "odia": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 23.4,
        "max_output_tokens": 59
      },
      "punjabi": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 19.75,
        "max_output_tokens": 42
      },
      "tamil": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 29.31,
        "max_output_tokens": 54
      },
      "telugu": {
        "n_samples": 500,
        "hit_max_tokens": 0,
        "degenerate_loop": 0,
        "avg_output_tokens": 27.3,
        "max_output_tokens": 57
      }
    },
    "total_hit_max_tokens": 0,
    "total_degenerate_loop": 0,
    "no_repeat_ngram_size": 3,
    "max_new_tokens": 256
  },
  "__filtered_min_duration__": {
    "min_duration_sec": 0.5,
    "languages": {
      "assamese": {
        "n_samples": 478,
        "n_excluded": 22,
        "wer_norm": 31.75,
        "mer": 15.01,
        "cer_norm": 14.03
      },
      "bengali": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 27.79,
        "mer": 6.95,
        "cer_norm": 6.97
      },
      "english": {
        "n_samples": 463,
        "n_excluded": 37,
        "wer_norm": 22.81,
        "mer": 13.12,
        "cer_norm": 12.72
      },
      "gujarati": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 25.18,
        "mer": 8.02,
        "cer_norm": 7.75
      },
      "hindi": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 19.9,
        "mer": 8.03,
        "cer_norm": 7.48
      },
      "kannada": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 36.19,
        "mer": 8.68,
        "cer_norm": 8.87
      },
      "malayalam": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 46.14,
        "mer": 9.66,
        "cer_norm": 9.62
      },
      "marathi": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 32.15,
        "mer": 9.8,
        "cer_norm": 9.76
      },
      "odia": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 31.63,
        "mer": 8.79,
        "cer_norm": 8.69
      },
      "punjabi": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 24.51,
        "mer": 8.66,
        "cer_norm": 8.21
      },
      "tamil": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 40.51,
        "mer": 9.32,
        "cer_norm": 9.38
      },
      "telugu": {
        "n_samples": 500,
        "n_excluded": 0,
        "wer_norm": 34.26,
        "mer": 7.65,
        "cer_norm": 7.88
      }
    },
    "__overall__": {
      "n_samples": 5941,
      "n_excluded": 59,
      "wer_norm": 30.98,
      "mer": 9.49,
      "cer_norm": 9.34
    },
    "__macro_avg__": {
      "n_languages": 12,
      "wer_norm": 31.07,
      "mer": 9.47,
      "cer_norm": 9.28
    }
  },
  "__meta__": {
    "checkpoint": "/home/ubuntu/training/checkpoints/cohere-transcribe-ckpt-260000",
    "checkpoint_name": "ckpt-260000",
    "model_id": "cohere-transcribe",
    "model_type": "Cohere-Transcribe-Indic-2B",
    "dataset": "BayAreaBoys/indic-asr-benchmark-6k",
    "batch_size": 64,
    "inference_time_sec": 132.64,
    "total_audio_sec": 40354.46,
    "rtf": 0.0033,
    "timestamp": "2026-04-09T18:12:14Z",
    "gpu": "NVIDIA H200 80GB",
    "framework": "transformers",
    "normalization_version": "v1",
    "jiwer_version": "3.1.0"
  }
}