{
  "baselines": {
    "all_transcribed": {
      "coverage_pct": 99.401087,
      "final_missing_segments": 459058,
      "final_validated_segments": 76189410,
      "total_segments": 76648468,
      "videos": 497075
    },
    "selected_set": {
      "coverage_pct": 99.454046,
      "final_missing_segments": 386899,
      "final_validated_segments": 70479698,
      "total_segments": 70866597,
      "videos": 454854
    },
    "strict_set": {
      "coverage_pct": 99.451896,
      "final_missing_segments": 380710,
      "final_validated_segments": 69078678,
      "total_segments": 69459388,
      "videos": 448159
    }
  },
  "final_cleaned": {
    "coverage_pct": 99.463639,
    "dispose_segments": 33511362,
    "final_missing_segments": 325660,
    "final_validated_segments": 60390887,
    "golden_segments": 17486648,
    "historical_segments": 30930479,
    "missing_segments": 325660,
    "recover_v1_segments": 26892584,
    "recover_v2_segments": 2567824,
    "redo_segments": 9392877,
    "remaining_foreign_segments": 0,
    "remaining_inaudible_segments": 0,
    "remaining_lang_mismatch_flag_segments": 43305732,
    "remaining_no_speech_segments": 0,
    "remaining_ta_to_te_residual_segments": 0,
    "remaining_target_language_mismatch_pairs": 132,
    "remaining_target_language_mismatch_segments": 6077309,
    "remaining_unk_segments": 0,
    "total_segments": 60716547,
    "videos": 395513,
    "videos_with_remaining_lang_mismatch_flag_segments": 313322,
    "videos_with_remaining_target_language_mismatch_segments": 247508
  },
  "generated_at_epoch_s": 1773214871.842,
  "outputs": {
    "excluded_videos_csv": "final_data/final_cleaned_excluded_videos.csv",
    "keep_videos_csv": "final_data/final_cleaned_keep_videos.csv",
    "remaining_target_mismatch_pairs_csv": "final_data/final_cleaned_remaining_target_mismatch_pairs.csv",
    "segment_removal_summary_csv": "final_data/final_cleaned_segment_removal_summary.csv",
    "segments_parquet": "final_data/final_cleaned_segments.parquet",
    "video_rollup_parquet": "final_data/final_cleaned_video_rollup.parquet"
  },
  "postpass": {
    "affected_videos": 19951,
    "elapsed_s": 209.12,
    "removed_no_speech_segments": 50634
  },
  "rule_config": {
    "approved_theme_families": [
      "language_learning",
      "pakistan_urdu",
      "meeting_zoom_group",
      "startup_interview"
    ],
    "drop_blank_unk_inaudible_segments": true,
    "drop_known_non_target_youtube_audio_language": true,
    "drop_ta_to_te_residual_segments": true,
    "foreign_share_drop_threshold_pct": 5.0,
    "und_keep_rule": {
      "max_distinct_detected_languages": 3,
      "required_foreign_segments": 0
    }
  },
  "segment_filter_counts": {
    "candidate_segments": 60826058,
    "kept_segment_pct_after_video_filters": 99.81996,
    "kept_segments": 60716547,
    "removed_blank_unk_or_inaudible_segments": 66914,
    "removed_foreign_segments": 42279,
    "removed_ta_to_te_residual_segments": 318,
    "videos_removed_to_zero_segments": 0
  },
  "video_filter_counts": {
    "drop_any_pct_of_strict": 11.742484,
    "drop_any_videos": 52625,
    "drop_approved_theme_family_videos": 40011,
    "drop_audio_non_target_videos": 10290,
    "drop_foreign_share_ge5_videos": 6438,
    "drop_language_learning_videos": 14211,
    "drop_meeting_zoom_group_videos": 13055,
    "drop_pakistan_urdu_videos": 13554,
    "drop_startup_interview_videos": 1371,
    "drop_und_unsound_videos": 2455,
    "kept_after_video_filters_segments": 60826058,
    "kept_after_video_filters_videos": 395534,
    "strict_videos": 448159
  }
}
