{
  "source_path": "final_data/final_cleaned_segments_with_variants_rerouted.parquet",
  "output_path": "final_data/variant_only_suspicious_subset.parquet",
  "analysis_view_path": "final_data/variant_only_suspicious_analysis.parquet",
  "subset_rows": 100,
  "by_language": {
    "hi": 38,
    "te": 17,
    "bn": 8,
    "pa": 8,
    "gu": 8,
    "ml": 6,
    "mr": 5,
    "ta": 3,
    "or": 3,
    "kn": 3,
    "as": 1
  },
  "variant_bucket_counts": {
    "manual": 100
  },
  "transcription_bucket_counts": {
    "retain": 79,
    "clean": 21
  },
  "native_bucket_counts": {
    "retain": 65,
    "clean": 21,
    "manual": 14
  },
  "romanized_bucket_counts": {
    "manual": 92,
    "retain": 8
  },
  "native_only_rows": 8,
  "romanized_only_rows": 86,
  "both_variant_rows": 6
}