--- pid: 1533738 cwd: "/home/ubuntu/transcripts" command: "cd /home/ubuntu/transcripts && source venv/bin/activate && python3 << 'PYEOF'\nimport os, time, boto3\nfrom boto3.s3.transfer import TransferConfig\nfrom botocore.config import Config\nfrom dotenv import load_dotenv\n\nload_dotenv('.env')\ns3 = boto3.client('s3', endpoint_url=os.environ['R2_ENDPOINT_URL'], aws_access_key_id=os.environ['R2_ACCESS_KEY_ID'], aws_secret_access_key=os.environ['R2_SECRET_ACCESS_KEY'], region_name='auto', config=Config(max_pool_connections=16))\ntransfer_cfg = TransferConfig(multipart_threshold=256*1024*1024, multipart_chunksize=128*1024*1024, max_concurrency=8)\n\nBUCKET = 'metafiles'\nPREFIX = 'transcripts_metadata'\n\ntry:\n s3.head_bucket(Bucket=BUCKET)\n print(f\"Bucket '{BUCKET}' exists\")\nexcept:\n s3.create_bucket(Bucket=BUCKET)\n print(f\"Created bucket '{BUCKET}'\")\n\nALL_FILES = [\n (\"data/transcription_results.parquet\", \"data\"),\n (\"data/transcription_flags.parquet\", \"data\"),\n (\"data/recover_v2_consolidated.parquet\", \"data\"),\n (\"data/recover_manifest.parquet\", \"data\"),\n (\"data/youtube_video_metadata_all.csv\", \"data\"),\n (\"data/video_tts_classification_all.csv\", \"data\"),\n (\"data/video_tts_classification_final.csv\", \"data\"),\n (\"data/video_tts_dropped_by_channel.csv\", \"data\"),\n (\"data/golden_segments.csv\", \"data\"),\n (\"data/redo_segments.csv\", \"data\"),\n (\"data/dispose_segments.csv\", \"data\"),\n (\"data/video_summary.csv\", \"data\"),\n (\"final_data/final_cleaned_segments_with_variants_rerouted_repetition_filtered.parquet\", \"final_data\"),\n (\"final_data/final_cleaned_segments.parquet\", \"final_data\"),\n (\"final_data/transcript_variants_clean_rerouted.parquet\", \"final_data\"),\n (\"final_data/transcript_variants_merged.parquet\", \"final_data\"),\n (\"final_data/final_cleaned_excluded_videos.csv\", \"final_data\"),\n (\"final_data/final_cleaned_keep_videos.csv\", \"final_data\"),\n (\"final_data/final_cleaned_video_rollup.parquet\", \"final_data\"),\n (\"final_data/repetition_flags.parquet\", \"final_data\"),\n]\n\nt0 = time.time()\ntotal_bytes = 0\nfor i, (local_rel, subfolder) in enumerate(ALL_FILES, 1):\n local_path = f\"/home/ubuntu/transcripts/{local_rel}\"\n size = os.path.getsize(local_path)\n total_bytes += size\n r2_key = f\"{PREFIX}/{subfolder}/{os.path.basename(local_rel)}\"\n print(f\"[{i}/{len(ALL_FILES)}] {local_rel} ({size/(1024**3):.1f}GB) -> {r2_key}\")\n t1 = time.time()\n s3.upload_file(local_path, BUCKET, r2_key, Config=transfer_cfg)\n print(f\" done in {time.time()-t1:.0f}s\")\n\nelapsed = time.time() - t0\nprint(f\"\\n=== BACKUP COMPLETE ===\")\nprint(f\"Files: {len(ALL_FILES)}, Total: {total_bytes/(1024**3):.1f}GB, Time: {elapsed:.0f}s\")\nPYEOF" started_at: 2026-03-14T01:13:36.657Z running_for_seconds: 605 --- Bucket 'metafiles' exists [1/20] data/transcription_results.parquet (11.1GB) -> transcripts_metadata/data/transcription_results.parquet done in 77s [2/20] data/transcription_flags.parquet (2.1GB) -> transcripts_metadata/data/transcription_flags.parquet done in 16s [3/20] data/recover_v2_consolidated.parquet (7.0GB) -> transcripts_metadata/data/recover_v2_consolidated.parquet done in 45s [4/20] data/recover_manifest.parquet (0.3GB) -> transcripts_metadata/data/recover_manifest.parquet done in 7s [5/20] data/youtube_video_metadata_all.csv (2.5GB) -> transcripts_metadata/data/youtube_video_metadata_all.csv done in 17s [6/20] data/video_tts_classification_all.csv (0.2GB) -> transcripts_metadata/data/video_tts_classification_all.csv done in 13s [7/20] data/video_tts_classification_final.csv (0.2GB) -> transcripts_metadata/data/video_tts_classification_final.csv done in 13s [8/20] data/video_tts_dropped_by_channel.csv (0.0GB) -> transcripts_metadata/data/video_tts_dropped_by_channel.csv done in 1s [9/20] data/golden_segments.csv (2.0GB) -> transcripts_metadata/data/golden_segments.csv done in 17s [10/20] data/redo_segments.csv (1.4GB) -> transcripts_metadata/data/redo_segments.csv done in 11s [11/20] data/dispose_segments.csv (0.8GB) -> transcripts_metadata/data/dispose_segments.csv done in 8s [12/20] data/video_summary.csv (0.0GB) -> transcripts_metadata/data/video_summary.csv done in 1s [13/20] final_data/final_cleaned_segments_with_variants_rerouted_repetition_filtered.parquet (19.0GB) -> transcripts_metadata/final_data/final_cleaned_segments_with_variants_rerouted_repetition_filtered.parquet done in 140s [14/20] final_data/final_cleaned_segments.parquet (11.5GB) -> transcripts_metadata/final_data/final_cleaned_segments.parquet done in 116s [15/20] final_data/transcript_variants_clean_rerouted.parquet (6.4GB) -> transcripts_metadata/final_data/transcript_variants_clean_rerouted.parquet done in 46s [16/20] final_data/transcript_variants_merged.parquet (6.3GB) -> transcripts_metadata/final_data/transcript_variants_merged.parquet done in 75s [17/20] final_data/final_cleaned_excluded_videos.csv (0.0GB) -> transcripts_metadata/final_data/final_cleaned_excluded_videos.csv done in 0s [18/20] final_data/final_cleaned_keep_videos.csv (0.0GB) -> transcripts_metadata/final_data/final_cleaned_keep_videos.csv done in 1s [19/20] final_data/final_cleaned_video_rollup.parquet (0.0GB) -> transcripts_metadata/final_data/final_cleaned_video_rollup.parquet done in 0s [20/20] final_data/repetition_flags.parquet (0.0GB) -> transcripts_metadata/final_data/repetition_flags.parquet done in 3s === BACKUP COMPLETE === Files: 20, Total: 71.0GB, Time: 607s --- exit_code: 0 elapsed_ms: 607609 ended_at: 2026-03-14T01:23:44.266Z ---