o
    iS                     @  sx   d dl mZ d dlZd dlZd dlmZ d dlZedZed d Z	ed d Z
dd
dZdd Zedkr:e  dS dS )    )annotationsN)Pathz/home/ubuntu/transcriptsdataz&transcript_variant_prompt_samples.jsonz%transcript_variant_test_input.parquetreturnargparse.Namespacec                  C  sD   t jdd} | jdttd | jdttd | jdtdd |  S )Nz8Build local test parquet for transcript variant workers.)descriptionz--input-json)typedefaultz--outputz--rows   )argparseArgumentParseradd_argumentr   DEFAULT_INPUTDEFAULT_OUTPUTint
parse_args)parser r   .scripts/build_transcript_variant_test_input.pyr      s
   r   c                  C  s   t  } t| j }g }t| jD ]6}t||t|  }d|d|d< d|d d|d< |d  d|d|d	< |	d
|d
< |
| qtj|}| jjjddd |j| jdd tdt| d| j  d S )Nvariant_row_06drow_idvariant_video_   05dvideo_id_seg_
segment_idtextT)parentsexist_okF)indexzWrote z	 rows -> )r   jsonloads
input_json	read_textrangerowsdictlenpopappendpd	DataFramefrom_recordsoutputparentmkdir
to_parquetprint)argssamplesr'   idxsampledfr   r   r   main   s   r9   __main__)r   r   )
__future__r   r   r"   pathlibr   pandasr,   ROOTr   r   r   r9   __name__r   r   r   r   <module>   s    

