o
    9wi$                     @   sF   d dl Z d dlmZmZ dd Zddedefdd	ZG d
d dZdS )    N)get_conjunctions	get_commac                 C   s&   | t |  dk rt | S t | S )N      ?)mathfloorceil)n r	   X/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/whisperx/SubtitlesProcessor.pynormal_round   s   

r   Fsecondsis_vttc                 C   s   | dksJ dt | d }|d }||d 8 }|d }||d 8 }|d } || d 8 }|r0dnd}|d	d
}| |d	d
| d	| |dS )Nr   znon-negative timestamp expectedg     @@i6 i`  i  .,02d:03d)round)r   r   millisecondshoursminutes	separatorhours_markerr	   r	   r
   format_timestamp
   s   r   c                   @   sH   e Zd ZdddZdddZdd
dZdddZdddZdddZdS )SubtitlesProcessor-      Fc                 C   s\   t || _tt|| _|| _|| _|| _|| _|| _	g d}| j|v r,d| _d| _d S d S )N)thlomykmamkojazhtitateknmlhinemrarfaurkar      )
r   commasetr   conjunctionssegmentslangmax_line_lengthmin_char_length_splitterr   )selfr5   r6   r7   r8   r   complex_script_languagesr	   r	   r
   __init__"   s   


zSubtitlesProcessor.__init__Nc                 C   sh  d}|dkod||d  v }|t |d k od||d  v }|rl||d  d || d< |r<||d  d || d< d S |rV|||d  d  dkrL|n|d || d< d S || d t || d |  || d< d S |r||d  d t || d |  || d< ||d  d || d< d S |r|d || d< |d || d< d S d|| d< d|| d< d S )Ng      ?r   end   startr   word)len)r9   wordsinext_segment_start_timekhas_prev_endhas_next_startr	   r	   r
   estimate_timestamp_for_word/   s$    0,,z.SubtitlesProcessor.estimate_timestamp_for_wordTc           	      C   s   g }t | jD ]V\}}|d t| jk r| j|d  d nd }|r2| ||}|| ||| q|d }t |D ]\}}d|vsFd|vrM| ||| q:||d |d |d d q|S )Nr=   r>   rA   r<   textr>   r<   rH   )	enumerater5   r@   determine_advanced_split_pointsextend$generate_subtitles_from_split_pointsrG   append)	r9   advanced_splitting	subtitlesrB   segmentrC   split_pointsrA   r?   r	   r	   r
   process_segmentsL   s"   (
z#SubtitlesProcessor.process_segmentsc                    s  g }d}d}| d|d  | jdv rdnd t fddD }|}tD ]\}}	t|	tr7|	d n|	}
t|
  }||7 }||8 }|| }t|	tr_d	|	vsXd
|	vr_| || || j	krt
|| d }|| jkr|| |d }t fddt||d D }q*|
| jr|| jkr|| jkr|| |d }d}q*|
 | jv r|| jkr|| jkr||d  |}|}q*|S )Nr   rA   rH   r$   r#   r=   c                 3   s2    | ]}t |trt|d  nt|  V  qdS r?   N
isinstancedictr@   .0r?   )	add_spacer	   r
   	<genexpr>k   s   0 zESubtitlesProcessor.determine_advanced_split_points.<locals>.<genexpr>r?   r>   r<      c                 3   s>    | ]}t | trt| d  nt|   V  qdS rU   rV   )rZ   jr[   rA   r	   r
   r\   ~   s   < )getsplitr6   sumrJ   rW   rX   r@   rG   r7   r   r8   rN   rangeendswithr2   lowerr4   )r9   rQ   rC   rR   last_split_point
char_counttotal_char_countchar_count_afterrB   r?   	word_textword_lengthchar_count_beforemidpointr	   r_   r
   rK   c   s@   


" 
"z2SubtitlesProcessor.determine_advanced_split_pointsc              	   C   s  g }| d|d  }t|}|d |d  }|d }| jdvr#dnd}	d}
|D ]s}||
|d	  }t|}t|d trg|d d }|d
 d }|d	 t|k rZ||d	  d nd }|rf|| dkrf|}n|	| }|| | }|}|| }||7 }|||t|d ts|n	|	dd |D d |d	 }
q)|
t|k r||
d  }t|}t|d tr|d d }|d
 d }n|	| }|| | }|}|| }|r|| dkr|}|||d ur|n|d t|d ts|n	|	dd |D d |S )NrA   rH   r<   r>   rT     r   r=   g?c                 s       | ]}|d  V  qdS rU   r	   rY   r	   r	   r
   r\          zJSubtitlesProcessor.generate_subtitles_from_split_points.<locals>.<genexpr>rI   c                 s   rq   rU   r	   rY   r	   r	   r
   r\      rr   )	r`   ra   r@   r6   rW   rX   joinstriprN   )r9   rQ   rR   next_start_timerP   rA   total_word_count
total_timeelapsed_timeprefix	start_idxsplit_pointfragment_wordscurrent_word_count
start_timeend_timenext_start_time_for_wordfragmentcurrent_durationr	   r	   r
   rM      s\   $$
$z7SubtitlesProcessor.generate_subtitles_from_split_pointssubtitles.srtc                 C   s   |  |}dd }t|dddG}| jr|d |rCt|dD ],\}}t|d | j}t|d	 | j}	|d
  }
|||||	|
 q W d    t|S W d    t|S 1 sZw   Y  t|S )Nc                 S   s8   |  | d |  | d| d |  |d  d S )N
z --> z

)write)fileidxr~   r   rH   r	   r	   r
   write_subtitle   s   z/SubtitlesProcessor.save.<locals>.write_subtitlewzutf-8)encodingzWEBVTT

r=   r>   r<   rH   )rS   openr   r   rJ   r   rt   r@   )r9   filenamerO   rP   r   r   r   subtitler~   r   rH   r	   r	   r
   save   s&   



zSubtitlesProcessor.save)r   r   F)N)T)r   T)	__name__
__module____qualname__r;   rG   rS   rK   rM   r   r	   r	   r	   r
   r   !   s    




*@r   )F)	r   whisperx.conjunctionsr   r   r   floatboolr   r   r	   r	   r	   r
   <module>   s
    