o
    ۷i                     @   s4  d dl mZ d dlmZ d dlmZ d dlZd dlZ	d dl
Z
d dlmZ ddlmZmZ eeZe Zer:d dlZnedd	ejjd
edejjfddZd	ejjdejjdejddfddZd	ejjdejjde
jd
eddf
ddZ	ddee	jj ej B e
jB ee
j B dede
jd
ede!deddfddZ"dS )    )Iterator)Fraction)chainN)tqdm   )
get_loggeris_av_availablez`PyAV is required to use LTX 2.0 video export utilities. You can install it with `pip install av`	containeraudio_sample_ratereturnc                 C   s0   | j d|d}||j_d|j_td||j_|S )z/
    Prepare the audio stream for writing.
    aacratestereo   )
add_streamcodec_contextsample_ratelayoutr   	time_base)r	   r
   audio_stream r   [/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/pipelines/ltx2/export_utils.py_prepare_audio_stream(   s
   r   r   frame_inc                 C   s   |j }|jpd}|jpd}|jp|j}tjjj|||d}d}||D ]}	|	j	d u r.||	_	||	j
7 }|j|	_| ||	 q$| D ]}
| |
 qDd S )Nfltpr   )formatr   r   r   )r   r   r   r   avaudio	resamplerAudioResamplerresampleptssamplesmuxencode)r	   r   r   cctarget_formattarget_layouttarget_rateaudio_resampleraudio_next_ptsrframepacketr   r   r   _resample_audio3   s&   



r.   r#   c                 C   s   |j dkr|d d d f }|jd dkr|jd dkr|j}|jd dkr.td|j d|jtjkrCt|dd}|d tj}t	j
j| dd	  d
dd}||_t| || d S )Nr      r   z,Expected samples with 2 channels; got shape .g      g      ?g    @s16r   )r   r   )ndimshapeT
ValueErrordtypetorchint16cliptor   
AudioFramefrom_ndarray
contiguousreshapecpunumpyr   r.   )r	   r   r#   r
   r   r   r   r   _write_audioP   s    
rB   r   videofpsr   output_pathvideo_chunks_numberc                 C   s  t | tr"t | d tjjr"dd | D }tj|dd} t| } n/t | tjrQt	t
| | k| t| k}t|rG| d  d} ntd t| } t | tjrctj| |dd} t| } t| }|j\}	}
}}	tj|d	d
}|jdt|d}||_|
|_d|_|dur|du rtdt||}tt |g| |ddD ]#}|!d" }|D ]}tj#j$|dd}|%|D ]}|&| qqq|% D ]}|&| q|durt'|||| |(  dS )a$  
    Encodes a video with audio using the PyAV library. Based on code from the original LTX-2 repo:
    https://github.com/Lightricks/LTX-2/blob/4f410820b198e05074a1e92de793e3b59e9ab5a0/packages/ltx-pipelines/src/ltx_pipelines/utils/media_io.py#L182

    Args:
        video (`List[PIL.Image.Image]` or `np.ndarray` or `torch.Tensor`):
            A video tensor of shape [frames, height, width, channels] with integer pixel values in [0, 255]. If the
            input is a `np.ndarray`, it is expected to be a float array with values in [0, 1] (which is what pipelines
            usually return with `output_type="np"`).
        fps (`int`)
            The frames per second (FPS) of the encoded video.
        audio (`torch.Tensor`, *optional*):
            An audio waveform of shape [audio_channels, samples].
        audio_sample_rate: (`int`, *optional*):
            The sampling rate of the audio waveform. For LTX 2, this is typically 24000 (24 kHz).
        output_path (`str`):
            The path to save the encoded video to.
        video_chunks_number (`int`, *optional*, defaults to `1`):
            The number of chunks to split the video into for encoding. Each chunk will be encoded separately. The
            number of chunks to use often depends on the tiling config for the video VAE.
    r   c                 S   s   g | ]}t |qS r   )nparray).0framer   r   r   
<listcomp>   s    z encode_video.<locals>.<listcomp>)axis   uint8zSupplied `numpy.ndarray` does not have values in [0, 1]. The values will be assumed to be pixel values in [0, ..., 255] and will be used as is.)dimw)modelibx264r   yuv420pNz4audio_sample_rate is required when audio is providedzEncoding video chunks)totaldescr@   rgb24)r   ))
isinstancelistPILImagerG   stackr8   
from_numpyndarraylogical_and
zeros_like	ones_likeallroundastypeloggerwarningTensortensor_splititernextr4   r   openr   intwidthheightpix_fmtr6   r   r   r   r;   rA   
VideoFramer=   r%   r$   rB   close)rC   rD   r   r
   rE   rF   video_framesis_denormalizedfirst_chunk_rm   rl   r	   streamr   video_chunkvideo_chunk_cpuframe_arrayrJ   r-   r   r   r   encode_videon   sN    


ry   )r   )#collections.abcr   	fractionsr   	itertoolsr   rA   rG   	PIL.ImagerY   r8   r   utilsr   r   __name__rd   _CAN_USE_AVr   ImportErrorr	   	Containerrk   r   AudioStreamr   r<   r.   rf   rB   rX   rZ   r]   strry   r   r   r   r   <module>   sh   


$ 