o
    pi                     @   s@   d dl Z d dlmZmZmZ G dd deZG dd deZdS )    N)PreTrainedModelXLMRobertaConfigXLMRobertaModelc                       s"   e Zd ZdZd fdd	Z  ZS )MCLIPConfigzM-CLIP      c                    s"   || _ || _t jdi | d S )N )transformerDimensionsnumDimssuper__init__)selftransformerDimSizeimageDimSizekwargs	__class__r   h/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/pipelines/kandinsky/text_encoder.pyr      s   zMCLIPConfig.__init__)r   r   )__name__
__module____qualname__
model_typer   __classcell__r   r   r   r   r      s    r   c                       s(   e Zd ZeZ fddZdd Z  ZS )MultilingualCLIPc                    s>   t  j|g|R i | t|| _tjj|j|jd| _	d S )N)in_featuresout_features)
r   r   r   transformertorchnnLinearr	   r
   LinearTransformation)r   configargsr   r   r   r   r      s
   
zMultilingualCLIP.__init__c                 C   sN   | j ||dd }||d jdd|jddd d d f  }| ||fS )N)	input_idsattention_maskr         )dim)r   	unsqueezesumr    )r   r#   r$   embsembs2r   r   r   forward   s   .zMultilingualCLIP.forward)r   r   r   r   config_classr   r,   r   r   r   r   r   r      s    r   )r   transformersr   r   r   r   r   r   r   r   r   <module>   s    	