o
    ߥiv                     @   sz  d dl Z d dlZd dlZd dlZd dlmZmZmZ d dlZd dl	Zd dl
Z
d dlmZ d dlmZmZmZmZmZ d dlmZ ddlmZ ddlmZ zd d	lmZ ejZW n eyc   ejZY nw eje
jejd
k rved ddgZ dd Z!dd Z"e
j#$ rdndddfde%dee%e
j&f de'de%fddZ(		d dee%ee% f de)de'dee
j*e
j+f fddZ,dS )!    N)AnyListUnion)Image)
CenterCropCompose	NormalizeResizeToTensor)tqdm   )build_model)SimpleTokenizer)InterpolationModez1.7.1z.PyTorch version 1.7.1 or higher is recommendedloadtokenizec                 C   s
   |  dS )NRGB)convert)image r   f/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/cv/text_driven_segmentation/clip.py_convert_image_to_rgb"   s   
r   c                 C   s&   t t| tdt| tt tddgS )N)interpolation)g3<4'?gwgM?gy{ ?)gB91?gwt.?g	U?)r   r	   BICUBICr   r   r
   r   )n_pxr   r   r   
_transform&   s   
r   cudacpuFnamedevicejitrootc           
         s  |st   }t dkr|  |t|jjfS tjj	 fddg d}dd |j
dD d fd	d
}|| ||j ||j t dkrtjj	dd g d}t|j
d d }| fdd}	||	 |	|j |	|j |  |t|j fS )Nr   c                      s   t g t  S N)torchonestor   r   )r   r   r   <lambda>?   s    zload.<locals>.<lambda>)example_inputsc                 S   s   g | ]
}d t |v r|qS )Device)repr).0nr   r   r   
<listcomp>@   s
    zload.<locals>.<listcomp>prim::Constantc                    s   zt | dr
| jgng }W n ty   g }Y nw t | dr%|| jj |D ]}|dD ]}d| v rDt|d drD|	  q.q'd S )Ngraphforward1r-   valuer   )
hasattrr/   RuntimeErrorappendr0   findAllNodesattributeNamesstr
startswithcopyAttributes)modulegraphsr/   node)device_noder   r   patch_deviceE   s$   

zload.<locals>.patch_devicec                   S   s   t g  S r"   )r#   r$   floatr   r   r   r   r&   [   s    aten::tor   c                    s   zt | dr
| jgng }W n ty   g }Y nw t | dr%|| jj |D ](}|dD ] }t| }dD ]}||  d dkrM||  	  q8q.q'd S )Nr/   r0   r@   )r      r1      )
r2   r/   r3   r4   r0   r5   listinputsr<   r9   )r:   r;   r/   r<   rD   i)
float_noder   r   patch_float_   s"   
zload.<locals>.patch_float)r   r%   r7   r?   r   visualinput_resolutionr#   r    tracer/   r5   applyencode_imageencode_textrC   findNoderD   r<   item)
r   r   r    r!   modeldevice_holderr>   float_holderfloat_inputrG   r   )r   r=   rF   r   r   1   s:   






M   textscontext_lengthtruncatereturnc                    s   t |tr|g} jd  jd  fdd|D }tjtjtjdk r6tjt	||tj
d}ntjt	||tjd}t|D ]0\}}t	||krh|r\|d| }|d< ntd	||  d
| t|||dt	|f< qE|S )a  
    Returns the tokenized representation of given input string(s)

    Parameters
    ----------
    texts : Union[str, List[str]]
        An input string or a list of input strings to tokenize

    context_length : int
        The context length to use; all CLIP models use 77 as the context length

    truncate: bool
        Whether to truncate the text in case its encoding is longer than the context length

    Returns
    -------
    A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length].
    We return LongTensor when torch version is <1.8.0, since older index_select requires indices to be long.
    z<|startoftext|>z<|endoftext|>c                    s"   g | ]}g  | g qS r   )encode)r*   text
_tokenizer	eot_token	sot_tokenr   r   r,      s    ztokenize.<locals>.<listcomp>z1.8.0)dtypeNr.   zInput z  is too long for context length )
isinstancer7   encoder	packagingversionparser#   __version__zeroslenlongint	enumerater3   tensor)r\   rU   rV   rW   
all_tokensresultrE   tokensr   r[   r   r   z   s0   




)rT   F)-hashlibosurllibwarningstypingr   r   r   rb   packaging.versionr#   PILr   torchvision.transformsr   r   r   r	   r
   r   rP   r   simple_tokenizerr   
_Tokenizerr   r   ImportErrorrc   rd   re   warn__all__r   r   r   is_availabler7   r   boolr   ri   	IntTensor
LongTensorr   r   r   r   r   <module>   sf   




L