o
    eiS                     @   sZ   d Z ddlmZmZ ddlZddlZddlmZ ddlm	Z	 ddl
mZ G dd dZdS )	zGWrappers for Flair embedding classes

Authors
* Sylvain de Langen 2024
    )ListUnionNSentence)
Embeddings)fetchc                   @   s   e Zd ZdZdeddfddZe		d	 dd	d
Ze	dfde
ee eee  f dejdejfddZdedejfddZdS )FlairEmbeddingsz
    Simple wrapper for generic Flair embeddings.

    Arguments
    ---------
    embeddings : Embeddings
        The Flair embeddings object. If you do not have one initialized, use
        :meth:`~FlairEmbeddings.from_hf` instead.
    
embeddingsreturnNc                 C   s
   || _ d S )N)r	   )selfr	    r   g/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/lobes/models/flair/embeddings.py__init__   s   
zFlairEmbeddings.__init__./model_checkpoints	model.binc                 O   sD   |d | dd d }tt|||d}t| |g|R i |S )a  Fetches and load flair embeddings according to the
        :func:`speechbrain.utils.fetching.fetch` semantics. Embedding files will
        be saved into a unique subdirectory in `save_path`.

        Arguments
        ---------
        embeddings_class : class
            The class to use to initialize the model, e.g. `FastTextEmbeddings`.
        source : str
            The location of the model (a directory or HF repo, for instance).
        save_path : str, optional
            The saving location for the model (i.e. the root for the download or
            symlink location).
        filename : str, optional
            The filename of the model. The default is the usual filename for
            this kind of model.
        *args
            Extra positional arguments to pass to the flair class constructor
        **kwargs
            Extra keyword arguments to pass to the flair class constructor

        Returns
        -------
        FlairEmbeddings
        z/flair-emb--/z--)savedir)replacestrr   r   )embeddings_classsource	save_pathfilenameargskwargstarget
local_pathr   r   r   from_hf   s   $zFlairEmbeddings.from_hf)   inputs
pad_tensorc                    s   t |tr	tddd |D }| j| tj| jj	
ddd |D }tdd |D   fdd|D }t|S )	aS  Extract embeddings for a batch of sentences.

        Arguments
        ---------
        inputs : list of sentences (str or list of tokens)
            Sentences to embed, in the form of batches of lists of tokens
            (list of str) or a str.
            In the case of token lists, tokens do *not* need to be already
            tokenized for this specific sequence tagger. However, a token may be
            considered as a single word.
            Similarly, out-of-vocabulary handling depends on the underlying
            embedding class.
        pad_tensor : torch.Tensor, optional
            What embedding tensor (of shape `[]`, living on the same device as
            the embeddings to insert as padding.

        Returns
        -------
        torch.Tensor
            Batch of shape `[len(inputs), max_len, embed_size]`
        z.Expected a list of sentences, not a single strc                 S   s   g | ]}t |qS r   r   .0sentencer   r   r   
<listcomp>e   s    z,FlairEmbeddings.__call__.<locals>.<listcomp>r   c                 S   s    g | ]}t d d |D qS )c                 S   s   g | ]}|j qS r   )	embedding)r"   tokenr   r   r   r$   o   s    z7FlairEmbeddings.__call__.<locals>.<listcomp>.<listcomp>)torchstackr!   r   r   r   r$   n   s    c                 s   s    | ]}| d V  qdS )r   N)sizer"   embr   r   r   	<genexpr>r   s    z+FlairEmbeddings.__call__.<locals>.<genexpr>c              
      s0   g | ]}t j| |d  dgd dqS )r   r   )dim)r'   catrepeatr)   r*   longest_embr    r   r   r$   s   s    )
isinstancer   
ValueErrorr	   embedtoflairdevicebroadcast_toembedding_length	unsqueezemaxr'   r(   )r   r   r    	sentencessentence_embsr   r0   r   __call__G   s$   

zFlairEmbeddings.__call__wordc                 C   s   | |gddddf S )a:  Embeds a single word.

        Arguments
        ---------
        word : str
            Word to embed. Out-of-vocabulary handling depends on the underlying
            embedding class.

        Returns
        -------
        torch.Tensor
            Embedding for a single word, of shape `[embed_size]`
        r   Nr   )r   r?   r   r   r   
embed_word{   s   zFlairEmbeddings.embed_word)r   r   )r
   r   )__name__
__module____qualname____doc__r   r   staticmethodr   r'   zerosr   r   r   Tensorr>   r@   r   r   r   r   r      s$    
*
4r   )rD   typingr   r   r6   r'   
flair.datar   flair.embeddingsr   speechbrain.utils.fetchingr   r   r   r   r   r   <module>   s    