o
    ei                     @   s   d Z ddlZdd ZdS )z@
Utilities for word embeddings

Authors
* Artem Ploujnikov 2021
    Nc                 C   s   ||k}|j dd}t| d|d| d| j}||d  }tt||D ]$\}\}	}
| ||	f ||< d|||
dddf< d|||| ddf< q.|S )af  Expands word embeddings to a sequence of character
    embeddings, assigning each character the word embedding
    of the word to which it belongs

    Arguments
    ---------
    emb: torch.Tensor
        a tensor of word embeddings
    seq: torch.Tensor
        a tensor of character embeddings
    seq_len: torch.Tensor
        a tensor of character embedding lengths
    word_separator: torch.Tensor
        the word separator being used

    Returns
    -------
    char_word_emb: torch.Tensor
        a combined character + word embedding tensor

    Example
    -------
    >>> import torch
    >>> emb = torch.tensor(
    ...     [[[1., 2., 3.],
    ...       [3., 1., 2.],
    ...       [0., 0., 0.]],
    ...      [[1., 3., 2.],
    ...       [3., 2., 1.],
    ...       [2., 3., 1.]]]
    ... )
    >>> seq = torch.tensor(
    ...     [[1, 2, 0, 2, 1, 0],
    ...      [1, 0, 1, 2, 0, 2]]
    ... )
    >>> seq_len = torch.tensor([4, 5])
    >>> word_separator = 0
    >>> expand_to_chars(emb, seq, seq_len, word_separator)
    tensor([[[1., 2., 3.],
             [1., 2., 3.],
             [0., 0., 0.],
             [3., 1., 2.],
             [3., 1., 2.],
             [0., 0., 0.]],
    <BLANKLINE>
            [[1., 3., 2.],
             [0., 0., 0.],
             [3., 2., 1.],
             [3., 2., 1.],
             [0., 0., 0.],
             [2., 3., 1.]]])
    )dimr   N)	cumsumtorchzerossizetodeviceint	enumeratezip)embseqseq_lenword_separatorword_boundarieswordschar_word_embseq_len_idxidxitemitem_length r   V/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/wordemb/util.pyexpand_to_chars   s   5 r   )__doc__r   r   r   r   r   r   <module>   s    