o
    ߥib                  	   @   sv  d dl mZmZmZmZ d dlZd dlm  mZ	 d dlmZ d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ ejejejdejejejdejejejdejej ejdG dd deZ!ejej"ej"dejejej"dejejej"dejejej"dejej ej"dG dd deZ#G dd dej$Z%dS )    )AnyDictListOptionalN)nn)CrossEntropyLoss)ACT2FN)Heads)	TorchHead)HEADS)'AttentionTokenClassificationModelOutputModelOutputBase
OutputKeysTokenClassificationModelOutput)Tasks)module_namec                       s>   e Zd Zd
 fdd	Z				ddefddZdd	 Z  ZS )LSTMCRFHeadd   Nc                    s@   t  j||d |d usJ t|d || _t|dd| _d S )Nhidden_size
num_labels   Tbatch_first)super__init__r   LinearffnCRFcrfselfr   r   kwargs	__class__ X/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/heads/crf_head.pyr   *   s   zLSTMCRFHead.__init__inputsc                 K   s   |  |j}td |dS )N)losslogits)r   last_hidden_stater   )r!   r'   attention_masklabel
label_maskoffset_mappingr"   r)   r%   r%   r&   forward0   s
   zLSTMCRFHead.forwardc                 C   X   | d }tj|jd |jdd d d f |d d d f k }| j||d}|S N   )devicer   	sumlongtorcharangeshaper4   r   decodesqueezer!   r)   r-   seq_lensmaskpredictsr%   r%   r&   r;   >      
zLSTMCRFHead.decode)r   NNNNN__name__
__module____qualname__r   r   r/   r;   __classcell__r%   r%   r#   r&   r   #   s    
r   c                       s<   e Zd Z fddZ				d	defddZdd Z  ZS )
TransformersCRFHeadc                    s8   t  jd||d| t||| _t|dd| _d S )Nr   Tr   r%   )r   r   r   r   linearr   r   r    r#   r%   r&   r   R   s   zTransformersCRFHead.__init__Nr'   c                 K   s   |  |j}|d urA|}|d }	t|}
t|jd D ]}|| || 	d
|	| d|
|d |	| d d f< q|
}td ||j|jdS )Nr2   r   )r(   r)   hidden_states
attentions)rI   r*   r6   r7   r8   
zeros_likeranger:   masked_select	unsqueezeviewr   rJ   rK   )r!   r'   r+   r,   r-   r.   r"   r)   r?   masked_lengthsmasked_logitsir%   r%   r&   r/   X   s(   
zTransformersCRFHead.forwardc                 C   r0   r1   r5   r=   r%   r%   r&   r;   q   rA   zTransformersCRFHead.decoderB   rC   r%   r%   r#   r&   rH   G   s    
rH   c                       s  e Zd ZdZd"dededdf fddZd#d	d
ZdefddZ			d$de
jde
jdee
j dede
jf
ddZ			d%de
jdee
j dee dee deeee   f
ddZ		d&de
jdee
j dee
j ddfddZde
jde
jde
jde
jfddZde
jde
jde
jfddZ	d'de
jde
jdee deee  fddZ	d'de
jde
jdedee deeee   f
d d!Z  ZS )(r   u  Conditional random field.
    This module implements a conditional random field [LMP01]_. The forward computation
    of this class computes the log likelihood of the given sequence of tags and
    emission score tensor. This class also has `~CRF.decode` method which finds
    the best tag sequence given an emission score tensor using `Viterbi algorithm`_.
    Args:
        num_tags: Number of tags.
        batch_first: Whether the first dimension corresponds to the size of a minibatch.
    Attributes:
        start_transitions (`~torch.nn.Parameter`): Start transition score tensor of size
            ``(num_tags,)``.
        end_transitions (`~torch.nn.Parameter`): End transition score tensor of size
            ``(num_tags,)``.
        transitions (`~torch.nn.Parameter`): Transition score tensor of size
            ``(num_tags, num_tags)``.
    .. [LMP01] Lafferty, J., McCallum, A., Pereira, F. (2001).
       "Conditional random fields: Probabilistic models for segmenting and
       labeling sequence data". *Proc. 18th International Conf. on Machine
       Learning*. Morgan Kaufmann. pp. 282–289.
    .. _Viterbi algorithm: https://en.wikipedia.org/wiki/Viterbi_algorithm

    Fnum_tagsr   returnNc                    sp   |dkrt d| t   || _|| _tt|| _	tt|| _
tt||| _|   d S )Nr   zinvalid number of tags: )
ValueErrorr   r   rT   r   r   	Parameterr8   emptystart_transitionsend_transitionstransitionsreset_parameters)r!   rT   r   r#   r%   r&   r      s   
zCRF.__init__c                 C   s:   t j| jdd t j| jdd t j| jdd dS )zInitialize the transition parameters.
        The parameters will be initialized randomly from a uniform distribution
        between -0.1 and 0.1.
        gg?N)r   inituniform_rY   rZ   r[   r!   r%   r%   r&   r\      s   zCRF.reset_parametersc                 C   s   | j j d| j dS )Nz
(num_tags=))r$   rD   rT   r_   r%   r%   r&   __repr__   s   zCRF.__repr__mean	emissionstagsr?   	reductionc                 C   s   |dvrt d| |du rtj|tj|jd}|jtjkr#| }| j|||d | jr@|	dd}|	dd}|	dd}| 
|||}| ||}|| }|dkrW|S |d	kr_| S |d
krg| S | |   S )a  Compute the conditional log likelihood of a sequence of tags given emission scores.
        Args:
            emissions (`~torch.Tensor`): Emission score tensor of size
                ``(seq_length, batch_size, num_tags)`` if ``batch_first`` is ``False``,
                ``(batch_size, seq_length, num_tags)`` otherwise.
            tags (`~torch.LongTensor`): Sequence of tags tensor of size
                ``(seq_length, batch_size)`` if ``batch_first`` is ``False``,
                ``(batch_size, seq_length)`` otherwise.
            mask (`~torch.ByteTensor`): Mask tensor of size ``(seq_length, batch_size)``
                if ``batch_first`` is ``False``, ``(batch_size, seq_length)`` otherwise.
            reduction: Specifies  the reduction to apply to the output:
                ``none|sum|mean|token_mean``. ``none``: no reduction will be applied.
                ``sum``: the output will be summed over batches. ``mean``: the output will be
                averaged over batches. ``token_mean``: the output will be averaged over tokens.
        Returns:
            `~torch.Tensor`: The log likelihood. This will have size ``(batch_size,)`` if
            reduction is ``none``, ``()`` otherwise.
        )noner6   rb   
token_meanzinvalid reduction: Ndtyper4   )rd   r?   r   r3   rf   r6   rb   )rV   r8   	ones_likeuint8r4   ri   byte	_validater   	transpose_compute_score_compute_normalizerr6   rb   float)r!   rc   rd   r?   re   	numeratordenominatorllhr%   r%   r&   r/      s*   zCRF.forwardnbestpad_tagc                 C   s   |du rd}|du rt j|jdd t j|jd}|jt jkr#| }| j||d | jr9|	dd}|	dd}|dkrG| 
|||dS | ||||S )a  Find the most likely tag sequence using Viterbi algorithm.
        Args:
            emissions (`~torch.Tensor`): Emission score tensor of size
                ``(seq_length, batch_size, num_tags)`` if ``batch_first`` is ``False``,
                ``(batch_size, seq_length, num_tags)`` otherwise.
            mask (`~torch.ByteTensor`): Mask tensor of size ``(seq_length, batch_size)``
                if ``batch_first`` is ``False``, ``(batch_size, seq_length)`` otherwise.
            nbest (`int`): Number of most probable paths for each sequence
            pad_tag (`int`): Tag at padded positions. Often input varies in length and
                the length will be padded to the maximum length in the batch. Tags at
                the padded positions will be assigned with a padding tag, i.e. `pad_tag`
        Returns:
            A PyTorch tensor of the best tag sequence for each batch of shape
            (nbest, batch_size, seq_length)
        Nr3   r   rh   )r?   r   )r8   onesr:   rk   r4   ri   rl   rm   r   rn   _viterbi_decoderO   _viterbi_decode_nbest)r!   rc   r?   ru   rv   r%   r%   r&   r;      s"   z
CRF.decodec                 C   s  |  dkrtd|   |d| jkr%td| j d|d |d urG|jd d |jkrGtdt|jd d  dt|j |d ur|jd d |jkritdt|jd d  dt|j | j or|d	  }| jo|d d d	f  }|s|std
d S d S d S )N   z(emissions must have dimension of 3, got r   z(expected last dimension of emissions is z, got z?the first two dimensions of emissions and tags must match, got z and z?the first two dimensions of emissions and mask must match, got r   z)mask of the first timestep must all be on)dimrV   sizerT   r:   tupler   all)r!   rc   rd   r?   no_empty_seqno_empty_seq_bfr%   r%   r&   rm     sD   
zCRF._validatec           
      C   s   |j \}}| }| j|d  }||dt||d f 7 }td|D ]'}|| j||d  || f ||  7 }|||t||| f ||  7 }q#| jddd }||t|f }	|| j	|	 7 }|S )Nr   r3   r{   )
r:   rq   rY   r8   r9   rM   r[   r7   r6   rZ   )
r!   rc   rd   r?   
seq_length
batch_sizescorerS   seq_ends	last_tagsr%   r%   r&   ro   "  s   
&&zCRF._compute_scorec           	      C   s   | d}| j|d  }td|D ](}|d}|| d}|| j | }tj|dd}t|| d||}q|| j7 }tj|ddS )Nr   r3   r   r   )	r|   rY   rM   rO   r[   r8   	logsumexpwhererZ   )	r!   rc   r?   r   r   rS   broadcast_scorebroadcast_emissions
next_scorer%   r%   r&   rp   B  s   


zCRF._compute_normalizerc              	   C   s  |d u rd}|j }|j\}}| j|d  }tj||| jftj|d}tj|| jftj|d}	tj||f|tj|d}
td|D ]?}|	d}|| 	d}|| j
 | }|jdd\}}t|| 	d ||}t|| 	d ||	}|||d < q?|| j }|jdd\}}| jddd }|dd }|d|ddddd| j|ddddd| j |dd }tj||ftj|d}tj|dtj|d}t|d ddD ]}t|| d|}|j|||< qt| ||
ddS )Nr   rh   r3   r   r   r2   )r4   r:   rY   r8   zerosrT   r7   fullrM   rO   r[   maxr   boolrZ   r6   rn   
contiguousscatter_rP   expandgatherdata)r!   rc   r?   rv   r4   r   r   r   history_idxoor_idxoor_tagrS   r   broadcast_emissionr   indices	end_score_end_tagr   best_tags_arr	best_tagsidxr%   r%   r&   rx   p  sb   




zCRF._viterbi_decodec              
   C   s  |d u rd}|j }|j\}}| j|d  }tj||| j|ftj|d}	tj|| j|ftj|d}
tj|||f|tj|d}td|D ]}|dkr\|	d}|| 	d}|| j
 | }n|	d}|| 	d	d}|| j
	d | }||d| jj|dd\}}|dkr|	ddd|}|| }|dd}|dd}t|| 	d 	d||}t|| 	d	d ||
}||	|d < qB|| j	d }||dj|dd\}}| jddd }|	dd }	|	d|dddddd| j||ddd|dd| j| |	dd }	tj|||ftj|d}tj|tj|ddd|d}t|d ddD ]}t|	| |dd|}|j|d| ||< q@t|	d||dddS )Nr   rh   r3   r2   r   r   )r4   r:   rY   r8   r   rT   r7   r   rM   rO   r[   rP   topkr   rn   r   r   rZ   r6   r   r   r9   r   r   permute)r!   rc   r?   ru   rv   r4   r   r   r   r   r   r   rS   r   r   r   r   r   r   r   r   r   r   r   r%   r%   r&   ry     s   	




zCRF._viterbi_decode_nbest)F)rU   N)Nrb   )NNN)NN)N)rD   rE   rF   __doc__intr   r   r\   strra   r8   Tensor
LongTensorr   
ByteTensorr/   r   r;   rm   ro   rp   FloatTensorrx   ry   rG   r%   r%   r#   r&   r   z   s    
	
5
)

 
1

ar   )&typingr   r   r   r   r8   torch.nn.functionalr   
functionalFtorch.nnr   transformers.activationsr   modelscope.metainfor	   modelscope.models.baser
   modelscope.models.builderr   modelscope.outputsr   r   r   r   modelscope.utils.constantr   register_moduletoken_classificationlstm_crfnamed_entity_recognitionword_segmentationpart_of_speechr   transformer_crfrH   Moduler   r%   r%   r%   r&   <module>   sB   *