o
    ߥiA                     @   s  d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dZedeG dd deZG dd dee
Zedeejej ej!dG dd deZ"dS )zTPyTorch Space model. mainly copied from :module:`~transformers.modeling_xlm_roberta`    )DictN)nn)CrossEntropyLoss)add_start_docstrings)PreTrainedModel)Models)Model
TorchModel)Tensor)MODELS)SbertForMaskedLM
SbertModelSbertPreTrainedModel)Tasks   )SpaceConfigaK  

    This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic
    methods the library implements for all its model (such as downloading or saving, resizing the input embeddings,
    pruning heads etc.)

    This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module)
    subclass. Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to
    general usage and behavior.

    Parameters:
        config ([`SpaceConfig`]): Model configuration class with all the parameters of the
            model. Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model
            weights.
zThe bare Space Model transformer outputting raw hidden-states without any specific head on top. It is identical with the Bert Model from Transformersc                   @   s   e Zd ZdZeZdS )
SpaceModelz
    This class overrides [`SbertModel`]. Please check the superclass for the appropriate
    documentation alongside usage examples.
    N)__name__
__module____qualname____doc__r   config_class r   r   e/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/space/dialog_state_tracking.pyr   6   s    r   c                       sJ   e Zd ZdZeZdZdZdgZ fddZ	dd Z
e fd	d
Z  ZS )SpacePreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    bertTposition_idsc                    s*   t  j|jfi | t t| | d S )N)super__init__name_or_pathr   )selfconfigkwargs	__class__r   r   r   O   s   zSpacePreTrainedModel.__init__c                 C   s   t |tjr |jjjd| jjd |jdur|jj	  dS dS t |tj
rC|jjjd| jjd |jdurA|jj|j 	  dS dS t |tjrX|jj	  |jjd dS dS )zInitialize the weights        )meanstdN      ?)
isinstancer   Linearweightdatanormal_r!   initializer_rangebiaszero_	Embeddingpadding_idx	LayerNormfill_)r    moduler   r   r   _init_weightsS   s$   

z"SpacePreTrainedModel._init_weightsc                    sP   | dd}|du rtdi |}| |}|S i }tt| jdd|i|}|S )aU  Instantiate the model.

        @param kwargs: Input args.
                    model_dir: The model dir used to load the checkpoint and the label information.
                    num_labels: An optional arg to tell the model how many classes to initialize.
                                    Method will call utils.parse_label_mapping if num_labels is not input.
                    label2id: An optional label2id mapping, which will cover the label2id in configuration (if exists).

        @return: The loaded model, which is initialized by transformers.PreTrainedModel.from_pretrained
        	model_dirNpretrained_model_name_or_pathr   )popr   r   r   from_pretrained)clsr"   r7   r!   modelmodel_kwargsr#   r   r   _instantiatee   s   z!SpacePreTrainedModel._instantiate)r   r   r   r   r   r   base_model_prefixsupports_gradient_checkpointing_keys_to_ignore_on_load_missingr   r6   classmethodr>   __classcell__r   r   r#   r   r   D   s    r   z
    Space Model transformer with Dialog state tracking heads on top (a inform projection
    layer with a dialog state layer and a set of slots including history infromation from
    previous dialog) e.g. for multiwoz2.2 tasks.
    )module_namec                       s\   e Zd Z fddZdeeef deeef fddZ										d
dd	Z  Z	S )SpaceForDSTc              	      sr  t t| | |j| _|j| _|j| _|j	| _
|j| _|j| _|j| _|j| _d| jv r5| jd| _nd| _t|| _t|j| _t|j| _| jr^| dtt| jt| j | jrq| dtt| jt| j t| j| j| j  }| jD ]3}| d| t|j | | j | d| t|j d | d| t|j | t| jd	  q| !  d S )
Nreferinform_projectionds_projectionclass_token_   refer_r   )"r   rE   r   dst_slot_list	slot_listdst_class_typesclass_typesdst_class_labelsclass_labelsdst_token_loss_for_nonpointabletoken_loss_for_nonpointabledst_refer_loss_for_nonpointablerefer_loss_for_nonpointabledst_class_aux_feats_informclass_aux_feats_informdst_class_aux_feats_dsclass_aux_feats_dsdst_class_loss_ratioclass_loss_ratioindexrefer_indexr   r   r   Dropoutdst_dropout_ratedropoutdst_heads_dropout_ratedropout_heads
add_moduler*   lenhidden_sizeinit_weights)r    r!   aux_dimsslotr#   r   r   r      sR   



zSpaceForDST.__init__inputreturnc                    s  ddl }ddl}|d }|d  |d } fdd|d D }|||d	kd }| jjD ]}|D ]}	d|| |	< q3q/| } |d |d
 |d |d |d |d |d ||d d	}
 fdd|d D } fdd|d D } fdd|d D } fdd|d D }| jdi |
}| jjD ] }|d | d
d
 }t	|D ]\}	}|dkr||| |	< qqW d   n1 sw   Y  |
|||||d|d dS )a  return the result by the model

        Args:
            input (Dict[str, Tensor]): the preprocessed data

        Returns:
            Dict[str, Tensor]: results
                Example:
                    {
                        'inputs': dict(input_ids, input_masks,start_pos), # tracking states
                        'outputs': dict(slots_logits),
                        'unique_ids': str(test-example.json-0), # default value
                        'input_ids_unmasked': array([101, 7632, 1010,0,0,0])
                        'values': array([{'taxi-leaveAt': 'none', 'taxi-destination': 'none'}]),
                        'inform':  array([{'taxi-leaveAt': 'none', 'taxi-destination': 'none'}]),
                        'prefix': str('final'), #default value
                        'ds':  array([{'taxi-leaveAt': 'none', 'taxi-destination': 'none'}])
                    }

        Example:
            >>> from modelscope.hub.snapshot_download import snapshot_download
            >>> from modelscope.models.nlp import SpaceForDST
            >>> from modelscope.preprocessors import DialogStateTrackingPreprocessor
            >>> cache_path = snapshot_download('damo/nlp_space_dialog-state-tracking')
            >>> model = SpaceForDST.from_pretrained(cache_path)
            >>> preprocessor = DialogStateTrackingPreprocessor(model_dir=cache_path)
            >>> print(model(preprocessor({
                    'utter': {
                        'User-1': "Hi, I'm looking for a train that is going"
                            "to cambridge and arriving there by 20:45, is there anything like that?"
                    },
                    'history_states': [{}]
                })))
        r   Nbatchfeatures
diag_statec                    s$   g | ]} |   jd d qS )-rL   )itemguidsplit.0irn   r   r   
<listcomp>   s   $ z'SpaceForDST.forward.<locals>.<listcomp>	   0r   rL                  )		input_ids
input_masksegment_ids	start_posend_posinform_slot_idrefer_idro   class_label_idc                       g | ]	} |   jqS r   )rq   rr   rt   rw   r   r   rx          c                    r   r   )rq   valuesrt   rw   r   r   rx      r   c                    r   r   )rq   input_ids_unmaskedrt   rw   r   r   rx      s    c                    r   r   )rq   informrt   rw   r   r   rx      r   finalds)inputsoutputs
unique_idsr   r   r   prefixr   r   )
numpytorchwherearrayr!   rN   no_grad_forwardmax	enumerate)r    rk   npr   rm   ro   	turn_itrsreset_diag_staterj   rv   r   r   r   r   r   r   updatesur   rw   r   forward   s^   #

zSpaceForDST.forwardNc           *      C   sp  | j |||||d}|j}|j}| |}| |}|d ur*tt| d }|d ur?t	tt| d dd}d}i }i }i }i }i }| j
D ]X}| jrg| jrgt|| || |fd}n | jrvt|| |fd}n| jrt|| |fd}n|}| t| d| |}| t| d| |}|jddd	\}}|d}|d}| t| d
| |}|||< |||< |||< |||< |
d ur|d ur|d ur|	d urt||  dkr|| d||< t||  dkr	|| d||< |d}|| d| || d| tdd}td|d} tdd}!| ||| }"| ||| }#|"|# d }$|| dk }%| jsR|$|%9 }$|!||	| }&t|
| | j }'| jsl|&|'9 }&|||
| }(| jdkr| j|( d| j d |$  d| j d |&  })n| j|( d| j |$  })||) 7 }|)||< qN|f|||||f |jf }|S )N)attention_masktoken_type_idsr   	head_maskr   r%   r(   r   rJ   rK   rG   )dimrM   none)	reduction)r   ignore_indexg       @rL   )r   last_hidden_statepooler_outputrb   r   stacklistr   floatclamprO   rY   r[   catrH   rI   rd   getattrrs   squeezerf   sizeclamp_r   rU   eqr_   rW   r]   sumembedding_output)*r    r   r   r   r   r   r   r   r   r   r   ro   r   sequence_outputpooled_outputinform_labelsdiag_state_labels
total_lossper_slot_per_example_lossper_slot_class_logitsper_slot_start_logitsper_slot_end_logitsper_slot_refer_logitsrj   pooled_output_auxclass_logitstoken_logitsstart_logits
end_logitsrefer_logitsignored_indexclass_loss_fcttoken_loss_fctrefer_loss_fct
start_lossend_loss
token_losstoken_is_pointable
refer_losstoken_is_referrable
class_lossper_example_lossr   r   r   r     s   




(




zSpaceForDST._forward)
NNNNNNNNNN)
r   r   r   r   r   strr
   r   r   rC   r   r   r#   r   rE   }   s    ".YrE   )#r   typingr   r   r   torch.nnr   transformers.file_utilsr   transformers.modeling_utilsr   modelscope.metainfor   modelscope.modelsr   r	   modelscope.models.baser
   modelscope.models.builderr    modelscope.models.nlp.structbertr   r   r   modelscope.utils.constantr   configurationr   SPACE_START_DOCSTRINGr   r   register_moduletask_oriented_conversation	space_dstrE   r   r   r   r   <module>   s:   	9