o
    i                     @   sR   d Z ddlZddlZddlZddlmZ ddlmZ G dd dejZ	dd Z
dS )	a  
This script is used to provide utility functions designed for multi-speaker ASR.

Copyright 2017 Johns Hopkins University (Shinji Watanabe)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

Most functions can be directly used as in asr_utils.py:
    CompareValueTrigger, restore_snapshot, adadelta_eps_decay, chainer_load,
    torch_snapshot, torch_save, torch_resume, AttributeDict, get_model_conf.

    N)	extension)parse_hypothesisc                   @   sJ   e Zd ZdZdddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )PlotAttentionReporta6  Plot attention reporter.

    Args:
        att_vis_fn (espnet.nets.*_backend.e2e_asr.calculate_all_attentions):
            Function of attention visualization.
        data (list[tuple(str, dict[str, dict[str, Any]])]): List json utt key items.
        outdir (str): Directory to save figures.
        converter (espnet.asr.*_backend.asr.CustomConverter):
            CustomConverter object. Function to convert data.
        device (torch.device): The destination device to send tensor.
        reverse (bool): If True, input and output length are reversed.

    Fc                 C   sL   || _ t|| _|| _|| _|| _|| _tj	
| js$t| j dS dS )zInitialize PlotAttentionReport.N)
att_vis_fncopydeepcopydataoutdir	converterdevicereverseospathexistsmakedirs)selfr   r   r	   r
   r   r    r   L/home/ubuntu/.local/lib/python3.10/site-packages/espnet/asr/asr_mix_utils.py__init__(   s   zPlotAttentionReport.__init__c                 C   sn   |   }t|D ],\}}t|D ]#\}}d| j| j| d |d f }| |||}| ||| qqdS )z&Plot and save imaged matrix of att_ws.z&%s/%s.ep.{.updater.epoch}.output%d.pngr      N)get_attention_weights	enumerater	   r   get_attention_weight_plot_and_save_attentionformat)r   trainer	att_ws_sdnsatt_wsidxatt_wfilenamer   r   r   __call__3   s   zPlotAttentionReport.__call__c           	      C   sp   |   }t|D ]-\}}t|D ]$\}}| |||}| |}|d| j| d  | | |  qqdS )z3Add image files of attention matrix to tensorboard.z%sr   N)r   r   r   draw_attention_plot
add_figurer   gcfclf)	r   loggerstepr   r   r   r   r    plotr   r   r   log_attentions@   s   
 
z"PlotAttentionReport.log_attentionsc                 C   s(   |  | j | jg| j}| j| }|S )al  Return attention weights.

        Returns:
            arr_ws_sd (numpy.ndarray): attention weights. It's shape would be
                differ from bachend.dtype=float
                * pytorch-> 1) multi-head case => (B, H, Lmax, Tmax). 2)
                  other case => (B, Lmax, Tmax).
                * chainer-> attention weights (B, Lmax, Tmax).

        )r
   	transformr   r   r   )r   batchr   r   r   r   r   J   s   
z)PlotAttentionReport.get_attention_weightsc                 C   s   | j r&t| j| d d d d d }t| j| d d | d d }n"t| j| d d | d d }t| j| d d d d d }t|jdkr^|ddd|d|f }|S |d|d|f }|S )z5Transform attention weight in regard to self.reverse.r   inputr   shapeoutput   N)r   intr   lenr.   )r   r   r    spkr_idxdec_lenenc_lenr   r   r   r   Y   s   "$""z(PlotAttentionReport.get_attention_weightc                 C   s   ddl }|d ddlm} t|jdkr;t|dD ]\}}|dt|| |j|dd |	d |
d	 qn|j|dd |	d |
d	 |  |S )
zVisualize attention weights matrix.

        Args:
            att_w(Tensor): Attention weight matrix.

        Returns:
            matplotlib.pyplot: pyplot object with attention matrix image.

        r   NAggr0   r   auto)aspectzEncoder IndexzDecoder Index)
matplotlibusematplotlib.pyplotpyplotr2   r.   r   subplotimshowxlabelylabeltight_layout)r   r    r9   plthawr   r   r   r#   g   s   




z'PlotAttentionReport.draw_attention_plotc                 C   s    |  |}|| |  d S )N)r#   savefigclose)r   r    r!   rB   r   r   r   r      s   

z,PlotAttentionReport._plot_and_save_attentionN)F)__name__
__module____qualname____doc__r   r"   r*   r   r   r#   r   r   r   r   r   r      s    

r   c                 C   s   t  }| d |d< t|}g |d< t|D ]a}g }|| }t|dD ]L\}}	t|	|\}
}}}t | d |  }|d  d| 7  < |
|d< ||d< ||d< ||d	< || |dkrntd
|d   td|d   q"|d | q|S )aW  Add N-best results to json.

    Args:
        js (dict[str, Any]): Groundtruth utterance dict.
        nbest_hyps_sd (list[dict[str, Any]]):
            List of hypothesis for multi_speakers (# Utts x # Spkrs).
        char_list (list[str]): List of characters.

    Returns:
        dict[str, Any]: N-best results added utterance dict.

    utt2spkr/   r   namez[%d]rec_text	rec_tokenrec_tokenidscorezgroundtruth: %stextzprediction : %s)	dictr2   ranger   r   itemsappendlogginginfo)jsnbest_hyps_sd	char_listnew_js	num_spkrsr   tmp_js
nbest_hypsnhyprM   rN   rO   rP   out_dicr   r   r   add_results_to_json   s,   
rb   )rJ   r   rV   r   chainer.trainingr   espnet.asr.asr_utilsr   	Extensionr   rb   r   r   r   r   <module>   s   p