o
    wi&A                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZ d dlZ	d dl
Z
d dlmZ d dlmZ d dlmZmZ d dl
mZ d d	lmZ d d
lmZ d dlmZ dgZG dd dZG dd deZdS )    N)defaultdict)partial)ListOptional)CutSet)MixedCut)
DictConfig	OmegaConf)nn)PromptedAudioToTextMiniBatch)WER)SerializationMultiTaskMetricc                   @   s   e Zd ZdZdZdZdefddZedd Z	edd	 Z
ed
d Zedd Zedd Zedd Zedd Zedd Zedd Zdd Zdee fddZdS )ConstraintParserz5Boolean Parser class for constraint passing in configN
constraintc                 C   s   t dd|  }|s| jS | | t|dkr|d S g }dg| dg }|rb|  }dkr[g }|rZ|  }dkrS| | }sMt	d| |
| n|
| |s7n|
| |s+| | }spt	d| |S )	Nz([()])z \1    r   ()z4Malformed subexpression find in constraint parsing: z"Parser cannot resolve constraint: )resubstripsplit_no_constraint_resolve_primitiveslenpop_resolve_boolsSyntaxErrorappend)selfr   arraystackcexprefnc r&   c/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/asr/metrics/multitask.pyparse_constraint)   s2   



z!ConstraintParser.parse_constraintc                 C   s    | j d u rtjtjd| _ | j S )N)z==z!=)_primitivesoperatoreqner   r&   r&   r'   
primitivesE   s
   
zConstraintParser.primitivesc                 C   s$   | j d u r| j| j| jd| _ | j S )N)andorxor)	_booleans_logical_and_logical_or_logical_xorr-   r&   r&   r'   booleansN   s   
zConstraintParser.booleansc                 C   s   | s	t d|  | | S )Nz:Malformed subexpression find in 'not' constraint parsing: 
ValueError)r#   
propertiesr&   r&   r'   _logical_notX   s   
zConstraintParser._logical_notc                 C   s,   | r|st d|  d| | |o||S )Nz:Malformed subexpression find in 'and' constraint parsing: z and r7   l_exprr_exprr9   r&   r&   r'   r3   ^      zConstraintParser._logical_andc                 C   s,   | r|st d|  d| | |p||S )Nz9Malformed subexpression find in 'or' constraint parsing: z or r7   r;   r&   r&   r'   r4   d   r>   zConstraintParser._logical_orc                 C   s,   | r|st d|  d| | |||A S )Nz:Malformed subexpression find in 'xor' constraint parsing: z xor r7   r;   r&   r&   r'   r5   j   r>   zConstraintParser._logical_xorc                 C   s   dS )NTr&   )r9   r&   r&   r'   r   p   s   zConstraintParser._no_constraintc                 C   s   | || |S Nget)r%   keyvalr9   r&   r&   r'   _static_constraintt   s   z#ConstraintParser._static_constraintc                 C   s.   | | }d uo| | }d uo| ||S r?   r@   )r%   key1key2r9   	prop_val1	prop_val2r&   r&   r'   _compare_constraintx   s
   z$ConstraintParser._compare_constraintc                 C   s   t |D ]O\}}| j D ]E\}}d| dd| d}}t||t||}}	|	r>|	 \}
}t| j||
|||< q|rQ| \}
}t| j||
|||< q	 qqd S )Nz
\.(\S+)\s*z\s*(\S+)z
\s*\.(\S+))		enumerater.   itemsr   matchgroupsr   rI   rD   )r   r   idxr"   noentailequalmatch_entailmatch_equalrE   rF   rC   r&   r&   r'   r      s   z$ConstraintParser._resolve_primitivesc                 C   s   d}g }|t |k r/|| }|dkr t| j||d  }|d7 }|| |d7 }|t |k s
|}| j D ]7\}}d}g }|t |k rk|| }||kr\t|| ||d  }|d7 }|| |d7 }|t |k sD|}q6t |dkrvd S |d S )Nr   notr   )r   r   r:   r   r6   rK   r   )r   r   rN   r!   r"   rO   rP   r&   r&   r'   r      s4   

zConstraintParser._resolve_bools)__name__
__module____qualname____doc__r)   r2   strr(   propertyr.   r6   staticmethodr:   r3   r4   r5   r   rD   rI   r   r   r   r&   r&   r&   r'   r   #   s2    

	






r   c                       s   e Zd ZdZdejdef fddZ		dded	e	j
d
e	j
de	j
dee dee fddZd	e	j
d
e	j
de	j
de	j
de	j
de	j
defddZdddZdd Zdd Z  ZS )r   a  
    Wrapper class for managing multiple metrics in multitask ASR/NLP models.

    This class enables conditional metric computation based on sample properties stored in Lhotse cuts.
    It's primarily designed for `EncDecMultiTaskModel` but can support any model with a prompt schema.

    Key Features:
        1. **Automatic Model Integration**: Instantiated metrics are automatically added as attributes
           to the parent model, enabling seamless integration with existing logging infrastructure.

        2. **Conditional Metric Updates**: Only samples meeting specific constraints are passed to
           each metric, avoiding inappropriate metric calculations (e.g., WER for translation tasks).

        3. **Flexible Constraint System**: Supports complex logical expressions for determining
           when metrics should be applied to samples.

        4. **Configuration Inheritance**: Global configuration parameters are automatically
           inherited by all metrics unless explicitly overridden.

    Args:
        model (nn.Module): Parent model that will receive metric instances as attributes.
                          Must have a `decoding` attribute for metrics that require decoding.
        cfg (DictConfig): Configuration dictionary containing metric definitions and constraints.

    Configuration Format:
        The configuration should follow this structure:

        ``'
        # Global parameters (inherited by all metrics unless overridden)
        log_predictions: true
        batch_dim_index: 0

        # Metric definitions
        metrics:
            wer:
                _target_: nemo.collections.asr.metrics.WER  # Metric class to instantiate
                constraint: ".task == transcribe"           # When to apply this metric
                use_cer: false                              # Metric-specific parameters
            bleu:
                _target_: nemo.collections.asr.metrics.BLEU
                constraint: ".task == translate"
                bleu_tokenizer: "13a"
                n_gram: 4

        ```

    Constraint Syntax:
        Constraints are evaluated against the `custom` dictionary of Lhotse cuts:

        - **Custom attribute Access**: `.task`, `.lang`, `.domain`
        - **Comparisons**: `==`, `!=`
        - **Logical Operations**: `and`, `or`, `not`, `xor`
        - **Property Comparisons**: `.source_lang == .target_lang`

        Examples:
        - `".task == transcribe"` - Apply to transcription tasks
        - `".task == translate and .source_lang != .target_lang"` - Cross-lingual translation
        - `"not .task == other"` - Apply to all tasks except 'other'
        - `".domain == medical or .domain == legal"` - Specific domains

    Usage Example:
        ```python
        # In model initialization
        if hasattr(cfg, 'multitask_metrics'):
            self.multitask_metrics = MultiTaskMetric(self, cfg.multitask_metrics)

        # During training/validation
        if hasattr(self, 'multitask_metrics'):
            metrics = self.multitask_metrics.eval(
                batch=batch,
                predictions=predictions,
                predictions_lengths=pred_lengths,
                predictions_mask=pred_mask,
                prefix="val",
                return_all_metrics=True
            )
            self.log_dict(metrics)
        ```

    Note:
        - Each metric receives the model's `decoding` instance for text decoding operations
        - Metrics are automatically instantiated for the parent model as attributes (e.g., `model.wer`, `model.bleu`)
        - Global configuration parameters are inherited unless explicitly overridden per metric
        - Metrics defined without 'constraint' keyword are called on every prediction sample
        - Empty batches (no samples matching constraints) are handled by child metrics.
    modelcfgc                    s   t    i i | _| _t|}t }t }|d	 D ]I\}}|dd}|	 D ]\}}	||vr8|	||< q,|j
|d< t|}
t|||
 t|
}||v rUtd|| |
| j|< ||| j|< qdS )z
        Initialize MultiTaskMetric with model and configuration.

        Args:
            model (nn.Module): Parent model that will contain metric instances
            cfg (DictConfig): Configuration containing metric definitions
        metricsr    decodingzMultiTaskMetric currently only supports one instance of each metric class. Please check your configs for duplicates values of `_target_` entry.N)super__init___metric_dict_constr_dictr	   to_containerr   setr   rK   ra   r   from_config_dictsetattrtype	TypeErroraddr(   )r   r]   r^   parser
seen_typesname
metric_cfgr   kvmetricmetric_type	__class__r&   r'   rc     s2   





zMultiTaskMetric.__init__FNbatchpredictionspredictions_lengthspredictions_maskreturn_all_metricsprefixc              
   C   sX   i }| j |||j|j|t|dd |jd | | j|r | dnd|d |   |S )Nprompt)rx   ry   targetstargets_lengthsrz   	input_idscuts_r`   )r|   r{   )update
transcripttranscript_lensgetattrr   computereset)r   rw   rx   ry   rz   r{   r|   metric_dictr&   r&   r'   eval2  s$   	
	zMultiTaskMetric.evalr~   r   r   r   c              
   C   sh   |  |\}}	| j D ]%\}
}||
 |	|
 }}|j|| || || || || || |d qd S )N)rx   ry   rz   r~   r   r   r   )_split_cutsrd   rK   r   )r   rx   ry   rz   r~   r   r   r   
cuts_split	idx_splitro   rs   cuts_subsetindicesr&   r&   r'   r   N  s   zMultiTaskMetric.updater`   c           	   	   C   s   i }| j  D ]=\}}t|tu r:| \}}}|r/|| d|| d|| d|i q|| d|i q||j||d q|S )Nwerwer_num	wer_denom)r{   r|   )rd   rK   rj   r   r   r   )	r   r{   r|   output_dictro   rs   r   r   r   r&   r&   r'   r   h  s,   



zMultiTaskMetric.computec                 C   s   dd | j  D  d S )Nc                 S   s   h | ]\}}|  qS r&   )r   ).0ro   rs   r&   r&   r'   	<setcomp>  s    z(MultiTaskMetric.reset.<locals>.<setcomp>)rd   rK   r-   r&   r&   r'   r     s   zMultiTaskMetric.resetc                 C   sz   t tt t}}t|D ]+\}}t|tr|jn|}| j D ]\}}||jr7|| 	| || 	| q q||fS )a  
        Split cuts based on metric constraints and return filtered subsets.

        This method evaluates each cut against all metric constraints and creates
        separate lists of cuts and indices for each metric.

        Args:
            cuts (CutSet): Input cuts containing sample metadata

        Returns:
            tuple: (cuts_splits, idx_splits) where:
                - cuts_splits (dict): Maps metric names to lists of matching cuts
                - idx_splits (dict): Maps metric names to lists of matching indices

        Note:
            - Handles both regular cuts and MixedCuts (uses first_non_padding_cut)
            - A single cut may match multiple metrics
            - Cuts not matching any constraints are ignored
        )
r   listrJ   
isinstancer   first_non_padding_cutre   rK   customr   )r   r   cuts_splits
idx_splitsrN   r"   rs   constrr&   r&   r'   r     s   
zMultiTaskMetric._split_cuts)FN)Fr`   )rV   rW   rX   rY   r
   Moduler   rc   r   torchTensorr   boolrZ   r   r   r   r   r   r   __classcell__r&   r&   ru   r'   r      sH    W4


!)r*   collectionsr   	functoolsr   typingr   r   regexr   r   lhotser   
lhotse.cutr   	omegaconfr   r	   r
   7nemo.collections.asr.data.audio_to_text_lhotse_promptedr    nemo.collections.asr.metrics.werr   nemo.core.classesr   __all__r   r   r&   r&   r&   r'   <module>   s"    
