o
    GiU                     @   sT  d dl mZ d dl mZ d dl mZ d dlZd dlZd dlZddlmZ ddl	m
Z
 G dd	 d	ejjZG d
d dejjZG dd dejjZG dd dejjZG dd dejjZG dd dejjZ		d&dede
dededededee dee defddZ		 			d'dede
ded!ed"ed#eej dee dee defd$d%ZdS )(    )List)Optional)TupleN   )Fsa)DenseFsaVecc                   @   X   e Zd Zededededejdejf
ddZedejde	d	d	d	ejf fd
dZ
d	S )_GetTotScoresFunctionfsaslog_semiringuse_double_scoresunused_scoresreturnc                 C   s2   |j ||d }|| _|| _|| _| | |S )a  Compute the total loglikes of an FsaVec.

        Args:
          fsas:
            The input FsaVec (must have 3 axes)
          log_semiring:
            True to use log semiring, false to use tropical
          use_double_scores:
            False to use float, i.e., single precision floating point,
            to compute log likes. True to use double precision.
          unused_scores:
            It is used only for backward propagation purpose.
            It should equal `fsas.scores`.

        Returns:
           The total loglike for each FSA in `fsas`.  If
          `use_double_scores==True`, its dtype is `torch.float64`; it is
          `torch.float32` otherwise.

        r   r   )_get_tot_scoresdetachr
   r   r   save_for_backward)ctxr
   r   r   r   
tot_scores r   ?/home/ubuntu/.local/lib/python3.10/site-packages/k2/autograd.pyforward    s   
z_GetTotScoresFunction.forwardtot_scores_gradNc                 C   s   | j }| j}| j}| j\}|du r8||}t|j|\}}|r*t|j||}	nt	|j||}	ddd|	fS |
||}
|rDtj}ntj}||j|
|}	ddd|	fS )a  
        Caution: this backward function uses a slightly indirect approach to
        compute the gradients.  Since the tot_scores are just computed as
        specific elements of `forward_scores`, the obvious way to get
        derivatives w.r.t. fsas.scores would be to set gradients w.r.t. the
        forward scores and then use BackpropGetForwardScores() to do the
        backprop.  But that might be a little slower than what we actually do.
        What we actually do is to compute the backward scores and use them and
        the forward scores to compute the posteriors, and let the derivs be the
        (posterior in FSA * loss_deriv w.r.t. that FSA's tot_prob).  The
        result is the same, and the underlying C++ code is simpler.
        (BackpropGetForwardScores() was added in order to compute slightly
        more difficult objective functions, that depend on the individual
        arc posteriors).
        FN)r
   r   r   saved_tensors_get_entering_arcs_k2shortest_patharcs'get_tot_scores_double_tropical_backward&get_tot_scores_float_tropical_backward_get_arc_post"get_tot_scores_double_log_backward!get_tot_scores_float_log_backward)r   r   r
   r   r   scoresentering_arcs_
ragged_intscores_gradarc_post
bprop_funcr   r   r   backwardJ   s*   
z_GetTotScoresFunction.backward__name__
__module____qualname__staticmethodr   booltorchTensorr   r   r*   r   r   r   r   r	          )r	   c                   @   r   )_GetForwardScoresFunctionr
   r   r   r   r   c                 C   2   |j ||d }|| _|| _|| _| | |S )a  Compute the forward scores of an FsaVec.

        Args:
          fsas:
            The input FsaVec (must have 3 axes)
          log_semiring:
            True to use log semiring, false to use tropical
          use_double_scores:
            False to use float, i.e., single precision floating point,
            to compute log likes. True to use double precision.
          unused_scores:
            It is used only for backward propagation purpose.
            It should equal `fsas.scores`.

        Returns:
           The total loglike for each FSA in `fsas`.  If
          `use_double_scores==True`, its dtype is `torch.float64`; it is
          `torch.float32` otherwise.

        r   )_get_forward_scoresr   r
   r   r   r   )r   r
   r   r   r   forward_scoresr   r   r   r   z   s   
z!_GetForwardScoresFunction.forwardforward_scores_gradNc              	   C   sr   | j }| j}| j}| j\}|rd }n||}| }| }|r$tjntj	}	|	|j
||||||d}
d d d |
fS )N)state_batchesleaving_arc_batchesr   r$   r7   forward_scores_deriv)r
   r   r   r   r   _get_state_batches_get_leaving_arc_batchesr   "backprop_get_forward_scores_double!backprop_get_forward_scores_floatr   )r   r8   r
   r   r   r7   r$   r9   r:   r)   r'   r   r   r   r*      s4   
	z"_GetForwardScoresFunction.backwardr+   r   r   r   r   r4   x   r3   r4   c                   @   r   )_GetBackwardScoresFunctionr
   r   r   r   r   c                 C   r5   )aH  Compute the backward scores of an FsaVec.

        Args:
          fsas:
            The input FsaVec (must have 3 axes)
          log_semiring:
            True to use log semiring, false to use tropical
          use_double_scores:
            False to use float, i.e., single precision floating point,
            to compute log likes. True to use double precision.
          unused_scores:
            It is used only for backward propagation purpose.
            It should equal `fsas.scores`.

        Returns:
          A torch.Tensor with shape equal to (num_states,)
        r   )_get_backward_scoresr   r
   r   r   r   )r   r
   r   r   r   backward_scoresr   r   r   r      s   
z"_GetBackwardScoresFunction.forwardbackward_scores_gradNc           
      C   s\   | j }| j}| j}| j\}| }| }|rtjntj}||j	|||||d}	d d d |	fS )N)r9   entering_arc_batchesr   rB   backward_scores_deriv)
r
   r   r   r   r<   _get_entering_arc_batchesr   #backprop_get_backward_scores_double"backprop_get_backward_scores_floatr   )
r   rC   r
   r   r   rB   r9   rD   r)   r'   r   r   r   r*      s,   z#_GetBackwardScoresFunction.backwardr+   r   r   r   r   r@      s    &r@   c                   @   sl   e Zd Zededededejdejdejdejfdd	Zed
ejde	dddejejejf fddZ
dS )_GetArcPostFunctionr
   r   r   r   r7   rB   r   c                 C   s.   |j ||d }|| _|| _| || |S )aq  Compute the arc-level posteriors of an FsaVec

        Args:
          fsas:
            The input FsaVec (must have 3 axes)
          log_semiring:
            True to use log semiring, false to use tropical
          use_double_scores:
            False to use float, i.e., single precision floating point,
            to compute log likes. True to use double precision.
          unused_scores:
            It is used only for backward propagation purpose.
            It should equal `fsas.scores`.
          forward_scores:
            The forward scores of the FSA, computed in a differentiable
            way by fsas.get_forward_scores(); must be provided as an
            explicit arg for backprop reasons.
          backward_scores:
            The backward scores of the FSA, computed in a differentiable
            way from fsas.get_backward_scores(); must be provided as an
            explicit arg for backprop reasons.

        Returns:
          The per-arc log-posterior for each arc in `fsas`.  If
          `use_double_scores==True`, its dtype is `torch.float64`; it is
          `torch.float32` otherwise.

        r   )r    r   r
   r   r   )r   r
   r   r   r   r7   rB   r(   r   r   r   r     s   'z_GetArcPostFunction.forwardarc_post_gradNc                 C   s\   | j }| j}| j\}}|rtjntj}| }|  }||j	||\}	}
d d d ||	|
fS )N)
r
   r   r   r   backprop_get_arc_post_doublebackprop_get_arc_post_float_get_incoming_arcsr   cloner   )r   rJ   r
   r   r7   rB   r)   incoming_arcsarc_scores_gradr8   rC   r   r   r   r*   B  s$   

z_GetArcPostFunction.backwardr+   r   r   r   r   rI     s&    1rI   c                   @   s   e Zd Ze		ddededee dedededed	e	j
d
e	j
dee dee de	j
fddZede	j
deddddddde	j
e	j
f	 fddZdS )_IntersectDensePrunedFunctionNa_fsasb_fsasout_fsasearch_beamoutput_beammin_active_statesmax_active_statesunused_scores_aunused_scores_bseqframe_idx_nameframe_idx_namer   c                 C   s  t |dksJ tj|j|j||||d\}}}t||d< |jddD ]3\}}t|tj	r5t
||}nt|tjs=J |jtjksEJ |j|ddd\}}t|d || q$| D ]\}}t|d || q\|| _|| _| ||	 d}|dur|j }|| }|j }t
|d|}|t
|d| }t|d |rJ t|d || |
dur|du r|j }|| }t|d |
rJ t|d |
| |d jS )a
  Intersect array of FSAs on CPU/GPU.

        Args:
          a_fsas:
            Input FsaVec, i.e., `decoding graphs`, one per sequence. It might
            just be a linear sequence of phones, or might be something more
            complicated. Must have either `a_fsas.shape[0] == b_fsas.dim0()`, or
            `a_fsas.shape[0] == 1` in which case the graph is shared.
          b_fsas:
            Input FSAs that correspond to neural network output.
          out_fsa:
            A list containing ONLY one entry which will be set to the
            generated FSA on return. We pass it as a list since the return
            value can only be types of torch.Tensor in the `forward` function.
          search_beam:
            Decoding beam, e.g. 20.  Smaller is faster, larger is more exact
            (less pruning). This is the default value; it may be modified by
            `min_active_states` and `max_active_states`.
          output_beam:
            Pruning beam for the output of intersection (vs. best path);
            equivalent to kaldi's lattice-beam.  E.g. 8.
          max_active_states:
            Maximum number of FSA states that are allowed to be active on any
            given frame for any given intersection/composition task. This is
            advisory, in that it will try not to exceed that but may not always
            succeed. You can use a very large number if no constraint is needed.
          min_active_states:
            Minimum number of FSA states that are allowed to be active on any
            given frame for any given intersection/composition task. This is
            advisory, in that it will try not to have fewer than this number
            active. Set it to zero if there is no constraint.
          unused_scores_a:
            It equals to `a_fsas.scores` and its sole purpose is for back
            propagation.
          unused_scores_b:
            It equals to `b_fsas.scores` and its sole purpose is for back
            propagation.
          seqframe_idx_name:
            If set (e.g. to 'seqframe'), an attribute in the output will be
            created that encodes the sequence-index and the frame-index within
            that sequence; this is equivalent to a row-index into b_fsas.values,
            or, equivalently, an element in b_fsas.shape.
          frame_idx_name:
            If set (e.g. to 'frame', an attribute in the output will be created
            that contains the frame-index within the corresponding sequence.
        Returns:
           Return `out_fsa[0].scores`.
        r   )rR   rS   rU   rV   rW   rX   r   Finclude_scoresaxisneed_value_indexesN)lenr   intersect_dense_prunedr   dense_fsa_vecr   named_tensor_attr
isinstancer1   r2   index_selectk2RaggedTensordtypeint32indexsetattrnamed_non_tensor_attr	arc_map_a	arc_map_br   scores_dim1shaperow_ids
row_splitshasattrr#   )r   rR   rS   rT   rU   rV   rW   rX   rY   rZ   r[   r\   
ragged_arcro   rp   namea_valuevaluer%   seqframe_idxnum_colsrr   fsa_idx0	frame_idxr   r   r   r   `  sV   =





z%_IntersectDensePrunedFunction.forwardout_fsa_gradc                 C   s   | j \}}| j}| j}tj|d|j|jdd}tj|j|j|jdd	 }t
||| t
|||d d d d d d d d ||d d fS Nr   F)rj   devicerequires_grad)r   ro   rp   r1   zerossizerj   r   rr   
contiguousr   	index_addviewr   r~   a_scoresb_scoresro   rp   grad_agrad_br   r   r   r*     <   
z&_IntersectDensePrunedFunction.backwardNNr,   r-   r.   r/   r   r   r   floatintr1   r2   r   strr   r   r*   r   r   r   r   rQ   ^  sB    	
rrQ   c                   @   s   e Zd Ze			ddededee dededede	j
d	e	j
d
ee	j
 dee dee de	j
fddZede	j
dedddde	j
e	j
f fddZdS )_IntersectDenseFunctionNrR   rS   rT   rV   
max_statesmax_arcsrY   rZ   
a_to_b_mapr[   r\   r   c                 C   s  t |dksJ tj|j|j|	|||d\}}}t||d< |jddD ]3\}}t|tj	r5t
||}nt|tjs=J |jtjksEJ |j|ddd\}}t|d || q$| D ]\}}t|d || q\|| _|| _| || d}|dur|j }ttttjddd	 d
k r|| }ntj||dd}|j }t
|d|}|t
|d| }t|d |rJ t|d || |
dur|du r|j }ttttjddd	 d
k r|| }ntj||dd}t|d |
rJ t|d |
| |d j S )a?  Intersect array of FSAs on CPU/GPU.

        Args:
          a_fsas:
            Input FsaVec, i.e., `decoding graphs`, one per sequence. It might
            just be a linear sequence of phones, or might be something more
            complicated. Must have number of FSAs equal to b_fsas.dim0(), if
            a_to_b_map not specified.
          b_fsas:
            Input FSAs that correspond to neural network output.
          out_fsa:
            A list containing ONLY one entry which will be set to the
            generated FSA on return. We pass it as a list since the return
            value can only be types of torch.Tensor in the `forward` function.
          output_beam:
            Pruning beam for the output of intersection (vs. best path);
            equivalent to kaldi's lattice-beam.  E.g. 8.
          unused_scores_a:
            It equals to `a_fsas.scores` and its sole purpose is for back
            propagation.
          unused_scores_b:
            It equals to `b_fsas.scores` and its sole purpose is for back
            propagation.
          a_to_b_map:
            Maps from FSA-index in a to FSA-index in b to use for it.
            If None, then we expect the number of FSAs in a_fsas to equal
            b_fsas.dim0().  If set, then it should be a Tensor with ndim=1
            and dtype=torch.int32, with a_to_b_map.shape[0] equal to the
            number of FSAs in a_fsas (i.e. a_fsas.shape[0] if
            len(a_fsas.shape) == 3, else 1); and elements
            `0 <= i < b_fsas.dim0()`.
          seqframe_idx_name:
            If set (e.g. to 'seqframe'), an attribute in the output will be
            created that encodes the sequence-index and the frame-index within
            that sequence; this is equivalent to a row-index into b_fsas.values,
            or, equivalently, an element in b_fsas.shape.
          frame_idx_name:
            If set (e.g. to 'frame', an attribute in the output will be created
            that contains the frame-index within the corresponding sequence.
        Returns:
           Return `out_fsa[0].scores`.
        r   )rR   rS   r   rV   r   r   r   Fr]   r_   N.   )r      floor)rounding_mode)!rb   r   intersect_denser   rd   r   re   rf   r1   r2   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   r   rq   tuplemapr   __version__splitdivrr   rs   rt   ru   r#   )r   rR   rS   rT   rV   r   r   rY   rZ   r   r[   r\   rv   ro   rp   rw   rx   ry   r%   rz   r{   rr   r|   r}   r   r   r   r     sf   7

"




"

z_IntersectDenseFunction.forwardr~   c                 C   s   | j \}}| j}| j}tj|dtj|jdd}tj|jtj|jdd	 }t
||| t
|||d d d d d d d ||d d d fS r   )r   ro   rp   r1   r   r   float32r   rr   r   r   r   r   r   r   r   r   r*   o  r   z _IntersectDenseFunction.backward)NNNr   r   r   r   r   r     sD    
	
ur   rR   rS   rU   rV   rW   rX   r[   r\   r   c           	      C   sl   t jjdkr| j dksJ | j |jjd d k sJ dg}t	| ||||||| j|j|| |d S )a  Intersect array of FSAs on CPU/GPU.

    Caution:
      `a_fsas` MUST be arc sorted.

    Args:
      a_fsas:
        Input FsaVec, i.e., `decoding graphs`, one per sequence. It might just
        be a linear sequence of phones, or might be something more complicated.
        Must have either `a_fsas.shape[0] == b_fsas.dim0()`, or
        `a_fsas.shape[0] == 1` in which case the graph is shared.
      b_fsas:
        Input FSAs that correspond to neural network output.
      search_beam:
        Decoding beam, e.g. 20.  Smaller is faster, larger is more exact
        (less pruning). This is the default value; it may be modified by
        `min_active_states` and `max_active_states`.
      output_beam:
         Beam to prune output, similar to lattice-beam in Kaldi.  Relative
         to best path of output.
      min_active_states:
        Minimum number of FSA states that are allowed to be active on any given
        frame for any given intersection/composition task. This is advisory,
        in that it will try not to have fewer than this number active.
        Set it to zero if there is no constraint.
      max_active_states:
        Maximum number of FSA states that are allowed to be active on any given
        frame for any given intersection/composition task. This is advisory,
        in that it will try not to exceed that but may not always succeed.
        You can use a very large number if no constraint is needed.
      seqframe_idx_name:
        If set (e.g. to 'seqframe'), an attribute in the output will be created
        that encodes the sequence-index and the frame-index within that
        sequence; this is equivalent to a row-index into b_fsas.values,
        or, equivalently, an element in b_fsas.shape.
      frame_idx_name:
        If set (e.g. to 'frame', an attribute in the output will be created
        that contains the frame-index within the corresponding sequence.

    Returns:
      The result of the intersection.
    Debugr   r   r   )
r   version
build_typelabelsminmaxr#   rr   rQ   apply)	rR   rS   rU   rV   rW   rX   r[   r\   rT   r   r   r   rc     s   3rc       @r   r   r   c           	      C   sl   t jjdkr| j dksJ | j |jjd d k sJ dg}t	| |||||| j|j||| |d S )a  Intersect array of FSAs on CPU/GPU.

    Caution:
      `a_fsas` MUST be arc sorted.

    Args:
      a_fsas:
        Input FsaVec, i.e., `decoding graphs`, one per sequence. It might just
        be a linear sequence of phones, or might be something more complicated.
        Must have `a_fsas.shape[0] == b_fsas.dim0()` if `a_to_b_map` is None.
        Otherwise, must have `a_fsas.shape[0] == a_to_b_map.shape[0]`
      b_fsas:
        Input FSAs that correspond to neural network output.
      output_beam:
        Beam to prune output, similar to lattice-beam in Kaldi.  Relative
        to best path of output.
      max_states:
        The max number of states to prune the output, mainly to avoid
        out-of-memory and numerical overflow, default 15,000,000.
      max_arcs:
        The max number of arcs to prune the output, mainly to avoid
        out-of-memory and numerical overflow, default 1073741824(2^30).
      a_to_b_map:
         Maps from FSA-index in a to FSA-index in b to use for it.
         If None, then we expect the number of FSAs in a_fsas to equal
         b_fsas.dim0().  If set, then it should be a Tensor with ndim=1
         and dtype=torch.int32, with a_to_b_map.shape[0] equal to the
         number of FSAs in a_fsas (i.e. a_fsas.shape[0] if
         len(a_fsas.shape) == 3, else 1); and elements 0 <= i < b_fsas.dim0().
      seqframe_idx_name:
        If set (e.g. to 'seqframe'), an attribute in the output will be created
        that encodes the sequence-index and the frame-index within that
        sequence; this is equivalent to a row-index into b_fsas.values,
        or, equivalently, an element in b_fsas.shape.
      frame_idx_name:
        If set (e.g. to 'frame', an attribute in the output will be created
        that contains the frame-index within the corresponding sequence.

    Returns:
      The result of the intersection (pruned to `output_beam`; this pruning
      is exact, it uses forward and backward scores.
    r   r   r   r   )
r   r   r   r   r   r   r#   rr   r   r   )	rR   rS   rV   r   r   r   r[   r\   rT   r   r   r   r     s   3
r   r   )r   r   NNN)typingr   r   r   r1   r   rh   fsar   rd   r   autogradFunctionr	   r4   r@   rI   rQ   r   r   r   r   rc   r2   r   r   r   r   r   <module>   sr   ZOGP  #
P