o
    Ni                     @   sn   d dl mZmZmZ d dlZd dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZmZ G dd dZdS )	    )divisionprint_functionabsolute_importN)xrange)
csc_matrixissparse)DECODERS)count_tranxn
sparse_addc                   @   s6   e Zd ZdZ		dddZdd	 Zd
d Zdd ZdS )StructuredPerceptrona  Structured perceptron for sequence classification.

    The implemention is based on average structured perceptron algorithm of
    M. Collins.

    Parameters
    ----------

    lr_exp : float, default: 0.1
        The Exponent used for inverse scaling of learning rate. Given iteration
        number t, the effective learning rate is ``1. / (t ** lr_exp)``

    n_iter : int, default: 15
        Maximum number of epochs of the structured perceptron algorithm

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by ``np.random``.

    verbose : int, default: 0 (quiet mode)
        Verbosity mode.

    References
    ----------

    M. Collins (2002). Discriminative training methods for hidden Markov
    models: Theory and experiments with perceptron algorithms. EMNLP.
    皙?   Nr   c                 C   s   || _ || _|| _|| _d S )N)lr_expn_iterverboserandom_state)selfr   r   r   r    r   O/home/ubuntu/.local/lib/python3.10/site-packages/indictrans/trunk/perceptron.py__init__/   s   
zStructuredPerceptron.__init__c                 C   sf   | j d u rtjjj}|S t| j trtj| j }|S t| j tjjjr)| j }|S td	t
| j )NzType {0} not supported.)r   nprandommtrand_rand
isinstanceintRandomState	TypeErrorformattype)r   r   r   r   r   _get_random_state6   s   

z&StructuredPerceptron._get_random_statec           )   
      s  t |}td }t t |}tt|}t |tt	t|t
| tt	t
|t|}t  fdd|D }t fdd|D }|d d jd }t j||fdd}	t |}
t |}t ||f}t |	}t |
}t |}t |}t |jd }|  }d	}| j}t
d| jd D ]/}d	||  }| jdkrtd
|dd tj  || d}t|D ]\}}|d | }t|r||	j }nt ||	j}||||
|}|| }||k }| jdkr|sddd t	t t!|t t!|D }td|| |r||7 }|| } |"ddk}!|!#t j$}!t%|!|  }"|"| 9 }"|"j| }#t&||}$t&||}%||%|$  }&||!d || d   }'||!d || d   }(t'|#t j(ro|	|#7 }	nt)|	|# ||&8 }|
|'8 }
||(8 }|#|9 }#|&|9 }&|'|9 }'|(|9 }(t'|#t j(r||#7 }nt)||# ||&8 }||'8 }||(8 }q| jdkrtd|t|d   tj  |d	7 }q|	|| 8 }	|
|| 8 }
||| 8 }||| 8 }t%|	| _*|
| _+|| _,|| _-|| _.| S )a  Fit the model to the given set of sequences.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_sequences, sequence_length,
                                                n_features)
            Feature matrix of train sequences.

        y : list of arrays, shape (n_sequences, sequence_length)
            Target labels.

        Returns
        -------
        self : object
            Returns self.
        viterbic                    s$   g | ]}t  fd d|D qS )c                    s   g | ]} | qS r   r   ).0itclass_idr   r   
<listcomp>]   s    z7StructuredPerceptron.fit.<locals>.<listcomp>.<listcomp>)r   arrayr"   tr$   r   r   r&   ]   s   $ z,StructuredPerceptron.fit.<locals>.<listcomp>c                    s"   g | ]}t |d d kqS )   )r   r'   reshaper(   )class_ranger   r   r&   _   s   " r   r+   F)orderg      ?zIteration {0}z ... 
)end    c                 S   s   g | ]}d  |qS )-)join)r"   str   r   r   r&          z-First sequence comparision: {0} ... loss: {1}r*      zTrain-set error = {0:.4f})/r   
atleast_2dr   uniquehstacklensetarangedictzipr   r'   shapezeros
zeros_liker    r   r   r   printr   sysstdoutflushshuffle	enumerater   Tdotdecodesumr4   mapstrr,   astypefloat64r   r	   r   ndarrayr
   coef_intercept_init_intercept_trans_intercept_final_classes_))r   Xydecoderclasses	n_classesid_classY_true
n_featureswb_initb_finalb_transw_avg
b_init_avgb_final_avgb_trans_avgsequence_idsrnd	avg_countr   r#   lrsum_lossid_iiX_iscorey_predy_t_ilosscompY_t_iY_predY_diffw_updatet_transp_transb_trans_updateb_init_updateb_final_updater   )r%   r-   r   fitB   s   






















zStructuredPerceptron.fitc                    s   g }t d }t|ts|g}|D ]/}t|r | jj }n j|jj}|| j	 j
 j}| fdd|D  q|S )a  Predict output sequences for input sequences in X.

        Parameters
        ----------
        X : {array-like, sparse matrix}, shape (n_sequences, sequence_length,
                                                n_features)
            Feature matrix of test sequences.

        Returns
        -------
        y : array, shape (n_sequences, sequence_length)
            Labels per sequence in X.
        r!   c                    s   g | ]} j | qS r   )rV   )r"   predr   r   r   r&      r6   z0StructuredPerceptron.predict.<locals>.<listcomp>)r   r   listr   rJ   rR   rI   toarrayrK   rT   rS   rU   append)r   rW   rX   rY   xscoresy_r   r   r   predict   s   

zStructuredPerceptron.predict)r   r   Nr   )__name__
__module____qualname____doc__r   r    r}   r   r   r   r   r   r      s    
 r   )
__future__r   r   r   rD   numpyr   	six.movesr   scipy.sparser   r   indictrans._decoder   indictrans._utilsr	   r
   r   r   r   r   r   <module>   s   