o
    Mih                     @   s   d Z ddlZddlZddlZddlZddlZejd dkZdd Zer*ddl	m
Z
 nzddlm
Z
 W n eyB   ed eZ
Y nw dZd	Zd
Zdd ZG dd deZdddZdd Zdd ZdddZdd Zdd Zdd ZdS ) zqData structures and functions of general utility,
shared between different modules and variants of the software.
    N   c                  O   s   dd S )Nc                 S      | S N )funcr   r   C/home/ubuntu/.local/lib/python3.10/site-packages/morfessor/utils.py<lambda>   s    z"_dummy_lru_cache.<locals>.<lambda>r   argskwargsr   r   r   _dummy_lru_cache   s   r   )	lru_cachezDLRU cache disabled, install backports.functools_lru_cache to enable.i@B i  Tc                    st   t sS zddlm  W n ty   G dd d}| Y nw ttjr. fdd}|S tdr8  S S )a  Decorator/function for displaying a progress bar when iterating
    through a list.

    iter_func can be both a function providing a iterator (for decorator
    style use) or an iterator itself.

    No progressbar is displayed when the show_progress_bar variable is set to
     false.

    If the progressbar module is available a fancy percentage style
    progressbar is displayed. Otherwise 60 dots are printed as indicator.

    r   )ProgressBarc                   @   s8   e Zd ZdZdZdd Zdd Zdd Zd	d
 ZeZ	dS )z$_progress.<locals>.SimpleProgressBarzlCreate a simple progress bar that prints 60 dots on a single
            line, proportional to the progress <   c                 S   s   d | _ d| _d| _d S )Nd   r   )itdotfreqiselfr   r   r   __init__F   s   
z-_progress.<locals>.SimpleProgressBar.__init__c                 S   s>   t || _d| _t|| j d | j | _| jdk rd| _| S )Nr      )iterr   r   lenNUM_DOTSr   )r   r   r   r   r   __call__K   s   

z-_progress.<locals>.SimpleProgressBar.__call__c                 S   r   r   r   r   r   r   r   __iter__V   s   z-_progress.<locals>.SimpleProgressBar.__iter__c                 S   s^   |  j d7  _ | j | j dkrtjd tj  zt| jW S  ty.   tjd  w )Nr   r   .
)	r   r   sysstderrwriteflushnextr   StopIterationr   r   r   r   __next__Y   s   
z-_progress.<locals>.SimpleProgressBar.__next__N)
__name__
__module____qualname____doc__r   r   r   r   r%   r#   r   r   r   r   SimpleProgressBarA   s    r*   c                     s4   t tt jr  | i |S | i |S r   )logging	getLoggerr&   isEnabledForINFOr	   r   	iter_funcr   r   r   l   s   z_progress.<locals>.ir   )show_progress_barprogressbarr   ImportError
isinstancetypesFunctionTypehasattr)r0   r*   r   r   r/   r   	_progress+   s   &+

r8   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	Sparsea  A defaultdict-like data structure, which tries to remain as sparse
    as possible. If a value becomes equal to the default value, it (and the
    key associated with it) are transparently removed.

    Only supports immutable values, e.g. namedtuples.
    c                 O   s(   | d| _tj| g|R i | dS )zCreate a new Sparse datastructure.
        Keyword arguments:
            default: Default value. Unlike defaultdict this should be a
                       prototype immutable, not a factory.
        defaultN)pop_defaultdictr   )r   pargsr   r   r   r   r      s   zSparse.__init__c                 C   s(   zt | |W S  ty   | j Y S w r   )r=   __getitem__KeyErrorr<   )r   keyr   r   r   r?      s
   
zSparse.__getitem__c                 C   s:   d| v r|| j kr|| v r| |= d S d S t| || d S )Nr<   )r<   r=   __setitem__)r   rA   valuer   r   r   rB      s
   
zSparse.__setitem__N)r&   r'   r(   r)   r   r?   rB   r   r   r   r   r9   {   s
    
r9      c                 c   sN    g }| D ]}| | t||kr|| d }t||kr$t|V  qdS )zReturns all ngram tokens in an input sequence, for a specified n.
    E.g. ngrams(['A', 'B', 'A', 'B', 'D'], n=2) yields
    ('A', 'B'), ('B', 'A'), ('A', 'B'), ('B', 'D')
    N)appendr   tuple)sequencenwindowitemr   r   r   ngrams   s   

rK   c                 C   s:   d}t | D ]\}}|d du s||d k r||f}q|S )zaReturns the minimum value and the first index at which it can be
    found in the input sequence.)NNr   N)	enumerate)rG   bestr   rC   r   r   r   	minargmin   s   rN   c                 C   s$   | dksJ | dkrt S t|  S )z>Logarithm which uses constant value for log(0) instead of -infg        r   )LOGPROB_ZEROmathlog)xr   r   r   zlog   s   rS   c                 C   s   |gt | j }| | S )zsConvenience function to return a namedtuple initialized to zeros,
    without needing to know the number of fields.)r   _fields)constructorzerozerosr   r   r   	_nt_zeros   s   rX   c           	         s   t dd | D  t fddt|D }g }tdd | D }d}d}|D ]}||k r;t|\}}||7 }||k s-|| q'|S )a=  Samples with replacement from the data set so that the probability
    of each data point being selected is proportional to the occurrence count.
    Arguments:
        data: A list of tuples (weight, ...)
        num_samples: The number of samples to return
    Returns:
        a sorted list of indices to data
    c                 s       | ]}|d  V  qdS r   Nr   .0rR   r   r   r   	<genexpr>       z"weighted_sample.<locals>.<genexpr>c                    s   g | ]
}t d  d qS )r   r   )randomrandint)r\   _tokensr   r   
<listcomp>   s    z#weighted_sample.<locals>.<listcomp>c                 s   rY   rZ   r   r[   r   r   r   r]      r^   r   )sumsortedrangerL   r#   rE   )	datanum_samplestoken_indicesdata_indicesdditisample_token_indexweightr   rb   r   weighted_sample   s   	rr   c                 C   s   t dkr| S dd }|| S )a  Prints a progress bar for visualizing flow through a generator.
    The length of a generator is not known in advance, so the bar has
    no fixed length. GENERATOR_DOT_FREQ controls the frequency of dots.

    This function wraps the argument generator, returning a new generator.
    r   c                 s   sL    t | D ]\}}|t dkrtjd tj  |V  qtjd d S )Nr   r   r   )rL   GENERATOR_DOT_FREQr   r    r!   r"   )	generatorr   rR   r   r   r   _progress_wrapper   s   
z._generator_progress.<locals>._progress_wrapper)rs   )rt   ru   r   r   r   _generator_progress   s   rv   c                 C   s*   zt | tW S  ty   t | t Y S w r   )r4   
basestring	NameErrorstr)objr   r   r   
_is_string   s
   r{   )rD   )r   )r)   r+   rP   r_   r   r5   version_infoPY3r   	functoolsr   backports.functools_lru_cacher3   warningrO   rs   r1   r8   r=   r9   rK   rN   rS   rX   rr   rv   r{   r   r   r   r   <module>   s<    P
!

