o
    پi!                     @   s   d dl Z d dlmZmZmZmZmZmZmZ ddl	m
Z
 er>d dlmZ d dlmZ d dl	mZ d dlmZmZ d dlmZ G dd	 d	Zi i dd
fdedededee def
ddZdS )    N)TYPE_CHECKING	GeneratorIteratorListOptionalTupleUnion   )TransformerTokenizer)PreTrainedTokenizer)GenerationParametersSamplingParameters)OutlinesLogitsProcessorc                   @   s   e Zd ZdZ				dddZdeeee f d	d
dddefddZdeeee f d	d
ddde	e fddZ
dddee dee dedddeeeef ddf fddZdS )MLXLMz&
    Represents an `mlx_lm` model
    model	nn.Module	tokenizerr   c                 C   s   || _ || _t|j| _d S )N)r   mlx_tokenizerr
   
_tokenizerr   )selfr   r    r   I/home/ubuntu/.local/lib/python3.10/site-packages/outlines/models/mlxlm.py__init__   s
   
zMLXLM.__init__promptsgeneration_parametersr   sampling_parametersr   returnc                 C   s   |  ||||}dt|S )N )streamjoinlist)r   r   r   logits_processorr   streamerr   r   r   generate   s   zMLXLM.generatec                 c   s4   ddl m} t|\}}}t|\}	}
}}}|du r!td}t|ts*td|	dkr2td|
dkr:td|durBtd	|durJtd
|durRtd|||	|d}|| j	
|}| j	j}|  t| j|fi |t|D ]\\}}}|| jjkr n
|| |jV  qx|  |jV  dS )a  Generate text using `mlx_lm`.

        Arguments
        ---------
        prompts
            A prompt or list of prompts.
        generation_parameters
            An instance of `GenerationParameters` that contains the prompt,
            the maximum number of tokens, stop sequences and seed. All the
            arguments to `SequenceGeneratorAdapter`'s `__cal__` method.
        logits_processor
            The logits processor to use when generating text.
        sampling_parameters
            An instance of `SamplingParameters`, a dataclass that contains
            the name of the sampler to use and related parameters as available
            in Outlines.
        Returns
        -------
        The generated text.
        r   Ng    eAz6The `mlx-lm` library does not support batch inference.beam_searchz2The `mlx-lm` library does not support Beam Search.r	   z<The `mlx-lm` library does not allow to take several samples.z,The `mlx-lm` library does not support top_k.z+The `mlx-lm` library does not support seed.z.The `mlx-lm` library does not support stop_at.)temptop_psamplerr!   )mlx.corecoredataclassesastupleint
isinstancestrNotImplementedErrorarrayr   encodedetokenizerresetzipgenerate_stepranger   eos_token_id	add_tokenlast_segmentfinalize)r   r   r   r!   r   mx
max_tokensstop_atseedr'   num_samplesr&   top_ktemperaturegenerate_kwargsprompt_tokensr2   tokenprobnr   r   r   r   +   sX   
	

zMLXLM.streampromptmx.arrayr%   r&   r'   r!   r   Nc                 #   s    ddl m ddl |pddddtdtf f fdd} jj| j}|}g }		 | j|d |d
}
|
dddddf }
|durW|
	d}||	|}|	dd}
||
\}}|
 }||fV  |	| |}q/)a  
        Adapted from
        https://github.com/ml-explore/mlx-examples/blob/4872727/llms/mlx_lm/utils.py#L129

        A generator producing token ids based on the given prompt from the model.

            Args:
                prompt (mx.array): The input prompt.
                temp (float): The temperature for sampling, if 0 the argmax is used.
                  Default: ``0``.
                top_p (float, optional): Nulceus sampling, higher means model considers
                  more less likely words.
                sampler (str): The sampler string defined by SequenceGeneratorAdapter
                logits_processor (OutlinesLogitsProcessor): Augment logits before sampling.
        r   N      ?logitsrH   r   c                    s    | }dksdkrj| dd}n,dkr9d ur.dkr.dk r. j| }nj| d  }ntd	 d
|d|f }||fS )Ng        greedy)axismultinomialr   rI   r	   zInvalid mlx-lm sampler: ``)softmaxargmaxsample_utilstop_p_samplingrandomcategorical
ValueError)rJ   softmax_logitsrD   rE   mlx_lmr;   r'   rA   r&   r   r   sample   s   
z#MLXLM.generate_step.<locals>.sampleT)cacherL   r	   )r(   r)   rY   r   floatmodelsr[   make_prompt_cacher   reshapeitemappend)r   rG   r%   r&   r'   r!   rZ   r[   unprocessed_input_idsgenerated_idsrJ   	logits_1dnew_token_singlerE   	new_tokenr   rX   r   r5   |   s*   &



zMLXLM.generate_step)r   r   r   r   )__name__
__module____qualname____doc__r   r   r.   r   r#   r   r   r   r\   r   r   r,   r5   r   r   r   r   r      sL    


Qr   F
model_nametokenizer_configmodel_configadapter_pathlazyc           	      C   sd   zddl m} ddl}W n ty   tdw |j s!td|j| ||||d\}}t||S )a  Instantiate a model from the `mlx_lm` library and its tokenizer.

    Signature adapted from
    https://github.com/ml-explore/mlx-examples/blob/4872727/llms/mlx_lm/utils.py#L422

    Parameters
    ----------
    Args:
        path_or_hf_repo (Path): The path or the huggingface repository to load the model from.
        tokenizer_config (dict, optional): Configuration parameters specifically for the tokenizer.
            Defaults to an empty dictionary.
        model_config(dict, optional): Configuration parameters specifically for the model.
            Defaults to an empty dictionary.
        adapter_path (str, optional): Path to the LoRA adapters. If provided, applies LoRA layers
            to the model. Default: ``None``.
        lazy (bool): If False eval the model parameters to make sure they are
            loaded in memory before returning, otherwise they will be loaded
            when needed. Default: ``False``

    Returns
    -------
    A `MLXLM` model instance.

    r   NzKThe `mlx_lm` library needs to be installed in order to use `mlx_lm` models.z5You cannot use `mlx_lm` without Apple Silicon (Metal))rl   rm   rn   ro   )	r(   r)   rY   ImportErrormetalis_availableRuntimeErrorloadr   )	rk   rl   rm   rn   ro   r;   rY   r   r   r   r   r   mlxlm   s$   


ru   )r*   typingr   r   r   r   r   r   r   transformersr
   r(   r)   r;   mlx.nnnnr   outlines.generate.apir   r   outlines.processorsr   r   r.   dictboolru   r   r   r   r   <module>   s4    $ 6