o
    ॵi                     @   sL   d dl mZmZ d dlZd dlZd dlmZ ddlm	Z	 G dd de	Z
dS )    )AnyDictN)ModeKeys   )OfaBasePreprocessorc                       s   e Zd ZdZejf fdd	Zdeee	f deee	f fddZ
deee	f deee	f fdd	Zdeee	f deee	f fd
dZ  ZS )OfaSudokuPreprocessorz+
    OFA preprocessor for sudoku tasks
    c                    sL  t t| j|||g|R i | | jjdd| _| jdd| _| jdd| _| jrg | _	g | _
tdD ]-}tdD ]&}| j	|d  | j
|d  |d	krW|d	ksc| j	d
 | j
d
 q=q7t| j
| _
t| j	| _	t| | j}t| j
|g}	t| j	|g}
t| j|	| jg| _
t| j|
| jg| _	dS dS )zpreprocess the data

        Args:
            cfg(modelscope.utils.config.ConfigDict) : model config
            model_dir (str): model path,
            mode: preprocessor mode (model mode)
        promptz solve the sudoku .seg_embeddingFmax_struct_length   	   r      r   N)superr   __init__cfgmodelgetinstruction_textr	   r
   input_puzzle_rowinput_puzzle_colrangeappendtorchtensor
zeros_liketokenize_textcatbos_itemeos_item)selfr   	model_dirmodeargskwargsidxjdxinstruct_segr   r   	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/preprocessors/ofa/sudoku.pyr      sH   


zOfaSudokuPreprocessor.__init__datareturnc                 C   s    | j tjkr| |S | |S )N)r!   r   TRAIN_build_train_sample_build_infer_sample)r   r+   r)   r)   r*   __call__:   s   

zOfaSudokuPreprocessor.__call__c                 C   sl   |  |}|d }|   }d|d| j }| j|dd|d< t| j	|d dd g|d< |S )	z
        build sample for training tasks.

        step 1. execute the `_build_infer_sample` function to get a batch sample
            for inference.
        step 2. process the label data for training.
        label NF)add_bostargetprev_output_tokens)
r/   lowerstripsplitjoinmax_tgt_lengthr   r   r   r   )r   r+   sampler4   target_token_listr)   r)   r*   r.   @   s   
z)OfaSudokuPreprocessor._build_train_samplec                 C   s   d| j v r	d|v sJ d|| j d  }d|   d| j }| || j }|d| j| j  }d|d}| j	rH| j
|d< | j|d< d	| j v r`| j d	 |v r`d
|| j d	  |d< |S )a-  
        build sample for inference tasks.

        step 1. Get the input random masked sudoku text input, which shold be
            generated like below pseudo code.
            >>> sudo = np.random.randint(1, 9, size=(9, 9)) # a pseudo sudoku
            >>> sudo_text = " | ".join(" : ".join(str(c) for c in row) \
            >>>             for row in sudo)
        step 2. Limit the length, tokenize the input text and add the bos token
            to the front of the input as source input.
        step 3. Add a pseodo ids for every input.
        textz;there must be `text` column in task key map and source datar2   Ng        )idsourceseg_row_tokensseg_col_tokenssolutionz {}r1   )
column_mapr:   r7   r8   r9   r
   r   r   max_src_lengthr	   r   r   format)r   r+   r>   src_itemr<   r)   r)   r*   r/   Q   s$    


z)OfaSudokuPreprocessor._build_infer_sample)__name__
__module____qualname____doc__r   	INFERENCEr   r   strr   r0   r.   r/   __classcell__r)   r)   r'   r*   r      s    "*"*r   )typingr   r   numpynpr   modelscope.utils.constantr   baser   r   r)   r)   r)   r*   <module>   s   