o
    bi                     @   s   d dl Z d dlZd dlZd dlmZ d dlZd dlZd dlm	Z	 d dlm
Z d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d dlmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZ e rud dl m!Z! e!j"Z"nej"Z"G dd dej#Z$dddZ%G dd deZ&dS )    N)partial)backend)	callbacks)
optimizers)tree)config)distribution_lib)is_nnx_enabled)trainer)array_slicing)data_adapter_utils)EpochIterator)traceback_utils)nnxc                       s4  e Zd Z fddZ		d0ddZdd Zd	d
 Zdd Zdd Zd1ddZ	d1ddZ
d1ddZd1ddZej																d2ddZej								d3ddZej	d4d d!Z				d5d"d#Z			d6d$d%Zd&d' Zd(d) Zd*d+ Z				d7d,d-Z					d8d.d/Z  ZS )9
JAXTrainerc                    s&   t    d | _d | _d | _d| _d S )NT)super__init__train_functiontest_functionpredict_function_jax_state_syncedself	__class__ Q/home/ubuntu/.local/lib/python3.10/site-packages/keras/src/backend/jax/trainer.pyr       s
   

zJAXTrainer.__init__FNc	              
   C   s   i }	| j r	||	d< | j|||fddi|	\}
}}|r#| j  || _| j||||||
||d\}}|r9| j  |\}}}|}|rl| jdurltt| jj|}t	j
|d | j|}W d   n1 sgw   Y  |||
||ffS )z?This method is stateless and is intended for use with jax.grad.trainingreturn_lossesT)xyy_predsample_weightr   Nstate_mapping)_call_has_training_argstateless_call_losses_overrideclearstateless_compute_loss	optimizerlistzip	variablesr   StatelessScope
scale_loss)r   trainable_variablesnon_trainable_variablesmetrics_variablesr   r    r"   r   optimizer_variableskwargsr!   losseslossr-   unscaled_lossmappingr   r   r   compute_loss_and_updates'   sR   



z#JAXTrainer.compute_loss_and_updatesc                 C   s   t jdd t| j|D d%}| jj|tdd t|D j	d d | 
||||}W d    n1 s7w   Y  g }	| jD ]}
||
}|d u rO|
j}|	| qA||	fS )Nc                 S   s   g | ]\}}||fqS r   r   ).0ref_vvr   r   r   
<listcomp>f   s    z8JAXTrainer._update_metrics_variables.<locals>.<listcomp>r#   c                 s   s    | ]	}|d ur|V  qd S Nr   )r:   ir   r   r   	<genexpr>m   s    z7JAXTrainer._update_metrics_variables.<locals>.<genexpr>r   )r"   )r   r.   r,   r2   _loss_trackerupdate_statenextr   flattenshapecompute_metricsget_current_valuevalueappend)r   r2   r7   r   r    r!   r"   scopelogsnew_metrics_variablesr;   new_vr   r   r   _update_metrics_variablesb   s.   


z$JAXTrainer._update_metrics_variablesc              
   C   s   |\}}}}t |\}}}	tj| jdd}
|
||||||	d|d\\}}}|\}}}}| j|||\}}| ||||||	\}}||||f}||fS )NT)has_aux)r   r3   )r   unpack_x_y_sample_weightjaxvalue_and_gradr9   r*   stateless_applyrN   )r   statedatar0   r1   r3   r2   r   r    r"   grad_fnr6   auxgradsr7   r!   rK   r   r   r   
train_step{   sJ   
zJAXTrainer.train_stepc              	   C   sl   |\}}}t |\}}}| j||||||dd\}	}
|
\}}}}| ||||||\}}|||f}||fS )NF)r   )r   rP   r9   rN   )r   rT   rU   r0   r1   r2   r   r    r"   r6   rW   r7   r!   rK   r   r   r   	test_step   s2   


zJAXTrainer.test_stepc           	      C   sL   |\}}i }| j rd|d< t|\}}}| j|||fi |\}}||fS )NFr   )r%   r   rP   r&   )	r   rT   rU   r0   r1   r4   r   _outputsr   r   r   predict_step   s   
zJAXTrainer.predict_stepc                    s`   j dkr(|rdd  jsjrt   fdd}|S fdd}|S fdd}|S )N   c                 S   s0   | d }| dd  D ]}t dd ||}q
|S )Nr   r^   c                 S   s   t j| |gS r>   )rQ   numpyconcatenate)t1t2r   r   r   <lambda>   s    z@JAXTrainer._make_function.<locals>.concatenate.<locals>.<lambda>)r   map_structure)r\   outputnext_outputr   r   r   r`      s   z.JAXTrainer._make_function.<locals>.concatenatec                    sx   t |}| |\}} |g}ztjd D ]}t |}| |\}} || qW n	 ty3   Y nw  |}|| fS Nr^   )rC   rangesteps_per_executionrI   StopIteration)rT   iteratorrU   r\   r[   _outputsr`   r   step_functionr   r   iterator_step   s   z0JAXTrainer._make_function.<locals>.iterator_stepc                    sd   t |}| |\}} zt jd D ]}t |}| |\}} qW || fS  ty1   Y || fS w rg   )rC   rh   ri   rj   )rT   rk   rU   r\   r[   )r   rn   r   r   ro      s   c                    s    | t |S r>   )rC   )rT   rk   )rn   r   r   ro      s   )ri   run_eagerlyjit_compilejit)r   rn   concatenate_outputsro   r   rm   r   _make_function   s   

 zJAXTrainer._make_functionc                 C   sj   | j d ur	|s	d S | js(| jr(d }t d ur|  }d |f}t| jd|d}n| j}| |}|| _ d S Nr   donate_argnumsout_shardings)	r   rp   rq   r   distribution_get_state_sharding_specrr   rY   rt   )r   forcerx   state_shardingsrY   rn   r   r   r   make_train_function  s   

zJAXTrainer.make_train_functionc           
      C   s|   | j d ur	|s	d S | js1| jr1d }t d ur(|  \}}}}|||f}d |f}t| jd|d}n| j}| |}	|	| _ d S ru   )	r   rp   rq   r   ry   rz   rr   rZ   rt   )
r   r{   rx   trainable_shardingsnon_trainable_shardingsr[   metrics_shardingsr|   rZ   rn   r   r   r   make_test_function  s0   

zJAXTrainer.make_test_functionc           	         s   j d ur
|s
j S fdd}js5jr5d }t d ur. \}}}}||f}d |f}t|d|d}j|dd  fdd}|_ d S )	Nc                    s      | |\}}|| d |ffS )Nr   )r]   )rT   rU   r\   r1   r   r   r   r]   ;  s   z6JAXTrainer.make_predict_function.<locals>.predict_stepr   rv   T)rs   c                    s    | |\}} || fS r>   r   )rT   rk   r\   )_step_functionr   r   rn   W  s   z7JAXTrainer.make_predict_function.<locals>.step_function)r   rp   rq   r   ry   rz   rr   rt   )	r   r{   r]   rx   r~   r   r[   r|   rn   r   )r   r   r   make_predict_function7  s4   
z JAXTrainer.make_predict_functionr^   auto        Tr   c           $      C   sH  |  d t }|r||k rtd|  |}d | _|r0|d u r0tj|||f|d\\}}}}|d ur<t	|\}}}t
||||||	|
| jd}| j|d |  t|tjshtj|d|dk|||j| d}|   d	| _i }d	}|  | jp{|}zt||D ]}|   || d| _| F |D ];\}}}|| | jr| jdddddd
}d	| _| ||\}}|\}}} }!||| |!d| _||| | jr nqW d    n1 sw   Y  |    t!| "|}"|d ur0| #||r0t$| dd d u rt
||||p
|| j|d	d| _| j%||||p|||ddd}#dd |#& D }#|"'|# |(||" |"}| jr> nqd}W |    t| j)t*j+rZ|dkrZ| j),| j- t$| dd d ure| `|rn|j.|d d | _| j/S |    t| j)t*j+r|dkr| j),| j- t$| dd d ur| `|r|j.|d d | _w )NfitzLimiting epochs to %d)validation_split)r   r    r"   
batch_sizesteps_per_epochshuffleclass_weightri   rk   Tr   )add_historyadd_progbarverboseepochsstepsmodelFr0   r1   r3   r2   purge_model_variablesr0   r1   r3   r2   _eval_epoch_iterator)r   r    r"   r   ri   r   r   )r   r    r"   r   r   r   return_dict_use_cached_eval_datasetc                 S   s   i | ]
\}}d | |qS )val_r   )r:   namevalr   r   r   
<dictcomp>  s    z"JAXTrainer.fit.<locals>.<dictcomp>)rK   )0_assert_compile_calledr   
max_epochswarningswarnr   r   train_validation_splitr   rP   JAXEpochIteratorri   _symbolic_buildreset
isinstancecallbacks_moduleCallbackListnum_batchesr}   stop_trainingon_train_begin_initial_epochrh   reset_metricson_epoch_beginr   catch_stop_iterationon_train_batch_begin_get_jax_stater   
_jax_stateon_train_batch_endjax_state_syncdict_get_metrics_result_or_logs_should_evalgetattrevaluateitemsupdateon_epoch_endr*   optimizers_module	Optimizerfinalize_variable_valuestrainable_weightson_train_endhistory)$r   r   r    r   r   r   r   r   validation_datar   r   r"   initial_epochr   validation_stepsvalidation_batch_sizevalidation_freqr   val_xval_yval_sample_weightepoch_iteratortraining_logstraining_finishedepoch
begin_stepend_steprk   rT   rK   r0   r1   r3   r2   
epoch_logsval_logsr   r   r   r   ]  s  






-	



zJAXTrainer.fitc	              	   K   s  |  d |	dd}
|	rtd|	 |
r| j}nt|||||d| jd}| j|d |  t|t	j
sDt	j
||dk|d|j| d	}|   d| _|  i }|   d
| _| C |D ]8\}}}|| | jrx| jd
d
d
d
d}d| _| ||\}}|\}}}|||d| _||| | jr nq_W d    n1 sw   Y  |   | |}|| d | _|r|S | |S )Nr   r   FzArguments not recognized: )r   r    r"   r   r   r   ri   r   r   r^   r   r   r   r   r   Tr0   r1   r2   r   r0   r1   r2   )r   pop
ValueErrorr   r   ri   r   r   r   r   r   r   r   stop_evaluatingon_test_beginr   r   r   on_test_batch_beginr   r   r   on_test_batch_endr   r   on_test_end_flatten_metrics_in_order)r   r   r    r   r   r"   r   r   r   r4   use_cached_eval_datasetr   rK   r   r   rk   rT   r0   r1   r2   r   r   r   r     s   

	

	&


zJAXTrainer.evaluatec              	   C   s  t |||d| jd}tdd |  D sK|D ]/\}}}tt|\}}}t r.| | nt	  | | W d    n1 sAw   Y   |
  t|tjs_tj||dk|d|j| d}|   d| _|  dd	 }	d
| _d }
d }| G |D ]<\}}}|| | jr| jd
d
d
d}d| _| ||\}}|\}}||d| _|	||
}
||d|i | jr nq|W d    n1 sw   Y  |   |  d | _t|tj|
S )NF)r   r   r   r   ri   c                 s       | ]}|j V  qd S r>   builtr:   layerr   r   r   r@         z%JAXTrainer.predict.<locals>.<genexpr>r   r^   r   c                 S   s4   |d u rt dd | }|S t | dd ||  |S )Nc                 S   s   | gS r>   r   )batch_outputr   r   r   rc     s    z?JAXTrainer.predict.<locals>.append_to_outputs.<locals>.<lambda>c                 S   s
   |  |S r>   )rI   )re   r   r   r   r   rc        
 )r   rd   map_structure_up_to)batch_outputsr\   r   r   r   append_to_outputs  s   z-JAXTrainer.predict.<locals>.append_to_outputsT)r0   r1   r   r0   r1   r\   )r   ri   all_flatten_layersr   rP   rC   r	   r   r.   r   r   r   r   r   r   stop_predictingon_predict_beginr   r   on_predict_batch_beginr   r   r   on_predict_batch_endr   on_predict_endr   r   npr`   )r   r   r   r   r   r   r   r[   rk   r   r\   r1   r   r   rT   r   r0   r   r   r   predict  s   



	


 zJAXTrainer.predictc                    s   |  d |d ur d urtd  d| t|  fdd}| jt| d |   | jdddddd	}d| _| 	|| \}}|\}	}
}}|	|
||d
| _
|   tdd |}|rf|S | |S )Ntrain_on_batchzkArguments `sample_weight` and `class_weight` cannot be specified at the same time. Received: sample_weight=z, class_weight=c                   3       t  fV  d S r>   _distribute_datar   r"   r   r    r   r   rU        z'JAXTrainer.train_on_batch.<locals>.data
data_batchTFr   r   c                 S   
   t | S r>   r   arrayr   r   r   r   rc     r   z+JAXTrainer.train_on_batch.<locals>.<lambda>)r   r   r   class_weight_to_sample_weightsr   rC   r}   r   r   r   r   r   r   rd   r   )r   r   r    r"   r   r   rU   rT   rK   r0   r1   r3   r2   r   r   r   r     sR   

zJAXTrainer.train_on_batchc                    s   |  d  fdd}| jt| d |   | jddddd}d| _| || \}}|\}}	}
||	|
d| _|   t	
d	d
 |}|rK|S | |S )Ntest_on_batchc                   3   r   r>   r   r   r   r   r   rU   %  r   z&JAXTrainer.test_on_batch.<locals>.datar   TFr   r   c                 S   r   r>   r   r   r   r   r   rc   @  r   z*JAXTrainer.test_on_batch.<locals>.<lambda>)r   r   rC   r   r   r   r   r   r   r   rd   r   )r   r   r    r"   r   rU   rT   rK   r0   r1   r2   r   r   r   r     s,   


zJAXTrainer.test_on_batchc                    s   t dd |  D s#t  |   W d    n1 sw   Y  |   | jddddd}d| _ fdd}| || \}}|\}}||d| _| 	  t
d	d
 |}|S )Nc                 s   r   r>   r   r   r   r   r   r@   F  r   z.JAXTrainer.predict_on_batch.<locals>.<genexpr>TFr   c                   3   s     fV  d S r>   r   r   r   r   r   rU   T  s   z)JAXTrainer.predict_on_batch.<locals>.datar   c                 S   r   r>   r   r   r   r   r   rc   ^  r   z-JAXTrainer.predict_on_batch.<locals>.<lambda>)r   r   r   r.   r   r   r   r   r   r   r   rd   )r   r   rT   rU   r   r0   r1   r   r   r   predict_on_batchE  s*   

zJAXTrainer.predict_on_batchc                 C   s   t | dd r	| jrd S | jdd }| jdd }| jdd }| jdd }|r9t| j|D ]	\}}|| q/|rKt| j|D ]	\}}|| qA|r^t| jj	|D ]	\}}|| qT|rpt| j
|D ]	\}}|| qfd| _d S )Nr   r0   r1   r3   r2   T)r   r   r   getr,   r0   assignr1   r*   r-   r2   )r   r0   r1   r3   r2   r;   r<   r   r   r   r   a  s.   
zJAXTrainer.jax_state_syncc                 C   sh   dd | j D }dd | jD }t| dr$| jd ur$dd | jjD }ng }dd | jD }||||fS )Nc                 S      g | ]}|j jqS r   rH   shardingr:   r<   r   r   r   r=   |      z7JAXTrainer._get_state_sharding_spec.<locals>.<listcomp>c                 S   r  r   r  r  r   r   r   r=     r  r*   c                 S   r  r   r  r  r   r   r   r=     r  c                 S   r  r   r  r  r   r   r   r=     s    )r0   r1   hasattrr*   r-   r2   )r   r~   r   optimizer_shardingsr   r   r   r   rz   {  s"   z#JAXTrainer._get_state_sharding_specc                 C   sb   |r| j D ]}d|_q|r| jD ]}d|_q|r"| jjD ]}d|_q|r-| jD ]}d|_q'dS dS )a  Remove all the model variable for memory saving.

        During JAX training, since the training function is stateless, we have
        to pass in and get the model weights over and over, during which the
        copy of the weights that attached to the Variable are still and
        occupying extra memory. We remove those variable to save memory (for
        better memory utilization) at the beginning of the epoch, and reattach
        the value back to variables at the end of the epoch, via
        `jax_state_sync()`.
        N)r0   _valuer1   r*   r-   r2   )r   r0   r1   r3   r2   r<   r   r   r   _purge_model_variables  s   


z!JAXTrainer._purge_model_variablesc                 C   s   g }|r| dd | jD  |r| dd | jD  |r*| dd | jjD  |r7| dd | jD  |rB| j||||d t|S )Nc                 S      g | ]}|j qS r   rH   r  r   r   r   r=         z-JAXTrainer._get_jax_state.<locals>.<listcomp>c                 S   r  r   r  r  r   r   r   r=     r  c                 S   r  r   r  r  r   r   r   r=     r  c                 S   r  r   r  r  r   r   r   r=     r  r   )rI   r0   r1   r*   r-   r2   r  tuple)r   r0   r1   r3   r2   r   rT   r   r   r   r     s"   zJAXTrainer._get_jax_state)FN)F)NNNr^   r   Nr   NTNNr   NNNr^   )NNNr   NNNF)Nr   NN)NNNF)NNF)FFFF)FFFFF)__name__
__module____qualname__r   r9   rN   rY   rZ   r]   rt   r}   r   r   r   filter_tracebackr   r   r   r   r   r  r   rz   r  r   __classcell__r   r   r   r   r      s    
;,

3

& <ec
?
)
 r   c                    sX   t    d ur%|d u rt fdd| }ttj jd}t|| |S ttj	| S )Nc                    s     | jS r>   )get_data_layoutrE   dry   r   r   rc     s    z"_distribute_data.<locals>.<lambda>)batch_dim_name)
r   ry   r   rd   r   jax_distribution_libdistribute_data_inputr  rQ   
device_put)rU   layoutsjax_dist_data_inputr   r  r   r     s   
r   c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
r   c                 C   s
   t | jS r>   )rC   _epoch_iteratorr   r   r   r   __next__  s   
zJAXEpochIterator.__next__c                 C   s<   t  }|d ur| |S | jjr| j S | | j S r>   )r   ry   _get_distributed_iteratordata_adapterbuiltin_prefetchget_jax_iterator_prefetch_numpy_iterator)r   ry   r   r   r   _get_iterator  s   

zJAXEpochIterator._get_iteratorc                 #   sB    d}| j  D ]}|du rt fdd|}t||V  qdS )zALazily compute layouts to reduce host to device transfer latency.Nc                    s     | jjS r>   )r  rE   backend_layoutr  r  r   r   rc     s    z<JAXEpochIterator._get_distributed_iterator.<locals>.<lambda>)r#  r%  r   rd   r   )r   ry   r  rU   r   r  r   r"    s   
z*JAXEpochIterator._get_distributed_iteratorc                 #   sF    t  d fdd	}|dd r! V  |d sdS dS )a  Shard and prefetch batches on device.

        Most of the implementation has been borrowed from
        `flax.jax_utils.prefetch_to_device`

        This utility takes an iterator and returns a new iterator which fills an
        on device prefetch buffer. Eager prefetching can improve the performance
        of training loops significantly by overlapping compute and data
        transfer.
           c                    s$   t  | D ]	}t| qd S r>   )	itertoolsislicerI   r   )nrU   numpy_iteratorqueuer   r   enqueue  s   z:JAXEpochIterator._prefetch_numpy_iterator.<locals>.enqueue)r,  r^   N)r)  )collectionsdequepopleft)r   r.  r0  r   r-  r   r&    s   

z)JAXEpochIterator._prefetch_numpy_iteratorN)r  r  r  r!  r'  r"  r&  r   r   r   r   r     s
    r   r>   )'r1  r*  r   	functoolsr   rQ   r_   r   	keras.srcr   r   r   r   r   r   keras.src.backendr   r   r  keras.src.backend.configr	   keras.src.distributionkeras.src.trainersr
   base_trainer keras.src.trainers.data_adaptersr   r   !keras.src.trainers.epoch_iteratorr   keras.src.utilsr   flaxr   rr   Trainerr   r   r   r   r   r   r   <module>   sB           
1