o
    $iZ+                     @   s  d Z ddlmZ ddlZddlZddlZddlZddlZddlZ	ddl
mZ ddlmZ ejdkr7ed n4ddlmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZ dd	lm Z  dd
l!m"Z" ddl#m$Z$ dd Z%d8ddZ&d9ddZ'dd Z(d8ddZ)G dd dej*Z+e,dkrddlZddl-m.Z. e/ Z0e0j1dddd e02 \Z3Z4e3j5rej6dd  dZ7e.e7d!d" d#d" d$d" d%d&Z8ej9e+ej:d'd(e3j5rd)nd*iej;e7d+dd,d-ej<e8d+d.dd/d0e3j5d1d2d3d4d5d6d7Z=e=>  dS dS ):zExample training a memory neural net on the bAbI dataset.

References Keras and is based off of https://keras.io/examples/babi_memnn/.
    )print_functionN)FileLock)tune)      )
LSTM
ActivationDenseDropout	EmbeddingInputPermuteaddconcatenatedot)Model
Sequential
load_model)RMSprop)pad_sequences)get_filec                 C   s   dd t d| D S )zReturn the tokens of a sentence including punctuation.

    >>> tokenize("Bob dropped the apple. Where is the apple?")
    ["Bob", "dropped", "the", "apple", ".", "Where", "is", "the", "apple", "?"]
    c                 S   s    g | ]}|r|  r|  qS  )strip.0xr   r   `/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/tune/examples/pbt_memnn_example.py
<listcomp>/   s     ztokenize.<locals>.<listcomp>z(\W+)?)resplit)sentr   r   r   tokenize)   s   r!   Fc           
         s   g }g  | D ]\}| d }|dd\}}t|}|dkr!g  d|v rY|d\}}}t|}|rDtt| } fdd|D }ndd  D }||||f  d qt|}	 |	 q|S )	zParse stories provided in the bAbi tasks format

    If only_supporting is true, only the sentences
    that support the answer are kept.
    zutf-8    	c                    s   g | ]} |d   qS r#   r   )r   istoryr   r   r   F   s    z!parse_stories.<locals>.<listcomp>c                 S   s   g | ]}|r|qS r   r   r   r   r   r   r   I        )decoder   r   intr!   mapappend)
linesonly_supportingdatalinenidqa
supportingsubstoryr    r   r'   r   parse_stories2   s(   r8   c                    s0   dd  t |  |d} fdd|D }|S )zGiven a file name, read the file,
    retrieve the stories,
    and then convert the sentences into a single story.

    If max_length is supplied,
    any stories longer than max_length tokens will be discarded.
    c                 S   s
   t | g S N)sum)r1   r   r   r   flatten[   s   
zget_stories.<locals>.flatten)r0   c                    s4   g | ]\}}}rt  |k r |||fqS r   len)r   r(   r4   answerr;   
max_lengthr   r   r   _   s    zget_stories.<locals>.<listcomp>)r8   	readlines)fr0   r@   r1   r   r?   r   get_storiesR   s   	rC   c           
         s~   g g g }}}|D ]$\}}}	|  fdd|D  |  fdd|D  |  |	  q
t||dt||dt|fS )Nc                       g | ]} | qS r   r   r   wword_idxr   r   r   j   r)   z%vectorize_stories.<locals>.<listcomp>c                    rD   r   r   rE   rG   r   r   r   k   r)   )maxlen)r.   r   nparray)
rH   story_maxlenquery_maxlenr1   inputsqueriesanswersr(   queryr>   r   rG   r   vectorize_storiesg   s   

rR   c                 C   s   zt ddd}W n ty   td  w ddd}d}|| }t|}t||d	}t||d
}W d    n1 sDw   Y  | rW|d d }|d d }||fS )Nzbabi-tasks-v1-2.tar.gzzBhttps://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz)originzError downloading dataset, please download it manually:
$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz
$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gzz8tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txtz6tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt)single_supporting_fact_10ktwo_supporting_facts_10krT   traintest@   )r   	ExceptionprinttarfileopenrC   extractfileformat)finish_fastpath
challengeschallenge_type	challengetartrain_storiestest_storiesr   r   r   	read_datat   s0   
rg   c                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )
MemNNModelc                 C   s  t  }| j| j D ]\}}}|t || |g O }q	t|}t|d }tdd | j| j D }tdd | j| j D }dd t|D }t|||| j\| _| _	| _
t|||| j\| _| _| _t|f}	t|f}
t }|t|dd |t| jd	d
 t }|t||d |t| jd	d
 t }|t|d|d |t| jd	d
 ||	}||	}||
}t||gdd}td|}t||g}td|}t||g}td|}t| jd	d
|}t||}td|}t|	|
g|}|S )z$Helper method for creating the modelr#   c                 s   s    | ]
\}}}t |V  qd S r9   r<   )r   r   _r   r   r   	<genexpr>       z)MemNNModel.build_model.<locals>.<genexpr>c                 s   s    | ]
\}}}t |V  qd S r9   r<   )r   ri   r   r   r   r   rj      rk   c                 S   s   i | ]	\}}||d  qS r%   r   )r   r&   cr   r   r   
<dictcomp>   s    z*MemNNModel.build_model.<locals>.<dictcomp>rX   )	input_dim
output_dimdropout333333?)rn   ro   input_length)   rs   )axessoftmax)rs   r#       )setre   rf   sortedr=   max	enumeraterR   inputs_trainqueries_trainanswers_traininputs_testqueries_testanswers_testr   r   r   r   r
   configgetr   r   r   r   r   r	   r   )selfvocabr(   r4   r>   
vocab_sizerL   rM   rH   input_sequencequestioninput_encoder_minput_encoder_cquestion_encoderinput_encoded_minput_encoded_cquestion_encodedmatchresponsemodelr   r   r   build_model   sV   



zMemNNModel.build_modelc                 C   s   t tjd t|d \| _| _W d    n1 sw   Y  |  }t| j	
dd| j	
ddd}|j|dd	gd
 || _d S )Nz~/.tune.lockr_   lrgMbP?rho?)r   r   sparse_categorical_crossentropyaccuracy)	optimizerlossmetrics)r   osr`   
expanduserrg   re   rf   r   r   r   r   compiler   )r   r   r   rmspropr   r   r   setup   s   
zMemNNModel.setupc                 C   sl   | j j| j| jg| j| jdd| jdd| j| jg| j	fdd | j j
| j| jg| jdd\}}d|iS )	N
batch_sizerv   epochsr#   r   )r   r   validation_dataverbose)r   mean_accuracy)r   fitr{   r|   r}   r   r   r~   r   r   evaluate)r   ri   r   r   r   r   step   s   

zMemNNModel.stepc                 C   s   |d }| j | d S Nz/model)r   saver   checkpoint_dir	file_pathr   r   r   save_checkpoint  s   zMemNNModel.save_checkpointc                 C   s   | ` |d }t|| _ d S r   )r   r   r   r   r   r   load_checkpoint  s   zMemNNModel.load_checkpointN)__name__
__module____qualname__r   r   r   r   r   r   r   r   r   rh      s    Prh   __main__)PopulationBasedTrainingz--smoke-test
store_truezFinish quickly for testing)actionhelprs   )num_cpusc                   C      t jddS Nr   r#   rJ   randomuniformr   r   r   r   <lambda>$      r   c                   C   s   dt jdd S )N
   ir   )rJ   r   randintr   r   r   r   r   %  s    c                   C   r   r   r   r   r   r   r   r   &  r   )rp   r   r   )perturbation_intervalhyperparam_mutationspbt_babi_memnntraining_iteration   d   r   )checkpoint_frequencycheckpoint_score_attributenum_to_keep)namestopcheckpoint_configry   T)	schedulermetricmodenum_samplesreuse_actorsrv   r#   rq   g{Gz?r   )r_   r   r   rp   r   r   )
run_configtune_configparam_space)F)FN)?__doc__
__future__r   argparser   r   sysr[   numpyrJ   filelockr   rayr   version_infoexittensorflow.keras.layersr   r   r	   r
   r   r   r   r   r   r   tensorflow.keras.modelsr   r   r   tensorflow.keras.optimizersr   'tensorflow.keras.preprocessing.sequencer   tensorflow.keras.utilsr   r!   r8   rC   rR   rg   	Trainablerh   r   ray.tune.schedulersr   ArgumentParserparseradd_argumentparse_known_argsargsri   
smoke_testinitr   pbtTuner	RunConfigCheckpointConfig
TuneConfigtunerr   r   r   r   r   <module>   s    
0
	
 
&y		