o
    ߥi"                     @   s   d dl mZ d dlZd dlZd dlmZ dd Zdd Zdd	 Z	d
d Z
ejdd Zdd Zdd Z	ddedee defddZddee fddZdd ZdS )    )OptionalN)data_opsc                 C   s   | j }g }|jr|t| j |tjtjtjtdtj	tj
g ||r+tjntj | jrI|tjtdg |tj|j|jd | jrU|tdg |tj |tj |S )z8Input pipeline data transformers that are not ensembled.g        	template_)max_templatessubsample_templates)
v2_featurerandom_delete_msaappendr   extendcast_to_64bit_intscorrect_msa_restypessqueeze_features!randomly_replace_msa_with_unknownmake_seq_maskmake_msa_maskmake_hhblits_profile_v2make_hhblits_profileuse_templatesmake_template_maskmake_pseudo_betacrop_templatesr   r   use_template_torsion_anglesatom37_to_torsion_anglesmake_atom14_masksmake_target_feat)
common_cfgmode_cfgr   	operators r   b/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/science/unifold/data/process.pynonensembled_fns   sJ   
r    c              	   C   s   g }| j r|j|j }n|j}t| j}|jrP|jr9| jr+tj	|j
|||j|jd}n	tj|j
||d}|| |t| |t||| j|j
|j |S )N)	crop_sizeshape_schemaseedspatial_crop_probca_ca_threshold)r!   r"   r#   )$reduce_msa_clusters_by_max_templatesmax_msa_clustersr   dictfeatures
fixed_sizecropis_multimerr   crop_to_size_multimerr!   r$   r%   crop_to_size_singler	   select_featmake_fixed_sizemax_extra_msa)r   r   crop_and_fix_size_seedr   pad_msa_clusters
crop_featscrop_fnr   r   r   crop_and_fix_size_fns3   s@   

r6   c           	      C   sL  g }| j }| j}|jr|s|t| j d|v r#|t|j | jr-|j|j	 }n|j}|}| j
}| js:J | j}|tj|d||jd d| v r]|tj| j|j||jd | jrw|rj|t  n|t  |tj |r|tj n|tj |r|r|t| |S |t| |S |tj |S )zDInput pipeline data transformers that can be ensembled and averaged.max_distillation_msa_clustersT)
keep_extragumbel_samplebiased_msa_by_chain
masked_msa)r9   
share_mask)r,   r   block_delete_msar	   r   sample_msa_distillationr7   r&   r'   r   r1   resample_msa_in_recyclingr9   
sample_msar:   make_masked_msar;   masked_msa_replace_fractionr<   msa_cluster_featuresnearest_neighbor_clusters_v2nearest_neighbor_clusterssummarize_clustersmake_msa_feat_v2make_msa_featmake_extra_msa_featcrop_extra_msadelete_extra_msa)	r   r   r   multimer_moder   r3   r'   r1   r9   r   r   r   ensembled_fnsY   sj   


rM   c                    s   t dd jtd }t | fddt }jr3r-r3|t  t	|td d }j
}tfdd	t|| }t	fd
dD | S )zCBased on the config, apply filters and transformations to the data.is_distillationr   r2   c                    sb   |   }t }t||}rr!t j|}t|S t|}t j|}|S )z2Function to be mapped over the ensemble dimension.)copyrM   composer   r/   recycling_features)dataidfnsnew_d)r   r5   rN   r   rL   r   r   wrap_ensemble_fn   s   z*process_features.<locals>.wrap_ensemble_fnnum_recycling_iters   c                    s
    | S Nr   )x)tensorsrW   r   r   <lambda>   s   
 z"process_features.<locals>.<lambda>c                    s"   i | ]}|t j | gd dqS )r   dim)torchstack.0k)r\   r   r   
<dictcomp>   s   " z$process_features.<locals>.<dictcomp>)boolgetr,   intr6   r    
supervisedr
   label_transform_fnrP   num_ensemblesmap_fnr`   arangeupdate)r\   r   r   r2   nonensemblednum_recyclingrk   ensemble_tensorsr   )r   r5   rN   r   rL   r\   rW   r   process_features   s.   

rr   c                 C   s   |D ]}|| } q| S rZ   r   )r[   fsfr   r   r   rP      s   
rP   c                 C   s   t | d jdkrDtdd | D }g }| D ],}|jd |k r;| d j|g|jdd  R  }||d |jd df< n|}|| qn| }tj|ddS )Nr   rY   c                 s   s    | ]}|j d  V  qdS )r   N)shape)rc   vr   r   r   	<genexpr>   s    z!pad_then_stack.<locals>.<genexpr>.r^   )lenru   max	new_zerosr	   r`   ra   )valuessize
new_valuesrv   resr   r   r   pad_then_stack   s    r   c                    sJ   fdd|D }|d   }i }|D ] t fdd|D | < q|S )Nc                    s   g | ]} |qS r   r   )rc   elem)funr   r   
<listcomp>       zmap_fn.<locals>.<listcomp>r   c                    s   g | ]}|  qS r   r   )rc   dict_i)featr   r   r      r   )keysr   )r   r[   	ensemblesr)   ensembled_dictr   )r   r   r   rl      s   
rl   labelnum_ensemblereturnc                    sT   d| v sJ d| v sJ d| v sJ t t | }  d ur( fdd|  D } | S )Naatypeall_atom_positionsall_atom_maskc                    s.   i | ]\} |t  fd dtD qS )c                    s   g | ]} qS r   r   )rc   _rv   r   r   r      s    z3process_single_label.<locals>.<dictcomp>.<listcomp>)r`   ra   rangerb   r   r   r   re      s    z(process_single_label.<locals>.<dictcomp>)rP   rj   items)r   r   r   r   r   process_single_label   s   
r   c                    s    fdd| D S )Nc                    s   g | ]}t | qS r   )r   )rc   llr   r   r   r      s    z"process_labels.<locals>.<listcomp>r   )labels_listr   r   r   r   process_labels   s   r   c                   C   s(   t jt jt jt dt dt jt jgS )N )r   r   make_atom14_positionsatom37_to_framesr   r   get_backbone_framesget_chi_anglesr   r   r   r   rj      s   rj   rZ   )typingr   numpynpr`   &modelscope.models.science.unifold.datar   r    r6   rM   rr   curry1rP   r   rl   r(   rh   r   r   rj   r   r   r   r   <module>   s(   '&C0

