o
    ߥi                     @   s  U d dl Z d dlmZmZ d dlmZ d dlmZmZm	Z	 d dl
Zd dlZd dlmZ d dlmZmZmZmZ d dlmZmZmZmZ d dlmZ d d	lmZmZ eeej f Z!eeej f Z"e"e#d
< g dZ$dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*e(dd Z+dd Z,dd Z-dd Z.d d! Z/dd"d#Z0e(	$	$dd%d&Z1e(d'd( Z2e(d)d* Z3e(d+d, Z4d-d. Z5e(d/d0 Z6e(dd2d3Z7d4d5 Z8d6d7 Z9e(dd8d9Z:d:d; Z;d<d= Z<e(dd?d@Z=e(dAdB Z>ddDdEZ?dFdG Z@dHdI ZAdJdK ZBdLdM ZCe(	$	$ddNdOZDe(	 	 ddPdQZEdRdS ZFdTdU ZGdVdW ZHe(dXdY ZIe(dZd[ ZJd\d] ZKd^d_ ZLd`da ZMddcddZNe(	>ddedfZOdgdh ZPdidj ZQe(	$ddkdlZRe(dmdn ZSe(dodp ZTdqe!dreUdse	eU dtejVfdudvZW		$ddwejVdreUdse	eU dxeXdtejVf
dydzZY		$dd
e!dreUdse	eU dxeXdtejVf
d{d|ZZ	}dd
e!dreUdseUd~e[de[dteeU fddZ\dejVdtejVfddZ]dejVdejVdejVdtejVfddZ^dd Z_dS )    N)reducewraps)add)ListMutableMappingOptional)
data_utils)batched_gatherone_hottensor_tree_maptree_map)N_EXTRA_MSAN_MSAN_RESN_TPL)residue_constants)FrameRotationprotein)msadeletion_matrixmsa_maskmsa_row_mask	bert_masktrue_msa
msa_chainsc                 C   sX   |   D ]%\}}|dr|tj| |< q|jtjtjtjfv r)|tj	| |< q| S )N_mask)
itemsendswithtypetorchfloat32dtypeint32uint8int8int64)r   kv r)   c/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/science/unifold/data/data_ops.pycast_to_64bit_ints#   s   
r+   c                 C   s   t j| d jt jd| d< | S )Naatyper"   seq_maskr    onesshaper!   r   r)   r)   r*   make_seq_mask.   s   
r3   c                 C   s"   t j| d jd t jd| d< | S )Ntemplate_aatyper   r-   template_maskr/   r2   r)   r)   r*   make_template_mask4   s   
r6   c                    s   t   fdd}|S )z#Supply all arguments but the first.c                     s    fddS )Nc                    s   | g R i S Nr)   x)argsfkwargsr)   r*   <lambda>?   s    z$curry1.<locals>.fc.<locals>.<lambda>r)   r:   r<   r;   r>   r*   fc=   s   zcurry1.<locals>.fc)r   )r;   r@   r)   r?   r*   curry1:   s   rA   c                 C   s\   | d   | d< tj}tj|tjddd| d jd }t	|d| d   | d< | S )z1Correct MSA restype to have the same order as rc.r   r-      r   )
longrc MAP_HHBLITS_AATYPE_TO_OUR_AATYPEr    tensorr%   	unsqueezeexpandr1   gather)r   new_order_list	new_orderr)   r)   r*   correct_msa_restypesD   s   rM   c                 C   s  t | d jdkrtj| d dd| d< d| v r)t | d jdkr)| d d | d< dD ];}|| v rft | | jrf| | jd }t|trf|dkrft| | r[tj| | dd| |< q+tj| | dd	| |< q+d
D ]}|| v r~t | | jr~| | d | |< qi| S )z=Remove singleton and repeated dimensions in protein features.r,      rB   dim
resolutionrC   r   )
domain_namer   num_alignments
seq_lengthsequencesuperfamilyr   between_segment_residuesresidue_indextemplate_all_atom_mask)axis)rT   rS   )	lenr1   r    argmax
isinstanceint	is_tensorsqueezenp)r   r'   	final_dimr)   r)   r*   squeeze_featuresP   s"   rc   c                 C   s   |dkrLt j| d j|k }d}d}t|| d |k}t|t| d | | d | d< t j| d j|k }t|t| d | | d | d< | S )z&Replace a portion of the MSA with 'X'.        r         r,   )ra   randomrandr1   r    logical_andwhere	ones_like)r   replace_proportionr   x_idxgap_idxaatype_maskr)   r)   r*   !randomly_replace_msa_with_unknownr   s(   rp   c                 C   s:   d}t tjdd| }t t ||  |  }|S )zGenerate Gumbel Noise of given Shape.
    This generates samples from Gumbel(0, 1).
    Args:
        shape: Shape of noise to return.
    Returns:
        Gumbel noise of given shape.
    ư>r   rC   )r    
from_numpyra   rg   uniformlog)r1   epsilonuniform_noisegumbelr)   r)   r*   gumbel_noise   s   rx   c                 C   s   t | j}tj| | ddS )a;  Samples from a probability distribution given by 'logits'.
    This uses Gumbel-max trick to implement the sampling in an efficient manner.
    Args:
        logits: Logarithm of probabilities to sample from, probabilities can be
        unnormalized.
    Returns:
        Sample from logprobs in one-hot form.
    rB   rO   )rx   r1   r    r\   logitszr)   r)   r*   gumbel_max_sample   s   
	r|   c                 C   s   t | j}tj| | dddS )a   Samples with replacement from a distribution given by 'logits'.
    This uses Gumbel trick to implement the sampling an efficient manner. For a
    distribution over k items this samples k times without replacement, so this
    is effectively sampling a random permutation with probabilities over the
    permutations derived from the logprobs.
    Args:
        logits: Logarithm of probabilities to sample from, probabilities can be
        unnormalized.
    Returns:
        Sample from logprobs in index
    rB   T)rP   
descending)rx   r1   r    argsortry   r)   r)   r*   gumbel_argsort_sample_idx   s   
r   c                 C   s4   t tj| d d }t jt dg|fddS )NrC   r   rO   )r    rr   ra   rg   permutationcatrG   )num_seqshuffledr)   r)   r*   uniform_permutation   s   r   c                 C   sb  t j|  dddk}t j|t jd}d|| < t|jdks!J |dd  }|dd  }|jd dkr:t dgS |d ur|dd  d}d|| < t	j
|dd\}}| }|dk }|| }|dk }	d	|d
  ||< d|| < |D ]}
|
dkr||
k}| }|dkr||  ||	|  9  < qxt |d
 }t|d }t jt dg|fddS )NrB   rO   r   r-   g    .rC   Treturn_counts      ?rq   )r    sumrD   
zeros_liker!   r[   r1   rG   reshapera   uniquert   r   r   )r   r   has_msarz   keyscountsnum_has_msanum_pair
num_unpair
num_chainsr'   cur_maskcur_cntr   r)   r)   r*   gumbel_permutation   s8   


r   Fc                 C   s   | d j d }t||}|st|}n|rd| v r| d nd}t| d |}t||}t|||| g\}	}
tD ] }|| v rY|rNt| | d|
| d| < t| | d|	| |< q9| S )zLSample MSA randomly, remaining sequences are stored are stored as `extra_*`.r   r   r   Nr   extra_)r1   minr   r   r    splitMSA_FEATURE_NAMESindex_select)r   max_seq
keep_extragumbel_samplebiased_msa_by_chainr   num_selindex_orderr   sel_seqnot_sel_seqr'   r)   r)   r*   
sample_msa   s0   




r   c                 C   s(   d| v r| d dkrt |dd| } | S )Nis_distillationrC   F)r   )r   )r   r   r)   r)   r*   sample_msa_distillation   s   r   c                 C   s   | d j d }| d j d }|j| }||krSttjj|d |d dd d }t|d }tj	t
dg|fdd}tD ]}|| v rRt| | d|| |< qA| S )Nr   r   rC   FreplacerO   )r1   max_msa_entryr    rr   ra   rg   choicerD   sortr   rG   r   r   )r   configr   seq_lenr   
keep_indexr'   r)   r)   r*   random_delete_msa   s&   
r   c                 C   sj   | d j d }t||}ttj|d | }tD ]}d| | v r2t| d|  d|| d| < q| S )N	extra_msar   r   )	r1   r   r    rr   ra   rg   r   r   r   )r   max_extra_msar   r   select_indicesr'   r)   r)   r*   crop_extra_msa  s   
r   c                 C   s$   t D ]}d| | v r| d| = q| S )Nr   )r   )r   r'   r)   r)   r*   delete_extra_msa  s
   
r   c                 C   s  d| v r| d dkr| S | d j d }||jkr| S ttj|tjd|j tj}|j	r:t
jd|jd }n|j}tt
jd||g}|d d d f td| }t|d|d }t|d}ttd|d  |d  tdd  f }|jdd	\}	}
|	|
dk }|d}ttd d  |d  gd}t|d dksJ tD ]}|| v rtj| | d|d
| |< q| S )Nr   rC   r   r   r-   rB   rN   Tr   )index)r1   min_num_msar    floorrG   r!   msa_fraction_per_blocktor#   randomize_num_blocksra   rg   randint
num_blocksrr   arangeclipr   viewhstackzerosrD   r^   r   r   )r   r   r   block_num_seqnbdel_block_starts
del_blocksdel_indicescombineduniquesr   
differencekeep_indicesr'   r)   r)   r*   block_delete_msa&  sL   

r   rd   c                 C   s   t t d|t d t dgd}t| d d}| d d d d d d f | }t| d d}| d d d d d d f | }|j\}}}	|j\}
}	}	||
|d }|| ||d dd}|| }t j|dd		 | d
< | S )Nrf   rC   r   r      r   r   extra_msa_maskrO   extra_cluster_assignment)
r    r   r0   r   r
   r1   r   	transposer\   rD   )r   gap_agreement_weightweightsmsa_one_hotsample_one_hotextra_msa_one_hotextra_one_hotr   num_res_extra_num_seqab	agreementr)   r)   r*   nearest_neighbor_clustersJ  s"   r   c                 C   s   t |jdkr|jd | jd ksJ |j|jd gdt | jdd   R  }|| j}|gt| jdd   }tj| d||  }|	| j
}|S )NrC   r   rC   )r[   r1   r   rI   listr    r   scatter_add_floatr   r"   )datasegment_idsnum_segmentsr1   rG   r)   r)   r*   unsorted_segment_sume  s   r   c                    s   d j d   fdd}d }dd  || }||dddddf td	 d
 }|td d
7 }||dddddf  d< ~||d  }|d 7 }|| d< ~S )z=Produce profile and deletion_matrix_mean within each cluster.r   r   c                    s   t | d  S )Nr   )r   r8   r   r   r)   r*   csumu  s   
z summarize_clusters.<locals>.csumr   rq   r   Nr   r   cluster_profileextra_deletion_matrixr   cluster_deletion_mean)r1   r
   )r   r   maskmask_countsmsa_sumdel_sumr)   r   r*   summarize_clustersq  s   (r   c                 C   s  t jdgd |g dg t jd}| d }| d }t| d d}t| d	 d}|d
d
d
d
d
f | }|d
d
d
d
d
f | }|| }	|	|	jd |	jd |	jd  }	||jd |jd |jd  }
|	|
j }t jjj	d| dd}|t 
d||9 }t j|dd}|d7 }t 
d||}||7 }||d
d
d
d
f  }| d }| d }t 
d||| }||7 }||d
d
d
f  }|| d< || d< | S )zFAssign each extra MSA sequence to its nearest neighbor in sampled MSA.r   rf   rd   r-   r   r   r   r   r   Nr   rC   rN   g     @@rO   z
mr, nr->mnrB   znm, mrc->nrcr   r   z
nm, mc->ncr   r   )r    rG   r!   r
   r   r1   Tnn
functionalsoftmaxeinsumr   )batchr   r   r   
extra_maskr   r   msa_one_hot_maskedextra_one_hot_maskedt1t2r   cluster_assignmentcluster_countr   r   r   r   r   r   r)   r)   r*   nearest_neighbor_clusters_v2  sF   	"
r   c                 C   sD   d| vrt j| d jt jd| d< t j| d jd t jd| d< | S )z:Mask features are all ones, but will later be zero-padded.r   r   r-   r   r   r/   r2   r)   r)   r*   make_msa_mask  s   

r   c              	   C   s   | j d dkr?t| tjd }tjd }tjd }tt|d dgt|j  dg |d|d	d	f |d|d	d	f }n|j	g | j dR  }|d	urs| j d dkrht||d|f |d|f }||fS t
|  }||fS |S )
zCreate pseudo beta features.r   GCACB.NrC      .N)r1   r    eqrE   restype_order
atom_orderrj   tiler[   	new_zerosr   r   )r,   all_atom_positionsall_atom_maskis_glyca_idxcb_idxpseudo_betapseudo_beta_maskr)   r)   r*   pseudo_beta_fn  s&   

 
r   c                 C   sP   |dv sJ t | |rdnd | |d  | |rdnd \| |d < | |d < | S )	z9Create pseudo-beta (alpha for glycine) position and mask.)r  	template_r4   r,   r  rY   r  r  r  )r  )r   prefixr)   r)   r*   make_pseudo_beta  s   


r  c                 C   s   t || |< | S r7   r    rG   )r   keyvaluer)   r)   r*   add_constant_field  s   r  绽|=c                 C   s`   | j }|d }t| | d|g} t }|tjd tj| d|d}t||d d S )NrB   i  rC   )	generator)	r1   r    r   	Generatormanual_seedra   rg   r   multinomial)probsru   dsnum_classesgenr   r)   r)   r*   shaped_categorical  s   r   c                 C   s0   d| v r| S t | d d}tj|dd| d< | S )7Compute the HHblits MSA profile if not already present.hhblits_profiler      r   rO   )r
   r    mean)r   r   r)   r)   r*   make_hhblits_profile  s
   r%  c                 C   sL   t | d d}| d dddddf }||9 }|jdd|jddd  S )zCompute the MSA profile.r   r#  r   Nr   rO   r  )r
   r   )r   ohr   r)   r)   r*   make_msa_profile  s   r'  c                 C   s   d| v r| S t | | d< | S )r!  r"  )r'  r2   r)   r)   r*   make_hhblits_profile_v2  s   r(  c                 C   s   d|vr| S |d }|d }|d }|  }|dk}|D ]*}||k}t|| d }	|	dkrF||@ }
| d d |
f }|d|	| d d |f< q| S )Nnum_sym	entity_idsym_idrC   r   )r   r^   repeat)mask_positionr   r*  r+  r)  unique_entity_idsfirst_sym_maskcur_entity_idcur_entity_maskcur_num_symcur_sym_maskcur_sym_bert_maskr)   r)   r*   share_mask_by_entity  s$   r5  c                 C   sn  t jdgd ddg t jd}|j| |j| d   |jt| d d  }ttt	dd	 t
t|jD }d
|d
< d|j |j |j }|dksKJ t jjj|||d}| d j}	t tjj|	 |k }
|
| d  M }
d| v ry|
| d  M }
|rt|
| }
|rt |d }t|}nt|}t |
|| d }|| d  9 }|
t j| d< | d | d< || d< | S )z Create data for BERT on raw MSA.g?re   rd   r-   r"  r   r#  c                 S      g | ]}d qS )r   r   r)   .0r   r)   r)   r*   
<listcomp>@      z#make_masked_msa.<locals>.<listcomp>rC   r   )r  r   r   rq   r   )r    rG   r!   uniform_probprofile_prob	same_probr
   r   r   r   ranger[   r1   r   r   padrr   ra   rg   rh   boolr5  rt   r|   r   rj   rD   r   )r   r   replace_fractionr   
share_mask	random_aacategorical_probs
pad_shapes	mask_probshr-  rz   bert_msar)   r)   r*   make_masked_msa/  sB   


rJ  c              	      sX  ddd}|dur$d| v r| d j d n| d j d }||kr$||d}d	| v r8| d	 j d }||kr8||d
}t|t|t|t|i |  D ]c\}	|	dkrOqFtj }
||	 }d}t|
t|ksqJ | d|	 d|
 d|  fddt|
|D }fddt	|D }|
  ttj| }|rtjj|| |	< t| |	 || |	< qF| S )z;Guess at the MSA and sequence dimension to make fixed size.   c                 S   s   t || | d | | S )NrC   max)cur_size
multiplierr)   r)   r*   get_pad_sizej  s   z%make_fixed_size.<locals>.get_pad_sizeNr,   r   r   rC   r      r   z0Rank mismatch between shape and shape schema for z: z vs c                    s    g | ]\}}  |d p|qS r7   )get)r9  s1s2)pad_size_mapr)   r*   r:    s    z#make_fixed_size.<locals>.<listcomp>c                    s"   g | ]\}}d | j |  fqS r   r1   )r9  ip)r(   r)   r*   r:    s   " )rK  )r1   r   r   r   r   r   r   r[   zip	enumeratereverse	itertoolschainr    r   r   r@  r   )r   shape_schemamsa_cluster_sizeextra_msa_sizer   num_templatesrP  input_num_resinput_extra_msa_sizer'   r1   schemamsgpad_sizepaddingr)   )rV  r(   r*   make_fixed_size_  sF   



.
rj  c                 C   s   | d   | d< d| v rt| d tjdd}n%tj| d tjd}d| v r;| d }tj|ddd	d }d
||< | }t| d d}tj	|dd|g}tj
|dd| d< | S )$Create and concatenate MSA features.r,   rW   r   rC   r-   asym_lenrB   rO   Nr   rf   target_feat)rD   r    r   r   r!   r   cumsumr   r
   rH   r   )r   	has_breakrl  entity_endsaatype_1hotrm  r)   r)   r*   make_target_feat  s"   rr  c                 C   s   t | d d}t| d dd}t| d d dtj  }|tj|dd	tj|dd	g}d
| v rMt| d d dtj  }|| d
 tj|dd	g d| v rlt| d dd| d< t| d d dtj  | d< tj|dd	| d< | S )rk  r   r   r   rd   r         @       @rB   rO   r   r   r   extra_msa_has_deletionextra_msa_deletion_valuemsa_feat)	r
   r    r   atanra   pirH   extendr   )r   msa_1hothas_deletiondeletion_valuerw  deletion_mean_valuer)   r)   r*   make_msa_feat  s@   



r  c                 C   s   t | d d}| d }t|ddd }t|d dtj  d }t| d	 d dtj  d }|||| d
 |g}tj|dd| d< | S )rk  r   r   r   rd   r   r   rs  rt  r   r   rB   rO   rw  )r
   r    r   rx  ra   ry  arctanr   )r   r{  r   r|  r}  r~  rw  r)   r)   r*   make_msa_feat_v2  s"   r  c                 C   sz   | d d | }| d d | }t |dd}t |d dtj  }| d d | }|| d< || d< || d< || d	< | S )
Nr   r   rd   r   rs  rt  r   ru  rv  )r    r   rx  ra   ry  )r   num_extra_msar   r   r|  r}  r   r)   r)   r*   make_extra_msa_feat  s   r  c                    s    fdd|   D S )Nc                    s   i | ]\}}| v r||qS r)   r)   r9  r'   r(   feature_listr)   r*   
<dictcomp>  s    zselect_feat.<locals>.<dictcomp>)r   )r   r  r)   r  r*   select_feat  s   r  c                 C   s   d| v r| S t jtjt j| d jd}t jtjt j| d jd}t jtjt j| d jd}t jtj	t j| d jd}| d 
 }|| 
 | d< || 
 | d< || | d< || | d< | S )z>Construct denser atom positions (14 dimensions instead of 37).atom14_atom_existsr,   r"   deviceresidx_atom14_to_atom37residx_atom37_to_atom14atom37_atom_exists)r    rG   rE   restype_atom14_to_atom37r&   r  restype_atom37_to_atom14restype_atom14_maskr!   restype_atom37_maskrD   )r   r  r  r  r  protein_aatyper)   r)   r*   make_atom14_masks  sH   r  c                 C   s,   t dd | tj} t| }tdd |}|S )Nc                 S   
   t | S r7   r  )nr)   r)   r*   r=        
 z&make_atom14_masks_np.<locals>.<lambda>c                 S   r  r7   )ra   array)tr)   r)   r*   r=     r  )r   ra   ndarrayr  r   )r   outr)   r)   r*   make_atom14_masks_np  s   r  c           
   	   C   s8  | d   | d< | d  | d< | d  | d< | d }| d }|t| d |dt| d jdd d }|d	 t| d |d
t| d jdd
 d }|| d< || d< || d< tjtj| d j	| d j
d}|| d  }td||}|| d< td||}|| d< tjtj| d j	| d j
d}	|	| d  | d< | S )z?Constructs denser atom positions (14 dimensions instead of 37).r,   r  r  r  r  rB   NrP   num_batch_dimsr   atom14_gt_existsatom14_gt_positionsr  z...rac,...rab->...rbcatom14_alt_gt_positionsz...ra,...rab->...rbatom14_alt_gt_existsatom14_atom_is_ambiguous)rD   r   r	   r[   r1   r    rG   rE   renaming_matricesr"   r  r   restype_atom14_is_ambiguous)
r   residx_atom14_maskr  residx_atom14_gt_maskresidx_atom14_gt_positionsr  renaming_transformalternative_gt_positionsalternative_gt_maskr  r)   r)   r*   make_atom14_positions!  s^   r  :0yE>c                    s  | d }| d }| d }t |jd d }tjg ddtd}g d|d d d	d d f< g d
|d d dd d f< ttjD ]-\}}tj| }	t	dD ]}
tj
| |
 rjtj|	 |
 }|dd  |||
d d d f< qKq>|g |jd d ddR }d|d< d|d< |tj
|dd ddd f< tj  d	 d< t fdd}||}||}|jg d| |jR  }t||d|d}t||dt |jd d d}tj|dd	d d f |ddd d f |ddd d f |d}t||d|d}t||dt |jd d d}tj|ddd	 | }tjd|j|jd}t|g d| dddR }d|d< d|d< t|d }|t|d }|jg d| ddR  }tjd|j|jd}t|g d| ddddR }tj D ]7\}	}tjtj |	  }t!t"tj
| d }
d|d||
d f< d|d||
d ddf< d|d||
d ddf< qst||d|d}t||d!|d}t|d }|t|d }|# }|# }|| d"< || d#< || d$< || d%< || d&< | S )'Nr,   r  r  rB   )rf   rQ  r  r  r-   )Cr   Nr   )r   r  Or  rK  rC   rf   rQ  .r   ).r  .re   c                    s    |  S r7   r)   r8   lookuptabler)   r*   r=     s    z"atom37_to_frames.<locals>.<lambda>r   r  r  rN   )p_neg_x_axisorigin
p_xy_planeepsrO   r  ).r   r   r   ).r   rN   rN   )matrigidgroups_gt_framesrigidgroups_gt_existsrigidgroups_group_existsrigidgroups_group_is_ambiguousrigidgroups_alt_gt_frames)$r[   r1   ra   fullobjectr\  rE   restypesrestype_1to3r?  chi_angles_maskchi_angles_atomsr  
new_tensorr  copy	vectorizer   r	   r   from_3_pointsr    r   eyer"   r  r  r   composeresidue_atom_renaming_swapsr   r  restype_3to1r^   r   to_tensor_4x4)r   r  r,   r  r  
batch_dims"restype_rigidgroup_base_atom_namesrestyperestype_letterresnamechi_idxnamesrestype_rigidgroup_masklookup"restype_rigidgroup_base_atom37_idx!residx_rigidgroup_base_atom37_idxbase_atom_pos	gt_framesgroup_existsgt_atoms_exist	gt_existsrotsrestype_rigidgroup_is_ambiguousrestype_rigidgroup_rotsr   residx_rigidgroup_is_ambiguousresidx_rigidgroup_ambiguity_rotalt_gt_framesgt_frames_tensoralt_gt_frames_tensorr)   r  r*   atom37_to_framesb  s   




"

r  c              	   C   s8  | |d  }| |d  }| |d  }|j d dkrL|j }|jg |ddR  | |d < |jg |ddR  | |d	 < |jg |dR  | |d
 < | S tj|dd}|g |j d d ddd}tj||dd dd d d d f gdd}|g |j d d dd}tj||dd dd d f gdd}tj|dddd d f |dd dd d f gdd}	tj|dddd d f |dd dd d f gdd}
tj|dd dd d f |dddd d f gdd}tj|dddf ddtj|dd df dd }|d tj|dd df d|jd }tj|dd df d|jd|d  }tjtj	|j
d}|d|d d d d f }t||dt|j d d }ttj}|g d ||}||d d f }t||dt|j d d d}tj|d|jd}|| }tj|	dd d d d d f |
dd d d d d f |dd d d d d f |gdd}tj|d |d |d |gdd}tj|ddd d f |ddd d f |ddd d f dd}| |ddd d f }tj|d |d gdd}ttjt|d|jd d!d }|| }||g d"d#t|j d d  td d f   }|tj|df }tj|jg |j dR  d$d%|  gdd}||d  }|d&krtjt|j d d t|j d d gdd}||d  |d|d    }||d  |d|d    }|| |d < || |d	 < || |d
 < | S )'Nr,   r  r  rB   r      rN   torsion_angles_sin_cosalt_torsion_angles_sin_costorsion_angles_maskre   rL  r  rC   %   r  .rO   r  rK     ).rN   )rP   r"   ).rK  r  )rd   rd   rd   rd   r  r   r  )r  ).rC   T)rP   r"   keepdims)r   r   g      r   r   r   r   r7   r   rt  r  )r1   r  r    clampr   prodr"   	as_tensorrE   chi_atom_indicesr  r	   r[   r   r  appendr  r   r  invertapplystacksqrtr   squareslicechi_pi_periodicnew_onesr0   r   )r   r  r,   r  r  
base_shaper@  prev_all_atom_positionsprev_all_atom_maskpre_omega_atom_posphi_atom_pospsi_atom_pospre_omega_maskphi_maskpsi_maskr  atom_indiceschis_atom_posr  	chis_maskchi_angle_atoms_masktorsions_atom_posr  torsion_framesfourth_atom_rel_posr  denomchi_is_ambiguousmirror_torsion_anglesr  placeholder_torsionsr)   r)   r*   atom37_to_torsion_angles  sP    $***







	
r  c                 C   s4   | d ddd d d d f | d< | d d | d< | S )Nr  .r   true_frame_tensorr  r  
frame_maskr)   r2   r)   r)   r*   get_backbone_frames  s
   
r  c                 C   sP   | d j }| d ddd d d f || d< | d ddd f || d< | S )Nr  r  .r  chi_angles_sin_cosr  chi_mask)r"   r   )r   r"   r)   r)   r*   get_chi_angles  s
   
r  c                 C   s  d| v r| d j d }nd}|dkr|r0t|tjd|d }tjtjj||ddtjd}ntj	t||tjd}| 
 D ]H\}}|drz|| }W n4 ty } z(t|j| td	| t||j  td
|  tddd | 
 D  W Y d }~nd }~ww || |< q?| S )Nr5   rB   r   rC   Fr   r-   templaterc  zprotein:zprotein_shape:c                 S   s$   i | ]\}}d t |v r||jqS rX  )dirr1   r  r)   r)   r*   r    s    z"crop_templates.<locals>.<dictcomp>)r1   r   ra   rg   r   r    rG   r   r&   r   r   
startswith	Exceptionprint	__class__)r   max_templatessubsample_templatesrc  template_idxr'   r(   exr)   r)   r*   crop_templates  sD   



r#  c                 C   s@   d| v r| d j d n| d j d }t|||}t| ||} | S )crop to size.r,   r   r   rC   )r1   get_single_crop_idxapply_crop_idx)r   	crop_sizer`  seedr   crop_idxr)   r)   r*   crop_to_size_single  s   r*  c           	      C   s   t j|dd tj |k }W d   n1 sw   Y  d| v o'| d dk}|r3t|||d| S |r=t| |||}nt| ||}t| ||S )r$  multimer_cropr  Nr   rC   )r'  r`  r(  )	r   
numpy_seedra   rg   rh   r*  get_spatial_crop_idxget_contiguous_crop_idxr&  )	r   r'  r`  r(  spatial_crop_probca_ca_thresholduse_spatial_cropr   r)  r)   r)   r*   crop_to_size_multimer  s(   
r3  r   r'  random_seedreturnc                 C   sj   | |k r	t | S t| ttjd| | d }t ||| W  d    S 1 s.w   Y  d S )Nr   rC   )r    r   r   r-  r^   ra   rg   r   )r   r'  r4  
crop_startr)   r)   r*   r%    s   
$r%  rl  use_multinomialc              	   C   sD  |st j|dd tjt| }W d   n1 sw   Y  |  }t|}dd | D }t	|D ]K\}}	| |	 }
||
8 }t
||
}t
|
td|| }t j||dd ttjjt|t|d d	}W d   n1 ssw   Y  ||8 }|||	< q5t|}|S | t|  }ttjj||d
}t
|| }|S )z"get crop sizes for contiguous cropmultimer_contiguous_permr,  Nc                 S   r6  rW  r)   r8  r)   r)   r*   r:    r;  z-get_crop_sizes_each_chain.<locals>.<listcomp>r   multimer_contiguous_crop_sizerC   )lowhigh)pvals)r   r-  ra   rg   r   r[   r   r    rG   r\  r   rM  r^   r   rr   r  )rl  r'  r4  r7  shuffle_idxnum_left
num_budget
crop_sizesjidxthis_lenmax_sizemin_sizethis_crop_sizeentity_probsr)   r)   r*   get_crop_sizes_each_chain  sB   



rH  c              	   C   s   | d j d }||krt|S d| v sJ | d }t||||}g }tjdtjd}tj|dd2 t||D ]$\}	}
t	j
dt|	|
 d }|t|| || |
  ||	7 }q8W d    n1 sgw   Y  t|S )Nr,   r   rl  r-   "multimer_contiguous_crop_start_idxr,  rC   )r1   r    r   rH  rG   r&   r   r-  r[  ra   rg   r   r^   r  r   )r   r'  r4  r7  r   rl  r@  	crop_idxsasym_offsetllcsz
this_startr)   r)   r*   r/    s0   




	r/       L@r1  infc                 C   s4  t jd }| d d|d d f }| d d|f  }|jdddk r*t| ||S |d |dd d d f  }t|}	t|	| d	 ||}
t	|
rit
j|d
d ttj|
}W d    n1 scw   Y  nt| ||S |	| }||| < tjd|jd |jd d }||7 }t|d | }| jS )Nr   r  .r  rB   rO   rC   r   asym_idmultimer_spatial_cropr,  r   r  gMbP?)rE   r  rA  r   allr/  get_pairwise_distancesget_interface_candidatesr    anyr   r-  r^   ra   rg   r   r   r1   r  r   r~   r   values)r   r'  r4  r1  rP  r
  	ca_coordsca_mask	pair_maskca_distancesinterface_candidates
target_resto_target_distances	break_tieretr)   r)   r*   r.  .  s<   



r.  coordsc                 C   s,   |  d|  d }ttj|d ddS )Nr  r  rN   rB   rO   )rH   r    r  r   )ra  
coord_diffr)   r)   r*   rT  W  s   rT  r[  rQ  rZ  c                 C   s\   |d |dd d d f k}| d|    | } tj| dk| |k @ dd}|jddd }|S )	Nr   .r   r   rB   rO   T)as_tuple)r   r    r   nonzero)r[  rQ  rZ  r1  in_same_asymcnt_interfacesr\  r)   r)   r*   rU  \  s   rU  c                 C   sX   i }|   D ]#\}}||vrqt|| D ]\}}|tkr$t|||}q|||< q|S r7   )r   r\  r   r    r   )r   r`  r)  cropped_proteinr'   r(   rY  dim_sizer)   r)   r*   r&  l  s   
r&  r7   )FF)rd   )r  )r  r7  )r  )F)NF)rO  )`r^  	functoolsr   r   operatorr   typingr   r   r   numpyra   r    unicore.datar   unicore.utilsr	   r
   r   r   (modelscope.models.science.unifold.configr   r   r   r   &modelscope.models.science.unifold.datar   rE   /modelscope.models.science.unifold.modules.framer   r   strr  	NumpyDict	TorchDict__annotations__r   r+   r3   r6   rA   rM   rc   rp   rx   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r   r%  r'  r(  r5  rJ  rj  rr  r  r  r  r  r  r  r  r  r  r  r  r#  r*  r3  r^   Tensorr%  rA  rH  r/  r   r.  rT  rU  r&  r)   r)   r)   r*   <module>   s&  

"

"



#3



	/6

%
A   	-





)
#
)
