o
    ॵiL                  	   @   sx   d dl Z d dlmZmZ d dlZd dlmZ d dlmZ dee	 dee	 fddZ
d	e	d
e	dee	 dee	 fddZdS )    N)MappingSequence)logging)protein	sequencesdescriptionsc                 C   s   g }| D ]}||vr| | qdd ttj|D }g }t| |D ]\}}tj|| }|| d  | | | q"||fS )zx
    Makes a mapping from PDB-format chain ID to sequence and description,
    and parses the order of multi-chains
    c                 S   s   i | ]
\}}|g |d qS ))r   sequence ).0chain_idseqr	   r	   _/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/science/unifold/msa/utils.py
<dictcomp>   s    z$get_chain_id_map.<locals>.<dictcomp>r   )appendzipr   PDB_CHAIN_IDSindex)r   r   unique_seqsr   chain_id_mapchain_orderdesr   r	   r	   r   get_chain_id_map   s   

r   
fasta_nameoutput_dir_basec              	   C   s  t |t |krtdt | dt | dt |tjkr(tdt | dt||\}}tj|| }tj|sAt	| tj|d}t
|d}tj||dd	d
 W d   n1 saw   Y  tj|d}	t
|	d}|d| W d   n1 sw   Y  tdd| g }
g }| D ]G}| d| }tj||d }d|}|| d }t
|d}|d| d |  W d   n1 sw   Y  |
| || q|
|fS )zz
    Divides the multi-chains fasta into several single fasta files and
    records multi-chains mapping information.
    z7sequences and descriptions must have equal length. Got z != .z=Cannot process more chains than the PDB format supports. Got z chains.zchain_id_map.jsonw   T)indent	sort_keysNz
chains.txt z/Mapping multi-chains fasta with chain order: %sz_{}z.fastazchain_{}r   >
)len
ValueErrorr   PDB_MAX_CHAINSr   ospathjoinexistsmakedirsopenjsondumpwriter   infokeysformatr   )r   r   r   r   r   r   
output_dirchain_id_map_pathfchain_order_path
temp_names
temp_pathsr   	temp_name	temp_pathr   r   r	   r	   r   divide_multi_chains+   sP   




r9   )r%   typingr   r   r+   abslr   &modelscope.models.science.unifold.datar   strr   r9   r	   r	   r	   r   <module>   s&   
