o
    ॵi                     @   s`   d Z ddlZddlZddlmZ ddlmZ ddlmZ dee	 de	fd	d
Z
G dd dZdS )zA Python wrapper for Kalign.    N)Sequence)logging   )utils	sequencesreturnc                 C   s^   dd t dt| d D }g }t| |D ]\}}|d| d  ||d  qd|S )z"Converts sequences to an a3m file.c                 S   s   g | ]}d | qS )zsequence %d ).0ir   r   f/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/science/unifold/msa/tools/kalign.py
<listcomp>   s    z_to_a3m.<locals>.<listcomp>r   >
 )rangelenzipappendjoin)r   namesa3msequencenamer   r   r   _to_a3m   s   
r   c                   @   s4   e Zd ZdZdefddZdee defddZd	S )
Kalignz$Python wrapper of the Kalign binary.binary_pathc                C   s
   || _ dS )zInitializes the Python Kalign wrapper.

        Args:
            binary_path: The path to the Kalign binary.

        Raises:
            RuntimeError: If Kalign binary not found within the path.
        N)r   )selfr   r   r   r   __init__%   s   
	zKalign.__init__r   r   c              	   C   s  t dt| |D ]}t|dk rtd|t|f q
t }tj|d}tj|d}t	|d}|
t| W d   n1 sGw   Y  | jd|d	|d
dg}t dd| tj|tjtjd}td | \}	}
| }t d|	d|
d W d   n1 sw   Y  |rtd|	d|
df t	|}| }W d   n1 sw   Y  |W  d   S 1 sw   Y  dS )ab  Aligns the sequences and returns the alignment in A3M string.

        Args:
            sequences: A list of query sequence strings. The sequences have to be at
                least 6 residues long (Kalign requires this). Note that the order in
                which you give the sequences might alter the output slightly as
                different alignment tree might get constructed.

        Returns:
            A string with the alignment in a3m format.

        Raises:
            RuntimeError: If Kalign fails.
            ValueError: If any of the sequences is less than 6 residues long.
        zAligning %d sequences   zSKalign requires all sequences to be at least 6 residues long. Got %s (%d residues).zinput.fastaz
output.a3mwNz-iz-oz-formatfastazLaunching subprocess "%s" )stdoutstderrzKalign queryzKalign stdout:
%s

stderr:
%s
zutf-8z%Kalign failed
stdout:
%s

stderr:
%s
)r   infor   
ValueErrorr   tmpdir_managerospathr   openwriter   r   
subprocessPopenPIPEtimingcommunicatewaitdecodeRuntimeErrorread)r   r   squery_tmp_dirinput_fasta_pathoutput_a3m_pathfcmdprocessr"   r#   retcoder   r   r   r   align0   s^   



	

$zKalign.alignN)__name__
__module____qualname____doc__strr   r   r<   r   r   r   r   r   "   s    r   )r@   r'   r+   typingr   abslr   r   r   rA   r   r   r   r   r   r   <module>   s   
