o
    پiZ                     @   s   d dl Z d dlmZ d dlmZmZmZmZ eG dd dZeG dd dZ	de
d	ee
 d
ee
e
f fddZdde
ded
ee	 fddZG dd dZG dd dZG dd dZdS )    N)	dataclass)IteratorListOptionalTuplec                   @   s.   e Zd ZU dZeed< eed< dZeed< dS )Eventz2Represents a parsed event from the Harmony stream.
event_typecontentNraw_text)__name__
__module____qualname____doc__str__annotations__r
    r   r   T/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/parser/harmony_parser.pyr      s
   
 r   c                   @   s*   e Zd ZU dZeed< eed< eed< dS )Tokenz)A structural token in the Harmony format.typestartendN)r   r   r   r   r   r   intr   r   r   r   r      s
   
 r   texttokensreturnc                 C   s   | sdS d}|D ]*}|sqt t|d t| }t|ddD ]}|| | d r1t||} nqq|dkr;| dfS | d|  | | d fS )z~
    Holds back the longest suffix of `text` that could be a prefix of any token.
    Returns (emit_now, keep_for_later).
    ) r   r      Nr   )minlenrange
startswithmax)r   r   max_holdtokLkr   r   r   prefix_hold   s    
r'   	start_posc                 #   s   dddddddd}|}d	}|t | k r| d
|}|dkr n||kr+td||V  d	}| D ]\}}| ||rPt|||t | V  |t | }d} nq1|s| |d  t fdd|D }	|	rttd|t | V  t | }nId}td||d V  | d|d }
|
dkr| d
|
d }|dkrtd|d |V  |}ntd|d t | V  t | }n
|d }|t | k s|t | k rtd|t | V  dS |t | kr|r| D ]}| |rtd||V   dS qdS dS dS )z6Iterate over structural tokens in left-to-right order.STARTCHANNELMESSAGE	CONSTRAINENDCALLRETURN	<|start|><|channel|>z<|message|>z<|constrain|>z<|end|><|call|>z
<|return|>Fz<|r   TEXTTNc                 3   s    | ]}|  V  qd S N)r!   ).0littailr   r   	<genexpr>Q   s    ziter_tokens.<locals>.<genexpr>   z|>)r   findr   itemsr!   anykeysendswith)r   r(   TOKENSposhas_unknown_tokens
marker_posfound_tokenliteral
token_type
is_partial	close_posnext_markerr   r8   r   iter_tokens.   sn   
4
rK   c                   @   s   e Zd ZdZdd Zdedeee ef fddZ	dedee
 d	edeeeef  fd
dZdedee fddZdedee
 d	edeeee ef  fddZdedee
 dedefddZdedefddZdS )CanonicalStrategyz9Parses the canonical Harmony format with channel markers.c                 C   s   g d| _ d S )Nr0   )guard_tokensselfr   r   r   __init__~   s   zCanonicalStrategy.__init__r   r   c                 C   s  g }t t|}|s|dfS d}|t|k r|| }|jdkri|t|d krCt||j|j | j\}}|r?|t	d| ||fS | 
|||rO|d7 }n||j|j }| |sd|t	d| |d7 }nf|jdv r| |||}	|	d u r| |||}
|
r|
\}}|| ||fS || j}|||d  fS |	\}}|r|| |}n%| 
|||r|d7 }n||j|j }| |s|t	d| |d7 }|t|k s|dfS )Nr   r   r4   r   normal)r)   r*   )listrK   r   r   r'   r   r   rM   appendr   $_is_commentary_filler_between_blocks_is_standalone_structural_token_parse_block_parse_partial_analysis)rO   r   eventsr   rB   tokenemitholdr	   block_resultpartial_resulteventremaining_textremaining_startnew_posr   r   r   parse   sV   









6zCanonicalStrategy.parser   r(   c                 C   s  |}|t |k r|| jdkr|d7 }d}d}t|t |D ]}|| jdkr.|du r.|}q|| jdkr9|} nq|du sB|du rDdS |d t |k rS||d  jn|| j}|| j}	|||	 }
| |
}|dkrndS || j}||d }||| j| }td||fS )z@Try to parse partial analysis content for incremental streaming.r)   r   Nr*   r+   analysis	reasoning)r   r   r    r   r   _extract_channel_typer   )rO   r   r   r(   rB   channel_posmessage_posichannel_startchannel_endchannel_headerchannel_typecontent_startr	   r_   r   r   r   rW      s6   


z)CanonicalStrategy._parse_partial_analysisheader_textc                 C   sB   |  }| drdS | drdS | drdS dS )z[Extract channel type from header, ignoring other attributes like to=... or <|constrain|>...rc   
commentaryfinalN)striplowerr!   )rO   rn   header_cleanr   r   r   re      s   z'CanonicalStrategy._extract_channel_typec                 C   s  |}|t |k r|| jdkr|d7 }d}d}t|t |D ]}|| jdkr.|du r.|}q|| jdkr9|} nq|du r@dS |du rz|| j}d}	t|d t |D ]}|| jdv ra|}	 nqT|	du rhdS ||||	 j }
td|
|	d fS |d }|t |k r|| jn||d  j}|| j}||| }| |}|sdS |d }|| j}|}|dkr|t |k r|| jd	kr|d7 }|t |k r|| jd	ksnE|d
kr|t |k r|| jdvr|d7 }|t |k r|| jdvsn"|t |k r|| jdvr|d7 }|t |k r|| jdvs|t |kr2|dkr0||d }
td|
|fS dS || }|||j }
|d
krg|jdkr^||| j|j }td|
 ||d fS td|
|d fS |dkr|jdkr||| j|j }td|
 ||d fS td|
|d fS |dkr|
}|jd	kr|d t |k r||d  }|jdkr|||j|j 7 }td||d fS td||d fS d|d fS )zGParse a channel block. Returns (event, next_pos) or None if incomplete.r)   r   Nr*   r+   )r-   r.   r/   rQ   rp   r/   rc   )r-   r.   r.   	tool_callrd   ro   r4   r;   )r   r   r    r   r   r   re   rq   )rO   r   r   r(   rB   rf   rg   rh   rm   end_token_posr	   ri   rj   rk   rl   end_pos	end_tokenr
   final_content
next_tokenr   r   r   rV     s   
$






zCanonicalStrategy._parse_blockrB   c                 C   s   || }||j |j  }|dkr8|d t|k r8||d  }||d  }|jdkr8|jdkr8| dkr8dS |dkr[||d  }|jdkr[|jdkrNdS |jdkr[| dkr[dS d	S )
z`Check if this is commentary filler text or problematic structural tokens in malformed sequences.r   r   r.   r*   ro   Tr+   r4   F)r   r   rq   r   r   rr   )rO   r   r   rB   current_tokencurrent_text
prev_tokenry   r   r   r   rT   l  s$   




z6CanonicalStrategy._is_commentary_filler_between_blocksr	   c                 C   s   |  }g d}||v S )zOCheck if content is just a standalone structural token that should be filtered.r0   )rq   )rO   r	   content_strippedstructural_tokensr   r   r   rU     s   	z1CanonicalStrategy._is_standalone_structural_tokenN)r   r   r   r   rP   r   r   r   r   rb   r   r   r   rW   re   rV   boolrT   rU   r   r   r   r   rL   {   sB    @
-
h
(rL   c                   @   sD   e Zd ZdZdd ZdefddZdedeee	 ef fd	d
Z
dS )TextStrategyz.Parses the text-based Harmony fallback format.c                 C   sH   d| _ tdtjtjB tdtjtjB tdtjtjB d| _d S )Nr   zK^\s*(?:assistant)?\s*(analysis|commentary)(.*?)\s*assistantfinal\s*(.*)\s*$z^\s*assistantfinal\s*(.*)\s*$z2^\s*(?:assistant)?\s*(analysis|commentary)(.*)\s*$)analysis_then_final
final_onlyanalysis_only)buffer_contextrecompile
IGNORECASEDOTALLpatternsrN   r   r   r   rP     s   

zTextStrategy.__init__bufferc                 C   s
   || _ d S r5   )r   )rO   r   r   r   r   set_buffer_context  s   
zTextStrategy.set_buffer_contextr   r   c                 C   s$  g }| j d |}|rN| \}}}| dkr(| r(|td|  n| dkr<| r<|td|  | rJ|td|  |dfS td|tj	rf| }d|v rfd	|vrf||fS | j d
 |}|r|
d}| r|td|  |dfS | j d |}|r| \}}t|d	g\}	}
| dkr|	r|td|	 |
r||d |d |
 fS ||fS | dkr|	r|
r|	n|	 }|td| |
r||d |d |
 fS |dfS ||d |d |
 fS t|g d\}	}
|	r|td|	 ||
fS )Nr   rc   rd   ro   rQ   r   z.(?:^|\s)(?:assistant)?\s*(analysis|commentary)assistantfinassistantfinalr   r   r   r;   )rc   ro   r   )r   matchgroupsrr   rq   rS   r   r   searchr   groupr'   r   )rO   r   rX   mchannelrd   rp   lowr	   rZ   r[   content_outr   r   r   rb     sV   
zTextStrategy.parseN)r   r   r   r   rP   r   r   r   r   r   rb   r   r   r   r   r     s
    "r   c                   @   s.   e Zd ZdZdd Zdedee fddZdS )	HarmonyParserz@Facade for parsing Harmony format, switching between strategies.c                 C   s   d | _ d| _d| _d| _d S )Nr   F)strategy_buffer_should_filter_commentary_partial_commentaryrN   r   r   r   rP     s   zHarmonyParser.__init__chunkr   c           	      C   sN  |  j |7  _ | jd u r+d| j v sd| j v rt | _ntd| j tjr)t | _ng S t| jdr8| j| j  | j	| j \}}| j 
 d}|| _ g }|D ]T}d}|jdkr| js_| jr| j|j   }|dkrvd	}d
| _d| _nd|rd	}|| _n
d
| _d| _nd
| _|rqP|jdkrd	| _d
| _n|rd	| _|| qP|S )Nr2   r1   z=(?:^|\s)(?:assistant)?\s*(analysis|commentary|assistantfinal)r   r3   FrQ   ro   Tr   rt   )r   r   rL   r   r   r   r   hasattrr   rb   rstripr@   r   r   r   r	   rq   rr   r!   rS   )	rO   r   rX   	remainingbuffer_has_call_tokenfiltered_eventsr^   should_filterpotential_commentaryr   r   r   rb     sX   





zHarmonyParser.parseN)	r   r   r   r   rP   r   r   r   rb   r   r   r   r   r     s    
r   )r   )r   dataclassesr   typingr   r   r   r   r   r   r   r'   r   rK   rL   r   r   r   r   r   r   <module>   s    "M  *R