o
    fw%                     @   s   d dl mZ d dlZd dlZzddlmZ W n ey0   e dkr(ed d dl	mZ Y nw ddl
mZ ejejejd	d
 Zejejejdd Zd$ddZejd%ddZd&ddZd&ddZejd%ddZd&ddZd&ddZd&ddZd$ddZd&d d!Zd$d"d#ZdS )'    )unicode_literalsN   )StringMatcherPyPyzYUsing slow pure-python SequenceMatcher. Install python-Levenshtein to remove this warning)SequenceMatcher)utilsc                 C   s.   t | |\} }td | |}t d|  S )Nd   )r   make_type_consistentr   intrratio)s1s2m r   H/home/ubuntu/webapp/venv/lib/python3.10/site-packages/fuzzywuzzy/fuzz.pyr      s   r   c                 C   s   t | |\} }t| t|kr| }|}n|}| }td||}| }g }|D ]8}|d |d  dkr;|d |d  nd}|t| }	|||	 }
td||
}| }|dkrZ dS || q't dt| S )zR"Return the ratio of the most similar substring
    as a number between 0 and 100.Nr   r   gףp=
?r   )	r   r	   lenr   get_matching_blocksr   appendr
   max)r   r   shorterlongerr   blocksscoresblock
long_startlong_endlong_substrm2rr   r   r   partial_ratio   s&   (r   Tc                 C   s4   |r	t j| |dn| }| }dt|}| S )z*Return a cleaned string with token sorted.force_ascii )r   full_processsplitjoinsortedstrip)sr!   r#   tstokenssorted_stringr   r   r   _process_and_sortK   s   r,   c                 C   s4   t | ||d}t |||d}|rt||S t||S )Nr#   )r,   r   r   )r   r   partialr!   r#   sorted1sorted2r   r   r   _token_sortZ   s
   

r1   c                 C      t | |d||dS )zpReturn a measure of the sequences' similarity between 0 and 100
    but sorting the token before comparing.
    Fr.   r!   r#   r1   r   r   r!   r#   r   r   r   token_sort_ratioe      r6   c                 C   r2   )z}Return the ratio of the most similar substring as a number between
    0 and 100 but sorting the token before comparing.
    Tr3   r4   r5   r   r   r   partial_token_sort_ratiol   r7   r8   c                 C   s  |s| |krdS |rt j| |dn| }|rt j||dn|}t |s%dS t |s,dS t| }t| }||}	||}
||}dt|	}dt|
}dt|}|d | }|d | }|	 }|	 }|	 }|ryt
}nt}|||||||||g}t|S )a	  Find all alphanumeric tokens in each string...
        - treat them as a set
        - construct two strings of the form:
            <sorted_intersection><sorted_remainder>
        - take ratios of those two strings
        - controls for unordered partial matchesr   r    r   r"   )r   r#   validate_stringsetr$   intersection
differencer%   r&   r'   r   r   r   )r   r   r.   r!   r#   p1p2tokens1tokens2r;   diff1to2diff2to1sorted_sectsorted_1to2sorted_2to1combined_1to2combined_2to1
ratio_funcpairwiser   r   r   
_token_sets   s:   	




rJ   c                 C   r2   )NFr3   rJ   r5   r   r   r   token_set_ratio      rL   c                 C   r2   )NTr3   rK   r5   r   r   r   partial_token_set_ratio   rM   rN   c                 C   sP   |rt j| |d}t j||d}n| }|}t |sdS t |s#dS t||S )a  
    Quick ratio comparison between two strings.

    Runs full_process from utils on both strings
    Short circuits if either of the strings is empty after processing.

    :param s1:
    :param s2:
    :param force_ascii: Allow only ASCII characters (Default: True)
    :full_process: Process inputs, used here to avoid double processing in extract functions (Default: True)
    :return: similarity ratio
    r    r   )r   r#   r9   r   )r   r   r!   r#   r=   r>   r   r   r   QRatio   s   


rO   c                 C      t | |d|dS )z
    Unicode quick ratio

    Calls QRatio with force_ascii set to False

    :param s1:
    :param s2:
    :return: similarity ratio
    Fr!   r#   )rO   r   r   r#   r   r   r   UQRatio   s   
rS   c                 C   s$  |rt j| |d}t j||d}n| }|}t |sdS t |s#dS d}d}d}t||}	ttt|t|tt|t| }
|
dk rHd}|
dkrNd	}|rwt||| }t	||dd
| | }t
||dd
| | }t t|	|||S t||dd
| }t||dd
| }t t|	||S )aj  
    Return a measure of the sequences' similarity between 0 and 100, using different algorithms.

    **Steps in the order they occur**

    #. Run full_process from utils on both strings
    #. Short circuit if this makes either string empty
    #. Take the ratio of the two processed strings (fuzz.ratio)
    #. Run checks to compare the length of the strings
        * If one of the strings is more than 1.5 times as long as the other
          use partial_ratio comparisons - scale partial results by 0.9
          (this makes sure only full results can return 100)
        * If one of the strings is over 8 times as long as the other
          instead scale by 0.6

    #. Run the other ratio functions
        * if using partial ratio functions call partial_ratio,
          partial_token_sort_ratio and partial_token_set_ratio
          scale all of these by the ratio based on length
        * otherwise call token_sort_ratio and token_set_ratio
        * all token based comparisons are scaled by 0.95
          (on top of any partial scalars)

    #. Take the highest value from these results
       round it and return it as an integer.

    :param s1:
    :param s2:
    :param force_ascii: Allow only ascii characters
    :type force_ascii: bool
    :full_process: Process inputs, used here to avoid double processing in extract functions (Default: True)
    :return:
    r    r   Tgffffff?g?g      ?F   g333333?r-   )r   r#   r9   r   floatr   r   minr   r8   rN   r
   r6   rL   )r   r   r!   r#   r=   r>   try_partialunbase_scalepartial_scalebase	len_ratior.   ptsorptsertsortserr   r   r   WRatio   sD   #


(r`   c                 C   rP   )zReturn a measure of the sequences' similarity between 0 and 100,
    using different algorithms. Same as WRatio but preserving unicode.
    FrQ   )r`   rR   r   r   r   UWRatio.  s   ra   )T)TTT)TT)
__future__r   platformwarningsr   r   ImportErrorpython_implementationwarndifflib r   check_for_nonecheck_for_equivalencecheck_empty_stringr   r   r,   r1   r6   r8   rJ   rL   rN   rO   rS   r`   ra   r   r   r   r   <module>   s@   

)



4

	

N