
    -i.                         S SK Jr  S SKJr  S SKJr  S SKrSSKJ	r	J
r
JrJrJr  SSKJr  SSS	.S
 jrSS jr " S S\5      rS r " S S\5      rS rS rSS.S jrSS jr " S S\5      rS rg)    )Counter)suppress)
NamedTupleN   )_isin_searchsorteddeviceget_namespacexpx)is_scalar_nanFreturn_inversereturn_countsc                R    U R                   [        :X  a
  [        XUS9$ [        XUS9$ )a)  Helper function to find unique values with support for python objects.

Uses pure python method for object dtype, and numpy method for
all other dtypes.

Parameters
----------
values : ndarray
    Values to check for unknowns.

return_inverse : bool, default=False
    If True, also return the indices of the unique values.

return_counts : bool, default=False
    If True, also return the number of times each unique item appears in
    values.

Returns
-------
unique : ndarray
    The sorted unique values.

unique_inverse : ndarray
    The indices to reconstruct the original array from the unique array.
    Only provided if `return_inverse` is True.

unique_counts : ndarray
    The number of times each of the unique values comes up in the original
    array. Only provided if `return_counts` is True.
r   )dtypeobject_unique_python
_unique_np)valuesr   r   s      H/var/www/html/venv/lib/python3.13/site-packages/sklearn/utils/_encode.py_uniquer      s6    > ||v
 	
 ]     c                 <   [        U 5      u  p4Su  pVU(       a  U(       a  UR                  U 5      u  ptpVOGU(       a  UR                  U 5      u  puO,U(       a  UR                  U 5      u  pvOUR	                  U 5      nUR
                  (       a`  [        US   5      (       aM  [        XsR                  US9nUSUS-    nU(       a  XXX:  '   U(       a  UR                  XhS 5      Xh'   USUS-    nU4n	U(       a  X4-  n	U(       a  X4-  n	[        U	5      S:X  a  U	S   $ U	$ )zHelper function to find unique values for numpy arrays that correctly
accounts for nans. See `_unique` documentation for details.)NNxpNr   r   )r
   
unique_allunique_inverseunique_countsunique_valuessizer   r   nansumlen)
r   r   r   r   _inversecountsuniquesnan_idxrets
             r   r   r   =   s    &!EB OG-&(mmF&;#GV	,,V4	**62""6* ||gbk22B7-GaK()0G%& ffVH%56FOMgk*F*CzyX]3q6++r   c                   4    \ rS rSr% Sr\\S'   \\S'   S rSrg)MissingValuesd   z'Data class for missing data informationr"   nonec                     / nU R                   (       a  UR                  S5        U R                  (       a  UR                  [        R                  5        U$ )z3Convert tuple to a list where None is always first.N)r.   appendr"   np)selfoutputs     r   to_listMissingValues.to_listj   s6    99MM$88MM"&&!r    N)	__name__
__module____qualname____firstlineno____doc__bool__annotations__r4   __static_attributes__r6   r   r   r,   r,   d   s    1	I
Jr   r,   c                     U  Vs1 s H  ob  [        U5      (       d  M  UiM     nnU(       d  U [        SSS94$ SU;   a%  [        U5      S:X  a  [        SSS9nO[        SSS9nO
[        SSS9nX-
  nXC4$ s  snf )a  Extract missing values from `values`.

Parameters
----------
values: set
    Set of values to extract missing from.

Returns
-------
output: set
    Set with missing values extracted.

missing_values: MissingValues
    Object with missing value information.
NF)r"   r.   r   T)r   r,   r$   )r   valuemissing_values_setoutput_missing_valuesr3   s        r   _extract_missingrC   t   s    " "!%]mE6J6   }U;;;!!!"a'$1e$$G! %2d$F! -$U C (F(('s
   A6A6c                   2   ^  \ rS rSrSrU 4S jrS rSrU =r$ )_nandict   z!Dictionary with support for nans.c                    > [         TU ]  U5        UR                  5        H  u  p#[        U5      (       d  M  X0l          g    g N)super__init__itemsr   	nan_value)r2   mappingkeyr@   	__class__s       r   rJ   _nandict.__init__   s5    !!--/JCS!!!& *r   c                 r    [        U S5      (       a  [        U5      (       a  U R                  $ [        U5      e)NrL   )hasattrr   rL   KeyErrorr2   rN   s     r   __missing___nandict.__missing__   -    4%%-*<*<>>!smr   )rL   )	r7   r8   r9   r:   r;   rJ   rU   r>   __classcell__rO   s   @r   rE   rE      s    + r   rE   c                     [        X5      u  p#[        [        U5       VVs0 s H  u  pEXT_M	     snn5      nUR                  U  Vs/ s H  ovU   PM	     sn[	        U 5      S9$ s  snnf s  snf )z,Map values based on its position in uniques.)r	   )r
   rE   	enumerateasarrayr	   )r   r(   r   r%   ivaltablevs           r   _map_to_integerra      sb    &*EB9W+=>+=cf+=>?E::0AQx0:HH ?0s   A#
A)c                    [        U 5      n[        U5      u  p4[        U5      nUR                  UR	                  5       5        [
        R                  " XPR                  S9nU4nU(       a  U[        X5      4-  nU(       a  U[        X5      4-  n[        U5      S:X  a  US   $ U$ ! [         a1    [        S [        S U  5       5       5       5      n[        SU 35      ef = f)Nr   c              3   8   #    U  H  oR                   v   M     g 7frH   )r9   ).0ts     r   	<genexpr>!_unique_python.<locals>.<genexpr>   s     L/K!~~/K   c              3   8   #    U  H  n[        U5      v   M     g 7frH   )type)re   r`   s     r   rg   rh      s     2KFq477Fri   zPEncoders require their input argument must be uniformly strings or numbers. Got r   r   )setrC   sortedextendr4   r1   arrayr   	TypeErrorra   _get_countsr$   )r   r   r   uniques_setmissing_valuesr(   typesr*   s           r   r   r      s    
&k&6{&C#%~--/0((7,,7 *C022F,..X]3q6++  
Ls2KF2K/KLL'',g/
 	

s   A B' ';C"T)check_unknownc                   [        X5      u  p4UR                  U R                  S5      (       d   [        X5      $ U(       a   [        X5      nU(       a  [        SU 35      e[        XUS9$ ! [         a  n[        SU 35      eSnAff = f)ax  Helper function to encode values into [0, n_uniques - 1].

Uses pure python method for object dtype, and numpy method for
all other dtypes.
The numpy method has the limitation that the `uniques` need to
be sorted. Importantly, this is not checked but assumed to already be
the case. The calling method needs to ensure this for all non-object
values.

Parameters
----------
values : ndarray
    Values to encode.
uniques : ndarray
    The unique values in `values`. If the dtype is not object, then
    `uniques` needs to be sorted.
check_unknown : bool, default=True
    If True, check for values in `values` that are not in `unique`
    and raise an error. This is ignored for object dtype, and treated as
    True in this case. This parameter is useful for
    _BaseEncoder._transform() to avoid calling _check_unknown()
    twice.

Returns
-------
encoded : ndarray
    Encoded values
numericz%y contains previously unseen labels: Nr   )r
   isdtyper   ra   rS   
ValueError_check_unknownr   )r   r(   ru   r   r%   ediffs          r   _encoder}      s    : &*EB::fllI..	J"633 !&2D #H!OPPW44  	JDQCHII	Js   
A, ,
B	6BB	c                   ^^ [        X5      u  p4SnUR                  U R                  S5      (       Gd6  [        U 5      n[	        U5      u  pg[        U5      m[	        T5      u  mmUT-
  nUR
                  =(       a    TR
                  (       + n	UR                  =(       a    TR                  (       + n
UU4S jnU(       a`  U(       d  U	(       d  U
(       a(  UR                  U  Vs/ s H
  o" U5      PM     sn5      nO#UR                  [        U 5      UR                  S9n[        U5      nU
(       a  UR                  S5        U	(       a  UR                  [        R
                  5        OUR                  U 5      n[        R                   " XSUS9nU(       aA  UR"                  (       a  [%        XU5      nO#UR                  [        U 5      UR                  S9nUR'                  UR)                  U5      5      (       aY  UR)                  U5      nUR'                  U5      (       a2  UR"                  (       a  U(       a  UR)                  U 5      nSX_'   X)    n[        U5      nU(       a  X4$ U$ s  snf )a=  
Helper function to check for unknowns in values to be encoded.

Uses pure python method for object dtype, and numpy method for
all other dtypes.

Parameters
----------
values : array
    Values to check for unknowns.
known_values : array
    Known values. Must be unique.
return_mask : bool, default=False
    If True, return a mask of the same shape as `values` indicating
    the valid values.

Returns
-------
diff : list
    The unique values present in `values` and not in `know_values`.
valid_mask : boolean array
    Additionally returned if ``return_mask=True``.

Nrw   c                    > U T;   =(       d<    TR                   =(       a    U S L =(       d    TR                  =(       a    [        U 5      $ rH   )r.   r"   r   )r@   missing_in_uniquesrr   s    r   is_valid _check_unknown.<locals>.is_valid  sA    $ E&++=E&**C}U/Cr   rc   T)assume_uniquer   r   )r
   rx   r   rl   rC   r"   r.   ro   onesr$   r<   listr0   r1   r    r   	setdiff1dr!   r   anyisnan)r   known_valuesreturn_maskr   r%   
valid_mask
values_setmissing_in_valuesr|   nan_in_diffnone_in_diffr   r@   r    diff_is_nanis_nanr   rr   s                   @@r   rz   rz      s   2 &/EBJ::fllI..[
(8(D%
,'*:;*G''K''++J4F4J4J0J(--M6H6M6M2M	 {lXXF&KF5xF&KL
WWS[W@
DzKKKK((0}}]QSTyy"6<
WWS[W@
 66"((<())((4.Kvvk""99XXf-F)*J& L)DzKC 'Ls   $I.c                   8   ^  \ rS rSrSrU 4S jrS rS rSrU =r	$ )_NaNCounteriJ  z$Counter with support for nan values.c                 B   > [         TU ]  U R                  U5      5        g rH   )rI   rJ   _generate_items)r2   rK   rO   s     r   rJ   _NaNCounter.__init__M  s    --e45r   c              #      #    U HF  n[        U5      (       d  Uv   M  [        U S5      (       d  SU l        U =R                  S-  sl        MH     g7f)z>Generate items without nans. Stores the nan counts separately.	nan_countr   r   N)r   rR   r   )r2   rK   items      r   r   _NaNCounter._generate_itemsP  sD     D &&
4--!"NNaN s   AAc                 r    [        U S5      (       a  [        U5      (       a  U R                  $ [        U5      e)Nr   )rR   r   r   rS   rT   s     r   rU   _NaNCounter.__missing__Z  rW   r   )r   )
r7   r8   r9   r:   r;   rJ   r   rU   r>   rX   rY   s   @r   r   r   J  s    .6  r   r   c                 |   U R                   R                  S;   ak  [        U 5      n[        R                  " [        U5      [        R                  S9n[        U5       H#  u  pE[        [        5         X%   X4'   SSS5        M%     U$ [        U SS9u  pg[        R                  " XSS9n[        R                  " US   5      (       a#  [        R                  " US   5      (       a  SUS'   [        R                  " XaU   5      n	[        R                  " U[        R                  S9nXy   X8'   U$ ! , (       d  f       M  = f)zGet the count of each of the `uniques` in `values`.

The counts will use the order passed in by `uniques`. For non-object dtypes,
`uniques` is assumed to be sorted and `np.nan` is at the end.
OUrc   NT)r   )r   r   )r   kindr   r1   zerosr$   int64r[   r   rS   r   isinr   searchsorted
zeros_like)
r   r(   counterr3   r]   r   r    r'   uniques_in_valuesunique_valid_indicess
             r   rq   rq   `  s     ||D f%#g,bhh7 )GA(##M	 $# * &vTBM dK	xxb!""rxx'<'< $"??=BS:TU]]7"((3F & <FM $#s   1D,,
D;	)FF)F)collectionsr   
contextlibr   typingr   numpyr1   
_array_apir   r   r	   r
   r   _missingr   r   r   r,   rC   dictrE   ra   r   r}   rz   r   rq   r6   r   r   <module>r      s          $ ',5 &R$,NJ  #)Lt  I,4 /3 (5VQh' ,r   