
    -i6                    &   S SK r S SKrS SKrS SKrS SKJr  S SKJr  S SK	J
r
Jr  S SKJr  S SKJrJrJr  S SKJr  S r\R*                  R-                  S	/ S
Q5      S 5       r\R*                  R-                  S	/ S
Q5      S 5       r\R*                  R-                  S\R2                  \R4                  \R6                  /5      \R*                  R-                  S\R2                  \R4                  \R6                  /5      S 5       5       r\R*                  R-                  S\R2                  \R4                  \R6                  /5      S 5       rS rS rS r S r!S r"\R*                  R-                  S/ SQ/ SQ/\RF                  " / SQ/ SQ/5      \RF                  " / SQ/ SQ/\$S9\RF                  " / SQS\RJ                  S //\$S9\RF                  " / SQS\&" S!5      S //\$S9\RF                  " / S"Q/ S#Q/\$S9\RF                  " / S$QS\RJ                  S//\$S9\RF                  " / S$QS\&" S!5      S//\$S9// S%QS&9S' 5       r'\R*                  R-                  S	/ S
Q5      \R*                  R-                  S(S)S*/5      \R*                  R-                  S+SS,/5      S- 5       5       5       r(\R*                  R-                  S(S)S*/5      \R*                  R-                  S.S/S0/S1S0/S/S0/// S2Q/ S3Q/ S2Q/4S4S/S5S/S6S7/S5S/// S8Q/ S9Q/ S:Q/4/5      S; 5       5       r)S< r*\R*                  R-                  S+/ S=Q5      \R*                  R-                  S>/ S=Q5      S? 5       5       r+\R*                  R-                  S@SASB/5      \R*                  R-                  SS1S//\RF                  " SCSD/5      /5      SE 5       5       r,\R*                  R-                  S@SASB/5      SF 5       r-\R*                  R-                  SGSHS0/SIS0//SHSI/S0//\R\                  4\RF                  " S1S//SJS///5      S1SJ/S///\R^                  4\RF                  " SKS /SLS //\$S9SKSL/S //\R\                  4\RF                  " SKS /SLS //5      SKSL/S //\R`                  4\RF                  " S1S//\RJ                  S///5      S1\RJ                  /S///\R6                  4\RF                  " SK\RJ                  /S\RJ                  //\$S9SKS/\RJ                  //\R\                  4\RF                  " SK\&" S!5      /S\&" S!5      //\$S9SKS/\&" S!5      //\R\                  4// SMQS&9SN 5       r1\R*                  R-                  S	/ S
Q5      \R*                  R-                  SO\RF                  " SS7//\$S9Rd                  \RF                  " SSP//\$S9Rd                  / SQQ/\R\                  4\RF                  " S1S///SRS9Rd                  \RF                  " S1SS//SRS9Rd                  / STQ/\Rf                  4\RF                  " SS7//\$S9Rd                  \RF                  " SSP//\$S9Rd                  \RF                  " / SQQ5      /\R\                  4\RF                  " SS//\$S9Rd                  \RF                  " SS7//\$S9Rd                  / SUQ/\$4\RF                  " SS7//\$S9Rd                  \RF                  " S\RJ                  //\$S9Rd                  / SVQ/\$4\RF                  " SS//\$S9Rd                  \RF                  " S\RJ                  //\$S9Rd                  / SWQ/\$4// SXQS&9SY 5       5       r4SZ r5\R*                  R-                  S[\
\/5      S\ 5       r6S] r7S^ r8\R*                  R-                  S_S,S`Sa/4Sb/ ScQ4/ SdQSeSf/4// SgQS&9Sh 5       r9Si r:\R*                  R-                  S/ SQ/ SQ/\RF                  " / SjQ/ SkQ/5      \RF                  " / SQ/ SQ/\$S9// SlQS&9Sm 5       r;\R*                  R-                  SO\RF                  " SS7//\$S9Rd                  \RF                  " SSP//\$S9Rd                  / SQQ/\R\                  4\RF                  " S1S///SRS9Rd                  \RF                  " S1SS//SRS9Rd                  / STQ/\Rf                  4\RF                  " SS7//\$S9Rd                  \RF                  " SSP//\$S9Rd                  \RF                  " / SQQ5      /\R\                  4// SnQS&9So 5       r<Sp r=Sq r>\R*                  R-                  Sr\&\?/5      Ss 5       r@St rASu rBSv rCSw rDSx rESy rF\R*                  R-                  S+SbS,/5      Sz 5       rG\R*                  R-                  S{\RJ                  S\&" S!5      /5      S| 5       rH\R*                  R-                  S+SHSJ// S}Q/5      S~ 5       rI\R*                  R-                  SS*S)/SS/S&9\R*                  R-                  S+S,/ SQ/S,S/S&9S 5       5       rJ\R*                  R-                  S[\
\/5      S 5       rK\R*                  R-                  SSS/0SS0SS0S/SS.SSSS./5      \R*                  R-                  SS/ SQ//5      S 5       5       rL\R*                  R-                  S+SbS,S7//5      S 5       rM\R*                  R-                  S+S/SP//5      S 5       rN\R*                  R-                  SSSJ0SS0SS0SS0SS0SJSS.SSSS./5      S 5       rO\R*                  R-                  S+S,S7//5      S 5       rP\R*                  R-                  S+S/SP//5      S 5       rQS rR\R*                  R-                  SSJS1S.SSS0/5      S 5       rSS rTS rUS rVS rWS rX\R*                  R-                  SSS1S./5      S 5       rY\R*                  R-                  SS/SJS./5      S 5       rZ\R*                  R-                  S/ SQ5      \R*                  R-                  S/ SQ5      S 5       5       r[S r\\R*                  R-                  S{\RJ                  S/5      S 5       r]S r^\R*                  R-                  S	/ S
Q5      \R*                  R-                  SSS/5      S 5       5       r_\R*                  R-                  S	/ S
Q5      S 5       r`\R*                  R-                  S	/ S
Q5      S 5       ra\R*                  R-                  S	/ S
Q5      S 5       rbS rcS rd\R*                  R-                  S\RJ                  S/5      S 5       re\R*                  R-                  SSS/5      \R*                  R-                  S\RJ                  S/5      S 5       5       rf\R*                  R-                  SO\RF                  " S\RJ                  //\$S9Rd                  \RF                  " SS7//\$S9Rd                  \RF                  " SSP\RJ                  /\$S9/\R\                  4\RF                  " S\RJ                  //\$S9Rd                  \RF                  " SS7//\$S9Rd                  \RF                  " SSP\RJ                  /\$S9/\R\                  4\RF                  " S\RJ                  //\R6                  S9Rd                  \RF                  " SC//\R6                  S9Rd                  \RF                  " SSD\RJ                  /5      /\R6                  4// SQS&9S 5       rg\R*                  R-                  S[\
\/5      S 5       rh\R*                  R-                  S\RF                  " S\RJ                  SC//5      Rd                  \RF                  " S\RJ                  S//5      Rd                  \RF                  " SD//5      4\RF                  " / SQ/5      Rd                  \RF                  " / SQ/5      Rd                  \RF                  " \RJ                  //5      4\RF                  " S\RJ                  S7//\$S9Rd                  \RF                  " S\RJ                  S//5      Rd                  \RF                  " SP//\$S94\RF                  " / SQ/\$S9Rd                  \RF                  " / SQ/5      Rd                  \RF                  " \RJ                  //\$S94/5      S 5       ri\R*                  R-                  S\5      S 5       rjS rk\R*                  R-                  SSSL//\RF                  " SSL//SS9\RF                  " SSL//SS9/5      \R*                  R-                  SSKSL//\RF                  " SKSL//SS9\RF                  " SKSL//SS9/5      S 5       5       rlS rmS rnS ro\R*                  R-                  SS*S)/5      S 5       rp\R*                  R-                  S\RF                  " S/S//\$S9S /\RJ                  /\RJ                  //\R                  " S/S/S//\$S94\RF                  " \RJ                  /S/S//\$S9S /\RJ                  /\RJ                  //\R                  " S/\RJ                  /\RJ                  //\$S94/5      S 5       rrS rsS rtS ruS rvS rw\R*                  R-                  SSSJ0SS0SS0SS0SS0SJSS.SSSS./5      S 5       rxS ryS rzS r{S r|\R*                  R-                  SSS0SS/0/5      S 5       r}\R*                  R-                  SSS10SS0/5      S 5       r~S rS r\R*                  R-                  S[\
\/5      S 5       rg)    N)sparse)NotFittedError)OneHotEncoderOrdinalEncoder)is_scalar_nan)_convert_containerassert_allcloseassert_array_equal)CSR_CONTAINERSc                     [         R                  " / SQ/ SQ/5      n [        5       n[        SS9nUR                  U 5      nUR                  U 5      nUR                  S:X  d   eUR                  S:X  d   e[
        R                  " U5      (       d   e[
        R                  " U5      (       a   e[        UR                  5       / SQ/ SQ/5        [        UR                  5       U5        g )N         r   r   r   Fsparse_outputr      )              ?r   r   r   )r   r   r   r   r   )	nparrayr   fit_transformshaper   issparser
   toarray)X
enc_sparse	enc_denseX_trans_sparseX_trans_denses        \/var/www/html/venv/lib/python3.13/site-packages/sklearn/preprocessing/tests/test_encoders.py!test_one_hot_encoder_sparse_denser$      s     	)Y'(AJE2I--a0N++A.M6)))&(((??>****}----  #<>W"X ~--/?    handle_unknown)ignoreinfrequent_if_existwarnc                    [         R                  " / SQ/ SQ/ SQ/5      n[         R                  " / SQ/5      n[        SS9nUR                  U5        [        R
                  " [        SS9   UR                  U5        S S S 5        [        U S9nUR                  U5        UR                  5       n[        UR                  U5      R                  5       [         R                  " / S	Q/5      5        [        X$5        g ! , (       d  f       N= f)
N)r   r   r   )r   r   r   )r   r   r   )   r   r   errorr&   Found unknown categoriesmatch)r   r   r   r   r   r   r   )r   r   r   fitpytestraises
ValueError	transformcopyr
   r   r	   r&   r   X2oh	X2_passeds        r#   #test_one_hot_encoder_handle_unknownr;   *   s    
)Y	23A	9+	B 
g	.BFF1I	z)C	D
R 
E 
n	5BFF1I	I
Y'')
567
 B" 
E	Ds   ,C<<
D
c                    [         R                  " / SQ5      R                  S5      n[         R                  " SS/5      R                  S5      n[        U S9nUR	                  U5        UR                  5       n[        UR                  U5      R                  5       [         R                  " / SQ/ SQ/5      5        [        X$5        g )N)11111111223334444)r   55555r>   r-   )r   r   r   r   r   r   r   r   )	r   r   reshaper   r1   r6   r
   r5   r   r7   s        r#   +test_one_hot_encoder_handle_unknown_stringsrE   B   s    
23;;GDA	7D/	"	*	*7	3B
 
n	5BFF1I	I
Y'')
&(<=>
 r%r%   output_dtypeinput_dtypec                    [         R                  " SS//U S9R                  n[         R                  " SS/SS//US9n[        SUS9n[	        UR                  U5      R                  5       U5        [	        UR                  U5      R                  U5      R                  5       U5        [        SUSS9n[	        UR                  U5      U5        [	        UR                  U5      R                  U5      U5        g )Nr   r   dtypeauto)
categoriesrJ   F)rL   rJ   r   )	r   asarrayTr   r
   r   r   r1   r5   )rG   rF   r   
X_expectedr9   s        r#   test_one_hot_encoder_dtyperP   U   s     	

QF8;/11AaVaV,LAJ	&	=Br''*224jArvvay**1-557D	&E	RBr''*J7rvvay**1-z:r%   c                    [         R                  " S5      nUR                  SS/SS/S.5      n[        R                  " / SQ/ SQ/U S	9n[        U S	9n[        UR                  U5      R                  5       U5        [        UR                  U5      R                  U5      R                  5       U5        [        U S
S9n[        UR                  U5      U5        [        UR                  U5      R                  U5      U5        g )Npandasabr   r   ABr   r   r   r   r   r   r   r   rI   F)rJ   r   )r2   importorskip	DataFramer   r   r   r
   r   r   r1   r5   )rF   pdX_dfrO   r9   s        r#   !test_one_hot_encoder_dtype_pandasr^   d   s    			X	&B<<sCj1v67D<6lKJ	\	*Br''-557Drvvd|--d3;;=zJ	\	?Br''-z:rvvd|--d3Z@r%   c                  `   [        5       n / SQ/ SQ/ SQ/ SQ/nU R                  U5        U R                  5       n[        / SQU5        U R                  / SQ5      n[        / SQU5        [        R
                  " [        SS	9   U R                  S
S/5        S S S 5        g ! , (       d  f       g = f)N)Maler   girlr   r   )Female)   ra   r   
   )r`   3   boy   r   )r`   [   ra         )	x0_Femalex0_Malex1_1x1_41x1_51x1_91x2_boyx2_girlx3_1x3_2x3_12x3_21x4_3x4_10x4_30)onetwothreefourfive)
one_Femaleone_Maletwo_1two_41two_51two_91	three_boy
three_girlfour_1four_2four_12four_21five_3five_10five_30z!input_features should have lengthr/   rz   r{   )r   r1   get_feature_names_outr
   r2   r3   r4   )encr   feature_namesfeature_names2s       r#   "test_one_hot_encoder_feature_namesr   t   s    
/C!%"$		A GGAJ--/M	
" 	%* ../VWN	
" 	%* 
z)L	M!!5%.1 
N	M	Ms   B
B-c                      [        5       n [        R                  " SS//[        S9R                  nU R                  U5        U R                  5       n[        SS/U5        U R                  S/S9n[        SS	/U5        g )
Nu   c❤t1dat2rI   u	   x0_c❤t1x0_dat2u   n👍meinput_featuresu   n👍me_c❤t1u   n👍me_dat2)r   r   r   objectrN   r1   r   r
   )r   r   r   s      r#   *test_one_hot_encoder_feature_names_unicoder      st    
/C
8V$%V466AGGAJ--/MY/?--i[-IM(.9=Ir%   c                     S n [        U S9n[        R                  " SS//[        S9R                  nUR                  U5        UR                  5       n[        SS/U5        UR                  S/S	9n[        S
S/U5        S n[        US9R                  U5      nSn[        R                  " [        US9   UR                  5         SSS5        g! , (       d  f       g= f)z=Check the behaviour of `feature_name_combiner` as a callable.c                 $    U S-   [        U5      -   $ )N_)reprfeaturecategorys     r#   name_combinerHtest_one_hot_encoder_custom_feature_name_combiner.<locals>.name_combiner   s    }tH~--r%   )feature_name_combinerNoneNrI   z	x0_'None'x0_NonerS   r   za_'None'a_Nonec                     g)Nr    r   s     r#   wrong_combinerItest_one_hot_encoder_custom_feature_name_combiner.<locals>.wrong_combiner   s    r%   zMWhen `feature_name_combiner` is a callable, it should return a Python string.r/   )r   r   r   r   rN   r1   r   r
   r2   r3   	TypeError)r   r   r   r   r   err_msgs         r#   1test_one_hot_encoder_custom_feature_name_combinerr      s    . m
<C
64.!022AGGAJ--/MY/?--cU-CM
H-}= n
=
A
A!
DCW  
y	0!!# 
1	0	0s   6C
Cc                     [         R                  " SS//5      R                  n [        5       nUR	                  / SQ/S9  UR                  5       S   / SQ/:X  d   eUR                  U 5      R                  5       R                  S:X  d   eUR	                  / SQ/S9  UR                  U 5      R                  5       R                  S:X  d   eg )	Nr   r   )r   r   r   r   rL   rL   )r   r+   )r   r   r   r   r+   r   )	r   r   rN   r   
set_params
get_paramsr   r   r   )r   r9   s     r#   test_one_hot_encoder_set_paramsr      s    
1a&A	BMMl^M,==?<(\N:::A&&(..&888MMo.M/A&&(..&888r%   c                    [        SS9nUR                  U 5      n[        SSS9nUR                  U 5      n[        UR                  5       U5        [        R
                  " U5      (       a  UR                  S:X  d   eUR                  5       $ )NrK   r   FrL   r   csr)r   r   r	   r   r   r   format)r   r   Xtr1Xtr2s       r#   check_categorical_onehotr      st    
6
*CQD
6
?CQDDLLND)??4  T[[E%999<<>r%   r   defr   7   abcr   r   )rd   r   r   )r   r   r   )rT   rV   cat)rS   rW   r   rI   )rT   r   r   rS   r   nan)Nr   r   )rS   r   r   )Nr   N)mixednumericr   z	mixed-nanzmixed-float-nanz
mixed-Nonezmixed-None-nanzmixed-None-float-nan)idsc                 Z   [        [        R                  " U 5      S S 2S/4   5      n[        USS/SS//5        [        [        R                  " U 5      S S 2SS/4   5      n[        U/ SQ/ SQ/5        [	        SS9R                  U 5      n[        UR                  5       / SQ/ SQ/5        g )	Nr   r   )r   r   r   r   r   r   r   r   rK   r   )r   r   r   r   r   )r   r   r   r   r   )r   r   r   r	   r   r   r   )r   Xtrs     r#   test_one_hot_encoderr      s    0 #288A;q1#v#6
7CC1a&1a&)*
"288A;q1a&y#9
:CC,56
6
*
8
8
;CCKKMO_#EFr%   sparse_FTdropfirstc                    / SQ/ SQ/ SQ/n[        XS9nUR                  U5      n[        R                  " U[        S9n[        UR                  U5      U5        SS/SS/S	S//n[        US
US9nUR                  U5      n[        R                  " U5      n[        UR                  U5      U5        Uc  / SQ/ SQ/ SQ/n[        UU SS/SS// SQ/S9nUR                  U5      n[        R                  " U[        S9nS US'   [        UR                  U5      U5        SS/SS/S	S//n[        USS/SS//U S9nUR                  U5      n[        R                  " U[        S9nS US'   S US S 2S4'   [        UR                  U5      U5        [        R                  " / SQ/ SQ/5      n[        R                  " S5      n[        R                  " [        US9   UR                  U5        S S S 5        g ! , (       d  f       g = f)Nr   r   )r   r   r   r   r   rI   r   r   r   r   rK   )r   rL   r   r   r   )6   r   8   )r   r&   rL   )r   r   r   r   )r   rL   r&   r   r   r   r   r   r   )Shape of the passed X data is not correctr/   )r   r   r   r   r   r
   inverse_transformreescaper2   r3   r4   )r&   r   r   r   r   X_trexpmsgs           r#   test_one_hot_encoder_inverser     s    
8A
g
9CQD
((1F
#Cs,,T2C8
R1b'Ar7#A
g&t
LCQD
((1+Cs,,T2C8| ^^<!)A=

   #hhq'D	3006< Wq"g2w'!AR))

   #hhq'D	AqD	3006< 88Y	*+D
))?
@C	z	-d# 
.	-	-s   G33
Hz
X, X_transr   r   r   r   r   r   r   rz   r{   r|   rT   r   r   r   r   r   )r   r   r   r   r   )r   r   r   r   r   c                     [        US9R                  U 5      nSnU(       a  [        US5      n[        R                  " [
        US9   UR                  U5        SSS5        g! , (       d  f       g= f)zCheck that `inverse_transform` raise an error with unknown samples, no
dropped feature, and `handle_unknow="error`.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/14934
r   zqSamples \[(\d )*\d\] can not be inverted when drop=None and handle_unknown='error' because they contain all zerosr   r/   N)r   r1   r   r2   r3   r4   r   )r   X_transr   r   r   s        r#   ?test_one_hot_encoder_inverse_transform_raise_error_with_unknownr   A  s]    & g
.
2
21
5C	A 
 $Wh7	z	-g& 
.	-	-s   A""
A0c                      [         R                  " SS/SS/SS//[        S9n [        SSS	9nUR	                  U 5      n[        UR                  U5      U 5        g )
Nr`   r   rb   r   r   rI   	if_binaryFr   r   )r   r   r   r   r   r
   r   )r   oher   s      r#   &test_one_hot_encoder_inverse_if_binaryr   a  sV    
61+!}xm<FKA
[
>CQDs,,T2A6r%   )r   r   N
reset_dropc                 ~   [         R                  " SS/SS/SS//[        S9n[        U SS9nUR	                  U5        UR                  U5      nUR                  5       nUR                  US	9  [        UR                  U5      U5        [        UR                  U5      U5        [        UR                  5       U5        g )
Nr`   r   rb   r   r   rI   Fr   r   )r   r   r   r   r1   r5   r   r   r
   r   r	   )r   r   r   r   r   r   s         r#   test_one_hot_encoder_drop_resetr   h  s     	61+!}xm<FKA
T
7CGGAJ==D--/MNN
N#s,,T2A6CMM!$d+s002MBr%   methodr1   r         @      @c                     [        5       nSn[        R                  " [        US9   [	        X!5      " U 5        S S S 5        g ! , (       d  f       g = f)Nz'Expected 2D array, got 1D array insteadr/   )r   r2   r3   r4   getattr)r   r   r9   r   s       r#   test_X_is_not_1Dr   w  s7     
B
3C	z	-A 
.	-	-s   A
Ac                    [         R                  " S5      nUR                  / SQ5      n[        5       nS[	        U5       S3n[         R
                  " [        US9   [        X05      " U5        S S S 5        g ! , (       d  f       g = f)NrR   )   r   r+   r   z+Expected a 2-dimensional container but got z	 instead.r/   )r2   rZ   Seriesr   typer3   r4   r   )r   r\   r   r9   r   s        r#   test_X_is_not_1D_pandasr     s`    			X	&B
		,A	B7Qy	
JC	z	-A 
.	-	-s   A77
BzX, cat_exp, cat_dtyper   r   r   rV   rW   )r   r   r   stringzmissing-floatzmissing-np.nan-objectzmissing-float-nan-objectc                    X S S S2   4 H  n[        SS9nUR                  U5        [        UR                  [        5      (       d   e[        UR                  U5       H  u  pVUR                  5       n[        US   5      (       a$  [        US   5      (       d   eUS S US S :X  d   eOUR                  5       U:X  d   e[        R                  " UR                  U5      (       a  M   e   M     g )NrA   rK   r   )r   r1   
isinstancecategories_listziptolistr   r   
issubdtyperJ   )r   cat_exp	cat_dtypeXir   resr   res_lists           r#   test_one_hot_encoder_categoriesr     s    F DbD'lv.#//40000COOW5HCzz|HSW%%$Xb\2222}CR000zz|s***==I6666 6 r%   zX, X2, cats, cat_dtypedrS   rT   cint64r+   r   r   r   )NrS   z)rS   rT   r  )rS   Nr  )r   r   zobject-stringzobject-string-nonezobject-string-nanzobject-None-and-nanc                    [        US9n[        R                  " / SQ/ SQ/5      n[        UR	                  U 5      R                  5       U5        [        UR                  S   5      [        US   5      :X  d   eUR                  S   R                  5       [        US   5      :X  d   eUR                  S   R                  U:X  d   e[        US9n[        R                  " [        SS9   UR                  U5        S S S 5        [        X$S9n[        R                  " / SQ/ SQ/5      n[        UR                  U5      R                  U5      R                  5       U5        g ! , (       d  f       Nl= f)	Nr   r   r   r   r   r   r   r   r.   r/   rL   r&   )r   r   r   )r   r   r   r
   r   r   r   rL   r   r   rJ   r2   r3   r4   r1   r5   )r   r8   catsr   r&   r   r   s          r#   )test_one_hot_encoder_specified_categoriesr    s$   f 4
(C
((O_5
6Cs((+335s;q!"d47m333??1$$&$tAw-777 ??1##y000 4
(C	z)C	D 
E
4
GC
((O_5
6Cswwr{,,R088:C@	 
E	Ds   )E!!
E/c                     [         R                  " SS//[        S9R                  n [	        / SQ/S9n[         R                  " / SQ/ SQ/5      n[        UR                  U 5      R                  U 5      R                  5       U5        [        UR                  U 5      R                  5       U5        UR                  S   R                  5       / SQ:X  d   e[         R                  " UR                  S   R                  [         R                  5      (       d   e[         R                  " S	S
//5      R                  n [	        / SQ/S9nSn[        R                   " ["        US9   UR                  U 5        S S S 5        g ! , (       d  f       g = f)NrS   rT   rI   )rT   rS   r  r   r  r  r   r   r   )r   r   r   z%Unsorted categories are not supportedr/   )r   r   r   rN   r   r
   r1   r5   r   r   r   r   r   rJ   object_r2   r3   r4   )r   r   r   r   s       r#   (test_one_hot_encoder_unsorted_categoriesr    s!   
3*V,..A
O#4
5C
((O_5
6Cswwqz++A.668#>s((+335s;??1$$&/999==+112::>>>> 	1a&A
I;
/C
1C	z	-! 
.	-	-s   "E==
FEncoderc                 4   [         R                  " S[         R                  S/5      /nU " US9n[         R                  " SS//[        S9R                  n[
        R                  " [        SS9   UR                  U5        SSS5        g! , (       d  f       g= f)zTest encoder for specified categories that nan is at the end.

Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/27088
r   r   r   rI   zNan should be the last elementr/   N)	r   r   r   r   rN   r2   r3   r4   r1   r  r
  r   r   s       r#   ,test_encoder_nan_ending_specified_categoriesr    sl     HHa^$%D
T
"C
1a&(**A	z)I	J
 
K	J	Js   .B		
Bc                     [         R                  " SS/SS//[        S9R                  n [	        / SQ/ SQ/S9n[         R                  " / S	Q/ S
Q/5      n[        UR                  U 5      R                  5       U5        UR                  S   R                  5       / SQ:X  d   e[         R                  " UR                  S   R                  [         R                  5      (       d   eUR                  S   R                  5       / SQ:X  d   e[         R                  " UR                  S   R                  [         R                  5      (       d   eg )NrS   rT   r   r   rI   r  )r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   rN   r   r
   r   r   r   r   r   rJ   r  r   r   r   s      r#   7test_one_hot_encoder_specified_categories_mixed_columnsr  $  s    
3*q!f%V466A
OY#?
@C
((24RS
TCs((+335s;??1$$&/999==+112::>>>>??1$$&)333==+112::>>>>r%   c                      [         R                  " S5      n U R                  SS/SS/S.5      n[        U5      n[	        U/ SQ/ SQ/5        g )	NrR   rS   rT   r   r   rU   rX   rY   )r2   rZ   r[   r   r	   )r\   r]   r   s      r#   test_one_hot_encoder_pandasr  1  sF    			X	&B<<sCj1v67D
"4
(CC,56r%   zdrop, expected_namesx0_cx2_br   )r  x1_2r  )r  r   rT   x0_bx2_a)r   binarymanualc                 ~    / SQ/ SQ/n[        U S9nUR                  U5        UR                  5       n[        X5        g )N)r  r   rS   )rT   r   rT   r   )r   r1   r   r
   )r   expected_namesr   r   r   s        r#   'test_one_hot_encoder_feature_names_dropr!  :  s9     
&A
T
"CGGAJ--/M~5r%   c                     SS/SS/SS//n [         R                  " / SQ/ SQ/ SQ/5      n[         R                  " S S	/5      n[        S
SS9nUR                  U 5      n[	        UR
                  U5        [        XA5        SS/SS/SS//n [         R                  " SS/SS/SS//5      n[         R                  " S	S /5      n[        S
SS9nUR                  U 5      n[	        UR
                  U5        [        XA5        g )Nrd   yes   norj   )r   r   r   r   rC   )r   r   r   r   r   r   Fr   truerS   falser   r   )r   r   r   r   r
   	drop_idx_r	   )r   expectedexpected_drop_idxr   results        r#   *test_one_hot_encoder_drop_equals_if_binaryr,  L  s    
er4j2u+.Axx	35IJH $+
[
>Cq!Fs}}&78F% ###7Axx#sc3Z#s<=H!T+
[
>Cq!Fs}}&78F%r%   )rd   r   r   )r$  r   r   )r   r   r   c                     [        5       n[        R                  " / SQ/ SQ/SS9n[        UR	                  U 5      UR                  S5      5        [        SS9n[        UR	                  U 5      U5        g )Nr   r   r   r   r   r   r  rI   float64)r   r   r   r
   r   astyper  s      r#   test_ordinal_encoderr2  d  s^     
C
((Iy)
9Cs((+SZZ	-BC
w
'Cs((+S1r%   )r   r   zobject-string-catc                 
   [        US9n[        R                  " S/S//5      n[        UR	                  U 5      U5        [        UR                  S   5      [        US   5      :X  d   eUR                  S   R                  5       [        US   5      :X  d   eUR                  S   R                  U:X  d   e[        US9n[        R                  " [        SS9   UR                  U5        S S S 5        g ! , (       d  f       g = f)Nr   r   r   r   r.   r/   )r   r   r   r
   r   r   rL   r   r   rJ   r2   r3   r4   r1   )r   r8   r
  r   r   r   s         r#   )test_ordinal_encoder_specified_categoriesr4  u  s    2 D
)C
((SEC5>
"Cs((+S1q!"d47m333??1$$&$tAw-777 ??1##y000 D
)C	z)C	D 
E	D	Ds   C44
Dc                     / SQ/ SQ/n [        5       nUR                  U 5      n[        R                  " U [        S9n[        UR                  U5      U5        [        R                  " / SQ/ SQ/5      n[        R                  " S5      n[        R                  " [        US9   UR                  U5        S S S 5        g ! , (       d  f       g = f)Nr   r   rI   )r   r   r   r   rX   r   r/   )r   r   r   r   r   r
   r   r   r   r2   r3   r4   )r   r   r   r   r   s        r#   test_ordinal_encoder_inverser6    s    	(A

CQD
((1F
#Cs,,T2C8 88\<01D
))?
@C	z	-d# 
.	-	-s   #B>>
Cc                     [        SSS9n [        R                  " SS/SS/SS	//[        S
9n[        R                  " SS/SS/SS//[        S
9nU R	                  U5        U R                  U5      n[        R                  " SS/SS/SS//SS
9n[        X45        U R                  U5      n[        R                  " SS /S S/SS//[        S
9n[        XV5        g )Nuse_encoded_valuer&   unknown_valuerS   xrT   yr  r  rI   xyblar   r   r   r  )r   r   r   r   r1   r5   r
   r   )r   X_fitr   X_trans_encr   X_trans_invinv_exps          r#   +test_ordinal_encoder_handle_unknowns_stringrD    s    
(;2
NCHHsCj3*sCj9HEhhdeS\C:>fMGGGEN--(K
((QGb!Wq!f-W
=C{(''4KhhddC[3*=VLG{,r%   rJ   c                    [        SSS9n[        R                  " SS/SS/SS	//U S
9n[        R                  " SS/SS/SS//U S
9nUR                  U5        UR	                  U5      n[        R                  " SS/SS/SS//SS
9n[        XE5        UR                  U5      n[        R                  " SS /S S/SS//[        S
9n[        Xg5        g )Nr8  r:  r      r      r   	   rI   rg      r   r  )r   r   r   r1   r5   r
   r   r   )rJ   r   r@  r   rA  r   rB  rC  s           r#   ,test_ordinal_encoder_handle_unknowns_numericrK    s    
(;4
PCHHq!fq!fq!f-U;EhhB"a1a&1?GGGEN--(K
((QIay1a&1
AC{(''4KhhD	D!9q!f5VDG{,r%   c                      [        S[        R                  S9n [        R                  " S/S/S//5      nU R	                  U5        U R                  S/S/S//5      n[        US/S/[        R                  //5        g )Nr8  r:  r   r   r   r+   r   )r   r   r   r   r1   r5   r
   )r   r@  r   s      r#   (test_ordinal_encoder_handle_unknowns_nanrM    so     (;266
RCHHqcA3_%EGGENmmaS1#sO,Gw!qcBFF8 45r%   c                      [        S[        R                  [        S9n [        R                  " S/S/S//5      n[
        R                  " [        SS9   U R                  U5        S S S 5        g ! , (       d  f       g = f)Nr8  )r&   r;  rJ   r   r   r   z'dtype parameter should be a float dtyper/   )	r   r   r   intr   r2   r3   r4   r1   )r   r@  s     r#   8test_ordinal_encoder_handle_unknowns_nan_non_float_dtyperP    s\     *"&&C HHqcA3_%E	z)R	S 
T	S	Ss   A..
A<c                      [         R                  " / SQ/[        S9R                  n / SQn[	        US9nSn[
        R                  " [        US9   UR                  U 5        S S S 5        g ! , (       d  f       g = f)N)LowMediumHighrS  rR  rI   )rR  rS  rT  r   z*Shape mismatch: if categories is an array,r/   )	r   r   r   rN   r   r2   r3   r4   r1   )r   r
  r   r   s       r#   +test_ordinal_encoder_raise_categories_shaperU    sU    
<=VLNNA$D
D
)C
6C	z	-
 
.	-	-s   A**
A8c            	         [        SS9n [        R                  " / SQ/ SQ/SS9n[        R                  " SS/S	S
//SS9[        R                  " SS/S	S
//SS9[        R                  " SS/SS//5      [        R                  " SS/SS//5      [        R                  " SS/S	S//SS94 H  nU R                  U5        [	        [        S5       Vs/ s H(  o0R                  U   R                  UR                  :H  PM*     sn5      (       d   e[        U R                  U5      R                  5       U5        M     SS/S	S
//nU R                  U5        [	        [        S5       Vs/ s H?  n[        R                  " U R                  U   R                  [        R                  5      PMA     sn5      (       d   e[        U R                  U5      R                  5       U5        SS/S	S//nU R                  U5        [	        [        S5       Vs/ s H  o0R                  U   R                  S:H  PM      sn5      (       d   e[        U R                  U5      R                  5       U5        g s  snf s  snf s  snf )NrK   r   )r   r   r   r   )r   r   r   r   r0  rI   r   r   r   r+   r  rS   rT   r  r      a   b   c   dr   )r   r   r   r1   allranger   rJ   r
   r5   r   r   integer)r   r   r   is       r#   test_encoder_dtypesr_    s   
6
*C
(((*>?y
QC 	1a&1a&!1
1a&1a&!3
3*sCj)*
4,t-.
1c(QH%X6 	
qJAOOA&,,7JKKKK3==+335s; Q!QAGGAJUSTXVXcooa066

CXVWWWWs}}Q'//137
SAs8AGGAJeAhGh"((H4hGHHHHs}}Q'//137 K
 W
 Hs   /I+
&AI0%I5c                     [         R                  " S5      n [        SS9n[        R                  " / SQ/ SQ/SS9nU R                  SS	/S
S/SS/S.SS9nUR                  U5        [        [        S	5       Vs/ s H  oAR                  U   R                  S:H  PM      sn5      (       d   e[        UR                  U5      R                  5       U5        U R                  SS	/SS/SS/S.5      n/ SQnUR                  U5        [        [        S
5       Vs/ s H   oAR                  U   R                  XT   :H  PM"     sn5      (       d   e[        UR                  U5      R                  5       U5        g s  snf s  snf )NrR   rK   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r0  rI   r   r   r   r+   r   r   rV   rW   Cr  rS   rT   r   r   )r  r   r0  )r2   rZ   r   r   r   r[   r1   r[  r\  r   rJ   r
   r5   r   )r\   r   r   r   r^  expected_cat_types         r#   test_encoder_dtypes_pandasrd    sO   			X	&B
6
*C
((	')GHC
 	Aq6AaV<GLAGGAJU1XFX"((G3XFGGGGs}}Q'//137
Aq6c
#sDEA6GGAJ%PQ(S(Q"((,=,@@(STTTTs}}Q'//137 G Ts   8%E5'E:c                      [        5       n SS/SS//n[        R                  " 5          [        R                  " S5        U R	                  U5        S S S 5        g ! , (       d  f       g = f)Nr`   r   rb   r   r,   )r   warningscatch_warningssimplefilterr   )r   r   s     r#   test_one_hot_encoder_warningri    sO    
/C
!xm$A		 	 	"g&! 
#	"	"s   (A
A'c                 @   SS/SS/SS//n[        U SSSS/SS//S9nUR                  U5        S	S//n[        R                  " SS//5      nS
n[        R
                  " [        US9   UR                  U5      nSSS5        [        WU5        g! , (       d  f       N= f)z,Check handle_unknown='warn' works correctly.rS   r   rT   r   r   Fr)   r   r   r&   rL   r  qFound unknown categories in columns \[0\] during transform. These unknown categories will be encoded as all zerosr/   N	r   r1   r   r   r2   warnsUserWarningr5   r	   )r   r   r   X_testrO   warn_msgr   s          r#   test_ohe_handle_unknown_warnrr  %  s     qC8c1X&A
#JA'	C GGAJAhZFAq6(#J	A  
k	2--' 
3GZ( 
3	2   (B
Bmissing_valuec                 (   SSSSU /n[        US9n/ SQ/ SQSSSSU //nUR                  U5      R                  5       n/ SQ/ S	Q/ S
Q/n[        XE5        UR                  UL d   e[        UR                  UR                  5       VVs/ s H	  u  pgXg   PM     nnnUR                  U5      n	[        R                  " U[        S9n
[        US   5      (       a  [        US S US S 5        [        US   5      (       d   e[        US   5      (       d   e[        U
S S 2S S24   U	S S 2S S24   5        [        U
SS S24   U	SS S24   5        [        U
S   5      (       d   e[        U	S   5      (       d   eg [        X5        [        X5        g s  snnf )Nr   rg   r   r   r   )r   rg   r   r   rS   )r   rg   r   r   rS   )r   r   r   r   r   )r   r   r   r   r   r   rI   rA   )rA   rA   )r   r   r   r
   r   r   r   r(  r   r   r   r   r   )rt  cats_to_dropr   r   transr   r   r   dropped_catsX_inv_transX_arrays              r#    test_one_hot_encoder_drop_manualr{  ?  s   2q"m4L
\
*C	Ar=)	A
 a ((*EO_
=Cu"88|### *-S__cmm)L)L)L   ''.Khhq'G \"%&&<,l3B.?@\"-....\"-....71crc6?K3B3,?@ 	72ss7+[SbS-ABWV_----[01111<670)s   F)r   r   rc   rS   c                     [        U S9nSn[        R                  " [        US9   UR	                  / SQ/ SQ/ SQ/5        S S S 5        g ! , (       d  f       g = f)Nr   z-`drop` should have length equal to the numberr/   r   r   )r   r   ;   )r   r2   r3   r4   r1   )r   r   r   s      r#   test_invalid_drop_lengthr~  d  s>    
T
"C=G	z	1@A 
2	1	1s   A		
Adensityr   denserS   r   rT   r  c                    [        U S9n[        XS9n/ SQ/ SQ/nUR                  U5        UR                  U5        [        UR                  UR                  5        US:X  a  [        UR                  S5        O>[        XR                  UR                  5       H  u  pVnU[        U5         U:X  a  M   e   [        UR                  [        R                  5      (       d   eUR                  R                  [        :X  d   eg )Nr   r   )r  r   rS   r  r   r   )r   r1   r
   r   r(  r   rO  r   r   ndarrayrJ   r   )r  r   ohe_baseohe_testr   drop_catdrop_idxcat_lists           r#   test_categoriesr  l  s     73H7>H	&ALLOLLOx++X-A-ABw8--q1,/$$h&:&:-
(H CM*h666-
 h(("**5555##v---r%   c                 d    U " 5       R                  5       R                  R                  (       d   eg )N)__sklearn_tags__
input_tagscategorical)r  s    r#   "test_encoders_has_categorical_tagsr    s"    9%%'22>>>>r%   kwargsmax_categoriesmin_frequency   g(\?r   )r  r  rg   rL   rK   rS   rT   r  r   c                 2   [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /5      R                  n[        SUS	S
S.U D6R	                  U5      n[        UR                  / SQ/5        S/S/S/S/S//n[         R                  " SS/SS/SS/SS/SS//5      nUR                  U5      n[        XV5        S/S/S-  -    Vs/ s H  ow/PM     nnUR                  U5      n	[        X5        UR                  5       n
[        SS/U
5        gs  snf )zlTest that different parameters for combine 'a', 'c', and 'd' into
the infrequent category works as expected.rS   r   rT   r$  r  rd   r   r   r(   F)rL   r&   r   rS   r  r   er   r   infrequent_sklearnr+   r  x0_infrequent_sklearnNr   r   r   rN   r   r1   r
   infrequent_categories_r5   r	   r   r   )r  rL   X_trainr   rp  r)  r   colexpected_invX_invr   s              r#   test_ohe_infrequent_two_levelsr    sF    hh	SEBJ.#;seaiGHIKKG
 , 	
 
c'l  s11O3DEecUSEC53%0Fxx!Q!Q!Q!Q!Q@AHmmF#GH&&)U.B-Ca-G%GH%GcE%GLH!!'*E|+--/M 78-H Is   Dc                    [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /5      R                  n[        S	S
SU S9R	                  U5      nUR
                  S   UR                  S      S:X  d   e[         R                  " S/S//5      nUR                  U5      n[        S/S//U5        UR                  5       n[        S/U5        UR                  U5      n[        S/S//U5        g)z3Test two levels and dropping the frequent category.rS   r   rT   r$  r  rd   r   r   r(   Fr   r&   r   r  r   r   r   r  r  N)r   r   rN   r   r1   r   r(  r5   r	   r   r
   r   )r   r  r   rp  r   r   	X_inverses          r#   ,test_ohe_infrequent_two_levels_drop_frequentr    s    hh	SEBJ.#;seaiGHIKKG
,	
 
c'l  ??1cmmA./3666XXusen%FmmF#GaS1#J(--/M/0-@%%g.I 456	Br%   c                 ,   [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /5      R                  n[        S	S
SU S9nSU S   < S3n[        R
                  " [        US9   UR                  U5        SSS5        g! , (       d  f       g= f)z[Test two levels and dropping any infrequent category removes the
whole infrequent category.rS   r   rT   r$  r  rd   r   r   r(   Fr   r  Unable to drop category r   ( from feature 0 because it is infrequentr/   Nr   r   rN   r   r2   r3   r4   r1   r   r  r   r   s       r#   5test_ohe_infrequent_two_levels_drop_infrequent_errorsr    s    
 hh	SEBJ.#;seaiGHIKKG
,	C %T!WK/W
XC	z	- 
.	-	-   *B
BrI  gQ?g{Gz?rH  c                 
   [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /5      R                  n[        SS	S
S.U D6R	                  U5      n[        UR                  SS//5        S/S/S/S/S//n[         R                  " / SQ/ SQ/ SQ/ SQ/ SQ/5      nUR                  U5      n[        XE5        S/S/S/S/S//nUR                  U5      n[        Xg5        UR                  5       n[        / SQU5        g)zgTest that different parameters for combing 'a', and 'd' into
the infrequent category works as expected.rS   r   rT   r$  r  rd   r   r   r(   Fr&   r   r  r/  r   r   r   r.  r  )r  r  r  Nr   r  )	r  r  r   rp  r)  r   r  r  r   s	            r#    test_ohe_infrequent_three_levelsr    s#     hh	SEBJ.#;seaiGHIKKG
 ,EEK	c'l  s11S#J<@ecUSEC53%0FxxIy)YOPHmmF#GH& 
				L !!'*E|+--/M@-Pr%   c                 "   [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /5      R                  n[        S	S
SU S9R	                  U5      n[         R                  " S/S/S//5      n[        SS/SS/SS//UR                  U5      5        UR                  SS9R	                  U5        Sn[        R                  " [        US9   UR                  S/S//5      nSSS5        [        SS/SS//W5        g! , (       d  f       N!= f)z5Test three levels and dropping the frequent category.rS   r   rT   r$  r  rd   r   r   r(   Fr  r   r   r'   r-   r.   r/   r  N)r   r   rN   r   r1   r	   r5   r   r2   rn  ro  )r   r  r   rp  r   r   s         r#   .test_ohe_infrequent_three_levels_drop_frequentr    s    hh	SEBJ.#;seaiGHIKKG
,	
 
c'l  XXusecU+,FaVaVaV,cmmF.CD NN(N+//8
$C	k	---#/ 
. aVaV$g. 
.	-s   D  
Dc                 ,   [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /5      R                  n[        S	S
SU S9nSU S   < S3n[        R
                  " [        US9   UR                  U5        SSS5        g! , (       d  f       g= f)z7Test three levels and dropping the infrequent category.rS   r   rT   r$  r  rd   r   r   r(   Fr  r  r   r  r/   Nr  r  s       r#   7test_ohe_infrequent_three_levels_drop_infrequent_errorsr    s     hh	SEBJ.#;seaiGHIKKG
,	C %T!WK/W
XC	z	- 
.	-	-r  c                     [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /5      R                  n [        S	S
SS9R	                  U 5      n[        UR                  SS//5        S/S/S/S//n[         R                  " / SQ/ SQ/ SQ/ SQ/5      nUR                  U5      n[        X45        S//nSn[        R                  " [        US9   UR                  U5        SSS5        g! , (       d  f       g= f)ziTest that different parameters for combining 'a', and 'd' into
the infrequent category works as expected.rS   r   rT   r$  r  rd   r   r   r,   F)r&   r   r  r/  r  r.  badz.Found unknown categories \['bad'\] in column 0r/   N)r   r   rN   r   r1   r
   r  r5   r	   r2   r3   r4   )r  r   rp  r)  r   r   s         r#   (test_ohe_infrequent_handle_unknown_errorr  '  s     hh	SEBJ.#;seaiGHIKKG
eA	c'l  s11S#J<@ ecUSEC5)FxxIy)DEHmmF#GH& gYF
;C	z	-f 
.	-	-s   C00
C>c                    [         R                  " S/S-  S/S-  -   /[        S9R                  n[	        S/ SQ/SSS	.U D6R                  U5      nS/S
/S/S/S//n[         R                  " SS/SS/SS/SS/SS//5      nUR                  U5      n[        XE5        SSS//nS/S//nU H@  nUR                  US9R                  U5        [        S/S//UR                  U5      5        MB     g)zG'a' is the only frequent category, all other categories are infrequent.rS   r   r  rj   rI   r  r   rS   rT   Fr(   rL   r   r&   rT   r  r   r   r   r   r   r   Nr   )	r   r   r   rN   r   r1   r5   r	   r   )r  r  r   rp  r)  r   dropsr   s           r#   5test_ohe_infrequent_two_levels_user_cats_one_frequentr  ?  s    hh	SEBJ./v>@@G
 (), 	
 
c'l  ecUSEC53%0Fxx!Q!Q!Q!Q!Q@AHmmF#GH& kC5)EecU^FD!%%g.!qc
CMM&$9: r%   c                     [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /[        S	9R                  n [	        / S
Q/SSSS9R                  U 5      n[        UR                  / SQ/5        S/S/S/S/S//n[         R                  " SS/SS/SS/SS/SS//5      nUR                  U5      n[        X45        S/S/S-  -    Vs/ s H  oU/PM     nnUR                  U5      n[        Xg5        gs  snf )zFTest that the order of the categories provided by a user is respected.rS   r   rT   r$  r  rd   r   r   rI   r  Fr(   r   rL   r   r&   r  )r  r   rS   r  r   r   r  r+   Nr   r   r   rN   r   r1   r
   r  r5   r	   r   )r  r   rp  r)  r   r  r  r  s           r#   (test_ohe_infrequent_two_levels_user_catsr  [  s(   hh
cURZ	3%"*	,uqy	89a  (),	
 
c'l  s11O3DEecUSEC53%0Fxx!Q!Q!Q!Q!Q@AHmmF#GH& '*U.B-Ca-G%GH%GcE%GLH!!'*E|+ Is   C9c                     [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /[        S	9R                  n [	        / S
Q/SSSS9R                  U 5      n[        UR                  SS//5        S/S/S/S/S//n[         R                  " / SQ/ SQ/ SQ/ SQ/ SQ/5      nUR                  U5      n[        X45        S/S/S/S/S//nUR                  U5      n[        XV5        g)zTest that the order of the categories provided by a user is respected.
In this case 'c' is encoded as the first category and 'b' is encoded
as the second one.rS   r   rT   r$  r  rd   r   r   rI   r  r   rT   rS   Fr(   r  r  r.  r  r/  r  Nr  )r  r   rp  r)  r   r  r  s          r#   *test_ohe_infrequent_three_levels_user_catsr  v  s   
 hh
cURZ	3%"*	,uqy	89a  (),	
 
c'l  s11S#J<@ecUSEC53%0FxxIy)YOPHmmF#GH&
 
				L !!'*E|+r%   c                      [         R                  / SQ/ SQ4   n [        SSSS9nUR                  U 5        SS/SS//nUR	                  U5      n[        U/ S	Q/ S
Q/5        g)z]Test infrequent categories where feature 0 has infrequent categories,
and feature 1 does not.	r   r   r   r   r   r   r   r   r   	r   r   r   r   r   r   r   r   r   r   r   F)r  r   r   r   r   r   r   r   r   )r   r   r   r   N)r   c_r   r1   r5   r	   )r   r   rp  r   s       r#   test_ohe_infrequent_mixedr    sc     	)+FFGA
q{%
PCGGAJ!fq!fFmmF#G GlL9:r%   c            
      J   [         R                  / SQ/ SQ/ SQ4   n [        SSSS9nUR                  U 5      R	                  5       n[        UR                  S   S	S
/5        [        UR                  S	   S	S/5        [        UR                  S
   S5        UR                  5       n[        / SQU5        / SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/	n[        XB5        / SQ/ SQ/nUR                  U5      n/ SQ/ SQ/n[        XFR	                  5       5        UR                  U5      n[         R                  " / SQ/ SQ/[        S9n[        X5        [        SSSS9R                  U 5      n[        R                  " [         SS9   UR                  U5        SSS5        / S Q/ S!Q/nUR                  U5      n/ S"Q/ SQ/n[        XFR	                  5       5        UR                  U5      n[         R                  " / S#Q/ S$Q/[        S9n[        X5        g! , (       d  f       N= f)%z?Test infrequent categories with feature matrix with 3 features.r  )	r   r   r   r   r   rd   r   r   r   )	r   r   r   r   r   r   r   r   r   rK   r   r(   rL   r  r&   r   r   r   rd   N)x0_0x0_3r  x1_0x1_5x1_infrequent_sklearnx2_0x2_1)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r   r   )r+   r   r   )r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r   r  N)r  r   NrI   r,   r.   r/   )r   r   r   )r   rd   r   )r   r   r   r   r   r   r   r   )r  r  r   )r   r  r   )r   r  r   r   r   r
   r  r   r	   r5   r   r   r   r1   r2   r3   r4   )	r   r   r   r   r)  rp  X_test_transr  r  s	            r#   'test_ohe_infrequent_multiple_categoriesr    s    	#$#	%	A !<QC "**,Gs11!4q!f=s11!4q"g>s11!4d;
 --/M		
 	 	!        
H H&#F==(L )*BCHH2245!!,/E88	(*IJRXL |+ !G	c!f  
z)C	Df 
E $F==(L(*BCHH2245!!,/E88	8:VWL |+! 
E	Ds   H
H"c            
         [         R                  " S5      n U R                  / SQ/ SQS.SS/S9n[        SS	S
S9nUR	                  U5      R                  5       n[        UR                  S   SS/5        [        UR                  S   / SQ5        / SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/ SQ/	n[        XC5        U R                  SS/SS/S.SS/S9n/ SQ/ SQ/nUR                  U5      n[        XFR                  5       5        UR                  U5      n[        R                  " SS/SS//[        S9n[        X5        U R                  SS/SS/S.SS/S9nUR                  U5      R                  5       n/ SQ/ SQ/n[        XF5        UR                  U5      n[        R                  " SS/SS//[        S9n[        X5        g)zHTest infrequent categories with a pandas dataframe with multiple dtypes.rR   	rS   fr  r  r  rS   r  rT   rT   	r   r   r   rd   rd   rg   r   r   r   )strrO  r  rO  columnsrK   r   r(   r  r   rS   rT   r   r   r   rg   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   r   r   r   r     rg   r  rI   r  r   N)r2   rZ   r[   r   r   r   r
   r  r	   r5   r   r   r   r   )	r\   r   r   r   r)  rp  r  r  r  s	            r#   .test_ohe_infrequent_multiple_categories_dtypesr    s    
		X	&B
@1	
  	 	A !<QC "**,Gs11!4sCjAs11!4jA 	
H H&\\3*b"X>PU\WF"$67H==(LH2245!!,/E88
 4	5=Q7RSL |+ \\3*b!W=u~\VF==(002L"$67HH+!!,/E88
#	$';Q&?@L |+r%   ri   )r  r  c                     [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /5      R                  n[        SS	S
S.U D6nUR	                  U5        UR                  S//5      n[        US//5        g),All user provided categories are infrequent.rS   r   rT   r$  r  rd   r   r   r(   Fr  r   Nr   )r   r   rN   r   r1   r5   r	   r  r  r   r   s       r#   $test_ohe_infrequent_one_level_errorsr  H  s     hh	SEBJ.#;seaiGHIKKG
 ,EEKC GGGmmcUG$GGqcU#r%   c                     [         R                  " S/S-  /[        S9R                  n[	        S/ SQ/SSS.U D6R                  U5      nUR                  S/S//5      n[        US	/S	//5        g
)r  r  r   rI   r  Fr(   r  rS   r   Nr   )r   r   r   rN   r   r1   r5   r	   r  s       r#   5test_ohe_infrequent_user_cats_unknown_training_errorsr  V  s     hh	{&133G
 (), 	
 
c'l  mmcUSEN+GGqcA3Z(r%   zinput_dtype, category_dtype)OOOUUOUUSOSUSS
array_type)r   r   	dataframec                    [         R                  " S/S//U S9n[         R                  " SS/US9/n[        USS9R                  U5      n[	        S/S/S/S//X S9nUR                  U5      n[         R                  " SS/SS/SS/SS//5      n[        Xx5        [        US9R                  U5      n	U	R                  U5      n[         R                  " S/S/S/S//5      n[        Xx5        g	)
a  Check that encoding work with object, unicode, and byte string dtypes.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/15616
https://github.com/scikit-learn/scikit-learn/issues/15726
https://github.com/scikit-learn/scikit-learn/issues/19677
rT   rS   rI   Fr   r   r   r   N)	r   r   r   r1   r   r5   r	   r   r
   )
rG   category_dtyper  r   rL   r   rp  r   r)  oes
             r#   test_encoders_string_categoriesr  g  s     	3%#{3A((C:^<=J
:U
C
G
G
JC
use$jF mmF#Gxx!Q!Q!Q!Q89HG&	:	.	2	21	5Bll6"Gxx!qcA3,-Hw)r%   c                  .   [         R                  " S/S//SS9n [         R                  " SS/SS9/n[        USS9n[        R                  " S5      n[
        R                  " [        US	9   UR                  U 5        S
S
S
5        g
! , (       d  f       g
= f)zCheck that this mixture of predefined categories and X raises an error.

Categories defined as bytes can not easily be compared to data that is
a string.
rT   rS   UrI   SFr   zjIn column 0, the predefined categories have type 'bytes' which is incompatible with values of type 'str_'.r/   N)	r   r   r   r   r   r2   r3   r4   r1   )r   rL   r   r   s       r#   $test_mixed_string_bytes_categoricalsr    sy     	3%#s+A((C:S12J
:U
CC
))	'C
 
z	-
 
.	-	-s   +B
Bc                     [         R                  " SSU SU //[        S9R                  n[	        SSS9R                  U5      nUR                  5       n[        USSS	U  3/5        g )
NrS   rT   rI   Fr'   r   r&   x0_ar  x0_)r   r   r   rN   r   r1   r   r
   )rt  r   r   namess       r#   )test_ohe_missing_values_get_feature_namesr    se     	3]C?@OQQA
eH
E
I
I!
LC%%'Euvv]O/DEFr%   c            	          [         R                  " S5      n U R                  / SQ[        R                  " SSS[        R
                  /[        S9S.SS	/S
9n[        R                  " / SQ/ SQ/ SQ/ SQ/5      n[        U5      n[        X25        g )NrR   )dogr   Nr   r   r   r+   rI   )col1col2r  r  r  )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )r   r   r   r   r   r   r   )	r2   rZ   r[   r   r   r   floatr   r	   )r\   dfexpected_df_transr   s       r#   %test_ohe_missing_value_support_pandasr    s    			X	&B	/HHaArvv.e<	
   
 
B !!!!		
 #2
&CC+r%   pd_nan_typepd.NAznp.nanc           
      "   [         R                  " S5      nU S:X  a  UR                  O[        R                  nUR                  SUR                  SSUSS/SS905      n[        R                  " / S	Q/ S
Q/ SQ/ SQ/ S
Q/5      n[        SUS9nUR                  U5      n[        XW5        [        UR                  5      S:X  d   e[        UR                  S   S S / SQ5        [        R                  " UR                  S   S   5      (       d   eg )NrR   r  r  r  rS   rT   r   rI   )r   r   r   r   )r   r   r   r   )r   r   r   r   r  Fr  r   r   rA   r  )r2   rZ   NAr   r   r[   r   r   r   r   r	   lenr   r
   isnan)r  r&   r\   pd_missing_valuer  r   r   df_transs           r#   1test_ohe_missing_value_support_pandas_categoricalr
    s     
		X	&B +w 6ruuBFF	BIIsC)93DJIW	

B
 	
 eN
KC  $H%0s1$$$sq)#2.@88COOA&r*++++r%   c                    SS/SS/SS//n[        SSU S9nUR                  U5      n[        R                  " / S	Q/ S
Q/ SQ/5      n[	        X45        SS//n[        R                  " / S	Q/5      nSn[
        R                  " [        US9   UR                  U5      nSSS5        [	        X45        UR                  U5      n[        U[        R                  " SS//[        S95        g! , (       d  f       NQ= f)zVCheck drop='first' and handle_unknown='ignore'/'infrequent_if_exist'
during transform.rS   r   rT   r   r   r   Fr   r   r&   r   r   )r   r   r   r  r   tFound unknown categories in columns \[0, 1\] during transform. These unknown categories will be encoded as all zerosr/   NrI   r   r   r   r   r	   r2   rn  ro  r5   r   r
   r   r&   r   r   r   rO   rp  rq  r  s           r#   /test_ohe_drop_first_handle_unknown_ignore_warnsr    s     qC8c1X&A
E.C "G	
J G( AhZF9+&J	 
 
k	2--' 
3G( !!*-Eubhhaz@A 
3	2   C((
C6c                    SS/SS/SS//n[        SSU S9nUR                  U5      n[        R                  " / S	Q/ S
Q/ SQ/5      n[	        X45        SS//n[        R                  " / SQ/5      nSn[
        R                  " [        US9   UR                  U5      nSSS5        [	        X45        UR                  U5      n[        U[        R                  " SS//[        S95        g! , (       d  f       NQ= f)zDCheck drop='if_binary' and handle_unknown='ignore' during transform.rS   r   rT   r   r   r   Fr  r  r   rX   r  r   )r   r   r   r   r  r/   NrI   r  r  s           r#   3test_ohe_drop_if_binary_handle_unknown_ignore_warnsr    s     qC8c1X&A
nC "G	
J G( AhZF<.)J	 
 
k	2--' 
3G( !!*-Eubhhd}FCD 
3	2r  c                 @   SS/SS/SS//n[        SSU SS/SS//S9nUR                  U5        S	S//n[        R                  " SS//5      nS
n[        R
                  " [        US9   UR                  U5      nSSS5        [        WU5        g! , (       d  f       N= f)zjCheck drop='first' and handle_unknown='ignore'/'infrequent_if_exist'
during fit with categories passed in.rS   r   rT   r   r   r   Frk  r  rl  r/   Nrm  )r&   r   r   rp  rO   rq  r   s          r#   'test_ohe_drop_first_explicit_categoriesr  &  s    
 qC8c1X&A
%#JA'	C GGAJAhZFAq6(#J	A  
k	2--' 
3GZ( 
3	2rs  c                     [         R                  " S5      n U R                  / SQ/ SQS.SS/S9n[        SS	9nUR	                  SS
9  Sn[         R
                  " [        US9   UR                  U5        SSS5        UR                  U5        [         R
                  " [        US9   UR                  U5        SSS5        g! , (       d  f       NS= f! , (       d  f       g= f)zJRaise informative error message when pandas output and sparse_output=True.rR   r  )r  rT   rT   )rS   rT   rS   rT   r  Tr   r5   zxPandas output does not support sparse data. Set sparse_output=False to output pandas dataframes or disable Pandas outputr/   N)
r2   rZ   r[   r   
set_outputr3   r4   r   r1   r5   )r\   r  r   r   s       r#   'test_ohe_more_informative_error_messager  A  s    			X	&B	IO<sCj	QB
d
+CNNXN&	S  
z	-" 
. GGBK	z	-b 
.	-	 
.	- 
.	-s   #C'C
C
C!c                  F   [         R                  " [         R                  SSS//5      R                  n [	        [         R
                  S9nS[         R
                   3n[        R                  " [        US9   UR                  U 5        SSS5        g! , (       d  f       g= f)zDTest ordinal encoder with nan passthrough fails when dtype=np.int32.r   r   rI   zdThere are missing values in features \[0\]. For OrdinalEncoder to encode missing values with dtype: r/   N)
r   r   r   rN   r   int32r2   r3   r4   r1   )r   r  r   s      r#   Btest_ordinal_encoder_passthrough_missing_values_float_errors_dtyper  U  su     	2663S)*+--A	bhh	'B	002z	;  
z	-
q	 
.	-	-s   7B
B encoded_missing_valuer9  c                    [         R                  " [         R                  SSS//[         R                  S9R                  n[        U S9R                  U5      n[        UR                  5      S:X  d   e[        UR                  S   SS[         R                  /5        UR                  U5      n[        X0/S/S/S//5        UR                  U5      n[        XA5        g)	z.Test ordinal encoder with nan on float dtypes.r   r   rI   r  r   r   r   N)r   r   r   r0  rN   r   r1   r  r   r	   r5   r   )r  r   r  r   r  s        r#   5test_ordinal_encoder_passthrough_missing_values_floatr   c  s     	2663S)*"**=??A	.C	D	H	H	KBr~~!###BNN1%S"&&'9:ll1oGG5usecUKL$$W-II!r%   c           
         [         R                  " S5      nU S:X  a  UR                  O[        R                  nUR                  SUR                  SSUSS/SS905      n[        US	9R                  U5      n[        UR                  5      S
:X  d   e[        UR                  S   SS / SQ5        [        R                  " UR                  S   S   5      (       d   eUR                  U5      n[        US/S/U/S/S//5        UR                  U5      nUR                   S:X  d   e[        USS2S4   SS/5        [        USS2S4   SS/5        [        R                  " US   5      (       d   eg)z0Check ordinal encoder is compatible with pandas.rR   r  r  r  rS   rT   r   rI   r  r   r   Nr   r  rA          @r   r   )r   r   r   r   )r2   rZ   r  r   r   r[   r   r   r1   r  r   r
   r  r5   r	   r   r   )r  r  r\   r  r  r  r	  r  s           r#   =test_ordinal_encoder_missing_value_support_pandas_categoricalr#  u  sf    
		X	&B +w 6ruuBFF	BIIsC)93DJIW	

B 
.C	D	H	H	LBr~~!###r~~a(!,o>88BNN1%b)****||BHHuse.C-DsecUST$$X.I??f$$$y!Q'#s4yQ'#s488IdO$$$$r%   r"  )zobject-None-missing-valuezobject-nan-missing_valueznumeric-missing-valuec                 t   [        US9n[        R                  " S/[        R                  //5      n[	        UR                  U 5      U5        UR                  S   R                  U:X  d   e[        US9n[        R                  " [        SS9   UR                  U5        SSS5        g! , (       d  f       g= f)z.Test ordinal encoder for specified categories.r   r   r   r.   r/   N)r   r   r   r   r
   r   r   rJ   r2   r3   r4   r1   )r   r8   r
  r   r  r   s         r#   =test_ordinal_encoder_specified_categories_missing_passthroughr%    s    L 
4	(B
((SEBFF8$
%Cr''*C0 >>!""i/// 
4	(B	z)C	D
r
 
E	D	Ds   B))
B7c                    [         R                  " / SQ[        S9/nU " US9n[         R                  " SS//[        S9R                  n[        R
                  " [        SS9   UR                  U5        SSS5        g! , (       d  f       g= f)	zTest encoder for specified categories have duplicate values.

Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/27088
)rS   rT   rS   rI   r   rS   rT   z5the predefined categories contain duplicate elements.r/   N)r   r   r   rN   r2   r3   r4   r1   r  s       r#   +test_encoder_duplicate_specified_categoriesr'    sl     HH_F34D
T
"C
3*V,..A	Q
 	

 
 
s   "A==
BzX, expected_X_trans, X_testr   r   )r   r   r   )r   r"  r   r  )r  rS   rT   )r"  r   r   c                     [        SSS9nUR                  U 5      n[        XA5        [        UR                  U5      S//5        g)z>Test the interaction between missing values and handle_unknownr8  rA   r:  g      N)r   r   r	   r5   )r   expected_X_transrp  r  r   s        r#   /test_ordinal_encoder_handle_missing_and_unknownr*    s@    8 
':"	MBq!GG.BLL(D6(3r%   csr_containerc                 .   [         R                  " / SQ/ SQ/5      nU " U5      n[        5       nSn[        R                  " [
        US9   UR                  U5        SSS5        [        R                  " [
        US9   UR                  U5        SSS5        UR                  U5      nU " U5      n[        R                  " [
        US9   UR                  U5        SSS5        g! , (       d  f       N= f! , (       d  f       Nl= f! , (       d  f       g= f)zCheck that we raise proper error with sparse input in OrdinalEncoder.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/19878
r   r   z2Sparse data was passed, but dense data is requiredr/   N)	r   r   r   r2   r3   r   r1   r   r   )r+  r   X_sparseencoderr   r   r!   s          r#   test_ordinal_encoder_sparser/    s     	)Y'(AQHGBG	y	0H 
1	y	0h' 
1 ##A&G"7+N	y	0!!.1 
1	0 
1	0	0	0
 
1	0s$   
C$=C5	D$
C25
D
Dc                  >   [         R                  " / SQ5      SS2[         R                  4   n [        / SQ/SSS9nUR	                  U 5        [        / SQ/SS9n[
        R                  " [        S	S
9   UR	                  U 5        SSS5        g! , (       d  f       g= f)zCheck OrdinalEncoder.fit works with unseen category when
`handle_unknown="use_encoded_value"`.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/19872
)r   r   r   r   r   r   N)rA   r   r   r8  rF  )rL   r&   r;  r,   r	  r.   r/   )r   r   newaxisr   r1   r2   r3   r4   )r   r  s     r#   -test_ordinal_encoder_fit_with_unseen_categoryr2    sw     	#$Q

]3A	<0CSW
B FF1I	J<	HB	z)C	D
q	 
E	D	Ds   3B
Br  AAOr  rp  c                 z    [        SSS9nUR                  U 5        UR                  U5      n[        USS//5        g)zChecks that `OrdinalEncoder` transforms string dtypes.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/19872
r8  ir:  r   N)r   r1   r5   r	   )r  rp  r   r   s       r#   1test_ordinal_encoder_handle_unknown_string_dtypesr6  &  s;    * (;2
NCGGGmmF#GGr1gY'r%   c                  6   [         R                  " / SQ5      R                  SS5      n [        5       R	                  U 5      n[        UR                  [         R                  " U SS9R                  5        UR                  U 5      n[        US/S/S/S//5        g)	zCheck that `OrdinalEncoder` accepts Python integers that are potentially
larger than 64 bits.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/20721
)l   	HP
1& l   	H]viel   	 :?i}Ga l   IRK2e6krA   r   r   )axisr   r   N)
r   r   rD   r   r1   r
   r   sortrN   r5   )r   r.  r   s      r#   #test_ordinal_encoder_python_integerr:  B  s     		
	 gb!n  ""1%Gw**BGGAA,>,@,@A"Gw!qcA3 45r%   c                      [         R                  " S5      n / SQnU R                  / SQ/US9n[        5       R	                  U5      nUR                  5       n[        X5        g)z-Check feature names out is same as the input.rR   )rT   r  rS   r  r  N)r2   rZ   r[   r   r1   r   r
   )r\   r  r   r   feature_names_outs        r#   .test_ordinal_encoder_features_names_out_pandasr=  V  sU    			X	&BE
i[%0A



q
!C113u0r%   c                  &   [         R                  " S/S/[         R                  //[        S9n [	        S[         R                  SS9R                  U 5      nUR                  U 5      n[        US/S/S//5        [         R                  " S	/[         R                  //[        S9nUR                  U5      n[        U[         R                  /S//5        UR                  U5      nUS   S   b   e[         R                  " US   S   5      (       d   eg
)zECheck interactions between encode_unknown and missing value encoding.rS   rT   rI   r8  r&   r;  r  r   r   r  N)
r   r   r   r   r   r1   r5   r	   r   r  )r   r  r   rp  r  X_roundtrips         r#   0test_ordinal_encoder_unknown_missing_interactionrB  b  s     	3%#)8A	*ff 
 
c!f	  ll1oGGqcA3-. XXurvvh'v6F<<'LLBFF8bT"23 &&|4K q>!$$$ 88KN1%&&&&r%   with_pandasc                 v   [         R                  " SS/SS/S[         R                  //[        S9nSnU (       a.  [        R
                  " S5      nUR                  US	S
/S9nUS-   nOUS-   n[        SS9n[        R                  " [        US9   UR                  U5        SSS5        g! , (       d  f       g= f)zTCheck OrdinalEncoder errors when encoded_missing_value is used by
an known category.rS   r  rT   r   r  rI   zTencoded_missing_value \(1\) is already used to encode a known category in features: rR   letterpetr  z	\['pet'\]z\[1\]r   r  r/   N)r   r   r   r   r2   rZ   r[   r   r3   r4   r1   )rC  r   	error_msgr\   r  s        r#   0test_ordinal_encoder_encoded_missing_value_errorrH    s     	3,esBFFm<FKA
	 
   *LLXu$5L6,	(		a	0B	z	3
q	 
4	3	3s   B**
B8z4X_train, X_test_trans_expected, X_roundtrip_expected1c                    [        S[        R                  [        R                  S9R                  U 5      n[        R                  " S/[        R                  /S//5      nUR                  U5      n[        XQ5        UR                  U5      nUR                  S   n[        U5       HO  nX(S4   n	XhS4   n
U	c  U
b   eM  [        U	5      (       a  [        R                  " U
5      (       d   eMH  X:X  a  MO   e   g)zfCheck transform when unknown_value and encoded_missing_value is nan.

Non-regression test for #24082.
r8  r@  rI  rT   r   N)r   r   r   r1   r   r5   r	   r   r   r\  r   r  )r  X_test_trans_expectedX_roundtrip_expectedr  rp  r  rA  	n_samplesr^  expected_valvals              r#   9test_ordinal_encoder_unknown_missing_interaction_both_nanrP    s    4 
*ff ff
 
c'l	  XXurvvh./F<<'L L8&&|4K$**1-I9+qD1Q$;;<((88C== =&&& r%   c                  6   [         R                  " S5      n U R                  SS/SS/S.5      n[        5       nUR	                  SS9  Sn[         R
                  " [        US	9   UR                  U5        S
S
S
5        [        SS9R	                  SS9n[        SS9R	                  SS9nUR                  U5      nUR                  U5      n[        UR                  5       U5        [        UR                  5       UR                  5        g
! , (       d  f       N= f)z*Check OneHotEncoder works with set_output.rR   rS   rT   r   r   rU   r  zCPandas output does not support sparse data. Set sparse_output=Falser/   NFr   default)r2   rZ   r[   r   r  r3   r4   r   r	   to_numpyr
   r   r  )r\   r]   r   r0   ohe_default
ohe_pandas	X_defaultX_pandass           r#   test_one_hot_encoder_set_outputrX    s    			X	&B<<sCj1v67D
/CNNXN&QE	z	/$ 
0  e4??)?TKU3>>>RJ))$/I''-HH%%'3z7798;K;KL 
0	/s   #D


Dc                  |   [         R                  " S5      n U R                  SS/SS/S.5      n[        5       R	                  SS9n[        5       R	                  SS9nUR                  U5      nUR                  U5      n[        UR                  5       U5        [        UR                  5       UR                  5        g	)
z+Check OrdinalEncoder works with set_output.rR   rS   rT   r   r   rU   rR  r  N)r2   rZ   r[   r   r  r   r	   rS  r
   r   r  )r\   r]   ord_default
ord_pandasrV  rW  s         r#   test_ordinal_set_outputr\    s    			X	&B<<sCj1v67D "--	-BK!,,x,@J))$/I''-HH%%'3z7798;K;KLr%   c                     / SQSS//n [        U S9nUR                  SS//5        [        U 5      [        UR                  5      :X  d   e[	        UR                  5       H)  u  p#UR
                  [        :X  d   e[        X   U5        M+     g)zbCheck that the categories_ dtype is `object` for string categories

Regression test for gh-25171.
)asmmaseasrasacsrI  2r   r^  N)r   r1   r  r   	enumeraterJ   r   r
   )rL   r   nr   s       r#    test_predefined_categories_dtyperf    s|    
 6SzBJ
:
.CGGdC[Mz?c#//2222COO,yyF""":=#. -r%   c                  R   [         R                  " S/S/[         R                  //[        S9n [	        SS9R                  U 5      n[        US/S/S//5        [	        SSS	9R                  U 5      n[         R                  " S
//5      nUR                  U5      n[        US//5        g)zBCheck missing value or unknown encoding can equal the cardinality.r  r   rI   r   r  r   r   r8  r:  snakeN)	r   r   r   r   r   r   r	   r1   r5   )r   r   r   rp  s       r#   1test_ordinal_encoder_missing_unknown_encoding_maxri    s    
5'E7RVVH-V<A15CCAFGGqcA3_-
(;1
M
Q
QRS
TCXXyk"FmmF#GGqcU#r%   c                  ,   [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   S/S-  -   /[        S9R                  n [	        SS	S
S9R                  U 5      n[        UR                  5       / SQ5        UR                  S   UR                  S      S:X  d   e[         R                  " S/S-  S/S-  -   S/S-  -   /[        S9R                  n [	        SS	SS9R                  U 5      n[        UR                  5       S/5        UR                  S   UR                  S      S:X  d   e[         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   S/S-  -   /[        S9R                  n [	        SS	S/S9R                  U 5      n[        UR                  5       / SQ5        UR                  S   UR                  S      S:X  d   e[	        SS	SS9R                  U 5      n[        UR                  5       / SQ5        UR                  b   eg)zcCheck drop_idx is defined correctly with infrequent categories.

Non-regression test for gh-25550.
rS   r   rT   r+   r  r   r  rI   Fr   )r  r   r   )r  x0_dx0_er  r   rd   r   r  )r  r  rl  r  N)r  r  rk  rl  r  )
r   r   r   rN   r   r1   r
   r   r   r(  )r   r   s     r#   #test_drop_idx_infrequent_categoriesrm    s&   
 	
cUQY	#	*cUQY	6#	BC6	a  au7
K
O
OPQ
RC!!#%V ??1cmmA./3666
3%!)seai'3%"*45VDFFA
au;
O
S
STU
VCs0025L4MN??1cmmA./3666

cUQY	#	*cUQY	6#	BC6	a  auC5
I
M
Ma
PC!!#%V ??1cmmA./3666
au4
H
L
LQ
OC!!#A ==   r%   c                    [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /5      R                  n[        SS	S
S.U D6R	                  U5      n[        UR                  / SQ/5        [        UR                  SS//5        S/S/S/S/S//nS/S/S/S/S
//nUR                  U5      n[        XT5        UR                  U5      nS/S/S/S/S//n[        Xg5        g)zGTest parameters for grouping 'a', and 'd' into the infrequent category.rS   r   rT   r$  r  rd   r   r   r8  rA   r:  r  r  r   r   r   r  Nr   )r   r   rN   r   r1   r
   r   r  r5   r	   r   )r  r  ordinalrp  expected_transr   r  expected_inverses           r#   ,test_ordinal_encoder_infrequent_three_levelsrr  6  s    hh	SEBJ.#;seaiGHIKKG *"@F	c'l  w**-A,BCw55c
|DecUSEC53%0FcA3aS2$/N'GG,))'2I					 y3r%   c                     [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /[        S	9R                  n [	        / S
Q/SSSS9R                  U 5      n[        UR                  / S
Q/5        [        UR                  SS//5        S/S/S/S/S//nS/S/S/S/S//nUR                  U5      n[        XC5        UR                  U5      nS/S/S/S/S//n[        XV5        g)zTest that the order of the categories provided by a user is respected.

In this case 'c' is encoded as the first category and 'b' is encoded
as the second one.
rS   r   rT   r$  r  rd   r   r   rI   r  r8  rA   )rL   r  r&   r;  r  r   r   r   r  N)r   r   r   rN   r   r1   r
   r   r  r5   r	   r   )r  ro  rp  rp  r   r  rq  s          r#   6test_ordinal_encoder_infrequent_three_levels_user_catsrt  ]  s'    hh
cURZ	3%"*	,uqy	89a  ()*	
 
c'l  w**-A,BCw55c
|DecUSEC53%0FcA3aS2$/N'GG,))'2I					 y3r%   c                     [         R                  " / SQ/ SQ45      n [        SS9R                  U 5      n[	        UR
                  S   SS/5        UR
                  S   b   eSS/SS//nSS/SS//nUR                  U5      n[        XC5        UR                  U5      n[         R                  " SS/S	S//[        S
9n[	        XV5        g)zETest when feature 0 has infrequent categories and feature 1 does not.r  r  r   r  r   r   r   Nr  rI   )r   column_stackr   r1   r
   r  r5   r	   r   r   r   )r   ro  rp  rp  r   r  rq  s          r#   %test_ordinal_encoder_infrequent_mixedrx    s     	46QRSAA.2215Gw55a81a&A))!,444!fq!fF!fq!f%N'GG,))'2Ixx!Q*>)B C6Ry3r%   c            	      \   [         R                  " S5      n U R                  / SQ5      nU R                  / SQ/ SQU R	                  S/S-  S/S-  -   S	/-   S
/-   US9S./ SQS9n[        SS9R                  U5      n[        UR                  S   SS/5        [        UR                  S   / SQ5        [        UR                  S   S
S	/5        U R                  / SQ/ SQU R	                  S/S	/-   S
/-   S/-   US9S./ SQS9n/ SQ/ SQ/ SQ/ SQ/nUR                  U5      n[        Xe5        g)zHTest infrequent categories with a pandas DataFrame with multiple dtypes.rR   )birdr   r  rh  r  r  r  r+   r   r   rh  rz  rI   )r  rO  r  r  rv  r   rS   rT   r   r  r   )rS   rT   r  r  )rg   r   rd   r   )r   r   r   )r   r   r   )r   r   r   r  N)r2   rZ   CategoricalDtyper[   r   r   r1   r
   r  r5   r	   )r\   categorical_dtyper   ro  rp  rp  r   s          r#   :test_ordinal_encoder_infrequent_multiple_categories_dtypesr}    sY    
		X	&B++,KL
@199!ugk)WI5@' % 	
 . 	 
	A A.2215G w55a83*Ew55a8*Ew55a867:KL\\'!997)#vh.%8' % 	
 .  
F  IyAN'GG,r%   c                     [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   [         R                  /-   /[        S	9R                  n [        S
SSSS9R                  U 5      n[        UR                  / SQ/5        [         R                  " S/S/S/S/S/[         R                  //[        S	9nS/S/S/S/S/S//nUR                  U5      n[        XC5        g)zJCheck behavior of unknown_value and encoded_missing_value with infrequent.rS   r   rT   r$  r  rd   r   r   rI   r8  r   )r&   r;  r  r  r  r  r   r   N)r   r   r   r   rN   r   r1   r
   r  r5   r	   )r  ro  rp  rp  r   s        r#   .test_ordinal_encoder_infrequent_custom_mappingr    s    hh
cURZ	3%"*	,uqy	8BFF8	CDFa  *	
 
c'l  w557HIXXusecUSEC5266(C6RFcA3aS1#s3N'GG,r%   c                 Z   [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /[        S	9R                  n[	        S0 U DS
SS.D6R                  U5      n[	        S
SS9R                  U5      nS/S/S/S/S//n[        UR                  U5      UR                  U5      5        g)zMAll categories are considered frequent have same encoding as default encoder.rS   r   rT   r$  r  rd   r   r   rI   r8  rA   r:  r  Nr   r   r   r   rN   r   r1   r	   r5   )r  r  adjusted_encoderdefault_encoderrp  s        r#   !test_ordinal_encoder_all_frequentr    s     hh
cURZ	3%"*	,uqy	89a  & 
!4B	c'l  %*"	c'l  ecUSEC53%0F""6*O,E,Ef,Mr%   d   c                    [         R                  " S/S-  S/S-  -   S/S-  -   S/S-  -   /[        S	9R                  n[	        S0 U DS
SS.D6R                  U5      nS/S/S/S/S//n[        UR                  U5      S/S/S/S/S//5        g)zAWhen all categories are infrequent, they are all encoded as zero.rS   r   rT   r$  r  rd   r   r   rI   r8  rA   r:  r  r   Nr   r  )r  r  r.  rp  s       r#   #test_ordinal_encoder_all_infrequentr    s     hh
cURZ	3%"*	,uqy	89a   
!4B	c'l  ecUSEC53%0FG%%f-aS1#sRD/IJr%   c                     [         R                  " [         R                  /S-  S/S-  -   S/S-  -   S/-   S/-   /[        S9R                  n [        S	S
9R                  U 5      n[         R                  " SSS[         R                  //[        S9R                  nUR                  U5      n[        US/S/S/[         R                  //5        g)z5Check behavior when missing value appears frequently.r$  r  rd   r   r   rh  deerrI   r   rv  r   r   r   N	r   r   r   r   rN   r   r1   r5   r	   r   ro  rp  r   s       r#   -test_ordinal_encoder_missing_appears_frequentr  
	  s    

&&B%2	%!	3wi	?6(	JK	 a  A.2215GXXrvv67vFHHF'GGqcA3bffX67r%   c            	         [         R                  " [         R                  /S/S-  -   S/S-  -   S/-   S/-   S/S-  S	/S-  -   /[        S
9R                  n [        SS9R                  U 5      n[         R                  " SS/SS	/[         R                  S	/SS	/SS//[        S
9nUR                  U5      n[        USS/SS/[         R                  S/SS/SS//5        g)z7Check behavior when missing value appears infrequently.r  rd   r   r   rh  r  redrI  greenrI   r+   )r  r   r   r   Nr  r  s       r#   /test_ordinal_encoder_missing_appears_infrequentr  	  s    
 	VVHw|#ugk1WI=HGaK7)a-'	
 	 a  1-11!4GXXeWVVWGEN	
 	F 'GGq!fq!frvvqkAq6Aq6JKr%   c                     [         R                  " S/S/S//[        S9nU " / SQ/S9n[        R                  " [
        5         UR                  U5        SSS5        g! , (       d  f       g= f)a  Check that we raise a `NotFittedError` by calling transform before fit with
the encoders.

One could expect that the passing the `categories` argument to the encoder
would make it stateless. However, `fit` is making a couple of check, such as the
position of `np.nan`.
rV   rW   rb  rI   ra  r   N)r   r   r   r2   r3   r   r5   )r  r   r.  s      r#   test_encoder_not_fittedr  3	  sT     	3%#&f5A/!23G	~	&! 
'	&	&s   A
A,)r   rf  numpyr   r2   scipyr   sklearn.exceptionsr   sklearn.preprocessingr   r   sklearn.utils._missingr   sklearn.utils._testingr   r	   r
   sklearn.utils.fixesr   r$   markparametrizer;   rE   r  float32r0  rP   r^   r   r   r   r   r   r   r   r   r  r   r   r   r   r   r   r   r  r]  str_r   rN   r  r  r  r  r  r  r!  r,  r2  r4  r6  rD  rO  rK  rM  rP  rU  r_  rd  ri  rr  r{  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r  r  r   r#  r%  r'  r*  r/  r2  r6  r:  r=  rB  rH  rM   rP  rX  r\  rf  ri  rm  rr  rt  rx  r}  r  r  r  r  r  r  r   r%   r#   <module>r     sc   	     - ? 0 
 /@. )+TU# V#. )+TU& V&$ "((BJJ

)KL2::rzz(JK
; L M
; "((BJJ

)KLA MA92xJ$4	9
 	(
+z*+
#%67vF
/C#78G
/Cuu#=>fM
"O4FC
/C#67vF
/Cut#<=VL			  .G/.G )+TUUDM2$1,$ 2 3 V,$^ UDM2b'Ar7QG	$y)Y&GHS\E3<'3%Fo?	
	'	 3'*7 !=>'CD
C E ?
C E?#;<1vrxxc
';<= > = E?#;< = "+r{	#uenrd%;RZZH	Aq6Aq6"	#q!fqc]BJJ?HHsElS%L1@3Z%!JJ	

 
C<#u.	/3*ug1FP	Aq6BFFA;'	(Arvv;*<bjjIHHsBFFmdBFF^4FC4[266(#JJ	
 HHsE%L)D%,+?@O4[5<.)JJ	
*	/   B7C B7" )+TU HHsCj\022HHsCj\022JJ		
 HHq!fXW-//HHq!fXW-//KHH		
 HHsCj\022HHsCj\022XXo&'JJ		
 HHtSk]&133HHtSk]&133		
 HHsCj\022HHsBFFm_F355		
 HHsDk]&133HHsBFFm_F355		
?%L	Q  0bAc0 VdA($ ]N$CD
 E

?7 	66"#	./	()
 	&  66&0 	(
+{+,
#%67vF
 	'  22  HHsCj\022HHsCj\022JJ		
 HHq!fXW-//HHq!fXW-//KHH		
 HHsCj\022HHsCj\022XXo&'JJ		
( 	3-  010"$- 5#,/- 0-6	868, +w!78) 9)2 2664u*FG!1 H!1H 5!*.A!BCB DB T5M'7JK'=!9?RS. T L.$ ]N$CD? E? 	1	"	$q1r2	 1E0F'GHI I	I6 +w!>?C @C. 3%#0 1" 	1	!	!	$	$q1q1QQ< 'C5!12/ 3/. 3%#0 10 !a8?A:NO;;2,6!,H;$X,v>,B bA$N#OP
$ Q
$ a1$M#NO) P)  !#M 'EF* G*6* 2664.9G :G,. )+TU((;<, = V,< )+TU"B V"BJ )+TU!E V!EH )+TU) V)4( 02662,?" @"" ((;<02662,?% @ =%>  3-7993*V4663RVV,F;<

	 3-7993*V4663RVV,F;<

	 3-

;==3%

3553RVV,-.

	%4	9  !DE!D$ ]N$CD E ! HHsBFFC()*,,HHsBFFC()*,,HHseW	
 HHo&'))HHo&'))HHrvvhZ 	
 HHsBFFC()8::HHsBFFC()*,,HHseWF+	
 HHo&f577HHo&'))HHrvvhZv.	
!24324 .92 :2," 

4+c*
4+c* 
s
3*S)
3*S)	( 	(6(	1'< u6 72 :
 HHsecU^62S266(RVVH%JJvv.f=	
 HHrvvhu-V<S266(RVVH%JJx"&&2&A	
&''&'BM.M /"	$!!H 	1	!	!	$	$q1q1446!4H4*--`-* 	1	!( 	1	#
K
K
8L8 ]N$CD Er%   