
    -il              
          S SK r S SKrS SKrS SKJrJr  S SKJr  S SK	J
r
  S SKJrJrJrJrJr  S SKJr  S SKJrJrJrJr  S r\R2                  R5                  S	\R6                  " / S
Q\R8                  S9/S4\R6                  " SS\R:                  /\R<                  S9/S4\R6                  " / SQ\S9/S4S/5      \R2                  R5                  SSS/5      \R2                  R5                  SSS/5      S 5       5       5       r \R2                  R5                  S\R6                  " / S
Q\R8                  S9/S4\R6                  " / SQ\S9/SS/4/5      \R2                  R5                  S\R6                  " / SQ5      \R6                  " / SQ5      /5      \R2                  R5                  SSS/5      S 5       5       5       r!\R2                  R5                  S \R6                  " S /S!-  S"/S!-  -   S#/-   /\R8                  S9RD                  / S
Q/4\R6                  " S$/S!-  S%/S!-  -   S&/-   /\S9RD                  / S'Q/4/5      \R2                  R5                  SS(S/5      S) 5       5       r#\R2                  R5                  S*/ S+QS,4\R6                  " / S-Q/ SQ/5      RD                  S.4/5      S/ 5       r$S0 r%\R2                  R5                  S1S"S2/S!-  S3S4/4/ SQS5-  S"S2/-   / S6Q4/ S7QS5-  S8S9/-   / S:Q4/5      S; 5       r&\R2                  R5                  S<S=S>/5      \R2                  R5                  SSS/5      \R2                  R5                  S/ S?Q5      S@ 5       5       5       r'\R2                  R5                  SA\R6                  " SB/SC-  5      SB4\R6                  " S /SC-  5      S 4\R6                  " SD/SC-  \S9S 4// SEQSF9\R2                  R5                  S/ SGQ5      SH 5       5       r(SI r)SJ r*\R2                  R5                  S/ SKQ5      SL 5       r+\R2                  R5                  SSMS/5      SN 5       r,SO r-g)P    N)assert_allcloseassert_array_equal)RandomForestRegressor)Ridge)KFoldShuffleSplitStratifiedKFoldcross_val_scoretrain_test_split)make_pipeline)KBinsDiscretizerLabelBinarizerLabelEncoderTargetEncoderc                 >   [         R                  " U[         R                  S9n[         R                  " U5      nUS:X  a  [         R                  " U5      n[        U5       Hj  nXU:H     nUR                  S   n	U	S:X  a  XTU'   M%  [         R                  " U5      n
X-  nXU-   -  nU[         R                  " U5      -  SU-
  U-  -   XG'   Ml     U$ [        U5       H=  nXU:H     n[         R                  " U5      XS-  -   nUR                  S   U-   nX-  XG'   M?     U$ )z0Simple Python implementation of target encoding.dtypeautor      )npzerosfloat64meanvarrangeshapesum)	X_ordinal	y_numericn_categoriessmoothcur_encodingsy_mean
y_variancecy_subsetn_iy_subset_variancemlambda_current_sumcurrent_cnts                  b/var/www/html/venv/lib/python3.13/site-packages/sklearn/preprocessing/tests/test_target_encoder.py_encode_targetr.      s   HH\<MWWYFVVI&
|$A a0H..#Cax#)a  "x 0!.A1WoG&)::a'kV=SSM % |$A a0H&&*V_<K"..+f4K*8M	 %
     zcategories, unknown_valuer   r      r            ?      @      @)catdogsnakebear)r      r!         @r   target_typebinary
continuousc                 *   Sn[         R                  " S/S-  S/S-  -   S/S-  -   /[         R                  S9R                  n[         R                  " / S	Q/[         R                  S9R                  nUR                  S   nU S
:X  a  Un	Un
OU S   U   n	U S   U   n
[         R
                  " X//45      n
[         R                  R                  U5      nSnUS:X  a1  UR                  SSUS9n[         R                  " SS/[        S9nX   nOUS:X  d   eUR                  SSUS9nUnUR                  U5      nUU   nU	U   n	UU   nUU   nUS:X  a  [        XSS9nO
[        XSS9n[         R                  " U[         R                  S9nUR!                  Xo5       H.  u  nnUUS4   UU   nn[#        UUXS5      nUUUS4      UUS4'   M0     [%        UU UUS9nUR'                  X5      nUR(                  U:X  d   e[+        UU5        [-        UR.                  5      S:X  d   eUS:X  a  [1        UR2                  W5        OUR2                  b   e[         R4                  " U5      n[#        USS2S4   XU5      n[+        UR.                  S   U5        UR6                  [8        R:                  " U5      :X  d   e[         R
                  " U[         R                  " U/5      45      R=                  SS5      nUR?                  U
5      n[+        UU5        g)zCheck encoding for binary and continuous targets.

Compare the values returned by `TargetEncoder.fit_transform` against the
expected encodings for cv splits from a naive reference Python
implementation in _encode_target.
r:   r      r      r1   (   r   r0   r   r=   lowhighsizer6   r7   r>   Tn_splitsrandom_stateshuffle)r!   
categoriescvrJ   N) r   arrayint64Tr   concatenaterandomRandomStaterandintobjectuniformpermutationr	   r   
empty_liker   splitr.   r   fit_transformtarget_type_r   len
encodings_r   classes_r   target_mean_pytestapproxreshape	transform)rL   unknown_valueglobal_random_seedr!   r<   r    X_train_int_arrayX_test_int_array	n_samplesX_trainX_testdata_rngrI   r   target_namesy_trainshuffled_idxrM   expected_X_fit_transform	train_idxtest_idxX_y_r"   target_encoderX_fit_transformr#   expected_encodingsexpected_X_test_transformX_test_transforms                                 r-   test_encodingrz   7   sJ   & L1#(aS2X"5b"@!ARTTxx288<>>!''*IV#!Q- 12A/0^^Vo%678Fyy$$%78HHh$$$C	xxf=) l***$$2I$F	''	2L),7l#Gl#G,'I h
 HtT  "}}->bjjQ!xx(9C	8"9a<0)I2FB&r2|D0=hk*1
 1-  D #'	N %227DO&&+555O%=>~(()Q...h>22LA&&... WWYF'!Q$& N--a02DE&&&--*???? !#	RXXvh/0!gb!n  &//7$&?@r/   zcategories, unknown_valuesrabbittarget_labels)r   r1   r:   )abr%   c           
         [         R                  R                  U 5      nSnSn[         R                  " UR	                  SSUS95      n[         R                  " UR	                  SSUS95      n	US   U   n
US   U	   n[         R
                  " X45      n[         R
                  " X45      nSS// SQ/nSn[         R                  " UR	                  SXS95      nUU   n[        5       R                  U5      nSn[        UU SS	9n[         R                  " UR                  S   UR                  S   U-  4[         R                  S
9n[        U5       Hw  u  nn[        U5       Hb  nUR                  UU5       HI  u  nnUSS2U4   nUUU4   UU   nn[        UU[!        U5      U5      nUUU-  -   nUUUU4      UUU4'   MK     Md     My     [#        UUU S9n U R                  UU5      n!U R$                  S:X  d   e['        U!U5        / n"[        U5       HP  u  nn[        U5       H;  nUSS2U4   n[        USS2U4   U[!        U5      U5      nU"R)                  U5        M=     MR     [!        U R*                  5      X-  :X  d   e[        X-  5       H  n#['        U R*                  U#   U"U#   5        M!     [-        U R.                  U5        [         R                  " SS/SS/SS//5      n$US:X  a  U$n%Oo[         R0                  " U$SS2SS24   [2        S
9n%[        U$R                  S   5       H  n&US   U$SS2U&4      U%SS2U&4'   M     [         R4                  " U%U45      n%[         R6                  " USS9n'[         R                  " U$R                  S   U$R                  S   U-  4[         R                  S
9n(U$R                  S   n)/ SQn[        U)S-
  5       H*  n*[        U"5       H  u  n#n+U+U$U*UU#   4      U(U*U#4'   M     M,     / SQn,[        X-  5       H  n#U'U,U#      U(U)S-
  U#4'   M     U R9                  U%5      n-['        U-U(5        g)z&Check encoding for multiclass targets.P   r1   r   rC   r:   r   r0   TrH   r   Nr!   rM   rJ   
multiclassr2      r   rN   axis)r   r   r   r   r   r   )r   r   r1   r   r   r1   )r   rS   rT   rO   rU   column_stackr   r[   r	   emptyr   r   	enumerater   rZ   r.   r]   r   r\   r   appendr^   r   r_   rY   rV   vstackr   rd   ).rf   rL   unknown_valuesr|   r!   rngri   
n_features
feat_1_int
feat_2_intfeat_1feat_2rj   X_train_intcategories_	n_classesy_train_intrn   y_train_encrI   rM   rp   f_idxcatsc_idxrq   rr   y_classrs   rt   current_encodingexp_idxru   rv   rw   i
X_test_intrk   
column_idxr#   rx   n_rowsrow_idxencmean_idxry   s.                                                 r-   test_encoding_multiclassr      s    ))

 2
3CIJ#++!!)+DEJ#++!!)+DEJ]:&F]:&Foov./G//:":;Kq69%KI((3;;19;MNKK(G "009KH	(:D
B
  "xx			1	{003i?@jj  !-t9%E')xx'A#	8%ah/$Y%56	8JB#1"b#d)V#L   59#45>N%0?(7):; (B & . #'N
 %227GDO&&,666O%=>  -t9%E!!U(+G-AuH%wD	6  %%&67 & . ~(()Z-CCCC:)*11!46H6KL +~..> Aq6Aq6Aq623Jz#2#q&1@
 0 0 34J$.qM*SbS*_2M$NF1j=! 5 FN34WW[q)F "			!	j..q1I=>jj! a FE!$ 23FAs47
7ERSHCT8U4V%gqj1 4 % "H9)*39(1+3F!&1*a-0 + &//7$&?@r/   zX, categories
   r   r:   r6   r7   r8   )r7   r6   cow      @c                    [         R                  R                  S5      nUR                  SSU R                  S   S9n[        XSS9R                  X5      nUR                  5       nUR                  U SS 5      nUS   [        R                  " U5      :X  d   e[        UR                  5      S	:X  d   eUR                  S   S   [        R                  " U5      :X  d   eg)
zHCustom categories with unknown categories that are not in training data.r   rG   r@   rC   )rL   r!   rJ   rN   N)r   r   r   )r   rS   rT   rW   r   r   fitr   rd   ra   rb   r]   r^   )XrL   r!   r   yr   r#   X_transs           r-   test_custom_categoriesr     s    $ ))


"C"1771:6A
:1
M
Q
QRS
WC VVXFmmAbcF#G4=FMM&1111s~~!###>>!R FMM&$9999r/   zy, msg)r   r1   r   r   z'Found input variables with inconsistent)r   r1   r   z7Target type was inferred to be 'multiclass-multioutput'c                     [         R                  " / SQ/5      R                  n[        5       n[        R
                  " [        US9   UR                  X 5        SSS5        g! , (       d  f       g= f)zCheck invalidate input.)r   r   r   matchN)r   rO   rQ   r   ra   raises
ValueErrorr[   )r   msgr   r   s       r-   test_errorsr   5  sI     	)A
/C	z	-! 
.	-	-s   A""
A0c                     [         R                  " / SQ/5      R                  n [         R                  " / SQ5      n[        SS9n[        R
                  " [        [        R                  " S5      S9   UR                  X5        SSS5        UR                  S:X  d   e[        SS	S
9nUR                  X5        UR                  S	:X  d   eg! , (       d  f       NN= f)z@Check inferred and specified `target_type` on regression target.)r   r   r   r   r   r   )r3          @r4   r   r4   r   r1   rM   zQThe least populated class in y has only 1 members, which is less than n_splits=2.r   Nr   r>   )rM   r<   )r   rO   rQ   r   ra   warnsUserWarningreescaper[   r\   )r   r   r   s      r-   test_use_regression_targetr   H  s    
$%&((A
/0A
1
C	ii

 	!
 |+++
1,
7Ca|+++
 
s   2C
Czy, feature_namesr1   AB   )A_1A_2A_3B_1B_2B_3)y1y2y3r   r   )A_y1A_y2A_y3B_y1B_y2B_y3c                    [         R                  " S5      nUR                  SS/S-  SS/S-  S.5      n[        SSS	S
9nUR	                  SS9  [        SSS	S
9nUR	                  SS9  UR                  X05      nUR                  X05      n[        UR                  5       U5        [        UR                  5       U5        [        UR                  5       UR                  5        g)z*Check TargetEncoder works with set_output.pandasr}   r~   r   r   r1   )r   r   r4   r   rM   r!   rJ   default)rd   N)ra   importorskip	DataFramer   
set_outputr[   r   to_numpyr   get_feature_names_outcolumns)r   feature_namespdX_dfenc_default
enc_pandas	X_defaultX_pandass           r-   !test_feature_names_out_set_outputr   ]  s     
		X	&B<<sCj2oQFRK@AD1SqAKY/!Ca@JH-))$2I''0HH%%'3z779=Iz7798;K;KLr/   	to_pandasTF)binary-ints
binary-strr>   c                    [         R                  " SS/SS/SS/SS/SS/SS/SS/SS//[         R                  S9nUS:X  a=  [         R                  " / SQ5      n[        5       R	                  U5      n[        SSSS9nOuUS	:X  a=  [         R                  " / S
Q5      n[        5       R	                  U5      n[        SSSS9nO2[         R                  " / SQ[         R                  S9nUn[        SSSS9n[         R                  " U5      n/ SQSS//n[         R                  " SS/SS/SS//[         R                  S9n	U (       ar  [        R                  " S5      n
U
R                  USS2S4   [         R                  " SS/[        S9USS2S4      S.5      nU
R                  U	SS2S4   / SQS.5      n	OUn[         R                  " U[         R                  S9n[        U5       HP  u  pUR!                  X55       H6  u  nnX?U4   X_   nn[#        UU[%        U5      U5      nUUUU4      UUU4'   M8     MR     / n[        U5       H4  u  p[#        USS2U4   U[%        U5      U5      nUR'                  U5        M6     [         R                  " US   S   US   S   /UUS   S   /US   S   U//[         R                  S9n[)        USSS9nUR	                  X5      n[+        UU5        [%        UR,                  5      S:X  d   e[/        S5       H  n[+        UR,                  U   UU   5        M!     UR1                  U	5      n[+        UU5        g)z,Check target encoder with multiple features.r   r   r1   r   r   )r}   r~   r}   r}   r~   r~   r}   r~   T)rJ   rK   r   )r:   r2   r:   r:   r:   r2   r2   r2   )r4   gffffff@g333333@g      @gffffff@g      @皙$@g333333@r0   r:   r   r   Nr6   r7   )feat0feat1)r7   r6   r8   r   )r   rO   rP   r   r[   r	   float32r   r   ra   r   r   rV   rY   r   r   rZ   r.   r]   r   r   r   r^   r   rd   )r   r!   r<   r   rn   	y_integerrM   r#   rL   rk   r   rj   rp   r   r   rq   rr   rs   rt   r   rw   rx   r   rv   r   ry   s                             r-   test_multiple_features_quickr   {  se   
 
Q!Q!Q!Q!Q!Q!Q!QHPRPXPXI l"((CD N009	QQ=		%((34 N009	QQ=((DBJJW	11d3WWYFaV$JXXFFG	

 hhF   *,,"1a45%.?	!Q$P
 q!t?VWX  "}}YbjjI ,#%88I#AIx%/0)2FB-b"c$iH8H(E/*9$Xu_5 $B -  ,)ahCIv
 	!!"23	 - !#"1%'9!'<Q'?@'*1-."1%v.	

 jj! v!!
<C''9OO%=>s~~!###1Xq)+=a+@A  }}V,$&?@r/   z	y, y_meang333333@r@   r}   )r>   r=   zbinary-string)ids)r   r           c           	      &   [         R                  " S/S-  /5      R                  nUR                  S   n[	        SUSS9nUR                  X05      n[        U[         R                  " U//USS95        UR                  S   S   [        R                  " U5      :X  d   eUR                  [        R                  " U5      :X  d   e[         R                  " S/S//5      nUR                  U5      n[        U[         R                  " U//SSS95        g)z5Check edge case where feature and target is constant.r   r@   r   r1   r   r   N)r   rO   rQ   r   r   r[   r   repeatr^   ra   rb   r`   rd   )	r   r#   r!   r   ri   r   r   rk   X_test_transs	            r-    test_constant_target_and_featurer     s     	1#(A
I
1V!
<C%GGRYYz91EF>>!Q6==#8888v}}V4444XXsQCj!F==(LL"))fXJ"BCr/   c                     SnSn[         R                  R                  U 5      nUR                  US9nUR	                  SXS9R                  SS5      nUR                  5       nXF   nXV   n[        SU S9nUR                  XT5      n[        S	S
9nUR                  XT5      n	[        SSU S9n
[        SU S9n[        XXKS9R                  5       S:  d   e[        XXKS9R                  5       S:  d   e[        XXKS9R                  5       S:  d   eg )NrA   i  rF   r   rN   r   T)rK   rJ   F)rK   r   r@   )n_estimatorsmin_samples_leafrJ   2   )rI   rJ   r   皙?      ?)r   rS   rT   normalrU   rc   argsortr   r[   r   r   r
   r   )rf   cardinalityri   r   rn   rj   y_sorted_indicesru   X_encoded_train_shuffledX_encoded_train_no_shuffled	regressorrM   s               r-   Ftest_fit_transform_not_associated_with_y_if_ordinal_categorical_is_notr     s7    KI
))

 2
3Cjjij(Gkk![k9AA"aHG ('G'G"4>PQN-;;GM"51N"0">">w"P &";MI 
r0B	CB9w>CCEKKK	WLQQS
		 		OTTV
		r/   c                  J   [         R                  " / SQ/5      R                  n [         R                  " / SQ5      n[        SSSS9nUR	                  X5      n[        US   [         R                  " USS	 5      5        [        US
   [         R                  " US	S 5      5        g	)zECheck edge case with zero smoothing and cv does not contain category.)
r   r   r   r   r   r   r   r   r   r   )
g @g333333@g333333?g@r3   g      "@r   gffffff,@g*@g      .@r   Fr1   )r!   rK   rM   r   r   NrN   )r   rO   rQ   r   r[   r   r   )r   r   r   r   s       r-   test_smooth_zeror    s    
01244A
GHA
sEa
8C%G GAJ!"/ GBK2A0r/   )r   g     @@r   c                 B   [         R                  R                  U5      nUR                  SS9nSn[	        USSS9R                  UR                  SS5      5      n[        XSUS	9u  pgpUR                  U5      n
XR                  [         R                  5         nXR                  [         R                  5         n[        XS
9nUR                  Xh5      nUR                  U5      nUR                  X5      nUR                  U5      n[        UU5        [        UU5        g )Ni  r   rA   averaged_inverted_cdfordinal)n_binsquantile_methodencoderN   r   rJ   r!   rJ   )r   rS   rT   r   r   r[   rc   r   rX   astypeint32r   rd   r   )r!   rf   r   r   r    r   rj   rk   rn   y_testpermutated_labelsX_train_permutedX_test_permutedru   X_train_encodedX_test_encodedX_train_permuted_encodedX_test_permuted_encodeds                     r-   3test_invariance_of_encoding_under_label_permutationr  )  s   
 ))

 2
3C 	


AL-DY	mAIIb!$%  (8	-($GW 5()AB'bhh(?@O"&RN$227DO#--f5N-;;<LV,66GO%=>N$;<r/   r   c                    [        SSSS9nSn[        R                  R                  U5      nUR	                  U5      nSUR	                  U5      -  nSn[        USS	US
9R                  XV-   R                  SS5      5      nUR                  U5      n	XR                  [        R                  5         nUR                  U5      n
UR                  [        SU-  5      USS9R                  SS5      n[        R                  " XU/SS9n[        XSS9u  pnnUR                  X5      nUR!                  X5      S:  d   eUR!                  UU5      S:  d   e[#        [%        XS9U5      R                  X5      nUS   R&                  nUR!                  X5      S:  d   U5       eUR!                  UU5      S:  d   U5       eUS   [(        R*                  " SSS9:X  d   e[        R,                  " USS  5      S:  R/                  5       (       d   e[%        XS9R                  X5      nUR1                  U5      nUR1                  U5      nUR                  UU5      nUR&                  nUR!                  UU5      S:  d   U5       eUR!                  UU5      S:  d   U5       e[-        US   5      [-        US   5      :  d   eg )Ngư>lsqrF)alphasolverfit_interceptiP  g?d   r  rW   )r  r  strategyrJ   rN   r   g?T)rF   replacer   r   r  r   r	  r   g{Gz?)absg?gffffff?r1   )r   r   rS   rT   randnr   r[   rc   rX   r
  r  choiceintrR   r   r   scorer   r   coef_ra   rb   r  allrd   )r!   rf   linear_regressionri   r   r   noiser    X_informativer  
X_shuffledX_near_unique_categoriesr   rj   rk   rn   r  	raw_modelmodel_with_cvcoefru   X_enc_no_cv_trainX_enc_no_cv_testmodel_no_cvs                           r-   *test_target_encoding_for_linear_regressionr/  M  s    DuM I
))

 2
3C		)A
 #))I&&EL$	
 mQY''A./  5%&:&:288&DEM /J  #zzC)O9d  *  gb!n 
 		$<=	A (81'M$GWf
 "%%g7I??7,s222??66*S000 "V68I	c'  ""Dw036<<6vv.4:d:4 7fmmA40000FF48s"'')))) #&CGGN '009%//7#''(97CK D.83>DD>-v6<BdB<
 tAw<#d1g,&&&r/   c                      [         R                  " SSS9n U R                  SS5         U R                  / SQ/ SQS.5      n[	        S	S
9R                  US/   US   5        SSS5        g! , (       d  f       g= f)z
Test target-encoder cython code when y is read-only.

The numpy array underlying df["y"] is read-only when copy-on-write is enabled.
Non-regression test for gh-27879.
r   z2.0)
minversionzmode.copy_on_writeT)r}   r~   r~   )r   r;   r5   )xr   r>   )r<   r2  r   N)ra   r   option_contextr   r   r   )r   dfs     r-   test_pandas_copy_on_writer5    se     
		X%	8B			/	6\\oFG,/33BuIr#wG 
7	6	6s   9A**
A8).r   numpyr   ra   numpy.testingr   r   sklearn.ensembler   sklearn.linear_modelr   sklearn.model_selectionr   r   r	   r
   r   sklearn.pipeliner   sklearn.preprocessingr   r   r   r   r.   markparametrizerO   rP   nanr   rV   rz   r   rQ   r   r   r   r   r   r   r   r  r  r/  r5   r/   r-   <module>rA     s   	   = 2 &  + < 
((9BHH
-	.2
((Cbff%RZZ
8	93?
((*&
9	:FC	 C=1<(@A]A B 2]A@  
((9BHH
-	.7
((*&
9	:VX<NO bhhy)288O+DE C=1eA 2eAP  HHqcBh!r)QC/0ACCK	

 HH2",y89a"#		
 C=1: 2 :  	@AHHi+,..E	
	 	 ,* 
Q"sCj!	Q!Q	!KL"dD\1<	

M
M& tUm4C=1(STOA U 2 5OAd 	3%"*	s#	1#(	Q	3%"*F	+Q/
 	2   #56D 7D +\1" #56 = 7 =F C=1o' 2o'd
Hr/   