
    -i                     P   S SK r S SKrS SKrS SKrS SKJr  S SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKrS SKrS SKJrJr  S SKJr  S S	KJr  S S
KJrJrJrJrJrJrJrJ r   S SK!J"r"J#r#J$r$  S SK%J&r&  S SK'J(r(  S SK)J*r*J+r+J,r,  S SK-J.r.J/r/J0r0  Sr1Sr2\1\2-   r3S r4S r5S r6S r7S r8S r9\Rt                  Rw                  S\\45      S 5       r<S r=S r>S r?S r@S rAS rBS  rCS! rDS" rES# rFS$ rGS% rHS& rIS' rJS( rKS) rLS* rM\Rt                  R                  \.S+S,9S- 5       rOS. rPS/ rQS0 rRS1 rSS2 rT\Rt                  Rw                  S\\45      S3 5       rUS4 rVS5 rWS6 rXS7 rYS8 rZ\Rt                  Rw                  S\\45      S9 5       r[S: r\S; r]S< r^S= r_S> r`S? ra\Rt                  Rw                  S@\R                  \R                  \R                  /5      SA 5       reSB rfSC rgSD rhSE riSF rjSG rkSH rlSI rmSJ rnSK roSL rp\Rt                  Rw                  S\\\45      SM 5       rq\Rt                  Rw                  SN\R                  \R                  /5      SO 5       rt\Rt                  Rw                  SP\" \/\05      5      SQ 5       ru\Rt                  Rw                  SR\R                  \R                  SS4\R                  \R                  SS4\R                  \R                  ST4\R                  \R                  ST4/5      SU 5       rx\Rt                  Rw                  SV\" SWSX9\" SWSX9\" SWSX9/5      SY 5       rySZ rzS[ r{\,\Rt                  Rw                  S\\05      S] 5       5       r|\Rt                  Rw                  S^\\\/5      S_ 5       r}\Rt                  Rw                  S^\\\/5      \Rt                  Rw                  S`Sa\~Sb4Sc\Sd4/5      Se 5       5       r\Rt                  Rw                  S^\\\GR                  " \5      /5      \Rt                  Rw                  SfSg Sh /5      \Rt                  Rw                  SiScSa/5      Sj 5       5       5       r\Rt                  Rw                  S^\\\/5      Sk 5       r\Rt                  Rw                  S\\\/5      \Rt                  Rw                  SlSmSn/SSSoSSpSqSrSs4	SSt SSoSSpSuSrSs4	SSv SSoSwSxSySuSz4	SSS{ SoSwS| S}SrS~4	SSSSSS SSrS~4	S/5      S 5       5       r\Rt                  Rw                  S\SSS.SSS./4\\1445      S 5       rS rS r\Rt                  Rw                  S^\\\\/5      S 5       r\Rt                  Rw                  S\\05      S 5       r\Rt                  Rw                  S\R                  \R                  /5      S 5       rS rS rg)    N)defaultdict)Mapping)partial)StringIO)product)assert_array_almost_equalassert_array_equal)sparse)clone)ENGLISH_STOP_WORDSCountVectorizerHashingVectorizerTfidfTransformerTfidfVectorizerstrip_accents_asciistrip_accents_unicode
strip_tags)GridSearchCVcross_val_scoretrain_test_split)Pipeline)	LinearSVC)assert_allclose_dense_sparseassert_almost_equalskip_if_32bit)_IS_WASMCSC_CONTAINERSCSR_CONTAINERS)zthe pizza pizza beer copyrightzthe pizza burger beer copyrightz!the the pizza beer beer copyrightzthe burger beer beer copyrightzthe coke burger coke copyrightzthe coke burger burger)zthe salad celeri copyrightz)the salad salad sparkling water copyrightzthe the celeri celeri copyrightzthe tomato tomato salad waterz the tomato salad water copyrightc                 4    [        U 5      R                  5       $ N)r   upperss    ]/var/www/html/venv/lib/python3.13/site-packages/sklearn/feature_extraction/tests/test_text.py	uppercaser%   9   s     #))++    c                 &    U R                  SS5      $ )N   ée)replacer"   s    r$   strip_eacuter+   =   s    99T3r&   c                 "    U R                  5       $ r    splitr"   s    r$   split_tokenizer/   A   s    779r&   c                     S/$ )Nthe_ultimate_feature r"   s    r$   lazy_analyzer3   E   s    "##r&   c                  *   Sn Sn[        U 5      U:X  d   eSn Sn[        U 5      U:X  d   eSn Sn[        U 5      U:X  d   eSn Sn[        U 5      U:X  d   eS	n S
n[        U 5      U:X  d   eSn Sn[        U 5      U:X  d   eSn S
n[        U 5      U:X  d   eg )N   àáâãäåçèéêëaaaaaaceeee   ìíîïñòóôõöùúûüýiiiinooooouuuuy   إu   ا   this is à testthis is a testu   öou   ̀́̂̃ u   ȫ)r   aexpecteds     r$   test_strip_accentsrA   I   s     AH #x///(A H #x/// 	AH #x/// 	AH #x/// 	AH #x/// 	#AH #x/// 	AH #x///r&   c                      Sn Sn[        U 5      U:X  d   eSn Sn[        U 5      U:X  d   eSn Sn[        U 5      U:X  d   eSn Sn[        U 5      U:X  d   eg )	Nr5   r6   r7   r8   r9   r=   r:   r;   )r   r>   s     r$   test_to_asciirC   m   sz     AHq!X---(A Hq!X--- 	AHq!X--- 	AHq!X---r&   
Vectorizerc                    U " SS9R                  5       nSn/ SQnU" U5      U:X  d   eSn/ SQnU" U5      U:X  d   eU " SS9R                  5       n[        S	5      n/ S
QnU" U5      U:X  d   eU " [        S9R                  5       nSn/ SQnU" U5      U:X  d   eU " [        SS9R                  5       nSn/ SQnU" U5      U:X  d   eg )Nasciistrip_accents:   J'ai mangé du kangourou  ce midi, c'était pas très bon.)
aimangedu	kangouroucemidietaitpastresbonz0This is a test, really.

 I met Harry yesterday.)thisistestreallymetharry	yesterdayfile)input'This is a test with a file-like object!)rT   rU   rV   withr[   likeobjectpreprocessoru;   J'ai mangé du kangourou  ce midi,  c'était pas très bon.)
AIMANGEDU	KANGOUROUCEMIDIETAITPASTRESBON)	tokenizerrH   )
zj'airK   rL   rM   rN   zmidi,zc'etaitrQ   rR   zbon.)build_analyzerr   r%   r/   )rD   watextr@   s       r$   test_word_analyzer_unigramsrq      s    	'	*	9	9	;BGDH d8x?DLHd8x	&	!	0	0	2B=>DGHd8x 
	+	:	:	<BHDH d8x 
nG	D	S	S	UBGDH d8xr&   c                  ^    [        SSSS9R                  5       n Sn/ SQnU " U5      U:X  d   eg )Nwordunicode      analyzerrH   ngram_rangerI   )rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   zai mangezmange duzdu kangourouzkangourou cezce midiz
midi etaitz	etait paszpas tresztres bon)r   rn   )ro   rp   r@   s      r$   'test_word_analyzer_unigrams_and_bigramsr{      sA    	yf
n  HDH* d8xr&   c                  |   Sn U R                  S5      n[        SSS9R                  5       n[        R                  " [
        5         U" U5        S S S 5        [        SSSS9R                  5       n[        R                  " [
        5         U" U5        S S S 5        g ! , (       d  f       NS= f! , (       d  f       g = f)	NrI   zutf-8ru   rF   )rz   encodingchar      )ry   rz   r}   )encoder   rn   pytestraisesUnicodeDecodeError)rp   
text_bytesro   cas       r$   test_unicode_decode_errorr      s     HDW%J 
Vg	>	M	M	OB	)	*
: 
+ 
Vg
n  
)	*
: 
+	* 
+	* 
+	*s   	B
	B-
B*-
B;c                  X   [        SSSS9R                  5       n Sn/ SQnU " U5      S S U:X  d   e/ SQnU " U5      S	S  U:X  d   eS
n/ SQnU " U5      S S U:X  d   e/ SQnU " U5      S	S  U:X  d   e[        SSSS9R                  5       n [        S5      n/ SQnU " U5      S S U:X  d   eg )Nr~   rt   r   rx   u9   J'ai mangé du kangourou  ce midi, c'était pas très bon)zj'az'aizai zi mz ma   )zs tresz tres ztres bzres bozes bon1This 
	is a test, really.

 I met Harry yesterday)thihisis zs iz is)z yesteyesteresterdsterdaterdayr[   r\   ry   rz   r]   r   rn   r   cngarp   r@   s      r$   test_char_ngram_analyzerr      s    yfn 	 GD2H:bq>X%%%AH:bc?h&&&BD2H:bq>X%%%AH:bc?h&&&v6n 	 =>D2H:bq>X%%%r&   c                      [        SSSS9R                  5       n Sn/ SQnU " U5      S S U:X  d   e/ SQnU " U5      S	S  U:X  d   e[        S
SSS9R                  5       n [        S5      n/ SQnU " U5      S S U:X  d   eg )Nchar_wbrt   r   rx   r   )z thr   r   r   z thir   )r   r   r   r   zerday r   r[   r   zA test with a file-like object!)z a z tetesestzst z tesr   r   r   s      r$   test_char_wb_ngram_analyzerr     s    )n 	 CD3H:bq>X%%%AH:bc?h&&&yfn 	 56D:H:bq>X%%%r&   c                      [        SSSS9R                  5       n Sn/ SQnU " U5      S S U:X  d   e/ SQnU " U5      S	S  U:X  d   e[        S
SSS9R                  5       n[        U5      nU" U5      U " U5      :X  d   eg )Nrs   rt   r   rx   r   )zthis is testzis test reallyztest really metr   )ztest really met harry yesterdayzthis is test really met harryz"is test really met harry yesterdayr[   r   r   )r   rp   r@   	cnga_filer[   s        r$   test_word_ngram_analyzerr     s    yfn 	 CDDH:bq>X%%%H
 :bc?h&&&v6n  D>DT?d4j(((r&   c                  N   SSS.n [        U R                  5       5      n[        [        [        [        [        [        5      4 H  nU" U 5      n[        US9nUR                  [        5        [        U[        5      (       a  UR                  U :X  d   eO[        UR                  5      U:X  d   eUR                  [        5      nUR                  S   [!        U5      :X  d   eU" U 5      n[        US9nUR#                  U5      n[!        U5      UR                  S   :X  a  M   e   g )Nr   rv   pizzabeer
vocabulary)setkeysdictlistiterr   r   intr   fitJUNK_FOOD_DOCS
isinstancer   vocabulary_	transformshapeleninverse_transform)vocabtermstypvvectXinvs          r$   &test_countvectorizer_custom_vocabularyr   6  s    #E

E dD'+s";<J!, a!!##u,,,t''(E111NN>*wwqzSZ'''J!,$$Q'3x1771:%%% =r&   c                     SS/n [        S[        U S94S[        5       4/5      nUR                  [        5      n[        UR                  S   R                  5      [        U 5      :X  d   eUR                  S   [        U 5      :X  d   eg )Nr   r   countr   tfidfrv   )
r   r   r   fit_transformALL_FOOD_DOCSr   named_stepsr   r   r   )what_we_likepiper   s      r$   /test_countvectorizer_custom_vocabulary_pipeliner   K  s    V$Lo>?&()	
D 	=)At(445\9JJJJ771:\****r&   c                      SSS.n Sn[         R                  " [        US9   [        U S9nUR	                  S/5        S S S 5        g ! , (       d  f       g = f)Nr   r   z$Vocabulary contains repeated indicesmatchr   pasta_sizilianar   r   
ValueErrorr   r   )r   msgr   s      r$   7test_countvectorizer_custom_vocabulary_repeated_indicesr   X  sE    #E
0C	z	-%0#$% 
.	-	-s   A
Ac                      SSS.n [         R                  " [        SS9   [        U S9nUR	                  S/5        S S S 5        g ! , (       d  f       g = f)Nrv   rw   r   zdoesn't contain indexr   r   pasta_verdurar   r   r   s     r$   0test_countvectorizer_custom_vocabulary_gap_indexr   `  sA    #E	z)@	A%0/"# 
B	A	As   A
Ac                     [        5       n U R                  SS9  U R                  5       [        :X  d   eU R                  SS9  [        R
                  " [        5         U R                  5         S S S 5        U R                  SS9  [        R
                  " [        5         U R                  5         S S S 5        / SQnU R                  US9  U R                  5       [        U5      :X  d   eg ! , (       d  f       N= f! , (       d  f       NR= f)Nenglish
stop_words_bad_str_stop__bad_unicode_stop_)someotherwords)r   
set_paramsget_stop_wordsr   r   r   r   r   )cvstoplists     r$   test_countvectorizer_stop_wordsr   g  s    		BMMYM'"4444MM-M.	z	"
 
#MM1M2	z	"
 
#)HMMXM&#h-/// 
#	" 
#	"s   C+C<+
C9<
D
c                  @   [         R                  " [        SS9   [        / S9n U R	                  S/5        S S S 5        [         R                  " [        SS9   [        SSS9nUR	                  / SQ5        S S S 5        g ! , (       d  f       NN= f! , (       d  f       g = f)	Nzempty vocabularyr   r   foo      ?r   )max_dfr   )zto be or not to bez
and me toozand so do your   )r   r   s     r$   %test_countvectorizer_empty_vocabularyr   v  sr    	z);	<"-% 
= 
z);	<39=	CD 
=	<	 
=	< 
=	<s   A>B>
B
Bc                      [        5       n U R                  [        S S 5      nU R                  [        SS  5      nUR                  S   UR                  S   :w  d   eg )Nr   rv   )r   r   r   r   )r   X1X2s      r$   test_fit_countvectorizer_twicer     sV    		B			-+	,B			-+	,B88A;"((1+%%%r&   c                      / SQn Sn[        US9nUR                  U 5        / SQnUR                  5       n[        XC5        g)zCheck `get_feature_names_out()` when a custom token pattern is passed.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/12971
z&This is the 1st document in my corpus.z This document is the 2nd sample.zAnd this is the 3rd one.zIs this the 4th document?z'[0-9]{1,3}(?:st|nd|rd|th)\s\b(\w{2,})\btoken_pattern)documentonesampleN)r   r   get_feature_names_outr	   )corpusr   
vectorizerr@   feature_names_outs        r$   )test_countvectorizer_custom_token_patternr     sE    
F ?M }=JV$,H"88:(3r&   c                      / SQn SnSn[        US9n[        R                  " [        US9   UR	                  U 5        SSS5        g! , (       d  f       g= f)zCheck that we raise an error if token pattern capture several groups.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/12971
r   z)([0-9]{1,3}(?:st|nd|rd|th))\s\b(\w{2,})\bz,More than 1 capturing group in token patternr   r   Nr   r   r   r   r   )r   r   err_msgr   s       r$   <test_countvectorizer_custom_token_pattern_with_several_groupr     sF    
F AM<G }=J	z	1v 
2	1	1s   A
Ac                  b   / SQn Sn[        SU S9n[        R                  " [        US9   UR	                  U 5        S S S 5        [
        R                  " 5          [
        R                  " S[        5        UR                  U 5        S S S 5        g ! , (       d  f       NY= f! , (       d  f       g = f)N)SampleUpperCase
VocabularyzyUpper case characters found in vocabulary while 'lowercase' is True. These entries will not be matched with any documentsT)	lowercaser   r   error)	r   r   warnsUserWarningr   warningscatch_warningssimplefilterr   )r   messager   s      r$   'test_countvectorizer_uppercase_in_vocabr    s     ;J	)  !4JGJ	k	1z" 
2 
	 	 	"g{3Z( 
#	" 
2	1 
#	"s   B-B 
B 
B.c                      / SQ/ SQ/ SQ/n [        SSS9R                  U 5      n/ SQnUR                  U5      n[        X#5        g)	z0Check get_feature_names_out for TfidfTransformerrv   rv   rv   rv   rv   r   rv   r   r   Tl2
smooth_idfnorm)r?   cbN)r   r   r   r	   )r   trfeature_names_inr   s       r$   %test_tf_transformer_feature_names_outr    sF    	Iy)A	T	5	9	9!	<B&001AB';r&   c                  x   / SQ/ SQ/ SQ/n [        SSS9nUR                  U 5      R                  5       nUS:  R                  5       (       d   e[	        US-  R                  S	S
9/ SQ5        / SQ/ SQ/ SQ/n [        SSS9nUR                  U 5      R                  5       nUS:  R                  5       (       d   eg )Nr	  r
  r  Tr  r  r   rw   rv   axisr   r   r   )r   r   toarrayallr   sumr   r  r   s      r$   test_tf_idf_smoothingr    s    	Iy)A	T	5BQ'')EQJ uaxnn!n4oF 
Iy)A	T	5BQ'')EQJr&   zcno floating point exceptions, see https://github.com/numpy/numpy/pull/21895#issuecomment-1311525881)reasonc                     / SQ/ SQ/ SQ/n [        SSS9nUR                  U 5      R                  5       nUS:  R                  5       (       d   e[	        US-  R                  S	S
9/ SQ5        / SQ/ SQ/ SQ/n [        SSS9nSn[        R                  " [        US9   UR                  U 5      R                  5         S S S 5        g ! , (       d  f       g = f)Nr	  r
  r  Fr  r  r   rw   rv   r  r  zdivide by zeror   )	r   r   r  r  r   r  r   r  RuntimeWarning)r   r  r   in_warning_messages       r$   test_tfidf_no_smoothingr"    s     
Iy)A	U	6BQ'')EQJ uaxnn!n4oF 
Iy)A	U	6B)	n,>	?
##% 
@	?	?s    C
Cc                      S/S/S//n [        SSS S9nUR                  U 5      R                  5       nUS   S:X  d   eUS   US   :  d   eUS   US   :  d   eUS   S:  d   eUS   S:  d   eg )Nrv   rw   r   TF)sublinear_tfuse_idfr  r   )r   r   r  r  s      r$   test_sublinear_tfr&    s    
qcA3A	tU	FBQ'')E8q==8eAh8eAh8a<<8a<<r&   c                  n	   [        [        S S 5      n [        S   /n[        [        5      S-
  n[        SS9nUR	                  U 5      n[        US5      (       a  UR                  5       nUSUR                  S   4   S:X  d   e[        UR                  S	9nX54 H  nUR                  U5      n[        US5      (       a  UR                  5       nUR                  nUSUS
   4   S:X  d   eUSUS   4   S:X  d   eUSUS   4   S:X  d   eSU;  d   eSU;  d   eUSUS   4   S:X  d   eUSUS   4   S:X  d   eUSUS   4   S:X  d   eUSUS   4   S:X  a  M   e   [        SS9n	U	R                  U5      R                  U5      R                  5       n
[        U	R                  5      [        UR                  5      :X  d   eU
R                  U[        UR                  5      4:X  d   eU	R                  W5      R                  5       nUR                  [        U5      [        UR                  5      4:X  d   e[        SSS9nUR                  U5      R                  U5      R                  5       n[        US5      (       a   e[        SS9n[        R                  " [         5         UR                  U5        S S S 5        [#        [$        R&                  " USS9S/U-  5        [        [        S S 5      n [)        SS9nUR*                  Ul        UR	                  U 5      R                  5       nUR,                  (       a   e[#        U
U5        UR                  U5      R                  5       n[#        UU5        [        S S	9n[        R                  " [         5         UR                  U 5        S S S 5        UR/                  SSS9  UR1                  5       nSn[3        U5      nU" U5      nUU:X  d   eUR/                  SS S9  [        R                  " [         5         UR1                  5         S S S 5        S Ul        [        R                  " [         5         UR5                  5         S S S 5        g ! , (       d  f       GN= f! , (       d  f       N= f! , (       d  f       Nl= f! , (       d  f       g = f)!Nrv         ?r   tocsrr   r   rw   r   saladtomatowaterthe	copyrightcokeburgerr   l1r  F)r  r%  idf_Tr%  r  r   rF   )rH   r   rI   _gabbledegook_)rH   rb   _invalid_analyzer_type_)r   r   r   r   r   hasattrr+  r   r   r   r   r  r5  r   r   r   r   r   npr  r   r   fixed_vocabulary_r   build_preprocessorr   rn   )
train_data	test_datan_trainv1counts_trainv2r   counts_testr   t1r   
tfidf_testt2tft3tvtfidf2tfidf_test2v3	processorrp   r@   results                          r$   test_vectorizerrO    sc   mCR()Jr"#I- 1$G 
	$B##J/L|W%%#))+2>>'223q888 
BNN	3B Xkk),;((%++-K]]
1j112a7771j223q8881j112a777 J&&& *,,, 1j001Q6661j223q8881j001Q6661j112a777/ 4 
t	$BFF< **<8@@BErww<3r~~....;;7C$78888 k*224JIBNN0CDDDD 
tU	3B			'	'	5	=	=	?Br6"""" 
$	'B	z	"
\" 
# bffRa03%'/B mCR()J	d	#B		BIj)113F####eV, ,,y)113Kj+6 
D	)B	z	"
Z  
# MM5M9%%'IGD"4(Ht_Fv MM 0tMD	z	"
 
# .BM	z	"
 
#	"Q 
#	", 
#	" 
#	"
 
#	"s0   (Q2RRR&2
R
R
R#&
R4c                     Su  pp#[        XX#S9nUR                  [        5        UR                  R                  U :X  d   eUR                  R
                  U:X  d   eUR                  R                  U:X  d   eUR                  R                  U:X  d   eSUl        SUl        SUl        SUl        UR                  R                  U :X  d   eUR                  R
                  U:X  d   eUR                  R                  U:X  d   eUR                  R                  U:X  d   eUR                  [        5        UR                  R                  UR                  :X  d   eUR                  R
                  UR
                  :X  d   eUR                  R                  UR                  :X  d   eUR                  R                  UR                  :X  d   eg )N)r  FFF)r  r%  r  r$  r3  T)r   r   r   _tfidfr  r%  r  r$  )r  r%  r  r$  rI  s        r$   test_tfidf_vectorizer_settersrR  i  s}   .G+D:	z
B FF>99>>T!!!99'''99:---99!!\111 BGBJBMBO99>>T!!!99'''99:---99!!\111FF>99>>RWW$$$99

***992==00099!!R__444r&   c                     [        5       n U R                  [        5      nUR                  nUR                  [        [        5      U R                  4:X  d   eUR                  U R                  :X  d   e[        R                  " UR                  5      S:  d   e[        R                  " UR                  5      S:  d   e[        R                  " UR                  5      S:  d   e[        R                  " UR                  5      S:  d   e[        UR                  S   5       H:  n[        [        R                  R                  US   R                  S5      S5        M<     [        SSS9n U R                  [        5      nUR                  [        [        5      U R                  4:X  d   eUR                  U R                  :X  d   eUR                  nXB:  d   eUSU-  :  d   e[        R                  " UR                  5      S:  d   e[        R                  " UR                  5      S:  d   e[        UR                  S   5       H:  n[        [        R                  R                  US   R                  S5      S5        M<     g )	Nr(  r   rv   rw   r   ru   r3  )rz   r  )r   r   r   nnzr   r   
n_featuresdtyper:  mindatamaxranger   linalgr  )r   r   	token_nnzi
ngrams_nnzs        r$   test_hashing_vectorizerr_    s   A	M"AI77s=)1<<888877agg 66!&&>B66!&&>A66!&&>A66!&&>A 1771:BIINN1Q499a8#>  	f48A	M"A77s=)1<<888877agg J!!!I%%% 66!&&>B66!&&>A 1771:BIINN1Q499a8#> r&   c                  Z   [        SS9n [        R                  " [        5         U R	                  5         S S S 5        U R
                  (       a   eU R                  [        5      nUR                  u  p#[        U R                  5      U:X  d   eU R	                  5       n[        U[        R                  5      (       d   eUR                  [        :X  d   e[        U5      U:X  d   e[!        / SQU5        [#        U5       H%  u  pVXPR                  R%                  U5      :X  a  M%   e   / SQn[        US9n U R	                  5       n[!        / SQU5        U R
                  (       d   e[#        U5       H%  u  pVXPR                  R%                  U5      :X  a  M%   e   g ! , (       d  f       GNm= f)Nr)  r*  	r   r2  celerir1  r   r,  	sparklingr-  r.  r   )r   r   r   r   r   r;  r   r   r   r   r   r   r:  ndarrayrV  r`   r	   	enumerateget)r   r   	n_samplesrU  feature_namesidxnamer   s           r$   test_feature_namesrk    su   		$B 
z	"
  " 
##### 	'AGGIr~~*,,,,,.MmRZZ0000&(((}+++
	
 	 }-	nn((.... .
E 
E	*B,,.M
	
 	 }-	nn((.... . 
#	"s   F
F*c                 z    1 SknU " SSS9nUR                  [        5        [        UR                  5      U:X  d   eg )N>   r   r   r,  r2  g333333?   )r   max_features)r   r   r   r   )rD   expected_vocabularyr   s      r$   test_vectorizer_max_featuresrp    s<    > 3Q7JNN=!z%%&*====r&   c                     [        SS9n [        SS9n[        S S9nU R                  [        5      R                  SS9nUR                  [        5      R                  SS9nUR                  [        5      R                  SS9nU R	                  5       nUR	                  5       nUR	                  5       nSUR                  5       :X  d   eSUR                  5       :X  d   eSUR                  5       :X  d   eSU[        R                  " U5         :X  d   eSU[        R                  " U5         :X  d   eSU[        R                  " U5         :X  d   eg )Nrv   rn  r   r   r     r/  )r   r   r   r  r   rY  r:  argmax)	cv_1cv_3cv_Nonecounts_1counts_3counts_None
features_1
features_3features_Nones	            r$   "test_count_vectorizer_max_featuresr~    s;    *D*D40G!!.15515=H!!.15515=H''7;;;CK++-J++-J113M !!!! Jryy23333Jryy23333M"))K"89999r&   c                  X   / SQn [        SSS9nUR                  U 5        SUR                  R                  5       ;   d   e[	        UR                  R                  5       5      S:X  d   eSUl        UR                  U 5        SUR                  R                  5       ;  d   e[	        UR                  R                  5       5      S:X  d   eS	Ul        UR                  U 5        SUR                  R                  5       ;  d   e[	        UR                  R                  5       5      S:X  d   eg )
Nabcdeaeatr~   r   ry   r   r?   r   r)  rm  rv   )r   r   r   r   r   r   r>  r   s     r$   test_vectorizer_max_dfr    s   %IF37DHHY$""''))))t$$&'1,,,DKHHYd&&++----t$$&'1,,,DKHHYd&&++----t$$&'1,,,r&   c                  X   / SQn [        SSS9nUR                  U 5        SUR                  R                  5       ;   d   e[	        UR                  R                  5       5      S:X  d   eSUl        UR                  U 5        SUR                  R                  5       ;  d   e[	        UR                  R                  5       5      S:X  d   eS	Ul        UR                  U 5        SUR                  R                  5       ;  d   e[	        UR                  R                  5       5      S:X  d   eg )
Nr  r~   rv   )ry   min_dfr?   r   rw   r  g?)r   r   r   r   r   r  r  s     r$   test_vectorizer_min_dfr  )  s   %IF15DHHY$""''))))t$$&'1,,,DKHHYd&&++----t$$&'1,,,DKHHYd&&++----t$$&'1,,,r&   c                     SS/n [        SSS9nUR                  U 5      R                  5       n[        / SQUR	                  5       5        [        / SQ/ SQ/U5        [        SSS	S
9nUR                  U 5      R                  5       n[        / SQ/ SQ/U5        [        SSS	[
        R                  S9nUR                  U 5      nUR                  [
        R                  :X  d   eg )Naaabcabbder~   r   r  )r?   r  r  dr)   )r   rv   rv   r   r   )rv   rw   r   rv   rv   T)ry   r   binary)rv   rv   rv   r   r   )rv   rv   r   rv   rv   )ry   r   r  rV  )r   r   r  r	   r   r:  float32rV  )r>  r   r   X_sparses       r$   test_count_binary_occurrencesr  ;  s    '"IF37D9%--/A0$2L2L2NO91= F3tDD9%--/A91= F3t2::VD!!),H>>RZZ'''r&   c                     SS/n [        SSS S9nUR                  U 5      n[        R                  " USS R                  5      S:X  d   e[        R                  " USS	 R                  5      S	:X  d   eUR
                  [        R                  :X  d   e[        SSS
S S9nUR                  U 5      n[        R                  " UR                  5      S:X  d   eUR
                  [        R                  :X  d   e[        SSS
S [        R                  S9nUR                  U 5      nUR
                  [        R                  :X  d   eg )Nr  r  Fr~   )alternate_signry   r  r   rv   r   rw   T)ry   r  r  r  )ry   r  r  r  rV  )r   r   r:  rY  rX  rV  float64)r>  r   r   s      r$   test_hashed_binary_occurrencesr  O  s   '"IEFNDy!A66!Aa&++!###66!Aa&++!###77bjj    dD 	y!A66!&&>Q77bjj    dRZZD 	y!A77bjj   r&   c                    [         nU " 5       nUR                  U5      nUR                  U5      n[        U[        5      (       d   eUR                  5       n[        X5       Hj  u  pg[        R                  " [        R                  " U" U5      5      5      n[        R                  " [        R                  " U5      5      n[        X5        Ml     [        R                  " U5      (       d   eUR                  S:X  d   eUR                  5       n	UR                  U	5      n
[        XJ5       H9  u  p[        [        R                  " U5      [        R                  " U5      5        M;     UR                  5       nUR                  U5      n[        XM5       H9  u  p[        [        R                  " U5      [        R                  " U5      5        M;     g )Ncsr)r   r   r   r   r   rn   zipr:  sortuniquer	   r
   issparseformatr  tocsc)rD   rX  r   transformed_datainversed_dataanalyzedocinversed_termsr   transformed_data2inversed_data2terms2transformed_data3inversed_data3terms3s                  r$   !test_vectorizer_inverse_transformr  i  se    DJ!//5001ABMmT****'')G"47		'#,/0>!:;51  8
 ??+,,,,""e+++ )002112CDN];2775>2776?; < )..0112CDN];2775>2776?; <r&   c                     [         [        -   n S/[        [         5      -  S/[        [        5      -  -   n[        XSSS9u  p#pE[	        S[        5       4S[        5       4/5      nSS	/S
S.n[        XgSSS9nUR                  X$5      R                  U5      n	[        X5        UR                  S:X  d   eUR                  R                  S   n
U
R                  S:X  d   eg )Nr(  rv   g?r   	test_sizerandom_stater   svcrv   rv   ru   hingesquared_hinge)vect__ngram_range	svc__lossr   )n_jobsr   r   )r   NOTJUNK_FOOD_DOCSr   r   r   r   r   r   r   predictr	   best_score_best_estimator_r   rz   rX  targetr=  r>  target_traintarget_testpipeline
parametersgrid_searchpredbest_vectorizers              r$   -test_count_vectorizer_pipeline_grid_selectionr    s    --D TC''1#4E0F*FFF 8H!84J< &/"34uik6JKLH %f-/J xA!DK ??:4<<YGDt)
 ""c)))!11==fEO&&&000r&   c                     [         [        -   n S/[        [         5      -  S/[        [        5      -  -   n[        XSSS9u  p#pE[	        S[        5       4S[        5       4/5      nSS	/S
SS.n[        XgSS9nUR                  X$5      R                  U5      n	[        X5        UR                  S:X  d   eUR                  R                  S   n
U
R                  S:X  d   eU
R                  S:X  d   eU
R                   (       a   eg )Nr(  rv   g?r   r  r   r  r  ru   )r3  r  r  )r  
vect__normr  )r  r   r  )r   r  r   r   r   r   r   r   r   r  r	   r  r  r   rz   r  r;  r  s              r$   'test_vectorizer_pipeline_grid_selectionr    s   --D TC''1#4E0F*FFF 8H!84J< &/"34uik6JKLH %f-"/J xA>K ??:4<<YGDt)
 ""c)))!11==fEO&&&0004'''00000r&   c                      [         [        -   n S/[        [         5      -  S/[        [        5      -  -   n[        S[	        5       4S[        5       4/5      n[        X USS9n[        U/ SQ5        g )Nr(  rv   r   r  r   )r   r  )r   r  r   r   r   r   r   r	   )rX  r  r  	cv_scoress       r$   )test_vectorizer_pipeline_cross_validationr    sh    --D TC''1#4E0F*FFF&/"34uik6JKLH1=Iy/2r&   c                     Sn [        5       nUR                  U /5      nUR                  S:X  d   e[        S SS9nUR	                  U /5      nUR                  S:X  d   eUR
                  UR
                  :X  d   e[        [        R                  " UR                  5      [        R                  " UR                  5      5        g )Nu   Машинное обучение — обширный подраздел искусственного интеллекта, изучающий методы построения алгоритмов, способных обучаться.)rv      F)r  r  )rv   i   )
r   r   r   r   r   rT  r	   r:  r  rX  )r   r   	X_countedX_hasheds       r$   test_vectorizer_unicoder    s    	1  D""H:.I??g%%%$u=D~~xj)H>>Z''' ==HLL((( rwwy~~.0FGr&   c                      SS/n [        U S9nUR                  [        5      nUR                  [        5      n[	        UR                  5       UR                  5       5        UR                  (       d   eg )Nr   rb  r   )r   r   r   r   r   r  r;  )r   r   X_1X_2s       r$   +test_tfidf_vectorizer_with_fixed_vocabularyr    sY    8$Jj1D


]
+C
..
'CckkmS[[];!!!!r&   c                     [        5       [        SS9[        SS9[        SS9[        5       [        [        S9[        [        S9[        [        S9R	                  [
        5      [        [        S	9R	                  [
        5      [        5       [        [        S9[        5       R	                  [
        5      /n U  H  n[        R                  " U5      n[        R                  " U5      n[        U5      UR                  :X  d   eUR                  5       UR                  5       :X  d   e[        UR                  [
        5      UR                  [
        5      5        M     g )
Nr3  r4  T)r  ru   rz   ra   )ry   rG   )r   r   r   r3   r   r   r+   r   pickledumpsloadstype	__class__
get_paramsr   r   )	instancesorigr#   copys       r$   test_pickling_vectorizerr    s    t$&f-Z0.Z044^Dl377G.n-I LL||ADzT^^+++ DOO$5555$~.~.	
 r&   factoryc                     [        5       nU " U5      nSn[        R                  " [        R                  " U5      5      nU" U5      nU" U5      nXe:X  d   eg)zWTokenizers cannot be pickled
https://github.com/scikit-learn/scikit-learn/issues/12833
rI   N)r   r  r  r  )r  vecfunctionrp   roundtripped_functionr@   rN  s          r$   test_pickling_built_processorsr    sQ     
Cs|HGD"LLh)?@~H"4(Fr&   c            	         [         R                  R                  S5      n [         R                  " / SQ5      n[	        SS5       H  n[        U R                  USSS95      n[        US9n[        R                  " [        R                  " U5      5      nUR                  [        5        UR                  [        5        [        UR                  5       UR                  5       5        M     g Nr   ra  d   r   F)sizer*   r   )r:  randomRandomStatearrayrZ  r   choicer   r  r  r  r   r   r	   r   )rngvocab_wordsx	vocab_setr   unpickled_cvs         r$   -test_countvectorizer_vocab_sets_when_picklingr  3  s     ))


"C((
	
K 1c]

;Q
FG		2||FLL$45
}'$$&(J(J(L	
 r&   c                  
   [         R                  R                  S5      n [         R                  " / SQ5      n[	        SS5       H  n[        5       nU R                  USSS9n[	        SS5       H	  nXSXE   '   M     [        US9n[        R                  " [        R                  " U5      5      nUR                  [        5        UR                  [        5        [        UR                  5       UR                  5       5        M     g r  )r:  r  r  r  rZ  r   r  r   r  r  r  r   r   r	   r   )r  r  r  
vocab_dictr   yr   r  s           r$   .test_countvectorizer_vocab_dicts_when_picklingr  O  s    
))


"C((
	
K 1c]V


;Q
>q!A#$ux  
3||FLL$45
}'$$&(J(J(L	
 r&   c                     [        5       R                  [        5      n [        5       R	                  U 5      n[
        R                  " U5      n[
        R                  " U5      n[        U5      UR                  :X  d   e[        UR                  U 5      R                  5       UR                  U 5      R                  5       5        g r    )r   r   r   r   r   r  r  r  r  r  r	   r  )r   r  r#   r  s       r$   test_pickling_transformerr  l  s    ''7A!!!$DTA<<?D:'''t))!,4468J8J18M8U8U8WXr&   c                  2   [        5       R                  [        5      n [        5       R	                  U 5      n[        5       nUR
                  Ul        [        UR                  U 5      R                  5       UR                  U 5      R                  5       5        g r    )	r   r   r   r   r   r5  r	   r   r  )r   r  r  s      r$   test_transformer_idf_setterr  u  si    ''7A!!!$DD		DIt~~a(002DNN14E4M4M4OPr&   c                     [        SS9n U R                  [        5        [        U R                  SS9nU R                  Ul        [        UR                  [        5      R                  5       U R                  [        5      R                  5       5        [        U R                  SS9nSn[        R                  " [        US9   U R                  Ul        S S S 5        g ! , (       d  f       g = f)NTr6  r   r%  Fz+`idf_` cannot be set when `user_idf=False`.r   )r   r   r   r   r5  r	   r   r  r   r   r   )r  r  r   s      r$   test_tfidf_vectorizer_setterr  }  s    4(DHH^d&6&6ED		DI~&..0~&..0
 d&6&6FD;G	z	1II	 
2	1	1s   C
C*c                  &   [        SS9n U R                  [        5        [        U R                  SS9n[	        U R
                  5      nS/US-   -  n[        R                  " [        5         [        USU5        S S S 5        g ! , (       d  f       g = f)NTr6  r  r   rv   r5  )
r   r   r   r   r   r5  r   r   r   setattr)r   r  expected_idf_leninvalid_idfs       r$   %test_tfidfvectorizer_invalid_idf_attrr    sn    4(DHH^d&6&6ED499~%+a/0K	z	"fk* 
#	"	"s   +B
Bc                      / SQn [        U S9n[        R                  " [        5         UR	                  / 5        S S S 5        g ! , (       d  f       g = f)N)r?   r  r  r?   r?   r   r   r   s     r$   test_non_unique_vocabr    s4    %Ee,D	z	" 
#	"	"s   A
Ac                      Sn [         nS n[        R                  " XS9   U" 5         S S S 5        g ! , (       d  f       g = f)Nz?np.nan is an invalid document, expected byte or unicode string.c                  \    [        5       n U R                  S[        R                  S/5        g )Nhello worldhello hello)r   r   r:  nan)hvs    r$   func0test_hashingvectorizer_nan_in_docs.<locals>.func  s#     
-?@r&   r   )r   r   r   )r  	exceptionr  s      r$   "test_hashingvectorizer_nan_in_docsr    s4     PGIA 
y	0 
1	0	0s   1
?c                  4   [        SSS S9n U R                  (       d   eU R                  SS/5      R                  5       n[	        UR                  5       / SQ5        U R                  SS/5      R                  5       n[	        UR                  5       / SQ5        g )NTF)r  r%  r  r  r  )rv   rv   rv   r   )r   r  r   r  r	   ravelr   )r   r   r   s      r$   test_tfidfvectorizer_binaryr    ss    tU>A88O8	67??AAqwwy,/	
m]3	4	<	<	>Brxxz<0r&   c                      [        SS9n U R                  [        5        [        U R                  U R
                  R                  5        g )NTr6  )r   r   r   r   r5  rQ  )r   s    r$   test_tfidfvectorizer_export_idfr    s0    4(DHH^dii)9)9:r&   c                      [        S/S9n [        U 5      nU R                  [        5        UR                  [        5        UR                  U R                  :X  d   eg )Nr/  r   )r   r   r   r   r   )
vect_vocabvect_vocab_clones     r$   test_vectorizer_vocab_cloner    sM     UG4JZ(NN=!''':+A+AAAAr&   c                    SnU " 5       n[         R                  " [        US9   UR                  S5        S S S 5        [         R                  " [        US9   UR	                  S5        S S S 5        UR	                  SS/5        [         R                  " [        US9   UR                  S5        S S S 5        g ! , (       d  f       N= f! , (       d  f       Nf= f! , (       d  f       g = f)NzBIterable over raw text documents expected, string object received.r   zhello world!	some textzsome other text)r   r   r   r   r   r   )rD   r  r  s      r$   &test_vectorizer_string_object_as_inputr    s     SG
,C	z	1.) 
2 
z	1 
2GG[+,-	z	1n% 
2	1 
2	1 
2	1 
2	1s#   B7CC7
C
C
C'X_dtypec                     [         R                  " SSU SS9n[        5       R                  U5      nUR                  UR                  :X  d   eg N
    N  *   rV  r  )r
   randr   r   rV  )r  r   X_transs      r$   test_tfidf_transformer_typer$    s?    BW2>A ..q1G==AGG###r&   zcsc_container, csr_containerc                     [         R                  " SS[        R                  SS9nU " U5      nU" U5      n[	        5       R                  U5      n[	        5       R                  U5      n[        XV5        UR                  UR                  :X  d   eg r  )r
   r"  r:  r  r   r   r   r  )csc_containercsr_containerr   X_cscX_csrX_trans_cscX_trans_csrs          r$   test_tfidf_transformer_sparser,    sx     	BRZZbAA!E!E"$2259K"$2259K :!3!3333r&   z0vectorizer_dtype, output_dtype, warning_expectedTFc                    [         R                  " / SQ5      n[        U S9nSnU(       a4  [        R                  " [
        US9   UR                  U5      nS S S 5        OJ[        R                  " 5          [        R                  " S[
        5        UR                  U5      nS S S 5        WR                  U:X  d   eg ! , (       d  f       N!= f! , (       d  f       N2= f)N)numpyscipysklearnrV  z'dtype' should be used.r   r   )r:  r  r   r   r  r  r   r  r  r  rV  )vectorizer_dtypeoutput_dtypewarning_expectedr   r   warning_msg_matchX_idfs          r$   test_tfidf_vectorizer_typer7    s     	./A '78J1\\+->?,,Q/E @? $$&!!';7,,Q/E ' ;;,&&& @? '&s   B<4-C<
C

Cr  )rw   rv   r  c                    U R                   n[        R                  " SU S35      n[        R                  " [
        US9   U R                  S/5        S S S 5        [        R                  " [
        US9   U R                  S/5        S S S 5        [        U [        5      (       a5  [        R                  " [
        US9   U R                  S/5        S S S 5        g g ! , (       d  f       N= f! , (       d  f       Nj= f! , (       d  f       g = f)NzInvalid value for ngram_range=z/ lower boundary larger than the upper boundary.r   zgood news everyone)rz   reescaper   r   r   r   r   r   r   r   )r  invalid_ranger  s      r$   $test_vectorizers_invalid_ngram_ranger<    s     OOMii
( 89 	9G
 
z	1%&' 
2 
z	1/01 
2 #())]]:W5MM/01 65 * 
2	1 
2	1 65s$    C4C+=C<
C(+
C9<
D
c                     U R                  5       nU R                  5       nU R                  5       nU R                  XU5      $ r    )r   build_tokenizerr<  _check_stop_words_consistency)	estimatorr   tokenize
preprocesss       r$   r?  r?     s?    ))+J((*H--/J22:8TTr&   c                     Sn SU -  n[        5       [        5       [        5       4 HZ  nUR                  / SQS9  [        R
                  " [        US9   UR                  S/5        S S S 5        U?[        U5      SL a  MZ   e   [        R                  " 5          [        R                  " S[        5        WR                  S/5        S S S 5        [        W5      b   eUR                  / S	QS9  [        R
                  " [        US9   UR                  S/5        S S S 5        g ! , (       d  f       N= f! , (       d  f       Ns= f! , (       d  f       g = f)
Nz\['and', 'll', 've'\]z}Your stop_words may be inconsistent with your preprocessing. Tokenizing the stop words generated tokens %s not in stop_words.)you'veyouyou'llANDr   r   r  Fr   )rD  rE  rF  blahrG  )r   r   r   r   r   r  r  r   _stop_words_idr?  r  r  r  )lstrr  r  s      r$   'test_vectorizer_stop_words_inconsistentrK  '  s   #D	')-	. 
  !?#46G6IJ"DE\\+W5}o. 6 ,S1U::: K 
	 	 	"g{3=/* 
# )-555 NNHNI	k	1=/* 
2	1 65 
#	" 
2	1s$   D".D3E"
D0	3
E
Er'  c                 B   U " S[         R                  S9n[         R                  nUR                  R                  U5      Ul        UR                  R                  U5      Ul        SSSS.n[        5       R                  X5      nX$R                  R                  :X  d   eg)z
Check that CountVectorizer._sort_features preserves the dtype of its sparse
feature matrix.

This test is skipped on 32bit platforms, see:
    https://github.com/scikit-learn/scikit-learn/pull/11295
for more details.
)r   r   r1  r   rv   rw   )zscikit-learnrU   zgreat!N)r:  int64indicesastypeindptrr   _sort_featuresrV  )r'  r   INDICES_DTYPEr   Xss        r$   7test_countvectorizer_sort_features_64bit_sparse_indicesrT  B  s~     	fBHH-A HHM		  /AIxx}-AH"#1:J			)	)!	8BJJ,,,,,r&   	Estimatorc                 &   SS0/nU " 5       n[        U5      SL d   eU " S S/S9n[        U5      S:X  d   e[        U5      b   eUR                  U5         " S S	U 5      nU" S/S
9n[        U5      S:X  d   eU " S S/S9n[        U5      SL d   eg )Nrp   r  Tc                     U S   $ Nrp   r2   r  s    r$   <lambda>?test_stop_word_validation_custom_preprocessor.<locals>.<lambda>e  s    1V9r&   and)rb   r   r   c                       \ rS rSrS rSrg)Ftest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimatorik  c                     S $ )Nc                     U S   $ rX  r2   rY  s    r$   rZ  ktest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimator.build_preprocessor.<locals>.<lambda>m  s    QvYr&   r2   )selfs    r$   r<  Ytest_stop_word_validation_custom_preprocessor.<locals>.CustomEstimator.build_preprocessorl  s    &&r&   r2   N)__name__
__module____qualname____firstlineno__r<  __static_attributes__r2   r&   r$   CustomEstimatorr^  k  s    	'r&   ri  r   c                 L    [         R                  " S5      R                  U 5      $ )Nz\w{1,})r9  compilefindallr  s    r$   rZ  r[  s  s    bjj3;;C@r&   )rm   r   )r?  r   )rU  rX  r  ri  s       r$   -test_stop_word_validation_custom_preprocessorrn  \  s     [!"D
+C(-555
!4%
IC(-888(-555d') ' eW
-C(-888
@eWC )-555r&   zinput_type, err_type, err_msgfilenamer=   r[   z$'str' object has no attribute 'read'c                     S/n[         R                  " X#S9   U " S US9R                  U5        S S S 5        g ! , (       d  f       g = f)N"this is text, not file or filenamer   c                 "    U R                  5       $ r    r-   rY  s    r$   rZ  .test_callable_analyzer_error.<locals>.<lambda>  s
    QWWYr&   ry   r\   )r   r   r   )rU  
input_typeerr_typer   rX  s        r$   test_callable_analyzer_errorrw  x  s;     11D	x	/.jAOOPTU 
0	/	/s	   9
Ary   c                     [        U S5      $ )Nr)openrm  s    r$   rZ  rZ    s
    T#s^r&   c                 "    U R                  5       $ r    )readrm  s    r$   rZ  rZ    s
    r&   ru  c                     S/n[         R                  " [        [        45         U " XS9R	                  U5        S S S 5        g ! , (       d  f       g = f)Nrq  rt  )r   r   FileNotFoundErrorAttributeErrorr   )rU  ry   ru  rX  s       r$   &test_callable_analyzer_change_behaviorr    s=     11D	)>:	;86DDTJ 
<	;	;s   A
Ac                     S nU R                  S5      nUR                  S5        [        R                  " [        SS9   U" USS9R                  U/5        S S S 5        g ! , (       d  f       g = f)Nc                     [        S5      e)Ntesting)	Exceptionrm  s    r$   ry   6test_callable_analyzer_reraise_error.<locals>.analyzer  s    	""r&   zfile.txtzsample content
r  r   r[   rt  )joinwriter   r   r  r   )tmpdirrU  ry   fs       r$   $test_callable_analyzer_reraise_errorr    sU    
# 	JAGG	y		2862@@!E 
3	2	2s   A  
A.zjstop_words, tokenizer, preprocessor, ngram_range, token_pattern,analyzer, unused_name, ovrd_name, ovrd_msgrD  rF  r  r~   z'stop_words'
'analyzer'	!= 'word'c                 "    U R                  5       $ r    r-   r"   s    r$   rZ  rZ    
    aggir&   z'tokenizer'c                 "    U R                  5       $ r    r-   r"   s    r$   rZ  rZ    r  r&   \w+rs   'token_pattern'zis not Nonec                 "    U R                  5       $ r    r!   r"   s    r$   rZ  rZ    r  r&   c                 "    U R                  5       $ r    r  r"   s    r$   rZ  rZ    r  r&   z'preprocessor'zis callableru   c                 "    U R                  5       $ r    r  r"   s    r$   rZ  rZ    r  r&   z'ngram_range')	NNNr  r  r~   r  r  r  c
           	          [         n
U " 5       nUR                  UUUUUUS9  SU< SU< SU	< 3n[        R                  " [        US9   UR                  U
5        S S S 5        g ! , (       d  f       g = f)N)r   rm   rb   rz   r   ry   zThe parameter z will not be used since  r   )r   r   r   r  r  r   )rD   r   rm   rb   rz   r   ry   unused_name	ovrd_nameovrd_msgr=  r   r   s                r$   test_unused_parameters_warnr    sn    r  J<DOO!#   	C
 
k	- 
.	-	-s   	A$$
A2zVectorizer, Xrv   rw   )r   barr   )r   bazc                     U " 5       n[        US5      (       a   eUR                  U5        [        US5      (       a   eg )Nn_features_in_)r9  r   )rD   r   r   s      r$   test_n_features_inr    sB     Jz#34444NN1z#344444r&   c                      [        SS9n U R                  SS/5      R                  nU R                  SS/5      R                  nX:X  d   eg )Nrv   rr  helloworld)r   r   r   )r  vocab1vocab2s      r$   )test_tie_breaking_sample_order_invariancer  %  sL     q
)CWWgw'(44FWWgw'(44Fr&   c                  f    [        SSS9n U R                  S/5      R                  nUS   S:  d   eg )Ni@B )rw   r   )rU  rz   z22pcs efuturer   )r   r   rN  )hashingrN  s     r$   2test_nonnegative_hashing_vectorizer_result_indicesr  .  s7    7GG 12::G1:??r&   c                 8    U " 5       n[        US5      (       a   eg)z0Check that vectorizers do not define set_output.
set_outputN)r9  )rU  r   s     r$   'test_vectorizers_do_not_have_set_outputr  5  s!    
 +CsL)))))r&   c                    [         R                  " SS[        R                  SS9nU " U5      nUR	                  5       n[        5       R                  U5      nUR                  USS9n[        X#5        XRLd   eUR                  USS9nXRL d   e[        R                  " [        5         [        X#5        SSS5        g! , (       d  f       g= f)	zJCheck the behaviour of TfidfTransformer.transform with the copy parameter.r  r  r   r!  T)r  FN)r
   r"  r:  r  r  r   r   r   r   r   r   AssertionError)r'  r   r)  X_csr_originaltransformerX_transforms         r$   test_tfidf_transformer_copyr  >  s     	BRZZbAA!E ZZ\N"$((/K''D'9K 7###''E':K	~	&$U; 
'	&	&s   (B==
CrV  c                     [        S5       Vs/ s H!  n[        [        R                  " 5       5      PM#     nn[	        U S9R                  U5      nUR                  R                  U :X  d   egs  snf )zCheck that `idf_` has the same dtype as the input data.

Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/30016
i r1  N)rZ  struuiduuid4r   r   r5  rV  )rV  r]  r   r   s       r$   (test_tfidf_vectorizer_perserve_dtype_idfr  S  sY     %*'N3NqTZZ\	NA3 u-11!4J??  E))) 	4s   (A-c                  ^    [        5       n U R                  5       nUR                  (       a   eg)z7Test that HashingVectorizer has requires_fit=False tag.N)r   __sklearn_tags__requires_fit)r   tagss     r$   (test_hashing_vectorizer_requires_fit_tagr  _  s*    "$J&&(D     r&   c                  d    [        SS9n SS/nU R                  U5      nUR                  S:X  d   eg)z:Test that HashingVectorizer can transform without fitting.r  )rU  zThis is testzAnother test)rw   r  N)r   r   r   )r   r   rN  s      r$   -test_hashing_vectorizer_transform_without_fitr  f  s8    "b1Jn-F!!&)F<<7"""r&   )r  r9  r  r  collectionsr   collections.abcr   	functoolsr   ior   	itertoolsr   r.  r:  r   numpy.testingr   r	   r/  r
   sklearn.baser   sklearn.feature_extraction.textr   r   r   r   r   r   r   r   sklearn.model_selectionr   r   r   sklearn.pipeliner   sklearn.svmr   sklearn.utils._testingr   r   r   sklearn.utils.fixesr   r   r   r   r  r   r%   r+   r/   r3   rA   rC   markparametrizerq   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  xfailr"  r&  rO  rR  r_  rk  rp  r~  r  r  r  r  r  r  r  r  r  r  r  rn   r<  r>  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r$  r,  int32rM  r7  r<  r?  rK  rT  rn  r~  r  rw  paramr  r  r  r  r  r  r  r  r  r  r  r2   r&   r$   <module>r     sC    	   # #      G  	 	 	 T S % ! 
 I H  !22, $!0H.* 9J'KL:  M: z <&&4&().&*
+&$0E&4&&)*<  	M  &&&dN5:#?LD/N 'IJ> K>:4-$-$((!4 'IJ< K<>!1H$1N
3H0"
6 &&**''


8
:YQ +1;B ?O5FG&& RZZ$<=$ >$ "GNN$K44 6	2::t$	2::t$	RZZ'	RZZ'	'' 	f-F+F+22(U+6 .9- : -0 /?4EF662 /?4EF #	&+	!GHVV &' +-CD 
';<K =K /?4EF	F	F ?$5G 	5
 x 
	
 
	
 
	
 
	
 
	

	
qCITUIZ@ 	Qq11Q3GHI	.)55 /?4DFWX** .9< :<( 2::rzz":;* <*!#r&   