
    -i>                         S SK J r   S SKJrJr  S SKJr  S SKJr  S SKr	S SK
Jr  S SKJr  SSKJrJrJr  SS	KJr  SS
KJr   " S S\\5      rg)    )array)IterableMapping)Number)
itemgetterN)metadata_routing   )BaseEstimatorTransformerMixin_fit_context)check_array)check_is_fittedc                     ^  \ rS rSr% SrS\R                  0rS\/S/S/S.r	\
\S'   \R                  SS	S	S.S
 jrS	SSSS.S jr\" S	S9SS j5       rS r\" S	S9SS j5       r\
4S jrS rSS jrSS jrU 4S jrSrU =r$ )DictVectorizer   a
  Transforms lists of feature-value mappings to vectors.

This transformer turns lists of mappings (dict-like objects) of feature
names to feature values into Numpy arrays or scipy.sparse matrices for use
with scikit-learn estimators.

When feature values are strings, this transformer will do a binary one-hot
(aka one-of-K) coding: one boolean-valued feature is constructed for each
of the possible string values that the feature can take on. For instance,
a feature "f" that can take on the values "ham" and "spam" will become two
features in the output, one signifying "f=ham", the other "f=spam".

If a feature value is a sequence or set of strings, this transformer
will iterate over the values and will count the occurrences of each string
value.

However, note that this transformer will only do a binary one-hot encoding
when feature values are of type string. If categorical features are
represented as numeric values such as int or iterables of strings, the
DictVectorizer can be followed by
:class:`~sklearn.preprocessing.OneHotEncoder` to complete
binary one-hot encoding.

Features that do not occur in a sample (mapping) will have a zero value
in the resulting array/matrix.

For an efficiency comparison of the different feature extractors, see
:ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`.

Read more in the :ref:`User Guide <dict_feature_extraction>`.

Parameters
----------
dtype : dtype, default=np.float64
    The type of feature values. Passed to Numpy array/scipy.sparse matrix
    constructors as the dtype argument.
separator : str, default="="
    Separator string used when constructing new features for one-hot
    coding.
sparse : bool, default=True
    Whether transform should produce scipy.sparse matrices.
sort : bool, default=True
    Whether ``feature_names_`` and ``vocabulary_`` should be
    sorted when fitting.

Attributes
----------
vocabulary_ : dict
    A dictionary mapping feature names to feature indices.

feature_names_ : list
    A list of length n_features containing the feature names (e.g., "f=ham"
    and "f=spam").

See Also
--------
FeatureHasher : Performs vectorization using only a hash function.
sklearn.preprocessing.OrdinalEncoder : Handles nominal/categorical
    features encoded as columns of arbitrary data types.

Examples
--------
>>> from sklearn.feature_extraction import DictVectorizer
>>> v = DictVectorizer(sparse=False)
>>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
>>> X = v.fit_transform(D)
>>> X
array([[2., 0., 1.],
       [0., 1., 3.]])
>>> v.inverse_transform(X) == [{'bar': 2.0, 'foo': 1.0},
...                            {'baz': 1.0, 'foo': 3.0}]
True
>>> v.transform({'foo': 4, 'unseen_feature': 3})
array([[0., 0., 4.]])
	dict_typeno_validationbooleandtype	separatorsparsesort_parameter_constraints=Tc                4    Xl         X l        X0l        X@l        g Nr   )selfr   r   r   r   s        ^/var/www/html/venv/lib/python3.13/site-packages/sklearn/feature_extraction/_dict_vectorizer.py__init__DictVectorizer.__init__j   s    
"	    FNfittingtransformingindicesvaluesc                z   U H  n	[        U	[        5      (       a  U< U R                  < U	< 3n
Sn	O[        S[	        U	5       S35      eU(       a#  X;  a  [        U5      XJ'   UR                  U
5        U(       d  M{  X;   d  M  UR                  XJ   5        UR                  U R                  U	5      5        M     g)z)Add feature names for iterable of strings   zUnsupported type z; in iterable value. Only iterables of string are supported.N)
isinstancestrr   	TypeErrortypelenappendr   )r   fvfeature_namesvocabr$   r%   r&   r'   vvfeature_names              r   _add_iterable_element$DictVectorizer._add_iterable_elementp   s     B"c""+,dnnbA'Rz 2! ! 
 <4&)-&8#$$\2| 5u23djjn-! r"   )prefer_skip_nested_validationc                    / n0 nU H  nUR                  5        H  u  pg[        U[        5      (       a  U< U R                  < U< 3nOw[        U[        5      (       d  Uc  UnO\[        U[
        5      (       a  [        S[        U5       SU SU S35      e[        U[        5      (       a  SnU R                  XgX45        Wc  M  X;  d  M  [        U5      XH'   UR                  U5        M     M     U R                  (       a/  UR                  5         [        U5       V	Vs0 s H  u  pXi_M	     nn	nX0l        X@l        U $ s  snn	f )a  Learn a list of feature name -> indices mappings.

Parameters
----------
X : Mapping or iterable over Mappings
    Dict(s) or Mapping(s) from feature names (arbitrary Python
    objects) to feature values (strings or convertible to dtype).

    .. versionchanged:: 0.24
       Accepts multiple string values for one categorical feature.

y : (ignored)
    Ignored parameter.

Returns
-------
self : object
    DictVectorizer class instance.
NzUnsupported value type  for : z$.
Mapping objects are not supported.)itemsr*   r+   r   r   r   r,   r-   r   r6   r.   r/   r   	enumeratefeature_names_vocabulary_)
r   Xyr2   r3   xr0   r1   r5   is
             r   fitDictVectorizer.fit   s2   * A	a%%/0$..!#DL6**qy#$L7++#1$q' ; cA3 '== 
  8,,#'L..q]J+#0.1-.@+%,,\:% " * 99 &/&>?&>daQT&>E?+  @s   "Ec                    [        S5      R                  S:X  d   S5       eU R                  nU(       a  / n0 nOU R                  nU R                  nSn[        U[        5      (       a  U/OUn[        S5      nS/n/ n	U GHW  n
U
R                  5        GH$  u  p[        U[        5      (       a  U< U R                  < U< 3nSnO[        U[        5      (       d  Uc  UnOm[        U[        5      (       d.  [        U[        5      (       a  S nU R                  UUUUUUUU	S9  O*[        S[        U5       S	U S
U S[        U5       S3	5      eUc  M  U(       a#  X;  a  [        U5      X]'   UR!                  U5        X;   d  M  UR!                  X]   5        U	R!                  U R                  U5      5        GM'     UR!                  [        U5      5        GMZ     [        U5      S:X  a  [#        S5      e[$        R&                  " U[$        R(                  S9n[        U5      S-
  [        U5      4n[*        R,                  " XU4XS9nU(       av  U R.                  (       ae  UR/                  5         [$        R0                  " [        U5      [$        R2                  S9n[5        U5       H  u  nnX[   UU'   UX['   M     US S 2U4   nU R6                  (       a  UR9                  5         OUR;                  5       nU(       a  X@l        XPl        U$ )NrC      zsizeof(int) != 4 on your platform; please report this at https://github.com/scikit-learn/scikit-learn/issues and include the output from platform.platform() in your bug reportTr   r)   r#   zUnsupported value Type r:   r;   z.
z objects are not supported.zSample sequence X is empty.r   )shaper   )r   itemsizer   r>   r?   r*   r   r<   r+   r   r   r   r6   r,   r-   r.   r/   
ValueErrornp
frombufferintcsp
csr_matrixr   emptyint32r=   r   sort_indicestoarray)r   r@   r$   r   r2   r3   r%   r&   indptrr'   rB   r0   r1   r5   rI   result_matrix	map_indexnew_vals                     r   
_transformDictVectorizer._transform   s   
 Sz""a' 	
N	
' 

ME //M$$E a))QCq*  A	a%%/0$..!#DLA6**qy#$L#Aw//Jq(4K4K#'L..% '%1 '% / 	 $1$q' ; cA3c7)#>@   +<#<.1-.@+%,,\:#,u':;djjm4A "D MM#g,'G J v;!:;;--rww7Vq#e*-f%U

 tyy ]!3288DI'6
%*X	'"" 7 *!Y,7M;;&&()113M"/$r"   c                 "    U R                  USS9$ )a,  Learn a list of feature name -> indices mappings and transform X.

Like fit(X) followed by transform(X), but does not require
materializing X in memory.

Parameters
----------
X : Mapping or iterable over Mappings
    Dict(s) or Mapping(s) from feature names (arbitrary Python
    objects) to feature values (strings or convertible to dtype).

    .. versionchanged:: 0.24
       Accepts multiple string values for one categorical feature.

y : (ignored)
    Ignored parameter.

Returns
-------
Xa : {array, sparse matrix}
    Feature vectors; always 2-d.
Tr$   )rY   )r   r@   rA   s      r   fit_transformDictVectorizer.fit_transform(  s    0 q$//r"   c                    [        U S5        [        USS/S9nUR                  S   nU R                  n[	        U5       Vs/ s H	  oR" 5       PM     nn[
        R                  " U5      (       a.  [        UR                  5       6  H  u  pxXU4   Xg   XH   '   M     U$ [        U5       H2  u  py[        XSS24   5       H  u  pU
S:w  d  M  XU4   XU   '   M     M4     U$ s  snf )a  Transform array or sparse matrix X back to feature mappings.

X must have been produced by this DictVectorizer's transform or
fit_transform method; it may only have passed through transformers
that preserve the number of features and their order.

In the case of one-hot/one-of-K coding, the constructed feature
names and values are returned rather than the original ones.

Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
    Sample matrix.
dict_type : type, default=dict
    Constructor for feature mappings. Must conform to the
    collections.Mapping API.

Returns
-------
X_original : list of dict_type objects of shape (n_samples,)
    Feature mappings for the samples in X.
r>   csrcsc)accept_sparser   N)
r   r   rI   r>   rangerO   issparsezipnonzeror=   )r   r@   r   	n_samplesnames_dictsrC   jdr1   s              r   inverse_transform DictVectorizer.inverse_transformB  s    . 	./ %8GGAJ	##&+I&67&6&67;;q>>QYY[)%&!tW" *  "%(%a1g.DAAv&'1g( / )
  8s   Cc                 >    [        U SS/5        U R                  USS9$ )a  Transform feature->value dicts to array or sparse matrix.

Named features not encountered during fit or fit_transform will be
silently ignored.

Parameters
----------
X : Mapping or iterable over Mappings of shape (n_samples,)
    Dict(s) or Mapping(s) from feature names (arbitrary Python
    objects) to feature values (strings or convertible to dtype).

Returns
-------
Xa : {array, sparse matrix}
    Feature vectors; always 2-d.
r>   r?   Fr\   )r   rY   )r   r@   s     r   	transformDictVectorizer.transformm  s'    " 	/?@q%00r"   c                     [        U S5        [        S U R                   5       5      (       a&  U R                   Vs/ s H  n[        U5      PM     nnOU R                  n[        R
                  " U[        S9$ s  snf )a  Get output feature names for transformation.

Parameters
----------
input_features : array-like of str or None, default=None
    Not used, present here for API consistency by convention.

Returns
-------
feature_names_out : ndarray of str objects
    Transformed feature names.
r>   c              3   L   #    U  H  n[        U[        5      (       + v   M     g 7fr   )r*   r+   ).0names     r   	<genexpr>7DictVectorizer.get_feature_names_out.<locals>.<genexpr>  s     I5HT:dC(((5Hs   "$rH   )r   anyr>   r+   rL   asarrayobject)r   input_featuresru   r2   s       r   get_feature_names_out$DictVectorizer.get_feature_names_out  sj     	./IT5H5HIII373F3FG3F4SY3FMGM //Mzz-v66 Hs   A9c                 :   [        U S5        U(       d  [        R                  " U5      S   nU R                  n0 nU H  n[	        U5      XCU   '   M     X@l        [        UR                  5       [        S5      S9 VVs/ s H  u  peUPM	     snnU l        U $ s  snnf )ae  Restrict the features to those in support using feature selection.

This function modifies the estimator in-place.

Parameters
----------
support : array-like
    Boolean mask or list of indices (as returned by the get_support
    member of feature selectors).
indices : bool, default=False
    Whether support is a list of indices.

Returns
-------
self : object
    DictVectorizer class instance.

Examples
--------
>>> from sklearn.feature_extraction import DictVectorizer
>>> from sklearn.feature_selection import SelectKBest, chi2
>>> v = DictVectorizer()
>>> D = [{'foo': 1, 'bar': 2}, {'foo': 3, 'baz': 1}]
>>> X = v.fit_transform(D)
>>> support = SelectKBest(chi2, k=2).fit(X, [0, 1])
>>> v.get_feature_names_out()
array(['bar', 'baz', 'foo'], ...)
>>> v.restrict(support.get_support())
DictVectorizer()
>>> v.get_feature_names_out()
array(['bar', 'foo'], ...)
r>   r   r)   )key)	r   rL   wherer>   r.   r?   sortedr<   r   )r   supportr&   rh   	new_vocabrC   r0   s          r   restrictDictVectorizer.restrict  s    B 	./hhw'*G##	A"%i.IAh  % !2
1F
F$!AF
 	
s   >Bc                 h   > [         TU ]  5       nSUR                  l        SUR                  l        U$ )NTF)super__sklearn_tags__
input_tagsdicttwo_d_array)r   tags	__class__s     r   r   DictVectorizer.__sklearn_tags__  s-    w')#&+#r"   )r   r>   r   r   r   r?   r   )F)__name__
__module____qualname____firstlineno____doc__r   UNUSED4_DictVectorizer__metadata_request__inverse_transformr+   r   r   __annotations__rL   float64r    r6   r   rD   rY   r]   rm   rp   r|   r   r   __static_attributes____classcell__)r   s   @r   r   r      s    JZ .9:J:Q:Q,R) !U+	$D  !#

c$T  .> 53 63jaF 50 602 .2 )V1(7(0d r"   r   )r   collections.abcr   r   numbersr   operatorr   numpyrL   scipy.sparser   rO   sklearn.utilsr   baser
   r   r   utilsr   utils.validationr   r    r"   r   <module>r      s9     -     * @ @  .x%} xr"   