import warnings
from numbers import Integral

import numpy as np

from ..base import BaseEstimator, TransformerMixin, _fit_context
from ..utils import resample
from ..utils._param_validation import Interval, Options, StrOptions
from ..utils.stats import _averaged_weighted_percentile, _weighted_percentile
from ..utils.validation import (
    _check_feature_names_in,
    _check_sample_weight,
    check_array,
    check_is_fitted,
    validate_data,
)
from ._encoders import OneHotEncoder


class KBinsDiscretizer(TransformerMixin, BaseEstimator):
    """
Bin continuous data into intervals.

Read more in the :ref:`User Guide <preprocessing_discretization>`.

.. versionadded:: 0.20

Parameters
----------
n_bins : int or array-like of shape (n_features,), default=5
    The number of bins to produce. Raises ValueError if ``n_bins < 2``.

encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'
    Method used to encode the transformed result.

    - 'onehot': Encode the transformed result with one-hot encoding
      and return a sparse matrix. Ignored features are always
      stacked to the right.
    - 'onehot-dense': Encode the transformed result with one-hot encoding
      and return a dense array. Ignored features are always
      stacked to the right.
    - 'ordinal': Return the bin identifier encoded as an integer value.
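
    For illustration, the encodings only differ in how the bin identifier is
    represented (``X`` below stands for any 2D training data)::

      KBinsDiscretizer(n_bins=3, encode='ordinal').fit_transform(X)
      # one column per input feature, with values in {0, ..., n_bins - 1}
      KBinsDiscretizer(n_bins=3, encode='onehot-dense').fit_transform(X)
      # up to n_bins 0/1 columns per input feature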

strategy : {'uniform', 'quantile', 'kmeans'}, default='quantile'
    Strategy used to define the widths of the bins.

    - 'uniform': All bins in each feature have identical widths.
    - 'quantile': All bins in each feature have the same number of points.
    - 'kmeans': Values in each bin have the same nearest center of a 1D
      k-means cluster.

    For an example of the different strategies see:
    :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_strategies.py`.
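
    The fitted ``bin_edges_`` make the differences easy to inspect; a small
    sketch (``X`` is any training matrix, the exact edges depend on the data)::

      for strategy in ('uniform', 'quantile', 'kmeans'):
          est = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy=strategy)
          print(strategy, est.fit(X).bin_edges_[0])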

quantile_method : {"inverted_cdf", "averaged_inverted_cdf",
        "closest_observation", "interpolated_inverted_cdf", "hazen",
        "weibull", "linear", "median_unbiased", "normal_unbiased"},
        default="linear"
    Method to pass on to the `np.percentile` calculation when using
    `strategy="quantile"`. Only `averaged_inverted_cdf` and `inverted_cdf`
    support a `sample_weight` that is not `None` when subsampling is not
    active.

    .. versionadded:: 1.7

dtype : {np.float32, np.float64}, default=None
    The desired data-type for the output. If None, output dtype is
    consistent with input dtype. Only np.float32 and np.float64 are
    supported.

    .. versionadded:: 0.24

subsample : int or None, default=200_000
    Maximum number of samples used to fit the model, for computational
    efficiency.
    `subsample=None` means that all the training samples are used when
    computing the quantiles that determine the binning thresholds.
    Since quantile computation relies on sorting each column of `X` and
    that sorting has an `n log(n)` time complexity,
    it is recommended to use subsampling on datasets with a
    very large number of samples.

    .. versionchanged:: 1.3
        The default value of `subsample` changed from `None` to `200_000` when
        `strategy="quantile"`.

    .. versionchanged:: 1.5
        The default value of `subsample` changed from `None` to `200_000` when
        `strategy="uniform"` or `strategy="kmeans"`.

random_state : int, RandomState instance or None, default=None
    Determines random number generation for subsampling.
    Pass an int for reproducible results across multiple function calls.
    See the `subsample` parameter for more details.
    See :term:`Glossary <random_state>`.

    .. versionadded:: 1.1

Attributes
----------
bin_edges_ : ndarray of ndarray of shape (n_features,)
    The edges of each bin. Contains arrays of varying shapes ``(n_bins_ + 1,)``.
    Ignored features will have empty arrays.

n_bins_ : ndarray of shape (n_features,), dtype=np.int64
    Number of bins per feature. Bins whose width is too small
    (i.e., <= 1e-8) are removed with a warning.

n_features_in_ : int
    Number of features seen during :term:`fit`.

    .. versionadded:: 0.24

feature_names_in_ : ndarray of shape (`n_features_in_`,)
    Names of features seen during :term:`fit`. Defined only when `X`
    has feature names that are all strings.

    .. versionadded:: 1.0

See Also
--------
Binarizer : Class used to bin values as ``0`` or
    ``1`` based on a parameter ``threshold``.

Notes
-----

For a visualization of discretization on different datasets refer to
:ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_classification.py`.
On the effect of discretization on linear models see:
:ref:`sphx_glr_auto_examples_preprocessing_plot_discretization.py`.

In bin edges for feature ``i``, the first and last values are used only for
``inverse_transform``. During transform, bin edges are extended to::

  np.concatenate([[-np.inf], bin_edges_[i][1:-1], [np.inf]])
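
As a consequence, values outside the range seen during ``fit`` do not raise an
error: anything below ``bin_edges_[i][0]`` is assigned to bin ``0`` and
anything above ``bin_edges_[i][-1]`` to bin ``n_bins_[i] - 1``. For example,
with the estimator fitted in the Examples section below::

  est.transform([[-10, 1, -4, -1]])[0, 0]  # 0.0, i.e. the first bin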

You can combine ``KBinsDiscretizer`` with
:class:`~sklearn.compose.ColumnTransformer` if you only want to preprocess
part of the features.
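
For instance, a sketch that bins only the first and third column and passes the
remaining columns through unchanged (the column indices are illustrative)::

  from sklearn.compose import ColumnTransformer

  ct = ColumnTransformer(
      [('binned', KBinsDiscretizer(n_bins=5, encode='ordinal'), [0, 2])],
      remainder='passthrough',
  )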

``KBinsDiscretizer`` might produce constant features (e.g., when
``encode = 'onehot'`` and certain bins do not contain any data).
These features can be removed with feature selection algorithms
(e.g., :class:`~sklearn.feature_selection.VarianceThreshold`).
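
A sketch of that clean-up step (the default ``threshold=0.0`` drops the
constant, all-zero one-hot columns produced by empty bins)::

  from sklearn.feature_selection import VarianceThreshold
  from sklearn.pipeline import make_pipeline

  model = make_pipeline(
      KBinsDiscretizer(n_bins=10, encode='onehot-dense'),
      VarianceThreshold(),
  )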

Examples
--------
>>> from sklearn.preprocessing import KBinsDiscretizer
>>> X = [[-2, 1, -4,   -1],
...      [-1, 2, -3, -0.5],
...      [ 0, 3, -2,  0.5],
...      [ 1, 4, -1,    2]]
>>> est = KBinsDiscretizer(
...     n_bins=3, encode='ordinal', strategy='uniform'
... )
>>> est.fit(X)
KBinsDiscretizer(...)
>>> Xt = est.transform(X)
>>> Xt  # doctest: +SKIP
array([[ 0., 0., 0., 0.],
       [ 1., 1., 1., 0.],
       [ 2., 2., 2., 1.],
       [ 2., 2., 2., 2.]])

Sometimes it may be useful to convert the data back into the original
feature space. The ``inverse_transform`` function converts the binned
data into the original feature space. Each value will be equal to the mean
of the two bin edges.

>>> est.bin_edges_[0]
array([-2., -1.,  0.,  1.])
>>> est.inverse_transform(Xt)
array([[-1.5,  1.5, -3.5, -0.5],
       [-0.5,  2.5, -2.5, -0.5],
       [ 0.5,  3.5, -1.5,  0.5],
       [ 0.5,  3.5, -1.5,  1.5]])
    """

    _parameter_constraints: dict = {
        "n_bins": [Interval(Integral, 2, None, closed="left"), "array-like"],
        "encode": [StrOptions({"onehot", "onehot-dense", "ordinal"})],
        "strategy": [StrOptions({"uniform", "quantile", "kmeans"})],
        "quantile_method": [
            StrOptions(
                {
                    "warn",
                    "inverted_cdf",
                    "averaged_inverted_cdf",
                    "closest_observation",
                    "interpolated_inverted_cdf",
                    "hazen",
                    "weibull",
                    "linear",
                    "median_unbiased",
                    "normal_unbiased",
                }
            )
        ],
        "dtype": [Options(type, (np.float64, np.float32)), None],
        "subsample": [Interval(Integral, 1, None, closed="left"), None],
        "random_state": ["random_state"],
    }

    def __init__(
        self,
        n_bins=5,
        *,
        encode="onehot",
        strategy="quantile",
        quantile_method="warn",
        dtype=None,
        subsample=200_000,
        random_state=None,
    ):
        self.n_bins = n_bins
        self.encode = encode
        self.strategy = strategy
        self.quantile_method = quantile_method
        self.dtype = dtype
        self.subsample = subsample
        self.random_state = random_state

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None, sample_weight=None):
        """
        Fit the estimator.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to be discretized.

        y : None
            Ignored. This parameter exists only for compatibility with
            :class:`~sklearn.pipeline.Pipeline`.

        sample_weight : ndarray of shape (n_samples,)
            Contains weight values to be associated with each sample.

            .. versionadded:: 1.3

            .. versionchanged:: 1.7
               Added support for strategy="uniform".

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = validate_data(self, X, dtype="numeric")

        if self.dtype in (np.float64, np.float32):
            output_dtype = self.dtype
        else:  # self.dtype is None
            output_dtype = X.dtype

        n_samples, n_features = X.shape

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

        if self.subsample is not None and n_samples > self.subsample:
            # Subsample with replacement so that the (possibly weighted)
            # distribution of values is preserved.
            X = resample(
                X,
                replace=True,
                n_samples=self.subsample,
                random_state=self.random_state,
                sample_weight=sample_weight,
            )
            # The weights were already taken into account by the resampling
            # step, so they must not be applied a second time below.
            sample_weight = None

        n_features = X.shape[1]
        n_bins = self._validate_n_bins(n_features)
        bin_edges = np.zeros(n_features, dtype=object)

        # TODO(1.9): remove the "warn" transition value of quantile_method.
        quantile_method = self.quantile_method
        if self.strategy == "quantile" and quantile_method == "warn":
            warnings.warn(
                "The current default behavior, quantile_method='linear', will be "
                "changed to quantile_method='averaged_inverted_cdf' in "
                "scikit-learn version 1.9 to naturally support sample weight "
                "equivalence properties by default. Pass "
                "quantile_method='averaged_inverted_cdf' explicitly to silence this "
                "warning.",
                FutureWarning,
            )
            quantile_method = "linear"

        if (
            self.strategy == "quantile"
            and quantile_method not in ("inverted_cdf", "averaged_inverted_cdf")
            and sample_weight is not None
        ):
            raise ValueError(
                "When fitting with strategy='quantile' and sample weights, "
                "quantile_method should either be set to "
                "'averaged_inverted_cdf' or 'inverted_cdf', got "
                f"quantile_method='{quantile_method}' instead."
            )

        if self.strategy != "quantile" and sample_weight is not None:
            # Mask out zero-weight samples when extracting the min and max
            # values of each feature.
            nnz_weight_mask = sample_weight != 0
        else:
            nnz_weight_mask = slice(None)  # select all samples

        for jj in range(n_features):
            column = X[:, jj]
            col_min = column[nnz_weight_mask].min()
            col_max = column[nnz_weight_mask].max()

            if col_min == col_max:
                warnings.warn(
                    "Feature %d is constant and will be replaced with 0." % jj
                )
                n_bins[jj] = 1
                bin_edges[jj] = np.array([-np.inf, np.inf])
                continue

            if self.strategy == "uniform":
                bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)

            elif self.strategy == "quantile":
                percentile_levels = np.linspace(0, 100, n_bins[jj] + 1)

                # method="linear" is the implicit default for any numpy
                # version, so it is omitted to stay version independent.
                percentile_kwargs = {}
                if quantile_method != "linear" and sample_weight is None:
                    percentile_kwargs["method"] = quantile_method

                if sample_weight is None:
                    bin_edges[jj] = np.asarray(
                        np.percentile(column, percentile_levels, **percentile_kwargs),
                        dtype=np.float64,
                    )
                else:
                    percentile_func = {
                        "inverted_cdf": _weighted_percentile,
                        "averaged_inverted_cdf": _averaged_weighted_percentile,
                    }[quantile_method]
                    bin_edges[jj] = np.asarray(
                        [
                            percentile_func(column, sample_weight, percentile_rank=p)
                            for p in percentile_levels
                        ],
                        dtype=np.float64,
                    )

            elif self.strategy == "kmeans":
                from ..cluster import KMeans  # fixes import loops

                # Deterministic initialization with uniform spacing.
                uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)
                init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5

                # 1D k-means procedure.
                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)
                centers = km.fit(
                    column[:, None], sample_weight=sample_weight
                ).cluster_centers_[:, 0]
                # Must sort, centers may be unsorted even with sorted init.
                centers.sort()
                bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5
                bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]

            # Remove bins whose width is too small (i.e., <= 1e-8).
            if self.strategy in ("quantile", "kmeans"):
                mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8
                bin_edges[jj] = bin_edges[jj][mask]
                if len(bin_edges[jj]) - 1 != n_bins[jj]:
                    warnings.warn(
                        "Bins whose width are too small (i.e., <= 1e-8) in "
                        "feature %d are removed. Consider decreasing the "
                        "number of bins." % jj
                    )
                    n_bins[jj] = len(bin_edges[jj]) - 1

        self.bin_edges_ = bin_edges
        self.n_bins_ = n_bins

        if "onehot" in self.encode:
            self._encoder = OneHotEncoder(
                categories=[np.arange(i) for i in self.n_bins_],
                sparse_output=self.encode == "onehot",
                dtype=output_dtype,
            )
            # Fit the OneHotEncoder on a toy dataset so that it is ready to
            # transform as soon as the KBinsDiscretizer itself is fitted.
            self._encoder.fit(np.zeros((1, len(self.n_bins_))))

        return self

    def _validate_n_bins(self, n_features):
        """Returns n_bins_, the number of bins per feature."""
        orig_bins = self.n_bins
        if isinstance(orig_bins, Integral):
            return np.full(n_features, orig_bins, dtype=int)

        n_bins = check_array(orig_bins, dtype=int, copy=True, ensure_2d=False)

        if n_bins.ndim > 1 or n_bins.shape[0] != n_features:
            raise ValueError(
                "n_bins must be a scalar or array of shape (n_features,)."
            )

        bad_nbins_value = (n_bins < 2) | (n_bins != orig_bins)

        violating_indices = np.where(bad_nbins_value)[0]
        if violating_indices.shape[0] > 0:
            indices = ", ".join(str(i) for i in violating_indices)
            raise ValueError(
                "{} received an invalid number of bins at indices {}. Number "
                "of bins must be at least 2, and must be an int.".format(
                    KBinsDiscretizer.__name__, indices
                )
            )
        return n_bins

    def transform(self, X):
        """
        Discretize the data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to be discretized.

        Returns
        -------
        Xt : {ndarray, sparse matrix}, dtype={np.float32, np.float64}
            Data in the binned space. Will be a sparse matrix if
            `self.encode='onehot'` and ndarray otherwise.
        """
        check_is_fitted(self)

        # Check input and attribute dtypes.
        dtype = (np.float64, np.float32) if self.dtype is None else self.dtype
        Xt = validate_data(self, X, copy=True, dtype=dtype, reset=False)

        bin_edges = self.bin_edges_
        for jj in range(Xt.shape[1]):
            Xt[:, jj] = np.searchsorted(bin_edges[jj][1:-1], Xt[:, jj], side="right")

        if self.encode == "ordinal":
            return Xt

        dtype_init = None
        if "onehot" in self.encode:
            dtype_init = self._encoder.dtype
            self._encoder.dtype = Xt.dtype
        try:
            Xt_enc = self._encoder.transform(Xt)
        finally:
            # Revert the initial dtype to avoid modifying self.
            self._encoder.dtype = dtype_init
        return Xt_enc

    def inverse_transform(self, X):
        """
        Transform discretized data back to original feature space.

        Note that this function does not regenerate the original data
        due to discretization rounding.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Transformed data in the binned space.

        Returns
        -------
        X_original : ndarray, dtype={np.float32, np.float64}
            Data in the original feature space.
        """
        check_is_fitted(self)

        if "onehot" in self.encode:
            X = self._encoder.inverse_transform(X)

        Xinv = check_array(X, copy=True, dtype=(np.float64, np.float32))
        n_features = self.n_bins_.shape[0]
        if Xinv.shape[1] != n_features:
            raise ValueError(
                "Incorrect number of features. Expecting {}, received {}.".format(
                    n_features, Xinv.shape[1]
                )
            )

        for jj in range(n_features):
            bin_edges = self.bin_edges_[jj]
            bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5
            Xinv[:, jj] = bin_centers[(Xinv[:, jj]).astype(np.int64)]

        return Xinv

    def get_feature_names_out(self, input_features=None):
        """Get output feature names.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input features.

            - If `input_features` is `None`, then `feature_names_in_` is
              used as feature names in. If `feature_names_in_` is not defined,
              then the following input feature names are generated:
              `["x0", "x1", ..., "x(n_features_in_ - 1)"]`.
            - If `input_features` is an array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        check_is_fitted(self, "n_features_in_")
        input_features = _check_feature_names_in(self, input_features)
        if hasattr(self, "_encoder"):
            return self._encoder.get_feature_names_out(input_features)

        # Ordinal encoding.
        return input_features