
    -i9:                         S SK Jr  S SKrSSKJr  SSKJr  SSKJ	r	  SSK
Jr  SSKJr  SS	KJr  SS
KJrJrJr  SSKJrJrJrJr  SSKJr   " S S\5      rg)    )IntegralN   )_fit_context)pairwise_distances_chunked)_NAN_METRICS)_get_weights)	_get_mask)is_scalar_nan)HiddenInterval
StrOptions)FLOAT_DTYPES_check_feature_names_incheck_is_fittedvalidate_data   )_BaseImputerc                     ^  \ rS rSr% Sr0 \R                  E\" \SSSS9/\	" SS15      \
\" S5      /\	" \" \5      5      \
/S	/S
.Er\\S'   \R"                  SSSSSSS.U 4S jjrS r\" SS9SU 4S jj5       rU 4S jrSS jrSrU =r$ )
KNNImputer   a  Imputation for completing missing values using k-Nearest Neighbors.

Each sample's missing values are imputed using the mean value from
`n_neighbors` nearest neighbors found in the training set. Two samples are
close if the features that neither is missing are close.

Read more in the :ref:`User Guide <knnimpute>`.

.. versionadded:: 0.22

Parameters
----------
missing_values : int, float, str, np.nan or None, default=np.nan
    The placeholder for the missing values. All occurrences of
    `missing_values` will be imputed. For pandas' dataframes with
    nullable integer dtypes with missing values, `missing_values`
    should be set to np.nan, since `pd.NA` will be converted to np.nan.

n_neighbors : int, default=5
    Number of neighboring samples to use for imputation.

weights : {'uniform', 'distance'} or callable, default='uniform'
    Weight function used in prediction.  Possible values:

    - 'uniform' : uniform weights. All points in each neighborhood are
      weighted equally.
    - 'distance' : weight points by the inverse of their distance.
      in this case, closer neighbors of a query point will have a
      greater influence than neighbors which are further away.
    - callable : a user-defined function which accepts an
      array of distances, and returns an array of the same shape
      containing the weights.

metric : {'nan_euclidean'} or callable, default='nan_euclidean'
    Distance metric for searching neighbors. Possible values:

    - 'nan_euclidean'
    - callable : a user-defined function which conforms to the definition
      of ``func_metric(x, y, *, missing_values=np.nan)``. `x` and `y`
      corresponds to a row (i.e. 1-D arrays) of `X` and `Y`, respectively.
      The callable should returns a scalar distance value.

copy : bool, default=True
    If True, a copy of X will be created. If False, imputation will
    be done in-place whenever possible.

add_indicator : bool, default=False
    If True, a :class:`MissingIndicator` transform will stack onto the
    output of the imputer's transform. This allows a predictive estimator
    to account for missingness despite imputation. If a feature has no
    missing values at fit/train time, the feature won't appear on the
    missing indicator even if there are missing values at transform/test
    time.

keep_empty_features : bool, default=False
    If True, features that consist exclusively of missing values when
    `fit` is called are returned in results when `transform` is called.
    The imputed value is always `0`.

    .. versionadded:: 1.2

Attributes
----------
indicator_ : :class:`~sklearn.impute.MissingIndicator`
    Indicator used to add binary indicators for missing values.
    ``None`` if add_indicator is False.

n_features_in_ : int
    Number of features seen during :term:`fit`.

    .. versionadded:: 0.24

feature_names_in_ : ndarray of shape (`n_features_in_`,)
    Names of features seen during :term:`fit`. Defined only when `X`
    has feature names that are all strings.

    .. versionadded:: 1.0

See Also
--------
SimpleImputer : Univariate imputer for completing missing values
    with simple strategies.
IterativeImputer : Multivariate imputer that estimates values to impute for
    each feature with missing values from all the others.

References
----------
* `Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, Trevor
  Hastie, Robert Tibshirani, David Botstein and Russ B. Altman, Missing
  value estimation methods for DNA microarrays, BIOINFORMATICS Vol. 17
  no. 6, 2001 Pages 520-525.
  <https://academic.oup.com/bioinformatics/article/17/6/520/272365>`_

Examples
--------
>>> import numpy as np
>>> from sklearn.impute import KNNImputer
>>> X = [[1, 2, np.nan], [3, 4, 3], [np.nan, 6, 5], [8, 8, 7]]
>>> imputer = KNNImputer(n_neighbors=2)
>>> imputer.fit_transform(X)
array([[1. , 2. , 4. ],
       [3. , 4. , 3. ],
       [5.5, 6. , 5. ],
       [8. , 8. , 7. ]])

For a more detailed example see
:ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`.
r   Nleft)closeduniformdistanceboolean)n_neighborsweightsmetriccopy_parameter_constraints   nan_euclideanTF)missing_valuesr   r   r   r   add_indicatorkeep_empty_featuresc                T   > [         TU ]  UUUS9  X l        X0l        X@l        XPl        g )N)r#   r$   r%   )super__init__r   r   r   r   )	selfr#   r   r   r   r   r$   r%   	__class__s	           F/var/www/html/venv/lib/python3.13/site-packages/sklearn/impute/_knn.pyr(   KNNImputer.__init__   s7     	)' 3 	 	

 '	    c                 4   [         R                  " XS-
  SS9SS2SU24   nU[         R                  " UR                  S   5      SS2S4   U4   n[	        X`R
                  5      nUb  SU[         R                  " U5      '   O/[         R                  " U5      nSU[         R                  " U5      '   UR                  U5      nUR                  U5      n	[         R                  R                  XS9n[         R                  R                  USUS9R                  $ )a  Helper function to impute a single column.

Parameters
----------
dist_pot_donors : ndarray of shape (n_receivers, n_potential_donors)
    Distance matrix between the receivers and potential donors from
    training set. There must be at least one non-nan distance between
    a receiver and a potential donor.

n_neighbors : int
    Number of neighbors to consider.

fit_X_col : ndarray of shape (n_potential_donors,)
    Column of potential donors from training set.

mask_fit_X_col : ndarray of shape (n_potential_donors,)
    Missing mask for fit_X_col.

Returns
-------
imputed_values: ndarray of shape (n_receivers,)
    Imputed values for receiver.
r   axisNr   g        mask)r0   r   )npargpartitionarangeshaper   r   isnan	ones_liketakemaarrayaveragedata)
r)   dist_pot_donorsr   	fit_X_colmask_fit_X_col
donors_idxdonors_distweight_matrixdonorsdonors_masks
             r+   _calc_imputeKNNImputer._calc_impute   s    2 ___AoAN||O


 &IIj&&q)*1d73Z?
 %[,,? $58M"((=12LL5M36M"((;/0 
+$))*5V6uu}}V!]}CHHHr-   )prefer_skip_nested_validationc           	      P  > [        U R                  5      (       d  SnOSn[        U US[        UU R                  S9nXl        [        U R
                  U R                  5      U l        [        R                  " U R                  SS9) U l
        [        TU ]1  U R                  5        U $ )a^  Fit the imputer on X.

Parameters
----------
X : array-like shape of (n_samples, n_features)
    Input data, where `n_samples` is the number of samples and
    `n_features` is the number of features.

y : Ignored
    Not used, present here for API consistency by convention.

Returns
-------
self : object
    The fitted `KNNImputer` class instance.
T	allow-nanF)accept_sparsedtypeensure_all_finiter   r   r/   )r
   r#   r   r   r   _fit_Xr	   _mask_fit_Xr3   all_valid_maskr'   _fit_indicator)r)   XyrM   r*   s       r+   fitKNNImputer.fit   s    & T0011 $ +/
 $T[[$2E2EFFF4#3#3!<<t//0r-   c                   >^ ^^^	^
^^^ [        T 5        [        T R                  5      (       d  SnOSn[        T TS[        SUT R
                  SS9m[        TT R                  5      m	T R                  m
T R                  m[        TT ])  T	5      n[        R                  " T	SS2T4   5      (       d6  T R                  (       a  TnSUSS2T) 4'   O	TSS2T4   n[        TT ]9  XC5      $ [        R                  " T	SS2T4   R                  SS95      m[        R                   " T
5      m[        R"                  " TR$                  S   [&        S	9m[        R(                  " TR$                  S   5      TT'   UUU	U
UUU U4S
 jn[+        TTSS24   T R,                  T R.                  T R                  UUS9nU H  nM     T R                  (       a  TnSUSS2T) 4'   O	TSS2T4   n[        TT ]9  XC5      $ )aC  Impute all missing values in X.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    The input data to complete.

Returns
-------
X : array-like of shape (n_samples, n_output_features)
    The imputed dataset. `n_output_features` is the number of features
    that is not always missing during `fit`.
TrJ   F)rK   rL   force_writeablerM   r   resetNr   r   r/   )rL   c           	      X  > TX[        U 5      -    n[        TR                  S   5       GH|  nTU   (       d  M  TX#4   n[        R                  " U5      (       d  M3  [        R
                  " TS S 2U4   5      u  nU[        R                  " U5         nU TU   U-
     S S 2U4   n[        R                  " U5      R                  SS9nXh   n	U	R                  (       a|  [        R                  R                  TR                  S S 2U4   TS S 2U4   S9R                  5       n
U
TX4'   [        U	5      [        U5      :X  a  GM  Xh)    nU TU   U-
     S S 2U4   n[        TR                  [        U5      5      nTR!                  UUTR                  XS4   TXS4   5      nUTXc4'   GM     g )Nr   r/   r1   )lenranger6   r3   anynonzeroflatnonzeror7   rP   sizer:   r;   rN   meanminr   rF   )
dist_chunkstartrow_missing_chunkcolcol_maskpotential_donors_idxreceivers_idxdist_subsetall_nan_dist_maskall_nan_receivers_idxcol_meanr   valuerS   dist_idx_mapr2   
mask_fit_Xnon_missing_fix_Xrow_missing_idxr)   
valid_masks                r+   process_chunk+KNNImputer.transform.<locals>.process_chunk:  s    /J8O P QWWQZ(!# 1 67vvh''*,**5Fq#v5N*O'% !2"..2J K )m)Du)LM++
 %'HH[$9$=$=1$=$E!(5(H%(--!uu{{AsF+*QV2D  +  df  5=A+0101S5GG  %22D$EM",\--H5-P"Q//#K "$"2"2C8L4MN))KK 4 9:389	 ).-$%_ )r-   )r   r#   rM   reduce_func)r   r
   r#   r   r   r   r	   rO   rQ   r'   _transform_indicatorr3   r]   r%   _concatenate_indicatorr_   logical_notzerosr6   intr5   r   rN   r   )r)   rS   rM   X_indicatorXcrt   genchunkro   r2   rp   rq   rr   rs   r*   s   ``      @@@@@@r+   	transformKNNImputer.transform   s    	T0011 $ + /	
 D//0%%
%%
g248 vvd1j=)**''%&1zk>"q*}% 71"BB..am)<)@)@a)@)HINN:6 xx
#6(*		/2G2G2J(K_%3	. 3	.l )oq !KK;;../%
 E  ##B!"Bq:+~1j=!Bw-b>>r-   c                 n    [        U S5        [        X5      nXR                     nU R                  X!5      $ )ao  Get output feature names for transformation.

Parameters
----------
input_features : array-like of str or None, default=None
    Input features.

    - If `input_features` is `None`, then `feature_names_in_` is
      used as feature names in. If `feature_names_in_` is not defined,
      then the following input feature names are generated:
      `["x0", "x1", ..., "x(n_features_in_ - 1)"]`.
    - If `input_features` is an array-like, then `input_features` must
      match `feature_names_in_` if `feature_names_in_` is defined.

Returns
-------
feature_names_out : ndarray of str objects
    Transformed feature names.
n_features_in_)r   r   rQ   (_concatenate_indicator_feature_names_out)r)   input_featuresnamess      r+   get_feature_names_out KNNImputer.get_feature_names_out  s8    ( 	./0F//0<<USSr-   )rN   rO   rQ   r   r   r   r   )N)__name__
__module____qualname____firstlineno____doc__r   r    r   r   r   callabler   setr   dict__annotations__r3   nanr(   rF   r   rU   r   r   __static_attributes____classcell__)r*   s   @r+   r   r      s    kZ$

-
-$ 1d6BC	:676$<Pc,/0(;$D  vv! *0Id 5& 6&PD?LT Tr-   r   )numbersr   numpyr3   baser   metricsr   metrics.pairwiser   neighbors._baser   utils._maskr	   utils._missingr
   utils._param_validationr   r   r   utils.validationr   r   r   r   _baser   r    r-   r+   <module>r      sD       0 + * # * B B   CT CTr-   