
    -i@                     H   S r SSKrSSKJr  SSKJrJr  SSKrSSK	J
r
  SSKJr  SSKJr  SSKJr  S	S
KJrJr  S	SKJr  S	SKJr  S	SKJrJrJr  S	SKJrJr  S	SKJ r   SSK!J"r"  \RF                  " \RH                  5      RJ                  r&S r'SS jr(S r)S r* " S S\\"5      r+g)z<
A Theil-Sen Estimator for Multiple Linear Regression Model
    N)combinations)IntegralReal)effective_n_jobs)linalg)get_lapack_funcs)binom   )RegressorMixin_fit_context)ConvergenceWarning)check_random_state)HiddenInterval
StrOptions)Paralleldelayed)validate_data   )LinearModelc                 .   X-
  n[         R                  " [         R                  " US-  SS95      nU[        :  n[	        UR                  5       U R
                  S   :  5      nX$   nX4   SS2[         R                  4   n[        R                  " [         R                  " X#-  SS95      nU[        :  a8  [         R                  " XSS24   U-  SS9[         R                  " SU-  SS9-  nOSnSn[        SSXV-  -
  5      U-  [        SXV-  5      U-  -   $ )u  Modified Weiszfeld step.

This function defines one iteration step in order to approximate the
spatial median (L1 median). It is a form of an iteratively re-weighted
least squares method.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training vector, where `n_samples` is the number of samples and
    `n_features` is the number of features.

x_old : ndarray of shape = (n_features,)
    Current start vector.

Returns
-------
x_new : ndarray of shape (n_features,)
    New iteration step.

References
----------
- On Computation of Spatial Median for Robust Data Mining, 2005
  T. Kärkkäinen and S. Äyrämö
  http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
r
   r   axisr   Ng      ?        )npsqrtsum_EPSILONintshapenewaxisr   normmaxmin)Xx_olddiff	diff_normmaskis_x_old_in_Xquotient_normnew_directions           R/var/www/html/venv/lib/python3.13/site-packages/sklearn/linear_model/_theil_sen.py_modified_weiszfeld_stepr.      s   6 9DtQwQ/0I D
QWWQZ/0M:D2::.IKKt'7a @AMxqqzI5A>	MB
 
  	C}445E
c=0
1E
9	:    c                    U R                   S   S:X  a%  S[        R                  " U R                  5       SS94$ US-  n[        R                  " U SS9n[        U5       H3  n[        X5      n[        R                  " X5-
  S-  5      U:  a    XE4$ UnM5     [        R                  " SR                  US9[        5        WW4$ )	u  Spatial median (L1 median).

The spatial median is member of a class of so-called M-estimators which
are defined by an optimization problem. Given a number of p points in an
n-dimensional space, the point x minimizing the sum of all distances to the
p other points is called spatial median.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training vector, where `n_samples` is the number of samples and
    `n_features` is the number of features.

max_iter : int, default=300
    Maximum number of iterations.

tol : float, default=1.e-3
    Stop the algorithm if spatial_median has converged.

Returns
-------
spatial_median : ndarray of shape = (n_features,)
    Spatial median.

n_iter : int
    Number of iterations needed.

References
----------
- On Computation of Spatial Median for Robust Data Mining, 2005
  T. Kärkkäinen and S. Äyrämö
  http://users.jyu.fi/~samiayr/pdf/ayramo_eurogen05.pdf
r   T)keepdimsr
   r   r   zYMaximum number of iterations {max_iter} reached in spatial median for TheilSen regressor.)max_iter)r    r   medianravelmeanranger.   r   warningswarnformatr   )r%   r2   tolspatial_median_oldn_iterspatial_medians         r-   _spatial_medianr>   P   s    D 	wwqzQ"))AGGI555AIC+/1!H66%61<=C !! "0 " 	vxv(		
 >!!r/   c                 :    SSSU-  -  X-
  S-   -  U-   S-
  U -  -
  $ )zApproximation of the breakdown point.

Parameters
----------
n_samples : int
    Number of samples.

n_subsamples : int
    Number of subsamples to consider.

Returns
-------
breakdown_point : float
    Approximation of breakdown point.
r   g      ? )	n_samplesn_subsampless     r-   _breakdown_pointrC      sE    " 	
A$%)AA)EF 	r/   c                    [        U5      nU R                  S   U-   nUR                  S   n[        R                  " UR                  S   U45      n[        R                  " XT45      n[        R
                  " [        XT5      5      n[        SXx45      u  n	[        U5       H-  u  pXSS24   USS2US24'   X   USU& U	" Xx5      S   SU Xj'   M/     U$ )aQ  Least Squares Estimator for TheilSenRegressor class.

This function calculates the least squares method on a subset of rows of X
and y defined by the indices array. Optionally, an intercept column is
added if intercept is set to true.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Design matrix, where `n_samples` is the number of samples and
    `n_features` is the number of features.

y : ndarray of shape (n_samples,)
    Target vector, where `n_samples` is the number of samples.

indices : ndarray of shape (n_subpopulation, n_subsamples)
    Indices of all subsamples with respect to the chosen subpopulation.

fit_intercept : bool
    Fit intercept or not.

Returns
-------
weights : ndarray of shape (n_subpopulation, n_features + intercept)
    Solution matrix of n_subpopulation solved least square problems.
r   r   )gelssN)	r   r    r   emptyoneszerosr#   r   	enumerate)r%   yindicesfit_intercept
n_featuresrB   weightsX_subpopulationy_subpopulationlstsqindexsubsets               r-   _lstsqrT      s    6 &Mm+J==#Lhha(*56Ggg|89OhhL =?O
_,NOHU"7+-.qy\=>)*)*&@CKZP ,
 Nr/   c                       \ rS rSr% SrS/S\" \" S15      5      /\" \SSSS9/S\	/\" \	S	SSS9/\" \S
SSS9/S/S\	/S/S.	r
\\S'   SSSSSSSSSS.	S jrS r\" SS9S 5       rSrg)TheilSenRegressor   ao  Theil-Sen Estimator: robust multivariate regression model.

The algorithm calculates least square solutions on subsets with size
n_subsamples of the samples in X. Any value of n_subsamples between the
number of features and samples leads to an estimator with a compromise
between robustness and efficiency. Since the number of least square
solutions is "n_samples choose n_subsamples", it can be extremely large
and can therefore be limited with max_subpopulation. If this limit is
reached, the subsets are chosen randomly. In a final step, the spatial
median (or L1 median) is calculated of all least square solutions.

Read more in the :ref:`User Guide <theil_sen_regression>`.

Parameters
----------
fit_intercept : bool, default=True
    Whether to calculate the intercept for this model. If set
    to false, no intercept will be used in calculations.

copy_X : bool, default=True
    If True, X will be copied; else, it may be overwritten.

    .. deprecated:: 1.6
        `copy_X` was deprecated in 1.6 and will be removed in 1.8.
        It has no effect as a copy is always made.

max_subpopulation : int, default=1e4
    Instead of computing with a set of cardinality 'n choose k', where n is
    the number of samples and k is the number of subsamples (at least
    number of features), consider only a stochastic subpopulation of a
    given maximal size if 'n choose k' is larger than max_subpopulation.
    For other than small problem sizes this parameter will determine
    memory usage and runtime if n_subsamples is not changed. Note that the
    data type should be int but floats such as 1e4 can be accepted too.

n_subsamples : int, default=None
    Number of samples to calculate the parameters. This is at least the
    number of features (plus 1 if fit_intercept=True) and the number of
    samples as a maximum. A lower number leads to a higher breakdown
    point and a low efficiency while a high number leads to a low
    breakdown point and a high efficiency. If None, take the
    minimum number of subsamples leading to maximal robustness.
    If n_subsamples is set to n_samples, Theil-Sen is identical to least
    squares.

max_iter : int, default=300
    Maximum number of iterations for the calculation of spatial median.

tol : float, default=1e-3
    Tolerance when calculating spatial median.

random_state : int, RandomState instance or None, default=None
    A random number generator instance to define the state of the random
    permutations generator. Pass an int for reproducible output across
    multiple function calls.
    See :term:`Glossary <random_state>`.

n_jobs : int, default=None
    Number of CPUs to use during the cross validation.
    ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
    ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
    for more details.

verbose : bool, default=False
    Verbose mode when fitting the model.

Attributes
----------
coef_ : ndarray of shape (n_features,)
    Coefficients of the regression model (median of distribution).

intercept_ : float
    Estimated intercept of regression model.

breakdown_ : float
    Approximated breakdown point.

n_iter_ : int
    Number of iterations needed for the spatial median.

n_subpopulation_ : int
    Number of combinations taken into account from 'n choose k', where n is
    the number of samples and k is the number of subsamples.

n_features_in_ : int
    Number of features seen during :term:`fit`.

    .. versionadded:: 0.24

feature_names_in_ : ndarray of shape (`n_features_in_`,)
    Names of features seen during :term:`fit`. Defined only when `X`
    has feature names that are all strings.

    .. versionadded:: 1.0

See Also
--------
HuberRegressor : Linear regression model that is robust to outliers.
RANSACRegressor : RANSAC (RANdom SAmple Consensus) algorithm.
SGDRegressor : Fitted by minimizing a regularized empirical loss with SGD.

References
----------
- Theil-Sen Estimators in a Multiple Linear Regression Model, 2009
  Xin Dang, Hanxiang Peng, Xueqin Wang and Heping Zhang
  http://home.olemiss.edu/~xdang/papers/MTSE.pdf

Examples
--------
>>> from sklearn.linear_model import TheilSenRegressor
>>> from sklearn.datasets import make_regression
>>> X, y = make_regression(
...     n_samples=200, n_features=2, noise=4.0, random_state=0)
>>> reg = TheilSenRegressor(random_state=0).fit(X, y)
>>> reg.score(X, y)
0.9884
>>> reg.predict(X[:1,])
array([-31.5871])
boolean
deprecatedr   Nleft)closedr   r   random_stateverbose	rL   copy_Xmax_subpopulationrB   r2   r:   r\   n_jobsr]   _parameter_constraintsTg     @,  MbP?Fc       	         p    Xl         X l        X0l        X@l        XPl        X`l        Xpl        Xl        Xl        g Nr^   )
selfrL   r_   r`   rB   r2   r:   r\   ra   r]   s
             r-   __init__TheilSenRegressor.__init__U  s5     +!2( (r/   c           	         U R                   nU R                  (       a  US-   nOUnUbz  X1:  a  [        SR                  X15      5      eX:  a6  XC:  a0  U R                  (       a  SOSn[        SR                  XTU5      5      eO+X1:w  a  [        SR                  X15      5      eO[	        XA5      n[        S[        R                  " [        X5      5      5      n[        [	        U R                  U5      5      nX74$ )Nr   z=Invalid parameter since n_subsamples > n_samples ({0} > {1}).z+1 zAInvalid parameter since n_features{0} > n_subsamples ({1} > {2}).z\Invalid parameter since n_subsamples != n_samples ({0} != {1}) while n_samples < n_features.)rB   rL   
ValueErrorr9   r$   r#   r   rintr	   r   r`   )rg   rA   rM   rB   n_dimplus_1all_combinationsn_subpopulations           r-   _check_subparams"TheilSenRegressor._check_subparamsl  s    ((NEE#' --3VL-L  &'%)%7%7TRF$!6&>  (  ,$((.|(G  - u0Lq"''%	*H"IJc$"8"8:JKL,,r/   )prefer_skip_nested_validationc           	        ^ ^^^ T R                   S:w  a  [        R                  " S[        5        [	        T R
                  5      n[        T TTSS9u  mmTR                  u  pET R                  XE5      u  nT l	        [        XF5      T l        T R                  (       a  [        SR                  T R                  5      5        [        SR                  U5      5        [        T R                  U-  5      n[        SR                  U5      5        [        SR                  T R                  5      5        [         R"                  " [%        XF5      5      T R&                  ::  a  [)        [+        [-        U5      U5      5      nO3[-        T R                  5       V	s/ s H  n	UR/                  XFS	S
9PM     nn	[1        T R2                  5      n
[         R4                  " X5      m[7        U
T R                  S9" UUU U4S j[-        U
5       5       5      n[         R8                  " U5      n[;        UT R<                  T R>                  S9u  T l         nT RB                  (       a  US   T l"        USS T l#        T $ ST l"        UT l#        T $ s  sn	f )zFit linear model.

Parameters
----------
X : ndarray of shape (n_samples, n_features)
    Training data.
y : ndarray of shape (n_samples,)
    Target values.

Returns
-------
self : returns an instance of self.
    Fitted `TheilSenRegressor` estimator.
rY   z`copy_X` was deprecated in 1.6 and will be removed in 1.8 since it has no effect internally. Simply leave this parameter to its default value to avoid this warning.T)	y_numericzBreakdown point: {0}zNumber of samples: {0}zTolerable outliers: {0}zNumber of subpopulations: {0}F)sizereplace)ra   r]   c              3   n   >#    U  H*  n[        [        5      " TTTU   TR                  5      v   M,     g 7frf   )r   rT   rL   ).0jobr%   
index_listrg   rJ   s     r-   	<genexpr>(TheilSenRegressor.fit.<locals>.<genexpr>  s5      @
$ FOAq*S/43E3EFF$s   25)r2   r:   r   r   Nr   )$r_   r7   r8   FutureWarningr   r\   r   r    rr   n_subpopulation_rC   
breakdown_r]   printr9   r   r   rm   r	   r`   listr   r6   choicer   ra   array_splitr   vstackr>   r2   r:   n_iter_rL   
intercept_coef_)rg   r%   rJ   r\   rA   rM   rB   tol_outliersrK   _ra   rN   coefsr|   s   ```          @r-   fitTheilSenRegressor.fit  s!     ;;,&MM/ 	 *$*;*;<T1a481 !	.2.C.C/
+d+ +9C<<(//@A*11)<=t:;L+22<@A1889N9NOP 77512d6L6LL<i(8,GHG t4455A ##I%#P5  
 "$++.^^G4
&$,,? @
V}@
 
 ))G$-dmm
e #AhDOqrDJ
  "DODJ/s   I>)r   r   r_   rL   r   r2   r`   r   ra   r   rB   r\   r:   r]   )__name__
__module____qualname____firstlineno____doc__r   r   r   r   r   rb   dict__annotations__rh   rr   r   r   __static_attributes__r@   r/   r-   rV   rV      s    vr $fZ%?@A&tQVDEx(h4?@sD89'(";$D   .#-J 5A 6Ar/   rV   )rc   rd   ),r   r7   	itertoolsr   numbersr   r   numpyr   joblibr   scipyr   scipy.linalg.lapackr   scipy.specialr	   baser   r   
exceptionsr   utilsr   utils._param_validationr   r   r   utils.parallelr   r   utils.validationr   _baser   finfodoubleepsr   r.   r>   rC   rT   rV   r@   r/   r-   <module>r      s{     " "  #  0  / + & B B . , 88BII""0f5"p6)XD Dr/   