
"""
Loss functions for linear models with raw_prediction = X @ coef
"""

import numpy as np
from scipy import sparse

from ..utils.extmath import squared_norm


def sandwich_dot(X, W):
    """Compute the sandwich product X.T @ diag(W) @ X."""
    n_samples = X.shape[0]
    if sparse.issparse(X):
        return (
            X.T @ sparse.dia_matrix((W, 0), shape=(n_samples, n_samples)) @ X
        ).toarray()
    else:
        # Dense case: scale the rows of X by W, then use a single BLAS-backed
        # matrix multiplication.
        WX = W[:, None] * X
        return X.T @ WX
 jr    SS jr      SS jr SS jrSrg)LinearModelLoss%   aJ  General class for loss functions with raw_prediction = X @ coef + intercept.

Note that raw_prediction is also known as linear predictor.

The loss is the average of the per sample losses and includes a term for L2
regularization::

    loss = 1 / s_sum * sum_i s_i loss(y_i, X_i @ coef + intercept)
           + 1/2 * l2_reg_strength * ||coef||_2^2

with sample weights s_i=1 if sample_weight=None and s_sum=sum_i s_i.
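
For example, a minimal sketch using the private ``HalfSquaredError`` loss from
``sklearn._loss`` (an internal API, shown here only for illustration)::

    import numpy as np
    from sklearn._loss import HalfSquaredError

    X = np.array([[1.0], [2.0]])
    y = np.array([0.5, 1.0])
    lml = LinearModelLoss(base_loss=HalfSquaredError(), fit_intercept=False)
    # Per sample loss is 0.5 * (y_i - X_i @ coef)**2. With coef = [0.5],
    # X @ coef equals y exactly, so the averaged loss is 0.0 (no penalty).
    lml.loss(np.array([0.5]), X, y, l2_reg_strength=0.0)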

Gradient and hessian, for simplicity without intercept, are::

    gradient = 1 / s_sum * X.T @ loss.gradient + l2_reg_strength * coef
    hessian = 1 / s_sum * X.T @ diag(loss.hessian) @ X
              + l2_reg_strength * identity
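
For the half squared error, for instance, loss.gradient per sample is
(raw_prediction - y), so with sample_weight=None this reduces to::

    gradient = X.T @ (X @ coef - y) / n_samples + l2_reg_strength * coef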

Conventions:
    if fit_intercept:
        n_dof = n_features + 1
    else:
        n_dof = n_features

    if base_loss.is_multiclass:
        coef.shape = (n_classes, n_dof) or ravelled (n_classes * n_dof,)
    else:
        coef.shape = (n_dof,)

    The intercept term is at the end of the coef array:
    if base_loss.is_multiclass:
        if coef.shape == (n_classes, n_dof):
            intercept = coef[:, -1]
        if coef.shape == (n_classes * n_dof,):
            intercept = coef[n_features * n_classes :] = coef[-n_classes:]
        intercept.shape = (n_classes,)
    else:
        intercept = coef[-1]

    Shape of gradient follows shape of coef.
    gradient.shape = coef.shape

But the hessian (to make our lives simpler) is always 2-d:
    if base_loss.is_multiclass:
        hessian.shape = (n_classes * n_dof, n_classes * n_dof)
    else:
        hessian.shape = (n_dof, n_dof)

Note: If coef has shape (n_classes * n_dof,), the 2d-array can be reconstructed as

    coef.reshape((n_classes, -1), order="F")

The option order="F" makes coef[:, i] contiguous. This, in turn, makes the
coefficients without intercept, coef[:, :-1], contiguous and speeds up
matrix-vector computations.
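
A concrete sketch of that layout (shapes only, not tied to any estimator)::

    import numpy as np

    n_classes, n_dof = 3, 2
    coef = np.array([1.0, 2.0, 3.0, 10.0, 20.0, 30.0])  # ravelled coef
    W = coef.reshape((n_classes, -1), order="F")
    # W = [[ 1., 10.],
    #      [ 2., 20.],
    #      [ 3., 30.]]
    # The intercepts are the last column, i.e. the last n_classes entries:
    assert np.array_equal(W[:, -1], coef[-n_classes:])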

Note: Because the per sample losses are averaged with the weights s_i / s_sum,
rescaling sample_weight by a positive constant leaves the loss unchanged; only
the relative sample weights matter.

Parameters
----------
base_loss : instance of class BaseLoss from sklearn._loss.
fit_intercept : bool
    """

    def __init__(self, base_loss, fit_intercept):
        self.base_loss = base_loss
        self.fit_intercept = fit_intercept

    def init_zero_coef(self, X, dtype=None):
        """Allocate coef of correct shape with zeros.

Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
    Training data.
dtype : data-type, default=None
    Overrides the data type of coef. With dtype=None, coef will have the same
    dtype as X.

Returns
-------
coef : ndarray of shape (n_dof,) or (n_classes, n_dof)
    Coefficients of a linear model.
        """
        n_features = X.shape[1]
        n_classes = self.base_loss.n_classes
        if self.fit_intercept:
            n_dof = n_features + 1
        else:
            n_dof = n_features
        if self.base_loss.is_multiclass:
            coef = np.zeros_like(X, shape=(n_classes, n_dof), dtype=dtype, order="F")
        else:
            coef = np.zeros_like(X, shape=n_dof, dtype=dtype)
        return coef

    def weight_intercept(self, coef):
        """Helper function to get coefficients and intercept.

Parameters
----------
coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
    Coefficients of a linear model.
    If shape (n_classes * n_dof,), the classes of one feature are contiguous,
    i.e. one reconstructs the 2d-array via
    coef.reshape((n_classes, -1), order="F").

Returns
-------
weights : ndarray of shape (n_features,) or (n_classes, n_features)
    Coefficients without intercept term.
intercept : float or ndarray of shape (n_classes,)
    Intercept terms.
        """
        if not self.base_loss.is_multiclass:
            if self.fit_intercept:
                intercept = coef[-1]
                weights = coef[:-1]
            else:
                intercept = 0.0
                weights = coef
        else:
            # A ravelled coef is reshaped to (n_classes, n_dof) with the
            # classes-contiguous F-order layout.
            if coef.ndim == 1:
                weights = coef.reshape((self.base_loss.n_classes, -1), order="F")
            else:
                weights = coef
            if self.fit_intercept:
                intercept = weights[:, -1]
                weights = weights[:, :-1]
            else:
                intercept = 0.0

        return weights, intercept

    def weight_intercept_raw(self, coef, X):
        """Helper function to get coefficients, intercept and raw_prediction.

Parameters
----------
coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
    Coefficients of a linear model.
    If shape (n_classes * n_dof,), the classes of one feature are contiguous,
    i.e. one reconstructs the 2d-array via
    coef.reshape((n_classes, -1), order="F").
X : {array-like, sparse matrix} of shape (n_samples, n_features)
    Training data.

Returns
-------
weights : ndarray of shape (n_features,) or (n_classes, n_features)
    Coefficients without intercept term.
intercept : float or ndarray of shape (n_classes,)
    Intercept terms.
raw_prediction : ndarray of shape (n_samples,) or (n_samples, n_classes)
        """
        weights, intercept = self.weight_intercept(coef)

        if not self.base_loss.is_multiclass:
            raw_prediction = X @ weights + intercept
        else:
            # weights has shape (n_classes, n_dof), so X @ weights.T has shape
            # (n_samples, n_classes).
            raw_prediction = X @ weights.T + intercept

        return weights, intercept, raw_prediction

    def l2_penalty(self, weights, l2_reg_strength):
        """Compute L2 penalty term l2_reg_strength/2 * ||w||_2^2."""
        norm2_w = weights @ weights if weights.ndim == 1 else squared_norm(weights)
        return 0.5 * l2_reg_strength * norm2_w
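
    # Illustrative note (not in the original source): for a single-output loss
    # with fit_intercept=True and coef = [w_0, ..., w_{d-1}, b],
    # weight_intercept_raw(coef, X) returns
    # (coef[:-1], coef[-1], X @ coef[:-1] + coef[-1]).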

    def loss(
        self,
        coef,
        X,
        y,
        sample_weight=None,
        l2_reg_strength=0.0,
        n_threads=1,
        raw_prediction=None,
    ):
        """Compute the loss as weighted average over point-wise losses.

Parameters
----------
coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
    Coefficients of a linear model.
    If shape (n_classes * n_dof,), the classes of one feature are contiguous,
    i.e. one reconstructs the 2d-array via
    coef.reshape((n_classes, -1), order="F").
X : {array-like, sparse matrix} of shape (n_samples, n_features)
    Training data.
y : contiguous array of shape (n_samples,)
    Observed, true target values.
sample_weight : None or contiguous array of shape (n_samples,), default=None
    Sample weights.
l2_reg_strength : float, default=0.0
    L2 regularization strength
n_threads : int, default=1
    Number of OpenMP threads to use.
raw_prediction : C-contiguous array of shape (n_samples,) or array of
    shape (n_samples, n_classes)
    Raw prediction values (in link space). If provided, these are used. If
    None, then raw_prediction = X @ coef + intercept is calculated.

Returns
-------
loss : float
    Weighted average of losses per sample, plus penalty.
        """
        if raw_prediction is None:
            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)
        else:
            weights, intercept = self.weight_intercept(coef)

        loss = self.base_loss.loss(
            y_true=y,
            raw_prediction=raw_prediction,
            sample_weight=None,
            n_threads=n_threads,
        )
        loss = np.average(loss, weights=sample_weight)

        return loss + self.l2_penalty(weights, l2_reg_strength)

    def loss_gradient(
        self,
        coef,
        X,
        y,
        sample_weight=None,
        l2_reg_strength=0.0,
        n_threads=1,
        raw_prediction=None,
    ):
        """Computes the loss and its gradient w.r.t. coef.

Parameters
----------
coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
    Coefficients of a linear model.
    If shape (n_classes * n_dof,), the classes of one feature are contiguous,
    i.e. one reconstructs the 2d-array via
    coef.reshape((n_classes, -1), order="F").
X : {array-like, sparse matrix} of shape (n_samples, n_features)
    Training data.
y : contiguous array of shape (n_samples,)
    Observed, true target values.
sample_weight : None or contiguous array of shape (n_samples,), default=None
    Sample weights.
l2_reg_strength : float, default=0.0
    L2 regularization strength
n_threads : int, default=1
    Number of OpenMP threads to use.
raw_prediction : C-contiguous array of shape (n_samples,) or array of
    shape (n_samples, n_classes)
    Raw prediction values (in link space). If provided, these are used. If
    None, then raw_prediction = X @ coef + intercept is calculated.

Returns
-------
loss : float
    Weighted average of losses per sample, plus penalty.

gradient : ndarray of shape coef.shape
     The gradient of the loss.
        """
        (n_samples, n_features), n_classes = X.shape, self.base_loss.n_classes
        n_dof = n_features + int(self.fit_intercept)

        if raw_prediction is None:
            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)
        else:
            weights, intercept = self.weight_intercept(coef)

        loss, grad_pointwise = self.base_loss.loss_gradient(
            y_true=y,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            n_threads=n_threads,
        )
        sw_sum = n_samples if sample_weight is None else np.sum(sample_weight)
        loss = loss.sum() / sw_sum
        loss += self.l2_penalty(weights, l2_reg_strength)

        grad_pointwise /= sw_sum

        if not self.base_loss.is_multiclass:
            grad = np.empty_like(coef, dtype=weights.dtype)
            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
            if self.fit_intercept:
                grad[-1] = grad_pointwise.sum()
        else:
            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order="F")
            # grad_pointwise.shape = (n_samples, n_classes)
            grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights
            if self.fit_intercept:
                grad[:, -1] = grad_pointwise.sum(axis=0)
            if coef.ndim == 1:
                grad = grad.ravel(order="F")

        return loss, grad

    def gradient(
        self,
        coef,
        X,
        y,
        sample_weight=None,
        l2_reg_strength=0.0,
        n_threads=1,
        raw_prediction=None,
    ):
        """Computes the gradient w.r.t. coef.

Parameters
----------
coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
    Coefficients of a linear model.
    If shape (n_classes * n_dof,), the classes of one feature are contiguous,
    i.e. one reconstructs the 2d-array via
    coef.reshape((n_classes, -1), order="F").
X : {array-like, sparse matrix} of shape (n_samples, n_features)
    Training data.
y : contiguous array of shape (n_samples,)
    Observed, true target values.
sample_weight : None or contiguous array of shape (n_samples,), default=None
    Sample weights.
l2_reg_strength : float, default=0.0
    L2 regularization strength
n_threads : int, default=1
    Number of OpenMP threads to use.
raw_prediction : C-contiguous array of shape (n_samples,) or array of
    shape (n_samples, n_classes)
    Raw prediction values (in link space). If provided, these are used. If
    None, then raw_prediction = X @ coef + intercept is calculated.

Returns
-------
gradient : ndarray of shape coef.shape
     The gradient of the loss.
        """
        (n_samples, n_features), n_classes = X.shape, self.base_loss.n_classes
        n_dof = n_features + int(self.fit_intercept)

        if raw_prediction is None:
            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)
        else:
            weights, intercept = self.weight_intercept(coef)

        grad_pointwise = self.base_loss.gradient(
            y_true=y,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            n_threads=n_threads,
        )
        sw_sum = n_samples if sample_weight is None else np.sum(sample_weight)
        grad_pointwise /= sw_sum

        if not self.base_loss.is_multiclass:
            grad = np.empty_like(coef, dtype=weights.dtype)
            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
            if self.fit_intercept:
                grad[-1] = grad_pointwise.sum()
            return grad
        else:
            grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order="F")
            grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights
            if self.fit_intercept:
                grad[:, -1] = grad_pointwise.sum(axis=0)
            if coef.ndim == 1:
                return grad.ravel(order="F")
            else:
                return grad

    def gradient_hessian(
        self,
        coef,
        X,
        y,
        sample_weight=None,
        l2_reg_strength=0.0,
        n_threads=1,
        gradient_out=None,
        hessian_out=None,
        raw_prediction=None,
    ):
        """Computes gradient and hessian w.r.t. coef.

Parameters
----------
coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
    Coefficients of a linear model.
    If shape (n_classes * n_dof,), the classes of one feature are contiguous,
    i.e. one reconstructs the 2d-array via
    coef.reshape((n_classes, -1), order="F").
X : {array-like, sparse matrix} of shape (n_samples, n_features)
    Training data.
y : contiguous array of shape (n_samples,)
    Observed, true target values.
sample_weight : None or contiguous array of shape (n_samples,), default=None
    Sample weights.
l2_reg_strength : float, default=0.0
    L2 regularization strength
n_threads : int, default=1
    Number of OpenMP threads to use.
gradient_out : None or ndarray of shape coef.shape
    A location into which the gradient is stored. If None, a new array
    might be created.
hessian_out : None or ndarray of shape (n_dof, n_dof) or
    (n_classes * n_dof, n_classes * n_dof)
    A location into which the hessian is stored. If None, a new array
    might be created.
raw_prediction : C-contiguous array of shape (n_samples,) or array of
    shape (n_samples, n_classes)
    Raw prediction values (in link space). If provided, these are used. If
    None, then raw_prediction = X @ coef + intercept is calculated.

Returns
-------
gradient : ndarray of shape coef.shape
     The gradient of the loss.

hessian : ndarray of shape (n_dof, n_dof) or
    (n_classes * n_dof, n_classes * n_dof)
    Hessian matrix.

hessian_warning : bool
    True if pointwise hessian has more than 25% of its elements non-positive.
        """
        (n_samples, n_features), n_classes = X.shape, self.base_loss.n_classes
        n_dof = n_features + int(self.fit_intercept)

        if raw_prediction is None:
            weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)
        else:
            weights, intercept = self.weight_intercept(coef)

        sw_sum = n_samples if sample_weight is None else np.sum(sample_weight)

        if gradient_out is None:
            grad = np.empty_like(coef, dtype=weights.dtype, order="F")
        elif gradient_out.shape != coef.shape:
            raise ValueError(
                "gradient_out is required to have shape coef.shape = "
                f"{coef.shape}; got {gradient_out.shape}."
            )
        elif self.base_loss.is_multiclass and not gradient_out.flags.f_contiguous:
            raise ValueError("gradient_out must be F-contiguous.")
        else:
            grad = gradient_out

        n = coef.size
        if hessian_out is None:
            hess = np.empty((n, n), dtype=weights.dtype)
        elif hessian_out.shape != (n, n):
            raise ValueError(
                f"hessian_out is required to have shape ({n}, {n}); got "
                f"{hessian_out.shape=}."
            )
        elif self.base_loss.is_multiclass and not (
            hessian_out.flags.c_contiguous or hessian_out.flags.f_contiguous
        ):
            raise ValueError("hessian_out must be contiguous.")
        else:
            hess = hessian_out

        if not self.base_loss.is_multiclass:
            grad_pointwise, hess_pointwise = self.base_loss.gradient_hessian(
                y_true=y,
                raw_prediction=raw_prediction,
                sample_weight=sample_weight,
                n_threads=n_threads,
            )
            grad_pointwise /= sw_sum
            hess_pointwise /= sw_sum
            # For non-canonical link functions and far away from the optimum, the
            # pointwise hessian can be negative. We take care that at least 75% of
            # the hessian entries are positive.
            hessian_warning = (
                np.average(hess_pointwise <= 0, weights=sample_weight) > 0.25
            )
            hess_pointwise = np.abs(hess_pointwise)

            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
            if self.fit_intercept:
                grad[-1] = grad_pointwise.sum()

            if hessian_warning:
                # Exit early without computing the full hessian.
                return grad, hess, hessian_warning

            hess[:n_features, :n_features] = sandwich_dot(X, hess_pointwise)

            if l2_reg_strength > 0:
                # The L2 penalty enters the hessian on the diagonal (feature
                # entries) only; add it through a flattened view of the array.
                order = "C" if hess.flags.c_contiguous else "F"
                hess.reshape(-1, order=order)[
                    : n_features * n_dof : n_dof + 1
                ] += l2_reg_strength

            if self.fit_intercept:
                # With the intercept as an added column of ones in X, the hessian
                # becomes (X, 1)' @ diag(h) @ (X, 1).
                Xh = X.T @ hess_pointwise
                hess[:-1, -1] = Xh
                hess[-1, :-1] = Xh
                hess[-1, -1] = hess_pointwise.sum()
        else:
            # The multinomial loss provides only the diagonal (in the classes)
            # part of the hessian, so the full class-block structure is built from
            # the predicted probabilities instead.
            grad_pointwise, proba = self.base_loss.gradient_proba(
                y_true=y,
                raw_prediction=raw_prediction,
                sample_weight=sample_weight,
                n_threads=n_threads,
            )
            grad_pointwise /= sw_sum
            grad = grad.reshape((n_classes, n_dof), order="F")
            grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights
            if self.fit_intercept:
                grad[:, -1] = grad_pointwise.sum(axis=0)
            if coef.ndim == 1:
                grad = grad.ravel(order="F")

            if sample_weight is not None:
                sw = sample_weight / sw_sum
            else:
                sw = 1.0 / sw_sum

            # Block (k, l) of the hessian is X' @ diag(sw * p_k (delta_kl - p_l)) @ X
            # plus an intercept row/column; every block is itself symmetric.
            for k in range(n_classes):
                # Diagonal block for class k.
                h = proba[:, k] * (1 - proba[:, k]) * sw
                hess[
                    k * n_dof : k * n_dof + n_features,
                    k * n_dof : k * n_dof + n_features,
                ] = sandwich_dot(X, h)
                if self.fit_intercept:
                    Xh = X.T @ h
                    hess[k * n_dof : k * n_dof + n_features, k * n_dof + n_features] = Xh
                    hess[k * n_dof + n_features, k * n_dof : k * n_dof + n_features] = Xh
                    hess[k * n_dof + n_features, k * n_dof + n_features] = h.sum()
                for l in range(k + 1, n_classes):
                    # Off-diagonal block for classes k and l.
                    h = -proba[:, k] * proba[:, l] * sw
                    hess[
                        k * n_dof : k * n_dof + n_features,
                        l * n_dof : l * n_dof + n_features,
                    ] = sandwich_dot(X, h)
                    if self.fit_intercept:
                        Xh = X.T @ h
                        hess[
                            k * n_dof : k * n_dof + n_features, l * n_dof + n_features
                        ] = Xh
                        hess[
                            k * n_dof + n_features, l * n_dof : l * n_dof + n_features
                        ] = Xh
                        hess[k * n_dof + n_features, l * n_dof + n_features] = h.sum()
                    # Each block is symmetric, so the mirrored block is a plain copy.
                    hess[
                        l * n_dof : (l + 1) * n_dof, k * n_dof : (k + 1) * n_dof
                    ] = hess[k * n_dof : (k + 1) * n_dof, l * n_dof : (l + 1) * n_dof]

            if l2_reg_strength > 0:
                # Add the L2 penalty on the diagonal, skipping intercept entries.
                idx = np.arange(n)
                if self.fit_intercept:
                    idx = idx[idx % n_dof != n_features]
                hess[idx, idx] += l2_reg_strength

            # The multinomial hessian is positive semidefinite.
            hessian_warning = False

        return grad, hess, hessian_warning

    def gradient_hessian_product(
        self, coef, X, y, sample_weight=None, l2_reg_strength=0.0, n_threads=1
    ):
        """Computes gradient and hessp (hessian product function) w.r.t. coef.

Parameters
----------
coef : ndarray of shape (n_dof,), (n_classes, n_dof) or (n_classes * n_dof,)
    Coefficients of a linear model.
    If shape (n_classes * n_dof,), the classes of one feature are contiguous,
    i.e. one reconstructs the 2d-array via
    coef.reshape((n_classes, -1), order="F").
X : {array-like, sparse matrix} of shape (n_samples, n_features)
    Training data.
y : contiguous array of shape (n_samples,)
    Observed, true target values.
sample_weight : None or contiguous array of shape (n_samples,), default=None
    Sample weights.
l2_reg_strength : float, default=0.0
    L2 regularization strength
n_threads : int, default=1
    Number of OpenMP threads to use.

Returns
-------
gradient : ndarray of shape coef.shape
     The gradient of the loss.

hessp : callable
    Function that takes a vector input of the shape of gradient and
    returns the matrix-vector product with the hessian.
        """
        (n_samples, n_features), n_classes = X.shape, self.base_loss.n_classes
        n_dof = n_features + int(self.fit_intercept)
        weights, intercept, raw_prediction = self.weight_intercept_raw(coef, X)
        sw_sum = n_samples if sample_weight is None else np.sum(sample_weight)

        if not self.base_loss.is_multiclass:
            grad_pointwise, hess_pointwise = self.base_loss.gradient_hessian(
                y_true=y,
                raw_prediction=raw_prediction,
                sample_weight=sample_weight,
                n_threads=n_threads,
            )
            grad_pointwise /= sw_sum
            hess_pointwise /= sw_sum
            grad = np.empty_like(coef, dtype=weights.dtype)
            grad[:n_features] = X.T @ grad_pointwise + l2_reg_strength * weights
            if self.fit_intercept:
                grad[-1] = grad_pointwise.sum()

            # Precompute as much as possible: hX, hX_sum and hessian_sum.
            hessian_sum = hess_pointwise.sum()
            if sparse.issparse(X):
                hX = (
                    sparse.dia_matrix(
                        (hess_pointwise, 0), shape=(n_samples, n_samples)
                    )
                    @ X
                )
            else:
                hX = hess_pointwise[:, np.newaxis] * X

            if self.fit_intercept:
                # hX.sum(axis=0) returns a matrix object for sparse hX; squeeze it
                # into a 1d ndarray, but never to a 0d array (n_features == 1).
                hX_sum = np.squeeze(np.asarray(hX.sum(axis=0)))
                hX_sum = np.atleast_1d(hX_sum)

            def hessp(s):
                ret = np.empty_like(s)
                if sparse.issparse(X):
                    ret[:n_features] = X.T @ (hX @ s[:n_features])
                else:
                    ret[:n_features] = np.linalg.multi_dot([X.T, hX, s[:n_features]])
                ret[:n_features] += l2_reg_strength * s[:n_features]

                if self.fit_intercept:
                    ret[:n_features] += s[-1] * hX_sum
                    ret[-1] = hX_sum @ s[:n_features] + hessian_sum * s[-1]
                return ret

            return grad, hessp

        # Multiclass case.
        grad_pointwise, proba = self.base_loss.gradient_proba(
            y_true=y,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            n_threads=n_threads,
        )
        grad_pointwise /= sw_sum
        grad = np.empty((n_classes, n_dof), dtype=weights.dtype, order="F")
        grad[:, :n_features] = grad_pointwise.T @ X + l2_reg_strength * weights
        if self.fit_intercept:
            grad[:, -1] = grad_pointwise.sum(axis=0)

        def hessp(s):
            s = s.reshape((n_classes, -1), order="F")  # shape = (n_classes, n_dof)
            if self.fit_intercept:
                s_intercept = s[:, -1]
                s = s[:, :-1]  # shape = (n_classes, n_features)
            else:
                s_intercept = 0
            tmp = X @ s.T + s_intercept  # shape = (n_samples, n_classes)
            tmp += (-proba * tmp).sum(axis=1)[:, np.newaxis]
            tmp *= proba
            if sample_weight is not None:
                tmp *= sample_weight[:, np.newaxis]
            hess_prod = np.empty((n_classes, n_dof), dtype=weights.dtype, order="F")
            hess_prod[:, :n_features] = tmp.T @ X / sw_sum + l2_reg_strength * s
            if self.fit_intercept:
                hess_prod[:, -1] = tmp.sum(axis=0) / sw_sum
            if coef.ndim == 1:
                return hess_prod.ravel(order="F")
            else:
                return hess_prod

        if coef.ndim == 1:
            return grad.ravel(order="F"), hessp

        return grad, hessp
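

def _example_usage():
    # Illustrative addition (not part of the original module): a smoke test
    # showing how the pieces fit together, using the private HalfSquaredError
    # loss from sklearn._loss (an internal API).
    from sklearn._loss import HalfSquaredError

    rng = np.random.default_rng(0)
    X = rng.standard_normal((20, 3))
    y = rng.standard_normal(20)
    lml = LinearModelLoss(base_loss=HalfSquaredError(), fit_intercept=True)
    coef = lml.init_zero_coef(X)

    # loss_gradient agrees with separate loss and gradient calls.
    loss, grad = lml.loss_gradient(coef, X, y, l2_reg_strength=1.0)
    assert np.isclose(loss, lml.loss(coef, X, y, l2_reg_strength=1.0))
    assert np.allclose(grad, lml.gradient(coef, X, y, l2_reg_strength=1.0))

    # The (gradient, hessp) pair plugs into scipy's Newton-CG, e.g.:
    #
    #     from scipy.optimize import minimize
    #     opt = minimize(
    #         fun=lambda c: lml.loss(c, X, y),
    #         x0=coef,
    #         jac=lambda c: lml.gradient(c, X, y),
    #         hessp=lambda c, v: lml.gradient_hessian_product(c, X, y)[1](v),
    #         method="Newton-CG",
    #     )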