
"""
This module contains loss classes suitable for fitting.

It is not part of the public API.
Specific losses are used for regression, binary classification or multiclass
classification.
    Nxlogy   )check_scalar)_weighted_percentile   )CyAbsoluteErrorCyExponentialLossCyHalfBinomialLossCyHalfGammaLossCyHalfMultinomialLossCyHalfPoissonLossCyHalfSquaredErrorCyHalfTweedieLossCyHalfTweedieLossIdentityCyHuberLossCyPinballLoss)HalfLogitLinkIdentityLinkInterval	LogitLinkLogLinkMultinomialLogitc                       \ rS rSrSrSrSrSrSS jrS r	S r
   SS	 jr    SS
 jr   SS jr    SS jrSS jrSS jrSS jr\R&                  S4S jrSrg)BaseLossF   a|  Base class for a loss function of 1-dimensional targets.

Conventions:

    - y_true.shape = sample_weight.shape = (n_samples,)
    - y_pred.shape = raw_prediction.shape = (n_samples,)
    - If is_multiclass is true (multiclass classification), then
      y_pred.shape = raw_prediction.shape = (n_samples, n_classes)
      Note that this corresponds to the return value of decision_function.

y_true, y_pred, sample_weight and raw_prediction must either be all float64
or all float32.
gradient and hessian must be either both float64 or both float32.

Note that y_pred = link.inverse(raw_prediction).

Specific loss classes can inherit specific link classes to satisfy
BaseLink's abstractmethods.

Parameters
----------
sample_weight : {None, ndarray}
    If sample_weight is None, the hessian might be constant.
n_classes : {None, int}
    The number of classes for classification, else None.

Attributes
----------
closs : CyLossFunction
link : BaseLink
interval_y_true : Interval
    Valid interval for y_true.
interval_y_pred : Interval
    Valid interval for y_pred.
differentiable : bool
    Indicates whether or not the loss function is differentiable in
    raw_prediction everywhere.
need_update_leaves_values : bool
    Indicates whether decision trees in gradient boosting need to update
    leaf values after having been fit to the (negative) gradients.
approx_hessian : bool
    Indicates whether the hessian is approximated or exact. If
    approximated, it should be larger than or equal to the exact one.
constant_hessian : bool
    Indicates whether the hessian is one for this loss.
is_multiclass : bool
    Indicates whether n_classes > 2 is allowed.
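
Examples
--------
Illustrative sketch only (not a doctest); every concrete subclass follows the
same workflow, shown here with ``HalfSquaredError``::

    import numpy as np

    loss = HalfSquaredError()
    y_true = np.array([0.0, 1.0, 2.0])
    raw_prediction = np.array([0.5, 1.0, 1.5])
    # Pointwise loss, then gradient and hessian w.r.t. raw_prediction.
    pointwise = loss.loss(y_true=y_true, raw_prediction=raw_prediction)
    gradient, hessian = loss.gradient_hessian(
        y_true=y_true, raw_prediction=raw_prediction
    )
    # Calling the loss object returns the (weighted) mean loss.
    mean_loss = loss(y_true=y_true, raw_prediction=raw_prediction)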
    """

    # Default class attributes; specific losses override these where needed.
    differentiable = True
    need_update_leaves_values = False
    is_multiclass = False

    def __init__(self, closs, link, n_classes=None):
        self.closs = closs
        self.link = link
        self.approx_hessian = False
        self.constant_hessian = False
        self.n_classes = n_classes
        self.interval_y_true = Interval(-np.inf, np.inf, False, False)
        self.interval_y_pred = self.link.interval_y_pred

    def in_y_true_range(self, y):
        """Return True if y is in the valid range of y_true.

        Parameters
        ----------
        y : ndarray
        """
        return self.interval_y_true.includes(y)

    def in_y_pred_range(self, y):
        """Return True if y is in the valid range of y_pred.

        Parameters
        ----------
        y : ndarray
        """
        return self.interval_y_pred.includes(y)

    def loss(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        loss_out=None,
        n_threads=1,
    ):
        """Compute the pointwise loss value for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        loss_out : None or C-contiguous array of shape (n_samples,)
            A location into which the result is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : array of shape (n_samples,)
            Element-wise loss function.
        """
        if loss_out is None:
            loss_out = np.empty_like(y_true)
        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)

        self.closs.loss(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            loss_out=loss_out,
            n_threads=n_threads,
        )
        return loss_out

    def loss_gradient(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        loss_out=None,
        gradient_out=None,
        n_threads=1,
    ):
        """Compute loss and gradient w.r.t. raw_prediction for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        loss_out : None or C-contiguous array of shape (n_samples,)
            A location into which the loss is stored. If None, a new array
            might be created.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array
            of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : array of shape (n_samples,)
            Element-wise loss function.

        gradient : array of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.
        """
        if loss_out is None:
            if gradient_out is None:
                loss_out = np.empty_like(y_true)
                gradient_out = np.empty_like(raw_prediction)
            else:
                loss_out = np.empty_like(y_true, dtype=gradient_out.dtype)
        elif gradient_out is None:
            gradient_out = np.empty_like(raw_prediction, dtype=loss_out.dtype)

        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
            gradient_out = gradient_out.squeeze(1)

        self.closs.loss_gradient(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            loss_out=loss_out,
            gradient_out=gradient_out,
            n_threads=n_threads,
        )
        return loss_out, gradient_out

    def gradient(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        gradient_out=None,
        n_threads=1,
    ):
        """Compute gradient of loss w.r.t raw_prediction for each input.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array
            of shape (n_samples, n_classes)
            A location into which the result is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : array of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.
        """
        if gradient_out is None:
            gradient_out = np.empty_like(raw_prediction)

        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
            gradient_out = gradient_out.squeeze(1)

        self.closs.gradient(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            gradient_out=gradient_out,
            n_threads=n_threads,
        )
        return gradient_out

    def gradient_hessian(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        gradient_out=None,
        hessian_out=None,
        n_threads=1,
    ):
        """Compute gradient and hessian of loss w.r.t raw_prediction.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or C-contiguous array of shape (n_samples,) or array
            of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        hessian_out : None or C-contiguous array of shape (n_samples,) or array
            of shape (n_samples, n_classes)
            A location into which the hessian is stored. If None, a new array
            might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : arrays of shape (n_samples,) or (n_samples, n_classes)
            Element-wise gradients.

        hessian : arrays of shape (n_samples,) or (n_samples, n_classes)
            Element-wise hessians.
        """
        if gradient_out is None:
            if hessian_out is None:
                gradient_out = np.empty_like(raw_prediction)
                hessian_out = np.empty_like(raw_prediction)
            else:
                gradient_out = np.empty_like(hessian_out)
        elif hessian_out is None:
            hessian_out = np.empty_like(gradient_out)

        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
            gradient_out = gradient_out.squeeze(1)
        if hessian_out.ndim == 2 and hessian_out.shape[1] == 1:
            hessian_out = hessian_out.squeeze(1)

        self.closs.gradient_hessian(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            gradient_out=gradient_out,
            hessian_out=hessian_out,
            n_threads=n_threads,
        )
        return gradient_out, hessian_out

    def __call__(self, y_true, raw_prediction, sample_weight=None, n_threads=1):
        """Compute the weighted average loss.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : C-contiguous array of shape (n_samples,) or array of
            shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        loss : float
            Mean or averaged loss function.
        """
        return np.average(
            self.loss(
                y_true=y_true,
                raw_prediction=raw_prediction,
                sample_weight=None,
                loss_out=None,
                n_threads=n_threads,
            ),
            weights=sample_weight,
        )

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This can be used as initial estimates of predictions, i.e. before the
        first iteration in fit.

        Parameters
        ----------
        y_true : array-like of shape (n_samples,)
            Observed, true target values.
        sample_weight : None or array of shape (n_samples,)
            Sample weights.

        Returns
        -------
        raw_prediction : numpy scalar or array of shape (n_classes,)
            Raw predictions of an intercept-only model.
        """
        # As default, take the weighted average of the target over the samples
        # axis=0 and then transform into link-scale (raw_prediction).
        y_pred = np.average(y_true, weights=sample_weight, axis=0)
        eps = 10 * np.finfo(y_pred.dtype).eps

        if self.interval_y_pred.low == -np.inf:
            a_min = None
        elif self.interval_y_pred.low_inclusive:
            a_min = self.interval_y_pred.low
        else:
            a_min = self.interval_y_pred.low + eps

        if self.interval_y_pred.high == np.inf:
            a_max = None
        elif self.interval_y_pred.high_inclusive:
            a_max = self.interval_y_pred.high
        else:
            a_max = self.interval_y_pred.high - eps

        if a_min is None and a_max is None:
            return self.link.link(y_pred)
        else:
            return self.link.link(np.clip(y_pred, a_min, a_max))

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        """Calculate term dropped in loss.

        With this term added, the loss of perfect predictions is zero.
        """
        return np.zeros_like(y_true)

    def init_gradient_and_hessian(self, n_samples, dtype=np.float64, order="F"):
        """Initialize arrays for gradients and hessians.

        Unless hessians are constant, arrays are initialized with undefined values.

        Parameters
        ----------
        n_samples : int
            The number of samples, usually passed to `fit()`.
        dtype : {np.float64, np.float32}, default=np.float64
            The dtype of the arrays gradient and hessian.
        order : {'C', 'F'}, default='F'
            Order of the arrays gradient and hessian. The default 'F' makes the
            arrays contiguous along samples.

        Returns
        -------
        gradient : C-contiguous array of shape (n_samples,) or array of shape
            (n_samples, n_classes)
            Empty array (allocated but not initialized) to be used as argument
            gradient_out.
        hessian : C-contiguous array of shape (n_samples,), array of shape
            (n_samples, n_classes) or shape (1,)
            Empty (allocated but not initialized) array to be used as argument
            hessian_out.
            If constant_hessian is True (e.g. `HalfSquaredError`), the array is
            initialized to ``1``.
        """
        if dtype not in (np.float32, np.float64):
            raise ValueError(
                "Valid options for 'dtype' are np.float32 and np.float64. "
                f"Got dtype={dtype} instead."
            )

        if self.is_multiclass:
            shape = (n_samples, self.n_classes)
        else:
            shape = (n_samples,)
        gradient = np.empty(shape=shape, dtype=dtype, order=order)

        if self.constant_hessian:
            # If the hessian is constant, we consider it equal to 1 and do not
            # allocate a full array for it.
            hessian = np.ones(shape=(1,), dtype=dtype)
        else:
            hessian = np.empty(shape=shape, dtype=dtype, order=order)

        return gradient, hessian


class HalfSquaredError(BaseLoss):
    """Half squared error with identity link, for regression.

Domain:
y_true and y_pred all real numbers

Link:
y_pred = raw_prediction

For a given sample x_i, half squared error is defined as::

    loss(x_i) = 0.5 * (y_true_i - raw_prediction_i)**2

The factor of 0.5 simplifies the computation of gradients and results in a
unit hessian (and is consistent with what is done in LightGBM). It is also
half the Normal distribution deviance.
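
Examples
--------
Illustrative sketch only (not a doctest)::

    import numpy as np

    loss = HalfSquaredError()
    y_true = np.array([0.0, 1.0, 2.0])
    raw_prediction = np.array([0.5, 1.0, 1.5])
    loss.loss(y_true=y_true, raw_prediction=raw_prediction)
    # -> 0.5 * (y_true - raw_prediction)**2 == [0.125, 0.0, 0.125]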
    """

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyHalfSquaredError(), link=IdentityLink())
        self.constant_hessian = sample_weight is None


class AbsoluteError(BaseLoss):
    """Absolute error with identity link, for regression.

Domain:
y_true and y_pred all real numbers

Link:
y_pred = raw_prediction

For a given sample x_i, the absolute error is defined as::

    loss(x_i) = |y_true_i - raw_prediction_i|

Note that the exact hessian = 0 almost everywhere (except at one point, therefore
differentiable = False). Optimization routines like in HGBT, however, need a
hessian > 0. Therefore, we assign 1.
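
Examples
--------
Illustrative sketch only (not a doctest); the intercept-only fit is the
(weighted) median of the target::

    import numpy as np

    loss = AbsoluteError()
    y_true = np.array([0.0, 1.0, 10.0])
    loss.fit_intercept_only(y_true)                       # median -> 1.0
    loss.loss(y_true=y_true, raw_prediction=np.ones(3))   # -> [1.0, 0.0, 9.0]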
    """

    differentiable = False
    need_update_leaves_values = True

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyAbsoluteError(), link=IdentityLink())
        self.approx_hessian = True
        self.constant_hessian = sample_weight is None

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This is the weighted median of the target, i.e. over the samples
        axis=0.
        """
        if sample_weight is None:
            return np.median(y_true, axis=0)
        else:
            return _weighted_percentile(y_true, sample_weight, 50)


class PinballLoss(BaseLoss):
    """Quantile loss aka pinball loss, for regression.
Domain:
y_true and y_pred all real numbers
quantile in (0, 1)

Link:
y_pred = raw_prediction

For a given sample x_i, the pinball loss is defined as::

    loss(x_i) = rho_{quantile}(y_true_i - raw_prediction_i)

    rho_{quantile}(u) = u * (quantile - 1_{u<0})
                      = -u *(1 - quantile)  if u < 0
                         u * quantile       if u >= 0

Note: 2 * PinballLoss(quantile=0.5) equals AbsoluteError().

Note that the exact hessian = 0 almost everywhere (except at one point, therefore
differentiable = False). Optimization routines like in HGBT, however, need a
hessian > 0. Therefore, we assign 1.

Additional Attributes
---------------------
quantile : float
    The quantile level of the quantile to be estimated. Must be in range (0, 1).
FTc           	         > [        US[        R                  SSSS9  [        TU ]  [        [        U5      S9[        5       S9  SU l        US L U l	        g )	Nquantiler   r   neithertarget_typemin_valmax_valinclude_boundaries)r   r   T)
r   numbersRealr   r)   r   floatr   r    r!   )r'   r;   r   r   s      r(   r)   PinballLoss.__init__e  s]    (	
 	x9 	 	
 # - 5r+   c                     Uc-  [         R                  " USU R                  R                  -  SS9$ [	        XSU R                  R                  -  5      $ )r   d   r   r   )r#   
percentiler   r   r   ri   s      r(   re   PinballLoss.fit_intercept_onlyu  sM      ==tzz/B/B)BKK'sTZZ-@-@'@ r+   r   )N      ?ry   r   r   s   @r(   r   r   D  s$    : N $6  r+   r   c                   B   ^  \ rS rSrSrSrSrSU 4S jjrS	S jrSr	U =r
$ )
	HuberLossi  a  Huber loss, for regression.

Domain:
y_true and y_pred all real numbers
quantile in (0, 1)

Link:
y_pred = raw_prediction

For a given sample x_i, the Huber loss is defined as::

    loss(x_i) = 1/2 * abserr**2            if abserr <= delta
                delta * (abserr - delta/2) if abserr > delta

    abserr = |y_true_i - raw_prediction_i|
    delta = quantile(abserr, self.quantile)

Note: HuberLoss(quantile=1) equals HalfSquaredError and HuberLoss(quantile=0)
equals delta * (AbsoluteError() - delta/2).

Additional Attributes
---------------------
quantile : float
    The quantile level which defines the breaking point `delta` to distinguish
    between absolute error and squared error. Must be in range (0, 1).

References
----------
.. [1] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient
  boosting machine <10.1214/aos/1013203451>`.
  Annals of Statistics, 29, 1189-1232.
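
Examples
--------
Illustrative sketch only (not a doctest); small residuals are penalized
quadratically, large ones only linearly::

    import numpy as np

    loss = HuberLoss(quantile=0.9)
    y_true = np.array([0.0, 0.0, 100.0])     # one large outlier
    raw_prediction = np.zeros(3)
    # The outlier contributes linearly (not quadratically) to the loss.
    loss.loss(y_true=y_true, raw_prediction=raw_prediction)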
    """

    differentiable = False
    need_update_leaves_values = True

    def __init__(self, sample_weight=None, quantile=0.9, delta=0.5):
        check_scalar(
            quantile,
            "quantile",
            target_type=numbers.Real,
            min_val=0,
            max_val=1,
            include_boundaries="neither",
        )
        self.quantile = quantile
        super().__init__(
            closs=CyHuberLoss(delta=float(delta)),
            link=IdentityLink(),
        )
        self.approx_hessian = True
        self.constant_hessian = False

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This is the weighted median of the target, corrected by the average
        clipped residual, i.e. over the samples axis=0.
        """
        if sample_weight is None:
            median = np.percentile(y_true, 50, axis=0)
        else:
            median = _weighted_percentile(y_true, sample_weight, 50)
        diff = y_true - median
        term = np.sign(diff) * np.minimum(self.closs.delta, np.abs(diff))
        return median + np.average(term, weights=sample_weight)


class HalfPoissonLoss(BaseLoss):
    """Half Poisson deviance loss with log-link, for regression.

Domain:
y_true in non-negative real numbers
y_pred in positive real numbers

Link:
y_pred = exp(raw_prediction)

For a given sample x_i, half the Poisson deviance is defined as::

    loss(x_i) = y_true_i * log(y_true_i/exp(raw_prediction_i))
                - y_true_i + exp(raw_prediction_i)

Half the Poisson deviance is actually the negative log-likelihood up to
constant terms (not involving raw_prediction) and simplifies the
computation of the gradients.
We also skip the constant term `y_true_i * log(y_true_i) - y_true_i`.
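
Examples
--------
Illustrative sketch only (not a doctest); predictions live on the log scale::

    import numpy as np

    loss = HalfPoissonLoss()
    y_true = np.array([0.0, 1.0, 3.0])
    raw_prediction = np.log(np.array([1.0, 1.0, 2.0]))  # y_pred = exp(raw)
    mean_loss = loss(y_true=y_true, raw_prediction=raw_prediction)
    baseline = loss.fit_intercept_only(y_true)          # log of the mean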
    """

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyHalfPoissonLoss(), link=LogLink())
        self.interval_y_true = Interval(0, np.inf, True, False)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        term = xlogy(y_true, y_true) - y_true
        if sample_weight is not None:
            term *= sample_weight
        return term


class HalfGammaLoss(BaseLoss):
    """Half Gamma deviance loss with log-link, for regression.

Domain:
y_true and y_pred in positive real numbers

Link:
y_pred = exp(raw_prediction)

For a given sample x_i, half Gamma deviance loss is defined as::

    loss(x_i) = log(exp(raw_prediction_i)/y_true_i)
                + y_true/exp(raw_prediction_i) - 1

Half the Gamma deviance is actually proportional to the negative log-
likelihood up to constant terms (not involving raw_prediction) and
simplifies the computation of the gradients.
We also skip the constant term `-log(y_true_i) - 1`.
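
Examples
--------
Illustrative sketch only (not a doctest); targets and predictions must be
strictly positive::

    import numpy as np

    loss = HalfGammaLoss()
    y_true = np.array([1.0, 2.0])
    raw_prediction = np.log(np.array([1.0, 2.0]))  # y_pred = exp(raw) = y_true
    loss.loss(y_true=y_true, raw_prediction=raw_prediction)
    # -> [0.0, 0.0] since the predictions are perfect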
    """

    def __init__(self, sample_weight=None):
        super().__init__(closs=CyHalfGammaLoss(), link=LogLink())
        self.interval_y_true = Interval(0, np.inf, False, False)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        term = -np.log(y_true) - 1
        if sample_weight is not None:
            term *= sample_weight
        return term


class HalfTweedieLoss(BaseLoss):
    """Half Tweedie deviance loss with log-link, for regression.

Domain:
y_true in real numbers for power <= 0
y_true in non-negative real numbers for 0 < power < 2
y_true in positive real numbers for 2 <= power
y_pred in positive real numbers
power in real numbers

Link:
y_pred = exp(raw_prediction)

For a given sample x_i, half Tweedie deviance loss with p=power is defined
as::

    loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)
                - y_true_i * exp(raw_prediction_i)**(1-p) / (1-p)
                + exp(raw_prediction_i)**(2-p) / (2-p)

Taking the limits for p=0, 1, 2 gives HalfSquaredError with a log link,
HalfPoissonLoss and HalfGammaLoss.

We also skip constant terms, but those are different for p=0, 1, 2.
Therefore, the loss is not continuous in `power`.

Note furthermore that although no Tweedie distribution exists for
0 < power < 1, it still gives a strictly consistent scoring function for
the expectation.
    """

    def __init__(self, sample_weight=None, power=1.5):
        super().__init__(
            closs=CyHalfTweedieLoss(power=float(power)),
            link=LogLink(),
        )
        if self.closs.power <= 0:
            self.interval_y_true = Interval(-np.inf, np.inf, False, False)
        elif self.closs.power < 2:
            self.interval_y_true = Interval(0, np.inf, True, False)
        else:
            self.interval_y_true = Interval(0, np.inf, False, False)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        if self.closs.power == 0:
            return HalfSquaredError().constant_to_optimal_zero(
                y_true=y_true, sample_weight=sample_weight
            )
        elif self.closs.power == 1:
            return HalfPoissonLoss().constant_to_optimal_zero(
                y_true=y_true, sample_weight=sample_weight
            )
        elif self.closs.power == 2:
            return HalfGammaLoss().constant_to_optimal_zero(
                y_true=y_true, sample_weight=sample_weight
            )
        else:
            p = self.closs.power
            term = np.power(np.maximum(y_true, 0), 2 - p) / (1 - p) / (2 - p)
            if sample_weight is not None:
                term *= sample_weight
            return term


class HalfTweedieLossIdentity(BaseLoss):
    """Half Tweedie deviance loss with identity link, for regression.

Domain:
y_true in real numbers for power <= 0
y_true in non-negative real numbers for 0 < power < 2
y_true in positive real numbers for 2 <= power
y_pred in positive real numbers for power != 0
y_pred in real numbers for power = 0
power in real numbers

Link:
y_pred = raw_prediction

For a given sample x_i, half Tweedie deviance loss with p=power is defined
as::

    loss(x_i) = max(y_true_i, 0)**(2-p) / (1-p) / (2-p)
                - y_true_i * raw_prediction_i**(1-p) / (1-p)
                + raw_prediction_i**(2-p) / (2-p)

Note that the minimum value of this loss is 0.

Note furthermore that although no Tweedie distribution exists for
0 < power < 1, it still gives a strictly consistent scoring function for
the expectation.
    """

    def __init__(self, sample_weight=None, power=1.5):
        super().__init__(
            closs=CyHalfTweedieLossIdentity(power=float(power)),
            link=IdentityLink(),
        )
        if self.closs.power <= 0:
            self.interval_y_true = Interval(-np.inf, np.inf, False, False)
        elif self.closs.power < 2:
            self.interval_y_true = Interval(0, np.inf, True, False)
        else:
            self.interval_y_true = Interval(0, np.inf, False, False)

        if self.closs.power == 0:
            self.interval_y_pred = Interval(-np.inf, np.inf, False, False)
        else:
            self.interval_y_pred = Interval(0, np.inf, False, False)


class HalfBinomialLoss(BaseLoss):
    """Half Binomial deviance loss with logit link, for binary classification.
This is also know as binary cross entropy, log-loss and logistic loss.

Domain:
y_true in [0, 1], i.e. regression on the unit interval
y_pred in (0, 1), i.e. boundaries excluded

Link:
y_pred = expit(raw_prediction)

For a given sample x_i, half Binomial deviance is defined as the negative
log-likelihood of the Binomial/Bernoulli distribution and can be expressed
as::

    loss(x_i) = log(1 + exp(raw_pred_i)) - y_true_i * raw_pred_i

See The Elements of Statistical Learning, by Hastie, Tibshirani, Friedman,
section 4.4.1 (about logistic regression).

Note that the formulation works for classification, y = {0, 1}, as well as
logistic regression, y = [0, 1].
If you add `constant_to_optimal_zero` to the loss, you get half the
Bernoulli/binomial deviance.

More details: Inserting the predicted probability y_pred = expit(raw_prediction)
in the loss gives the well known::

    loss(x_i) = - y_true_i * log(y_pred_i) - (1 - y_true_i) * log(1 - y_pred_i)
c                 j   > [         TU ]  [        5       [        5       SS9  [	        SSSS5      U l        g Nr   r   r   r"   r   r   T)r   r)   r   r   r   r%   r   s     r(   r)   HalfBinomialLoss.__init__  s8    $& 	 	

  (1dD9r+   c                 P    [        X5      [        SU-
  SU-
  5      -   nUb  X2-  nU$ r{   r   r   s       r(   rj   )HalfBinomialLoss.constant_to_optimal_zero  s3    V$uQZV'DD$!Dr+   c                 4   UR                   S:X  a$  UR                  S   S:X  a  UR                  S5      n[        R                  " UR                  S   S4UR
                  S9nU R                  R                  U5      USS2S4'   SUSS2S4   -
  USS2S4'   U$ zPredict probabilities.

Parameters
----------
raw_prediction : array of shape (n_samples,) or (n_samples, 1)
    Raw prediction values (in link space).

Returns
-------
proba : array of shape (n_samples, 2)
    Element-wise class probabilities.
        """
        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        proba = np.empty((raw_prediction.shape[0], 2), dtype=raw_prediction.dtype)
        proba[:, 1] = self.link.inverse(raw_prediction)
        proba[:, 0] = 1 - proba[:, 1]
        return proba


class HalfMultinomialLoss(BaseLoss):
    """Categorical cross-entropy loss, for multiclass classification.

Domain:
y_true in {0, 1, 2, 3, .., n_classes - 1}
y_pred has n_classes elements, each element in (0, 1)

Link:
y_pred = softmax(raw_prediction)

Note: We assume y_true to be already label encoded. The inverse link is
softmax. But the full link function is the symmetric multinomial logit
function.

For a given sample x_i, the categorical cross-entropy loss is defined as
the negative log-likelihood of the multinomial distribution, it
generalizes the binary cross-entropy to more than 2 classes::

    loss_i = log(sum(exp(raw_pred_{i, k}), k=0..n_classes-1))
            - sum(y_true_{i, k} * raw_pred_{i, k}, k=0..n_classes-1)

See [1].

Note that for the hessian, we calculate only the diagonal part in the
classes: If the full hessian for classes k and l and sample i is H_i_k_l,
we calculate H_i_k_k, i.e. k=l.
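
Examples
--------
Illustrative sketch only (not a doctest); ``y_true`` holds label-encoded
classes and ``raw_prediction`` has one column per class::

    import numpy as np

    loss = HalfMultinomialLoss(n_classes=3)
    y_true = np.array([0.0, 1.0, 2.0])
    raw_prediction = np.zeros((3, 3))        # uniform class probabilities
    loss.loss(y_true=y_true, raw_prediction=raw_prediction)
    # -> [log(3), log(3), log(3)]
    loss.predict_proba(raw_prediction)       # each row == [1/3, 1/3, 1/3]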

References
----------
.. [1] :arxiv:`Simon, Noah, J. Friedman and T. Hastie.
    "A Blockwise Descent Algorithm for Group-penalized Multiresponse and
    Multinomial Regression".
    <1311.6529>`
    """

    is_multiclass = True

    def __init__(self, sample_weight=None, n_classes=3):
        super().__init__(
            closs=CyHalfMultinomialLoss(),
            link=MultinomialLogit(),
            n_classes=n_classes,
        )
        self.interval_y_true = Interval(0, np.inf, True, False)
        self.interval_y_pred = Interval(0, 1, False, False)

    def in_y_true_range(self, y):
        """Return True if y is in the valid range of y_true.

        Parameters
        ----------
        y : ndarray
        """
        return self.interval_y_true.includes(y) and np.all(y.astype(int) == y)

    def fit_intercept_only(self, y_true, sample_weight=None):
        """Compute raw_prediction of an intercept-only model.

        This is the softmax of the weighted average of the target, i.e. over
        the samples axis=0.
        """
        out = np.zeros(self.n_classes, dtype=y_true.dtype)
        eps = np.finfo(y_true.dtype).eps
        for k in range(self.n_classes):
            out[k] = np.average(y_true == k, weights=sample_weight, axis=0)
            out[k] = np.clip(out[k], eps, 1 - eps)
        return self.link.link(out[None, :]).reshape(-1)

    def predict_proba(self, raw_prediction):
        """Predict probabilities.

        Parameters
        ----------
        raw_prediction : array of shape (n_samples, n_classes)
            Raw prediction values (in link space).

        Returns
        -------
        proba : array of shape (n_samples, n_classes)
            Element-wise class probabilities.
        """
        return self.link.inverse(raw_prediction)

    def gradient_proba(
        self,
        y_true,
        raw_prediction,
        sample_weight=None,
        gradient_out=None,
        proba_out=None,
        n_threads=1,
    ):
        """Compute gradient and class probabilities for raw_prediction.

        Parameters
        ----------
        y_true : C-contiguous array of shape (n_samples,)
            Observed, true target values.
        raw_prediction : array of shape (n_samples, n_classes)
            Raw prediction values (in link space).
        sample_weight : None or C-contiguous array of shape (n_samples,)
            Sample weights.
        gradient_out : None or array of shape (n_samples, n_classes)
            A location into which the gradient is stored. If None, a new array
            might be created.
        proba_out : None or array of shape (n_samples, n_classes)
            A location into which the class probabilities are stored. If None,
            a new array might be created.
        n_threads : int, default=1
            Might use openmp thread parallelism.

        Returns
        -------
        gradient : array of shape (n_samples, n_classes)
            Element-wise gradients.

        proba : array of shape (n_samples, n_classes)
            Element-wise class probabilities.
        """
        if gradient_out is None:
            if proba_out is None:
                gradient_out = np.empty_like(raw_prediction)
                proba_out = np.empty_like(raw_prediction)
            else:
                gradient_out = np.empty_like(proba_out)
        elif proba_out is None:
            proba_out = np.empty_like(gradient_out)

        self.closs.gradient_proba(
            y_true=y_true,
            raw_prediction=raw_prediction,
            sample_weight=sample_weight,
            gradient_out=gradient_out,
            proba_out=proba_out,
            n_threads=n_threads,
        )
        return gradient_out, proba_out


class ExponentialLoss(BaseLoss):
    """Exponential loss with (half) logit link, for binary classification.

This is also known as boosting loss.

Domain:
y_true in [0, 1], i.e. regression on the unit interval
y_pred in (0, 1), i.e. boundaries excluded

Link:
y_pred = expit(2 * raw_prediction)

For a given sample x_i, the exponential loss is defined as::

    loss(x_i) = y_true_i * exp(-raw_pred_i) + (1 - y_true_i) * exp(raw_pred_i)

See:
- J. Friedman, T. Hastie, R. Tibshirani.
  "Additive logistic regression: a statistical view of boosting (With discussion
  and a rejoinder by the authors)." Ann. Statist. 28 (2) 337 - 407, April 2000.
  https://doi.org/10.1214/aos/1016218223
- A. Buja, W. Stuetzle, Y. Shen. (2005).
  "Loss Functions for Binary Class Probability Estimation and Classification:
  Structure and Applications."

Note that the formulation works for classification, y = {0, 1}, as well as
"exponential logistic" regression, y = [0, 1].
Note that this is a proper scoring rule, but without its canonical link.

More details: Inserting the predicted probability
y_pred = expit(2 * raw_prediction) in the loss gives::

    loss(x_i) = y_true_i * sqrt((1 - y_pred_i) / y_pred_i)
        + (1 - y_true_i) * sqrt(y_pred_i / (1 - y_pred_i))
    """

    def __init__(self, sample_weight=None):
        super().__init__(
            closs=CyExponentialLoss(),
            link=HalfLogitLink(),
            n_classes=2,
        )
        self.interval_y_true = Interval(0, 1, True, True)

    def constant_to_optimal_zero(self, y_true, sample_weight=None):
        # This is non-zero only if y_true is neither 0 nor 1.
        term = -2 * np.sqrt(y_true * (1 - y_true))
        if sample_weight is not None:
            term *= sample_weight
        return term

    def predict_proba(self, raw_prediction):
        """Predict probabilities.

        Parameters
        ----------
        raw_prediction : array of shape (n_samples,) or (n_samples, 1)
            Raw prediction values (in link space).

        Returns
        -------
        proba : array of shape (n_samples, 2)
            Element-wise class probabilities.
        """
        # Be graceful to shape (n_samples, 1) -> (n_samples,).
        if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
            raw_prediction = raw_prediction.squeeze(1)
        proba = np.empty((raw_prediction.shape[0], 2), dtype=raw_prediction.dtype)
        proba[:, 1] = self.link.inverse(raw_prediction)
        proba[:, 0] = 1 - proba[:, 1]
        return proba


_LOSSES = {
    "squared_error": HalfSquaredError,
    "absolute_error": AbsoluteError,
    "pinball_loss": PinballLoss,
    "huber_loss": HuberLoss,
    "poisson_loss": HalfPoissonLoss,
    "gamma_loss": HalfGammaLoss,
    "tweedie_loss": HalfTweedieLoss,
    "binomial_loss": HalfBinomialLoss,
    "multinomial_loss": HalfMultinomialLoss,
    "exponential_loss": ExponentialLoss,
}