
    -i"                         S r SSKrSSKJr  SSKJrJr  SSKJ	r	  \" \
\" S15      S/\R                  /S/SS/S	.S
S9SS.S j5       r\" \
\\" S15      S/SS/SS/S.S
S9SS.S j5       rg)z5Utilities for handling weights based on class labels.    N)sparse   )
StrOptionsvalidate_params)_check_sample_weightbalancedz
array-like)class_weightclassesysample_weightT)prefer_skip_nested_validation)r   c                   SSK Jn  [        U5      [        U5      -
  (       a  [        S5      eU b  [	        U 5      S:X  a3  [
        R                  " UR                  S   [
        R                  SS9nU$ U S:X  a  U" 5       nUR                  U5      n[        [
        R                  " XR                  5      5      (       d  [        S5      e[        X25      n[
        R                  " XsS	9nUR                  5       [	        UR                  5      U-  -  n	XR!                  U5         nU$ [
        R                  " UR                  S   [
        R                  SS9n/ n
[#        U5       H#  u  pX;   a  X   X['   M  U
R%                  U5        M%     [	        U5      [	        U
5      -
  nU
(       aB  U[	        U 5      :w  a3  [
        R&                  " U
5      R)                  5       n[        S
U S35      eU$ )a  Estimate class weights for unbalanced datasets.

Parameters
----------
class_weight : dict, "balanced" or None
    If "balanced", class weights will be given by
    `n_samples / (n_classes * np.bincount(y))` or their weighted equivalent if
    `sample_weight` is provided.
    If a dictionary is given, keys are classes and values are corresponding class
    weights.
    If `None` is given, the class weights will be uniform.

classes : ndarray
    Array of the classes occurring in the data, as given by
    `np.unique(y_org)` with `y_org` the original class labels.

y : array-like of shape (n_samples,)
    Array of original class labels per sample.

sample_weight : array-like of shape (n_samples,), default=None
    Array of weights that are assigned to individual samples. Only used when
    `class_weight='balanced'`.

Returns
-------
class_weight_vect : ndarray of shape (n_classes,)
    Array with `class_weight_vect[i]` the weight for i-th class.

References
----------
The "balanced" heuristic is inspired by
Logistic Regression in Rare Events Data, King, Zen, 2001.

Examples
--------
>>> import numpy as np
>>> from sklearn.utils.class_weight import compute_class_weight
>>> y = [1, 1, 1, 1, 0, 0]
>>> compute_class_weight(class_weight="balanced", classes=np.unique(y), y=y)
array([1.5 , 0.75])
   )LabelEncoderz8classes should include all valid labels that can be in yr   C)dtypeorderr   z.classes should have valid labels that are in y)weightszThe classes, z, are not in class_weight)preprocessingr   set
ValueErrorlennponesshapefloat64fit_transformallisinclasses_r   bincountsum	transform	enumerateappendarraytolist)r	   r
   r   r   r   weightley_indweighted_class_counts
recip_frequnweighted_classesicn_weighted_classes$unweighted_classes_user_friendly_strs                  M/var/www/html/venv/lib/python3.13/site-packages/sklearn/utils/class_weight.pycompute_class_weightr3      s   h -
1vGSTTs<0A5q)3G@ M? 
	#^  #2777KK011MNN,]> "E I*..044

 LL12& M! q)3Gg&DA (O	"))!,	 ' !\C0B,CC"4L8I"I3588<N3O3V3V3X0 DE F    
 M    zsparse matrix)r	   r   indices)r5   c          	         [         R                  " U5      (       d=  [        R                  " U5      nUR                  S:X  a  [        R
                  " US5      nUR                  S   nUb  U S:w  a  [        SU  S35      eUS:  ab  U b  [        U [        5      (       a  [        S5      e[        U [        5      (       a*  [        U 5      U:w  a  [        S[        U 5       S	U S
35      e/ n[        U5       GH?  n[         R                  " U5      (       a'  USS2U/4   R                  5       R                  5       nO	USS2U4   n[        R                  " U5      nSnU S:X  d  US:X  a  U n	OX   n	Ubd  Xb   n
[        R                  " U
5      n[        R                   " [#        XU
S9[        R$                  " X5      SS9n['        U5      ['        U5      -
  nO
[#        XUS9nU[        R$                  " Xv5         nU(       a#  SU[        R(                  " U[        U5      5      '   UR+                  U5        GMB     [        R,                  " US[        R.                  S9nU$ )a<  Estimate sample weights by class for unbalanced datasets.

Parameters
----------
class_weight : dict, list of dicts, "balanced", or None
    Weights associated with classes in the form `{class_label: weight}`.
    If not given, all classes are supposed to have weight one. For
    multi-output problems, a list of dicts can be provided in the same
    order as the columns of y.

    Note that for multioutput (including multilabel) weights should be
    defined for each class of every column in its own dict. For example,
    for four-class multilabel classification weights should be
    `[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}]` instead of
    `[{1:1}, {2:5}, {3:1}, {4:1}]`.

    The `"balanced"` mode uses the values of y to automatically adjust
    weights inversely proportional to class frequencies in the input data:
    `n_samples / (n_classes * np.bincount(y))`.

    For multi-output, the weights of each column of y will be multiplied.

y : {array-like, sparse matrix} of shape (n_samples,) or (n_samples, n_outputs)
    Array of original class labels per sample.

indices : array-like of shape (n_subsample,), default=None
    Array of indices to be used in a subsample. Can be of length less than
    `n_samples` in the case of a subsample, or equal to `n_samples` in the
    case of a bootstrap subsample with repeated indices. If `None`, the
    sample weight will be calculated over the full sample. Only `"balanced"`
    is supported for `class_weight` if this is provided.

Returns
-------
sample_weight_vect : ndarray of shape (n_samples,)
    Array with sample weights as applied to the original `y`.

Examples
--------
>>> from sklearn.utils.class_weight import compute_sample_weight
>>> y = [1, 1, 1, 1, 0, 0]
>>> compute_sample_weight(class_weight="balanced", y=y)
array([0.75, 0.75, 0.75, 0.75, 1.5 , 1.5 ])
r   )r   Nr   zAThe only valid class_weight for subsampling is 'balanced'. Given .zSFor multi-output, class_weight should be a list of dicts, or the string 'balanced'.zYFor multi-output, number of elements in class_weight should match number of outputs. Got z element(s) while having z	 outputs.)r
   r   clip)modeg        r   )axisr   )r   issparser   
atleast_1dndimreshaper   r   
isinstancedictlistr   rangetoarrayflattenuniquetaker3   searchsortedr   r   r%   prodr   )r	   r   r5   	n_outputsexpanded_class_weightky_fullclasses_fullclasses_missingclass_weight_ky_subsampleclasses_subsampleweight_ks                r2   compute_sample_weightrT   j   s=   n ??1MM!66Q;

1g&A
I|z9!N!%
 	
 
Q:lD#A#A%  d++L0AY0N**-l*;)<<U+Y(  9??1q1#vY&&(002Fq!tWFyy(:%a)N)_N !/K "		+ 6ww$"  1@H ",/#6G2HHO+H BOOLAB?BHRWWVT/%:;<$$X.S V GG$9T  r4   )__doc__numpyr   scipyr   _param_validationr   r   
validationr   rA   ndarrayr3   rB   rT    r4   r2   <module>r\      s    ;
   : , z:,7>JJ<^&-	 #' EI QQh tZ%=tDO, $'
 #' 7; u!u!r4   