
    -i,                       S r SSKrSSKrSSKJrJr  SSKJr  SSKJ	r	  SSK
Jr  SSKJrJr  SSKJrJr  SSKrSS	KJr  S
SKJrJrJrJr  S
SKJrJrJr  S
SKJ r J!r!J"r"  S
SK#J$r$  S
SK%J&r&  S
SK'J(r(  S
SK)J*r*J+r+J,r,  / SQr- " S S5      r. " S S\&5      r/ " S S\&\S9r0 " S S\.\05      r1 " S S\.\05      r2 " S S\0\S9r3 " S  S!\.\35      r4 " S" S#\/\35      r5 " S$ S%\35      r6 " S& S'\/\35      r7 " S( S)\35      r8 " S* S+\/\05      r9 " S, S-\/\05      r: " S. S/\&\S9r; " S0 S1\.\;5      r< " S2 S3\.\;5      r= " S4 S5\&\S9r> " S6 S7\.\>5      r? " S8 S9\/\>5      r@ " S: S;\>5      rASSS< jrB " S= S>\05      rC " S? S@\05      rDSTSASB.SC jjrE\"" \ " \!SSDSESF9\ " \R                  SDSSGSF9S/\ " \!SSDSESF9\ " \R                  SDSSGSF9S/SH/SI/SJS/SK.SLSM9SSSSLSSK.SN j5       rG\H" \GSOSA5        S\I4SP jrJSQ rKSR rLg)Uz
The :mod:`sklearn.model_selection._split` module includes classes and
functions to split the data based on a preset strategy.
    N)ABCMetaabstractmethod)defaultdict)Iterable)	signature)chaincombinations)ceilfloor)comb   )_safe_indexingcheck_random_state	indexablemetadata_routing)_convert_to_numpyensure_common_namespace_deviceget_namespace)Interval
RealNotIntvalidate_params)_approximate_mode)_MetadataRequester)type_of_target)_num_samplescheck_arraycolumn_or_1d)BaseCrossValidator
GroupKFoldGroupShuffleSplitKFoldLeaveOneGroupOutLeaveOneOutLeavePGroupsOut	LeavePOutPredefinedSplitRepeatedKFoldRepeatedStratifiedKFoldShuffleSplitStratifiedGroupKFoldStratifiedKFoldStratifiedShuffleSplitcheck_cvtrain_test_splitc                   0   ^  \ rS rSrSrSU 4S jjrSrU =r$ )_UnsupportedGroupCVMixin;   z/Mixin for splitters that do not support Groups.c                    > Ub2  [         R                  " SU R                  R                   3[        5        [
        TU ]  XUS9$ )a  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : array-like of shape (n_samples,)
    The target variable for supervised learning problems.

groups : object
    Always ignored, exists for compatibility.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.
#The groups parameter is ignored by groups)warningswarn	__class____name__UserWarningsupersplitselfXyr5   r8   s       Q/var/www/html/venv/lib/python3.13/site-packages/sklearn/model_selection/_split.pyr<   _UnsupportedGroupCVMixin.split>   sD    . MM5dnn6M6M5NO w}Q&}11     NN)r9   
__module____qualname____firstlineno____doc__r<   __static_attributes____classcell__r8   s   @rA   r0   r0   ;   s    92 2rC   r0   c                        \ rS rSrSrSS0rSrg)GroupsConsumerMixin]   zA Mixin to ``groups`` by default.

This Mixin makes the object to request ``groups`` by default as ``True``.

.. versionadded:: 1.3
r5   TrD   N)r9   rF   rG   rH   rI   -_GroupsConsumerMixin__metadata_request__splitrJ   rD   rC   rA   rN   rN   ]   s     "*4 0rC   rN   c                   l    \ rS rSrSrS\R                  0rSS jrSS jr	SS jr
\SS j5       rS	 rS
rg)r   h   znBase class for all cross-validators.

Implementations must define `_iter_test_masks` or `_iter_test_indices`.
r5   Nc              #      #    [        XU5      u  pn[        R                  " [        U5      5      nU R	                  XU5       H%  nU[        R
                  " U5         nXE   nXe4v   M'     g7f)al  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : array-like of shape (n_samples,)
    The target variable for supervised learning problems.

groups : array-like of shape (n_samples,), default=None
    Group labels for the samples used while splitting the dataset into
    train/test set.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.
N)r   nparanger   _iter_test_maskslogical_not)r>   r?   r@   r5   indices
test_indextrain_indexs          rA   r<   BaseCrossValidator.splitt   sd     0 !v.f))LO,//f=J!".."<=K ,J)) >s   A+A-c              #      #    U R                  XU5       H-  n[        R                  " [        U5      [        S9nSXT'   Uv   M/     g7f)zoGenerates boolean masks corresponding to test sets.

By default, delegates to _iter_test_indices(X, y, groups)
dtypeTN)_iter_test_indicesrT   zerosr   bool)r>   r?   r@   r5   rY   	test_masks         rA   rV   #BaseCrossValidator._iter_test_masks   s@     
 11!?Ja=I$(I!O @s   AAc                     [         e)z5Generates integer indices corresponding to test sets.)NotImplementedErrorr>   r?   r@   r5   s       rA   r_   %BaseCrossValidator._iter_test_indices   s    !!rC   c                     g)zBReturns the number of splitting iterations in the cross-validator.NrD   rf   s       rA   get_n_splitsBaseCrossValidator.get_n_splits   s    rC   c                     [        U 5      $ N_build_reprr>   s    rA   __repr__BaseCrossValidator.__repr__       4  rC   rD   rE   NNN)r9   rF   rG   rH   rI   r   UNUSED,_BaseCrossValidator__metadata_request__splitr<   rV   r_   r   ri   rp   rJ   rD   rC   rA   r   r   h   sF     "*+;+B+B C*B" Q Q!rC   r   )	metaclassc                   ,    \ rS rSrSrSS jrSS jrSrg)r#      a(  Leave-One-Out cross-validator.

Provides train/test indices to split data in train/test sets. Each
sample is used once as a test set (singleton) while the remaining
samples form the training set.

Note: ``LeaveOneOut()`` is equivalent to ``KFold(n_splits=n)`` and
``LeavePOut(p=1)`` where ``n`` is the number of samples.

Due to the high number of test sets (which is the same as the
number of samples) this cross-validation method can be very costly.
For large datasets one should favor :class:`KFold`, :class:`ShuffleSplit`
or :class:`StratifiedKFold`.

Read more in the :ref:`User Guide <leave_one_out>`.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import LeaveOneOut
>>> X = np.array([[1, 2], [3, 4]])
>>> y = np.array([1, 2])
>>> loo = LeaveOneOut()
>>> loo.get_n_splits(X)
2
>>> print(loo)
LeaveOneOut()
>>> for i, (train_index, test_index) in enumerate(loo.split(X)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[1]
  Test:  index=[0]
Fold 1:
  Train: index=[0]
  Test:  index=[1]

See Also
--------
LeaveOneGroupOut : For splitting the data according to explicit,
    domain-specific stratification of the dataset.
GroupKFold : K-fold iterator variant with non-overlapping groups.
Nc                 n    [        U5      nUS::  a  [        SR                  U5      5      e[        U5      $ )N   z-Cannot perform LeaveOneOut with n_samples={}.)r   
ValueErrorformatrange)r>   r?   r@   r5   	n_sampless        rA   r_   LeaveOneOut._iter_test_indices   s:     O	>?FFyQ  YrC   c                 4    Uc  [        S5      e[        U5      $ )a  Returns the number of splitting iterations in the cross-validator.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : object
    Always ignored, exists for compatibility.

groups : object
    Always ignored, exists for compatibility.

Returns
-------
n_splits : int
    Returns the number of splitting iterations in the cross-validator.
%The 'X' parameter should not be None.)r{   r   rf   s       rA   ri   LeaveOneOut.get_n_splits   s    ( 9DEEArC   rD   rE   )r9   rF   rG   rH   rI   r_   ri   rJ   rD   rC   rA   r#   r#      s    +Z rC   r#   c                   2    \ rS rSrSrS rSS jrSS jrSrg)	r%      a  Leave-P-Out cross-validator.

Provides train/test indices to split data in train/test sets. This results
in testing on all distinct samples of size p, while the remaining n - p
samples form the training set in each iteration.

Note: ``LeavePOut(p)`` is NOT equivalent to
``KFold(n_splits=n_samples // p)`` which creates non-overlapping test sets.

Due to the high number of iterations which grows combinatorically with the
number of samples this cross-validation method can be very costly. For
large datasets one should favor :class:`KFold`, :class:`StratifiedKFold`
or :class:`ShuffleSplit`.

Read more in the :ref:`User Guide <leave_p_out>`.

Parameters
----------
p : int
    Size of the test sets. Must be strictly less than the number of
    samples.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import LeavePOut
>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
>>> y = np.array([1, 2, 3, 4])
>>> lpo = LeavePOut(2)
>>> lpo.get_n_splits(X)
6
>>> print(lpo)
LeavePOut(p=2)
>>> for i, (train_index, test_index) in enumerate(lpo.split(X)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[2 3]
  Test:  index=[0 1]
Fold 1:
  Train: index=[1 3]
  Test:  index=[0 2]
Fold 2:
  Train: index=[1 2]
  Test:  index=[0 3]
Fold 3:
  Train: index=[0 3]
  Test:  index=[1 2]
Fold 4:
  Train: index=[0 2]
  Test:  index=[1 3]
Fold 5:
  Train: index=[0 1]
  Test:  index=[2 3]
c                     Xl         g rl   p)r>   r   s     rA   __init__LeavePOut.__init__4  s    rC   Nc              #     #    [        U5      nX@R                  ::  a%  [        SR                  U R                  U5      5      e[	        [        U5      U R                  5       H  n[        R                  " U5      v   M     g 7f)Nz8p={} must be strictly less than the number of samples={})r   r   r{   r|   r	   r}   rT   array)r>   r?   r@   r5   r~   combinations         rA   r_   LeavePOut._iter_test_indices7  sh      O	JQQFFI 
 (i(8$&&AK((;'' Bs   A?Bc                 l    Uc  [        S5      e[        [        [        U5      U R                  SS95      $ )aq  Returns the number of splitting iterations in the cross-validator.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : object
    Always ignored, exists for compatibility.

groups : object
    Always ignored, exists for compatibility.
r   Texact)r{   intr   r   r   rf   s       rA   ri   LeavePOut.get_n_splitsB  s1     9DEE4Qt<==rC   r   rE   )	r9   rF   rG   rH   rI   r   r_   ri   rJ   rD   rC   rA   r%   r%      s    7r	(>rC   r%   c                   J   ^  \ rS rSrSr\S 5       rSU 4S jjrSS jrSr	U =r
$ )	
_BaseKFoldiV  z;Base class for K-Fold cross-validators and TimeSeriesSplit.c                ~   [        U[        R                  5      (       d  [        SU< S[	        U5      < S35      e[        U5      nUS::  a  [        SR                  U5      5      e[        U[        5      (       d  [        SR                  U5      5      eU(       d  Ub  [        S5      eXl	        X l
        X0l        g )Nz.The number of folds must be of Integral type. z	 of type z was passed.rz   zok-fold cross-validation requires at least one train/test split by setting n_splits=2 or more, got n_splits={0}.z&shuffle must be True or False; got {0}zSetting a random_state has no effect since shuffle is False. You should leave random_state to its default (None), or set shuffle=True.)
isinstancenumbersIntegralr{   typer   r|   ra   	TypeErrorn_splitsshufflerandom_state)r>   r   r   r   s       rA   r   _BaseKFold.__init__Y  s    (G$4$455/7hI  x=q=%%+VH%5  '4((DKKGTUU<3O  !(rC   c              #      >#    [        XU5      u  pn[        U5      nU R                  U:  a%  [        SR	                  U R                  U5      5      e[
        TU ]  XU5       H
  u  pVXV4v   M     g7f)az  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : array-like of shape (n_samples,), default=None
    The target variable for supervised learning problems.

groups : array-like of shape (n_samples,), default=None
    Group labels for the samples used while splitting the dataset into
    train/test set.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.
z\Cannot have number of splits n_splits={0} greater than the number of samples: n_samples={1}.N)r   r   r   r{   r|   r;   r<   )r>   r?   r@   r5   r~   traintestr8   s          rA   r<   _BaseKFold.splity  sq     0 !v.f O	==9$B&	2	  !7=v6KE+ 7s   A/A2c                     U R                   $ as  Returns the number of splitting iterations in the cross-validator.

Parameters
----------
X : object
    Always ignored, exists for compatibility.

y : object
    Always ignored, exists for compatibility.

groups : object
    Always ignored, exists for compatibility.

Returns
-------
n_splits : int
    Returns the number of splitting iterations in the cross-validator.
r   rf   s       rA   ri   _BaseKFold.get_n_splits      & }}rC   )r   r   r   rE   rs   )r9   rF   rG   rH   rI   r   r   r<   ri   rJ   rK   rL   s   @rA   r   r   V  s(    E) )>#J rC   r   c                   D   ^  \ rS rSrSrS	SSS.U 4S jjjrS
S jrSrU =r$ )r!   i  aR	  K-Fold cross-validator.

Provides train/test indices to split data in train/test sets. Split
dataset into k consecutive folds (without shuffling by default).

Each fold is then used once as a validation while the k - 1 remaining
folds form the training set.

Read more in the :ref:`User Guide <k_fold>`.

For visualisation of cross-validation behaviour and
comparison between common scikit-learn split methods
refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

Parameters
----------
n_splits : int, default=5
    Number of folds. Must be at least 2.

    .. versionchanged:: 0.22
        ``n_splits`` default value changed from 3 to 5.

shuffle : bool, default=False
    Whether to shuffle the data before splitting into batches.
    Note that the samples within each split will not be shuffled.

random_state : int, RandomState instance or None, default=None
    When `shuffle` is True, `random_state` affects the ordering of the
    indices, which controls the randomness of each fold. Otherwise, this
    parameter has no effect.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import KFold
>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
>>> y = np.array([1, 2, 3, 4])
>>> kf = KFold(n_splits=2)
>>> kf.get_n_splits(X)
2
>>> print(kf)
KFold(n_splits=2, random_state=None, shuffle=False)
>>> for i, (train_index, test_index) in enumerate(kf.split(X)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[2 3]
  Test:  index=[0 1]
Fold 1:
  Train: index=[0 1]
  Test:  index=[2 3]

Notes
-----
The first ``n_samples % n_splits`` folds have size
``n_samples // n_splits + 1``, other folds have size
``n_samples // n_splits``, where ``n_samples`` is the number of samples.

Randomized CV splitters may return different results for each call of
split. You can make the results identical by setting `random_state`
to an integer.

See Also
--------
StratifiedKFold : Takes class information into account to avoid building
    folds with imbalanced class distributions (for binary or multiclass
    classification tasks).

GroupKFold : K-fold iterator variant with non-overlapping groups.

RepeatedKFold : Repeats K-Fold n times.
FNr   r   c                "   > [         TU ]  XUS9  g N)r   r   r   r;   r   r>   r   r   r   r8   s       rA   r   KFold.__init__      (,WrC   c              #   X  #    [        U5      n[        R                  " U5      nU R                  (       a$  [	        U R
                  5      R                  U5        U R                  n[        R                  " XdU-  [        S9nUS XF-  === S-  sss& SnU H  n	XU	-   pXZU v   UnM     g 7f)Nr]   rz   r   )	r   rT   rU   r   r   r   r   fullr   )r>   r?   r@   r5   r~   rX   r   
fold_sizescurrent	fold_sizestartstops               rA   r_   KFold._iter_test_indices  s      O	))I&<<t00199'B==WWXH'<CH
)Y)*a/*#I!Y#64%%G $s   B(B*rD      rE   )	r9   rF   rG   rH   rI   r   r_   rJ   rK   rL   s   @rA   r!   r!     s*    JXXe$ X X rC   r!   c                   P   ^  \ rS rSrSrS
SSS.U 4S jjjrS rSU 4S jjrS	rU =r	$ )r   i  a	  K-fold iterator variant with non-overlapping groups.

Each group will appear exactly once in the test set across all folds (the
number of distinct groups has to be at least equal to the number of folds).

The folds are approximately balanced in the sense that the number of
samples is approximately the same in each test fold when `shuffle` is True.

Read more in the :ref:`User Guide <group_k_fold>`.

For visualisation of cross-validation behaviour and
comparison between common scikit-learn split methods
refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

Parameters
----------
n_splits : int, default=5
    Number of folds. Must be at least 2.

    .. versionchanged:: 0.22
        ``n_splits`` default value changed from 3 to 5.

shuffle : bool, default=False
    Whether to shuffle the groups before splitting into batches.
    Note that the samples within each split will not be shuffled.

    .. versionadded:: 1.6

random_state : int, RandomState instance or None, default=None
    When `shuffle` is True, `random_state` affects the ordering of the
    indices, which controls the randomness of each fold. Otherwise, this
    parameter has no effect.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

    .. versionadded:: 1.6

Notes
-----
Groups appear in an arbitrary order throughout the folds.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import GroupKFold
>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]])
>>> y = np.array([1, 2, 3, 4, 5, 6])
>>> groups = np.array([0, 0, 2, 2, 3, 3])
>>> group_kfold = GroupKFold(n_splits=2)
>>> group_kfold.get_n_splits(X, y, groups)
2
>>> print(group_kfold)
GroupKFold(n_splits=2, random_state=None, shuffle=False)
>>> for i, (train_index, test_index) in enumerate(group_kfold.split(X, y, groups)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}, group={groups[train_index]}")
...     print(f"  Test:  index={test_index}, group={groups[test_index]}")
Fold 0:
  Train: index=[2 3], group=[2 2]
  Test:  index=[0 1 4 5], group=[0 0 3 3]
Fold 1:
  Train: index=[0 1 4 5], group=[0 0 3 3]
  Test:  index=[2 3], group=[2 2]

See Also
--------
LeaveOneGroupOut : For splitting the data according to explicit
    domain-specific stratification of the dataset.

StratifiedKFold : Takes class information into account to avoid building
    folds with imbalanced class proportions (for binary or multiclass
    classification tasks).
FNr   c                "   > [         TU ]  XUS9  g )Nr   r   r   s       rA   r   GroupKFold.__init___  s    NrC   c              #     #    Uc  [        S5      e[        USSS S9n[        R                  " USS9u  pE[	        U5      nU R
                  U:  a  [        SU R
                  U4-  5      eU R                  (       a  [        U R                  5      nUR                  U5      n[        R                  " X@R
                  5      nU H4  n	[        R                  " X95      n
[        R                  " U
5      S   v   M6     g [        R                  " U5      n[        R                  " U5      S S S	2   nX   n[        R                  " U R
                  5      n[        R                  " [	        U5      5      n[!        U5       H0  u  nn[        R"                  " U5      nUU==   U-  ss'   UXU   '   M2     X   n[%        U R
                  5       H!  n[        R                  " UU:H  5      S   v   M#     g 7f)
N*The 'groups' parameter should not be None.r5   F
input_name	ensure_2dr^   Treturn_inversezOCannot have number of splits n_splits=%d greater than the number of groups: %d.r   )r{   r   rT   uniquelenr   r   r   r   permutationarray_splitisinwherebincountargsortr`   	enumerateargminr}   )r>   r?   r@   r5   unique_groups	group_idxn_groupsrngsplit_groupstest_group_idsrb   n_samples_per_grouprX   n_samples_per_foldgroup_to_foldgroup_indexweightlightest_foldfs                      rA   r_   GroupKFold._iter_test_indicesb  s    >IJJVEQUV#%99VD#I }%==8#259]]H4MN 
 <<$T%6%67COOM:M>>-GL".GGF;	hhy)!,, #/ #%++i"8 jj!45dd;G"5"> "$$--!8 HHS%78M (11D'E#V "		*< ="=1V;16Ck23 (F
 $.G4==)hhw!|,Q// *s   G,G.c                 $   > [         TU ]  XU5      $ al  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : array-like of shape (n_samples,), default=None
    The target variable for supervised learning problems.

groups : array-like of shape (n_samples,)
    Group labels for the samples used while splitting the dataset into
    train/test set.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.
r;   r<   r=   s       rA   r<   GroupKFold.split      0 w}Q6**rC   rD   r   rE   )
r9   rF   rG   rH   rI   r   r_   r<   rJ   rK   rL   s   @rA   r   r     s0    HTOe$ O O/0b+ +rC   r   c                   ^   ^  \ rS rSrSrSSSS.U 4S jjjrSS jrSS jrSU 4S	 jjrS
r	U =r
$ )r+   i  a
  Class-wise stratified K-Fold cross-validator.

Provides train/test indices to split data in train/test sets.

This cross-validation object is a variation of KFold that returns
stratified folds. The folds are made by preserving the percentage of
samples for each class in `y` in a binary or multiclass classification
setting.

Read more in the :ref:`User Guide <stratified_k_fold>`.

For visualisation of cross-validation behaviour and
comparison between common scikit-learn split methods
refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

.. note::

    Stratification on the class label solves an engineering problem rather
    than a statistical one. See :ref:`stratification` for more details.

Parameters
----------
n_splits : int, default=5
    Number of folds. Must be at least 2.

    .. versionchanged:: 0.22
        ``n_splits`` default value changed from 3 to 5.

shuffle : bool, default=False
    Whether to shuffle each class's samples before splitting into batches.
    Note that the samples within each split will not be shuffled.

random_state : int, RandomState instance or None, default=None
    When `shuffle` is True, `random_state` affects the ordering of the
    indices, which controls the randomness of each fold for each class.
    Otherwise, leave `random_state` as `None`.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import StratifiedKFold
>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
>>> y = np.array([0, 0, 1, 1])
>>> skf = StratifiedKFold(n_splits=2)
>>> skf.get_n_splits(X, y)
2
>>> print(skf)
StratifiedKFold(n_splits=2, random_state=None, shuffle=False)
>>> for i, (train_index, test_index) in enumerate(skf.split(X, y)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[1 3]
  Test:  index=[0 2]
Fold 1:
  Train: index=[0 2]
  Test:  index=[1 3]

Notes
-----
The implementation is designed to:

* Generate test sets such that all contain the same distribution of
  classes, or as close as possible.
* Be invariant to class label: relabelling ``y = ["Happy", "Sad"]`` to
  ``y = [1, 0]`` should not change the indices generated.
* Preserve order dependencies in the dataset ordering, when
  ``shuffle=False``: all samples from class k in some test set were
  contiguous in y, or separated in y by samples from classes other than k.
* Generate test sets where the smallest and largest differ by at most one
  sample.

.. versionchanged:: 0.22
    The previous implementation did not follow the last constraint.

See Also
--------
RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.
FNr   c                "   > [         TU ]  XUS9  g r   r   r   s       rA   r   StratifiedKFold.__init__  r   rC   c                    [        U R                  5      n[        U5      u  pEU(       a  [        X$5      nO[        R
                  " U5      n[        U5      nSnXg;  a  [        SR                  Xv5      5      e[        U5      n[        R                  " USSS9u  pn
[        R                  " U	SS9u  pX   n[        U	5      n[        R                  " U5      n[        R                  " U5      n[        R                  " U R                  U:  5      (       a  [        SU R                  -  5      eU R                  U:  a)  [         R"                  " SXR                  4-  [$        5        [        R&                  " U5      n[        R
                  " [)        U R                  5       Vs/ s H(  n[        R                  " UUS U R                  2   US9PM*     sn5      n[        R*                  " [        U5      S	S
9n[)        U5       Hc  n[        R,                  " U R                  5      R/                  US S 2U4   5      nU R0                  (       a  UR1                  U5        UUUU:H  '   Me     U$ s  snf )Nbinary
multiclass1Supported target types are: {}. Got {!r} instead.T)return_indexr   r   Gn_splits=%d cannot be greater than the number of members in each class.SThe least populated class in y has only %d members, which is less than n_splits=%d.)	minlengthir]   )r   r   r   r   rT   asarrayr   r{   r|   r   r   r   r   minallr   r6   r7   r:   sortr}   emptyrU   repeatr   )r>   r?   r@   r   xpis_array_apitype_of_target_yallowed_target_types_y_idxy_inv
class_perm	y_encoded	n_classesy_counts
min_groupsy_orderr   
allocation
test_foldskfolds_for_classs                         rA   _make_test_folds StratifiedKFold._make_test_folds  s    !2!23
 )+!!(A

1A)!,77CJJ(  O))ADN% 		%=%	J	;;y)VVH%
66$--(*++47;}}F  ==:%MM<}}-. 	 '')$ZZ t}}--A GA$6$679M-

 XXc!fC0
y!A !ii6==jA>NOO||O,)8JyA~& " %s   $/I,c              #   r   #    U R                  X5      n[        U R                  5       H	  nXE:H  v   M     g 7frl   )r  r}   r   )r>   r?   r@   r5   r  r   s         rA   rV    StratifiedKFold._iter_test_masksK  s0     **10
t}}%A/! &s   57c                    > Ub2  [         R                  " SU R                  R                   3[        5        [        USSSS9n[        TU ]  XU5      $ )  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

    Note that providing ``y`` is sufficient to generate the splits and
    hence ``np.zeros(n_samples)`` may be used as a placeholder for
    ``X`` instead of actual training data.

y : array-like of shape (n_samples,)
    The target variable for supervised learning problems.
    Stratification is done based on the y labels.

groups : object
    Always ignored, exists for compatibility.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.

Notes
-----
Randomized CV splitters may return different results for each call of
split. You can make the results identical by setting `random_state`
to an integer.
Nr3   r@   Fr   r6   r7   r8   r9   r:   r   r;   r<   r=   s       rA   r<   StratifiedKFold.splitP  S    D MM5dnn6M6M5NO cU$Gw}Q6**rC   rD   r   rl   rE   )r9   rF   rG   rH   rI   r   r  rV   r<   rJ   rK   rL   s   @rA   r+   r+     s6    QfXe$ X XDL"
(+ (+rC   r+   c                   <   ^  \ rS rSrSrSU 4S jjrS rS rSrU =r	$ )r*   i{  a  Class-wise stratified K-Fold iterator variant with non-overlapping groups.

This cross-validation object is a variation of StratifiedKFold attempts to
return stratified folds with non-overlapping groups. The folds are made by
preserving the percentage of samples for each class in `y` in a binary or
multiclass classification setting.

Each group will appear exactly once in the test set across all folds (the
number of distinct groups has to be at least equal to the number of folds).

The difference between :class:`GroupKFold`
and `StratifiedGroupKFold` is that
the former attempts to create balanced folds such that the number of
distinct groups is approximately the same in each fold, whereas
`StratifiedGroupKFold` attempts to create folds which preserve the
percentage of samples for each class as much as possible given the
constraint of non-overlapping groups between splits.

Read more in the :ref:`User Guide <stratified_group_k_fold>`.

For visualisation of cross-validation behaviour and
comparison between common scikit-learn split methods
refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

.. note::

    Stratification on the class label solves an engineering problem rather
    than a statistical one. See :ref:`stratification` for more details.

Parameters
----------
n_splits : int, default=5
    Number of folds. Must be at least 2.

shuffle : bool, default=False
    Whether to shuffle each class's samples before splitting into batches.
    Note that the samples within each split will not be shuffled.
    This implementation can only shuffle groups that have approximately the
    same y distribution, no global shuffle will be performed.

random_state : int or RandomState instance, default=None
    When `shuffle` is True, `random_state` affects the ordering of the
    indices, which controls the randomness of each fold for each class.
    Otherwise, leave `random_state` as `None`.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import StratifiedGroupKFold
>>> X = np.ones((17, 2))
>>> y = np.array([0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
>>> groups = np.array([1, 1, 2, 2, 3, 3, 3, 4, 5, 5, 5, 5, 6, 6, 7, 8, 8])
>>> sgkf = StratifiedGroupKFold(n_splits=3)
>>> sgkf.get_n_splits(X, y)
3
>>> print(sgkf)
StratifiedGroupKFold(n_splits=3, random_state=None, shuffle=False)
>>> for i, (train_index, test_index) in enumerate(sgkf.split(X, y, groups)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"         group={groups[train_index]}")
...     print(f"  Test:  index={test_index}")
...     print(f"         group={groups[test_index]}")
Fold 0:
  Train: index=[ 0  1  2  3  7  8  9 10 11 15 16]
         group=[1 1 2 2 4 5 5 5 5 8 8]
  Test:  index=[ 4  5  6 12 13 14]
         group=[3 3 3 6 6 7]
Fold 1:
  Train: index=[ 4  5  6  7  8  9 10 11 12 13 14]
         group=[3 3 3 4 5 5 5 5 6 6 7]
  Test:  index=[ 0  1  2  3 15 16]
         group=[1 1 2 2 8 8]
Fold 2:
  Train: index=[ 0  1  2  3  4  5  6 12 13 14 15 16]
         group=[1 1 2 2 3 3 3 6 6 7 8 8]
  Test:  index=[ 7  8  9 10 11]
         group=[4 5 5 5 5]

Notes
-----
The implementation is designed to:

* Mimic the behavior of StratifiedKFold as much as possible for trivial
  groups (e.g. when each group contains only one sample).
* Be invariant to class label: relabelling ``y = ["Happy", "Sad"]`` to
  ``y = [1, 0]`` should not change the indices generated.
* Stratify based on samples as much as possible while keeping
  non-overlapping groups constraint. That means that in some cases when
  there is a small number of groups containing a large number of samples
  the stratification will not be possible and the behavior will be close
  to GroupKFold.

See also
--------
StratifiedKFold: Takes class information into account to build folds which
    retain class distributions (for binary or multiclass classification
    tasks).

GroupKFold: K-fold iterator variant with non-overlapping groups.
c                 "   > [         TU ]  XUS9  g r   r   r   s       rA   r   StratifiedGroupKFold.__init__  r   rC   c              #     #    [        U R                  5      n[        R                  " U5      n[	        U5      nSnXV;  a  [        SR                  Xe5      5      e[        U5      n[        R                  " USSS9u  pxn	[        R                  " U R                  U	:  5      (       a  [        SU R                  -  5      e[        R                  " U	5      n
U R                  U
:  a)  [        R                  " SXR                  4-  [        5        [        U	5      n[        R                  " USSS9u  p|n[        R                   " [        U5      U45      n[#        X5       H  u  nnUUU4==   S-  ss'   M     [        R                   " U R                  U45      n[%        [&        5      nU R(                  (       a  UR)                  U5        [        R*                  " [        R,                  " USS9* S	S
9nU H:  nUU   nU R/                  UU	US9nUU==   U-  ss'   UU   R1                  U5        M<     [3        U R                  5       H2  n[5        U5       VVs/ s H  u  nnUUU   ;   d  M  UPM     nnnUv   M4     g s  snnf 7f)Nr   r   T)r   return_countsr   r   rz   axis	mergesortkind)y_counts_per_foldy_cntgroup_y_counts)r   r   rT   r   r   r{   r|   r   r   r   r   r   r6   r7   r:   r   r`   zipr   setr   r   std_find_best_foldaddr}   r   )r>   r?   r@   r5   r   r   r   r   r   r  n_smallest_classr   
groups_inv
groups_cnty_counts_per_group	class_idxr   r  groups_per_foldsorted_groups_idxr  	best_foldr   idxtest_indicess                            rA   r_   'StratifiedGroupKFold._iter_test_indices  sk      !!2!23JJqM)!,77CJJ(  O))Ad$O%66$--%'((47;}}F  66%===++MM<#]]34 	 J	$&II4t%
!z  XXs:	&BC$'$: Iyy)3494 %; HHdmmY%?@%c*<<KK*+ JJVV&Q//k
 +I/	:N,,"3- - I
 i(N:(I&**95 + t}}%A '0
&;&;NC 22 &;  
  &s   IJI>,I>2Jc                    S n[         R                  n[         R                  n[        U R                  5       H  nX==   U-  ss'   [         R                  " XR                  SS5      -  SS9nX==   U-  ss'   [         R                  " U5      n	[         R                  " X   5      n
X:  =(       d!    [         R                  " X5      =(       a    X:  nU(       d  M  U	nU
nUnM     U$ )Nrz   r   r   r  )	rT   infr}   r   r  reshapemeansumisclose)r>   r  r  r  r(  min_evalmin_samples_in_foldr   std_per_class	fold_evalsamples_in_foldis_current_fold_betters               rA   r  $StratifiedGroupKFold._find_best_fold8  s    	66 fft}}%A N2 FF#4}}Q7K#KRSTM N2 .I ff%6%9:O%.%9 &

9/ :#9 # &%$&5#	 & rC   rD   )r   FN)
r9   rF   rG   rH   rI   r   r_   r  rJ   rK   rL   s   @rA   r*   r*   {  s!    fPXOb rC   r*   c                   L   ^  \ rS rSrSrS
SSSS.U 4S jjjrSS jrS rS	rU =r	$ )TimeSeriesSplitiN  a  Time Series cross-validator.

Provides train/test indices to split time-ordered data, where other
cross-validation methods are inappropriate, as they would lead to training
on future data and evaluating on past data.
To ensure comparable metrics across folds, samples must be equally spaced.
Once this condition is met, each test set covers the same time duration,
while the train set size accumulates data from previous splits.

This cross-validation object is a variation of :class:`KFold`.
In the k-th split, it returns the first k folds as the train set and the
(k+1)-th fold as the test set.

Note that, unlike standard cross-validation methods, successive
training sets are supersets of those that come before them.

Read more in the :ref:`User Guide <time_series_split>`.

For visualisation of cross-validation behaviour and
comparison between common scikit-learn split methods
refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

.. versionadded:: 0.18

Parameters
----------
n_splits : int, default=5
    Number of splits. Must be at least 2.

    .. versionchanged:: 0.22
        ``n_splits`` default value changed from 3 to 5.

max_train_size : int, default=None
    Maximum size for a single training set.

test_size : int, default=None
    Used to limit the size of the test set. Defaults to
    ``n_samples // (n_splits + 1)``, which is the maximum allowed value
    with ``gap=0``.

    .. versionadded:: 0.24

gap : int, default=0
    Number of samples to exclude from the end of each train set before
    the test set.

    .. versionadded:: 0.24

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import TimeSeriesSplit
>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
>>> y = np.array([1, 2, 3, 4, 5, 6])
>>> tscv = TimeSeriesSplit()
>>> print(tscv)
TimeSeriesSplit(gap=0, max_train_size=None, n_splits=5, test_size=None)
>>> for i, (train_index, test_index) in enumerate(tscv.split(X)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[0]
  Test:  index=[1]
Fold 1:
  Train: index=[0 1]
  Test:  index=[2]
Fold 2:
  Train: index=[0 1 2]
  Test:  index=[3]
Fold 3:
  Train: index=[0 1 2 3]
  Test:  index=[4]
Fold 4:
  Train: index=[0 1 2 3 4]
  Test:  index=[5]
>>> # Fix test_size to 2 with 12 samples
>>> X = np.random.randn(12, 2)
>>> y = np.random.randint(0, 2, 12)
>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2)
>>> for i, (train_index, test_index) in enumerate(tscv.split(X)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[0 1 2 3 4 5]
  Test:  index=[6 7]
Fold 1:
  Train: index=[0 1 2 3 4 5 6 7]
  Test:  index=[8 9]
Fold 2:
  Train: index=[0 1 2 3 4 5 6 7 8 9]
  Test:  index=[10 11]
>>> # Add in a 2 period gap
>>> tscv = TimeSeriesSplit(n_splits=3, test_size=2, gap=2)
>>> for i, (train_index, test_index) in enumerate(tscv.split(X)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[0 1 2 3]
  Test:  index=[6 7]
Fold 1:
  Train: index=[0 1 2 3 4 5]
  Test:  index=[8 9]
Fold 2:
  Train: index=[0 1 2 3 4 5 6 7]
  Test:  index=[10 11]

For a more extended example see
:ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`.

Notes
-----
The training set has size ``i * n_samples // (n_splits + 1)
+ n_samples % (n_splits + 1)`` in the ``i`` th split,
with a test set of size ``n_samples//(n_splits + 1)`` by default,
where ``n_samples`` is the number of samples. Note that this
formula is only valid when ``test_size`` and ``max_train_size`` are
left to their default values.
Nr   )max_train_size	test_sizegapc                H   > [         TU ]  USS S9  X l        X0l        X@l        g )NFr   )r;   r   r;  r<  r=  )r>   r   r;  r<  r=  r8   s        rA   r   TimeSeriesSplit.__init__  s'    5tD,"rC   c                     Ub2  [         R                  " SU R                  R                   3[        5        U R                  U5      $ )a%  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : array-like of shape (n_samples,)
    Always ignored, exists for compatibility.

groups : array-like of shape (n_samples,)
    Always ignored, exists for compatibility.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.
r3   r6   r7   r8   r9   r:   _splitrf   s       rA   r<   TimeSeriesSplit.split  s>    . MM5dnn6M6M5NO {{1~rC   c              #   "  #    [        U5      u  n[        U5      nU R                  nUS-   nU R                  nU R                  b  U R                  OX$-  nXB:  a  [        SU SU S35      eX%-
  Xc-  -
  S::  a  [        SU SU S	U S
U S3	5      e[        R                  " U5      n[        X#U-  -
  X&5      nU HP  n	X-
  n
U R                  (       a+  U R                  U
:  a  XzU R                  -
  U
 XyX-    4v   MC  USU
 XyX-    4v   MR     g7f)ax  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.
rz   NzCannot have number of folds=z$ greater than the number of samples=.r   zToo many splits=z for number of samples=z with test_size=z	 and gap=)
r   r   r   r=  r<  r{   rT   rU   r}   r;  )r>   r?   r~   r   n_foldsr=  r<  rX   test_starts
test_start	train_ends              rA   rB  TimeSeriesSplit._split  sY    " | O	==Q,hh"nn8DNNi>R 	
 .wi 8//8k<  ?i23q8"8* -;.yk3%qJ 
 ))I&I9(<<iS%J"(I""t':':Y'F(;(;;iH)?@  JY')?@  &s   DD)r=  r;  r<  r   rE   )
r9   rF   rG   rH   rI   r   r<   rB  rJ   rK   rL   s   @rA   r:  r:  N  s.    xtTTq  <4 4rC   r:  c                   @   ^  \ rS rSrSrS rSS jrSU 4S jjrSrU =r	$ )	r"   i$  a  Leave One Group Out cross-validator.

Provides train/test indices to split data such that each training set is
comprised of all samples except ones belonging to one specific group.
Arbitrary domain specific group information is provided as an array of integers
that encodes the group of each sample.

For instance the groups could be the year of collection of the samples
and thus allow for cross-validation against time-based splits.

Read more in the :ref:`User Guide <leave_one_group_out>`.

Notes
-----
Splits are ordered according to the index of the group left out. The first
split has testing set consisting of the group whose index in `groups` is
lowest, and so on.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import LeaveOneGroupOut
>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
>>> y = np.array([1, 2, 1, 2])
>>> groups = np.array([1, 1, 2, 2])
>>> logo = LeaveOneGroupOut()
>>> logo.get_n_splits(X, y, groups)
2
>>> logo.get_n_splits(groups=groups)  # 'groups' is always required
2
>>> print(logo)
LeaveOneGroupOut()
>>> for i, (train_index, test_index) in enumerate(logo.split(X, y, groups)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}, group={groups[train_index]}")
...     print(f"  Test:  index={test_index}, group={groups[test_index]}")
Fold 0:
  Train: index=[2 3], group=[2 2]
  Test:  index=[0 1], group=[1 1]
Fold 1:
  Train: index=[0 1], group=[1 1]
  Test:  index=[2 3], group=[2 2]

See also
--------
GroupKFold: K-fold iterator variant with non-overlapping groups.
c              #      #    Uc  [        S5      e[        USSSS S9n[        R                  " U5      n[	        U5      S::  a  [        SU-  5      eU H	  nX5:H  v   M     g 7f)Nr   r5   TFr   copyr   r^   rz   zcThe groups parameter contains fewer than 2 unique groups (%s). LeaveOneGroupOut expects at least 2.)r{   r   rT   r   r   )r>   r?   r@   r5   r   r   s         rA   rV   !LeaveOneGroupOut._iter_test_masksU  sw     >IJJxde4
 		&)}"=?LM  A+ s   AA!c                 t    Uc  [        S5      e[        USSSS9n[        [        R                  " U5      5      $ )B  Returns the number of splitting iterations in the cross-validator.

Parameters
----------
X : object
    Always ignored, exists for compatibility.

y : object
    Always ignored, exists for compatibility.

groups : array-like of shape (n_samples,)
    Group labels for the samples used while splitting the dataset into
    train/test set. This 'groups' parameter must always be specified to
    calculate the number of splits, though the other parameters can be
    omitted.

Returns
-------
n_splits : int
    Returns the number of splitting iterations in the cross-validator.
Nr   r5   Fr   )r{   r   r   rT   r   rf   s       rA   ri   LeaveOneGroupOut.get_n_splitse  s:    , >IJJVEQUV299V$%%rC   c                 $   > [         TU ]  XU5      $ r   r   r=   s       rA   r<   LeaveOneGroupOut.split  r   rC   rD   rs   rE   )
r9   rF   rG   rH   rI   rV   ri   r<   rJ   rK   rL   s   @rA   r"   r"   $  s    .` &6+ +rC   r"   c                   F   ^  \ rS rSrSrS rS rSS jrS	U 4S jjrSr	U =r
$ )
r$   i  a  Leave P Group(s) Out cross-validator.

Provides train/test indices to split data according to a third-party
provided group. This group information can be used to encode arbitrary
domain specific stratifications of the samples as integers.

For instance the groups could be the year of collection of the samples
and thus allow for cross-validation against time-based splits.

The difference between LeavePGroupsOut and LeaveOneGroupOut is that
the former builds the test sets with all the samples assigned to
``p`` different values of the groups while the latter uses samples
all assigned the same groups.

Read more in the :ref:`User Guide <leave_p_groups_out>`.

Parameters
----------
n_groups : int
    Number of groups (``p``) to leave out in the test split.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import LeavePGroupsOut
>>> X = np.array([[1, 2], [3, 4], [5, 6]])
>>> y = np.array([1, 2, 1])
>>> groups = np.array([1, 2, 3])
>>> lpgo = LeavePGroupsOut(n_groups=2)
>>> lpgo.get_n_splits(X, y, groups)
3
>>> lpgo.get_n_splits(groups=groups)  # 'groups' is always required
3
>>> print(lpgo)
LeavePGroupsOut(n_groups=2)
>>> for i, (train_index, test_index) in enumerate(lpgo.split(X, y, groups)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}, group={groups[train_index]}")
...     print(f"  Test:  index={test_index}, group={groups[test_index]}")
Fold 0:
  Train: index=[2], group=[3]
  Test:  index=[0 1], group=[1 2]
Fold 1:
  Train: index=[1], group=[2]
  Test:  index=[0 2], group=[1 3]
Fold 2:
  Train: index=[0], group=[1]
  Test:  index=[1 2], group=[2 3]

See Also
--------
GroupKFold : K-fold iterator variant with non-overlapping groups.
c                     Xl         g rl   r   )r>   r   s     rA   r   LeavePGroupsOut.__init__  s     rC   c              #     #    Uc  [        S5      e[        USSSS S9n[        R                  " U5      nU R                  [        U5      :  a'  [        SU R                  X@R                  S-   4-  5      e[        [        [        U5      5      U R                  5      nU HP  n[        R                  " [        U5      [        S9nU[        R                  " U5          H
  nSXsU:H  '   M     Uv   MR     g 7f)	Nr   r5   TFrM  zThe groups parameter contains fewer than (or equal to) n_groups (%d) numbers of unique groups (%s). LeavePGroupsOut expects that at least n_groups + 1 (%d) unique groups be presentrz   r]   )r{   r   rT   r   r   r   r	   r}   r`   r   ra   r   )	r>   r?   r@   r5   r   combirX   rY   ls	            rA   rV    LeavePGroupsOut._iter_test_masks  s     >IJJxde4
 		&)==C.. "]]M==1;LMN  U3}#56FG,q/>J"288G#45*.
Q;' 6	 s   C1C3c           	          Uc  [        S5      e[        USSSS9n[        [        [	        [
        R                  " U5      5      U R                  SS95      $ )rQ  Nr   r5   Fr   Tr   )r{   r   r   r   r   rT   r   r   rf   s       rA   ri   LeavePGroupsOut.get_n_splits  sL    , >IJJVEQUV4BIIf-.TJKKrC   c                 $   > [         TU ]  XU5      $ r   r   r=   s       rA   r<   LeavePGroupsOut.split  r   rC   rW  rs   rE   )r9   rF   rG   rH   rI   r   rV   ri   r<   rJ   rK   rL   s   @rA   r$   r$     s#    4l!*L6+ +rC   r$   c                   ^    \ rS rSrSrS\R                  0rSSS.S jrSS jr	SS	 jr
S
 rSrg)_RepeatedSplitsi   a  Repeated splits for an arbitrary randomized CV splitter.

Repeats splits for cross-validators n times with different randomization
in each repetition.

Parameters
----------
cv : callable
    Cross-validator class.

n_repeats : int, default=10
    Number of times cross-validator needs to be repeated.

random_state : int, RandomState instance or None, default=None
    Passes `random_state` to the arbitrary repeating cross validator.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

**cvargs : additional params
    Constructor parameters for cv. Must not contain random_state
    and shuffle.
r5   
   N)	n_repeatsr   c                   ^ [        U[        R                  5      (       d  [        S5      eUS::  a  [        S5      e[	        U4S jS 5       5      (       a  [        S5      eXl        X l        X0l        TU l        g )Nz/Number of repetitions must be of Integral type.r   z-Number of repetitions must be greater than 0.c              3   ,   >#    U  H	  oT;   v   M     g 7frl   rD   ).0keycvargss     rA   	<genexpr>+_RepeatedSplits.__init__.<locals>.<genexpr>E  s     D(Cf}(Cs   r   r   z0cvargs must not contain random_state or shuffle.)	r   r   r   r{   anycvrd  r   ri  )r>   rn  rd  r   ri  s       `rA   r   _RepeatedSplits.__init__>  sj    )W%5%566NOO>LMMD(CDDDOPP"(rC   c              #      #    U R                   n[        U R                  5      n[        U5       HB  nU R                  " SUSS.U R
                  D6nUR                  XU5       H
  u  pX4v   M     MD     g7f)am  Generates indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : array-like of shape (n_samples,)
    The target variable for supervised learning problems.

groups : array-like of shape (n_samples,), default=None
    Group labels for the samples used while splitting the dataset into
    train/test set.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.
Trl  NrD   )rd  r   r   r}   rn  ri  r<   )
r>   r?   r@   r5   rd  r   r)  rn  rZ   rY   s
             rA   r<   _RepeatedSplits.splitM  sm     0 NN	 !2!23#CGc4G4;;GB+-88A&+A'!-- ,B $s   A4A6c                     [        U R                  5      nU R                  " SUSS.U R                  D6nUR	                  XU5      U R
                  -  $ )a<  Returns the number of splitting iterations in the cross-validator.

Parameters
----------
X : object
    Always ignored, exists for compatibility.
    ``np.zeros(n_samples)`` may be used as a placeholder.

y : object
    Always ignored, exists for compatibility.
    ``np.zeros(n_samples)`` may be used as a placeholder.

groups : array-like of shape (n_samples,), default=None
    Group labels for the samples used while splitting the dataset into
    train/test set.

Returns
-------
n_splits : int
    Returns the number of splitting iterations in the cross-validator.
Trl  rD   )r   r   rn  ri  ri   rd  )r>   r?   r@   r5   r   rn  s         rA   ri   _RepeatedSplits.get_n_splitsm  sJ    , !!2!23WWC#tCt{{CqV,t~~==rC   c                     [        U 5      $ rl   rm   ro   s    rA   rp   _RepeatedSplits.__repr__  rr   rC   )rn  ri  rd  r   rE   rs   )r9   rF   rG   rH   rI   r   rt   (_RepeatedSplits__metadata_request__splitr   r<   ri   rp   rJ   rD   rC   rA   rb  rb     s5    6 "*+;+B+B C(* .@>4!rC   rb  c                   8   ^  \ rS rSrSrSSSS.U 4S jjrSrU =r$ )	r'   i  a3  Repeated K-Fold cross validator.

Repeats K-Fold `n_repeats` times with different randomization in each repetition.

Read more in the :ref:`User Guide <repeated_k_fold>`.

Parameters
----------
n_splits : int, default=5
    Number of folds. Must be at least 2.

n_repeats : int, default=10
    Number of times cross-validator needs to be repeated.

random_state : int, RandomState instance or None, default=None
    Controls the randomness of each repeated cross-validation instance.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import RepeatedKFold
>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
>>> y = np.array([0, 0, 1, 1])
>>> rkf = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)
>>> rkf.get_n_splits(X, y)
4
>>> print(rkf)
RepeatedKFold(n_repeats=2, n_splits=2, random_state=2652124)
>>> for i, (train_index, test_index) in enumerate(rkf.split(X)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
...
Fold 0:
  Train: index=[0 1]
  Test:  index=[2 3]
Fold 1:
  Train: index=[2 3]
  Test:  index=[0 1]
Fold 2:
  Train: index=[1 2]
  Test:  index=[0 3]
Fold 3:
  Train: index=[0 3]
  Test:  index=[1 2]

Notes
-----
Randomized CV splitters may return different results for each call of
split. You can make the results identical by setting `random_state`
to an integer.

See Also
--------
RepeatedStratifiedKFold : Repeats Stratified K-Fold n times.
r   rc  Nr   rd  r   c                ,   > [         TU ]  [        X#US9  g N)rd  r   r   )r;   r   r!   r>   r   rd  r   r8   s       rA   r   RepeatedKFold.__init__  s    YH 	 	
rC   rD   r9   rF   rG   rH   rI   r   rJ   rK   rL   s   @rA   r'   r'     s    9v $% 
 
rC   r'   c                   H   ^  \ rS rSrSrSSSS.U 4S jjrS
U 4S jjrS	rU =r$ )r(   i  a  Repeated class-wise stratified K-Fold cross validator.

Repeats Stratified K-Fold n times with different randomization in each
repetition.

Read more in the :ref:`User Guide <repeated_k_fold>`.

.. note::

    Stratification on the class label solves an engineering problem rather
    than a statistical one. See :ref:`stratification` for more details.

Parameters
----------
n_splits : int, default=5
    Number of folds. Must be at least 2.

n_repeats : int, default=10
    Number of times cross-validator needs to be repeated.

random_state : int, RandomState instance or None, default=None
    Controls the generation of the random states for each repetition.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import RepeatedStratifiedKFold
>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
>>> y = np.array([0, 0, 1, 1])
>>> rskf = RepeatedStratifiedKFold(n_splits=2, n_repeats=2,
...     random_state=36851234)
>>> rskf.get_n_splits(X, y)
4
>>> print(rskf)
RepeatedStratifiedKFold(n_repeats=2, n_splits=2, random_state=36851234)
>>> for i, (train_index, test_index) in enumerate(rskf.split(X, y)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
...
Fold 0:
  Train: index=[1 2]
  Test:  index=[0 3]
Fold 1:
  Train: index=[0 3]
  Test:  index=[1 2]
Fold 2:
  Train: index=[1 3]
  Test:  index=[0 2]
Fold 3:
  Train: index=[0 2]
  Test:  index=[1 3]

Notes
-----
Randomized CV splitters may return different results for each call of
split. You can make the results identical by setting `random_state`
to an integer.

See Also
--------
RepeatedKFold : Repeats K-Fold n times.
r   rc  Nrx  c                .   > [         TU ]  [        UUUS9  g rz  )r;   r   r+   r{  s       rA   r    RepeatedStratifiedKFold.__init__  s!    %	 	 	
rC   c                 8   > [        USSSS9n[        TU ]	  XUS9$ )r  r@   FNr   r4   )r   r;   r<   r=   s       rA   r<   RepeatedStratifiedKFold.split  s)    D cU$Gw}Q&}11rC   rD   rl   )	r9   rF   rG   rH   rI   r   r<   rJ   rK   rL   s   @rA   r(   r(     s'    @D $% 
 
#2 #2rC   r(   c                   p    \ rS rSrSrS\R                  0r SSSSS.S jjrSS jr	SS jr
SS	 jrS
 rSrg)BaseShuffleSpliti>  a  Base class for *ShuffleSplit.

Parameters
----------
n_splits : int, default=10
    Number of re-shuffling & splitting iterations.

test_size : float or int, default=None
    If float, should be between 0.0 and 1.0 and represent the proportion
    of the dataset to include in the test split. If int, represents the
    absolute number of test samples. If None, the value is set to the
    complement of the train size. If ``train_size`` is also None, it will
    be set to 0.1.

train_size : float or int, default=None
    If float, should be between 0.0 and 1.0 and represent the
    proportion of the dataset to include in the train split. If
    int, represents the absolute number of train samples. If None,
    the value is automatically set to the complement of the test size.

random_state : int, RandomState instance or None, default=None
    Controls the randomness of the training and testing indices produced.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.
r5   Nr<  
train_sizer   c                B    Xl         X l        X0l        X@l        SU l        g )N皙?)r   r<  r  r   _default_test_size)r>   r   r<  r  r   s        rA   r   BaseShuffleSplit.__init___  s!     !"$("%rC   c              #   j   #    [        XU5      u  pnU R                  XU5       H
  u  pEXE4v   M     g7f)a  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : array-like of shape (n_samples,)
    The target variable for supervised learning problems.

groups : array-like of shape (n_samples,), default=None
    Group labels for the samples used while splitting the dataset into
    train/test set.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.

Notes
-----
Randomized CV splitters may return different results for each call of
split. You can make the results identical by setting `random_state`
to an integer.
N)r   _iter_indicesr>   r?   r@   r5   r   r   s         rA   r<   BaseShuffleSplit.splith  s8     < !v.f--aF;KE+ <s   13c              #     #    [        U5      n[        UU R                  U R                  U R                  S9u  pV[        U R                  5      n[        U R                  5       H$  nUR                  U5      n	U	SU n
XXe-    nX4v   M&     g7f)zGenerate (train, test) indicesdefault_test_sizeN)
r   _validate_shuffle_splitr<  r  r  r   r   r}   r   r   )r>   r?   r@   r5   r~   n_trainn_testr   r   r   ind_test	ind_trains               rA   r  BaseShuffleSplit._iter_indices  s      O	1NNOO"55	
 !!2!23t}}%A//)4K"7F+H#f.>@I%% &s   BBc                     U R                   $ r   r   rf   s       rA   ri   BaseShuffleSplit.get_n_splits  r   rC   c                     [        U 5      $ rl   rm   ro   s    rA   rp   BaseShuffleSplit.__repr__  rr   rC   )r  r   r   r<  r  rc  rE   rs   )r9   rF   rG   rH   rI   r   rt   *_BaseShuffleSplit__metadata_request__splitr   r<   r  ri   rp   rJ   rD   rC   rA   r  r  >  sF    < "*+;+B+B C &(,D& D&$*!rC   r  c                   >   ^  \ rS rSrSr SSSSS.U 4S jjjrSrU =r$ )r)   i  a  Random permutation cross-validator.

Yields indices to split data into training and test sets.

Note: contrary to other cross-validation strategies, random splits
do not guarantee that test sets across all folds will be mutually exclusive,
and might include overlapping samples. However, this is still very likely for
sizeable datasets.

Read more in the :ref:`User Guide <ShuffleSplit>`.

For visualisation of cross-validation behaviour and
comparison between common scikit-learn split methods
refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

Parameters
----------
n_splits : int, default=10
    Number of re-shuffling & splitting iterations.

test_size : float or int, default=None
    If float, should be between 0.0 and 1.0 and represent the proportion
    of the dataset to include in the test split. If int, represents the
    absolute number of test samples. If None, the value is set to the
    complement of the train size. If ``train_size`` is also None, it will
    be set to 0.1.

train_size : float or int, default=None
    If float, should be between 0.0 and 1.0 and represent the
    proportion of the dataset to include in the train split. If
    int, represents the absolute number of train samples. If None,
    the value is automatically set to the complement of the test size.

random_state : int, RandomState instance or None, default=None
    Controls the randomness of the training and testing indices produced.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import ShuffleSplit
>>> X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [3, 4], [5, 6]])
>>> y = np.array([1, 2, 1, 2, 1, 2])
>>> rs = ShuffleSplit(n_splits=5, test_size=.25, random_state=0)
>>> rs.get_n_splits(X)
5
>>> print(rs)
ShuffleSplit(n_splits=5, random_state=0, test_size=0.25, train_size=None)
>>> for i, (train_index, test_index) in enumerate(rs.split(X)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[1 3 0 4]
  Test:  index=[5 2]
Fold 1:
  Train: index=[4 0 2 5]
  Test:  index=[1 3]
Fold 2:
  Train: index=[1 2 4 0]
  Test:  index=[3 5]
Fold 3:
  Train: index=[3 4 1 0]
  Test:  index=[5 2]
Fold 4:
  Train: index=[3 5 1 0]
  Test:  index=[2 4]
>>> # Specify train and test size
>>> rs = ShuffleSplit(n_splits=5, train_size=0.5, test_size=.25,
...                   random_state=0)
>>> for i, (train_index, test_index) in enumerate(rs.split(X)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[1 3 0]
  Test:  index=[5 2]
Fold 1:
  Train: index=[4 0 2]
  Test:  index=[1 3]
Fold 2:
  Train: index=[1 2 4]
  Test:  index=[3 5]
Fold 3:
  Train: index=[3 4 1]
  Test:  index=[5 2]
Fold 4:
  Train: index=[3 5 1]
  Test:  index=[2 4]
Nr  c                4   > [         TU ]  UUUUS9  SU l        g Nr   r<  r  r   r  r;   r   r  r>   r   r<  r  r   r8   s        rA   r   ShuffleSplit.__init__  -     	!%	 	 	
 #&rC   r  r  r}  rL   s   @rA   r)   r)     s%    Zz 	&(,D	& 	&rC   r)   c                   Z   ^  \ rS rSrSr S	SSSS.U 4S jjjrU 4S jrS
U 4S jjrSrU =r	$ )r    i  a  Shuffle-Group(s)-Out cross-validation iterator.

Provides randomized train/test indices to split data according to a
third-party provided group. This group information can be used to encode
arbitrary domain specific stratifications of the samples as integers.

For instance the groups could be the year of collection of the samples
and thus allow for cross-validation against time-based splits.

The difference between :class:`LeavePGroupsOut` and ``GroupShuffleSplit`` is that
the former generates splits using all subsets of size ``p`` unique groups,
whereas ``GroupShuffleSplit`` generates a user-determined number of random
test splits, each with a user-determined fraction of unique groups.

For example, a less computationally intensive alternative to
``LeavePGroupsOut(p=10)`` would be
``GroupShuffleSplit(test_size=10, n_splits=100)``.

Contrary to other cross-validation strategies, the random splits
do not guarantee that test sets across all folds will be mutually exclusive,
and might include overlapping samples. However, this is still very likely for
sizeable datasets.

Note: The parameters ``test_size`` and ``train_size`` refer to groups, and
not to samples as in :class:`ShuffleSplit`.

Read more in the :ref:`User Guide <group_shuffle_split>`.

For visualisation of cross-validation behaviour and
comparison between common scikit-learn split methods
refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

Parameters
----------
n_splits : int, default=5
    Number of re-shuffling & splitting iterations.

test_size : float, int, default=None
    If float, should be between 0.0 and 1.0 and represent the proportion
    of groups to include in the test split (rounded up). If int,
    represents the absolute number of test groups. If None, the value is
    set to the complement of the train size. If ``train_size`` is also None,
    it will be set to 0.2.

train_size : float or int, default=None
    If float, should be between 0.0 and 1.0 and represent the
    proportion of the groups to include in the train split. If
    int, represents the absolute number of train groups. If None,
    the value is automatically set to the complement of the test size.

random_state : int, RandomState instance or None, default=None
    Controls the randomness of the training and testing indices produced.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import GroupShuffleSplit
>>> X = np.ones(shape=(8, 2))
>>> y = np.ones(shape=(8, 1))
>>> groups = np.array([1, 1, 2, 2, 2, 3, 3, 3])
>>> print(groups.shape)
(8,)
>>> gss = GroupShuffleSplit(n_splits=2, train_size=.7, random_state=42)
>>> gss.get_n_splits()
2
>>> print(gss)
GroupShuffleSplit(n_splits=2, random_state=42, test_size=None, train_size=0.7)
>>> for i, (train_index, test_index) in enumerate(gss.split(X, y, groups)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}, group={groups[train_index]}")
...     print(f"  Test:  index={test_index}, group={groups[test_index]}")
Fold 0:
  Train: index=[2 3 4 5 6 7], group=[2 2 2 3 3 3]
  Test:  index=[0 1], group=[1 1]
Fold 1:
  Train: index=[0 1 5 6 7], group=[1 1 3 3 3]
  Test:  index=[2 3 4], group=[2 2 2]

See Also
--------
ShuffleSplit : Shuffles samples to create independent test/train sets.

LeavePGroupsOut : Train set leaves out all possible subsets of `p` groups.
Nr  c                4   > [         TU ]  UUUUS9  SU l        g )Nr  g?r  r  s        rA   r   GroupShuffleSplit.__init__v  r  rC   c              #   N  >#    Uc  [        S5      e[        USSS S9n[        R                  " USS9u  pE[        T
U ]  US9 H^  u  pg[        R                  " [        R                  " XV5      5      n[        R                  " [        R                  " XW5      5      n	X4v   M`     g 7f)Nr   r5   Fr   Tr   )r?   )r{   r   rT   r   r;   r  flatnonzeror   )r>   r?   r@   r5   classesgroup_indicesgroup_train
group_testr   r   r8   s             rA   r  GroupShuffleSplit._iter_indices  s     >IJJVEQUV!#6$!G',w'<w'<'G#K NN277=#FGE>>"''-"DED+ (Hs   B"B%c                 $   > [         TU ]  XU5      $ )a  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

y : array-like of shape (n_samples,), default=None
    The target variable for supervised learning problems.

groups : array-like of shape (n_samples,)
    Group labels for the samples used while splitting the dataset into
    train/test set.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.

Notes
-----
Randomized CV splitters may return different results for each call of
split. You can make the results identical by setting `random_state`
to an integer.
r   r=   s       rA   r<   GroupShuffleSplit.split  s    < w}Q6**rC   r  r   rE   
r9   rF   rG   rH   rI   r   r  r<   rJ   rK   rL   s   @rA   r    r      s4    Up 	&'+4	& 	&+ +rC   r    c                   X   ^  \ rS rSrSr S	SSSS.U 4S jjjrS
S jrS
U 4S jjrSrU =r	$ )r,   i  aD  Class-wise stratified ShuffleSplit cross-validator.

Provides train/test indices to split data in train/test sets.

This cross-validation object is a merge of :class:`StratifiedKFold` and
:class:`ShuffleSplit`, which returns stratified randomized folds. The folds
are made by preserving the percentage of samples for each class in `y` in a
binary or multiclass classification setting.

Note: like the :class:`ShuffleSplit` strategy, stratified random splits
do not guarantee that test sets across all folds will be mutually exclusive,
and might include overlapping samples. However, this is still very likely for
sizeable datasets.

Read more in the :ref:`User Guide <stratified_shuffle_split>`.

For visualisation of cross-validation behaviour and
comparison between common scikit-learn split methods
refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py`

.. note::

    Stratification on the class label solves an engineering problem rather
    than a statistical one. See :ref:`stratification` for more details.

Parameters
----------
n_splits : int, default=10
    Number of re-shuffling & splitting iterations.

test_size : float or int, default=None
    If float, should be between 0.0 and 1.0 and represent the proportion
    of the dataset to include in the test split. If int, represents the
    absolute number of test samples. If None, the value is set to the
    complement of the train size. If ``train_size`` is also None, it will
    be set to 0.1.

train_size : float or int, default=None
    If float, should be between 0.0 and 1.0 and represent the
    proportion of the dataset to include in the train split. If
    int, represents the absolute number of train samples. If None,
    the value is automatically set to the complement of the test size.

random_state : int, RandomState instance or None, default=None
    Controls the randomness of the training and testing indices produced.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import StratifiedShuffleSplit
>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4], [1, 2], [3, 4]])
>>> y = np.array([0, 0, 0, 1, 1, 1])
>>> sss = StratifiedShuffleSplit(n_splits=5, test_size=0.5, random_state=0)
>>> sss.get_n_splits(X, y)
5
>>> print(sss)
StratifiedShuffleSplit(n_splits=5, random_state=0, ...)
>>> for i, (train_index, test_index) in enumerate(sss.split(X, y)):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[5 2 3]
  Test:  index=[4 1 0]
Fold 1:
  Train: index=[5 1 4]
  Test:  index=[0 2 3]
Fold 2:
  Train: index=[5 0 2]
  Test:  index=[4 3 1]
Fold 3:
  Train: index=[4 1 0]
  Test:  index=[2 3 5]
Fold 4:
  Train: index=[0 5 1]
  Test:  index=[3 4 2]
Nr  c                4   > [         TU ]  UUUUS9  SU l        g r  r  r  s        rA   r   StratifiedShuffleSplit.__init__	  r  rC   c           
   #     #    [        U5      n[        USSS S9n[        UU R                  U R                  U R
                  S9u  pV[        U5      u  px[        X'S9nUR                  S:X  aE  [        R                  " U V	s/ s H#  n	SR                  U	R                  S5      5      PM%     sn	5      n[        R                  " US	S
9u  pU
R                  S   n[        R                  " U5      n[        R                   " U5      S:  a  [#        S5      eX\:  a  [#        SX\4-  5      eXl:  a  [#        SXl4-  5      e[        R$                  " [        R&                  " USS9[        R(                  " U5      S S 5      n[+        U R,                  5      n[/        U R0                  5       H  n[3        XU5      nUU-
  n[3        UXo5      n/ n/ n[/        U5       Ha  nUR5                  UU   5      nUU   R7                  USS9nUR9                  US UU    5        UR9                  UUU   UU   UU   -    5        Mc     UR5                  U5      nUR5                  U5      nUU4v   M     g s  sn	f 7f)Nr@   Fr   r  )r   r    strTr   r   zThe least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.zLThe train_size = %d should be greater or equal to the number of classes = %dzKThe test_size = %d should be greater or equal to the number of classes = %dr  r  r   clip)mode)r   r   r  r<  r  r  r   r   ndimrT   r   joinastyper   shaper   r   r{   r<   r   cumsumr   r   r}   r   r   r   takeextend)r>   r?   r@   r5   r~   r  r  r   r   rowr  	y_indicesr   class_countsclass_indicesr   n_iclass_counts_remainingt_ir   r   r   r   perm_indices_class_is                           rA   r  $StratifiedShuffleSplit._iter_indices	  sb     O	cU$G1NNOO"55	
 a a'66Q; C##((3::e#45CDAYYq>MM!$	{{9-66,!##  69@8LM  69?8KL  JJy{3RYY|5LSb5Q
 !!2!23t}}%A $L3?C%1C%7"#$:FHCED9%!ool1o>'4Q'7'<'<[v'<'V$1(CF;<0Q#a&3q6/JK & OOE*E??4(D+) &C Ds   A>I+ *I&*GI+c                    > Ub2  [         R                  " SU R                  R                   3[        5        [        USSSS9n[        TU ]  XU5      $ )a  Generate indices to split data into training and test set.

Parameters
----------
X : array-like of shape (n_samples, n_features)
    Training data, where `n_samples` is the number of samples
    and `n_features` is the number of features.

    Note that providing ``y`` is sufficient to generate the splits and
    hence ``np.zeros(n_samples)`` may be used as a placeholder for
    ``X`` instead of actual training data.

y : array-like of shape (n_samples,) or (n_samples, n_labels)
    The target variable for supervised learning problems.
    Stratification is done based on the y labels.

groups : object
    Always ignored, exists for compatibility.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.

Notes
-----
Randomized CV splitters may return different results for each call of
split. You can make the results identical by setting `random_state`
to an integer.
Nr3   r@   Fr   r  r=   s       rA   r<   StratifiedShuffleSplit.splitV	  r  rC   r  r  rl   r  rL   s   @rA   r,   r,     s6    Nb 	&(,D	& 	&HT(+ (+rC   r,   c                    Uc  Uc  Un[         R                  " U5      R                  R                  n[         R                  " U5      R                  R                  nUS:X  a  X:  d  US::  d  US:X  a&  US::  d  US:  a  [	        SR                  X5      5      eUS:X  a  X :  d  US::  d  US:X  a&  US::  d  US:  a  [	        SR                  X 5      5      eUb   US;  a  [	        SR                  U5      5      eUb   US;  a  [	        S	R                  U5      5      eUS:X  a*  US:X  a$  X!-   S:  a  [	        S
R                  X!-   5      5      eUS:X  a  [        X-  5      nOUS:X  a  [        U5      nUS:X  a  [        X -  5      nOUS:X  a  [        U5      nUc  U W-
  nOUc  U W-
  nWW-   U :  a  [	        SXv-   U 4-  5      e[        U5      [        U5      pgUS:X  a  [	        SR                  XU5      5      eXv4$ )zl
Validation helper to check if the train/test sizes are meaningful w.r.t. the
size of the data (n_samples).
r   r   r   rz   zqtest_size={0} should be either positive and smaller than the number of samples {1} or a float in the (0, 1) rangezrtrain_size={0} should be either positive and smaller than the number of samples {1} or a float in the (0, 1) range)r   r   z Invalid value for train_size: {}zInvalid value for test_size: {}zlThe sum of test_size and train_size = {}, should be in the (0, 1) range. Reduce test_size and/or train_size.z~The sum of train_size and test_size = %d, should be smaller than the number of samples %d. Reduce test_size and/or train_size.zWith n_samples={}, test_size={} and train_size={}, the resulting train set will be empty. Adjust any of the aforementioned parameters.)
rT   r   r^   r  r{   r|   r
   floatr   r   )r~   r<  r  r  test_size_typetrain_size_typer  r  s           rA   r  r  	  s4   
 Z/%	ZZ	*0055Njj,2277O#9#9Y!^#9>Y!^!6)7
 	
 	3J$;zQ3J!OzQ!6*8
 	
 /"C;BB:NOOz!A:AA)LMM#.C"7J<RUV<V::@&AW:X
 	

 i+,	3	y!#
./	C	
#f$		W$)# %-y9:
 	
 'lCKV!|))/	j)Q
 	
 ?rC   c                   >    \ rS rSrSrS rS
S jrS rS rS
S jr	S	r
g)r&   i	  a  Predefined split cross-validator.

Provides train/test indices to split data into train/test sets using a
predefined scheme specified by the user with the ``test_fold`` parameter.

Read more in the :ref:`User Guide <predefined_split>`.

.. versionadded:: 0.16

Parameters
----------
test_fold : array-like of shape (n_samples,)
    The entry ``test_fold[i]`` represents the index of the test set that
    sample ``i`` belongs to. It is possible to exclude sample ``i`` from
    any test set (i.e. include sample ``i`` in every training set) by
    setting ``test_fold[i]`` equal to -1.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import PredefinedSplit
>>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
>>> y = np.array([0, 0, 1, 1])
>>> test_fold = [0, 1, -1, 1]
>>> ps = PredefinedSplit(test_fold)
>>> ps.get_n_splits()
2
>>> print(ps)
PredefinedSplit(test_fold=array([ 0,  1, -1,  1]))
>>> for i, (train_index, test_index) in enumerate(ps.split()):
...     print(f"Fold {i}:")
...     print(f"  Train: index={train_index}")
...     print(f"  Test:  index={test_index}")
Fold 0:
  Train: index=[1 2 3]
  Test:  index=[0]
Fold 1:
  Train: index=[0 2]
  Test:  index=[1 3]
c                     [         R                  " U[        S9U l        [	        U R                  5      U l        [         R
                  " U R                  5      U l        U R                  U R                  S:g     U l        g )Nr]   r   )rT   r   r   	test_foldr   r   unique_folds)r>   r  s     rA   r   PredefinedSplit.__init__	  sW    )37%dnn5IIdnn5 --d.?.?2.EFrC   Nc                     Ub2  [         R                  " SU R                  R                   3[        5        U R                  5       $ )  Generate indices to split data into training and test set.

Parameters
----------
X : object
    Always ignored, exists for compatibility.

y : object
    Always ignored, exists for compatibility.

groups : object
    Always ignored, exists for compatibility.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.
r3   rA  rf   s       rA   r<   PredefinedSplit.split	  s<    , MM5dnn6M6M5NO {{}rC   c              #      #    [         R                  " [        U R                  5      5      nU R	                  5        H%  nU[         R
                  " U5         nX   nX24v   M'     g7f)zGenerate indices to split data into training and test set.

Yields
------
train : ndarray
    The training set indices for that split.

test : ndarray
    The testing set indices for that split.
N)rT   rU   r   r  rV   rW   )r>   indrY   rZ   s       rA   rB  PredefinedSplit._split
  sT      iiDNN+,//1JbnnZ89KJ)) 2s   A$A&c              #      #    U R                    H]  n[        R                  " U R                  U:H  5      S   n[        R                  " [        U R                  5      [        S9nSX2'   Uv   M_     g7f)z3Generates boolean masks corresponding to test sets.r   r]   TN)r  rT   r   r  r`   r   ra   )r>   r   rY   rb   s       rA   rV    PredefinedSplit._iter_test_masks*
  sW     ""A$..A"56q9JT^^!4DAI$(I!O	 #s   A/A1c                 ,    [        U R                  5      $ r   )r   r  rf   s       rA   ri   PredefinedSplit.get_n_splits2
  s    & 4$$%%rC   )r  r  rs   )r9   rF   rG   rH   rI   r   r<   rB  rV   ri   rJ   rD   rC   rA   r&   r&   	  s"    'RG:*"&rC   r&   c                   2    \ rS rSrSrS rSS jrSS jrSrg)	_CVIterableWrapperiH
  z5Wrapper class for old style cv objects and iterables.c                 $    [        U5      U l        g rl   )listrn  )r>   rn  s     rA   r   _CVIterableWrapper.__init__K
  s    r(rC   Nc                 ,    [        U R                  5      $ r   )r   rn  rf   s       rA   ri   _CVIterableWrapper.get_n_splitsN
  s    & 477|rC   c              #   @   #    U R                    H
  u  pEXE4v   M     g7f)r  Nrn  r  s         rA   r<   _CVIterableWrapper.splitc
  s     ,  77KE+ #s   r  rs   )	r9   rF   rG   rH   rI   r   ri   r<   rJ   rD   rC   rA   r  r  H
  s    ?*rC   r  F)
classifierc                   U c  SOU n [        U [        R                  5      (       a.  U(       a  Ub  [        USS9S;   a  [	        U 5      $ [        U 5      $ [        U S5      (       a  [        U [        5      (       aC  [        U [        5      (       a  [        U [        5      (       a  [        SU -  5      e[        U 5      $ U $ )a  Input checker utility for building a cross-validator.

Parameters
----------
cv : int, cross-validation generator, iterable or None, default=5
    Determines the cross-validation splitting strategy.
    Possible inputs for cv are:
    - None, to use the default 5-fold cross validation,
    - integer, to specify the number of folds.
    - :term:`CV splitter`,
    - An iterable that generates (train, test) splits as arrays of indices.

    For integer/None inputs, if classifier is True and ``y`` is either
    binary or multiclass, :class:`StratifiedKFold` is used. In all other
    cases, :class:`KFold` is used.

    Refer :ref:`User Guide <cross_validation>` for the various
    cross-validation strategies that can be used here.

    .. versionchanged:: 0.22
        ``cv`` default value changed from 3-fold to 5-fold.

y : array-like, default=None
    The target variable for supervised learning problems.

classifier : bool, default=False
    Whether the task is a classification task, in which case
    stratified KFold will be used.

Returns
-------
checked_cv : a cross-validator instance.
    The return value is a cross-validator which generates the train/test
    splits via the ``split`` method.

Examples
--------
>>> from sklearn.model_selection import check_cv
>>> check_cv(cv=5, y=None, classifier=False)
KFold(...)
>>> check_cv(cv=5, y=[1, 1, 0, 0, 0, 0], classifier=True)
StratifiedKFold(...)
r   r@   )r   r   r<   ziExpected cv as an integer, cross-validation object (from sklearn.model_selection) or an iterable. Got %s.)r   r   r   r   r+   r!   hasattrr  r   r{   r  )rn  r@   r  s      rA   r-   r-   }
  s    X jbB"g&&''c26NN"2&&92w:b##6#6"h'':b#+>+>*,./ 
 ""%%IrC   rz   neither)closedleftr   booleanz
array-like)r<  r  r   r   stratifyT)prefer_skip_nested_validationc                   ^^ [        U5      nUS:X  a  [        S5      e[        U6 n[        US   5      n[	        XpUSS9u  pUSL a>  Ub  [        S5      e[
        R                  " U5      m[
        R                  " XU	-   5      mO6Ub  [        n
O[        n
U
" XUS9n[        UR                  US   US95      u  mm[        US   TT5      u  mm[        [        R                  " UU4S	 jU 5       5      5      $ )
aC  Split arrays or matrices into random train and test subsets.

Quick utility that wraps input validation,
``next(ShuffleSplit().split(X, y))``, and application to input data
into a single call for splitting (and optionally subsampling) data into a
one-liner.

Read more in the :ref:`User Guide <cross_validation>`.

Parameters
----------
*arrays : sequence of indexables with same length / shape[0]
    Allowed inputs are lists, numpy arrays, scipy-sparse
    matrices or pandas dataframes.

test_size : float or int, default=None
    If float, should be between 0.0 and 1.0 and represent the proportion
    of the dataset to include in the test split. If int, represents the
    absolute number of test samples. If None, the value is set to the
    complement of the train size. If ``train_size`` is also None, it will
    be set to 0.25.

train_size : float or int, default=None
    If float, should be between 0.0 and 1.0 and represent the
    proportion of the dataset to include in the train split. If
    int, represents the absolute number of train samples. If None,
    the value is automatically set to the complement of the test size.

random_state : int, RandomState instance or None, default=None
    Controls the shuffling applied to the data before applying the split.
    Pass an int for reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

shuffle : bool, default=True
    Whether or not to shuffle the data before splitting. If shuffle=False
    then stratify must be None.

stratify : array-like, default=None
    If not None, data is split in a stratified fashion, using this as
    the class labels.
    Read more in the :ref:`User Guide <stratification>`.

Returns
-------
splitting : list, length=2 * len(arrays)
    List containing train-test split of inputs.

    .. versionadded:: 0.16
        If the input is sparse, the output will be a
        ``scipy.sparse.csr_matrix``. Else, output type is the same as the
        input type.

Examples
--------
>>> import numpy as np
>>> from sklearn.model_selection import train_test_split
>>> X, y = np.arange(10).reshape((5, 2)), range(5)
>>> X
array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])
>>> list(y)
[0, 1, 2, 3, 4]

>>> X_train, X_test, y_train, y_test = train_test_split(
...     X, y, test_size=0.33, random_state=42)
...
>>> X_train
array([[4, 5],
       [0, 1],
       [6, 7]])
>>> y_train
[2, 0, 3]
>>> X_test
array([[2, 3],
       [8, 9]])
>>> y_test
[1, 4]

>>> train_test_split(y, shuffle=False)
[[0, 1, 2], [3, 4]]

>>> from sklearn import datasets
>>> iris = datasets.load_iris(as_frame=True)
>>> X, y = iris['data'], iris['target']
>>> X.head()
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                5.1               3.5                1.4               0.2
1                4.9               3.0                1.4               0.2
2                4.7               3.2                1.3               0.2
3                4.6               3.1                1.5               0.2
4                5.0               3.6                1.4               0.2
>>> y.head()
0    0
1    0
2    0
3    0
4    0
...

>>> X_train, X_test, y_train, y_test = train_test_split(
... X, y, test_size=0.33, random_state=42)
...
>>> X_train.head()
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
96                 5.7               2.9                4.2               1.3
105                7.6               3.0                6.6               2.1
66                 5.6               3.0                4.5               1.5
0                  5.1               3.5                1.4               0.2
122                7.7               2.8                6.7               2.0
>>> y_train.head()
96     1
105    2
66     1
0      0
122    2
...
>>> X_test.head()
    sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
73                 6.1               2.8                4.7               1.2
18                 5.7               3.8                1.7               0.3
118                7.7               2.6                6.9               2.3
78                 6.0               2.9                4.5               1.5
76                 6.8               2.8                4.8               1.4
>>> y_test.head()
73     1
18     0
118    2
78     1
76     1
...
r   z$At least one array required as inputg      ?r  Fz@Stratified train/test split is not implemented for shuffle=Falser  )r?   r@   c              3   T   >#    U  H  n[        UT5      [        UT5      4v   M     g 7frl   )r   )rg  ar   r   s     rA   rj  #train_test_split.<locals>.<genexpr>  s'      
IOA^Au%~a'>?s   %()r   r{   r   r   r  rT   rU   r,   r)   nextr<   r   r  r   from_iterable)r<  r  r   r   r  arraysn_arraysr~   r  r  CVClassrn  r   r   s               @@rA   r.   r.   
  s   @ 6{H1}?@@FVAY'I-jDOG %R  		'"yyF"23 ,G"GvU288fQi88<=t0E4HKE4 
IO
 	
 rC   __test__c                    [         R                  " 5       n[         R                  " SSSS9  [        5       nUnSSUS-  -   S-  -   n[	        [        U R                  5       5      5       H  u  nu  p[        U	[        5      (       a  U< S[        U	5      < 3n
OU< SU" U	5      < 3n
[        U
5      S	:  a  U
S
S S-   U
SS
 -   n
US:  aK  U[        U
5      -   S:  d  SU
;   a  UR                  U5        [        U5      nOUR                  S5        US-  nUR                  U
5        U[        U
5      -  nM     [         R                  " S0 UD6  SR                  U5      nSR                  S UR                  S5       5       5      nU$ )a>  Pretty print the dictionary 'params'

Parameters
----------
params : dict
    The dictionary to pretty print

offset : int, default=0
    The offset in characters to add at the begin of each line.

printer : callable, default=repr
    The function to convert entries to strings, typically
    the builtin str or repr

r   @   r   )	precision	threshold	edgeitemsz,
rz   r  =i  Ni,  z...ir   K   
z,  c              3   B   #    U  H  oR                  S 5      v   M     g7f)r  N)rstrip)rg  r[  s     rA   rj  _pprint.<locals>.<genexpr>  s     ?->hhsmm->s   rD   )rT   get_printoptionsset_printoptionsr  r   sorteditemsr   r  r  r   appendr  r<   )paramsoffsetprinteroptionsparams_listthis_line_lengthline_sepr   r  v	this_reprliness               rA   _pprintr    sm   " !!#G!rQ?&KFaK3..Hvflln56	6Aa $%c!f-I $%gaj1Iy>C!$3%/)DE2BBIq5#i.0B6$):K""8,#&x= ""4( A% 9%C	N*' 7* "'"GGK EII?U[[->??ELrC   c                 2   U R                   n[        UR                  SUR                  5      n[        U5      nU[        R                  L a  / nOl[        UR                  R                  5        Vs/ s H=  nUR                  S:w  d  M  UR                  UR                  :w  d  M1  UR                  PM?     sn5      nU R                   R                  n[        5       nU H  n[        R                  " S[        5         [        R                   " SS9 n	[        XS 5      n
U
c-  [#        U S5      (       a  U R$                  R'                  US 5      n
S S S 5        [)        W	5      (       a8  U	S   R*                  [        L a"   [        R,                  R/                  S5        M  [        R,                  R/                  S5        W
Xx'   M     U< S[1        U[)        U5      S	9< S
3$ s  snf ! , (       d  f       N= f! [        R,                  R/                  S5        f = f)Ndeprecated_originalr>   alwaysT)recordri  r   ()r  ))r8   getattrr   r   objectr  
parametersvaluesnamer  VAR_KEYWORDr9   dictr6   simplefilterFutureWarningcatch_warningsr  ri  getr   categoryfilterspopr  )r>   clsinitinit_signatureargsr   
class_namer  rh  wvalues              rA   rn   rn     s   
..C3<<!6EDt_Nv (2299;;A66V# ()!--(? ;
 ((JVF
 	h6		$((540=WT8%<%< KKOOC6E 6 1vv!A$--=8  #H  #! $ "76#j/#JKK5 65   #s6   0GG$G<G5=G$.G5$
G2	.G55!Hc                     [        U SS5      n[        U SS5      n[        U[        R                  5      =(       d    U(       + $ )Nr   Tr   r   )r   r   r   r   )rn  r   r   s      rA   _yields_constant_splitsr6    s:     b)T*G2~q1LlG$4$45DWDrC   rl   )r   N)MrI   r   r6   abcr   r   collectionsr   collections.abcr   inspectr   	itertoolsr   r	   mathr
   r   numpyrT   scipy.specialr   utilsr   r   r   r   utils._array_apir   r   r   utils._param_validationr   r   r   utils.extmathr   utils.metadata_routingr   utils.multiclassr   utils.validationr   r   r   __all__r0   rN   r   r#   r%   r   r!   r   r+   r*   r:  r"   r$   rb  r'   r(   r  r)   r    r,   r  r&   r  r-   r   r.   setattrreprr  rn   r6  rD   rC   rA   <module>rI     s     ' # $  )     
 L K - 7 - F F*2 2D1, 1@!+w @!FL*,> L^Y>(*< Y>x[#w [|]$j ]@W+$j W+tJ+j J+ZP.
 PfSj Slt+*,> t+nB+)+= B+Jh!(G h!V?
,o ?
Dn26 n2bt!)W t!nf&+-= f&RO++-= O+dN+- N+bHVy&( y&x2+ 2j@ @F  ZAi8W%%q$v>
 ZAi8W%%q$v>

 ((;!4( #'!( r%$rp *e , d /d%LPErC   