
    -i6                        S r SSKrSSKrSSKrSSKJr  SSKJrJr  SSK	J
r
Jr  SSKrSSKrSSKJrJr  SSKJr  SS	KJrJrJr  S
SKJr  S
SKJrJrJrJr  \" SSSS9r\" SSSS9r \RB                  " \"5      r#\" \" 1 Sk5      S/\$\RJ                  S/S/S/S/S/S/S/\" \S
SSS9/\" \SSSS9/S.
SS9SSSSSSSSSS S.
S! j5       r& S$S" jr'S# r(g)%zKDDCUP 99 dataset.

A classic dataset for anomaly detection.

The dataset page is available from UCI Machine Learning Repository

https://archive.ics.uci.edu/ml/machine-learning-databases/kddcup99-mld/kddcup.data.gz

    N)GzipFile)IntegralReal)existsjoin   )Bunchcheck_random_state)shuffle)Interval
StrOptionsvalidate_params   )get_data_home)RemoteFileMetadata_convert_data_dataframe_fetch_remote
load_descrkddcup99_dataz.https://ndownloader.figshare.com/files/5976045@3b6c942aa0356c0ca35b7b595a26c89d343652c9db428893e7494f837b274292)filenameurlchecksumkddcup99_10_dataz.https://ndownloader.figshare.com/files/5976042@8045aca0d84e70e622d1148d7df782496f6333bf6eb979a1b0837c42a9fd9561>   SASFhttpsmtpbooleanrandom_stateleft)closedg        neither)
subset	data_homer   r!   	percent10download_if_missing
return_X_yas_frame	n_retriesdelayT)prefer_skip_nested_validationF         ?c        
   	      :   [        US9n[        UUUUU	S9n
U
R                  nU
R                  nU
R                  nU
R
                  nU S:X  a  US:H  n[        R                  " U5      nXSS24   nX   nUUSS24   nUU   nUR                  S   n[        U5      nUR                  SUS5      nUU   nUU   n[        R                  UU4   n[        R                  UU4   nU S:X  d  U S	:X  d  U S
:X  Ga  USS2S4   S:H  n[        R                  XSS24   XSS24   4   nUSS USS -   nX   n[        R                  " USS2S4   S-   R                  [        SS95      USS2S4'   [        R                  " USS2S4   S-   R                  [        SS95      USS2S4'   [        R                  " USS2S4   S-   R                  [        SS95      USS2S4'   U S	:X  aM  USS2S4   S:H  nX   nX   n[        R                  USS2S4   USS2S4   USS2S4   4   nUS   US   US   /nU S
:X  aM  USS2S4   S:H  nX   nX   n[        R                  USS2S4   USS2S4   USS2S4   4   nUS   US   US   /nU S:X  aE  [        R                  USS2S4   USS2S4   USS2S4   USS2S4   4   nUS   US   US   US   /nU(       a  [!        XUS9u  p[#        S5      nSnU(       a  [%        SXX5      u  npU(       a  X4$ ['        UUUUUUS9$ )a  Load the kddcup99 dataset (classification).

Download it if necessary.

=================   ====================================
Classes                                               23
Samples total                                    4898431
Dimensionality                                        41
Features            discrete (int) or continuous (float)
=================   ====================================

Read more in the :ref:`User Guide <kddcup99_dataset>`.

.. versionadded:: 0.18

Parameters
----------
subset : {'SA', 'SF', 'http', 'smtp'}, default=None
    To return the corresponding classical subsets of kddcup 99.
    If None, return the entire kddcup 99 dataset.

data_home : str or path-like, default=None
    Specify another download and cache folder for the datasets. By default
    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

    .. versionadded:: 0.19

shuffle : bool, default=False
    Whether to shuffle dataset.

random_state : int, RandomState instance or None, default=None
    Determines random number generation for dataset shuffling and for
    selection of abnormal samples if `subset='SA'`. Pass an int for
    reproducible output across multiple function calls.
    See :term:`Glossary <random_state>`.

percent10 : bool, default=True
    Whether to load only 10 percent of the data.

download_if_missing : bool, default=True
    If False, raise an OSError if the data is not locally available
    instead of trying to download the data from the source site.

return_X_y : bool, default=False
    If True, returns ``(data, target)`` instead of a Bunch object. See
    below for more information about the `data` and `target` object.

    .. versionadded:: 0.20

as_frame : bool, default=False
    If `True`, returns a pandas Dataframe for the ``data`` and ``target``
    objects in the `Bunch` returned object; `Bunch` return object will also
    have a ``frame`` member.

    .. versionadded:: 0.24

n_retries : int, default=3
    Number of retries when HTTP errors are encountered.

    .. versionadded:: 1.5

delay : float, default=1.0
    Number of seconds between retries.

    .. versionadded:: 1.5

Returns
-------
data : :class:`~sklearn.utils.Bunch`
    Dictionary-like object, with the following attributes.

    data : {ndarray, dataframe} of shape (494021, 41)
        The data matrix to learn. If `as_frame=True`, `data` will be a
        pandas DataFrame.
    target : {ndarray, series} of shape (494021,)
        The regression target for each sample. If `as_frame=True`, `target`
        will be a pandas Series.
    frame : dataframe of shape (494021, 42)
        Only present when `as_frame=True`. Contains `data` and `target`.
    DESCR : str
        The full description of the dataset.
    feature_names : list
        The names of the dataset columns
    target_names: list
        The names of the target columns

(data, target) : tuple if ``return_X_y`` is True
    A tuple of two ndarray. The first containing a 2D array of
    shape (n_samples, n_features) with each row representing one
    sample and each column representing the features. The second
    ndarray of shape (n_samples,) containing the target samples.

    .. versionadded:: 0.20
r&   )r&   r'   r(   r+   r,   r   s   normal.Nr   i1  r   r   r      r      g?F)copy      r   s   https   smtp)r!   zkddcup99.rstfetch_kddcup99)datatargetframetarget_namesfeature_namesDESCR)r   _fetch_brute_kddcup99r8   r9   r<   r;   nplogical_notshaper
   randintr_c_logastypefloatshuffle_methodr   r   r	   )r%   r&   r   r!   r'   r(   r)   r*   r+   r,   kddcup99r8   r9   r<   r;   stnormal_samplesnormal_targetsabnormal_samplesabnormal_targetsn_samples_abnormalrfdescrr:   s                            M/var/www/html/venv/lib/python3.13/site-packages/sklearn/datasets/_kddcup99.pyr7   r7   6   s   t 	2I$/H ==D__F**M((L~j NN1d1:!!9-33A6),7  $6=+A.+A.uu^%556~'778~6)Vv-=BK1uuTSbS&\423</0%cr*]23-??VVT!Q$Z#-55e%5HIQT
VVT!Q$Z#-55e%5HIQT
VVT!Q$Z#-55e%5HIQT
VQT
g%A7DYF55adT!Q$Zad;<D*1-}Q/?qAQRMVQT
g%A7DYF55adT!Q$Zad;<D*1-}Q/?qAQRMT>55adT!Q$ZadT!Q$ZGHDa a a a 	M %dN'FE5dM
t |!#     c                    [        U S9n SnU(       a  [        U SU-   5      n[        nO[        U SU-   5      n[        n[        US5      n[        US5      n	[	        U5      n
/ S[
        4PSPS	PS
PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[
        4PS[        4PS [        4PS![        4PS"[        4PS#[        4PS$[        4PS%[        4PS&[
        4PS'[
        4PS([        4PS)[        4PS*[        4PS+[        4PS,[        4PS-[        4PS.[        4PS/[        4PS0PnU Vs/ s H  oS1   PM	     nnUS2   nUS3S2 nU
(       a/   [        R                  " U5      n[        R                  " U	5      nGOU(       Ga  [        U5        [        R                  S6UR                  -  5        [        XvX4S79  [         R"                  " U5      n[        R%                  S85        [        XgR&                  5      n[)        US9S:9n/ nUR+                  5        HC  nUR-                  5       nUR/                  UR1                  S;S<5      R3                  S=5      5        ME     UR5                  5         [        R%                  S>5        [6        R8                  " U5        [         R:                  " U[<        S?9n[?        S@5       H%  nUS3S32U4   RA                  UU   5      US3S32U4'   M'     US3S32S3S224   nUS3S32S24   n[        RB                  " UUS1SA9  [        RB                  " UU	S1SA9  O[        SB5      e[E        UUUU/SC9$ s  snf ! [         a  n[        S4U S535      UeS3nAff = f)Da  Load the kddcup99 dataset, downloading it if necessary.

Parameters
----------
data_home : str, default=None
    Specify another download and cache folder for the datasets. By default
    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

download_if_missing : bool, default=True
    If False, raise an OSError if the data is not locally available
    instead of trying to download the data from the source site.

percent10 : bool, default=True
    Whether to load only 10 percent of the data.

n_retries : int, default=3
    Number of retries when HTTP errors are encountered.

delay : float, default=1.0
    Number of seconds between retries.

Returns
-------
dataset : :class:`~sklearn.utils.Bunch`
    Dictionary-like object, with the following attributes.

    data : ndarray of shape (494021, 41)
        Each row corresponds to the 41 features in the dataset.
    target : ndarray of shape (494021,)
        Each value corresponds to one of the 21 attack types or to the
        label 'normal.'.
    feature_names : list
        The names of the dataset columns
    target_names: list
        The names of the target columns
    DESCR : str
        Description of the kddcup99 dataset.

r1   z-py3kddcup99_10rI   samplestargetsduration)protocol_typeS4)serviceS11)flagS6	src_bytes	dst_byteslandwrong_fragmenturgenthotnum_failed_logins	logged_innum_compromised
root_shellsu_attemptednum_rootnum_file_creations
num_shellsnum_access_filesnum_outbound_cmdsis_host_loginis_guest_logincount	srv_countserror_ratesrv_serror_ratererror_ratesrv_rerror_ratesame_srv_ratediff_srv_ratesrv_diff_host_ratedst_host_countdst_host_srv_countdst_host_same_srv_ratedst_host_diff_srv_ratedst_host_same_src_port_ratedst_host_srv_diff_host_ratedst_host_serror_ratedst_host_srv_serror_ratedst_host_rerror_ratedst_host_srv_rerror_rate)labelsS16r   Nz7The cache for fetch_kddcup99 is invalid, please delete z! and run the fetch_kddcup99 againzDownloading %s)dirnamer+   r,   zextracting archiverQ   )r   mode
 ,zextraction done)dtype*   )compressz1Data not found and `download_if_missing` is False)r8   r9   r<   r;   )#r   r   ARCHIVE_10_PERCENTARCHIVEr   intrG   joblibload	ExceptionOSError_mkdirploggerinfor   r   r?   r   debugr   r   	readlinesdecodeappendreplacesplitcloseosremoveasarrayobjectrangerF   dumpr	   )r&   r(   r'   r+   r,   
dir_suffix
kddcup_dirarchivesamples_pathtargets_path	availabledtccolumn_namesr;   r<   XyeDTarchive_pathfile_Xylinejs                            rS   r>   r>   
  s   V 	2IJ)]Z%?@
$)Z*%<=

I.L
I.L|$I+
	S+
+
 	+
 		+

 
c+
 
c+
 
+
 
3+
 
3+
 
+
 
c"+
 
c+
 
C +
 
s+
 
+
  
S!+
" 
s##+
$ 
s%+
& 
S!'+
( 
c")+
* 
#++
, 
3-+
. 
#/+
0 
c1+
2 
3+
4 
E"5+
6 
7+
8 
E"9+
: 
% ;+
< 
% =+
> 
u%?+
@ 
3A+
B 
s#C+
D 
"5)E+
F 
"5)G+
H 
'.I+
J 
'.K+
L 
 'M+
N 
$U+O+
P 
 'Q+
R 
$U+S+
T 	U+
BZ #%%"QaD"L%#L "%M	L)AL)A 

$w{{23gYTXXb\)*J(8(89,S9OO%D;;=DIIdll4,22378 & 	&'
		,ZZ&)rA!Q$xr!u-Bq!tH  q#2#vJq"uI
 	A|a0A|a0IJJ#"^	 [ &  	I,?A 	s   N2,N7 7
OOOc                      [         R                  " U 5        g! [         a)  nUR                  [        R                  :w  a  e  SnAgSnAff = f)z_Ensure directory d exists (like mkdir -p on Unix)
No guarantee that the directory is writable.
N)r   makedirsr   errnoEEXIST)dr   s     rS   r   r     s:    
A 77ell" #s    
AAA)NTTr.   r/   ))__doc__r   loggingr   gzipr   numbersr   r   os.pathr   r   r   numpyr?   utilsr	   r
   r   rH   utils._param_validationr   r   r   r   r   _baser   r   r   r   r   r   	getLogger__name__r   strPathLiker7   r>   r    rT   rS   <module>r      s0     	  "     - - K K   8O (8O  
		8	$ :;TB2;;-;'([ ){ kKxD@A4d9=> #'" 
BBL RUXvrT   