
    -i>                     r   S SK r S SKrS SKrS SKrS SKrS SKrS SKJr  S SKJ	r	  S SK
Jr  S SKJr  S SKJrJrJrJrJrJrJr  S SKJrJr  S SKJr  S S	KJrJr  S S
KJr  S SK r!SSK"J#r#  SSK$J%r%  SSK&J'r'J(r(J)r)J*r*J+r+  SSK,J-r-  SSK.J/r/  S/r0Sr1Sr2Sr3Sr4\\\5\54      r6\\\5\54      r7S\5S\5S\54S jr8 SSS\5S\\5   S\\9   S\4S jjr: STS\;S \<S!\5S\4S" jjr= SUS!\5S\\5   S\;S \<4S# jjr> " S$ S%\?5      r@  SUS!\5S&\\5   S\\5   S\;S \<S\4S' jjrA  SUS(\5S)\\;\54   S\\5   S\;S \<4
S* jjrB  SUS+\;S\\5   S\;S \<S\\5\4   4
S, jjrC  SUS+\;S\\5   S\;S \<S\74
S- jjrD  SUS+\;S\\5   S\;S \<S\64
S. jjrES/\6S\;4S0 jrF   SVS!\5S\\5   S1\5S2\5S3\GS4\\5   S5\\5   S6\\\;\;4      S7\5S\;S \<S8\\   4S9 jjrHSSSS:.S!\5S;\IS\\5   S<\IS3\\G   S=\\5   S>\\5   S6\\\;\;4      S7\5S\;S \<S1\5S8\\   4S? jjrJS@ rKSA rL\+" \5S/\(" \'SSSBSC9\*" SD15      /\(" \'SSSBSC9S/\5\R                  S/\5\NS/\I/\I/\I\*" SE15      /\(" \'SSSBSC9/\(" \)SFSSGSC9/\*" 1 SHk5      /\GS/SI.SJSK9 SSSDSSSLSJSMSESSSESSN.S(\\5   S)\\5\;4   S+\\;   S\\\5\R                  4      SO\\\5\4      SP\ISQ\IS<\\5\I4   S\;S \<S1\5S8\\   4SR jjj5       rOg)W    N)closingwraps)join)TemporaryDirectory)AnyCallableDictListOptionalTupleUnion)	HTTPErrorURLError)urlparse)Requesturlopen)warn   )Bunch)check_pandas_support)IntegralIntervalReal
StrOptionsvalidate_params   )get_data_home)load_arff_from_gzip_filefetch_openmlzAhttps://api.openml.org/api/v1/json/data/list/data_name/{}/limit/2z*https://api.openml.org/api/v1/json/data/{}z3https://api.openml.org/api/v1/json/data/features/{}z4https://api.openml.org/api/v1/json/data/qualities/{}openml_path	data_homereturnc                 J    [         R                  R                  USU S-   5      $ )Nz
openml.orgz.gz)ospathr   )r!   r"   s     K/var/www/html/venv/lib/python3.13/site-packages/sklearn/datasets/_openml.py_get_local_pathr(   -   s    77<<	<u1DEE    no_retry_exceptionc                    ^ ^^ UUU 4S jnU$ )a  If the first call to the decorated function fails, the local cached
file is removed, and the function is called again. If ``data_home`` is
``None``, then the function is called once. We can provide a specific
exception to not retry on using `no_retry_exception` parameter.
c                 8   >^  [        T 5      UU UU4S j5       nU$ )Nc                  T  > Tc  T" U 0 UD6$  T" U 0 UD6$ ! [          a    e [         a}  nTb  [        UT5      (       a  e [        S[        5        [        TT5      n[        R                  R                  U5      (       a  [        R                  " U5        T" U 0 UD6s S nA$ S nAff = f)Nz!Invalid cache, redownloading file)
r   	Exception
isinstancer   RuntimeWarningr(   r%   r&   existsunlink)argskwexc
local_pathr"   fr*   r!   s       r'   wrapper;_retry_with_clean_cache.<locals>.decorator.<locals>.wrapper=   s     $~"~%&$~"~%  	&%1j+7 7 8.I,[)D
77>>*--IIj)$~"~%	&s    B'A2B"B'"B'r   )r7   r8   r"   r*   r!   s   ` r'   	decorator*_retry_with_clean_cache.<locals>.decorator<   s     	q	& 
	&$ r)    )r!   r"   r*   r:   s   ``` r'   _retry_with_clean_cacher=   1   s    , r)            ?	n_retriesdelayurlc                    ^ ^^ UU U4S jnU$ )a=  If the function call results in a network error, call the function again
up to ``n_retries`` times with a ``delay`` between each call. If the error
has a 412 status code, don't call the function again as this is a specific
OpenML error.
The url parameter is used to give more information to the user about the
error.
c                 8   >^  [        T 5      UU UU4S j5       nU$ )Nc                  R  > Tn  T" U 0 UD6$ ! [         [        4 a  n[        U[        5      (       a  UR                  S:X  a  e US:X  a  e [        ST S35        [        U[        5      (       a  UR                  5         US-  n[        R                  " T5         S nAOS nAff = fM  )N  r   z+A network error occurred while downloading z. Retrying...r   )	r   TimeoutErrorr/   r   coder   closetimesleep)r3   kwargsretry_countererA   r7   r@   rB   s       r'   r8   ;_retry_on_network_error.<locals>.decorator.<locals>.wrappera   s    %M&d-f-- ,/ &!!Y//AFFcM$)EcU-X "!Y//	!Q&MJJu%%& s    B$A<BB$r   )r7   r8   rA   r@   rB   s   ` r'   r:   *_retry_on_network_error.<locals>.decorator`   s     	q	& 
	&* r)   r<   )r@   rA   rB   r:   s   ``` r'   _retry_on_network_errorrQ   U   s    2 r)   c                    S n[        U 5      nUR                  SS5        UcJ  [        X#UR                  5      " [        5      " U5      nU" U5      (       a  [
        R                  " USS9$ U$ [        U 5      R                  R                  S5      n[        Xq5      n[        R                  R                  U5      u  p[        R                  R                  U5      (       d  [        R                  " U	SS	9   [        U	S
9 n[!        [        X#UR                  5      " [        5      " U5      5       nU" U5      (       a  ["        nO[
        R                  nU" [        R                  R%                  X5      S5       n[&        R(                  " Xm5        SSS5        SSS5        [&        R*                  " WR,                  U5        SSS5        [
        R                  " US5      $ ! , (       d  f       NV= f! , (       d  f       N_= f! , (       d  f       NG= f! [.         a<    [        R                  R                  U5      (       a  [        R0                  " U5        e f = f)a  
Returns a resource from OpenML.org. Caches it to data_home if required.

Parameters
----------
url : str
    OpenML URL that will be downloaded and cached locally. The path component
    of the URL is used to replicate the tree structure as sub-folders of the local
    cache folder.

data_home : str
    Directory to which the files will be cached. If None, no caching will
    be applied.

n_retries : int, default=3
    Number of retries when HTTP errors are encountered. Error with status
    code 412 won't be retried as they represent OpenML generic errors.

delay : float, default=1.0
    Number of seconds between retries.

Returns
-------
result : stream
    A stream to the OpenML resource.
c                 H    U R                  5       R                  SS5      S:H  $ )NzContent-Encoding gzip)infoget)_fsrcs    r'   is_gzip_encoded)_open_openml_url.<locals>.is_gzip_encoded   s!    zz| 2B76AAr)   zAccept-encodingrU   Nrb)fileobjmode/T)exist_ok)dirwb)r   
add_headerrQ   full_urlr   rU   GzipFiler   r&   lstripr(   r%   splitr1   makedirsr   r   openr   shutilcopyfileobjmovenamer.   r2   )rB   r"   r@   rA   rY   reqfsrcr!   r6   dir_name	file_nametmpdiropenerfdsts                 r'   _open_openml_urlrt   |   s   <B #,CNN$f-&yFwOPST4  ==D993-$$++C0K 8J''--
3H77>>*%%
Ht,	
 $1V+IcllKGT &t,,!%!%V ?F$**46 G DIIz2 2* ==T** GF  21  	ww~~j))		*%	s\   8	H 0H1AG4<G#G4)HH #
G1-G44
H	>H
HH H AIc                       \ rS rSrSrSrg)OpenMLError   zDHTTP 412 is a specific OpenML error code, indicating a generic errorr<   N)__name__
__module____qualname____firstlineno____doc____static_attributes__r<   r)   r'   rv   rv      s    Nr)   rv   error_messagec                    ^ ^^^ [        T TS9UUUU 4S j5       n U" 5       $ ! [         a  nUR                  S:w  a  Ue SnAOSnAff = f[        U5      e)a2  
Loads json data from the openml api.

Parameters
----------
url : str
    The URL to load from. Should be an official OpenML endpoint.

error_message : str or None
    The error message to raise if an acceptable OpenML error is thrown
    (acceptable error is, e.g., data id not found. Other errors, like 404's
    will throw the native error message).

data_home : str or None
    Location to cache the response. None if no cache is required.

n_retries : int, default=3
    Number of retries when HTTP errors are encountered. Error with status
    code 412 won't be retried as they represent OpenML generic errors.

delay : float, default=1.0
    Number of seconds between retries.

Returns
-------
json_data : json
    the json result from the OpenML server if the call was successful.
    An exception otherwise.
r"   c            	         > [        [        TTTTS95       n [        R                  " U R	                  5       R                  S5      5      sS S S 5        $ ! , (       d  f       g = f)Nr@   rA   zutf-8)r   rt   jsonloadsreaddecode)responser"   rA   r@   rB   s    r'   
_load_json5_get_json_content_from_openml_api.<locals>._load_json   sI    S)yN
::hmmo44W=>
 
 
s   3A
A"rF   N)r=   r   rH   rv   )rB   r~   r"   r@   rA   r   errors   ` ```  r'   !_get_json_content_from_openml_apir      sa    J SI6? 7?|  ::K  m
$$s   # 
A	AA	rl   versionc           	         US:X  a  [         R                  U 5      S-   nSR                  U 5      n[        UUUUUS9nUS   S   n[        U5      S:  aD  US   S	   =pS
U  SU	 S3n
U H!  nU
SUS	    SUS    S3-  n
U
SUS    S3-  n
M#     [	        U
5        US   $ [         S-   R                  X5      n [        USUUUS9nUS   S   S   $ ! [
         a&    US-  nSR                  X5      n[        UUUUUS9n N:f = f)a  
Utilizes the openml dataset listing api to find a dataset by
name/version
OpenML api function:
https://www.openml.org/api_docs#!/data/get_data_list_data_name_data_name

Parameters
----------
name : str
    name of the dataset

version : int or str
    If version is an integer, the exact name/version will be obtained from
    OpenML. If version is a string (value: "active") it will take the first
    version from OpenML that is annotated as active. Any other string
    values except "active" are treated as integer.

data_home : str or None
    Location to cache the response. None if no cache is required.

n_retries : int, default=3
    Number of retries when HTTP errors are encountered. Error with status
    code 412 won't be retried as they represent OpenML generic errors.

delay : float, default=1.0
    Number of seconds between retries.

Returns
-------
first_dataset : json
    json representation of the first dataset object that adhired to the
    search criteria

activez/status/active/zNo active dataset {} found.r"   r@   rA   datadatasetr   r   r   z:Multiple active versions of the dataset matching the name zC exist. Versions may be fundamentally different, returning version z. Available versions:
z
- version z
, status: status
z2  url: https://www.openml.org/search?type=data&id=didz/data_version/{}N)r~   r"   r@   rA   z/status/deactivatedz%Dataset {} with version {} not found.)_SEARCH_NAMEformatr   lenr   rv   )rl   r   r"   r@   rA   rB   	error_msg	json_dataresfirst_versionwarning_msgrs               r'   _get_data_info_by_namer     s   R (!!$'*;;188>	5
	 	*s8a<&)!fY&77M6 %%2O 4((  AiL>AhK=PRSSH5
RTU 
 1v ,,
4
4T
CC
5
	, VY'**  

 	$$;BB4Q	5
	
s   6C -C>=C>data_idc                 r    [         R                  U 5      nSR                  U 5      n[        UUUUUS9nUS   $ )N"Dataset with data_id {} not found.r   data_set_description)
_DATA_INFOr   r   r   r"   r@   rA   rB   r~   r   s          r'   _get_data_description_by_idr   g  sK     

G
$C8??HM1I +,,r)   c                 x    [         R                  U 5      nSR                  U 5      n[        UUUUUS9nUS   S   $ )Nr   r   data_featuresfeature)_DATA_FEATURESr   r   r   s          r'   _get_data_featuresr   z  sO     


(C8??HM1I _%i00r)   c                     [         R                  U 5      nSR                  U 5      n[        UUUUUS9nUR                  S0 5      R                  S/ 5      $ )Nr   r   data_qualitiesquality)_DATA_QUALITIESr   r   rW   r   s          r'   _get_data_qualitiesr     s^     
 
 
)C8??HM1I ==)2.229bAAr)   r   c                     SnU  Vs0 s H  o"S   US   _M     nn[        [        UR                  SU5      5      5      $ s  snf )a"  Get the number of samples from data qualities.

Parameters
----------
data_qualities : list of dict
    Used to retrieve the number of instances (samples) in the dataset.

Returns
-------
n_samples : int
    The number of samples in the dataset or -1 if data qualities are
    unavailable.
rl   valueNumberOfInstances)intfloatrW   )r   default_n_samplesd	qualitiess       r'   _get_num_samplesr     sL     0>?16AgJ&I?uY]]#68IJKLL @s   ?parseroutput_typeopenml_columns_infofeature_names_to_selecttarget_names_to_selectshapemd5_checksumread_csv_kwargsc           
      B  ^ [        XXS9m[        T5         [        R                  " 5       n[	        U4S jS5       H  nUR                  U5        M     UR                  5       nSSS5        WU:w  a  [        SU  SU SU S35      eS	 n[        UUUUUUU=(       d    0 S
9n U" XXU5      u  nnnnUUUU4$ ! , (       d  f       NZ= f! [         aJ  nUS:w  a  e SSK
Jn  [        UU5      (       d  e US   R                  SS9  U" XXU5      u  nnnn SnANfSnAff = f)a  Load the ARFF data associated with the OpenML URL.

In addition of loading the data, this function will also check the
integrity of the downloaded file from OpenML using MD5 checksum.

Parameters
----------
url : str
    The URL of the ARFF file on OpenML.

data_home : str
    The location where to cache the data.

parser : {"liac-arff", "pandas"}
    The parser used to parse the ARFF file.

output_type : {"numpy", "pandas", "sparse"}
    The type of the arrays that will be returned. The possibilities are:

    - `"numpy"`: both `X` and `y` will be NumPy arrays;
    - `"sparse"`: `X` will be sparse matrix and `y` will be a NumPy array;
    - `"pandas"`: `X` will be a pandas DataFrame and `y` will be either a
      pandas Series or DataFrame.

openml_columns_info : dict
    The information provided by OpenML regarding the columns of the ARFF
    file.

feature_names_to_select : list of str
    The list of the features to be selected.

target_names_to_select : list of str
    The list of the target variables to be selected.

shape : tuple or None
    With `parser="liac-arff"`, when using a generator to load the data,
    one needs to provide the shape of the data beforehand.

md5_checksum : str
    The MD5 checksum provided by OpenML to check the data integrity.

n_retries : int, default=3
    The number of times to retry downloading the data if it fails.

delay : float, default=1.0
    The delay between two consecutive downloads in seconds.

read_csv_kwargs : dict, default=None
    Keyword arguments to pass to `pandas.read_csv` when using the pandas parser.
    It allows to overwrite the default options.

    .. versionadded:: 1.3

Returns
-------
X : {ndarray, sparse matrix, dataframe}
    The data matrix.

y : {ndarray, dataframe, series}
    The target.

frame : dataframe or None
    A dataframe containing both `X` and `y`. `None` if
    `output_array_type != "pandas"`.

categories : list of str or None
    The names of the features that are categorical. `None` if
    `output_array_type == "pandas"`.
r   c                  &   > T R                  S5      $ )Ni   )r   )	gzip_files   r'   <lambda>%_load_arff_response.<locals>.<lambda>  s    ).."6r)   r)   Nzmd5 checksum of local file for z' does not match description: expected: z	 but got zP. Downloaded file could have been modified / corrupted, clean cache and retry...c                 z    [        XX#S9n[        U5         [        U40 UD6sS S S 5        $ ! , (       d  f       g = f)Nr   )rt   r   r   )rB   r"   r@   rA   arff_paramsr   s         r'   _open_url_and_load_gzip_file9_load_arff_response.<locals>._open_url_and_load_gzip_file  s/    $SyV	Y+IEE  s   ,
:)r   r   r   r   r   r   r   pandasr   ParserErrorr   ')	quotechar)rt   r   hashlibmd5iterupdate	hexdigest
ValueErrordictr.   pandas.errorsr   r/   )rB   r"   r   r   r   r   r   r   r   r@   rA   r   r   chunkactual_md5_checksumr   r   Xyframe
categoriesr5   r   r   s                          @r'   _load_arff_responser     sX   f !9RI		kkm6<EJJu =!mmo	 
 l*-cU 3%i0C/D E
 	
F
 / 75'-2K
">Ik#
1eZ( a
""c 
	@  
X-#{++
 	%&---<">Ik#
1eZ
s%   AB9$C
 9
C

DA DD)r@   rA   r   sparseas_framedata_columnstarget_columnsc       
         v   U Vs0 s H  oS   U_M
     nnU(       a  SnOU(       a  SnOSn[        X5        U H1  nUU   n[        US   5      nUS:  d  M  [        SUS    SU S	35      e   S
nUS:X  a  SSKJn  Un[        XU5      " [        5      " U UUUUUUUUU	U
US9u  nnnn[        UUUUUUS9$ s  snf )a  Download ARFF data, load it to a specific container and create to Bunch.

This function has a mechanism to retry/cache/clean the data.

Parameters
----------
url : str
    The URL of the ARFF file on OpenML.

sparse : bool
    Whether the dataset is expected to use the sparse ARFF format.

data_home : str
    The location where to cache the data.

as_frame : bool
    Whether or not to return the data into a pandas DataFrame.

openml_columns_info : list of dict
    The information regarding the columns provided by OpenML for the
    ARFF dataset. The information is stored as a list of dictionaries.

data_columns : list of str
    The list of the features to be selected.

target_columns : list of str
    The list of the target variables to be selected.

shape : tuple or None
    With `parser="liac-arff"`, when using a generator to load the data,
    one needs to provide the shape of the data beforehand.

md5_checksum : str
    The MD5 checksum provided by OpenML to check the data integrity.

n_retries : int, default=3
    Number of retries when HTTP errors are encountered. Error with status
    code 412 won't be retried as they represent OpenML generic errors.

delay : float, default=1.0
    Number of seconds between retries.

parser : {"liac-arff", "pandas"}
    The parser used to parse the ARFF file.

read_csv_kwargs : dict, default=None
    Keyword arguments to pass to `pandas.read_csv` when using the pandas parser.
    It allows to overwrite the default options.

    .. versionadded:: 1.3

Returns
-------
data : :class:`~sklearn.utils.Bunch`
    Dictionary-like object, with the following attributes.

    X : {ndarray, sparse matrix, dataframe}
        The data matrix.
    y : {ndarray, dataframe, series}
        The target.
    frame : dataframe or None
        A dataframe containing both `X` and `y`. `None` if
        `output_array_type != "pandas"`.
    categories : list of str or None
        The names of the features that are categorical. `None` if
        `output_array_type == "pandas"`.
rl   r   r   numpynumber_of_missing_valuesr   zTarget column 'z' has zE missing values. Missing values are not supported for target columns.Nr   )
r   r   r   r   r   r   r   r@   rA   r   )r   targetr   r   feature_namestarget_names)_verify_target_data_typer   r   r   r   r=   r   r   )rB   r   r"   r   r   r   r   r   r   r@   rA   r   r   r   features_dictr   rl   column_infon_missing_valuesr*   r   r   r   r   r   s                            r'   _download_data_to_bunchr   A  s-   h >QQ=P'V_g-=PMQ	 ];#D){+EFGa!+f"5!6f=M<N OO O 	   	.(5* 	) ,-!'Aq%" "# [ Rs   B6c                    [        U[        5      (       d  [        S[        U5      -  5      e[	        5       nU H  nX0;  a  [        SU S35      eX   S   S:X  a   UR                  [        R                  5        OUR                  [        5        X   S   S:X  a  [        SU S	35        X   S
   S:X  d  M~  [        SU S35        M     [        U5      S:  a  [        S5      eg )Nz%target_column should be list, got: %szCould not find target_column='r   	data_typenumeric	is_ignoretrueztarget_column='z' has flag is_ignore.is_row_identifierz' has flag is_row_identifier.r   zgCan only handle homogeneous multi-target datasets, i.e., all targets are either numeric or categorical.)r/   listr   typesetKeyErroraddnpfloat64objectr   r   )r   r   found_typestarget_columns       r'   r   r     s     nd++@4CWWXX%K'-;M?!LMM'4	AOOBJJ'OOF# '4>?=/1FGH'(;<F?=/1NOP ( ;!
 	
 r)   c                     / nU  H8  nUS   U;  d  M  US   S:w  d  M  US   S:w  d  M$  UR                  US   5        M:     U$ )Nrl   r   r   r   )append)features_listr   valid_data_column_namesr   s       r'   _valid_data_column_namesr    sU    
 ! FO>1$.+,6#**76?; ! #"r)   left)closedr   autog        neither>   r  r   	liac-arff)rl   r   r   r"   r   cache
return_X_yr   r@   rA   r   r   T)prefer_skip_nested_validationdefault-targetF)r   r   r"   r   r  r	  r   r@   rA   r   r   r   r  r	  c                   USL a  SnO[        US9n[        [        U5      S5      nU b>  U R                  5       n Ub  [	        SR                  X 5      5      e[        XX8U	S9nUS   nO/Ub!  US:w  a  [	        S	R                  X!5      5      eO[	        S
5      e[        X#5      nUS   S:w  a%  [        SR                  US   US   US   5      5        SU;   a  [        SR                  US   5      5        SU;   a  [        SR                  US   5      5        US   R                  5       S:H  nUS:X  a  U(       + OUnU
S:X  a  U(       a  SOSnOU
nUS:X  a   [        S5        U(       a(  U(       a  [	        S5      eUS:X  a  [	        SU
< S35      e[        X#5      nU(       d-  U H'  nS US!   US"   4;   a  M  US#   S$:X  d  M  [	        S%5      e   US&:X  a!  U Vs/ s H  nUS'   S :X  d  M  US   PM     nnO![        U[        5      (       a  U/nOUc  / nOUn[        UU5      nU(       d"  [        X#5      n[        U5      [!        U5      4nOSnUS   n[#        UUU[%        U5      UUUUUS(   UU	UUS)9nU(       a  UR&                  UR(                  4$ S*R                  UR+                  S+5      5      nUR-                  UUS,R                  U5      S-9  U$ ! [         a"  nU(       a  SnOSU
< S3n[        U5      UeSnAff = fs  snf ).at!  Fetch dataset from openml by name or dataset id.

Datasets are uniquely identified by either an integer ID or by a
combination of name and version (i.e. there might be multiple
versions of the 'iris' dataset). Please give either name or data_id
(not both). In case a name is given, a version can also be
provided.

Read more in the :ref:`User Guide <openml>`.

.. versionadded:: 0.20

.. note:: EXPERIMENTAL

    The API is experimental (particularly the return value structure),
    and might have small backward-incompatible changes without notice
    or warning in future releases.

Parameters
----------
name : str, default=None
    String identifier of the dataset. Note that OpenML can have multiple
    datasets with the same name.

version : int or 'active', default='active'
    Version of the dataset. Can only be provided if also ``name`` is given.
    If 'active' the oldest version that's still active is used. Since
    there may be more than one active version of a dataset, and those
    versions may fundamentally be different from one another, setting an
    exact version is highly recommended.

data_id : int, default=None
    OpenML ID of the dataset. The most specific way of retrieving a
    dataset. If data_id is not given, name (and potential version) are
    used to obtain a dataset.

data_home : str or path-like, default=None
    Specify another download and cache folder for the data sets. By default
    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

target_column : str, list or None, default='default-target'
    Specify the column name in the data to use as target. If
    'default-target', the standard target column a stored on the server
    is used. If ``None``, all columns are returned as data and the
    target is ``None``. If list (of strings), all columns with these names
    are returned as multi-target (Note: not all scikit-learn classifiers
    can handle all types of multi-output combinations).

cache : bool, default=True
    Whether to cache the downloaded datasets into `data_home`.

return_X_y : bool, default=False
    If True, returns ``(data, target)`` instead of a Bunch object. See
    below for more information about the `data` and `target` objects.

as_frame : bool or 'auto', default='auto'
    If True, the data is a pandas DataFrame including columns with
    appropriate dtypes (numeric, string or categorical). The target is
    a pandas DataFrame or Series depending on the number of target_columns.
    The Bunch will contain a ``frame`` attribute with the target and the
    data. If ``return_X_y`` is True, then ``(data, target)`` will be pandas
    DataFrames or Series as describe above.

    If `as_frame` is 'auto', the data and target will be converted to
    DataFrame or Series as if `as_frame` is set to True, unless the dataset
    is stored in sparse format.

    If `as_frame` is False, the data and target will be NumPy arrays and
    the `data` will only contain numerical values when `parser="liac-arff"`
    where the categories are provided in the attribute `categories` of the
    `Bunch` instance. When `parser="pandas"`, no ordinal encoding is made.

    .. versionchanged:: 0.24
       The default value of `as_frame` changed from `False` to `'auto'`
       in 0.24.

n_retries : int, default=3
    Number of retries when HTTP errors or network timeouts are encountered.
    Error with status code 412 won't be retried as they represent OpenML
    generic errors.

delay : float, default=1.0
    Number of seconds between retries.

parser : {"auto", "pandas", "liac-arff"}, default="auto"
    Parser used to load the ARFF file. Two parsers are implemented:

    - `"pandas"`: this is the most efficient parser. However, it requires
      pandas to be installed and can only open dense datasets.
    - `"liac-arff"`: this is a pure Python ARFF parser that is much less
      memory- and CPU-efficient. It deals with sparse ARFF datasets.

    If `"auto"`, the parser is chosen automatically such that `"liac-arff"`
    is selected for sparse ARFF datasets, otherwise `"pandas"` is selected.

    .. versionadded:: 1.2
    .. versionchanged:: 1.4
       The default value of `parser` changes from `"liac-arff"` to
       `"auto"`.

read_csv_kwargs : dict, default=None
    Keyword arguments passed to :func:`pandas.read_csv` when loading the data
    from a ARFF file and using the pandas parser. It can allow to
    overwrite some default parameters.

    .. versionadded:: 1.3

Returns
-------
data : :class:`~sklearn.utils.Bunch`
    Dictionary-like object, with the following attributes.

    data : np.array, scipy.sparse.csr_matrix of floats, or pandas DataFrame
        The feature matrix. Categorical features are encoded as ordinals.
    target : np.array, pandas Series or DataFrame
        The regression target or classification labels, if applicable.
        Dtype is float if numeric, and object if categorical. If
        ``as_frame`` is True, ``target`` is a pandas object.
    DESCR : str
        The full description of the dataset.
    feature_names : list
        The names of the dataset columns.
    target_names: list
        The names of the target columns.

    .. versionadded:: 0.22

    categories : dict or None
        Maps each categorical feature name to a list of values, such
        that the value encoded as i is ith in the list. If ``as_frame``
        is True, this is None.
    details : dict
        More metadata from OpenML.
    frame : pandas DataFrame
        Only present when `as_frame=True`. DataFrame with ``data`` and
        ``target``.

(data, target) : tuple if ``return_X_y`` is True

    .. note:: EXPERIMENTAL

        This interface is **experimental** and subsequent releases may
        change attributes without notice (although there should only be
        minor changes to ``data`` and ``target``).

    Missing values in the 'data' are represented as NaN's. Missing values
    in 'target' are represented as NaN's (numerical target) or None
    (categorical target).

Notes
-----
The `"pandas"` and `"liac-arff"` parsers can lead to different data types
in the output. The notable differences are the following:

- The `"liac-arff"` parser always encodes categorical features as `str` objects.
  To the contrary, the `"pandas"` parser instead infers the type while
  reading and numerical categories will be casted into integers whenever
  possible.
- The `"liac-arff"` parser uses float64 to encode numerical features
  tagged as 'REAL' and 'NUMERICAL' in the metadata. The `"pandas"`
  parser instead infers if these numerical features corresponds
  to integers and uses panda's Integer extension dtype.
- In particular, classification datasets with integer categories are
  typically loaded as such `(0, 1, ...)` with the `"pandas"` parser while
  `"liac-arff"` will force the use of string encoded class labels such as
  `"0"`, `"1"` and so on.
- The `"pandas"` parser will not strip single quotes - i.e. `'` - from
  string columns. For instance, a string `'my string'` will be kept as is
  while the `"liac-arff"` parser will strip the single quotes. For
  categorical columns, the single quotes are stripped from the values.

In addition, when `as_frame=False` is used, the `"liac-arff"` parser
returns ordinally encoded data where the categories are provided in the
attribute `categories` of the `Bunch` instance. Instead, `"pandas"` returns
a NumPy array were the categories are not encoded.

Examples
--------
>>> from sklearn.datasets import fetch_openml
>>> adult = fetch_openml("adult", version=2)  # doctest: +SKIP
>>> adult.frame.info()  # doctest: +SKIP
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   age             48842 non-null  int64
 1   workclass       46043 non-null  category
 2   fnlwgt          48842 non-null  int64
 3   education       48842 non-null  category
 4   education-num   48842 non-null  int64
 5   marital-status  48842 non-null  category
 6   occupation      46033 non-null  category
 7   relationship    48842 non-null  category
 8   race            48842 non-null  category
 9   sex             48842 non-null  category
 10  capital-gain    48842 non-null  int64
 11  capital-loss    48842 non-null  int64
 12  hours-per-week  48842 non-null  int64
 13  native-country  47985 non-null  category
 14  class           48842 non-null  category
dtypes: category(9), int64(6)
memory usage: 2.7 MB
FNr   openmlzfDataset data_id={} and name={} passed, but you can only specify a numeric data_id or a name, not both.r   r   r   zlDataset data_id={} and version={} passed, but you can only specify a numeric data_id or a version, not both.zFNeither name nor data_id are provided. Please provide name or data_id.r   zVersion {} of dataset {} is inactive, meaning that issues have been found in the dataset. Try using a newer version from this URL: {}r   rl   rB   r   zMOpenML registered a problem with the dataset. It might be unusable. Error: {}warningzIOpenML raised a warning on the dataset. It might be unusable. Warning: {}r   sparse_arffr  r  r   z`fetch_openml`zReturning pandas objects requires pandas to be installed. Alternatively, explicitly set `as_frame=False` and `parser='liac-arff'`.zUsing `parser=zf` with dense data requires pandas to be installed. Alternatively, explicitly set `parser='liac-arff'`.zhSparse ARFF datasets cannot be loaded with as_frame=True. Use as_frame=False or as_frame='auto' instead.z2Sparse ARFF datasets cannot be loaded with parser=z2. Use parser='liac-arff' or parser='auto' instead.r   r   r   r   stringzOSTRING attributes are not supported for array representation. Try as_frame=Truer  	is_targetr   )
r   r   r   r   r   r   r@   rA   r   r   z{}

Downloaded from openml.org.descriptionzhttps://www.openml.org/d/{})DESCRdetailsrB   )r   r   strlowerr   r   r   r   r   r   ImportErrorr   r/   r  r   r   r   r   boolr   r   popr   )rl   r   r   r"   r   r  r	  r   r@   rA   r   r   	data_infodata_descriptionreturn_sparseparser_r5   err_msgr   r   r   r   r   r   rB   bunchr  s                              r'   r    r      s   \ ~	!I6	Y2	  zz|w- 
 +9
	 E"		hw0   T
 	
 37F!X-!6 + ( '	
 """"(&)9')B"C	
 $$$$*F+;I+F$G	

 %X.446-GM$,$6= HH!.+H(	0 !12 A  hDVJ OC C  'w:M$G'+.8K0LMM{#x/ > 	 % (( )
({#v- GFO( 	 

 
M3	'	''		 '+M>JL  -W@ 0#m2DD 5
!C#h)%!%n5'E  zz5<<''5<<]+K 
LL )009   LG  	0,  %VJ /U U  g&C/	0V
s$   K L*	L
LK<<L)N)r>   r?   rT   )r>   r?   )r>   r?   N)PrU   r   r   r%   ri   rJ   
contextlibr   	functoolsr   os.pathr   tempfiler   typingr   r	   r
   r   r   r   r   urllib.errorr   r   urllib.parser   urllib.requestr   r   warningsr   r   r   utilsr   utils._optional_dependenciesr   utils._param_validationr   r   r   r   r   rT   r   _arff_parserr   __all__r   r   r   r   r  OpenmlQualitiesTypeOpenmlFeaturesTyper(   r.   r=   r   r   rQ   rt   r   rv   r   r   r   r   r   r   r   r   r  r   r   r  PathLiker   r    r<   r)   r'   <module>r1     s       	      ' D D D , ! +    ?   2
R9
FH4S>* $sCx.) F F F F /3!!}! !+! 	!J 8:$$$$14$$P LOI+	I+!#I+36I+CHI+X	* 	 5%	5%C=5% }5% 	5%
 5% 
5%x ^+
^+38_^+ }^+ 	^+
 ^+H 	--}- - 	-
 
#s(^-, 	11}1 1 	1
 1. 	BB}B B 	B
 B,M%8 MS M> &*E#	E#}E# E# 	E#
 E# "#YE# !IE# E#s(O$E# E# E# E# d^E#f &*H	HH }H
 H dH s)H IH E#s(O$H H H H H d^HV
6#  dXq$v>
H:@VWXq$v>E2;;-tT*f:vh/0xD@A4d9=>67
 !$<  #'#( B  (!370@!'&*B
3-B 38_B c]	B
 c2;;./0B E#t),-B B B CIB B B B d^B'&Br)   