
    -i$                     h   S r SSKrSSKJr  SSKJrJr  SSKJrJ	r	J
r
  SSKJr  SSKrSSKrSSKJr  SS	KJrJr  S
SKJr  S
SKJrJrJr  \" SSSS9r\" SSSS9rSr\R<                  " \5      r S\RB                  4S jr"S r#S r$\" \%\S/S/\" \S
SSS9/\" \SSSS9/S.SS 9SSS!S"S.S# j5       r&g)$a  
=============================
Species distribution dataset
=============================

This dataset represents the geographic distribution of species.
The dataset is provided by Phillips et. al. (2006).

The two species are:

 - `"Bradypus variegatus"
   <http://www.iucnredlist.org/details/3038/0>`_ ,
   the Brown-throated Sloth.

 - `"Microryzomys minutus"
   <http://www.iucnredlist.org/details/13408/0>`_ ,
   also known as the Forest Small Rice Rat, a rodent that lives in Peru,
   Colombia, Ecuador, Peru, and Venezuela.

References
----------

`"Maximum entropy modeling of species geographic distributions"
<http://rob.schapire.net/papers/ecolmod.pdf>`_ S. J. Phillips,
R. P. Anderson, R. E. Schapire - Ecological Modelling, 190:231-259, 2006.
    N)BytesIO)IntegralReal)PathLikemakedirsremove)exists   )Bunch)Intervalvalidate_params   )get_data_home)RemoteFileMetadata_fetch_remote_pkl_filepathzsamples.zipz.https://ndownloader.figshare.com/files/5976075@abb07ad284ac50d9e6d20f1c4211e0fd3c098f7f85955e89d321ee8efe37ac28)filenameurlchecksumzcoverages.zipz.https://ndownloader.figshare.com/files/5976078@4d862674d72e79d6cee77e63b98651ec7926043ba7d39dcb31329cf3f6073807zspecies_coverage.pkz   c                 
   [        U5       Vs/ s H  o0R                  5       PM     nnS n[        U Vs/ s H
  oe" U5      PM     sn5      n[        R                  " XS9n[        US   5      nUS:w  a  SXx'   U$ s  snf s  snf )zbLoad a coverage file from an open file object.

This will return a numpy array of the given dtype
c                 `    U R                  5       S   [        U R                  5       S   5      4$ )Nr   r   )splitfloat)ts    Z/var/www/html/venv/lib/python3.13/site-packages/sklearn/datasets/_species_distributions.py<lambda> _load_coverage.<locals>.<lambda>I   s"    AGGIaL%	!*=>    dtypes   NODATA_valuei)rangereadlinedictnploadtxtint)	Fheader_lengthr#   _header
make_tuplelineMnodatas	            r   _load_coverager2   C   s~    
 %*-$89$8qjjl$8F9>J7:d#78F


1"A()F	H :7s
   A;B c                     U R                  5       R                  S5      R                  5       R                  S5      n[        R
                  " U SSSS9nXR                  l        U$ )zLoad csv file.

Parameters
----------
F : file object
    CSV file open in byte mode.

Returns
-------
rec : np.ndarray
    record array representing the data
ascii,r   z	S22,f4,f4)skiprows	delimiterr#   )r%   decodestripr   r'   r(   r#   names)r*   r:   recs      r   	_load_csvr<   S   sP     JJL(..066s;E
**Qc
ECIIOJr!   c                 \   U R                   U R                  -   nXR                  U R                  -  -   nU R                  U R                  -   nX0R                  U R                  -  -   n[
        R                  " XU R                  5      n[
        R                  " X4U R                  5      nXV4$ )a  Construct the map grid from the batch object

Parameters
----------
batch : Batch object
    The object returned by :func:`fetch_species_distributions`

Returns
-------
(xgrid, ygrid) : 1-D arrays
    The grid corresponding to the values in batch.coverages
)x_left_lower_corner	grid_sizeNxy_left_lower_cornerNyr'   arange)batchxminxmaxyminymaxxgridygrids          r   construct_gridsrK   g   s     $$u6D88eoo-.D$$u6D88eoo-.D IId%//2EIId%//2E>r!   booleanleft)closedg        neither)	data_homedownload_if_missing	n_retriesdelayT)prefer_skip_nested_validation   g      ?c                    [        U 5      n [        U 5      (       d  [        U 5        [        SSSSSS9n[        R
                  n[        U [        5      n[        U5      (       Gd  U(       d  [        S5      e[        R                  S[        R                  < S	U < 35        [        [        XUS
9n[        R                  " U5       nUR                   H4  n	[!        X   5      n
SU	;   a  [#        U
5      nSU	;   d  M)  [#        U
5      nM6     SSS5        [%        U5        [        R                  S[&        R                  < S	U < 35        [        [&        XUS
9n[        R                  " U5       n/ nUR                   HN  n	[!        X   5      n
[        R)                  SR+                  U	5      5        UR-                  [/        U
5      5        MP     [        R0                  " XS9nSSS5        [%        U5        [3        SWWWS.UD6n[4        R6                  " XSS9  U$ [4        R                  " U5      nU$ ! , (       d  f       GN0= f! , (       d  f       Ni= f)a  Loader for species distribution dataset from Phillips et. al. (2006).

Read more in the :ref:`User Guide <species_distribution_dataset>`.

Parameters
----------
data_home : str or path-like, default=None
    Specify another download and cache folder for the datasets. By default
    all scikit-learn data is stored in '~/scikit_learn_data' subfolders.

download_if_missing : bool, default=True
    If False, raise an OSError if the data is not locally available
    instead of trying to download the data from the source site.

n_retries : int, default=3
    Number of retries when HTTP errors are encountered.

    .. versionadded:: 1.5

delay : float, default=1.0
    Number of seconds between retries.

    .. versionadded:: 1.5

Returns
-------
data : :class:`~sklearn.utils.Bunch`
    Dictionary-like object, with the following attributes.

    coverages : array, shape = [14, 1592, 1212]
        These represent the 14 features measured
        at each point of the map grid.
        The latitude/longitude values for the grid are discussed below.
        Missing data is represented by the value -9999.
    train : record array, shape = (1624,)
        The training points for the data.  Each point has three fields:

        - train['species'] is the species name
        - train['dd long'] is the longitude, in degrees
        - train['dd lat'] is the latitude, in degrees
    test : record array, shape = (620,)
        The test points for the data.  Same format as the training data.
    Nx, Ny : integers
        The number of longitudes (x) and latitudes (y) in the grid
    x_left_lower_corner, y_left_lower_corner : floats
        The (x,y) position of the lower-left corner, in degrees
    grid_size : float
        The spacing between points of the grid, in degrees

Notes
-----

This dataset represents the geographic distribution of species.
The dataset is provided by Phillips et. al. (2006).

The two species are:

- `"Bradypus variegatus"
  <http://www.iucnredlist.org/details/3038/0>`_ ,
  the Brown-throated Sloth.

- `"Microryzomys minutus"
  <http://www.iucnredlist.org/details/13408/0>`_ ,
  also known as the Forest Small Rice Rat, a rodent that lives in Peru,
  Colombia, Ecuador, Peru, and Venezuela.

References
----------

* `"Maximum entropy modeling of species geographic distributions"
  <http://rob.schapire.net/papers/ecolmod.pdf>`_
  S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling,
  190:231-259, 2006.

Examples
--------
>>> from sklearn.datasets import fetch_species_distributions
>>> species = fetch_species_distributions()
>>> species.train[:5]
array([(b'microryzomys_minutus', -64.7   , -17.85  ),
       (b'microryzomys_minutus', -67.8333, -16.3333),
       (b'microryzomys_minutus', -67.8833, -16.3   ),
       (b'microryzomys_minutus', -67.8   , -16.2667),
       (b'microryzomys_minutus', -67.9833, -15.9   )],
      dtype=[('species', 'S22'), ('dd long', '<f4'), ('dd lat', '<f4')])

For a more extended example,
see :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py`
g33333Wi  gfffffLi8  g?)r>   r@   rA   rB   r?   z1Data not found and `download_if_missing` is FalsezDownloading species data from z to )dirnamerR   rS   traintestNzDownloading coverage data from z - converting {}r"   )	coveragesrY   rX   	   )compress )r   r	   r   r&   r'   int16r   DATA_ARCHIVE_NAMEOSErrorloggerinfoSAMPLESr   r   loadfilesr   r<   r   	COVERAGESdebugformatappendr2   asarrayr   joblibdump)rP   rQ   rR   rS   extra_paramsr#   archive_pathsamples_pathXffhandlerX   rY   coverages_pathrZ   bunchs                   r   fetch_species_distributionsru      s   R i(I)
 !"L HHE ,=>L,"MNNYWX$Y5
 WW\"aWW!!$-a<%g.EQ;$W-D  # 	|9B	R	
 'yU
 WW^$IWW!!$-/66q9:  !89  

9:I % 	~R	ER\RE!4 L L)L; #" %$s   2H5=H5/A5I5
I
I)'__doc__loggingior   numbersr   r   osr   r   r   os.pathr	   rk   numpyr'   utilsr   utils._param_validationr   r    r   _baser   r   r   rc   rf   r_   	getLogger__name__ra   r^   r2   r<   rK   strru   r]   r!   r   <module>r      s   <   " ) )     ?  C C 8O 8O	 +  
		8	$ %&RXX  (6 8T* ){xD@A4d9=>	 #' 
VVr!   