From 162568250c4b807baac1574902dee08747788f12 Mon Sep 17 00:00:00 2001
From: elbeejay <jhariharan@usgs.gov>
Date: Tue, 13 Dec 2022 17:14:17 -0500
Subject: [PATCH 1/6] failing tz testcase

---
 dataretrieval/nwis.py | 8 ++++++--
 tests/nwis_test.py    | 9 ++++++++-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index c385e28b..76c167b5 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -50,6 +50,9 @@ def format_response(df, service=None, **kwargs):
             df = df.tz_localize('UTC', level=1)
 
     else:
+        # transform datetime column to datetime values
+        #df['datetime'] = pd.to_datetime(df.pop('datetime'), errors='coerce')
+        # define index using datetime values
         df.set_index(['datetime'], inplace=True)
         if hasattr(df.index, 'tzinfo') and df.index.tzinfo is None:
             df = df.tz_localize('UTC')
@@ -770,12 +773,13 @@ def _read_json(json):
             # should be able to avoid this by dumping
             record_json = str(record_json).replace("'", '"')
 
-            # read json, converting all values to float64 and all qaulifiers
+            # read json, converting all values to float64 and all qualifiers
             # Lists can't be hashed, thus we cannot df.merge on a list column
             record_df = pd.read_json(record_json,
                                      orient='records',
                                      dtype={'value': 'float64',
-                                            'qualifiers': 'unicode'})
+                                            'qualifiers': 'unicode'},
+                                     convert_dates=True)
 
             record_df['qualifiers'] = (record_df['qualifiers']
                                        .str.strip("[]").str.replace("'", ""))
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index 44a40a56..0b6a68f2 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -2,6 +2,7 @@
 import pandas as pd
 import pytest
 from dataretrieval.nwis import get_record, preformat_peaks_response
+from dataretrieval.nwis import what_sites, get_iv
 
 START_DATE = '2018-01-24'
 END_DATE   = '2018-01-25'
@@ -36,7 +37,6 @@ def test_iv_service():
 
 def test_iv_service_answer():
     df = test_iv_service()
-
     # check multiindex function
     assert df.index.names == [SITENO_COL, DATETIME_COL], "iv service returned incorrect index: {}".format(df.index.names)
 
@@ -122,3 +122,10 @@ def test_inc_date_03():
     assert df.shape == df2.shape
     # assert that the datetime index is not there
     assert df2.index.name != 'datetime'
+
+
+def test_multiple_tz_01():
+    """Test based on GitHub Issue #60 - error merging different time zones."""
+    sites, sites_md = what_sites(stateCd='MD')
+    iv, iv_md = get_iv(sites=sites.site_no.values[:25].tolist())
+    import pdb; pdb.set_trace()
\ No newline at end of file

From 4b7b0e8538b5cd134768f9ff27d981a6e5967849 Mon Sep 17 00:00:00 2001
From: elbeejay <jhariharan@usgs.gov>
Date: Fri, 16 Dec 2022 08:36:42 -0500
Subject: [PATCH 2/6] pair of tests, one fails, one passes before fix

---
 tests/nwis_test.py | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index 0b6a68f2..8800a109 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -1,8 +1,9 @@
 import numpy as np
 import pandas as pd
 import pytest
+import datetime
 from dataretrieval.nwis import get_record, preformat_peaks_response
-from dataretrieval.nwis import what_sites, get_iv
+from dataretrieval.nwis import what_sites, get_iv, get_dv, get_discharge_peaks
 
 START_DATE = '2018-01-24'
 END_DATE   = '2018-01-25'
@@ -124,8 +125,24 @@ def test_inc_date_03():
     assert df2.index.name != 'datetime'
 
 
-def test_multiple_tz_01():
-    """Test based on GitHub Issue #60 - error merging different time zones."""
-    sites, sites_md = what_sites(stateCd='MD')
-    iv, iv_md = get_iv(sites=sites.site_no.values[:25].tolist())
-    import pdb; pdb.set_trace()
\ No newline at end of file
+class TestTZ:
+    """Tests relating to GitHub Issue #60."""
+    sites, _ = what_sites(stateCd='MD')
+
+    def test_multiple_tz_01(self):
+        """Test based on GitHub Issue #60 - error merging different time zones."""
+        # this test fails before issue #60 is fixed
+        iv, _ = get_iv(sites=self.sites.site_no.values[:25].tolist())
+        # assert that the datetime column exists
+        assert 'datetime' in iv.index.names
+        # assert that it is a datetime type
+        assert isinstance(iv.index[0][1], datetime.datetime)
+
+    def test_multiple_tz_02(self):
+        """Test based on GitHub Issue #60 - confirm behavior for same tz."""
+        # this test passes before issue #60 is fixed
+        iv, _ = get_iv(sites=self.sites.site_no.values[:20].tolist())
+        # assert that the datetime column exists
+        assert 'datetime' in iv.index.names
+        # assert that it is a datetime type
+        assert isinstance(iv.index[0][1], datetime.datetime)
\ No newline at end of file

From f4bcbc567b2961fc573c38bb625cddada21be11f Mon Sep 17 00:00:00 2001
From: elbeejay <jhariharan@usgs.gov>
Date: Fri, 16 Dec 2022 08:37:17 -0500
Subject: [PATCH 3/6] fix for multiple tz by converting tz at end

---
 dataretrieval/nwis.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index 76c167b5..36683ed6 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -779,7 +779,7 @@ def _read_json(json):
                                      orient='records',
                                      dtype={'value': 'float64',
                                             'qualifiers': 'unicode'},
-                                     convert_dates=True)
+                                     convert_dates=False)
 
             record_df['qualifiers'] = (record_df['qualifiers']
                                        .str.strip("[]").str.replace("'", ""))
@@ -797,6 +797,9 @@ def _read_json(json):
                 merged_df = update_merge(merged_df, record_df, na_only=True,
                                          on=['site_no', 'datetime'])
 
+    # convert to datetime
+    merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
+
     return merged_df
 
 

From 1908d0a12f5b00bb123f28d17ea231a18dbedefc Mon Sep 17 00:00:00 2001
From: elbeejay <jhariharan@usgs.gov>
Date: Fri, 16 Dec 2022 11:41:41 -0500
Subject: [PATCH 4/6] normalize tz to utc to match single-site process

---
 dataretrieval/nwis.py | 9 +++------
 tests/nwis_test.py    | 2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/dataretrieval/nwis.py b/dataretrieval/nwis.py
index 36683ed6..a0dc27b4 100644
--- a/dataretrieval/nwis.py
+++ b/dataretrieval/nwis.py
@@ -50,9 +50,6 @@ def format_response(df, service=None, **kwargs):
             df = df.tz_localize('UTC', level=1)
 
     else:
-        # transform datetime column to datetime values
-        #df['datetime'] = pd.to_datetime(df.pop('datetime'), errors='coerce')
-        # define index using datetime values
         df.set_index(['datetime'], inplace=True)
         if hasattr(df.index, 'tzinfo') and df.index.tzinfo is None:
             df = df.tz_localize('UTC')
@@ -121,7 +118,7 @@ def _qwdata(datetime_index=True, **kwargs):
                'rdb_qw_attributes': 'expanded',
                'date_format': 'YYYY-MM-DD',
                'rdb_compression': 'value',
-               'submmitted_form': 'brief_list'}
+               'submitted_form': 'brief_list'}
     # 'qw_sample_wide': 'separated_wide'}
 
     # check for parameter codes, and reformat query args
@@ -797,8 +794,8 @@ def _read_json(json):
                 merged_df = update_merge(merged_df, record_df, na_only=True,
                                          on=['site_no', 'datetime'])
 
-    # convert to datetime
-    merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
+    # convert to datetime, normalizing the timezone to UTC when doing so
+    merged_df['datetime'] = pd.to_datetime(merged_df['datetime'], utc=True)
 
     return merged_df
 
diff --git a/tests/nwis_test.py b/tests/nwis_test.py
index 8800a109..6b6ba7e0 100644
--- a/tests/nwis_test.py
+++ b/tests/nwis_test.py
@@ -145,4 +145,4 @@ def test_multiple_tz_02(self):
         # assert that the datetime column exists
         assert 'datetime' in iv.index.names
         # assert that it is a datetime type
-        assert isinstance(iv.index[0][1], datetime.datetime)
\ No newline at end of file
+        assert isinstance(iv.index[0][1], datetime.datetime)

From 4bfc256ad05aadbfe94af6f3d4c49faf49c93171 Mon Sep 17 00:00:00 2001
From: elbeejay <jhariharan@usgs.gov>
Date: Fri, 16 Dec 2022 12:57:04 -0500
Subject: [PATCH 5/6] documentation for time

---
 docs/source/examples/readme_examples.rst  | 53 ++++++++++-----
 docs/source/index.rst                     |  5 +-
 docs/source/userguide/index.rst           | 16 +++++
 docs/source/userguide/timeconventions.rst | 80 +++++++++++++++++++++++
 4 files changed, 137 insertions(+), 17 deletions(-)
 create mode 100644 docs/source/userguide/index.rst
 create mode 100644 docs/source/userguide/timeconventions.rst

diff --git a/docs/source/examples/readme_examples.rst b/docs/source/examples/readme_examples.rst
index 3dc14609..1caa0264 100644
--- a/docs/source/examples/readme_examples.rst
+++ b/docs/source/examples/readme_examples.rst
@@ -9,18 +9,41 @@ Examples from the Readme file on retrieving NWIS data
 
 .. doctest::
 
-    # first import the functions for downloading data from NWIS
-    import dataretrieval.nwis as nwis
-
-    # specify the USGS site code for which we want data.
-    site = '03339000'
-
-
-    # get instantaneous values (iv)
-    df = nwis.get_record(sites=site, service='iv', start='2017-12-31', end='2018-01-01')
-
-    # get water quality samples (qwdata)
-    df2 = nwis.get_record(sites=site, service='qwdata', start='2017-12-31', end='2018-01-01')
-
-    # get basic info about the site
-    df3 = nwis.get_record(sites=site, service='site')
\ No newline at end of file
+    >>> # first import the functions for downloading data from NWIS
+    >>> import dataretrieval.nwis as nwis
+
+    >>> # specify the USGS site code for which we want data.
+    >>> site = '03339000'
+
+    >>> # get instantaneous values (iv)
+    >>> df = nwis.get_record(sites=site, service='iv', start='2017-12-31', end='2018-01-01')
+
+    >>> df.head()
+                               00010 00010_cd   site_no  00060 00060_cd  ...  63680_ysi), [discontinued 10/5/21_cd 63680_hach  63680_hach_cd 99133  99133_cd
+    datetime                                                             ...
+    2017-12-31 06:00:00+00:00    1.0        A  03339000  140.0        A  ...                                     A        3.6              A  4.61         A
+    2017-12-31 06:15:00+00:00    1.0        A  03339000  138.0        A  ...                                     A        3.6              A  4.61         A
+    2017-12-31 06:30:00+00:00    1.0        A  03339000  139.0        A  ...                                     A        3.4              A  4.61         A
+    2017-12-31 06:45:00+00:00    1.0        A  03339000  139.0        A  ...                                     A        3.4              A  4.61         A
+    2017-12-31 07:00:00+00:00    1.0        A  03339000  139.0        A  ...                                     A        3.5              A  4.61         A
+    <BLANKLINE>
+    [5 rows x 21 columns]
+
+    >>> # get water quality samples (qwdata)
+    >>> df2 = nwis.get_record(sites=site, service='qwdata', start='2018-12-01', end='2019-01-01')
+
+    >>> print(df2)
+                              agency_cd   site_no   sample_dt sample_tm  sample_end_dt  sample_end_tm  ... p80154 p82398 p84164  p91157  p91158  p91159
+    datetime                                                                                           ...
+    2018-12-10 17:30:00+00:00      USGS  03339000  2018-12-10     11:30            NaN            NaN  ...     16     50   3060  0.0165  0.0141  0.0024
+    <BLANKLINE>
+    [1 rows x 33 columns]
+
+    >>> # get basic info about the site
+    >>> df3 = nwis.get_record(sites=site, service='site')
+
+    >>> print(df3)
+      agency_cd   site_no                         station_nm site_tp_cd  lat_va  ...  aqfr_type_cd  well_depth_va  hole_depth_va depth_src_cd project_no
+    0      USGS  03339000  VERMILION RIVER NEAR DANVILLE, IL         ST  400603  ...           NaN            NaN            NaN          NaN        100
+    <BLANKLINE>
+    [1 rows x 42 columns]
\ No newline at end of file
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 6c8bcc4a..54f1110b 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -2,11 +2,11 @@ Welcome
 =======
 
 Welcome to the documentation for the Python ``dataretrieval`` package.
-``dataretreival`` is a Python alternative to the `USGS R dataRetrieval package`_
+``dataretrieval`` is a Python alternative to the `USGS R dataRetrieval package`_
 and is used to obtain USGS and EPA water quality data, streamflow data, and
 metadata directly from webservices.
 
-.. _USGS R dataRetrieval package: https://github.com/USGS-R/dataRetrieval
+.. _USGS R dataRetrieval package: https://github.com/DOI-USGS/dataRetrieval
 
 
 Table of Contents
@@ -16,6 +16,7 @@ Table of Contents
    :maxdepth: 1
 
    meta/installing
+   userguide/index
    examples/index
    meta/contributing
    meta/license
diff --git a/docs/source/userguide/index.rst b/docs/source/userguide/index.rst
new file mode 100644
index 00000000..4d7ae895
--- /dev/null
+++ b/docs/source/userguide/index.rst
@@ -0,0 +1,16 @@
+.. _userguide:
+
+==========
+User Guide
+==========
+
+Contents
+--------
+
+.. toctree::
+    :maxdepth: 1
+
+    timeconventions
+
+
+.. include:: timeconventions.rst
diff --git a/docs/source/userguide/timeconventions.rst b/docs/source/userguide/timeconventions.rst
new file mode 100644
index 00000000..60fab8ff
--- /dev/null
+++ b/docs/source/userguide/timeconventions.rst
@@ -0,0 +1,80 @@
+.. timeconventions:
+
+Datetime Information
+--------------------
+
+``dataretrieval`` attempts to normalize time data to UTC time when converting
+web service data into dataframes. To do this, in-built pandas functions are
+used; either :func:`pandas.to_datetime` during the initial datetime object
+conversion, or :meth:`pandas.DataFrame.tz_localize` if the datetime objects
+exist but are not UTC-localized. In most cases (single-site and multi-site),
+``dataretrieval`` assigns the datetime information as the dataframe *index*.
+The exception is when incomplete datetime information is available; in
+these cases integers are used as the dataframe index (see `PR#58`_ for more
+details).
+
+.. _PR#58: https://github.com/USGS-python/dataretrieval/pull/58
+
+
+Inspecting Timestamps
+*********************
+
+For single sites, the index of the returned dataframe contains pandas
+timestamps.
+
+.. code:: python
+
+    >>> import dataretrieval.nwis as nwis
+    >>> site = '03339000'
+    >>> df = nwis.get_record(sites=site, service='peaks',
+    ...                      start='2015-01-01', end='2017-12-31')
+    >>> print(df)
+                              agency_cd   site_no peak_tm  peak_va peak_cd  gage_ht  gage_ht_cd  year_last_pk  ag_dt  ag_tm  ag_gage_ht  ag_gage_ht_cd
+    datetime
+    2015-06-08 00:00:00+00:00      USGS  03339000   17:30    25100       C    22.83         NaN           NaN    NaN    NaN         NaN            NaN
+    2015-12-29 00:00:00+00:00      USGS  03339000   18:45    37600       C    26.66         NaN           NaN    NaN    NaN         NaN            NaN
+    2017-05-05 00:00:00+00:00      USGS  03339000   04:45    17000       C    18.47         NaN           NaN    NaN    NaN         NaN            NaN
+
+Here the index of the dataframe ``df`` is a set of datetime objects. Each has
+the format ``YYYY-MM-DD HH:MM:SS+HH:MM``. Because these timestamps are
+localized to be in UTC, the expected offset (``+HH:MM``) is ``+00:00``.
+These values can be converted to a local timezone of your choosing using
+:obj:`pandas` functionality.
+
+.. code:: python
+
+    >>> df.index = df.index.tz_convert(tz='America/New_York')
+    >>> print(df)
+                              agency_cd   site_no peak_tm  peak_va peak_cd  gage_ht  gage_ht_cd  year_last_pk  ag_dt  ag_tm  ag_gage_ht  ag_gage_ht_cd
+    datetime
+    2015-06-07 20:00:00-04:00      USGS  03339000   17:30    25100       C    22.83         NaN           NaN    NaN    NaN         NaN            NaN
+    2015-12-28 19:00:00-05:00      USGS  03339000   18:45    37600       C    26.66         NaN           NaN    NaN    NaN         NaN            NaN
+    2017-05-04 20:00:00-04:00      USGS  03339000   04:45    17000       C    18.47         NaN           NaN    NaN    NaN         NaN            NaN
+
+Above, the index was converted to express the timestamps in New York local
+time. In the updated dataframe index, the resulting timestamps now have
+offsets of ``-04:00`` and ``-05:00`` as New York is either 4 or 5 hours behind
+UTC depending on the time of year (due to daylight saving time).
+
+When information for multiple sites is requested, ``dataretrieval`` creates a
+dataframe with a multi-index, with the first level containing the site number
+and the second containing the datetime information.
+
+.. doctest::
+
+    >>> import dataretrieval.nwis as nwis
+    >>> sites = ['180049066381200', '290000095192602']
+    >>> df = nwis.get_record(sites=sites, service='gwlevels',
+    ...                      start='2021-10-01', end='2022-01-01')
+    >>> df
+                                              agency_cd site_tp_cd      lev_dt lev_tm lev_tz_cd  ...  lev_dt_acy_cd  lev_acy_cd  lev_src_cd  lev_meth_cd lev_age_cd
+    site_no         datetime                                                                     ...
+    180049066381200 2021-10-04 19:54:00+00:00      USGS         GW  2021-10-04  19:54     +0000  ...              m         NaN           S            S          A
+                    2021-11-16 14:28:00+00:00      USGS         GW  2021-11-16  14:28     +0000  ...              m         NaN           S            S          A
+                    2021-12-09 10:43:00+00:00      USGS         GW  2021-12-09  10:43     +0000  ...              m         NaN           S            S          A
+    290000095192602 2021-12-08 19:07:00+00:00      USGS         GW  2021-12-08  19:07     +0000  ...              m         NaN           S            S          P
+    <BLANKLINE>
+    [4 rows x 15 columns]
+
+Here note that the default datetime index information returned is also UTC
+localized, and therefore the offset values are ``+00:00``.
\ No newline at end of file

From d8a83f9dcdcd8f44cace708c1ea7882449d69a09 Mon Sep 17 00:00:00 2001
From: "J. Hariharan" <jayaram.hariharan@utexas.edu>
Date: Mon, 19 Dec 2022 13:59:15 -0500
Subject: [PATCH 6/6] Update waterservices_test.py

submmitted -> submitted typo fix
---
 tests/waterservices_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/waterservices_test.py b/tests/waterservices_test.py
index 2ae04b0a..bb0ec105 100755
--- a/tests/waterservices_test.py
+++ b/tests/waterservices_test.py
@@ -116,7 +116,7 @@ def test_get_qwdata(requests_mock):
     request_url = 'https://nwis.waterdata.usgs.gov/nwis/qwdata?site_no={}' \
                   '&qw_sample_wide=qw_sample_wide&agency_cd=USGS&format={}&pm_cd_compare=Greater+than' \
                   '&inventory_output=0&rdb_inventory_output=file&TZoutput=0&rdb_qw_attributes=expanded' \
-                  '&date_format=YYYY-MM-DD&rdb_compression=value&submmitted_form=brief_list'.format(site, format)
+                  '&date_format=YYYY-MM-DD&rdb_compression=value&submitted_form=brief_list'.format(site, format)
     response_file_path = 'data/waterdata_qwdata.txt'
     mock_request(requests_mock, request_url, response_file_path)
     with pytest.warns(DeprecationWarning):