diff --git a/pyiso/base.py b/pyiso/base.py
index bf754d5..a1c9738 100644
--- a/pyiso/base.py
+++ b/pyiso/base.py
@@ -351,7 +351,9 @@ def slice_times(self, df, options=None):
         except KeyError:
             raise ValueError('Slicing by time requires start_at and end_at')
 
-        return df.truncate(before=start_at, after=end_at)
+        # sort by index before truncating; an unsorted index (e.g. around
+        # DST transitions) makes truncate raise KeyError
+        return df.sort_index().truncate(before=start_at, after=end_at)
 
     def unpivot(self, df):
         return df.stack().reset_index(level=1)
diff --git a/pyiso/bpa.py b/pyiso/bpa.py
index c40fc97..b456e27 100644
--- a/pyiso/bpa.py
+++ b/pyiso/bpa.py
@@ -1,5 +1,6 @@
 from datetime import datetime, timedelta
 import pytz
+from dateutil.parser import parse as dateutil_parse
 import pandas as pd
 from pyiso.base import BaseClient
 
@@ -70,10 +71,22 @@ def fetch_recent(self):
 
         # parse like tsv
         df = self.parse_to_df(response.text, skiprows=6, header=0, delimiter='\t',
-                            index_col=0, parse_dates=True, usecols=cols)
+                            index_col=0, parse_dates=True, usecols=cols,
+                            date_parser=self.date_parser)
 
         return df
 
+    def date_parser(self, ts_str):
+        """Parse a timestamp string, resolving PDT/PST to Pacific time."""
+        # NOTE(review): dateutil attaches tzinfos entries as-is; pytz zones
+        # normally need localize() to get the right UTC offset -- confirm
+        TZINFOS = {
+            'PDT': pytz.timezone('America/Los_Angeles'),
+            'PST': pytz.timezone('America/Los_Angeles'),
+        }
+
+        return dateutil_parse(ts_str, tzinfos=TZINFOS)
+
     def fetcher(self):
         """Choose the correct fetcher method for this request"""
         # get mode from options
diff --git a/pyiso/caiso.py b/pyiso/caiso.py
index 6f63dfe..4890e67 100644
--- a/pyiso/caiso.py
+++ b/pyiso/caiso.py
@@ -216,10 +216,10 @@ def _generation_historical(self):
                 continue
 
             # process both halves of page
             for header in [1, 29]:
                 df = self.parse_to_df(response.text,
-                                    skiprows=header, nrows=24, header=header,
-                                    delimiter='\t+')
+                                    skiprows=header, nrows=24, header=0,
+                                    delimiter='\t+', engine='python')
 
                 # combine date with hours to index
                 indexed = self.set_dt_index(df, this_date, df['Hour'])
diff --git a/requirements.txt b/requirements.txt
index 33923b9..2e0c4a7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
 Sphinx==1.2.2
 beautifulsoup4==4.3.2
 nose==1.3.1
-pandas==0.13.1
+pandas==0.14.1
 python-dateutil==2.2
-pytz==2014.2
+pytz
 requests==2.2.1
 xlrd==0.9.2
 celery>=3.1
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 0010f08..c024bc2 100644
--- a/setup.py
+++ b/setup.py
@@ -61,7 +61,7 @@ def find_version(*file_paths):
     test_suite = 'nose.collector',
     install_requires=[
         'beautifulsoup4',
-        'pandas>=0.12',
+        'pandas==0.14.1',
         'python-dateutil',
         'pytz',
         'requests',
diff --git a/tests/test_caiso.py b/tests/test_caiso.py
index ea16d9d..5fe85e6 100644
--- a/tests/test_caiso.py
+++ b/tests/test_caiso.py
@@ -332,15 +332,15 @@ def test_parse_ren_report_both(self):
 
         # top half
         top_df = c.parse_to_df(self.ren_report_tsv,
-                            skiprows=1, nrows=24, header=1,
-                            delimiter='\t+')
+                            skiprows=1, nrows=24, header=0,
+                            delimiter='\t+', engine='python')
         self.assertEqual(list(top_df.columns), ['Hour', 'GEOTHERMAL', 'BIOMASS', 'BIOGAS', 'SMALL HYDRO', 'WIND TOTAL', 'SOLAR PV', 'SOLAR THERMAL'])
         self.assertEqual(len(top_df), 24)
 
         # bottom half
         bot_df = c.parse_to_df(self.ren_report_tsv,
-                            skiprows=3, nrows=24, header=3,
-                            delimiter='\t+')
+                            skiprows=3, nrows=24, header=0,
+                            delimiter='\t+', engine='python')
         self.assertEqual(list(bot_df.columns), ['Hour', 'RENEWABLES', 'NUCLEAR', 'THERMAL', 'IMPORTS', 'HYDRO'])
         self.assertEqual(len(bot_df), 24)
 
@@ -349,16 +349,16 @@ def test_parse_ren_report_bot(self):
 
         # bottom half
         bot_df = c.parse_to_df(self.ren_report_tsv,
-                            skiprows=29, nrows=24, header=29,
-                            delimiter='\t+')
+                            skiprows=29, nrows=24, header=0,
+                            delimiter='\t+', engine='python')
         self.assertEqual(list(bot_df.columns), ['Hour', 'RENEWABLES', 'NUCLEAR', 'THERMAL', 'IMPORTS', 'HYDRO'])
         self.assertEqual(len(bot_df), 24)
 
     def test_dt_index(self):
         c = self.create_client('CAISO')
         df = c.parse_to_df(self.ren_report_tsv,
-                            skiprows=1, nrows=24, header=1,
-                            delimiter='\t+')
+                            skiprows=1, nrows=24, header=0,
+                            delimiter='\t+', engine='python')
         indexed = c.set_dt_index(df, date(2014, 3, 12), df['Hour'])
         self.assertEqual(type(indexed.index), pd.tseries.index.DatetimeIndex)
         self.assertEqual(indexed.index[0].hour, 7)
@@ -366,8 +366,8 @@ def test_dt_index(self):
     def test_pivot(self):
         c = self.create_client('CAISO')
         df = c.parse_to_df(self.ren_report_tsv,
-                            skiprows=1, nrows=24, header=1,
-                            delimiter='\t+')
+                            skiprows=1, nrows=24, header=0,
+                            delimiter='\t+', engine='python')
         indexed = c.set_dt_index(df, date(2014, 3, 12), df['Hour'])
         indexed.pop('Hour')
         pivoted = c.unpivot(indexed)