From 25995901388949ec9e4465962d1d6c3ee43c6327 Mon Sep 17 00:00:00 2001 From: Jesse van Elteren Date: Wed, 4 Jan 2023 21:15:55 +0100 Subject: [PATCH 01/11] changed generation to lxml --- entsoe/entsoe.py | 10 +++++----- entsoe/parsers.py | 44 +++++++++++++++++++------------------------- requirements.txt | 1 + setup.py | 2 +- 4 files changed, 26 insertions(+), 31 deletions(-) diff --git a/entsoe/entsoe.py b/entsoe/entsoe.py index f3db5af..e09ddbf 100644 --- a/entsoe/entsoe.py +++ b/entsoe/entsoe.py @@ -307,7 +307,7 @@ def query_wind_and_solar_forecast( def query_generation( self, country_code: Union[Area, str], start: pd.Timestamp, - end: pd.Timestamp, psr_type: Optional[str] = None, **kwargs) -> str: + end: pd.Timestamp, psr_type: Optional[str] = None, **kwargs) -> bytes: """ Parameters ---------- @@ -319,7 +319,7 @@ def query_generation( Returns ------- - str + bytes """ area = lookup_area(country_code) params = { @@ -330,7 +330,7 @@ def query_generation( if psr_type: params.update({'psrType': psr_type}) response = self._base_request(params=params, start=start, end=end) - return response.text + return response.content def query_generation_per_plant( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -1231,9 +1231,9 @@ def query_generation( pd.DataFrame """ area = lookup_area(country_code) - text = super(EntsoePandasClient, self).query_generation( + xml = super(EntsoePandasClient, self).query_generation( country_code=area, start=start, end=end, psr_type=psr_type) - df = parse_generation(text, nett=nett) + df = parse_generation(xml, nett=nett) df = df.tz_convert(area.tz) df = df.truncate(before=start, after=end) return df diff --git a/entsoe/parsers.py b/entsoe/parsers.py index df90a0b..af3177e 100644 --- a/entsoe/parsers.py +++ b/entsoe/parsers.py @@ -6,6 +6,7 @@ import bs4 from bs4.builder import XMLParsedAsHTMLWarning import pandas as pd +from lxml import etree from .mappings import PSRTYPE_MAPPINGS, DOCSTATUS, BSNTYPE, Area @@ -15,21 +16,21 @@ CONSUMPTION_ELEMENT = "outBiddingZone_Domain.mRID" -def _extract_timeseries(xml_text): +def _extract_timeseries(xml): """ Parameters ---------- - xml_text : str + xml : bytes Yields ------- - bs4.element.tag + lxml.element """ - if not xml_text: + if not xml: return - soup = bs4.BeautifulSoup(xml_text, 'html.parser') - for timeseries in soup.find_all('timeseries'): - yield timeseries + for event, element in etree.iterparse(BytesIO(xml), tag='{*}TimeSeries'): + yield element + def parse_prices(xml_text): @@ -137,7 +138,7 @@ def parse_generation( all_series = dict() for soup in _extract_timeseries(xml_text): ts = _parse_generation_timeseries(soup, per_plant=per_plant, include_eic=include_eic) - + # check if we already have a series of this name series = all_series.get(ts.name) if series is None: @@ -148,7 +149,7 @@ def parse_generation( series = pd.concat([series, ts]) series.sort_index(inplace=True) all_series[series.name] = series - + # drop duplicates in all series for name in all_series: ts = all_series[name] @@ -631,24 +632,17 @@ def _parse_generation_timeseries(soup, per_plant: bool = False, include_eic: boo ------- pd.Series """ - positions = [] - quantities = [] - for point in soup.find_all('point'): - positions.append(int(point.find('position').text)) - quantity = point.find('quantity') - if quantity is None: - raise LookupError( - f'No quantity found in this point, it should have one: {point}') - quantities.append(float(quantity.text)) + positions = [int(x.text) for x in soup.iter('{*}position')] + quantities = [float(x.text) for 
x in soup.iter('{*}quantity')] series = pd.Series(index=positions, data=quantities) series = series.sort_index() series.index = _parse_datetimeindex(soup) # Check if there is a psrtype, if so, get it. - _psrtype = soup.find('psrtype') + _psrtype = list(soup.iter("{*}psrType")) if _psrtype is not None: - psrtype = _psrtype.text + psrtype = _psrtype[0].text else: psrtype = None @@ -656,7 +650,7 @@ def _parse_generation_timeseries(soup, per_plant: bool = False, include_eic: boo # If IN, this means Actual Consumption is measured # If OUT, this means Consumption is measured. # OUT means Consumption of a generation plant, eg. charging a pumped hydro plant - if soup.find(CONSUMPTION_ELEMENT.lower()): + if list(soup.iter("{*}" + CONSUMPTION_ELEMENT)): metric = 'Actual Consumption' else: metric = 'Actual Aggregated' @@ -669,7 +663,7 @@ def _parse_generation_timeseries(soup, per_plant: bool = False, include_eic: boo name.append(psrtype_name) if per_plant: - plantname = soup.find('name').text + plantname = soup.find('{*}name').text name.append(plantname) if include_eic: eic = soup.find("mrid", codingscheme="A01").text @@ -758,13 +752,13 @@ def _parse_datetimeindex(soup, tz=None): ------- pd.DatetimeIndex """ - start = pd.Timestamp(soup.find('start').text) - end = pd.Timestamp(soup.find_all('end')[-1].text) + start = pd.Timestamp(next(soup.iter("{*}start")).text) + end = pd.Timestamp(next(soup.iter("{*}end")).text) if tz is not None: start = start.tz_convert(tz) end = end.tz_convert(tz) - delta = _resolution_to_timedelta(res_text=soup.find('resolution').text) + delta = _resolution_to_timedelta(res_text=next(soup.iter('{*}resolution')).text) index = pd.date_range(start=start, end=end, freq=delta, inclusive='left') if tz is not None: dst_jump = len(set(index.map(lambda d: d.dst()))) > 1 diff --git a/requirements.txt b/requirements.txt index 0e0cbd9..58f04a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ requests pytz beautifulsoup4 pandas>=1.4.0 +lxml==4.9.2 diff --git a/setup.py b/setup.py index aec7542..a70da39 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ # List run-time dependencies here. These will be installed by pip when # your project is installed. 
- install_requires=['requests', 'pytz', 'beautifulsoup4', 'pandas>=1.4.0'], + install_requires=['requests', 'pytz', 'beautifulsoup4', 'pandas>=1.4.0', 'lxml==4.9.2'], include_package_data=True, )

From a17a80f56d761ddc6ae74a8eb5a0f80763c2d0a3 Mon Sep 17 00:00:00 2001 From: Jesse van Elteren Date: Wed, 4 Jan 2023 22:34:39 +0100 Subject: [PATCH 02/11] redid most of bs into lxml elements

--- entsoe/parsers.py | 328 ++++++++++++++++++++++------------------------ 1 file changed, 159 insertions(+), 169 deletions(-)

diff --git a/entsoe/parsers.py b/entsoe/parsers.py index af3177e..69c9b78 100644 --- a/entsoe/parsers.py +++ b/entsoe/parsers.py @@ -16,6 +16,12 @@
 CONSUMPTION_ELEMENT = "outBiddingZone_Domain.mRID"
 
+def find(element, tag): + return next(element.iter('{*}'+tag)).text + +def findall(element, tag): + return element.iter('{*}'+tag) + def _extract_timeseries(xml): """ Parameters ---------- @@ -33,11 +39,11 @@
 
 
-def parse_prices(xml_text): +def parse_prices(xml_bytes): """ Parameters ---------- - xml_text : str + xml_bytes : bytes
 
     Returns
     -------
@@ -48,9 +54,9 @@
         '30T': [],
         '60T': []
     }
- for soup in _extract_timeseries(xml_text): - soup_series = _parse_price_timeseries(soup) - series[soup_series.index.freqstr].append(soup_series) + for element in _extract_timeseries(xml_bytes): + element_series = _parse_price_timeseries(element) + series[element_series.index.freqstr].append(element_series)
 
     for freq, freq_series in series.items():
         if len(freq_series) > 0:
@@ -58,30 +64,30 @@
     return series
 
-def parse_netpositions(xml_text): +def parse_netpositions(xml_bytes): """ Parameters ---------- - xml_text : str + xml_bytes : bytes
 
     Returns
     -------
     pd.Series
     """
     series = []
- for soup in _extract_timeseries(xml_text): - series.append(_parse_netposition_timeseries(soup)) + for element in _extract_timeseries(xml_bytes): + series.append(_parse_netposition_timeseries(element))
     series = pd.concat(series)
     series = series.sort_index()
     return series
 
-def parse_loads(xml_text, process_type='A01'): +def parse_loads(xml_bytes, process_type='A01'): """ Parameters ---------- - xml_text : str + xml_bytes : str
 
     Returns
     -------
@@ -89,8 +95,8 @@
     """
     if process_type == 'A01' or process_type == 'A16':
         series = []
- for soup in _extract_timeseries(xml_text): - series.append(_parse_load_timeseries(soup)) + for element in _extract_timeseries(xml_bytes): + series.append(_parse_load_timeseries(element))
         series = pd.concat(series)
         series = series.sort_index()
         return pd.DataFrame({
@@ -99,11 +105,11 @@
     else:
         series_min = pd.Series(dtype='object')
         series_max = pd.Series(dtype='object')
- for soup in _extract_timeseries(xml_text): - t = _parse_load_timeseries(soup) - if soup.find('businesstype').text == 'A60': + for element in _extract_timeseries(xml_bytes): + t = _parse_load_timeseries(element) + if find(element, 'businessType') == 'A60':
             series_min = series_min.append(t)
- elif soup.find('businesstype').text == 'A61': + elif find(element, 'businessType') == 'A61':
             series_max = series_max.append(t)
         else:
             continue
@@ -115,14 +121,14 @@
 
 def parse_generation(
- xml_text: str, + xml_bytes: bytes,
         per_plant: bool = False,
         include_eic: bool = False,
         nett: bool = False) -> Union[pd.DataFrame, pd.Series]:
     """
     Parameters
     ----------
- xml_text : str + xml_bytes : str
     per_plant : bool
         Decide if you need the parser that can extract
plant info as well. nett : bool @@ -136,8 +142,8 @@ def parse_generation( pd.DataFrame | pd.Series """ all_series = dict() - for soup in _extract_timeseries(xml_text): - ts = _parse_generation_timeseries(soup, per_plant=per_plant, include_eic=include_eic) + for element in _extract_timeseries(xml_bytes): + ts = _parse_generation_timeseries(element, per_plant=per_plant, include_eic=include_eic) # check if we already have a series of this name series = all_series.get(ts.name) @@ -199,19 +205,19 @@ def _calc_nett(_df): return df -def parse_installed_capacity_per_plant(xml_text): +def parse_installed_capacity_per_plant(xml_bytes): """ Parameters ---------- - xml_text : str + xml_bytes : bytes Returns ------- pd.DataFrame """ all_series = {} - for soup in _extract_timeseries(xml_text): - s = _parse_installed_capacity_per_plant(soup) + for element in _extract_timeseries(xml_bytes): + s = _parse_installed_capacity_per_plant(element) series = all_series.get(s.name) if series is None: all_series[s.name] = s @@ -231,95 +237,95 @@ def parse_installed_capacity_per_plant(xml_text): return df -def parse_water_hydro(xml_text, tz): +def parse_water_hydro(xml_bytes, tz): """ Parameters ---------- - xml_text : str + xml_bytes : str Returns ------- pd.Series """ all_series = [] - for soup in _extract_timeseries(xml_text): - all_series.append(_parse_water_hydro_timeseries(soup, tz=tz)) + for element in _extract_timeseries(xml_bytes): + all_series.append(_parse_water_hydro_timeseries(element, tz=tz)) series = pd.concat(all_series) return series -def parse_crossborder_flows(xml_text): +def parse_crossborder_flows(xml_bytes): """ Parameters ---------- - xml_text : str + xml_bytes : str Returns ------- pd.Series """ series = [] - for soup in _extract_timeseries(xml_text): - series.append(_parse_crossborder_flows_timeseries(soup)) + for element in _extract_timeseries(xml_bytes): + series.append(_parse_crossborder_flows_timeseries(element)) series = pd.concat(series) series = series.sort_index() return series -def parse_imbalance_prices(xml_text): +def parse_imbalance_prices(xml_bytes): """ Parameters ---------- - xml_text : str + xml_bytes : str Returns ------- pd.DataFrame """ - timeseries_blocks = _extract_timeseries(xml_text) - frames = (_parse_imbalance_prices_timeseries(soup) - for soup in timeseries_blocks) + timeseries_blocks = _extract_timeseries(xml_bytes) + frames = (_parse_imbalance_prices_timeseries(element) + for element in timeseries_blocks) df = pd.concat(frames, axis=1) df = df.stack().unstack() # ad-hoc fix to prevent column splitting by NaNs df.sort_index(inplace=True) return df -def parse_imbalance_volumes(xml_text): +def parse_imbalance_volumes(xml_bytes): """ Parameters ---------- - xml_text : str + xml_bytes : str Returns ------- pd.DataFrame """ - timeseries_blocks = _extract_timeseries(xml_text) - frames = (_parse_imbalance_volumes_timeseries(soup) - for soup in timeseries_blocks) + timeseries_blocks = _extract_timeseries(xml_bytes) + frames = (_parse_imbalance_volumes_timeseries(element) + for element in timeseries_blocks) df = pd.concat(frames, axis=1) df = df.stack().unstack() # ad-hoc fix to prevent column splitting by NaNs df.sort_index(inplace=True) return df -def parse_procured_balancing_capacity(xml_text, tz): +def parse_procured_balancing_capacity(xml_bytes, tz): """ Parameters ---------- - xml_text : str + xml_bytes : str tz: str Returns ------- pd.DataFrame """ - timeseries_blocks = _extract_timeseries(xml_text) - frames = (_parse_procured_balancing_capacity(soup, tz) - for soup 
in timeseries_blocks) + timeseries_blocks = _extract_timeseries(xml_bytes) + frames = (_parse_procured_balancing_capacity(element, tz) + for element in timeseries_blocks) df = pd.concat(frames, axis=1) df.sort_index(axis=0, inplace=True) @@ -327,11 +333,11 @@ def parse_procured_balancing_capacity(xml_text, tz): return df -def _parse_procured_balancing_capacity(soup, tz): +def _parse_procured_balancing_capacity(element, tz): """ Parameters ---------- - soup : bs4.element.tag + element: lxml.element tz: str Returns @@ -343,20 +349,19 @@ def _parse_procured_balancing_capacity(soup, tz): 'A02': 'Down' } - flow_direction = direction[soup.find('flowdirection.direction').text] - period = soup.find('period') - start = pd.to_datetime(period.find('timeinterval').find('start').text) - end = pd.to_datetime(period.find('timeinterval').find('end').text) - resolution = _resolution_to_timedelta(period.find('resolution').text) + flow_direction = direction[find(element, 'flowDirection.direction')] + start = pd.to_datetime(find(element, 'start')) + end = pd.to_datetime(find(element, 'end')) + resolution = _resolution_to_timedelta(find(element, 'resolution')) tx = pd.date_range(start=start, end=end, freq=resolution, inclusive='left') - points = period.find_all('point') + points = findall(element, 'Point') df = pd.DataFrame(index=tx, columns=['Price', 'Volume']) for dt, point in zip(tx, points): - df.loc[dt, 'Price'] = float(point.find('procurement_price.amount').text) - df.loc[dt, 'Volume'] = float(point.find('quantity').text) + df.loc[dt, 'Price'] = float(find(point, 'Procurement_Price.amount')) + df.loc[dt, 'Volume'] = float(find(point, 'quantity')) - mr_id = int(soup.find('mrid').text) + mr_id = int(find(element, 'mrid')) df.columns = pd.MultiIndex.from_product( [[flow_direction], [mr_id], df.columns], names=('direction', 'mrid', 'unit') @@ -365,11 +370,11 @@ def _parse_procured_balancing_capacity(soup, tz): return df -def parse_contracted_reserve(xml_text, tz, label): +def parse_contracted_reserve(xml_bytes, tz, label): """ Parameters ---------- - xml_text : str + xml_bytes : str tz: str label: str @@ -377,9 +382,9 @@ def parse_contracted_reserve(xml_text, tz, label): ------- pd.DataFrame """ - timeseries_blocks = _extract_timeseries(xml_text) - frames = (_parse_contracted_reserve_series(soup, tz, label) - for soup in timeseries_blocks) + timeseries_blocks = _extract_timeseries(xml_bytes) + frames = (_parse_contracted_reserve_series(element, tz, label) + for element in timeseries_blocks) df = pd.concat(frames, axis=1) # Ad-hoc fix to prevent that columns are split by NaNs: df = df.groupby(axis=1, level = [0,1]).mean() @@ -387,11 +392,11 @@ def parse_contracted_reserve(xml_text, tz, label): return df -def _parse_contracted_reserve_series(soup, tz, label): +def _parse_contracted_reserve_series(element, tz, label): """ Parameters ---------- - soup : bs4.element.tag + element: lxml.element tz: str label: str @@ -399,17 +404,14 @@ def _parse_contracted_reserve_series(soup, tz, label): ------- pd.Series """ - positions = [] - prices = [] - for point in soup.find_all('point'): - positions.append(int(point.find('position').text)) - prices.append(float(point.find(label).text)) - + positions = [int(x.text) for x in findall(element, 'position')] + prices = [float(x.text) for x in findall(element, label)] + df = pd.DataFrame(data={'position': positions, label: prices}) df = df.set_index(['position']) df.sort_index(inplace=True) - index = _parse_datetimeindex(soup, tz) + index = _parse_datetimeindex(element, tz) if 
len(index) > len(df.index): print("Shortening index", file=sys.stderr) df.index = index[:len(df.index)] @@ -423,11 +425,12 @@ def _parse_contracted_reserve_series(soup, tz, label): 'A03': 'Symmetric'} # First column level: the type of reserve - reserve_type = BSNTYPE[soup.find("businesstype").text] + reserve_type = BSNTYPE[find(element, "businesstype")] df.rename(columns={label: reserve_type}, inplace=True) # Second column level: the flow direction - direction = direction_dico[soup.find("flowdirection.direction").text] + #TODO + direction = direction_dico[find(element, 'FlowDirection.direction')] df.columns = pd.MultiIndex.from_product([df.columns, [direction]]) return df @@ -455,11 +458,11 @@ def gen_frames(archive): return df -def _parse_imbalance_prices_timeseries(soup) -> pd.DataFrame: +def _parse_imbalance_prices_timeseries(element) -> pd.DataFrame: """ Parameters ---------- - soup : bs4.element.tag + element: lxml.element Returns ------- @@ -468,9 +471,10 @@ def _parse_imbalance_prices_timeseries(soup) -> pd.DataFrame: positions = [] amounts = [] categories = [] - for point in soup.find_all('point'): - positions.append(int(point.find('position').text)) - amounts.append(float(point.find('imbalance_price.amount').text)) + for point in findall(element, 'Point'): + positions.append(int(find(point, 'position'))) + #TODO + amounts.append(float(find(point, 'imbalance_price.amount'))) if point.find('imbalance_price.category'): categories.append(point.find('imbalance_price.category').text) else: @@ -480,7 +484,7 @@ def _parse_imbalance_prices_timeseries(soup) -> pd.DataFrame: 'amount': amounts, 'category': categories}) df = df.set_index(['position', 'category']).unstack() df.sort_index(inplace=True) - df.index = _parse_datetimeindex(soup) + df.index = _parse_datetimeindex(element) df = df.xs('amount', axis=1) df.index.name = None df.columns.name = None @@ -513,11 +517,11 @@ def gen_frames(archive): return df -def _parse_imbalance_volumes_timeseries(soup) -> pd.DataFrame: +def _parse_imbalance_volumes_timeseries(element) -> pd.DataFrame: """ Parameters ---------- - soup : bs4.element.tag + element: lxml.element Returns ------- @@ -526,31 +530,31 @@ def _parse_imbalance_volumes_timeseries(soup) -> pd.DataFrame: flow_direction_factor = { 'A01': 1, # in 'A02': -1 # out - }[soup.find('flowdirection.direction').text] + }[find(element, 'flowDirection.direction')] df = pd.DataFrame(columns=['Imbalance Volume']) - for period in soup.find_all('period'): - start = pd.to_datetime(period.find('timeinterval').find('start').text) - end = pd.to_datetime(period.find('timeinterval').find('end').text) - resolution = _resolution_to_timedelta(period.find('resolution').text) + for period in findall(element, 'Period'): + start = pd.to_datetime(find(period, 'start')) + end = pd.to_datetime(find(period, 'end')) + resolution = _resolution_to_timedelta(find(period, 'resolution')) tx = pd.date_range(start=start, end=end, freq=resolution, inclusive='left') - points = period.find_all('point') + points = findall(period, 'Point') for dt, point in zip(tx, points): df.loc[dt, 'Imbalance Volume'] = \ - float(point.find('quantity').text) * flow_direction_factor + float(find(point, 'quantity')) * flow_direction_factor df.set_index(['Imbalance Volume']) return df -def _parse_netposition_timeseries(soup): +def _parse_netposition_timeseries(element): """ Parameters ---------- - soup : bs4.element.tag + element : lxml.element Returns ------- @@ -558,91 +562,85 @@ def _parse_netposition_timeseries(soup): """ positions = [] quantities 
= [] - if 'REGION' in soup.find('out_domain.mrid').text: + #TODO + if 'REGION' in find(element, 'out_domain.mrid'): factor = -1 # flow is import so negative else: factor = 1 - for point in soup.find_all('point'): - positions.append(int(point.find('position').text)) - quantities.append(factor * float(point.find('quantity').text)) + positions = [int(x.text) for x in findall(element, 'position')] + quantities = [factor * float(x.text) for x in findall(element, 'quantity')] series = pd.Series(index=positions, data=quantities) series = series.sort_index() - series.index = _parse_datetimeindex(soup) + series.index = _parse_datetimeindex(element) return series -def _parse_price_timeseries(soup): +def _parse_price_timeseries(element): """ Parameters ---------- - soup : bs4.element.tag + element : lxml.element Returns ------- pd.Series """ - positions = [] - prices = [] - for point in soup.find_all('point'): - positions.append(int(point.find('position').text)) - prices.append(float(point.find('price.amount').text)) + positions = [int(x.text) for x in findall(element, 'position')] + prices = [float(x.text) for x in findall(element, 'price_amount')] series = pd.Series(index=positions, data=prices) series = series.sort_index() - series.index = _parse_datetimeindex(soup) + series.index = _parse_datetimeindex(element) return series -def _parse_load_timeseries(soup): +def _parse_load_timeseries(element): """ Parameters ---------- - soup : bs4.element.tag + element : lxml.element Returns ------- pd.Series """ - positions = [] - prices = [] - for point in soup.find_all('point'): - positions.append(int(point.find('position').text)) - prices.append(float(point.find('quantity').text)) + positions = [int(x.text) for x in findall(element, 'position')] + prices = [float(x.text) for x in findall(element, 'quantity')] series = pd.Series(index=positions, data=prices) series = series.sort_index() - series.index = _parse_datetimeindex(soup) + series.index = _parse_datetimeindex(element) return series -def _parse_generation_timeseries(soup, per_plant: bool = False, include_eic: bool = False) -> pd.Series: +def _parse_generation_timeseries(element, per_plant: bool = False, include_eic: bool = False) -> pd.Series: """ Works for generation by type, generation forecast, and wind and solar forecast Parameters ---------- - soup : bs4.element.tag + element : lxml.element Returns ------- pd.Series """ - positions = [int(x.text) for x in soup.iter('{*}position')] - quantities = [float(x.text) for x in soup.iter('{*}quantity')] + positions = [int(x.text) for x in findall(element, 'position')] + quantities = [float(x.text) for x in findall(element, 'quantity')] series = pd.Series(index=positions, data=quantities) series = series.sort_index() - series.index = _parse_datetimeindex(soup) + series.index = _parse_datetimeindex(element) # Check if there is a psrtype, if so, get it. - _psrtype = list(soup.iter("{*}psrType")) + _psrtype = findall(element, 'psrType') if _psrtype is not None: - psrtype = _psrtype[0].text + psrtype = find(element, 'psrType') else: psrtype = None @@ -650,7 +648,7 @@ def _parse_generation_timeseries(soup, per_plant: bool = False, include_eic: boo # If IN, this means Actual Consumption is measured # If OUT, this means Consumption is measured. # OUT means Consumption of a generation plant, eg. 
charging a pumped hydro plant - if list(soup.iter("{*}" + CONSUMPTION_ELEMENT)): + if list(findall(element, CONSUMPTION_ELEMENT)): metric = 'Actual Consumption' else: metric = 'Actual Aggregated' @@ -663,10 +661,11 @@ def _parse_generation_timeseries(soup, per_plant: bool = False, include_eic: boo name.append(psrtype_name) if per_plant: - plantname = soup.find('{*}name').text + plantname = find(element, 'name') name.append(plantname) if include_eic: - eic = soup.find("mrid", codingscheme="A01").text + #TODO + eic = find(element, "mrid", codingscheme="A01") name.insert(0, eic) @@ -681,84 +680,79 @@ def _parse_generation_timeseries(soup, per_plant: bool = False, include_eic: boo return series -def _parse_water_hydro_timeseries(soup, tz): +def _parse_water_hydro_timeseries(element, tz): """ Parses timeseries for water reservoirs and hydro storage plants Parameters ---------- - soup : bs4.element.tag + element : lxml.element Returns ------- pd.Series """ - positions = [] - quantities = [] - for point in soup.find_all('point'): - positions.append(int(point.find('position').text)) - quantity = point.find('quantity') - if quantity is None: - raise LookupError( - f'No quantity found in this point, it should have one: {point}') - quantities.append(float(quantity.text)) + positions = [int(x.text) for x in findall(element, 'position')] + quantities = [float(x.text) for x in findall(element, 'quantity')] series = pd.Series(index=positions, data=quantities) series = series.sort_index() - series.index = _parse_datetimeindex(soup, tz) + series.index = _parse_datetimeindex(element, tz) return series -def _parse_installed_capacity_per_plant(soup): +def _parse_installed_capacity_per_plant(element): """ Parameters ---------- - soup : bs4.element.tag + element : lxml.element Returns ------- pd.Series """ + #TODO extract_vals = {'Name': 'registeredresource.name', - 'Production Type': 'psrtype', + 'Production Type': 'psrType', 'Bidding Zone': 'inbiddingzone_domain.mrid', - # 'Status': 'businesstype', + # 'Status': 'businessType', 'Voltage Connection Level [kV]': 'voltage_powersystemresources.highvoltagelimit'} - series = pd.Series(extract_vals).apply(lambda v: soup.find(v).text) + #TODO + series = pd.Series(extract_vals).apply(lambda v: find(element, v)) # extract only first point series['Installed Capacity [MW]'] = \ - soup.find_all('point')[0].find('quantity').text - - series.name = soup.find('registeredresource.mrid').text + float(find(element, 'quantity')) + #TODO + series.name = find(element, 'registeredresource.mrid') return series -def _parse_datetimeindex(soup, tz=None): +def _parse_datetimeindex(element, tz=None): """ - Create a datetimeindex from a parsed beautifulsoup, + Create a datetimeindex from a lxml element, given that it contains the elements 'start', 'end' and 'resolution' Parameters ---------- - soup : bs4.element.tag + element : lxml.element tz: str Returns ------- pd.DatetimeIndex """ - start = pd.Timestamp(next(soup.iter("{*}start")).text) - end = pd.Timestamp(next(soup.iter("{*}end")).text) + start = pd.Timestamp(find(element, 'start')) + end = pd.Timestamp(find(element, 'end')) if tz is not None: start = start.tz_convert(tz) end = end.tz_convert(tz) - delta = _resolution_to_timedelta(res_text=next(soup.iter('{*}resolution')).text) + delta = _resolution_to_timedelta(res_text=find(element, 'resolution')) index = pd.date_range(start=start, end=end, freq=delta, inclusive='left') if tz is not None: dst_jump = len(set(index.map(lambda d: d.dst()))) > 1 @@ -772,25 +766,22 @@ def 
_parse_datetimeindex(soup, tz=None): return index -def _parse_crossborder_flows_timeseries(soup): +def _parse_crossborder_flows_timeseries(element): """ Parameters ---------- - soup : bs4.element.tag + element : lxml.element Returns ------- pd.Series """ - positions = [] - flows = [] - for point in soup.find_all('point'): - positions.append(int(point.find('position').text)) - flows.append(float(point.find('quantity').text)) + positions = [int(x.text) for x in element.iter('{*}position')] + flows = [float(x.text) for x in element.iter('{*}quantity')] series = pd.Series(index=positions, data=flows) series = series.sort_index() - series.index = _parse_datetimeindex(soup) + series.index = _parse_datetimeindex(element) return series @@ -843,12 +834,12 @@ def _resolution_to_timedelta(res_text: str) -> str: ] -def _unavailability_gen_ts(soup: bs4.BeautifulSoup) -> list: +def _unavailability_gen_ts(element) -> list: """ Parser for generation unavailibility time-series Parameters ---------- - soup : bs4.element.tag + element : lxml.element tz : str Returns @@ -857,8 +848,7 @@ def _unavailability_gen_ts(soup: bs4.BeautifulSoup) -> list: """ # Avoid attribute errors when some of the fields are void: - get_attr = lambda attr: "" if soup.find(attr) is None else soup.find( - attr).text + get_attr = lambda attr: "" if list(findall(element, attr)) is None else find(element, attr) # When no nominal power is given, give default numeric value of 0: get_float = lambda val: float('NaN') if val == "" else float(val) @@ -873,7 +863,7 @@ def _unavailability_gen_ts(soup: bs4.BeautifulSoup) -> list: 'production_registeredresource.psrtype.psrtype'), ""), get_float(get_attr( 'production_registeredresource.psrtype.powersystemresources.nominalp'))] - return [f + p for p in _available_period(soup)] + return [f + p for p in _available_period(element)] HEADERS_UNAVAIL_TRANSM = ['created_doc_time', @@ -891,13 +881,13 @@ def _unavailability_gen_ts(soup: bs4.BeautifulSoup) -> list: ] -def _unavailability_tm_ts(soup: bs4.BeautifulSoup) -> list: +def _unavailability_tm_ts(element) -> list: """ Parser for transmission unavailibility time-series Parameters ---------- - soup : bs4.element.tag + element: lxml.element tz : str Returns @@ -905,8 +895,7 @@ def _unavailability_tm_ts(soup: bs4.BeautifulSoup) -> list: list """ # Avoid attribute errors when some of the fields are void: - get_attr = lambda attr: "" if soup.find(attr) is None else soup.find( - attr).text + get_attr = lambda attr: "" if list(findall(element, attr)) is None else find(element, attr) # When no nominal power is given, give default numeric value of 0: f = [BSNTYPE[get_attr('businesstype')], @@ -915,7 +904,7 @@ def _unavailability_tm_ts(soup: bs4.BeautifulSoup) -> list: get_attr('quantity_measure_unit.name'), get_attr('curvetype'), ] - return [f + p for p in _available_period(soup)] + return [f + p for p in _available_period(element)] _UNAVAIL_PARSE_CFG = {'A77': (HEADERS_UNAVAIL_GEN, _unavailability_gen_ts), @@ -946,10 +935,11 @@ def parse_unavailabilities(response: bytes, doctype: str) -> pd.DataFrame: return df -def _available_period(timeseries: bs4.BeautifulSoup) -> list: +def _available_period(timeseries) -> list: # if not timeseries: # return - for period in timeseries.find_all('available_period'): + #TODO + for period in findall(timeseries, 'available_period'): start, end = pd.Timestamp(period.timeinterval.start.text), pd.Timestamp( period.timeinterval.end.text) res = period.resolution.text From c56021881da9db5cc7a0e489aed7bc556bc48191 Mon Sep 17 00:00:00 
2001 From: Jesse van Elteren Date: Wed, 4 Jan 2023 23:10:16 +0100 Subject: [PATCH 03/11] capitals in tags since lxml is case-sensitive --- entsoe/parsers.py | 103 ++++++++++++++++++++++++---------------------- 1 file changed, 54 insertions(+), 49 deletions(-) diff --git a/entsoe/parsers.py b/entsoe/parsers.py index 69c9b78..3ab140e 100644 --- a/entsoe/parsers.py +++ b/entsoe/parsers.py @@ -22,7 +22,7 @@ def find(element, tag): def findall(element, tag): return element.iter('{*}'+tag) -def _extract_timeseries(xml): +def _extract_timeseries(xml_bytes): """ Parameters ---------- @@ -32,9 +32,9 @@ def _extract_timeseries(xml): ------- lxml.element """ - if not xml: + if not xml_bytes: return - for event, element in etree.iterparse(BytesIO(xml), tag='{*}TimeSeries'): + for event, element in etree.iterparse(BytesIO(xml_bytes), tag='{*}TimeSeries'): yield element @@ -87,7 +87,7 @@ def parse_loads(xml_bytes, process_type='A01'): """ Parameters ---------- - xml_bytes : str + xml_bytes: bytes Returns ------- @@ -128,7 +128,7 @@ def parse_generation( """ Parameters ---------- - xml_bytes : str + xml_bytes: bytes per_plant : bool Decide if you need the parser that can extract plant info as well. nett : bool @@ -241,7 +241,7 @@ def parse_water_hydro(xml_bytes, tz): """ Parameters ---------- - xml_bytes : str + xml_bytes: bytes Returns ------- @@ -260,7 +260,7 @@ def parse_crossborder_flows(xml_bytes): """ Parameters ---------- - xml_bytes : str + xml_bytes: bytes Returns ------- @@ -278,7 +278,7 @@ def parse_imbalance_prices(xml_bytes): """ Parameters ---------- - xml_bytes : str + xml_bytes : bytes Returns ------- @@ -297,7 +297,7 @@ def parse_imbalance_volumes(xml_bytes): """ Parameters ---------- - xml_bytes : str + xml_bytes: bytes Returns ------- @@ -316,7 +316,7 @@ def parse_procured_balancing_capacity(xml_bytes, tz): """ Parameters ---------- - xml_bytes : str + xml_bytes: bytes tz: str Returns @@ -361,9 +361,9 @@ def _parse_procured_balancing_capacity(element, tz): df.loc[dt, 'Price'] = float(find(point, 'Procurement_Price.amount')) df.loc[dt, 'Volume'] = float(find(point, 'quantity')) - mr_id = int(find(element, 'mrid')) + mrid = int(find(element, 'mRID')) df.columns = pd.MultiIndex.from_product( - [[flow_direction], [mr_id], df.columns], + [[flow_direction], [mrid], df.columns], names=('direction', 'mrid', 'unit') ) @@ -374,7 +374,7 @@ def parse_contracted_reserve(xml_bytes, tz, label): """ Parameters ---------- - xml_bytes : str + xml_bytes: bytes tz: str label: str @@ -398,7 +398,7 @@ def _parse_contracted_reserve_series(element, tz, label): ---------- element: lxml.element tz: str - label: str + label: str (case sensitive!) 
Returns ------- @@ -425,12 +425,11 @@ def _parse_contracted_reserve_series(element, tz, label): 'A03': 'Symmetric'} # First column level: the type of reserve - reserve_type = BSNTYPE[find(element, "businesstype")] + reserve_type = BSNTYPE[find(element, "businessType")] df.rename(columns={label: reserve_type}, inplace=True) # Second column level: the flow direction - #TODO - direction = direction_dico[find(element, 'FlowDirection.direction')] + direction = direction_dico[find(element, 'flowDirection.direction')] df.columns = pd.MultiIndex.from_product([df.columns, [direction]]) return df @@ -449,6 +448,7 @@ def gen_frames(archive): with zipfile.ZipFile(BytesIO(archive), 'r') as arc: for f in arc.infolist(): if f.filename.endswith('xml'): + #TODO this should generate bytes not xml text frame = parse_imbalance_prices(xml_text=arc.read(f)) yield frame @@ -473,10 +473,9 @@ def _parse_imbalance_prices_timeseries(element) -> pd.DataFrame: categories = [] for point in findall(element, 'Point'): positions.append(int(find(point, 'position'))) - #TODO - amounts.append(float(find(point, 'imbalance_price.amount'))) - if point.find('imbalance_price.category'): - categories.append(point.find('imbalance_price.category').text) + amounts.append(float(find(point, 'imbalance_Price.amount'))) + if list(findall(point, 'imbalance_price.category')): + categories.append(find(point, 'imbalance_Price.category')) else: categories.append('None') @@ -563,7 +562,7 @@ def _parse_netposition_timeseries(element): positions = [] quantities = [] #TODO - if 'REGION' in find(element, 'out_domain.mrid'): + if 'REGION' in find(element, 'out_Domain.mrid'): factor = -1 # flow is import so negative else: factor = 1 @@ -665,7 +664,8 @@ def _parse_generation_timeseries(element, per_plant: bool = False, include_eic: name.append(plantname) if include_eic: #TODO - eic = find(element, "mrid", codingscheme="A01") + eic = find(element, "mRID") # is codingscheme as below required + # eic = find(element, "mrid", codingscheme="A01") name.insert(0, eic) @@ -712,21 +712,21 @@ def _parse_installed_capacity_per_plant(element): ------- pd.Series """ - #TODO - extract_vals = {'Name': 'registeredresource.name', + + extract_vals = {'Name': 'registeredResource.name', 'Production Type': 'psrType', - 'Bidding Zone': 'inbiddingzone_domain.mrid', + 'Bidding Zone': 'inBiddingZone_Domain.mRID', # 'Status': 'businessType', 'Voltage Connection Level [kV]': - 'voltage_powersystemresources.highvoltagelimit'} - #TODO + 'voltage_PowerSystemResources.highVoltageLimit'} + series = pd.Series(extract_vals).apply(lambda v: find(element, v)) # extract only first point series['Installed Capacity [MW]'] = \ float(find(element, 'quantity')) - #TODO - series.name = find(element, 'registeredresource.mrid') + + series.name = find(element, 'registeredResource.name') return series @@ -898,11 +898,11 @@ def _unavailability_tm_ts(element) -> list: get_attr = lambda attr: "" if list(findall(element, attr)) is None else find(element, attr) # When no nominal power is given, give default numeric value of 0: - f = [BSNTYPE[get_attr('businesstype')], - _INV_BIDDING_ZONE_DICO[get_attr('in_domain.mrid')], - _INV_BIDDING_ZONE_DICO[get_attr('out_domain.mrid')], - get_attr('quantity_measure_unit.name'), - get_attr('curvetype'), + f = [BSNTYPE[get_attr('businessType')], + _INV_BIDDING_ZONE_DICO[get_attr('in_Domain.mRID')], + _INV_BIDDING_ZONE_DICO[get_attr('out_Domain.mRID')], + get_attr('quantity_Measure_Unit.name'), + get_attr('curveType'), ] return [f + p for p in 
_available_period(element)] @@ -938,34 +938,39 @@ def parse_unavailabilities(response: bytes, doctype: str) -> pd.DataFrame: def _available_period(timeseries) -> list: # if not timeseries: # return - #TODO - for period in findall(timeseries, 'available_period'): - start, end = pd.Timestamp(period.timeinterval.start.text), pd.Timestamp( - period.timeinterval.end.text) - res = period.resolution.text - pstn, qty = period.point.position.text, period.point.quantity.text + + for period in findall(timeseries, 'Available_Period'): + start, end = pd.Timestamp(find(period, 'start')), pd.Timestamp( + find(period, 'end')) + res = find(period, 'resolution') + pstn, qty = find(period, 'position'), find(period, 'quantity') yield [start, end, res, pstn, qty] def _outage_parser(xml_file: bytes, headers, ts_func) -> pd.DataFrame: - xml_text = xml_file.decode() - - soup = bs4.BeautifulSoup(xml_text, 'html.parser') - mrid = soup.find("mrid").text - revision_number = int(soup.find("revisionnumber").text) + # xml_text = xml_file.decode() + # soup = bs4.BeautifulSoup(xml_text, 'html.parser') + element = etree.iterparse(BytesIO(xml_file)) + + + + mrid = find(element, 'mRID') + revision_number = int(find(element, 'revisionNumber')) + try: - creation_date = pd.Timestamp(soup.createddatetime.text) + creation_date = pd.Timestamp(find(element, 'createdDateTime')) except AttributeError: creation_date = "" try: - docstatus = DOCSTATUS[soup.docstatus.value.text] + docstatus = DOCSTATUS[find(element, 'value')] except AttributeError: docstatus = None d = list() - series = _extract_timeseries(xml_text) + series = _extract_timeseries(xml_file) for ts in series: row = [creation_date, docstatus, mrid, revision_number] + # ts_func may break since it will no longer receive a soup timeseries but a lxml element for t in ts_func(ts): d.append(row + t) df = pd.DataFrame.from_records(d, columns=headers) From 197cac71cb49be6effe4d62436f7d8aef991dbbe Mon Sep 17 00:00:00 2001 From: Jesse van Elteren Date: Wed, 4 Jan 2023 23:25:44 +0100 Subject: [PATCH 04/11] finished checking capitals on tags --- entsoe/parsers.py | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/entsoe/parsers.py b/entsoe/parsers.py index 3ab140e..e5789c0 100644 --- a/entsoe/parsers.py +++ b/entsoe/parsers.py @@ -358,7 +358,7 @@ def _parse_procured_balancing_capacity(element, tz): df = pd.DataFrame(index=tx, columns=['Price', 'Volume']) for dt, point in zip(tx, points): - df.loc[dt, 'Price'] = float(find(point, 'Procurement_Price.amount')) + df.loc[dt, 'Price'] = float(find(point, 'procurement_Price.amount')) df.loc[dt, 'Volume'] = float(find(point, 'quantity')) mrid = int(find(element, 'mRID')) @@ -474,7 +474,7 @@ def _parse_imbalance_prices_timeseries(element) -> pd.DataFrame: for point in findall(element, 'Point'): positions.append(int(find(point, 'position'))) amounts.append(float(find(point, 'imbalance_Price.amount'))) - if list(findall(point, 'imbalance_price.category')): + if list(findall(point, 'imbalance_Price.category')): categories.append(find(point, 'imbalance_Price.category')) else: categories.append('None') @@ -561,8 +561,8 @@ def _parse_netposition_timeseries(element): """ positions = [] quantities = [] - #TODO - if 'REGION' in find(element, 'out_Domain.mrid'): + + if 'REGION' in find(element, 'out_Domain.mRID'): factor = -1 # flow is import so negative else: factor = 1 @@ -587,7 +587,7 @@ def _parse_price_timeseries(element): pd.Series """ positions = [int(x.text) for x in findall(element, 
'position')] - prices = [float(x.text) for x in findall(element, 'price_amount')] + prices = [float(x.text) for x in findall(element, 'price.amount')] series = pd.Series(index=positions, data=prices) series = series.sort_index() @@ -663,9 +663,7 @@ def _parse_generation_timeseries(element, per_plant: bool = False, include_eic: plantname = find(element, 'name') name.append(plantname) if include_eic: - #TODO - eic = find(element, "mRID") # is codingscheme as below required - # eic = find(element, "mrid", codingscheme="A01") + eic = find(element, 'mRID codingScheme="A01"') name.insert(0, eic) @@ -776,8 +774,8 @@ def _parse_crossborder_flows_timeseries(element): ------- pd.Series """ - positions = [int(x.text) for x in element.iter('{*}position')] - flows = [float(x.text) for x in element.iter('{*}quantity')] + positions = [int(x.text) for x in findall(element, 'position')] + flows = [float(x.text) for x in findall(element, 'quantity')] series = pd.Series(index=positions, data=flows) series = series.sort_index() @@ -813,9 +811,10 @@ def _resolution_to_timedelta(res_text: str) -> str: # domain code in the unavailibility parsers: _INV_BIDDING_ZONE_DICO = {area.code: area.name for area in Area} +#TODO cannot find some of these in https://transparency.entsoe.eu/content/static_content/Static%20content/web%20api/Guide.html, such as revision, created_doc_time HEADERS_UNAVAIL_GEN = ['created_doc_time', - 'docstatus', - 'mrid', + 'docStatus', + 'mRID', 'revision', 'businesstype', 'biddingzone_domain', @@ -852,20 +851,20 @@ def _unavailability_gen_ts(element) -> list: # When no nominal power is given, give default numeric value of 0: get_float = lambda val: float('NaN') if val == "" else float(val) - f = [BSNTYPE[get_attr('businesstype')], - _INV_BIDDING_ZONE_DICO[get_attr('biddingzone_domain.mrid')], - get_attr('quantity_measure_unit.name'), - get_attr('curvetype'), - get_attr('production_registeredresource.mrid'), - get_attr('production_registeredresource.name'), - get_attr('production_registeredresource.location.name'), + f = [BSNTYPE[get_attr('businessType')], + _INV_BIDDING_ZONE_DICO[get_attr('biddingZone_Domain.mRID')], + get_attr('quantity_Measure_Unit.name'), + get_attr('curveType'), + get_attr('production_RegisteredResource.mRID'), + get_attr('production_RegisteredResource.name'), + get_attr('production_RegisteredResource.location.name'), PSRTYPE_MAPPINGS.get(get_attr( - 'production_registeredresource.psrtype.psrtype'), ""), + 'production_RegisteredResource.pSRType.psrType'), ""), get_float(get_attr( - 'production_registeredresource.psrtype.powersystemresources.nominalp'))] + 'production_RegisteredResource.pSRType.powerSystemResources.nominalP'))] return [f + p for p in _available_period(element)] - +#TODO HEADERS_UNAVAIL_TRANSM = ['created_doc_time', 'docstatus', 'businesstype', From 97db8666f4286fbc01c9cdb3ea8550c154890638 Mon Sep 17 00:00:00 2001 From: Jesse van Elteren Date: Thu, 5 Jan 2023 10:28:27 +0100 Subject: [PATCH 05/11] changed response.text to response.content --- entsoe/entsoe.py | 114 +++++++++++++++++++++++------------------------ 1 file changed, 57 insertions(+), 57 deletions(-) diff --git a/entsoe/entsoe.py b/entsoe/entsoe.py index e09ddbf..41d6281 100644 --- a/entsoe/entsoe.py +++ b/entsoe/entsoe.py @@ -174,7 +174,7 @@ def query_day_ahead_prices(self, country_code: Union[Area, str], 'out_Domain': area.code } response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_net_position(self, country_code: Union[Area, str], 
start: pd.Timestamp, end: pd.Timestamp, dayahead: bool = True) -> str: @@ -202,7 +202,7 @@ def query_net_position(self, country_code: Union[Area, str], params.update({'Contract_MarketAgreement.Type': "A07"}) response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_load(self, country_code: Union[Area, str], start: pd.Timestamp, end: pd.Timestamp) -> str: @@ -225,7 +225,7 @@ def query_load(self, country_code: Union[Area, str], start: pd.Timestamp, 'out_Domain': area.code } response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_load_forecast( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -250,7 +250,7 @@ def query_load_forecast( # 'out_Domain': domain } response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_generation_forecast( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -274,7 +274,7 @@ def query_generation_forecast( 'in_Domain': area.code, } response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_wind_and_solar_forecast( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -303,7 +303,7 @@ def query_wind_and_solar_forecast( if psr_type: params.update({'psrType': psr_type}) response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_generation( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -357,7 +357,7 @@ def query_generation_per_plant( if psr_type: params.update({'psrType': psr_type}) response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_installed_generation_capacity( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -384,7 +384,7 @@ def query_installed_generation_capacity( if psr_type: params.update({'psrType': psr_type}) response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_installed_generation_capacity_per_unit( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -411,7 +411,7 @@ def query_installed_generation_capacity_per_unit( if psr_type: params.update({'psrType': psr_type}) response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_aggregate_water_reservoirs_and_hydro_storage(self, country_code: Union[Area, str], start: pd.Timestamp, end: pd.Timestamp) -> str: @@ -435,7 +435,7 @@ def query_aggregate_water_reservoirs_and_hydro_storage(self, country_code: Union 'in_Domain': area.code } response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_crossborder_flows( self, country_code_from: Union[Area, str], @@ -671,7 +671,7 @@ def _query_crossborder( 'businessType'] = business_type response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_imbalance_prices( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -822,7 +822,7 @@ def query_contracted_reserve_prices( if psr_type: params.update({'psrType': psr_type}) response = self._base_request(params=params, start=start, end=end) - return response.text + return response def query_contracted_reserve_amount( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -856,7 +856,7 @@ def query_contracted_reserve_amount( if psr_type: 
params.update({'psrType': psr_type}) response = self._base_request(params=params, start=start, end=end) - return response.text + return response def _query_unavailability( self, country_code: Union[Area, str], start: pd.Timestamp, @@ -1040,9 +1040,9 @@ def query_net_position(self, country_code: Union[Area, str], """ area = lookup_area(country_code) - text = super(EntsoePandasClient, self).query_net_position( + response = super(EntsoePandasClient, self).query_net_position( country_code=area, start=start, end=end, dayahead=dayahead) - series = parse_netpositions(text) + series = parse_netpositions(response.content) series = series.tz_convert(area.tz) series = series.truncate(before=start, after=end) return series @@ -1070,12 +1070,12 @@ def query_day_ahead_prices( raise InvalidParameterError('Please choose either 60T, 30T or 15T') area = lookup_area(country_code) # we do here extra days at start and end to fix issue 187 - text = super(EntsoePandasClient, self).query_day_ahead_prices( + response = super(EntsoePandasClient, self).query_day_ahead_prices( country_code=area, start=start-pd.Timedelta(days=1), end=end+pd.Timedelta(days=1) ) - series = parse_prices(text)[resolution] + series = parse_prices(response.content)[resolution] if len(series) == 0: raise NoMatchingDataError series = series.tz_convert(area.tz) @@ -1100,10 +1100,10 @@ def query_load(self, country_code: Union[Area, str], start: pd.Timestamp, pd.DataFrame """ area = lookup_area(country_code) - text = super(EntsoePandasClient, self).query_load( + response = super(EntsoePandasClient, self).query_load( country_code=area, start=start, end=end) - df = parse_loads(text, process_type='A16') + df = parse_loads(response.content, process_type='A16') df = df.tz_convert(area.tz) df = df.truncate(before=start, after=end) return df @@ -1125,10 +1125,10 @@ def query_load_forecast( pd.DataFrame """ area = lookup_area(country_code) - text = super(EntsoePandasClient, self).query_load_forecast( + response = super(EntsoePandasClient, self).query_load_forecast( country_code=area, start=start, end=end, process_type=process_type) - df = parse_loads(text, process_type=process_type) + df = parse_loads(response.content, process_type=process_type) df = df.tz_convert(area.tz) df = df.truncate(before=start, after=end) return df @@ -1175,9 +1175,9 @@ def query_generation_forecast( pd.DataFrame | pd.Series """ area = lookup_area(country_code) - text = super(EntsoePandasClient, self).query_generation_forecast( + response = super(EntsoePandasClient, self).query_generation_forecast( country_code=area, start=start, end=end, process_type=process_type) - df = parse_generation(text, nett=nett) + df = parse_generation(response.content, nett=nett) df = df.tz_convert(area.tz) df = df.truncate(before=start, after=end) return df @@ -1202,10 +1202,10 @@ def query_wind_and_solar_forecast( pd.DataFrame """ area = lookup_area(country_code) - text = super(EntsoePandasClient, self).query_wind_and_solar_forecast( + response = super(EntsoePandasClient, self).query_wind_and_solar_forecast( country_code=area, start=start, end=end, psr_type=psr_type, process_type=process_type) - df = parse_generation(text, nett=True) + df = parse_generation(response.content, nett=True) df = df.tz_convert(area.tz) df = df.truncate(before=start, after=end) return df @@ -1256,10 +1256,10 @@ def query_installed_generation_capacity( pd.DataFrame """ area = lookup_area(country_code) - text = super( + response = super( EntsoePandasClient, self).query_installed_generation_capacity( country_code=area, 
start=start, end=end, psr_type=psr_type) - df = parse_generation(text) + df = parse_generation(response.content) df = df.tz_convert(area.tz) # Truncate to YearBegin and YearEnd, because answer is always year-based df = df.truncate(before=start - YearBegin(), after=end + YearEnd()) @@ -1283,11 +1283,11 @@ def query_installed_generation_capacity_per_unit( pd.DataFrame """ area = lookup_area(country_code) - text = super( + response = super( EntsoePandasClient, self).query_installed_generation_capacity_per_unit( country_code=area, start=start, end=end, psr_type=psr_type) - df = parse_installed_capacity_per_plant(text) + df = parse_installed_capacity_per_plant(response.content) return df @year_limited @@ -1295,12 +1295,12 @@ def query_installed_generation_capacity_per_unit( def query_aggregate_water_reservoirs_and_hydro_storage(self, country_code: Union[Area, str], start: pd.Timestamp, end: pd.Timestamp) -> pd.DataFrame: area = lookup_area(country_code) - text = super( + response = super( EntsoePandasClient, self).query_aggregate_water_reservoirs_and_hydro_storage( country_code=area, start=start, end=end) - df = parse_water_hydro(text, area.tz) + df = parse_water_hydro(response.content, area.tz) return df @@ -1326,12 +1326,12 @@ def query_crossborder_flows( """ area_to = lookup_area(country_code_to) area_from = lookup_area(country_code_from) - text = super(EntsoePandasClient, self).query_crossborder_flows( + response = super(EntsoePandasClient, self).query_crossborder_flows( country_code_from=area_from, country_code_to=area_to, start=start, end=end) - ts = parse_crossborder_flows(text) + ts = parse_crossborder_flows(response.content) ts = ts.tz_convert(area_from.tz) ts = ts.truncate(before=start, after=end) return ts @@ -1361,13 +1361,13 @@ def query_scheduled_exchanges( """ area_to = lookup_area(country_code_to) area_from = lookup_area(country_code_from) - text = super(EntsoePandasClient, self).query_scheduled_exchanges( + response = super(EntsoePandasClient, self).query_scheduled_exchanges( country_code_from=area_from, country_code_to=area_to, dayahead=dayahead, start=start, end=end) - ts = parse_crossborder_flows(text) + ts = parse_crossborder_flows(response.content) ts = ts.tz_convert(area_from.tz) ts = ts.truncate(before=start, after=end) return ts @@ -1393,12 +1393,12 @@ def query_net_transfer_capacity_dayahead( """ area_to = lookup_area(country_code_to) area_from = lookup_area(country_code_from) - text = super(EntsoePandasClient, self).query_net_transfer_capacity_dayahead( + response = super(EntsoePandasClient, self).query_net_transfer_capacity_dayahead( country_code_from=area_from, country_code_to=area_to, start=start, end=end) - ts = parse_crossborder_flows(text) + ts = parse_crossborder_flows(response.content) ts = ts.tz_convert(area_from.tz) ts = ts.truncate(before=start, after=end) return ts @@ -1424,12 +1424,12 @@ def query_net_transfer_capacity_weekahead( """ area_to = lookup_area(country_code_to) area_from = lookup_area(country_code_from) - text = super(EntsoePandasClient, self).query_net_transfer_capacity_weekahead( + response = super(EntsoePandasClient, self).query_net_transfer_capacity_weekahead( country_code_from=area_from, country_code_to=area_to, start=start, end=end) - ts = parse_crossborder_flows(text) + ts = parse_crossborder_flows(response.content) ts = ts.tz_convert(area_from.tz) ts = ts.truncate(before=start, after=end) return ts @@ -1455,12 +1455,12 @@ def query_net_transfer_capacity_monthahead( """ area_to = lookup_area(country_code_to) area_from = 
lookup_area(country_code_from) - text = super(EntsoePandasClient, self).query_net_transfer_capacity_monthahead( + response = super(EntsoePandasClient, self).query_net_transfer_capacity_monthahead( country_code_from=area_from, country_code_to=area_to, start=start, end=end) - ts = parse_crossborder_flows(text) + ts = parse_crossborder_flows(response.content) ts = ts.tz_convert(area_from.tz) ts = ts.truncate(before=start, after=end) return ts @@ -1486,12 +1486,12 @@ def query_net_transfer_capacity_yearahead( """ area_to = lookup_area(country_code_to) area_from = lookup_area(country_code_from) - text = super(EntsoePandasClient, self).query_net_transfer_capacity_yearahead( + response = super(EntsoePandasClient, self).query_net_transfer_capacity_yearahead( country_code_from=area_from, country_code_to=area_to, start=start, end=end) - ts = parse_crossborder_flows(text) + ts = parse_crossborder_flows(response.content) ts = ts.tz_convert(area_from.tz) ts = ts.truncate(before=start, after=end) return ts @@ -1517,13 +1517,13 @@ def query_intraday_offered_capacity( """ area_to = lookup_area(country_code_to) area_from = lookup_area(country_code_from) - text = super(EntsoePandasClient, self).query_intraday_offered_capacity( + response = super(EntsoePandasClient, self).query_intraday_offered_capacity( country_code_from=area_from, country_code_to=area_to, start=start, end=end, implicit=implicit) - ts = parse_crossborder_flows(text) + ts = parse_crossborder_flows(response.content) ts = ts.tz_convert(area_from.tz) ts = ts.truncate(before=start, after=end) return ts @@ -1560,7 +1560,7 @@ def query_offered_capacity( """ area_to = lookup_area(country_code_to) area_from = lookup_area(country_code_from) - text = super(EntsoePandasClient, self).query_offered_capacity( + response = super(EntsoePandasClient, self).query_offered_capacity( country_code_from=area_from, country_code_to=area_to, start=start, @@ -1568,7 +1568,7 @@ def query_offered_capacity( contract_marketagreement_type=contract_marketagreement_type, implicit=implicit, offset=offset) - ts = parse_crossborder_flows(text) + ts = parse_crossborder_flows(response.content) ts = ts.tz_convert(area_from.tz) ts = ts.truncate(before=start, after=end) return ts @@ -1649,10 +1649,10 @@ def query_procured_balancing_capacity( pd.DataFrame """ area = lookup_area(country_code) - text = super(EntsoePandasClient, self).query_procured_balancing_capacity( + response = super(EntsoePandasClient, self).query_procured_balancing_capacity( country_code=area, start=start, end=end, process_type=process_type, type_marketagreement_type=type_marketagreement_type) - df = parse_procured_balancing_capacity(text, area.tz) + df = parse_procured_balancing_capacity(response.content, area.tz) df = df.tz_convert(area.tz) df = df.truncate(before=start, after=end) return df @@ -1679,10 +1679,10 @@ def query_activated_balancing_energy( pd.DataFrame """ area = lookup_area(country_code) - text = super(EntsoePandasClient, self).query_activated_balancing_energy( + response = super(EntsoePandasClient, self).query_activated_balancing_energy( country_code=area, start=start, end=end, business_type=business_type, psr_type=psr_type) - df = parse_contracted_reserve(text, area.tz, "quantity") + df = parse_contracted_reserve(response.content, area.tz, "quantity") df = df.tz_convert(area.tz) df = df.truncate(before=start, after=end) return df @@ -1716,11 +1716,11 @@ def query_contracted_reserve_prices( pd.DataFrame """ area = lookup_area(country_code) - text = super(EntsoePandasClient, 
-        text = super(EntsoePandasClient, self).query_contracted_reserve_prices(
+        response = super(EntsoePandasClient, self).query_contracted_reserve_prices(
             country_code=area, start=start, end=end,
             type_marketagreement_type=type_marketagreement_type,
             psr_type=psr_type, offset=offset)
-        df = parse_contracted_reserve(text, area.tz, "procurement_price.amount")
+        df = parse_contracted_reserve(response.content, area.tz, "procurement_price.amount")
         df = df.tz_convert(area.tz)
         df = df.truncate(before=start, after=end)
         return df
@@ -1754,11 +1754,11 @@ def query_contracted_reserve_amount(
             pd.DataFrame
         """
         area = lookup_area(country_code)
-        text = super(EntsoePandasClient, self).query_contracted_reserve_amount(
+        response = super(EntsoePandasClient, self).query_contracted_reserve_amount(
             country_code=area, start=start, end=end,
             type_marketagreement_type=type_marketagreement_type,
             psr_type=psr_type, offset=offset)
-        df = parse_contracted_reserve(text, area.tz, "quantity")
+        df = parse_contracted_reserve(response.content, area.tz, "quantity")
         df = df.tz_convert(area.tz)
         df = df.truncate(before=start, after=end)
         return df
@@ -1931,9 +1931,9 @@ def query_generation_per_plant(
             pd.DataFrame
         """
         area = lookup_area(country_code)
-        text = super(EntsoePandasClient, self).query_generation_per_plant(
+        response = super(EntsoePandasClient, self).query_generation_per_plant(
             country_code=area, start=start, end=end, psr_type=psr_type)
-        df = parse_generation(text, per_plant=True, include_eic=include_eic)
+        df = parse_generation(response.content, per_plant=True, include_eic=include_eic)
         df.columns = df.columns.set_levels(df.columns.levels[0].str.encode('latin-1').str.decode('utf-8'), level=0)
         df = df.tz_convert(area.tz)
         # Truncation will fail if data is not sorted along the index in rare
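
A note on the bytes handoff above: lxml refuses to parse a Python str that still carries an XML encoding declaration (it raises "ValueError: Unicode strings with encoding declaration are not supported"), which is why the pandas client now feeds response.content rather than response.text into the parsers. A minimal sketch of the pattern; the XML payload below is illustrative, not a real API response:

    from io import BytesIO
    from lxml import etree

    # Illustrative payload only; real documents come from the ENTSO-E API.
    xml = (b"<?xml version='1.0' encoding='UTF-8'?>"
           b"<GL_MarketDocument xmlns='urn:example'>"
           b"<TimeSeries><mRID>1</mRID></TimeSeries>"
           b"<TimeSeries><mRID>2</mRID></TimeSeries>"
           b"</GL_MarketDocument>")

    # bytes parse fine; etree.fromstring(xml.decode()) would raise ValueError.
    # '{*}' matches the tag in any namespace, mirroring _extract_timeseries.
    for _, ts in etree.iterparse(BytesIO(xml), tag='{*}TimeSeries'):
        print(next(ts.iter('{*}mRID')).text)  # prints 1, then 2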
From f2aaa89499faaee142e9d7c8058bc9d0ca6e4be1 Mon Sep 17 00:00:00 2001
From: Jesse van Elteren
Date: Thu, 5 Jan 2023 11:16:03 +0100
Subject: [PATCH 06/11] added 'A13': 'Withdrawn' to docstatus

---
 entsoe/entsoe.py   |  3 ++-
 entsoe/mappings.py |  1 +
 entsoe/parsers.py  | 18 ++++++++++--------
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/entsoe/entsoe.py b/entsoe/entsoe.py
index 41d6281..ac6e7a9 100644
--- a/entsoe/entsoe.py
+++ b/entsoe/entsoe.py
@@ -130,6 +130,7 @@ def _base_request(self, params: Dict, start: pd.Timestamp,
         if response.headers.get('content-type', '') == 'application/xml':
             if 'No matching data found' in response.text:
                 raise NoMatchingDataError
+        print('response received')
         return response

     @staticmethod
@@ -1652,7 +1653,7 @@ def query_procured_balancing_capacity(
         response = super(EntsoePandasClient, self).query_procured_balancing_capacity(
             country_code=area, start=start, end=end, process_type=process_type,
             type_marketagreement_type=type_marketagreement_type)
-        df = parse_procured_balancing_capacity(response.content, area.tz)
+        df = parse_procured_balancing_capacity(response, area.tz)
         df = df.tz_convert(area.tz)
         df = df.truncate(before=start, after=end)
         return df
diff --git a/entsoe/mappings.py b/entsoe/mappings.py
index cf8e16a..32449f7 100644
--- a/entsoe/mappings.py
+++ b/entsoe/mappings.py
@@ -178,6 +178,7 @@ def code(self):
              'A02': 'Final',
              'A05': 'Active',
              'A09': 'Cancelled',
+             'A13': 'Withdrawn',
              'X01': 'Estimated'}

BSNTYPE = {'A29': 'Already allocated capacity (AAC)',
diff --git a/entsoe/parsers.py b/entsoe/parsers.py
index e5789c0..02f4669 100644
--- a/entsoe/parsers.py
+++ b/entsoe/parsers.py
@@ -17,6 +17,7 @@
 def find(element, tag):
+    print(tag)
     return next(element.iter('{*}'+tag)).text

 def findall(element, tag):
@@ -449,7 +450,7 @@ def gen_frames(archive):
         for f in arc.infolist():
             if f.filename.endswith('xml'):
                 #TODO this should generate bytes not xml text
-                frame = parse_imbalance_prices(xml_text=arc.read(f))
+                frame = parse_imbalance_prices(xml_bytes=arc.read(f))
                 yield frame

     frames = gen_frames(zip_contents)
@@ -637,8 +638,8 @@ def _parse_generation_timeseries(element, per_plant: bool = False, include_eic:
     series.index = _parse_datetimeindex(element)

     # Check if there is a psrtype, if so, get it.
-    _psrtype = findall(element, 'psrType')
-    if _psrtype is not None:
+    _psrtype = list(findall(element, 'psrType'))
+    if _psrtype:
         psrtype = find(element, 'psrType')
     else:
         psrtype = None
@@ -866,7 +867,7 @@ def _unavailability_gen_ts(element) -> list: #TODO
 HEADERS_UNAVAIL_TRANSM = ['created_doc_time',
-                          'docstatus',
+                          'docStatus',
                           'businesstype',
                           'in_domain',
                           'out_domain',
@@ -949,7 +950,7 @@ def _available_period(timeseries) -> list:
 def _outage_parser(xml_file: bytes, headers, ts_func) -> pd.DataFrame:
     # xml_text = xml_file.decode()
     # soup = bs4.BeautifulSoup(xml_text, 'html.parser')
     element = etree.iterparse(BytesIO(xml_file))
@@ -960,10 +961,11 @@ def _outage_parser(xml_file: bytes, headers, ts_func) -> pd.DataFrame:
         creation_date = pd.Timestamp(find(element, 'createdDateTime'))
     except AttributeError:
         creation_date = ""
-
-    try:
+
+    value = list(findall(element, 'value'))
+    if value:
         docstatus = DOCSTATUS[find(element, 'value')]
-    except AttributeError:
+    else:
         docstatus = None
     d = list()
     series = _extract_timeseries(xml_file)
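
The psrType and docStatus guards in this patch exist because the lxml helpers fail differently from BeautifulSoup: find() calls next() on element.iter(), which raises StopIteration when the tag is absent instead of returning None, so the old try/except AttributeError no longer triggers. A sketch of the behaviour, assuming findall wraps element.iter the same way find does:

    from lxml import etree

    def find(element, tag):
        # next() raises StopIteration (not AttributeError) when the tag is missing.
        return next(element.iter('{*}' + tag)).text

    def findall(element, tag):
        # Assumed shape: a generator over all matching tags, like find().
        return (el.text for el in element.iter('{*}' + tag))

    doc = etree.fromstring(b"<doc xmlns='urn:example'><value>A13</value></doc>")
    values = list(findall(doc, 'value'))               # [] when <value> is absent
    docstatus = find(doc, 'value') if values else None
    print(docstatus)  # prints A13 ('Withdrawn' in DOCSTATUS)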
From 4a98bfcb60af1bb24266290f976a44ef2c08be87 Mon Sep 17 00:00:00 2001
From: Jesse van Elteren
Date: Thu, 5 Jan 2023 12:18:29 +0100
Subject: [PATCH 07/11] fix: procurement_Price.amount casing

---
 entsoe/entsoe.py  | 2 +-
 entsoe/parsers.py | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/entsoe/entsoe.py b/entsoe/entsoe.py
index ac6e7a9..8226a6f 100644
--- a/entsoe/entsoe.py
+++ b/entsoe/entsoe.py
@@ -1721,7 +1721,7 @@ def query_contracted_reserve_prices(
             country_code=area, start=start, end=end,
             type_marketagreement_type=type_marketagreement_type,
             psr_type=psr_type, offset=offset)
-        df = parse_contracted_reserve(response.content, area.tz, "procurement_price.amount")
+        df = parse_contracted_reserve(response.content, area.tz, "procurement_Price.amount")
         df = df.tz_convert(area.tz)
         df = df.truncate(before=start, after=end)
         return df
diff --git a/entsoe/parsers.py b/entsoe/parsers.py
index 02f4669..f8a85ea 100644
--- a/entsoe/parsers.py
+++ b/entsoe/parsers.py
@@ -17,7 +17,6 @@
 def find(element, tag):
-    print(tag)
     return next(element.iter('{*}'+tag)).text

 def findall(element, tag):

From f1c8d17676fd1b3fd590aef017c55a313aa09122 Mon Sep 17 00:00:00 2001
From: Jesse van Elteren
Date: Fri, 6 Jan 2023 21:52:43 +0100
Subject: [PATCH 08/11] remove print statement in entsoe.py

---
 entsoe/entsoe.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/entsoe/entsoe.py b/entsoe/entsoe.py
index 8226a6f..f0edd17 100644
--- a/entsoe/entsoe.py
+++ b/entsoe/entsoe.py
@@ -130,7 +130,6 @@ def _base_request(self, params: Dict, start: pd.Timestamp,
         if response.headers.get('content-type', '') == 'application/xml':
             if 'No matching data found' in response.text:
                 raise NoMatchingDataError
-        print('response received')
         return response

     @staticmethod

From 43484372a65938e0b34f4d7877f2173f895bd213 Mon Sep 17 00:00:00 2001
From: Jesse van Elteren
Date: Fri, 13 Jan 2023 17:06:52 +0100
Subject: [PATCH 09/11] lowercase DataFrame columns so the docStatus header
 rename does not change the df

---
 entsoe/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/entsoe/parsers.py b/entsoe/parsers.py
index f8a85ea..3183845 100644
--- a/entsoe/parsers.py
+++ b/entsoe/parsers.py
@@ -973,5 +973,5 @@ def _outage_parser(xml_file: bytes, headers, ts_func) -> pd.DataFrame:
         # ts_func may break since it will no longer receive a soup timeseries but a lxml element
         for t in ts_func(ts):
             d.append(row + t)
-    df = pd.DataFrame.from_records(d, columns=headers)
+    df = pd.DataFrame.from_records(d, columns=[h.lower() for h in headers])
     return df
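
The lowercasing above keeps the public column names stable: HEADERS_UNAVAIL_TRANSM now spells 'docStatus' with a capital S internally, but callers of the outage queries still see the old lowercase columns. A small illustration with made-up rows:

    import pandas as pd

    # Stand-in for HEADERS_UNAVAIL_TRANSM; the row values are made up.
    headers = ['created_doc_time', 'docStatus', 'businesstype']
    rows = [(pd.Timestamp('2023-01-13'), 'Withdrawn', 'A54')]
    df = pd.DataFrame.from_records(rows, columns=[h.lower() for h in headers])
    print(list(df.columns))  # ['created_doc_time', 'docstatus', 'businesstype']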
From 7caf7c15e5a70928e11e77848faace9d123cc0f0 Mon Sep 17 00:00:00 2001
From: Jesse van Elteren
Date: Fri, 13 Jan 2023 17:29:52 +0100
Subject: [PATCH 10/11] fix: index installed capacity per plant by
 registeredResource.mRID instead of name

---
 entsoe/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/entsoe/parsers.py b/entsoe/parsers.py
index 3183845..5e2a29c 100644
--- a/entsoe/parsers.py
+++ b/entsoe/parsers.py
@@ -724,7 +724,7 @@ def _parse_installed_capacity_per_plant(element):
     series['Installed Capacity [MW]'] = \
         float(find(element, 'quantity'))

-    series.name = find(element, 'registeredResource.name')
+    series.name = find(element, 'registeredResource.mRID')
     return series

From 00d764caeac1081ba373fa5f6e4bab433dff737e Mon Sep 17 00:00:00 2001
From: Jesse van Elteren
Date: Fri, 13 Jan 2023 19:51:41 +0100
Subject: [PATCH 11/11] all response.text results now return response

---
 entsoe/entsoe.py  | 103 +++++++++++++++++++++++-----------------------
 entsoe/parsers.py |   2 +-
 2 files changed, 53 insertions(+), 52 deletions(-)

diff --git a/entsoe/entsoe.py b/entsoe/entsoe.py
index f0edd17..dc1e3c8 100644
--- a/entsoe/entsoe.py
+++ b/entsoe/entsoe.py
@@ -155,7 +155,7 @@ def _datetime_to_str(dtm: pd.Timestamp) -> str:
         return ret_str

     def query_day_ahead_prices(self, country_code: Union[Area, str],
-                               start: pd.Timestamp, end: pd.Timestamp) -> str:
+                               start: pd.Timestamp, end: pd.Timestamp) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp

         Returns
         -------
-        str
+        requests.Response
         """
+
         area = lookup_area(country_code)
         params = {
             'documentType': 'A44',
             'in_Domain': area.code,
             'out_Domain': area.code
         }
         response = self._base_request(params=params, start=start, end=end)
         return response

     def query_net_position(self, country_code: Union[Area, str],
-                           start: pd.Timestamp, end: pd.Timestamp, dayahead: bool = True) -> str:
+                           start: pd.Timestamp, end: pd.Timestamp, dayahead: bool = True) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         dayahead : bool

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A25', # Allocation result document
             'businessType': 'B09', # net position
             'Contract_MarketAgreement.Type': 'A01', # daily
             'in_Domain': area.code,
             'out_Domain': area.code
         }
         response = self._base_request(params=params, start=start, end=end)
         return response

     def query_load(self, country_code: Union[Area, str], start: pd.Timestamp,
-                   end: pd.Timestamp) -> str:
+                   end: pd.Timestamp) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A65',
             'processType': 'A16',
             'outBiddingZone_Domain': area.code,
         }
         response = self._base_request(params=params, start=start, end=end)
         return response

     def query_load_forecast(
             self, country_code: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp, process_type: str = 'A01') -> str:
+            end: pd.Timestamp, process_type: str = 'A01') -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         process_type : str

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A65',
             'processType': process_type,
             'outBiddingZone_Domain': area.code,
         }
         response = self._base_request(params=params, start=start, end=end)
         return response

     def query_generation_forecast(
             self, country_code: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp, process_type: str = 'A01') -> str:
+            end: pd.Timestamp, process_type: str = 'A01') -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         process_type : str

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A71',
             'processType': process_type,
             'in_Domain': area.code,
         }
         response = self._base_request(params=params, start=start, end=end)
         return response

     def query_wind_and_solar_forecast(
             self, country_code: Union[Area, str], start: pd.Timestamp,
             end: pd.Timestamp, psr_type: Optional[str] = None,
-            process_type: str = 'A01', **kwargs) -> str:
+            process_type: str = 'A01', **kwargs) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         psr_type : str
             filter on a single psr type
         process_type : str

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A69',
             'processType': process_type,
             'in_Domain': area.code,
         }
         if psr_type:
             params.update({'psrType': psr_type})
         response = self._base_request(params=params, start=start, end=end)
         return response

     def query_generation(
             self, country_code: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp, psr_type: Optional[str] = None, **kwargs) -> bytes:
+            end: pd.Timestamp, psr_type: Optional[str] = None, **kwargs) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         psr_type : str
             filter on a single psr type

         Returns
         -------
-        bytes
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A75',
             'processType': 'A16',
             'in_Domain': area.code,
         }
         if psr_type:
             params.update({'psrType': psr_type})
         response = self._base_request(params=params, start=start, end=end)
-        return response.content
+        return response

     def query_generation_per_plant(
             self, country_code: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp, psr_type: Optional[str] = None, **kwargs) -> str:
+            end: pd.Timestamp, psr_type: Optional[str] = None, **kwargs) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         psr_type : str
             filter on a single psr type

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A73',
             'processType': 'A16',
             'in_Domain': area.code,
         }
         if psr_type:
             params.update({'psrType': psr_type})
         response = self._base_request(params=params, start=start, end=end)
         return response

     def query_installed_generation_capacity(
             self, country_code: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp, psr_type: Optional[str] = None) -> str:
+            end: pd.Timestamp, psr_type: Optional[str] = None) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         psr_type : str
             filter on a single psr type

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A68',
             'processType': 'A33',
             'in_Domain': area.code,
         }
         if psr_type:
             params.update({'psrType': psr_type})
         response = self._base_request(params=params, start=start, end=end)
         return response

     def query_installed_generation_capacity_per_unit(
             self, country_code: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp, psr_type: Optional[str] = None) -> str:
+            end: pd.Timestamp, psr_type: Optional[str] = None) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         psr_type : str
             filter on a single psr type

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A71',
             'processType': 'A33',
             'in_Domain': area.code,
         }
         if psr_type:
             params.update({'psrType': psr_type})
         response = self._base_request(params=params, start=start, end=end)
         return response

     def query_aggregate_water_reservoirs_and_hydro_storage(
             self, country_code: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp) -> str:
+            end: pd.Timestamp) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A72',
             'processType': 'A16',
             'in_Domain': area.code,
         }
         response = self._base_request(params=params, start=start, end=end)
         return response
def query_aggregate_water_reservoirs_and_hydro_storage(self, country_code: Union
     def query_crossborder_flows(
             self, country_code_from: Union[Area, str],
             country_code_to: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp, **kwargs) -> str:
+            end: pd.Timestamp, **kwargs) -> requests.Response:
         """
         Parameters
         ----------
         country_code_from : Area|str
         country_code_to : Area|str
         start : pd.Timestamp
         end : pd.Timestamp

         Returns
         -------
-        str
+        requests.Response
         """
         return self._query_crossborder(
             country_code_from=country_code_from,
             country_code_to=country_code_to, start=start, end=end,
             doctype="A11")

     def query_scheduled_exchanges(
             self, country_code_from: Union[Area, str],
             country_code_to: Union[Area, str],
             start: pd.Timestamp,
             end: pd.Timestamp,
             dayahead: bool = False,
-            **kwargs) -> str:
+            **kwargs) -> requests.Response:
         """
         Parameters
         ----------
         country_code_from : Area|str
         country_code_to : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         dayahead : bool

         Returns
         -------
-        str
+        requests.Response
         """
         if dayahead:
             contract_marketagreement_type = "A01"
         else:
             contract_marketagreement_type = "A05"
         return self._query_crossborder(
             country_code_from=country_code_from,
             country_code_to=country_code_to, start=start, end=end,
             doctype="A09", contract_marketagreement_type=contract_marketagreement_type)

     def query_net_transfer_capacity_dayahead(
             self, country_code_from: Union[Area, str],
             country_code_to: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp) -> str:
+            end: pd.Timestamp) -> requests.Response:
         """
         Parameters
         ----------
         country_code_from : Area|str
         country_code_to : Area|str
         start : pd.Timestamp
         end : pd.Timestamp

         Returns
         -------
-        str
+        requests.Response
         """
         return self._query_crossborder(
             country_code_from=country_code_from,
             country_code_to=country_code_to, start=start, end=end,
             doctype="A61", contract_marketagreement_type="A01")

     def query_net_transfer_capacity_weekahead(
             self, country_code_from: Union[Area, str],
             country_code_to: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp) -> str:
+            end: pd.Timestamp) -> requests.Response:
         """
         Parameters
         ----------
         country_code_from : Area|str
         country_code_to : Area|str
         start : pd.Timestamp
         end : pd.Timestamp

         Returns
         -------
-        str
+        requests.Response
         """
         return self._query_crossborder(
             country_code_from=country_code_from,
             country_code_to=country_code_to, start=start, end=end,
             doctype="A61", contract_marketagreement_type="A02")

     def query_net_transfer_capacity_monthahead(
             self, country_code_from: Union[Area, str],
             country_code_to: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp) -> str:
+            end: pd.Timestamp) -> requests.Response:
         """
         Parameters
         ----------
         country_code_from : Area|str
         country_code_to : Area|str
         start : pd.Timestamp
         end : pd.Timestamp

         Returns
         -------
-        str
+        requests.Response
         """
         return self._query_crossborder(
             country_code_from=country_code_from,
             country_code_to=country_code_to, start=start, end=end,
             doctype="A61", contract_marketagreement_type="A03")

     def query_net_transfer_capacity_yearahead(
             self, country_code_from: Union[Area, str],
             country_code_to: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp) -> str:
+            end: pd.Timestamp) -> requests.Response:
         """
         Parameters
         ----------
         country_code_from : Area|str
         country_code_to : Area|str
         start : pd.Timestamp
         end : pd.Timestamp

         Returns
         -------
-        str
+        requests.Response
         """
         return self._query_crossborder(
             country_code_from=country_code_from,
             country_code_to=country_code_to, start=start, end=end,
             doctype="A61", contract_marketagreement_type="A04")

     def query_intraday_offered_capacity(
             self, country_code_from: Union[Area, str],
             country_code_to: Union[Area, str], start: pd.Timestamp,
-            end: pd.Timestamp, implicit:bool = True,**kwargs) -> str:
+            end: pd.Timestamp, implicit:bool = True,**kwargs) -> requests.Response:
         """
         Parameters
         ----------
         country_code_from : Area|str
         country_code_to : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         implicit : bool

         Returns
         -------
-        str
+        requests.Response
         """
         return self._query_crossborder(
             country_code_from=country_code_from,
             country_code_to=country_code_to, start=start, end=end,
             doctype="A31", contract_marketagreement_type="A07",
             auction_type=("A01" if implicit==True else "A02"))

     def query_offered_capacity(
             self, country_code_from: Union[Area, str],
             country_code_to: Union[Area, str], start: pd.Timestamp,
             end: pd.Timestamp, contract_marketagreement_type: str,
-            implicit:bool = True,**kwargs) -> str:
+            implicit:bool = True,**kwargs) -> requests.Response:
         """
         Allocated result documents, for OC evolution see query_intraday_offered_capacity

         Parameters
         ----------
         country_code_from : Area|str
         country_code_to : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         contract_marketagreement_type : str
         implicit : bool

         Returns
         -------
-        str
+        requests.Response
         """
         if implicit:
             business_type = None
         return self._query_crossborder(
             country_code_from=country_code_from,
             country_code_to=country_code_to, start=start, end=end,
             doctype="A31",
             contract_marketagreement_type=contract_marketagreement_type,
             auction_type=("A01" if implicit==True else "A02"),
             business_type=business_type)

     def _query_crossborder(
             self, country_code_from: Union[Area, str],
             country_code_to: Union[Area, str], start: pd.Timestamp,
             end: pd.Timestamp, doctype: str,
             contract_marketagreement_type: Optional[str] = None,
-            auction_type: Optional[str] = None, business_type: Optional[str] = None) -> str:
+            auction_type: Optional[str] = None, business_type: Optional[str] = None) -> requests.Response:
         """
         Generic function called by query_crossborder_flows,
         query_scheduled_exchanges, query_net_transfer_capacity_DA/WA/MA/YA
         and query_.

         Parameters
         ----------
         country_code_from : Area|str
         country_code_to : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         doctype : str
         contract_marketagreement_type : str
         business_type : str

         Returns
         -------
-        str
+        requests.Response
         """
         area_in = lookup_area(country_code_to)
         area_out = lookup_area(country_code_from)
@@ -794,7 +795,7 @@ def query_contracted_reserve_prices(
     def query_contracted_reserve_prices(
             self, country_code: Union[Area, str], start: pd.Timestamp,
             end: pd.Timestamp, type_marketagreement_type: str,
             psr_type: Optional[str] = None,
-            offset: int = 0) -> str:
+            offset: int = 0) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         type_marketagreement_type : str
         psr_type : str
         offset : int

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A89',
             'type_MarketAgreement.Type': type_marketagreement_type,
             'controlArea_Domain': area.code,
             'offset': offset
         }
         if psr_type:
             params.update({'psrType': psr_type})
         response = self._base_request(params=params, start=start, end=end)
         return response
@@ -828,7 +829,7 @@ def query_contracted_reserve_amount(
     def query_contracted_reserve_amount(
             self, country_code: Union[Area, str], start: pd.Timestamp,
             end: pd.Timestamp, type_marketagreement_type: str,
             psr_type: Optional[str] = None,
-            offset: int = 0) -> str:
+            offset: int = 0) -> requests.Response:
         """
         Parameters
         ----------
         country_code : Area|str
         start : pd.Timestamp
         end : pd.Timestamp
         type_marketagreement_type : str
         psr_type : str
         offset : int

         Returns
         -------
-        str
+        requests.Response
         """
         area = lookup_area(country_code)
         params = {
             'documentType': 'A81',
             'type_MarketAgreement.Type': type_marketagreement_type,
             'controlArea_Domain': area.code,
             'offset': offset
         }
         if psr_type:
             params.update({'psrType': psr_type})
         response = self._base_request(params=params, start=start, end=end)
         return response
@@ -1231,9 +1232,9 @@ def query_generation(
             pd.DataFrame
         """
         area = lookup_area(country_code)
-        xml = super(EntsoePandasClient, self).query_generation(
+        response = super(EntsoePandasClient, self).query_generation(
             country_code=area, start=start, end=end, psr_type=psr_type)
-        df = parse_generation(xml, nett=nett)
+        df = parse_generation(response.content, nett=nett)
         df = df.tz_convert(area.tz)
         df = df.truncate(before=start, after=end)
         return df
@@ -1649,10 +1650,10 @@ def query_procured_balancing_capacity(
             pd.DataFrame
         """
         area = lookup_area(country_code)
-        response = super(EntsoePandasClient, self).query_procured_balancing_capacity(
+        content = super(EntsoePandasClient, self).query_procured_balancing_capacity(
             country_code=area, start=start, end=end, process_type=process_type,
             type_marketagreement_type=type_marketagreement_type)
-        df = parse_procured_balancing_capacity(response, area.tz)
+        df = parse_procured_balancing_capacity(content, area.tz)
         df = df.tz_convert(area.tz)
         df = df.truncate(before=start, after=end)
         return df
@@ -1679,10 +1680,10 @@ def query_activated_balancing_energy(
             pd.DataFrame
         """
         area = lookup_area(country_code)
-        response = super(EntsoePandasClient, self).query_activated_balancing_energy(
+        content = super(EntsoePandasClient, self).query_activated_balancing_energy(
             country_code=area, start=start, end=end,
             business_type=business_type, psr_type=psr_type)
-        df = parse_contracted_reserve(response.content, area.tz, "quantity")
+        df = parse_contracted_reserve(content, area.tz, "quantity")
         df = df.tz_convert(area.tz)
         df = df.truncate(before=start, after=end)
         return df
diff --git a/entsoe/parsers.py b/entsoe/parsers.py
index 5e2a29c..15b397c 100644
--- a/entsoe/parsers.py
+++ b/entsoe/parsers.py
@@ -722,7 +722,7 @@ def _parse_installed_capacity_per_plant(element):
     # extract only first point
     series['Installed Capacity [MW]'] = \
         float(find(element, 'quantity'))

     series.name = find(element, 'registeredResource.mRID')
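
With the whole series applied, every raw-client query hands back the requests.Response itself, leaving the choice between .text and .content to the caller. A hypothetical usage sketch (placeholder API key, and it assumes this branch of entsoe-py is installed):

    import pandas as pd
    from entsoe import EntsoeRawClient

    client = EntsoeRawClient(api_key='YOUR_API_KEY')  # placeholder key
    start = pd.Timestamp('20230101', tz='Europe/Brussels')
    end = pd.Timestamp('20230102', tz='Europe/Brussels')

    response = client.query_day_ahead_prices('BE', start, end)
    xml_bytes = response.content  # bytes, what the lxml-based parsers expect
    xml_text = response.text      # str, handy for a human-readable dump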