Skip to content

Commit

Permalink
Merge pull request #3 from wisdom-oss/feature/time-bucket
Browse files Browse the repository at this point in the history
Feature: Time Bucket
  • Loading branch information
dr4hcu5-jan authored Apr 10, 2024
2 parents 3679feb + ea6db63 commit 4daddb1
Show file tree
Hide file tree
Showing 14 changed files with 350 additions and 51 deletions.
25 changes: 14 additions & 11 deletions algorithms/exponential.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"""

parameters = {
"size": 30
"size": 30,
"degree": 3
}

if __name__ == "__main__":
Expand Down Expand Up @@ -55,26 +56,28 @@
x_axis.append(year)
y_axis.append(usage)
return_objects.append({
"label": f"{int(municipal)}",
"x": year,
"y": usage
"label": municipal,
"x": int(year),
"y": float(usage)
})

prediction_x_axis = numpy.linspace(start=x_axis[0], stop=x_axis[-1] + parameters["size"], num=len(y_axis) + parameters["size"], dtype=int)
curve = numpy.polynomial.Polynomial.fit(x_axis, y_axis, deg=3)
prediction_x_axis = numpy.linspace(start=x_axis[0], stop=x_axis[-1] + parameters["size"], num=len(y_axis) + parameters["size"], dtype=float)
curve = numpy.polynomial.Polynomial.fit(x_axis, y_axis, deg=parameters["degree"])
prediction_y_axis = curve(prediction_x_axis).tolist()
reference_values = prediction_y_axis[:len(x_axis)]
forecasted_values = prediction_y_axis[len(x_axis):]

r_square = sklearn.metrics.r2_score(y_axis, reference_values)
meta["curves"][int(municipal)] = str(curve)
meta["r-scores"][int(municipal)] = r_square
meta["real-data-until"][int(municipal)] = str(x_axis[-1])
meta["curves"][municipal] = str(curve).replace("\n", " ")
meta["r-scores"][municipal] = r_square
meta["real-data-until"][municipal] = x_axis[-1]
for year in prediction_x_axis:
if year <= x_axis[-1]:
continue
idx = int(year) - int(prediction_x_axis[0])
return_objects.append({
"label": f"{int(municipal)}",
"x": f"{int(year)}",
"label": municipal,
"x": int(year),
"y": float(prediction_y_axis[idx])
})

Expand Down
5 changes: 4 additions & 1 deletion algorithms/exponential.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ parameters:
description: >-
The degree of the polynomial equation that should be used to fit the
function to
default: 3
default: 3

useBuckets: true
bucketSize: 1 year
88 changes: 87 additions & 1 deletion algorithms/linear.py
Original file line number Diff line number Diff line change
@@ -1 +1,87 @@
#!/usr/bin/python3import argparseimport datetimeimport jsonimport pandasimport numpyimport sklearn.metricsdescription = """This is an example on how to handle the input and output for algorithms and thedata pulled from the databases """parameters = { "size": 30}if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("data_file", default="") parser.add_argument("output_file", default="") parser.add_argument("parameter_file", default="") args = parser.parse_args() # load the parameters try: with open(args.parameter_file) as f: ext_parameters = json.load(f) parameters = ext_parameters except: print("using default parameters") # create a dataframe from the input file df = pandas.read_json(args.data_file, dtype={'municipal': str, 'usageType': str, 'date': datetime.datetime, 'amount': float}) municipals = df.groupby(df.municipal) usage_types = df.groupby(df.usageType) return_objects = [] meta = { "curves": {}, "r-scores": {}, "real-data-until": {} } for municipal, df in municipals: yearly_usages: pandas.Series = df.groupby(df.date.dt.year)['amount'].sum() x_axis = [] y_axis = [] for year, usage in yearly_usages.items(): x_axis.append(year) y_axis.append(usage) return_objects.append({ "label": f"{int(municipal)}", "x": year, "y": usage }) prediction_x_axis = numpy.linspace(start=x_axis[0], stop=x_axis[-1] + parameters["size"], num=len(y_axis) + parameters["size"], dtype=int) curve = numpy.polynomial.Polynomial.fit(x_axis, y_axis, deg=1) prediction_y_axis = curve(prediction_x_axis).tolist() reference_values = prediction_y_axis[:len(x_axis)] forecasted_values = prediction_y_axis[len(x_axis):] r_square = sklearn.metrics.r2_score(y_axis, reference_values) meta["curves"][int(municipal)] = str(curve) meta["r-scores"][int(municipal)] = r_square meta["real-data-until"][int(municipal)] = str(x_axis[-1]) for year in prediction_x_axis: idx = int(year) - int(prediction_x_axis[0]) return_objects.append({ "label": f"{int(municipal)}", "x": f"{int(year)}", "y": 
float(prediction_y_axis[idx]) }) with open(args.output_file, 'wt') as f: output_object = { "meta": meta, "data": return_objects } json.dump(output_object, f, indent=4)
#!/usr/bin/python3
import argparse
import datetime
import json

import pandas
import numpy
import sklearn.metrics

description = """
This is an example on how to handle the input and output for algorithms and the
data pulled from the databases
"""

parameters = {
"size": 30
}

if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("data_file", default="")
parser.add_argument("output_file", default="")
parser.add_argument("parameter_file", default="")

args = parser.parse_args()

# load the parameters
try:
with open(args.parameter_file) as f:
ext_parameters = json.load(f)
parameters = ext_parameters
except:
print("using default parameters")

# create a dataframe from the input file
df = pandas.read_json(args.data_file, dtype={'municipal': str, 'usageType': str, 'date': datetime.datetime, 'amount': float})
municipals = df.groupby(df.municipal)
usage_types = df.groupby(df.usageType)

return_objects = []
e
meta = {
"curves": {},
"r-scores": {},
"real-data-until": {}
}

for municipal, df in municipals:
yearly_usages: pandas.Series = df.groupby(df.date.dt.year)['amount'].sum()
x_axis = []
y_axis = []
for year, usage in yearly_usages.items():
x_axis.append(year)
y_axis.append(usage)
return_objects.append({
"label": municipal,
"x": int(year),
"y": int(usage)
})

prediction_x_axis = numpy.linspace(start=x_axis[0], stop=x_axis[-1] + parameters["size"], num=len(y_axis) + parameters["size"], dtype=int)
curve = numpy.polynomial.Polynomial.fit(x_axis, y_axis, deg=1)
prediction_y_axis = curve(prediction_x_axis).tolist()
reference_values = prediction_y_axis[:len(x_axis)]
forecasted_values = prediction_y_axis[len(x_axis):]

r_square = sklearn.metrics.r2_score(y_axis, reference_values)
meta["curves"][municipal] = str(curve)
meta["r-scores"][municipal] = r_square
meta["real-data-until"][municipal] = int(x_axis[-1])
for year in prediction_x_axis:
if year <= x_axis[-1]:
continue
idx = int(year) - int(prediction_x_axis[0])
return_objects.append({
"label": municipal,
"x": int(year),
"y": float(prediction_y_axis[idx])
})

with open(args.output_file, 'wt') as f:
output_object = {
"meta": meta,
"data": return_objects
}
json.dump(output_object, f)

5 changes: 4 additions & 1 deletion algorithms/linear.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,7 @@ parameters:
description: >-
The amount of years that shall be predicted, after the source data is
available
default: 30
default: 30

useBuckets: true
bucketSize: 1 year
5 changes: 4 additions & 1 deletion algorithms/logarithmic.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
description: >-
An algorithm that implements an logarithmic fit
An algorithm that implements a logarithmic fit
useBuckets: true
bucketSize: 1 year
4 changes: 2 additions & 2 deletions algorithms/yearly-municipal-prophet.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@
x_axis.append(year)
y_axis.append(usage)
return_objects.append({
"label": f"{int(municipal)}",
"label": municipal,
"x": year,
"y": usage,
"uncertainty": [0, 0]
Expand All @@ -88,7 +88,7 @@
if idx == len(y_axis):
continue
return_objects.append({
"label": f"{int(municipal)}",
"label": municipal,
"x": row['ds'].year,
"y": row['yhat'],
"uncertainty": [row['yhat_lower'], row['yhat_upper']]
Expand Down
8 changes: 4 additions & 4 deletions helpers/algorithms.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ import (
)

// GetAlgorithmMetadata reads the yaml metadata file supplied by the filepath
func GetAlgorithmMetadata(metadataFilePath string) (map[string]types.Parameter, string, error) {
func GetAlgorithmMetadata(metadataFilePath string) (types.AlgorithmMetadata, error) {
var metadata types.AlgorithmMetadata
file, err := os.Open(metadataFilePath)
if err != nil {
return nil, "", nil
return types.AlgorithmMetadata{}, err
}
err = yaml.NewDecoder(file).Decode(&metadata)
if err != nil {
return nil, "", err
return types.AlgorithmMetadata{}, err
}
return metadata.Parameters, metadata.Description, nil
return metadata, nil
}
127 changes: 125 additions & 2 deletions openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ components:
schemas:
Parameter:
properties:
defaultValue: {}
defaultValue: { }
description:
type: string
Script:
Expand All @@ -31,10 +31,102 @@ components:
items:
$ref: '#/components/schemas/Parameter'

ResultMetadata:
properties:
rScores:
description: >-
The R²-scores achieved by the prognosis, mapped to each available
label.
type: object
example:
label1: 0.01
label2: 0.02
additionalProperties:
type: number
realDataUntil:
description: >-
A map indicating until which value the real data is contained on the
x-Axis
type: object
example:
label1: 2020
label2: 2021
additionalProperties:
type: number

Datapoint:
type: object
properties:
label:
type: string
description: >-
A unique label for the data contained under this dataseries
x:
type: number
description: >-
The value on the x-Axis (in this case the year as int)
y:
type: number
description: >-
The water-usage


NumPyResult:
properties:
meta:
allOf:
- $ref: '#/components/schemas/ResultMetadata'
- properties:
curves:
description: >-
A map containing the mathematical equations used to
calculate the values in the data set
type: object
example:
label1: 5+4*x
label2: 100+9.4643*x
additionalProperties:
type: string
data:
type: array
items:
$ref: '#/components/schemas/Datapoint'

ProphetResult:
properties:
meta:
$ref: '#/components/schemas/ResultMetadata'
data:
type: array
items:
allOf:
- $ref: '#/components/schemas/Datapoint'
- type: object
properties:
uncertainty:
type: array
minItems: 2
maxItems: 2
items:
type: number

responses:
SuccessfulForecast:
description: Forecast executed successfully
content:
application/json:
schema:
oneOf:
- $ref: '#/components/schemas/ProphetResult'
- $ref: '#/components/schemas/NumPyResult'


paths:
/:
get:
operationId: get-info
summary: Get available algorithms
description: |
Get information about the currently available algorithms and their
parameters.
Expand All @@ -45,4 +137,35 @@ paths:
application/json:
schema:
items:
$ref: '#/components/schemas/Script'
$ref: '#/components/schemas/Script'
/{script-identifier}:
parameters:
- in: path
name: script-identifier
description: The name of the prognosis script
required: true
schema:
type: string

get:
summary: Make a Forecast with default parameters
responses:
200:
$ref: '#/components/responses/SuccessfulForecast'

post:
summary: Make a forecast with changed parameters
requestBody:
content:
multipart/form-data:
schema:
type: object
properties:
parameters:
type: object
description: >-
The parameters you want to override as key-value-pairs.
Parameters that are not included in the object are not
overwritten. The single parameters are returned by the
`/` endpoint
responses:
200:
$ref: '#/components/responses/SuccessfulForecast'
Loading

0 comments on commit 4daddb1

Please sign in to comment.