-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3 from wisdom-oss/feature/time-bucket
Feature: Time Bucket
- Loading branch information
Showing
14 changed files
with
350 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,87 @@ | ||
#!/usr/bin/python3import argparseimport datetimeimport jsonimport pandasimport numpyimport sklearn.metricsdescription = """This is an example on how to handle the input and output for algorithms and thedata pulled from the databases """parameters = { "size": 30}if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("data_file", default="") parser.add_argument("output_file", default="") parser.add_argument("parameter_file", default="") args = parser.parse_args() # load the parameters try: with open(args.parameter_file) as f: ext_parameters = json.load(f) parameters = ext_parameters except: print("using default parameters") # create a dataframe from the input file df = pandas.read_json(args.data_file, dtype={'municipal': str, 'usageType': str, 'date': datetime.datetime, 'amount': float}) municipals = df.groupby(df.municipal) usage_types = df.groupby(df.usageType) return_objects = [] meta = { "curves": {}, "r-scores": {}, "real-data-until": {} } for municipal, df in municipals: yearly_usages: pandas.Series = df.groupby(df.date.dt.year)['amount'].sum() x_axis = [] y_axis = [] for year, usage in yearly_usages.items(): x_axis.append(year) y_axis.append(usage) return_objects.append({ "label": f"{int(municipal)}", "x": year, "y": usage }) prediction_x_axis = numpy.linspace(start=x_axis[0], stop=x_axis[-1] + parameters["size"], num=len(y_axis) + parameters["size"], dtype=int) curve = numpy.polynomial.Polynomial.fit(x_axis, y_axis, deg=1) prediction_y_axis = curve(prediction_x_axis).tolist() reference_values = prediction_y_axis[:len(x_axis)] forecasted_values = prediction_y_axis[len(x_axis):] r_square = sklearn.metrics.r2_score(y_axis, reference_values) meta["curves"][int(municipal)] = str(curve) meta["r-scores"][int(municipal)] = r_square meta["real-data-until"][int(municipal)] = str(x_axis[-1]) for year in prediction_x_axis: idx = int(year) - int(prediction_x_axis[0]) return_objects.append({ "label": f"{int(municipal)}", "x": f"{int(year)}", "y": 
float(prediction_y_axis[idx]) }) with open(args.output_file, 'wt') as f: output_object = { "meta": meta, "data": return_objects } json.dump(output_object, f, indent=4) | ||
#!/usr/bin/python3
"""Fit a linear trend to yearly usage totals per municipal and forecast
``size`` additional years, writing the combined real + forecast series
plus fit metadata to a JSON output file."""
import argparse
import datetime
import json

import pandas
import numpy
import sklearn.metrics

description = """
This is an example on how to handle the input and output for algorithms and the
data pulled from the databases
"""

# Default algorithm parameters; individual keys may be overridden by the
# external parameter file.
parameters = {
    "size": 30  # number of years to forecast past the last real data point
}

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("data_file", default="")
    parser.add_argument("output_file", default="")
    parser.add_argument("parameter_file", default="")

    args = parser.parse_args()

    # Load the parameters. Merge instead of replacing wholesale, so a
    # partial parameter file cannot drop required defaults such as "size".
    try:
        with open(args.parameter_file) as f:
            parameters.update(json.load(f))
    except (OSError, json.JSONDecodeError):
        print("using default parameters")

    # Create a dataframe from the input file. ``datetime.datetime`` is not
    # a valid pandas dtype, so the date column is parsed explicitly after
    # reading instead of being passed through ``dtype=``.
    df = pandas.read_json(args.data_file,
                          dtype={'municipal': str, 'usageType': str, 'amount': float})
    df['date'] = pandas.to_datetime(df['date'])
    municipals = df.groupby(df.municipal)

    return_objects = []
    meta = {
        "curves": {},
        "r-scores": {},
        "real-data-until": {}
    }

    # One independent fit per municipal.
    for municipal, group in municipals:
        # Aggregate to a single total usage value per calendar year.
        yearly_usages: pandas.Series = group.groupby(group.date.dt.year)['amount'].sum()
        x_axis = []
        y_axis = []
        for year, usage in yearly_usages.items():
            x_axis.append(year)
            y_axis.append(usage)
            return_objects.append({
                "label": municipal,
                "x": int(year),
                "y": int(usage)
            })

        # Evaluate a degree-1 (linear) fit over the real years plus
        # parameters["size"] forecast years. With consecutive yearly data
        # the linspace step is exactly 1, which the index math below relies on.
        prediction_x_axis = numpy.linspace(start=x_axis[0],
                                           stop=x_axis[-1] + parameters["size"],
                                           num=len(y_axis) + parameters["size"],
                                           dtype=int)
        curve = numpy.polynomial.Polynomial.fit(x_axis, y_axis, deg=1)
        prediction_y_axis = curve(prediction_x_axis).tolist()
        # Model values over the real years only, used to score the fit.
        reference_values = prediction_y_axis[:len(x_axis)]

        r_square = sklearn.metrics.r2_score(y_axis, reference_values)
        meta["curves"][municipal] = str(curve)
        meta["r-scores"][municipal] = r_square
        meta["real-data-until"][municipal] = int(x_axis[-1])
        for year in prediction_x_axis:
            # Real years were already appended above; emit only the forecast.
            if year <= x_axis[-1]:
                continue
            idx = int(year) - int(prediction_x_axis[0])
            return_objects.append({
                "label": municipal,
                "x": int(year),
                "y": float(prediction_y_axis[idx])
            })

    with open(args.output_file, 'wt') as f:
        output_object = {
            "meta": meta,
            "data": return_objects
        }
        json.dump(output_object, f)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,5 @@ | ||
description: >- | ||
An algorithm that implements a logarithmic fit
An algorithm that implements a logarithmic fit
useBuckets: true | ||
bucketSize: 1 year |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.