Skip to content

Commit

Permalink
Merge branch 'main' into formatting_views
Browse files Browse the repository at this point in the history
  • Loading branch information
dalonsoa authored Oct 4, 2024
2 parents 972e577 + 46041f8 commit 2f1ef89
Show file tree
Hide file tree
Showing 20 changed files with 223 additions and 444 deletions.
13 changes: 3 additions & 10 deletions docs/Applications/station.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

The Station is the main component of Paricia. Stations own the measurement data, control who can view the existing data and add new sets, and capture a lot of metadata on the geographical location of the station, including longitude, latitude and altitude, but also region, basin, ecosystem or area covered.

It also owns two key parameters used during the data ingestion and the validation process: the **timezone** where the station is located, used to interpret the time of the datasets, and the **period** of the data (`delta_t`), which identifies the expected separation between consecutive data points.
It also owns another key parameter used during the data ingestion process: the **timezone** where the station is located, used to interpret the time of the datasets.

!!! warning "Incorrect `timezone` or `delta_t`"
!!! warning "Incorrect `timezone`"

    If these two parameters are not defined correctly, the time of the data associated with the station will be interpreted incorrectly, or the validation process will flag data as invalid when they might be correct. **Check the parameters before uploading data, especially the first time.**
    If this parameter is not defined correctly, the time of the data associated with the station will be interpreted incorrectly. **Check the parameter before uploading data, especially the first time.**

As pointed out in the [permissions page](../permissions.md), a Station's visibility can take a third option, `internal`, that only allows registered users to visualize the data of the station. Stations also require `change` permission in order to be able to upload data associated with that station and then validate it.

Expand Down Expand Up @@ -72,13 +72,6 @@ As pointed out in the [permissions page](../permissions.md), Stations visibility
members: None
show_root_full_path: False

::: station.models.DeltaT
options:
heading_level: 3
show_bases: False
members: None
show_root_full_path: False

## Core Components

::: station.models.Station
Expand Down
40 changes: 40 additions & 0 deletions management/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from django.contrib.auth.mixins import LoginRequiredMixin
from django.core.exceptions import PermissionDenied
from django.db.models import Model
from django.http import HttpResponse
from django.shortcuts import get_object_or_404
from django.urls import reverse_lazy
from django.views.generic import CreateView, DeleteView, DetailView
Expand Down Expand Up @@ -325,6 +326,45 @@ def __init__(self, *args, **kwargs):

return CustomCreateForm

def form_valid(self, form: forms.ModelForm) -> HttpResponse:
    """Assign the current user as owner before persisting the object.

    Args:
        form (forms.ModelForm): Form with the object data.

    Returns:
        HttpResponse: Redirect to the detail view of the created object.
    """
    instance = form.instance
    # Only models that actually track ownership get the current user stamped.
    if hasattr(instance, "owner"):
        instance.owner = self.request.user
    return super().form_valid(form)

@property
def app_label(self) -> str:
    """Django app label of the model handled by this view."""
    return self.model._meta.app_label

@property
def model_name(self) -> str:
    """Lowercase model name of the model handled by this view."""
    return self.model._meta.model_name

@property
def model_description(self) -> str:
    """Title-cased verbose name of the model, for display purposes."""
    return self.model._meta.verbose_name.title()

@property
def success_url(self) -> str:
    """URL of the detail page for the object just created/edited."""
    # Lazy so the URL is only resolved once self.object exists.
    return reverse_lazy(self.detail_url, kwargs={"pk": self.object.pk})

@property
def detail_url(self) -> str:
    """URL pattern name of the model's detail view, e.g. "app:model_detail"."""
    return f"{self.app_label}:{self.model_name}_detail"

@property
def list_url(self) -> str:
    """URL pattern name of the model's list view, e.g. "app:model_list"."""
    return f"{self.app_label}:{self.model_name}_list"

def get_form_kwargs(self):
"""Add the user to the form kwargs, so we can filter the options."""
kwargs = super().get_form_kwargs()
Expand Down
19 changes: 19 additions & 0 deletions measurement/migrations/0014_alter_report_completeness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 5.1.1 on 2024-10-02 13:32

import django.core.validators
from django.db import migrations, models


class Migration(migrations.Migration):
    # Auto-generated (Django 5.1.1): give Report.completeness a default of 100
    # and constrain it to the 0-100 percentage range via validators.

    dependencies = [
        ('measurement', '0013_alter_measurement_depth_alter_measurement_direction_and_more'),
    ]

    operations = [
        migrations.AlterField(
            model_name='report',
            name='completeness',
            field=models.DecimalField(decimal_places=1, default=100, help_text='Completeness of the report. Eg. a daily report made out of 24 hourly measurements would have a completeness of 100%.', max_digits=4, validators=[django.core.validators.MinValueValidator(0), django.core.validators.MaxValueValidator(100)]),
        ),
    ]
1 change: 1 addition & 0 deletions measurement/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ class Report(MeasurementBase):
completeness = models.DecimalField(
max_digits=4,
decimal_places=1,
default=100,
null=False,
help_text="Completeness of the report. Eg. a daily report made out of 24 hourly"
" measurements would have a completeness of 100%.",
Expand Down
23 changes: 4 additions & 19 deletions measurement/reporting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import zoneinfo
from datetime import datetime
from decimal import Decimal

import pandas as pd

Expand All @@ -10,17 +9,16 @@


def calculate_reports(
data: pd.DataFrame, station: str, variable: str, operation: str, period: Decimal
data: pd.DataFrame, station: str, variable: str, operation: str
) -> pd.DataFrame:
"""Calculates the report for the chosen days.
Args:
data: The dataframe with the data.
station: The name of the station.
variable: The name of the variable.
operation: Agreggation operation to perform on the data when calculating the
operation: Aggregation operation to perform on the data when calculating the
report.
period: The period of the data in minutes.
Returns:
A dataframe with the hourly, daily and monthly reports.
Expand All @@ -36,18 +34,6 @@ def calculate_reports(
daily = hourly.resample("D").agg(operation)
monthly = daily.resample("MS").agg(operation)

# Find the completeness of the data
per_hour = 60 / period
per_day = 24
per_month = monthly.index.to_series().apply(
lambda t: pd.Period(t, freq="S").days_in_month
)
hourly["completeness"] = (
data[["time", "value"]].resample("H", on="time").count() / per_hour * 100
)
daily["completeness"] = hourly["value"].resample("D").count() / per_day * 100
monthly["completeness"] = daily["value"].resample("MS").count() / per_month * 100

# Put everything together
hourly["report_type"] = "hourly"
daily["report_type"] = "daily"
Expand Down Expand Up @@ -168,7 +154,6 @@ def save_report_data(data: pd.DataFrame) -> None:
value=row["value"],
maximum=row.get("maximum", None),
minimum=row.get("minimum", None),
completeness=row["completeness"],
report_type=row["report_type"],
)
for time, row in data_.iterrows()
Expand Down Expand Up @@ -265,9 +250,9 @@ def launch_reports_calculation(
operation = (
"sum" if Variable.objects.get(variable_code=variable).is_cumulative else "mean"
)
period = Station.objects.get(station_code=station).delta_t.delta_t

start_time_, end_time_ = reformat_dates(station, start_time, end_time)
data = get_data_to_report(station, variable, start_time_, end_time_)
report = calculate_reports(data, station, variable, operation, period)
report = calculate_reports(data, station, variable, operation)
remove_report_data_in_range(station, variable, start_time_, end_time_)
save_report_data(report)
56 changes: 11 additions & 45 deletions measurement/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,29 +49,6 @@ def get_data_to_validate(
return df.sort_values("time")


def flag_time_lapse_status(data: pd.DataFrame, period: Decimal) -> pd.DataFrame:
    """Flag entries whose time lapse from the previous entry is incorrect.

    It is assumed that the first entry is correct. A tolerance of 2% of the
    period is used when deciding on the suspicious status.

    Args:
        data: The dataframe with the data, including a "time" column.
        period: The expected period for the measurements, in minutes.

    Returns:
        A dataframe with a boolean "suspicious_time_lapse" column.
    """
    flags = pd.DataFrame(index=data.index, columns=["suspicious_time_lapse"])
    tolerance = 0.02  # 2% of the expected period
    expected = pd.Timedelta(f"{period}min")
    low = expected * (1 - tolerance)
    high = expected * (1 + tolerance)
    flags["suspicious_time_lapse"] = ~data.time.diff().between(
        low, high, inclusive="both"
    )
    # First entry has no predecessor (diff is NaT) so it is assumed correct.
    # Use .loc with an explicit label instead of chained .iloc assignment,
    # which may write to a copy; also guard against empty input.
    if len(flags):
        flags.loc[flags.index[0], "suspicious_time_lapse"] = False
    return flags


def flag_value_difference(data: pd.DataFrame, allowed_difference: Decimal) -> pd.Series:
"""Flags if the differences in value of the measurements is correct.
Expand Down Expand Up @@ -124,7 +101,6 @@ def flag_suspicious_data(
data: pd.DataFrame,
maximum: Decimal,
minimum: Decimal,
period: Decimal,
allowed_difference: Decimal,
) -> pd.DataFrame:
"""Finds suspicious data in the database.
Expand All @@ -133,36 +109,31 @@ def flag_suspicious_data(
data: The dataframe with the data to be evaluated.
maximum: The maximum allowed value.
minimum: The minimum allowed value.
period: The expected period for the measurements, in minutes.
allowed_difference: The allowed difference between the measurements.
Returns:
A dataframe with the suspicious data.
"""
time_lapse = flag_time_lapse_status(data, period)
value_difference = flag_value_difference(data, allowed_difference)
value_limits = flag_value_limits(data, maximum, minimum)
return pd.concat([time_lapse, value_difference, value_limits], axis=1)
return pd.concat([value_difference, value_limits], axis=1)


def flag_suspicious_daily_count(
data: pd.Series, period: Decimal, null_limit: Decimal
) -> pd.DataFrame:
def flag_suspicious_daily_count(data: pd.Series, null_limit: Decimal) -> pd.DataFrame:
"""Finds suspicious records count for daily data.
Args:
data: The count of records per day.
period: The expected period for the measurements, in minutes.
null_limit: The percentage of null data allowed.
Returns:
A dataframe with the suspicious data.
"""
expected_data_count = 24 * 60 / float(period)
expected_data_count = data.mode().iloc[0]

suspicious = pd.DataFrame(index=data.index)
suspicious["daily_count_fraction"] = (data / expected_data_count).round(2)
suspicious["suspicious_daily_count"] = ~suspicious["daily_count_fraction"].between(
1 - float(null_limit) / 100, 1

suspicious["suspicious_daily_count"] = (
suspicious["daily_count_fraction"] < 1 - float(null_limit) / 100
) | (suspicious["daily_count_fraction"] > 1)

return suspicious
Expand All @@ -171,7 +142,6 @@ def flag_suspicious_daily_count(
def generate_daily_summary(
data: pd.DataFrame,
suspicious: pd.DataFrame,
period: Decimal,
null_limit: Decimal,
is_cumulative: bool,
) -> pd.DataFrame:
Expand All @@ -180,7 +150,6 @@ def generate_daily_summary(
Args:
data: The dataframe with the data to be evaluated.
suspicious: The dataframe with the suspicious data.
period: The expected period for the measurements, in minutes.
null_limit: The percentage of null data allowed.
is_cumulative: If the data is cumulative and should be aggregated by sum.
Expand All @@ -201,17 +170,15 @@ def generate_daily_summary(
report["minimum"] = datagroup["minimum"].min()

# Count the number of entries per day and flag the suspicious ones
count_report = flag_suspicious_daily_count(
datagroup["value"].count(), period, null_limit
)
# count_report = flag_suspicious_daily_count(datagroup["value"].count(), null_limit)

# Group the suspicious data by day and calculate the sum
suspiciousgroup = suspicious.groupby(data.time.dt.date)
suspicious_report = suspiciousgroup.sum().astype(int)
suspicious_report["total_suspicious_entries"] = suspicious_report.sum(axis=1)

# Put together the final report
report = pd.concat([report, suspicious_report, count_report], axis=1)
report = pd.concat([report, suspicious_report], axis=1)
report = report.sort_index().reset_index().rename(columns={"index": "date"})
report.date = pd.to_datetime(report.date)
return report
Expand Down Expand Up @@ -240,16 +207,15 @@ def generate_validation_report(
Returns:
A tuple with the summary report and the granular report.
"""
period = Station.objects.get(station_code=station).delta_t.delta_t
var = Variable.objects.get(variable_code=variable)

data = get_data_to_validate(station, variable, start_time, end_time, is_validated)
if data.empty:
return pd.DataFrame(), pd.DataFrame()

suspicious = flag_suspicious_data(data, maximum, minimum, period, var.diff_error)
suspicious = flag_suspicious_data(data, maximum, minimum, var.diff_error)
summary = generate_daily_summary(
data, suspicious, period, var.null_limit, var.is_cumulative
data, suspicious, var.null_limit, var.is_cumulative
)
granular = pd.concat([data, suspicious], axis=1)
return summary, granular
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ dev = [
doc = [
"mkdocs",
"mkdocstrings",
"mkdocstrings-python",
"mkdocstrings-python>=1.11",
"mkdocs-material",
"mkdocs-gen-files",
"mkdocs-literate-nav",
Expand Down
12 changes: 7 additions & 5 deletions requirements-doc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ flask==3.0.3
# django-plotly-dash
ghp-import==2.1.0
# via mkdocs
griffe==0.48.0
griffe==1.3.1
# via mkdocstrings-python
huey==2.5.1
# via Paricia (pyproject.toml)
Expand Down Expand Up @@ -157,8 +157,10 @@ mkdocs==1.6.1
# mkdocs-material
# mkdocs-section-index
# mkdocstrings
mkdocs-autorefs==1.0.1
# via mkdocstrings
mkdocs-autorefs==1.2.0
# via
# mkdocstrings
# mkdocstrings-python
mkdocs-caption==1.2.0
# via Paricia (pyproject.toml)
mkdocs-gen-files==0.5.0
Expand All @@ -175,11 +177,11 @@ mkdocs-material-extensions==1.3.1
# via mkdocs-material
mkdocs-section-index==0.3.9
# via Paricia (pyproject.toml)
mkdocstrings==0.25.2
mkdocstrings==0.26.1
# via
# Paricia (pyproject.toml)
# mkdocstrings-python
mkdocstrings-python==1.10.7
mkdocstrings-python==1.11.1
# via Paricia (pyproject.toml)
numpy==2.1.1
# via
Expand Down
10 changes: 0 additions & 10 deletions station/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from .models import (
Basin,
Country,
DeltaT,
Ecosystem,
Institution,
Place,
Expand Down Expand Up @@ -105,7 +104,6 @@ class StationAdmin(PermissionsBaseAdmin):
"station_external",
"influence_km",
"timezone",
"delta_t",
"owner",
"visibility",
]
Expand All @@ -123,12 +121,4 @@ class StationAdmin(PermissionsBaseAdmin):
"ecosystem",
"institution",
"place_basin",
"delta_t",
]


@admin.register(DeltaT)
class DeltaTAdmin(PermissionsBaseAdmin):
    """Admin class for the DeltaT model."""

    # Columns shown in the admin changelist for DeltaT entries.
    list_display = ["id", "delta_t", "owner", "visibility"]
Loading

0 comments on commit 2f1ef89

Please sign in to comment.