Skip to content

Commit

Permalink
checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
DylanLukes committed Jun 26, 2024
1 parent 477657e commit ca656a2
Show file tree
Hide file tree
Showing 24 changed files with 896 additions and 708 deletions.
213 changes: 20 additions & 193 deletions notebooks/Bootstrapping.ipynb

Large diffs are not rendered by default.

334 changes: 52 additions & 282 deletions notebooks/Experiments.ipynb

Large diffs are not rendered by default.

324 changes: 291 additions & 33 deletions notebooks/Scratch.ipynb

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,21 @@ dependencies = [
# Typing and Modeling
"annotated-types", # Annotated type annotations
"pydantic", # Data modeling and validation
"pydantic-settings",
"pydantic-settings", # Settings management
"bitarray", # Bit arrays

# Data Processing
"aiosql", # SQL
"numpy", # Numerical computing
"scikit-learn", # Machine learning
"scipy", # Scientific computing
"polars", # Data frames
"pandas", # Data frames (not used, but needed for stubs in typings to not be Unknown)
# "pandas", # Data frames (not used, but needed for stubs in typings to not be Unknown)
"pandera", # Data validation
"pyarrow", # Columnar data format

# Infrastructure
"loguru", # Logging
"dacite", # TODO: remove

# CLI Interface
"click", # CLI
Expand Down
6 changes: 2 additions & 4 deletions src/renkon/cli/web.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import logging

import click
import uvicorn

from renkon.util.logging import InterceptHandler, configure_logging
from renkon.util.logging import configure_logging


@click.command(context_settings={"show_default": True})
@click.option("--host", default="127.0.0.1", show_default=True, help="Host to bind to.", type=str)
@click.option("--port", default=9876, show_default=True, help="Port to bind to.", type=int)
@click.option("--reload/--no-reload", default=True, show_default=True, help="Enable/disable auto-reload.", type=bool)
def web(host: str, port: int, reload: bool) -> None:
def web(*, host: str, port: int, reload: bool) -> None:
configure_logging()
uvicorn.run("renkon.web.app:app", host=host, port=port, reload=reload)
18 changes: 18 additions & 0 deletions src/renkon/core/model/errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# SPDX-FileCopyrightText: 2024-present Dylan Lukes <[email protected]>
#
# SPDX-License-Identifier: BSD-3-Clause

class TraitSketchError(Exception):
    """Common base class for the TraitSketch-related errors in this module."""


class UnsubstitutedMetavariableError(TraitSketchError):
    """
    Error reporting that a TraitSketch has unsubstituted metavariables.

    :param missing_metavars: names of the metavariables that have no substitution.
    """

    def __init__(self, missing_metavars: set[str]):
        # The offending names are kept on the instance and also passed through
        # as the second element of ``args``, after the formatted message.
        self.missing_metavars = missing_metavars
        message = f"TraitSketch has unsubstituted metavariables: {missing_metavars}"
        super().__init__(message, missing_metavars)


class UnknownMetavariableError(TraitSketchError):
    """
    Error reporting that a TraitSketch's schema has unknown metavariables.

    :param invalid_substs: the metavariable names that were not recognised.
    """

    def __init__(self, invalid_substs: set[str]):
        # The offending names are kept on the instance and also passed through
        # as the second element of ``args``, after the formatted message.
        self.invalid_substs = invalid_substs
        message = f"TraitSketch's schema has unknown metavariables: {invalid_substs}"
        super().__init__(message, invalid_substs)
22 changes: 0 additions & 22 deletions src/renkon/core/model/result.py

This file was deleted.

68 changes: 4 additions & 64 deletions src/renkon/core/model/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ def __hash__(self) -> int:
return hash(tuple(self.root.items()))

@overload
def __getitem__(self, key: ColumnName) -> ColumnType: ...
def __getitem__(self, key: ColumnName) -> ColumnType:
...

@overload
def __getitem__(self, key: ColumnNames) -> Self: ...
def __getitem__(self, key: ColumnNames) -> Self:
...

def __getitem__(self, key: ColumnName | ColumnNames) -> ColumnType | Self:
match key:
Expand Down Expand Up @@ -62,65 +64,3 @@ def to_polars(self) -> SchemaDict:

def subschema(self, columns: Sequence[str]) -> Self:
return self.__class__(root=OrderedDict({col: self.root[col] for col in columns}))


# class _OldSchema(Mapping[str, PolarsDataType]):
# """
# Represents a schema for some or all of the columns a data frame. This differs from
# a polars SchemaDict in that it explicitly preserves the order of its entries.
#
# Unlike OrderedDict, this class provides an .index method for lookup of the index of a column name.
# """
#
# _dict: OrderedDict[str, ColumnType]
# _order: tuple[str, ...]
#
# def __init__(self, schema_dict: SchemaDict, *, ordering: Sequence[str] | None = None):
# """
# @param schema_dict: A mapping of column names to column types.
# @param ordering: The order in which the columns should be stored. If not provided, the order
# will be the order in which the columns are iterated over in the schema_dict.
# """
# ordering = ordering or list(schema_dict.keys())
# self._dict = OrderedDict((col, schema_dict[col]) for col in ordering)
# self._order = tuple(ordering)
#
# @property
# def columns(self) -> tuple[str, ...]:
# return tuple(self._dict.keys())
#
# @property
# def dtypes(self) -> tuple[ColumnType, ...]:
# return tuple(self._dict.values())
#
# @classmethod
# def from_polars(cls, schema_dict: SchemaDict) -> Self:
# return cls(schema_dict)
#
# def to_polars(self) -> SchemaDict:
# return self._dict
#
# def subschema(self, columns: Sequence[str]) -> Self:
# return self.__class__({col: self._dict[col] for col in columns}, ordering=tuple(columns))
#
# def index(self, column_name: str) -> int | None:
# return self._order.index(column_name) if column_name in self._order else None
#
# def __getitem__(self, column_name: str) -> ColumnType:
# return self._dict[column_name]
#
# def __iter__(self):
# return iter(self._dict)
#
# def __len__(self):
# return len(self._dict)
#
# def __lt__(self, other: Self) -> bool:
# return self._order < other._order
#
# def __hash__(self):
# return hash(tuple(self._dict.items()))
#
# def __str__(self):
# schema_str = ", ".join([f"{col}: {ty}" for col, ty in self._dict.items()])
# return f"{{{schema_str}}}"
6 changes: 3 additions & 3 deletions src/renkon/core/model/sketch.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from pydantic import BaseModel, model_validator

from renkon.core.model.trait import TraitInfo
from renkon.core.model.trait import TraitSpec


class SketchInfo(BaseModel):
Expand All @@ -13,8 +13,8 @@ class SketchInfo(BaseModel):
:param substs: the assignments of column names to metavariable names in the trait form.
"""

trait: TraitInfo
substs: dict[str, str]
trait: TraitSpec
substs: dict[str, str] # TODO: include the _types_!

@model_validator(mode="after")
def check_all_metavars_subst(self) -> Self:
Expand Down
14 changes: 11 additions & 3 deletions src/renkon/core/model/trait/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@
# SPDX-License-Identifier: BSD-3-Clause
__all__ = [
"TraitId",
"TraitKind",
"TraitForm",
"TraitInfo",
"TraitSort",
"TraitSpec",
"TraitSketch",
"TraitScore",
"TraitResult",

"TraitFormValidationError",
"DuplicateMetavarsError",
"DuplicateParamsError",
Expand All @@ -19,7 +23,11 @@
DuplicateMetavarsError,
DuplicateParamsError,
DuplicateTemplateFieldError,
TraitForm,
TraitFormValidationError,
UnknownTemplateFieldError,
)
from renkon.core.model.trait.info import TraitForm, TraitId, TraitInfo, TraitSort
from renkon.core.model.trait.kind import TraitKind
from renkon.core.model.trait.new import TraitId, TraitScore, TraitSpec
from renkon.core.model.trait.result import TraitResult
from renkon.core.model.trait.sketch import TraitSketch
21 changes: 14 additions & 7 deletions src/renkon/core/model/trait/form.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ class TraitForm(BaseModel):
Model representing the form of a trait, can be templated with actual values.
>>> TraitForm.model_validate_json('''{
... "template": "{y} = {a}*{x} + {b}",
... "metavars": ["x", "y"],
... "template": "{Y} = {a}*{X} + {b}",
... "metavars": ["Y", "X"],
... "params": ["a", "b"]
... }''')
TraitForm(template='{Y} = {a}*{X} + {b}', metavars=['Y', 'X'], params=['a', 'b'])
Expand All @@ -29,15 +29,22 @@ class TraitForm(BaseModel):
metavars: list[str]
params: list[str]

def format(self, extra: TemplateExtraPolicy = "forbid", **kwargs: Any):
def format(self, extra: TemplateExtraPolicy = "forbid", partial: bool = False, **mapping: Any):
"""
Template the trait form with the given metavariable and parameter substitutions. Must be
complete, i.e. all metavariables and parameters must be substituted.
:param extra: policy for extra fields in the mapping, "allow" or "forbid".
:param partial: if true, missing mappings are left as template fields.
:param mapping: the mapping of metavariables and parameters to their values.
"""
if extra == "forbid" and len(extra_fields := set(kwargs.keys()) - set(self.metavars + self.params)) != 0:
if extra == "forbid" and len(extra_fields := set(mapping.keys()) - set(self.metavars + self.params)) != 0:
raise ExtraTemplateSubstitutionError(extra_fields)

return self.template.format_map(kwargs)
if partial:
pass

return self.template.format_map(mapping)

def format_partial(self, extra: TemplateExtraPolicy = "forbid", **kwargs: Any):
"""
Expand Down Expand Up @@ -117,13 +124,13 @@ def __init__(self, field_name: str):
self.field_name = field_name


class ExtraTemplateSubstitutionError(RuntimeError):
class ExtraTemplateSubstitutionError(TraitFormValidationError):
def __init__(self, extra_fields: set[str]):
super().__init__(f"Extra template substitutions: {extra_fields}", extra_fields)
self.extra_fields = extra_fields


class MissingTemplateSubstitutionError(RuntimeError):
class MissingTemplateSubstitutionError(TraitFormValidationError):
def __init__(self, missing_fields: set[str]):
super().__init__(f"Missing template substitutions: {missing_fields}", missing_fields)
self.missing_fields = missing_fields
26 changes: 26 additions & 0 deletions src/renkon/core/model/trait/kind.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# SPDX-FileCopyrightText: 2024-present Dylan Lukes <[email protected]>
#
# SPDX-License-Identifier: BSD-3-Clause
from enum import StrEnum


class TraitKind(StrEnum):
    """
    Enum representing the possible kinds of a trait.

    The kind of a trait is a high-level categorization of the trait's nature,
    and strongly implies the process by which it is inferred and scored.

    Members are string-valued (``StrEnum``); each value is the lowercase
    member name.

    :cvar ALGEBRAIC: An algebraic (numeric) expression over columns, e.g. "a*x + b = c".
    :cvar LOGICAL: A logical (boolean) expression over columns, e.g. "a > b".
    :cvar MODEL: A model of the data, e.g. a linear regression model.
    :cvar STATISTICAL: A statistical test or measure, e.g. a t-test.
    :cvar TEXTUAL: A textual (string) expression over columns, e.g. "a contains 'b'".
    """

    ALGEBRAIC = "algebraic"
    LOGICAL = "logical"
    MODEL = "model"
    STATISTICAL = "statistical"
    TEXTUAL = "textual"
Original file line number Diff line number Diff line change
@@ -1,42 +1,29 @@
from enum import StrEnum
# SPDX-FileCopyrightText: 2024-present Dylan Lukes <[email protected]>
#
# SPDX-License-Identifier: BSD-3-Clause
from abc import abstractmethod
from typing import Annotated, Protocol, final

__TRAIT_INFO__ = "__trait_info__"

from annotated_types import Gt, Lt
from pydantic import BaseModel

from renkon.core.model.trait import TraitKind
from renkon.core.model.trait.form import TraitForm

type TraitId = str
type TraitScore = Annotated[float, Gt(0.0), Lt(1.0)]


class TraitSort(StrEnum):
"""
Enum representing the possible sorts of a trait.
The sort of a trait is a high-level categorization of the trait's nature,
and strongly implies the process by which it is inferred and scored.
:cvar ALGEBRAIC: An algebraic (numeric) expression over columns, e.g. "a*x + b = c".
:cvar LOGICAL: A logical (boolean) expression over columns, e.g. "a > b".
:cvar MODEL: A model of the data, e.g. a linear regression model.
:cvar STATISTICAL: A statistical test or measure, e.g. a t-test.
:cvar TEXTUAL: A textual (string) expression over columns, e.g. "a contains 'b'".
"""

ALGEBRAIC = "algebraic"
LOGICAL = "logical"
MODEL = "model"
STATISTICAL = "statistical"
TEXTUAL = "textual"


class TraitInfo(BaseModel):
class TraitSpec(BaseModel):
"""
Model representing the descriptive identity of a trait.
This is as opposed to the behavioral functionality (e.g. inference, scoring)
found in :class:`~renkon.core.trait.Trait`.
>>> trait = TraitInfo.model_validate_json('''{
>>> trait = TraitSpec.model_validate_json('''{
... "id": "renkon.core.trait.linear.Linear2",
... "name": "Linear Regression (2D)",
... "sort": "model",
Expand All @@ -57,5 +44,45 @@ class TraitInfo(BaseModel):

id: TraitId
name: str
sort: TraitSort
sort: TraitKind
form: TraitForm


class TraitDisplay:
    """Placeholder type returned by ``Trait.view``; it defines no members yet."""


class TraitInfer:
    """Placeholder type returned by ``Trait.infer``; it defines no members yet."""


class Trait(Protocol):
    """
    Protocol describing what a trait exposes.

    Implementations supply three read-only properties (``info``, ``view`` and
    ``infer``); ``form`` is provided here and is derived from ``info``.
    """

    @property
    @abstractmethod
    def info(self) -> TraitSpec:
        """The metadata (:class:`TraitSpec`) for this trait."""

    @property
    @abstractmethod
    def view(self) -> TraitDisplay:
        """The :class:`TraitDisplay` for this trait."""

    @property
    @abstractmethod
    def infer(self) -> TraitInfer:
        """The :class:`TraitInfer` for this trait."""

    @property
    def form(self) -> TraitForm:
        """The trait's form, read from ``info.form``."""
        spec = self.info
        return spec.form


@final
class Linear(Trait):
    """Stub trait: declared final and adds nothing to :class:`Trait` yet."""


if __name__ == "__main__":
    # Debug output when the module is run as a script. Note that ``info`` is
    # looked up on the class itself here, not on an instance.
    print(Linear.info) # noqa
Loading

0 comments on commit ca656a2

Please sign in to comment.