Skip to content

Commit

Permalink
feat(scoring): add endersgame function (#115)
Browse files Browse the repository at this point in the history
  • Loading branch information
Caceresenzo authored Oct 8, 2024
1 parent 4b22829 commit 91f1414
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 15 deletions.
3 changes: 3 additions & 0 deletions crunch/api/domain/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@ class ScorerFunction(enum.Enum):
RECALL = "RECALL"
SPEARMAN = "SPEARMAN"

CUSTOM_ENDERSGAME_PROFIT_AND_LOSS_WITH_TRANSACTION_COST = "CUSTOM_ENDERSGAME_PROFIT_AND_LOSS_WITH_TRANSACTION_COST"

# Represent a member by its bare name (e.g. `SPEARMAN`) instead of the default
# Enum repr, which also includes the class name and the value.
def __repr__(self):
return self.name


class ReducerFunction(enum.Enum):

NONE = "NONE"
SUM = "SUM"
MEAN = "MEAN"
PRODUCT_PLUS_MINUS_1 = "PRODUCT_PLUS_MINUS_1"

Expand Down
23 changes: 15 additions & 8 deletions crunch/scoring/_format/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,18 @@ def merge(
column_names.input,
]]

return pandas.merge(
y_test,
prediction,
on=[
column_names.moon,
column_names.id
]
)
y_test.dropna(inplace=True)

groups = []
for stream_name, x_test_group in y_test.groupby(column_names.id):
prediction_group = prediction[prediction[column_names.id] == stream_name]

x_test_group.reset_index(drop=True, inplace=True)
prediction_group.reset_index(drop=True, inplace=True)

x_test_group[column_names.output] = prediction_group[column_names.output]

assert not len(x_test_group[x_test_group[column_names.output].isna()]) # TODO Should be assured by checks instead
groups.append(x_test_group)

return pandas.concat(groups)
1 change: 1 addition & 0 deletions crunch/scoring/reducers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def product_plus_minus_1(

# Lookup table from each `api.ReducerFunction` member to the object registered
# to handle it (e.g. `statistics.mean` for MEAN).
REGISTRY = {
api.ReducerFunction.NONE: none,
# NOTE(review): `sum` is presumably the builtin — confirm that no module-level
# `sum` defined elsewhere in this file shadows it.
api.ReducerFunction.SUM: sum,
api.ReducerFunction.MEAN: statistics.mean,
api.ReducerFunction.PRODUCT_PLUS_MINUS_1: product_plus_minus_1,
}
17 changes: 11 additions & 6 deletions crunch/scoring/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ def _reduce(
target_column_names: api.TargetColumnNames,
):
details = {}
for moon, value in all_details.items():
for key, value in all_details.items():
popped = True

if moon in y_test_keys:
details[moon] = value
if key in y_test_keys:
details[key] = value

popped = False

logger.info(f"score - target_name={target_name} metric_name={metric.name} scorer_function={metric.scorer_function.name} moon={moon} value={value} popped={popped}")
logger.info(f"score - target_name={target_name} metric_name={metric.name} scorer_function={metric.scorer_function.name} key=`{key}` value={value} popped={popped}")

if metric.reducer_function == api.ReducerFunction.NONE:
reducer_method = api.ReducerFunction.NONE.name
Expand Down Expand Up @@ -200,9 +200,14 @@ def score(
logger.warning(f"unknown metric - target_name={target_name} metric_name={metric.name} function={metric.scorer_function.name}")
continue

dataframe = merged
if competition_format == api.CompetitionFormat.STREAM:
dataframe = merged[merged[column_names.id] == target_name] \
.set_index(column_names.moon)

all_details = _call_scorer_grouped(
scorer,
merged,
dataframe,
column_names,
target_column_names
)
Expand All @@ -214,7 +219,7 @@ def score(
y_test_keys,
metric,
scorer,
merged,
dataframe,
column_names,
target_column_names,
)
Expand Down
17 changes: 16 additions & 1 deletion crunch/scoring/scorers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def balanced_accuracy(
prediction_column_name: str,
) -> float:
import sklearn.metrics

target = group[target_column_name]
prediction = group[prediction_column_name]

Expand Down Expand Up @@ -113,6 +113,19 @@ def spearman(
return score


def custom_endersgame_profit_and_loss_with_transaction_cost(
    group: pandas.DataFrame,
    target_column_name: str,
    prediction_column_name: str,
):
    """Compute a sign-based profit and loss, net of a flat transaction cost.

    Only the sign of each prediction is used: the target value is kept,
    negated, or zeroed depending on whether the prediction is positive,
    negative, or zero. Every non-zero prediction in the group counts as one
    transaction, charged at a fixed 0.01 each.

    Args:
        group: Rows to score.
        target_column_name: Column of `group` holding the target values.
        prediction_column_name: Column of `group` holding the predictions.

    Returns:
        The element-wise signed targets, each reduced by the group's total
        transaction cost. The result is not aggregated; it has one entry per
        row of `group`.
    """
    cost_per_transaction = 0.01

    targets = group[target_column_name]
    predictions = group[prediction_column_name]

    # One scalar for the whole group: number of non-zero predictions times
    # the per-transaction fee.
    total_cost = predictions.ne(0).sum() * cost_per_transaction

    # Magnitude of the prediction is ignored; only its direction matters.
    signed_targets = targets * numpy.sign(predictions)

    # The scalar cost is broadcast, so it is subtracted from every row.
    return signed_targets - total_cost


REGISTRY = {
api.ScorerFunction.BALANCED_ACCURACY: balanced_accuracy,
api.ScorerFunction.DOT_PRODUCT: dot_product,
Expand All @@ -121,4 +134,6 @@ def spearman(
api.ScorerFunction.RANDOM: random,
api.ScorerFunction.RECALL: recall,
api.ScorerFunction.SPEARMAN: spearman,

api.ScorerFunction.CUSTOM_ENDERSGAME_PROFIT_AND_LOSS_WITH_TRANSACTION_COST: custom_endersgame_profit_and_loss_with_transaction_cost,
}

0 comments on commit 91f1414

Please sign in to comment.