feat: small refactor

2026-03-08 16:41:30 +00:00
parent 16a7a470ea
commit 05aadaec29
9 changed files with 354 additions and 7 deletions
--- a/data_platform/assets/ml/discord_alerts.py
+++ b/data_platform/assets/ml/discord_alerts.py
@@ -2,6 +2,7 @@

 from pathlib import Path

+import pandas as pd
 import requests
 from dagster import (
    AssetExecutionContext,
@@ -51,6 +52,7 @@ def _build_embed(row) -> dict:
    deps=["elo_inference"],
    group_name="ml",
    kinds={"python", "discord"},
+    tags={"manual": "true"},
    description=(
        "Send a Discord notification for newly scored listings whose "
        "predicted ELO exceeds a configurable threshold."
@@ -69,7 +71,7 @@ def listing_alert(
        conn.execute(text(render_sql(_SQL_DIR, "ensure_notified_table.sql")))

    query = render_sql(_SQL_DIR, "select_top_predictions.sql")
-    df = __import__("pandas").read_sql(
+    df = pd.read_sql(
        text(query),
        engine,
        params={"min_elo": config.min_elo},
--- a/data_platform/assets/ml/elo_inference.py
+++ b/data_platform/assets/ml/elo_inference.py
@@ -16,7 +16,7 @@ from sqlalchemy import text

 from data_platform.assets.ml.elo_model import (
    ALL_FEATURES,
-    _preprocess,
+    preprocess,
 )
 from data_platform.helpers import render_sql
 from data_platform.resources import MLflowResource, PostgresResource
@@ -104,7 +104,7 @@ def elo_inference(
    model = mlflow.lightgbm.load_model(model_uri)

    # Preprocess features identically to training
-    df = _preprocess(df)
+    df = preprocess(df)
    X = df[ALL_FEATURES].copy()

    # Predict normalised ELO and convert back to original scale
--- a/data_platform/assets/ml/elo_model.py
+++ b/data_platform/assets/ml/elo_model.py
@@ -84,7 +84,7 @@ class EloModelConfig(Config):
    mlflow_experiment: str = "elo-rating-prediction"


-def _preprocess(df: pd.DataFrame) -> pd.DataFrame:
+def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    """Convert raw columns to model-ready numeric features."""
    df["energy_label_num"] = (
        df["energy_label"]
@@ -139,7 +139,7 @@ def elo_prediction_model(
        )

    # Preprocess and normalise ELO target
-    df = _preprocess(df)
+    df = preprocess(df)
    df["elo_norm"] = (df["elo_rating"] - 1500) / 100

    X = df[ALL_FEATURES].copy()