feat: add elo tables for house ranking
This commit is contained in:
5
data_platform/assets/elo/__init__.py
Normal file
5
data_platform/assets/elo/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
"""ELO ranking assets."""
|
||||||
|
|
||||||
|
from data_platform.assets.elo.elo import elo_comparisons, elo_ratings
|
||||||
|
|
||||||
|
__all__ = ["elo_comparisons", "elo_ratings"]
|
||||||
77
data_platform/assets/elo/elo.py
Normal file
77
data_platform/assets/elo/elo.py
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
"""ELO schema and table management assets.
|
||||||
|
|
||||||
|
Individual assets for the stateful ELO tables (ratings, comparisons) used
|
||||||
|
by the house-elo-ranking application. The sample_listings table is
|
||||||
|
managed as a dbt model in marts.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from dagster import AssetExecutionContext, MaterializeResult, MetadataValue, asset
|
||||||
|
from sqlalchemy import text
|
||||||
|
|
||||||
|
from data_platform.helpers import render_sql
|
||||||
|
from data_platform.resources import PostgresResource
|
||||||
|
|
||||||
|
_SQL_DIR = Path(__file__).parent / "sql"
|
||||||
|
_SCHEMA = "elo"
|
||||||
|
|
||||||
|
|
||||||
|
def _ensure_schema(conn: object) -> None:
|
||||||
|
"""Create the ELO schema if it does not exist."""
|
||||||
|
schema_sql = render_sql(_SQL_DIR, "create_schema.sql", schema=_SCHEMA)
|
||||||
|
conn.execute(text(schema_sql))
|
||||||
|
|
||||||
|
|
||||||
|
@asset(
|
||||||
|
deps=["elo_sample_listings"],
|
||||||
|
group_name="elo",
|
||||||
|
description="Creates the ELO ratings table that stores per-listing ELO scores.",
|
||||||
|
)
|
||||||
|
def elo_ratings(
|
||||||
|
context: AssetExecutionContext,
|
||||||
|
postgres: PostgresResource,
|
||||||
|
) -> MaterializeResult:
|
||||||
|
"""Ensure the ELO ratings table exists (idempotent)."""
|
||||||
|
table_sql = render_sql(_SQL_DIR, "create_ratings_table.sql", schema=_SCHEMA)
|
||||||
|
|
||||||
|
engine = postgres.get_engine()
|
||||||
|
with engine.begin() as conn:
|
||||||
|
_ensure_schema(conn)
|
||||||
|
conn.execute(text(table_sql))
|
||||||
|
|
||||||
|
context.log.info("ELO ratings table ensured.")
|
||||||
|
|
||||||
|
return MaterializeResult(
|
||||||
|
metadata={
|
||||||
|
"schema": MetadataValue.text(_SCHEMA),
|
||||||
|
"table": MetadataValue.text("ratings"),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@asset(
|
||||||
|
deps=["elo_sample_listings"],
|
||||||
|
group_name="elo",
|
||||||
|
description="Creates the ELO comparisons table that records pairwise match results.",
|
||||||
|
)
|
||||||
|
def elo_comparisons(
|
||||||
|
context: AssetExecutionContext,
|
||||||
|
postgres: PostgresResource,
|
||||||
|
) -> MaterializeResult:
|
||||||
|
"""Ensure the ELO comparisons table exists (idempotent)."""
|
||||||
|
table_sql = render_sql(_SQL_DIR, "create_comparisons_table.sql", schema=_SCHEMA)
|
||||||
|
|
||||||
|
engine = postgres.get_engine()
|
||||||
|
with engine.begin() as conn:
|
||||||
|
_ensure_schema(conn)
|
||||||
|
conn.execute(text(table_sql))
|
||||||
|
|
||||||
|
context.log.info("ELO comparisons table ensured.")
|
||||||
|
|
||||||
|
return MaterializeResult(
|
||||||
|
metadata={
|
||||||
|
"schema": MetadataValue.text(_SCHEMA),
|
||||||
|
"table": MetadataValue.text("comparisons"),
|
||||||
|
}
|
||||||
|
)
|
||||||
11
data_platform/assets/elo/sql/create_comparisons_table.sql
Normal file
11
data_platform/assets/elo/sql/create_comparisons_table.sql
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
create table if not exists {{ schema }}.comparisons (
|
||||||
|
id serial primary key,
|
||||||
|
listing_a_id text not null,
|
||||||
|
listing_b_id text not null,
|
||||||
|
winner_id text not null,
|
||||||
|
elo_a_before double precision not null,
|
||||||
|
elo_b_before double precision not null,
|
||||||
|
elo_a_after double precision not null,
|
||||||
|
elo_b_after double precision not null,
|
||||||
|
created_at timestamptz default now()
|
||||||
|
);
|
||||||
9
data_platform/assets/elo/sql/create_ratings_table.sql
Normal file
9
data_platform/assets/elo/sql/create_ratings_table.sql
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
create table if not exists {{ schema }}.ratings (
|
||||||
|
global_id text primary key,
|
||||||
|
elo_rating double precision not null default 1500.0,
|
||||||
|
comparison_count integer not null default 0,
|
||||||
|
wins integer not null default 0,
|
||||||
|
losses integer not null default 0,
|
||||||
|
created_at timestamptz default now(),
|
||||||
|
updated_at timestamptz default now()
|
||||||
|
);
|
||||||
1
data_platform/assets/elo/sql/create_schema.sql
Normal file
1
data_platform/assets/elo/sql/create_schema.sql
Normal file
@@ -0,0 +1 @@
|
|||||||
|
create schema if not exists {{ schema }};
|
||||||
@@ -6,6 +6,7 @@ from dagster import (
|
|||||||
from dagster_dbt import DbtCliResource
|
from dagster_dbt import DbtCliResource
|
||||||
|
|
||||||
from data_platform.assets.dbt import DBT_PROJECT_DIR, dbt_project_assets
|
from data_platform.assets.dbt import DBT_PROJECT_DIR, dbt_project_assets
|
||||||
|
from data_platform.assets.elo import elo_comparisons, elo_ratings
|
||||||
from data_platform.assets.ingestion.funda import (
|
from data_platform.assets.ingestion.funda import (
|
||||||
raw_funda_listing_details,
|
raw_funda_listing_details,
|
||||||
raw_funda_price_history,
|
raw_funda_price_history,
|
||||||
@@ -31,6 +32,8 @@ defs = Definitions(
|
|||||||
raw_funda_search_results,
|
raw_funda_search_results,
|
||||||
raw_funda_listing_details,
|
raw_funda_listing_details,
|
||||||
raw_funda_price_history,
|
raw_funda_price_history,
|
||||||
|
elo_ratings,
|
||||||
|
elo_comparisons,
|
||||||
]
|
]
|
||||||
),
|
),
|
||||||
jobs=[funda_ingestion_job, funda_raw_quality_job, elementary_refresh_job],
|
jobs=[funda_ingestion_job, funda_raw_quality_job, elementary_refresh_job],
|
||||||
|
|||||||
14
dbt/models/marts/elo_sample_listings.sql
Normal file
14
dbt/models/marts/elo_sample_listings.sql
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
-- Mart: stable random sample of Funda listings for pairwise ELO comparison.
|
||||||
|
-- Incrementally tops up to the target sample size using deterministic ordering.
|
||||||
|
|
||||||
|
select l.global_id
|
||||||
|
from {{ ref('funda_listings') }} as l
|
||||||
|
{% if is_incremental() %}
|
||||||
|
left join {{ this }} as s on l.global_id = s.global_id
|
||||||
|
where s.global_id is null
|
||||||
|
order by md5(l.global_id)
|
||||||
|
limit greatest(0, 50 - (select count(*) from {{ this }}))
|
||||||
|
{% else %}
|
||||||
|
order by md5(l.global_id)
|
||||||
|
limit 50
|
||||||
|
{% endif %}
|
||||||
31
dbt/models/marts/elo_sample_listings.yml
Normal file
31
dbt/models/marts/elo_sample_listings.yml
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
version: 2
|
||||||
|
|
||||||
|
models:
|
||||||
|
- name: elo_sample_listings
|
||||||
|
description: >
|
||||||
|
Stable random sample of Funda listings used for pairwise ELO comparison in the
|
||||||
|
house-elo-ranking application. Maintains up to 50 listings, topped up incrementally using
|
||||||
|
deterministic md5-based ordering.
|
||||||
|
config:
|
||||||
|
materialized: incremental
|
||||||
|
unique_key: global_id
|
||||||
|
on_schema_change: fail
|
||||||
|
schema: elo
|
||||||
|
contract:
|
||||||
|
enforced: true
|
||||||
|
meta:
|
||||||
|
dagster:
|
||||||
|
group: elo
|
||||||
|
columns:
|
||||||
|
- name: global_id
|
||||||
|
description: Funda internal listing ID, referencing funda_listings.
|
||||||
|
data_type: text
|
||||||
|
constraints:
|
||||||
|
- type: not_null
|
||||||
|
- type: unique
|
||||||
|
tests:
|
||||||
|
- unique
|
||||||
|
- not_null
|
||||||
|
- relationships:
|
||||||
|
to: ref('funda_listings')
|
||||||
|
field: global_id
|
||||||
Reference in New Issue
Block a user