feat: separate sql

This commit is contained in:
Stijnvandenbroek
2026-03-06 14:26:20 +00:00
parent 81188a4569
commit c908d96921
16 changed files with 179 additions and 219 deletions

View File

@@ -6,7 +6,7 @@ from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.config import settings
from app.config import load_sql, settings
from app.database import get_db
from app.elo import calculate_elo
from app.models import Comparison, EloRating
@@ -21,25 +21,22 @@ from app.schemas import (
router = APIRouter()
# SQL fragment that restricts a listing query (listing table aliased as ``l``)
# to rows that also appear in ``sample_listings`` of the configured ELO schema.
# It starts with a space so it can be appended directly to a SELECT string.
# The schema name is interpolated from application settings, not request data.
SAMPLE_JOIN = (
    f" inner join {settings.ELO_SCHEMA}.sample_listings as s"
    " on l.global_id = s.global_id"
)
@router.get("/matchup", response_model=MatchupResponse)
def get_matchup(
status: str | None = None,
db: Session = Depends(get_db),
):
"""Return a weighted-random pair of listings for comparison.
Only listings in the stable sample (elo.sample_listings) are considered.
Listings with fewer comparisons are more likely to appear, ensuring
broad coverage across all properties.
"""
query = LISTING_SELECT + f"""
INNER JOIN {settings.ELO_SCHEMA}.sample_listings s
ON l.global_id = s.global_id
"""
params: dict = {}
"""Return a weighted-random pair of listings for comparison."""
query = LISTING_SELECT + SAMPLE_JOIN
params: dict = {"default_elo": settings.DEFAULT_ELO}
if status and status != "all":
query += " WHERE l.status = :status"
query += " where l.status = :status"
params["status"] = status
result = db.execute(text(query), params)
listings = [row_to_listing(row) for row in result]
@@ -50,33 +47,21 @@ def get_matchup(
detail="Not enough listings for comparison (need at least 2).",
)
# Gather recent pairs to avoid immediate repeats
recent = db.execute(
text(
f"SELECT listing_a_id, listing_b_id "
f"FROM {settings.ELO_SCHEMA}.comparisons "
f"ORDER BY created_at DESC LIMIT 20"
)
)
recent = db.execute(text(load_sql("recent_pairs.sql")))
recent_pairs = {
frozenset([r.listing_a_id, r.listing_b_id]) for r in recent
}
# Weight by inverse comparison count (prioritise less-compared houses)
weights = [1.0 / (l.comparison_count + 1) ** 1.5 for l in listings]
# Pick first listing
first = random.choices(listings, weights=weights, k=1)[0]
# Pick second listing (exclude first, avoid recent repeats)
remaining = [l for l in listings if l.global_id != first.global_id]
remaining_weights = [1.0 / (l.comparison_count + 1) ** 1.5 for l in remaining]
second = remaining[0] # fallback
second = remaining[0]
for _ in range(20):
candidate = random.choices(remaining, weights=remaining_weights, k=1)[0]
pair = frozenset([first.global_id, candidate.global_id])
if pair not in recent_pairs:
if frozenset([first.global_id, candidate.global_id]) not in recent_pairs:
second = candidate
break
else:
@@ -88,37 +73,24 @@ def get_matchup(
@router.post("/compare", response_model=CompareResponse)
def submit_comparison(body: CompareRequest, db: Session = Depends(get_db)):
"""Record a comparison result and update ELO ratings."""
winner_id = body.winner_id
loser_id = body.loser_id
if winner_id == loser_id:
if body.winner_id == body.loser_id:
raise HTTPException(status_code=400, detail="Winner and loser must differ.")
# Get or create rating records
winner_rating = db.query(EloRating).filter_by(global_id=winner_id).first()
if not winner_rating:
winner_rating = EloRating(
global_id=winner_id, elo_rating=settings.DEFAULT_ELO
)
db.add(winner_rating)
db.flush()
def get_or_create_rating(global_id: str) -> EloRating:
    """Return the rating record for ``global_id``, creating it if missing.

    A newly created record starts at ``settings.DEFAULT_ELO`` and is added
    to the enclosing session and flushed before being returned.
    """
    existing = db.query(EloRating).filter_by(global_id=global_id).first()
    if existing:
        return existing

    created = EloRating(global_id=global_id, elo_rating=settings.DEFAULT_ELO)
    db.add(created)
    # NOTE(review): the flush presumably populates column defaults (counters)
    # before the caller increments them -- confirm against the model.
    db.flush()
    return created
loser_rating = db.query(EloRating).filter_by(global_id=loser_id).first()
if not loser_rating:
loser_rating = EloRating(
global_id=loser_id, elo_rating=settings.DEFAULT_ELO
)
db.add(loser_rating)
db.flush()
winner_rating = get_or_create_rating(body.winner_id)
loser_rating = get_or_create_rating(body.loser_id)
elo_w_before = winner_rating.elo_rating
elo_l_before = loser_rating.elo_rating
new_elo_w, new_elo_l = calculate_elo(elo_w_before, elo_l_before, settings.K_FACTOR)
new_elo_w, new_elo_l = calculate_elo(
elo_w_before, elo_l_before, settings.K_FACTOR
)
# Update ratings
winner_rating.elo_rating = new_elo_w
winner_rating.comparison_count += 1
winner_rating.wins += 1
@@ -127,12 +99,11 @@ def submit_comparison(body: CompareRequest, db: Session = Depends(get_db)):
loser_rating.comparison_count += 1
loser_rating.losses += 1
# Record comparison
db.add(
Comparison(
listing_a_id=winner_id,
listing_b_id=loser_id,
winner_id=winner_id,
listing_a_id=body.winner_id,
listing_b_id=body.loser_id,
winner_id=body.winner_id,
elo_a_before=elo_w_before,
elo_b_before=elo_l_before,
elo_a_after=new_elo_w,
@@ -142,132 +113,58 @@ def submit_comparison(body: CompareRequest, db: Session = Depends(get_db)):
db.commit()
return CompareResponse(
winner_id=winner_id,
loser_id=loser_id,
winner_id=body.winner_id,
loser_id=body.loser_id,
elo_change=round(new_elo_w - elo_w_before, 1),
new_winner_elo=round(new_elo_w, 1),
new_loser_elo=round(new_elo_l, 1),
)
def _row_to_history(r) -> ComparisonHistoryItem:
    """Build a ``ComparisonHistoryItem`` from one comparison result row.

    Identifier and title attributes are copied unchanged, the four ELO
    snapshot attributes are rounded to one decimal place, and
    ``created_at`` is passed through as-is.
    """
    copied_as_is = (
        "id",
        "listing_a_title",
        "listing_b_title",
        "winner_title",
        "listing_a_id",
        "listing_b_id",
        "winner_id",
    )
    rounded_to_one_decimal = (
        "elo_a_before",
        "elo_b_before",
        "elo_a_after",
        "elo_b_after",
    )

    fields = {name: getattr(r, name) for name in copied_as_is}
    fields.update(
        (name, round(getattr(r, name), 1)) for name in rounded_to_one_decimal
    )
    fields["created_at"] = r.created_at
    return ComparisonHistoryItem(**fields)
@router.get("/history", response_model=list[ComparisonHistoryItem])
def get_history(
limit: int = 50,
db: Session = Depends(get_db),
):
def get_history(limit: int = 50, db: Session = Depends(get_db)):
"""Return recent comparisons."""
query = f"""
SELECT
c.*,
a.title AS listing_a_title,
b.title AS listing_b_title,
w.title AS winner_title
FROM {settings.ELO_SCHEMA}.comparisons c
LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} a
ON c.listing_a_id = a.global_id
LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} b
ON c.listing_b_id = b.global_id
LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} w
ON c.winner_id = w.global_id
ORDER BY c.created_at DESC
LIMIT :limit
"""
rows = db.execute(text(query), {"limit": limit})
return [
ComparisonHistoryItem(
id=r.id,
listing_a_title=r.listing_a_title,
listing_b_title=r.listing_b_title,
winner_title=r.winner_title,
listing_a_id=r.listing_a_id,
listing_b_id=r.listing_b_id,
winner_id=r.winner_id,
elo_a_before=round(r.elo_a_before, 1),
elo_b_before=round(r.elo_b_before, 1),
elo_a_after=round(r.elo_a_after, 1),
elo_b_after=round(r.elo_b_after, 1),
created_at=r.created_at,
)
for r in rows
]
rows = db.execute(text(load_sql("history.sql")), {"limit": limit})
return [_row_to_history(r) for r in rows]
@router.get("/stats", response_model=StatsResponse)
def get_stats(db: Session = Depends(get_db)):
"""Return aggregate statistics about comparisons and ratings."""
total_comparisons = db.execute(
text(f"SELECT COUNT(*) FROM {settings.ELO_SCHEMA}.comparisons")
).scalar() or 0
total_rated = db.execute(
text(f"SELECT COUNT(*) FROM {settings.ELO_SCHEMA}.ratings")
).scalar() or 0
total_listings = db.execute(
text(
f"SELECT COUNT(*) FROM {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE}"
)
).scalar() or 0
elo_agg = db.execute(
text(
f"SELECT AVG(elo_rating), MAX(elo_rating), MIN(elo_rating) "
f"FROM {settings.ELO_SCHEMA}.ratings"
)
).first()
total_comparisons = db.execute(text(load_sql("count_comparisons.sql"))).scalar() or 0
total_rated = db.execute(text(load_sql("count_rated.sql"))).scalar() or 0
total_listings = db.execute(text(load_sql("count_listings.sql"))).scalar() or 0
elo_agg = db.execute(text(load_sql("elo_aggregates.sql"))).first()
avg_elo = round(float(elo_agg[0]), 1) if elo_agg and elo_agg[0] else None
max_elo = round(float(elo_agg[1]), 1) if elo_agg and elo_agg[1] else None
min_elo = round(float(elo_agg[2]), 1) if elo_agg and elo_agg[2] else None
# ELO distribution in buckets of 50
dist_rows = db.execute(
text(
f"SELECT FLOOR(elo_rating / 50) * 50 AS bucket, COUNT(*) AS count "
f"FROM {settings.ELO_SCHEMA}.ratings "
f"GROUP BY bucket ORDER BY bucket"
)
)
dist_rows = db.execute(text(load_sql("elo_distribution.sql")))
elo_distribution = [
{"bucket": f"{int(r.bucket)}-{int(r.bucket) + 49}", "count": r.count}
for r in dist_rows
]
# Recent comparisons
recent_query = f"""
SELECT
c.*,
a.title AS listing_a_title,
b.title AS listing_b_title,
w.title AS winner_title
FROM {settings.ELO_SCHEMA}.comparisons c
LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} a
ON c.listing_a_id = a.global_id
LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} b
ON c.listing_b_id = b.global_id
LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} w
ON c.winner_id = w.global_id
ORDER BY c.created_at DESC
LIMIT 10
"""
recent_rows = db.execute(text(recent_query))
recent_comparisons = [
ComparisonHistoryItem(
id=r.id,
listing_a_title=r.listing_a_title,
listing_b_title=r.listing_b_title,
winner_title=r.winner_title,
listing_a_id=r.listing_a_id,
listing_b_id=r.listing_b_id,
winner_id=r.winner_id,
elo_a_before=round(r.elo_a_before, 1),
elo_b_before=round(r.elo_b_before, 1),
elo_a_after=round(r.elo_a_after, 1),
elo_b_after=round(r.elo_b_after, 1),
created_at=r.created_at,
)
for r in recent_rows
]
recent_rows = db.execute(text(load_sql("history.sql")), {"limit": 10})
recent_comparisons = [_row_to_history(r) for r in recent_rows]
return StatsResponse(
total_comparisons=total_comparisons,