Files
house-elo-ranking/backend/app/routers/comparisons.py
2026-03-06 12:25:07 +00:00

282 lines
9.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Comparison endpoints matchmaking and ELO updates."""
import random
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.config import settings
from app.database import get_db
from app.elo import calculate_elo
from app.models import Comparison, EloRating
from app.queries import LISTING_SELECT, row_to_listing
from app.schemas import (
CompareRequest,
CompareResponse,
ComparisonHistoryItem,
MatchupResponse,
StatsResponse,
)
router = APIRouter()
def _coverage_weight(listing) -> float:
    """Return the sampling weight of *listing*: fewer comparisons, higher weight."""
    return 1.0 / (listing.comparison_count + 1) ** 1.5


@router.get("/matchup", response_model=MatchupResponse)
def get_matchup(
    status: str | None = None,
    db: Session = Depends(get_db),
):
    """Return a weighted-random pair of listings for comparison.

    Only listings in the stable sample (elo.sample_listings) are considered.
    Listings with fewer comparisons are more likely to appear, ensuring
    broad coverage across all properties.

    Args:
        status: Optional listing status to filter on. ``None``, an empty
            string, or ``"all"`` disables the filter.
        db: Database session injected through ``get_db``.

    Returns:
        A ``MatchupResponse`` whose two listings have different
        ``global_id`` values.

    Raises:
        HTTPException: 400 when fewer than two distinct listings are
            available.
    """
    query = LISTING_SELECT + f"""
        INNER JOIN {settings.ELO_SCHEMA}.sample_listings s
            ON l.global_id = s.global_id
    """
    params: dict = {}
    if status and status != "all":
        query += " WHERE l.status = :status"
        params["status"] = status

    listings = [row_to_listing(row) for row in db.execute(text(query), params)]
    if len(listings) < 2:
        raise HTTPException(
            status_code=400,
            detail="Not enough listings for comparison (need at least 2).",
        )

    # Gather the 20 most recent pairs to avoid immediate repeats. Pairs are
    # stored as frozensets so that (a, b) and (b, a) compare equal.
    recent = db.execute(
        text(
            f"SELECT listing_a_id, listing_b_id "
            f"FROM {settings.ELO_SCHEMA}.comparisons "
            f"ORDER BY created_at DESC LIMIT 20"
        )
    )
    recent_pairs = {
        frozenset([row.listing_a_id, row.listing_b_id]) for row in recent
    }

    # Pick the first listing, weighted by inverse comparison count.
    first = random.choices(
        listings,
        weights=[_coverage_weight(item) for item in listings],
        k=1,
    )[0]

    # Every other listing is a potential partner.
    others = [item for item in listings if item.global_id != first.global_id]
    if not others:
        # All returned rows share one global_id, so no pair can be formed.
        raise HTTPException(
            status_code=400,
            detail="Not enough listings for comparison (need at least 2).",
        )

    # Prefer partners that would not recreate a recent pair. Filtering the
    # pool (rather than retrying random draws a fixed number of times)
    # guarantees a fresh pair whenever one exists; only if every partner was
    # recently paired with `first` do we fall back to the full pool.
    fresh = [
        item
        for item in others
        if frozenset([first.global_id, item.global_id]) not in recent_pairs
    ]
    pool = fresh or others
    second = random.choices(
        pool,
        weights=[_coverage_weight(item) for item in pool],
        k=1,
    )[0]

    return MatchupResponse(listing_a=first, listing_b=second)
def _rating_for(db: Session, listing_id):
    """Fetch the rating row for *listing_id*, inserting a default one if absent."""
    found = db.query(EloRating).filter_by(global_id=listing_id).first()
    if found:
        return found
    created = EloRating(global_id=listing_id, elo_rating=settings.DEFAULT_ELO)
    db.add(created)
    # Flush right away so the new row is written before it is used further.
    db.flush()
    return created


@router.post("/compare", response_model=CompareResponse)
def submit_comparison(body: CompareRequest, db: Session = Depends(get_db)):
    """Store the outcome of one comparison and apply the ELO update.

    Rating rows are created with ``settings.DEFAULT_ELO`` for listings that
    have none yet. The winner is recorded as ``listing_a`` and the loser as
    ``listing_b`` of the stored comparison.

    Args:
        body: Request payload carrying ``winner_id`` and ``loser_id``.
        db: Database session injected through ``get_db``.

    Returns:
        A ``CompareResponse`` with the winner's rating change and both new
        ratings, each rounded to one decimal place.

    Raises:
        HTTPException: 400 when winner and loser are the same listing.
    """
    winner_id, loser_id = body.winner_id, body.loser_id
    if winner_id == loser_id:
        raise HTTPException(status_code=400, detail="Winner and loser must differ.")

    winner = _rating_for(db, winner_id)
    loser = _rating_for(db, loser_id)

    # Snapshot the ratings before mutating them; the snapshot is stored on
    # the comparison row and used to compute the reported change.
    winner_before, loser_before = winner.elo_rating, loser.elo_rating
    winner_after, loser_after = calculate_elo(
        winner_before, loser_before, settings.K_FACTOR
    )

    winner.elo_rating = winner_after
    winner.comparison_count += 1
    winner.wins += 1

    loser.elo_rating = loser_after
    loser.comparison_count += 1
    loser.losses += 1

    record = Comparison(
        listing_a_id=winner_id,
        listing_b_id=loser_id,
        winner_id=winner_id,
        elo_a_before=winner_before,
        elo_b_before=loser_before,
        elo_a_after=winner_after,
        elo_b_after=loser_after,
    )
    db.add(record)
    db.commit()

    return CompareResponse(
        winner_id=winner_id,
        loser_id=loser_id,
        elo_change=round(winner_after - winner_before, 1),
        new_winner_elo=round(winner_after, 1),
        new_loser_elo=round(loser_after, 1),
    )
@router.get("/history", response_model=list[ComparisonHistoryItem])
def get_history(
    limit: int = 50,
    db: Session = Depends(get_db),
):
    """Return recent comparisons, newest first.

    Each comparison is joined against the listings table three times to
    resolve the titles of listing A, listing B and the winner. The joins
    are LEFT JOINs, so a title is ``None`` when the listing row is missing.

    Args:
        limit: Maximum number of comparisons to return. Must not be negative.
        db: Database session injected through ``get_db``.

    Returns:
        A list of ``ComparisonHistoryItem`` with ELO values rounded to one
        decimal place.

    Raises:
        HTTPException: 400 when ``limit`` is negative.
    """
    # Reject a negative limit up front instead of forwarding it to the SQL
    # LIMIT clause, where it is either an error or silently means "no limit"
    # depending on the database.
    if limit < 0:
        raise HTTPException(
            status_code=400,
            detail="limit must not be negative.",
        )

    query = f"""
        SELECT
            c.*,
            a.title AS listing_a_title,
            b.title AS listing_b_title,
            w.title AS winner_title
        FROM {settings.ELO_SCHEMA}.comparisons c
        LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} a
            ON c.listing_a_id = a.global_id
        LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} b
            ON c.listing_b_id = b.global_id
        LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} w
            ON c.winner_id = w.global_id
        ORDER BY c.created_at DESC
        LIMIT :limit
    """
    rows = db.execute(text(query), {"limit": limit})
    return [
        ComparisonHistoryItem(
            id=r.id,
            listing_a_title=r.listing_a_title,
            listing_b_title=r.listing_b_title,
            winner_title=r.winner_title,
            listing_a_id=r.listing_a_id,
            listing_b_id=r.listing_b_id,
            winner_id=r.winner_id,
            elo_a_before=round(r.elo_a_before, 1),
            elo_b_before=round(r.elo_b_before, 1),
            elo_a_after=round(r.elo_a_after, 1),
            elo_b_after=round(r.elo_b_after, 1),
            created_at=r.created_at,
        )
        for r in rows
    ]
@router.get("/stats", response_model=StatsResponse)
def get_stats(db: Session = Depends(get_db)):
    """Return aggregate statistics about comparisons and ratings.

    Args:
        db: Database session injected through ``get_db``.

    Returns:
        A ``StatsResponse`` containing row counts, the average / maximum /
        minimum ELO (``None`` when there are no ratings), the ELO
        distribution in buckets of 50, and the 10 most recent comparisons.
    """

    def _round_or_none(value):
        # Compare against None explicitly so that a legitimate aggregate of
        # 0 is reported as 0.0 instead of being mistaken for "no data".
        return None if value is None else round(float(value), 1)

    total_comparisons = db.execute(
        text(f"SELECT COUNT(*) FROM {settings.ELO_SCHEMA}.comparisons")
    ).scalar() or 0
    total_rated = db.execute(
        text(f"SELECT COUNT(*) FROM {settings.ELO_SCHEMA}.ratings")
    ).scalar() or 0
    total_listings = db.execute(
        text(
            f"SELECT COUNT(*) FROM {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE}"
        )
    ).scalar() or 0

    elo_agg = db.execute(
        text(
            f"SELECT AVG(elo_rating), MAX(elo_rating), MIN(elo_rating) "
            f"FROM {settings.ELO_SCHEMA}.ratings"
        )
    ).first()
    if elo_agg is None:
        avg_elo = max_elo = min_elo = None
    else:
        avg_elo = _round_or_none(elo_agg[0])
        max_elo = _round_or_none(elo_agg[1])
        min_elo = _round_or_none(elo_agg[2])

    # ELO distribution in buckets of 50
    dist_rows = db.execute(
        text(
            f"SELECT FLOOR(elo_rating / 50) * 50 AS bucket, COUNT(*) AS count "
            f"FROM {settings.ELO_SCHEMA}.ratings "
            f"GROUP BY bucket ORDER BY bucket"
        )
    )
    # Unpack rows positionally: on SQLAlchemy 1.4+ a Row exposes a
    # sequence-style count() method, so attribute access `row.count` would
    # not yield the value of the column aliased "count".
    elo_distribution = [
        {"bucket": f"{int(bucket)}-{int(bucket) + 49}", "count": bucket_size}
        for bucket, bucket_size in dist_rows
    ]

    # Recent comparisons
    recent_query = f"""
        SELECT
            c.*,
            a.title AS listing_a_title,
            b.title AS listing_b_title,
            w.title AS winner_title
        FROM {settings.ELO_SCHEMA}.comparisons c
        LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} a
            ON c.listing_a_id = a.global_id
        LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} b
            ON c.listing_b_id = b.global_id
        LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} w
            ON c.winner_id = w.global_id
        ORDER BY c.created_at DESC
        LIMIT 10
    """
    recent_rows = db.execute(text(recent_query))
    recent_comparisons = [
        ComparisonHistoryItem(
            id=r.id,
            listing_a_title=r.listing_a_title,
            listing_b_title=r.listing_b_title,
            winner_title=r.winner_title,
            listing_a_id=r.listing_a_id,
            listing_b_id=r.listing_b_id,
            winner_id=r.winner_id,
            elo_a_before=round(r.elo_a_before, 1),
            elo_b_before=round(r.elo_b_before, 1),
            elo_a_after=round(r.elo_a_after, 1),
            elo_b_after=round(r.elo_b_after, 1),
            created_at=r.created_at,
        )
        for r in recent_rows
    ]

    return StatsResponse(
        total_comparisons=total_comparisons,
        total_rated_listings=total_rated,
        total_listings=total_listings,
        avg_elo=avg_elo,
        max_elo=max_elo,
        min_elo=min_elo,
        elo_distribution=elo_distribution,
        recent_comparisons=recent_comparisons,
    )