feat: initial project setup

This commit is contained in:
Stijnvandenbroek
2026-03-06 12:25:07 +00:00
commit e1a67da3ce
33 changed files with 2069 additions and 0 deletions

View File

View File

@@ -0,0 +1,281 @@
"""Comparison endpoints matchmaking and ELO updates."""
import random
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.config import settings
from app.database import get_db
from app.elo import calculate_elo
from app.models import Comparison, EloRating
from app.queries import LISTING_SELECT, row_to_listing
from app.schemas import (
CompareRequest,
CompareResponse,
ComparisonHistoryItem,
MatchupResponse,
StatsResponse,
)
# Router that the comparison endpoints in this module register themselves on
# via the ``@router.get`` / ``@router.post`` decorators.
router = APIRouter()
def _matchup_weight(listing):
    """Return the sampling weight for *listing*.

    The weight shrinks as ``comparison_count`` grows, so listings that have
    been compared less often are drawn more frequently.
    """
    return 1.0 / (listing.comparison_count + 1) ** 1.5


@router.get("/matchup", response_model=MatchupResponse)
def get_matchup(
    status: str | None = None,
    db: Session = Depends(get_db),
):
    """Return a weighted-random pair of listings for comparison.

    Only listings present in the ``sample_listings`` table of the ELO schema
    are considered. Listings with fewer comparisons are more likely to
    appear, and pairs found among the 20 most recent comparisons are avoided
    whenever another candidate exists.

    Args:
        status: Optional listing status to filter on. ``None``, an empty
            string or ``"all"`` disables the filter.
        db: Database session injected by FastAPI.

    Returns:
        A ``MatchupResponse`` holding two listings with different
        ``global_id`` values.

    Raises:
        HTTPException: 400 when fewer than two distinct listings are
            available.
    """
    not_enough_detail = "Not enough listings for comparison (need at least 2)."

    query = LISTING_SELECT + f"""
        INNER JOIN {settings.ELO_SCHEMA}.sample_listings s
            ON l.global_id = s.global_id
    """
    params: dict = {}
    if status and status != "all":
        query += " WHERE l.status = :status"
        params["status"] = status

    listings = [row_to_listing(row) for row in db.execute(text(query), params)]
    if len(listings) < 2:
        raise HTTPException(status_code=400, detail=not_enough_detail)

    # Gather recent pairs (order-insensitive) to avoid immediate repeats.
    recent = db.execute(
        text(
            f"SELECT listing_a_id, listing_b_id "
            f"FROM {settings.ELO_SCHEMA}.comparisons "
            f"ORDER BY created_at DESC LIMIT 20"
        )
    )
    recent_pairs = {frozenset((r.listing_a_id, r.listing_b_id)) for r in recent}

    # Pick the first listing, favouring less-compared ones.
    first = random.choices(
        listings,
        weights=[_matchup_weight(candidate) for candidate in listings],
        k=1,
    )[0]

    # Everything except the first listing is a potential opponent.
    opponents = [c for c in listings if c.global_id != first.global_id]
    if not opponents:
        # All rows shared one global_id, so no real pair can be formed.
        raise HTTPException(status_code=400, detail=not_enough_detail)

    # Filter out recently shown pairs up front instead of retrying random
    # draws: the resulting distribution is the same, but a fresh opponent is
    # guaranteed to be chosen whenever one exists.
    fresh = [
        c
        for c in opponents
        if frozenset((first.global_id, c.global_id)) not in recent_pairs
    ]
    pool = fresh or opponents  # fall back to a repeat only when unavoidable
    second = random.choices(
        pool,
        weights=[_matchup_weight(candidate) for candidate in pool],
        k=1,
    )[0]

    return MatchupResponse(listing_a=first, listing_b=second)
def _load_or_init_rating(db, global_id):
    """Fetch the rating row for *global_id*, inserting a default one if absent.

    A newly created row starts at ``settings.DEFAULT_ELO`` and is flushed
    immediately so it is persisted within the current transaction.
    """
    rating = db.query(EloRating).filter_by(global_id=global_id).first()
    if rating:
        return rating
    rating = EloRating(global_id=global_id, elo_rating=settings.DEFAULT_ELO)
    db.add(rating)
    db.flush()
    return rating


@router.post("/compare", response_model=CompareResponse)
def submit_comparison(body: CompareRequest, db: Session = Depends(get_db)):
    """Record a comparison result and update both ELO ratings.

    Args:
        body: Request payload carrying ``winner_id`` and ``loser_id``.
        db: Database session injected by FastAPI.

    Returns:
        A ``CompareResponse`` with the winner's rating change and both new
        ratings, each rounded to one decimal.

    Raises:
        HTTPException: 400 when winner and loser are the same listing.
    """
    winner_id, loser_id = body.winner_id, body.loser_id
    if winner_id == loser_id:
        raise HTTPException(status_code=400, detail="Winner and loser must differ.")

    # Winner first, then loser: same lookup/insert order as the stored record.
    winner = _load_or_init_rating(db, winner_id)
    loser = _load_or_init_rating(db, loser_id)

    winner_before = winner.elo_rating
    loser_before = loser.elo_rating
    winner_after, loser_after = calculate_elo(
        winner_before, loser_before, settings.K_FACTOR
    )

    # Apply the outcome to the winner's rating row.
    winner.elo_rating = winner_after
    winner.comparison_count += 1
    winner.wins += 1

    # Apply the outcome to the loser's rating row.
    loser.elo_rating = loser_after
    loser.comparison_count += 1
    loser.losses += 1

    # The winner is always stored as side "a" of the comparison record.
    record = Comparison(
        listing_a_id=winner_id,
        listing_b_id=loser_id,
        winner_id=winner_id,
        elo_a_before=winner_before,
        elo_b_before=loser_before,
        elo_a_after=winner_after,
        elo_b_after=loser_after,
    )
    db.add(record)
    db.commit()

    return CompareResponse(
        winner_id=winner_id,
        loser_id=loser_id,
        elo_change=round(winner_after - winner_before, 1),
        new_winner_elo=round(winner_after, 1),
        new_loser_elo=round(loser_after, 1),
    )
@router.get("/history", response_model=list[ComparisonHistoryItem])
def get_history(
    limit: int = 50,
    db: Session = Depends(get_db),
):
    """Return recent comparisons, newest first.

    Each comparison is joined against the listings table three times to
    resolve the titles of listing A, listing B and the winner. The joins are
    LEFT JOINs, so a title is ``None`` when the listing row no longer exists.

    Args:
        limit: Maximum number of comparisons to return. Negative values are
            treated as 0.
        db: Database session injected by FastAPI.

    Returns:
        A list of ``ComparisonHistoryItem`` with ELO values rounded to one
        decimal.
    """
    # A negative LIMIT is rejected by some databases (e.g. PostgreSQL) and
    # would surface as a 500; clamp so the request yields an empty list.
    limit = max(limit, 0)

    listings_table = f"{settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE}"
    query = f"""
        SELECT
            c.*,
            a.title AS listing_a_title,
            b.title AS listing_b_title,
            w.title AS winner_title
        FROM {settings.ELO_SCHEMA}.comparisons c
        LEFT JOIN {listings_table} a
            ON c.listing_a_id = a.global_id
        LEFT JOIN {listings_table} b
            ON c.listing_b_id = b.global_id
        LEFT JOIN {listings_table} w
            ON c.winner_id = w.global_id
        ORDER BY c.created_at DESC
        LIMIT :limit
    """
    rows = db.execute(text(query), {"limit": limit})
    return [
        ComparisonHistoryItem(
            id=r.id,
            listing_a_title=r.listing_a_title,
            listing_b_title=r.listing_b_title,
            winner_title=r.winner_title,
            listing_a_id=r.listing_a_id,
            listing_b_id=r.listing_b_id,
            winner_id=r.winner_id,
            elo_a_before=round(r.elo_a_before, 1),
            elo_b_before=round(r.elo_b_before, 1),
            elo_a_after=round(r.elo_a_after, 1),
            elo_b_after=round(r.elo_b_after, 1),
            created_at=r.created_at,
        )
        for r in rows
    ]
def _rounded_or_none(value):
    """Round *value* to one decimal, passing ``None`` through unchanged.

    The check is against ``None`` specifically, so a legitimate aggregate of
    0 is reported as ``0.0`` rather than being mistaken for "no data".
    """
    return None if value is None else round(float(value), 1)


@router.get("/stats", response_model=StatsResponse)
def get_stats(db: Session = Depends(get_db)):
    """Return aggregate statistics about comparisons and ratings.

    Args:
        db: Database session injected by FastAPI.

    Returns:
        A ``StatsResponse`` containing row counts for comparisons, ratings
        and listings, the average/max/min ELO (``None`` when there are no
        ratings), the ELO distribution in buckets of 50, and the 10 most
        recent comparisons.
    """
    comparisons_table = f"{settings.ELO_SCHEMA}.comparisons"
    ratings_table = f"{settings.ELO_SCHEMA}.ratings"
    listings_table = f"{settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE}"

    total_comparisons = db.execute(
        text(f"SELECT COUNT(*) FROM {comparisons_table}")
    ).scalar() or 0
    total_rated = db.execute(
        text(f"SELECT COUNT(*) FROM {ratings_table}")
    ).scalar() or 0
    total_listings = db.execute(
        text(f"SELECT COUNT(*) FROM {listings_table}")
    ).scalar() or 0

    elo_agg = db.execute(
        text(
            f"SELECT AVG(elo_rating), MAX(elo_rating), MIN(elo_rating) "
            f"FROM {ratings_table}"
        )
    ).first()
    if elo_agg is None:
        avg_elo = max_elo = min_elo = None
    else:
        # The aggregates are NULL (None) when the ratings table is empty.
        avg_elo = _rounded_or_none(elo_agg[0])
        max_elo = _rounded_or_none(elo_agg[1])
        min_elo = _rounded_or_none(elo_agg[2])

    # ELO distribution in buckets of 50
    dist_rows = db.execute(
        text(
            f"SELECT FLOOR(elo_rating / 50) * 50 AS bucket, COUNT(*) AS count "
            f"FROM {ratings_table} "
            f"GROUP BY bucket ORDER BY bucket"
        )
    )
    elo_distribution = [
        {"bucket": f"{int(r.bucket)}-{int(r.bucket) + 49}", "count": r.count}
        for r in dist_rows
    ]

    # Recent comparisons, with listing titles resolved through LEFT JOINs so
    # a missing listing yields a None title instead of dropping the row.
    recent_query = f"""
        SELECT
            c.*,
            a.title AS listing_a_title,
            b.title AS listing_b_title,
            w.title AS winner_title
        FROM {comparisons_table} c
        LEFT JOIN {listings_table} a
            ON c.listing_a_id = a.global_id
        LEFT JOIN {listings_table} b
            ON c.listing_b_id = b.global_id
        LEFT JOIN {listings_table} w
            ON c.winner_id = w.global_id
        ORDER BY c.created_at DESC
        LIMIT 10
    """
    recent_rows = db.execute(text(recent_query))
    recent_comparisons = [
        ComparisonHistoryItem(
            id=r.id,
            listing_a_title=r.listing_a_title,
            listing_b_title=r.listing_b_title,
            winner_title=r.winner_title,
            listing_a_id=r.listing_a_id,
            listing_b_id=r.listing_b_id,
            winner_id=r.winner_id,
            elo_a_before=round(r.elo_a_before, 1),
            elo_b_before=round(r.elo_b_before, 1),
            elo_a_after=round(r.elo_a_after, 1),
            elo_b_after=round(r.elo_b_after, 1),
            created_at=r.created_at,
        )
        for r in recent_rows
    ]

    return StatsResponse(
        total_comparisons=total_comparisons,
        total_rated_listings=total_rated,
        total_listings=total_listings,
        avg_elo=avg_elo,
        max_elo=max_elo,
        min_elo=min_elo,
        elo_distribution=elo_distribution,
        recent_comparisons=recent_comparisons,
    )

View File

@@ -0,0 +1,37 @@
"""Listing endpoints read-only access to Funda data with ELO ratings."""
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.database import get_db
from app.queries import LISTING_SELECT, row_to_listing
from app.schemas import ListingResponse
# Router that the listing endpoints in this module register themselves on
# via the ``@router.get`` decorators.
router = APIRouter()
@router.get("/listings", response_model=list[ListingResponse])
def get_listings(
    status: str | None = None,
    db: Session = Depends(get_db),
):
    """Return every listing together with its ELO rating, best-rated first.

    Args:
        status: Optional listing status to filter on. ``None``, an empty
            string or ``"all"`` returns listings of every status.
        db: Database session injected by FastAPI.

    Returns:
        A list of listings ordered by ``elo_rating`` descending.
    """
    filter_by_status = bool(status) and status != "all"

    # Assemble the SQL fragments in order: base select, optional filter, sort.
    fragments = [LISTING_SELECT]
    bind_values: dict = {}
    if filter_by_status:
        fragments.append(" WHERE l.status = :status")
        bind_values["status"] = status
    fragments.append(" ORDER BY elo_rating DESC")

    rows = db.execute(text("".join(fragments)), bind_values)
    return list(map(row_to_listing, rows))
@router.get("/listings/{global_id}", response_model=ListingResponse)
def get_listing(global_id: str, db: Session = Depends(get_db)):
    """Look up one listing by its ``global_id``.

    Args:
        global_id: Identifier of the listing, taken from the URL path.
        db: Database session injected by FastAPI.

    Returns:
        The matching listing.

    Raises:
        HTTPException: 404 when no listing has that ``global_id``.
    """
    statement = text(LISTING_SELECT + " WHERE l.global_id = :global_id")
    match = db.execute(statement, {"global_id": global_id}).first()
    if match:
        return row_to_listing(match)
    raise HTTPException(status_code=404, detail="Listing not found")

View File

@@ -0,0 +1,36 @@
"""Ranking endpoints listings sorted by ELO."""
from fastapi import APIRouter, Depends
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.database import get_db
from app.queries import LISTING_SELECT, row_to_listing
from app.schemas import RankingResponse
# Router that the ranking endpoint in this module registers itself on via
# the ``@router.get`` decorator.
router = APIRouter()
@router.get("/rankings", response_model=list[RankingResponse])
def get_rankings(
    status: str | None = None,
    limit: int = 100,
    offset: int = 0,
    db: Session = Depends(get_db),
):
    """Return listings ranked by ELO rating (highest first).

    Ties on ELO are broken by ``current_price`` descending. Ranks are
    1-based and continue across pages, so the first item of a page has rank
    ``offset + 1``.

    Args:
        status: Optional listing status to filter on. ``None``, an empty
            string or ``"all"`` disables the filter.
        limit: Maximum number of rows to return. Negative values are
            treated as 0.
        offset: Number of rows to skip. Negative values are treated as 0.
        db: Database session injected by FastAPI.

    Returns:
        A list of ``RankingResponse`` items, each pairing a listing with its
        rank.
    """
    # Negative paging values are rejected by some databases (e.g. PostgreSQL)
    # and a negative offset would also produce non-positive ranks; clamp both.
    limit = max(limit, 0)
    offset = max(offset, 0)

    query = LISTING_SELECT
    params: dict = {"limit": limit, "offset": offset}
    if status and status != "all":
        query += " WHERE l.status = :status"
        params["status"] = status
    query += " ORDER BY elo_rating DESC, l.current_price DESC LIMIT :limit OFFSET :offset"

    result = db.execute(text(query), params)
    return [
        RankingResponse(rank=rank, listing=row_to_listing(row))
        for rank, row in enumerate(result, start=offset + 1)
    ]