feat: initial project setup

This commit is contained in:
Stijnvandenbroek
2026-03-06 12:25:07 +00:00
commit e1a67da3ce
33 changed files with 2069 additions and 0 deletions

0
backend/app/__init__.py Normal file
View File

41
backend/app/config.py Normal file
View File

@@ -0,0 +1,41 @@
"""Application configuration from environment variables."""
import os
class Settings:
    """Application settings loaded from environment variables.

    Configure via DATABASE_URL (single connection string) or individual
    POSTGRES_* variables. The application expects the target database to
    contain:
    - A listings table (default: marts.funda_listings)
    - An ELO schema with ratings, comparisons, and sample_listings tables
      (default: elo.*)

    The class attributes below are read from the environment once, when the
    class body executes; DATABASE_URL is looked up on every access of
    ``database_url``.
    """

    POSTGRES_HOST: str = os.getenv("POSTGRES_HOST", "localhost")
    POSTGRES_PORT: int = int(os.getenv("POSTGRES_PORT", "5432"))
    POSTGRES_USER: str = os.getenv("POSTGRES_USER", "postgres")
    POSTGRES_PASSWORD: str = os.getenv("POSTGRES_PASSWORD", "postgres")
    POSTGRES_DB: str = os.getenv("POSTGRES_DB", "postgres")

    # Location of the listings table and of the ELO tables.
    LISTINGS_SCHEMA: str = os.getenv("LISTINGS_SCHEMA", "marts")
    LISTINGS_TABLE: str = os.getenv("LISTINGS_TABLE", "funda_listings")
    ELO_SCHEMA: str = os.getenv("ELO_SCHEMA", "elo")

    # ELO tuning: update step size and the rating given to unrated listings.
    K_FACTOR: float = float(os.getenv("ELO_K_FACTOR", "32"))
    DEFAULT_ELO: float = float(os.getenv("ELO_DEFAULT_RATING", "1500"))

    @property
    def database_url(self) -> str:
        """Return the SQLAlchemy connection URL.

        A non-empty DATABASE_URL is returned verbatim. Otherwise the URL is
        assembled from the POSTGRES_* settings, with the user name and
        password percent-encoded so that characters such as ``@``, ``/`` or
        ``:`` cannot be mistaken for URL delimiters.
        """
        from urllib.parse import quote

        url = os.getenv("DATABASE_URL")
        if url:
            return url
        # safe="" also encodes "/", which quote() would otherwise leave as is.
        user = quote(self.POSTGRES_USER, safe="")
        password = quote(self.POSTGRES_PASSWORD, safe="")
        return (
            f"postgresql+psycopg2://{user}:{password}"
            f"@{self.POSTGRES_HOST}:{self.POSTGRES_PORT}/{self.POSTGRES_DB}"
        )
# Module-level instance imported by the database, models, queries and router modules.
settings = Settings()

22
backend/app/database.py Normal file
View File

@@ -0,0 +1,22 @@
"""Database connection and session management."""
from sqlalchemy import create_engine
from sqlalchemy.orm import DeclarativeBase, sessionmaker
from app.config import settings
# Engine built from the configured URL at import time. pool_pre_ping=True asks
# SQLAlchemy to test pooled connections before handing them out.
engine = create_engine(settings.database_url, pool_pre_ping=True)
# Session factory bound to the engine; sessions neither autoflush nor autocommit.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
class Base(DeclarativeBase):
    """Declarative base class that the application's ORM models inherit from."""
def get_db():
    """Yield a database session for use as a FastAPI dependency.

    A new session is created from ``SessionLocal`` for each use and is closed
    once the consumer is done with it, whether or not an error occurred.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        # Runs on normal completion and on errors alike.
        session.close()

26
backend/app/elo.py Normal file
View File

@@ -0,0 +1,26 @@
"""ELO rating calculation."""
def calculate_elo(
    winner_rating: float,
    loser_rating: float,
    k_factor: float = 32.0,
) -> tuple[float, float]:
    """Compute both players' ELO ratings after a decided match.

    Standard ELO update:
        E = 1 / (1 + 10^((R_opponent - R_self) / 400))
        R_new = R_old + K * (S - E)
    with S = 1 for the winner and S = 0 for the loser.

    Args:
        winner_rating: Rating of the winner before the match.
        loser_rating: Rating of the loser before the match.
        k_factor: Maximum number of points a single match can move a rating.

    Returns:
        (new_winner_rating, new_loser_rating), each rounded to 2 decimals.
    """
    rating_gap = (loser_rating - winner_rating) / 400.0
    win_probability = 1.0 / (1.0 + 10.0 ** rating_gap)
    loss_probability = 1.0 - win_probability
    updated = (
        winner_rating + k_factor * (1.0 - win_probability),
        loser_rating + k_factor * (0.0 - loss_probability),
    )
    return tuple(round(value, 2) for value in updated)

29
backend/app/main.py Normal file
View File

@@ -0,0 +1,29 @@
"""House ELO Ranking API."""
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.routers import comparisons, listings, rankings
# The ASGI application object; the title, description and version are passed
# to FastAPI as API metadata.
app = FastAPI(
    title="House ELO Ranking",
    description="Pairwise comparison and ELO ranking of Funda listings",
    version="1.0.0",
)
# CORS: any origin may call the API. Credentials stay disabled because the
# CORS middleware documentation requires explicit origins, methods and headers
# whenever allow_credentials is True, and none of the routes visible in this
# app use cookies or authentication.
# NOTE(review): if a frontend needs credentialed requests, list its origin(s)
# explicitly here instead of re-enabling credentials together with "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Mount every router under /api, tagged with its own name, in the same order
# as before: comparisons, listings, rankings.
for _module, _tag in (
    (comparisons, "comparisons"),
    (listings, "listings"),
    (rankings, "rankings"),
):
    app.include_router(_module.router, prefix="/api", tags=[_tag])
@app.get("/api/health")
def health():
    """Report a fixed "ok" status; the handler does not touch the database."""
    payload = {"status": "ok"}
    return payload

44
backend/app/models.py Normal file
View File

@@ -0,0 +1,44 @@
"""SQLAlchemy ORM models for ELO rating tables."""
from sqlalchemy import Column, DateTime, Float, Integer, String
from sqlalchemy.sql import func
from app.config import settings
from app.database import Base
class EloRating(Base):
    """Tracks the current ELO rating for each listing.

    Mapped to the ``ratings`` table in the schema named by
    ``settings.ELO_SCHEMA``.
    """

    __tablename__ = "ratings"
    __table_args__ = {"schema": settings.ELO_SCHEMA}

    # Listing identifier; LISTING_SELECT joins it to the listings table's global_id.
    global_id = Column(String, primary_key=True)
    # Current rating; new rows start at settings.DEFAULT_ELO.
    elo_rating = Column(Float, nullable=False, default=settings.DEFAULT_ELO)
    # Counters that submit_comparison increments for each recorded comparison.
    comparison_count = Column(Integer, nullable=False, default=0)
    wins = Column(Integer, nullable=False, default=0)
    losses = Column(Integer, nullable=False, default=0)
    # Timestamps come from the database clock (func.now()); updated_at is
    # also refreshed on UPDATE via onupdate.
    created_at = Column(DateTime(timezone=True), server_default=func.now())
    updated_at = Column(
        DateTime(timezone=True), server_default=func.now(), onupdate=func.now()
    )
class Comparison(Base):
    """Records each pairwise comparison with ELO snapshots.

    Mapped to the ``comparisons`` table in the schema named by
    ``settings.ELO_SCHEMA``. The listing id columns are plain strings; no
    foreign keys are declared on them.
    """

    __tablename__ = "comparisons"
    __table_args__ = {"schema": settings.ELO_SCHEMA}

    id = Column(Integer, primary_key=True, autoincrement=True)
    # The two listings that were compared, and which of them won.
    listing_a_id = Column(String, nullable=False)
    listing_b_id = Column(String, nullable=False)
    winner_id = Column(String, nullable=False)
    # Ratings of listing A and listing B before and after this comparison.
    elo_a_before = Column(Float, nullable=False)
    elo_b_before = Column(Float, nullable=False)
    elo_a_after = Column(Float, nullable=False)
    elo_b_after = Column(Float, nullable=False)
    # Set by the database clock on insert; used to order recent comparisons.
    created_at = Column(DateTime(timezone=True), server_default=func.now())

68
backend/app/queries.py Normal file
View File

@@ -0,0 +1,68 @@
"""Shared query helpers for listing data."""
from app.config import settings
from app.schemas import ListingResponse
# Base SELECT shared by the routers: every listing column used by
# row_to_listing, LEFT JOINed to the ratings table so unrated listings still
# appear, with COALESCE supplying the default rating and zeroed counters.
# Callers append further JOIN / WHERE / ORDER BY clauses to this string.
# Schema and table names are interpolated from settings (environment
# configuration); request values must be passed as bound parameters instead.
LISTING_SELECT = f"""
SELECT
l.global_id, l.tiny_id, l.url, l.title, l.city, l.postcode,
l.province, l.neighbourhood, l.municipality,
l.latitude, l.longitude,
l.object_type, l.house_type, l.offering_type,
l.construction_type, l.construction_year,
l.energy_label, l.living_area, l.plot_area,
l.bedrooms, l.rooms,
l.has_garden, l.has_balcony, l.has_solar_panels,
l.has_heat_pump, l.has_roof_terrace,
l.is_energy_efficient, l.is_monument,
l.current_price, l.status, l.price_per_sqm,
l.publication_date,
COALESCE(r.elo_rating, {settings.DEFAULT_ELO}) AS elo_rating,
COALESCE(r.comparison_count, 0) AS comparison_count,
COALESCE(r.wins, 0) AS wins,
COALESCE(r.losses, 0) AS losses
FROM {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} l
LEFT JOIN {settings.ELO_SCHEMA}.ratings r ON l.global_id = r.global_id
"""
def row_to_listing(row) -> ListingResponse:
    """Build a ListingResponse from one result row of a LISTING_SELECT query.

    Most columns are copied unchanged. Latitude, longitude and price_per_sqm
    are converted to float when present, the ELO rating is rounded to one
    decimal, and the rating counters are converted to int.
    """

    def _optional_float(value):
        return None if value is None else float(value)

    # Columns that are handed to the schema exactly as the row provides them.
    passthrough = (
        "global_id", "tiny_id", "url", "title", "city", "postcode",
        "province", "neighbourhood", "municipality",
        "object_type", "house_type", "offering_type",
        "construction_type", "construction_year",
        "energy_label", "living_area", "plot_area",
        "bedrooms", "rooms",
        "has_garden", "has_balcony", "has_solar_panels",
        "has_heat_pump", "has_roof_terrace",
        "is_energy_efficient", "is_monument",
        "current_price", "status", "publication_date",
    )
    fields = {name: getattr(row, name) for name in passthrough}

    for name in ("latitude", "longitude", "price_per_sqm"):
        fields[name] = _optional_float(getattr(row, name))

    fields["elo_rating"] = round(float(row.elo_rating), 1)
    for name in ("comparison_count", "wins", "losses"):
        fields[name] = int(getattr(row, name))

    return ListingResponse(**fields)

View File

View File

@@ -0,0 +1,281 @@
"""Comparison endpoints matchmaking and ELO updates."""
import random
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.config import settings
from app.database import get_db
from app.elo import calculate_elo
from app.models import Comparison, EloRating
from app.queries import LISTING_SELECT, row_to_listing
from app.schemas import (
CompareRequest,
CompareResponse,
ComparisonHistoryItem,
MatchupResponse,
StatsResponse,
)
# Router for the matchup, compare, history and stats endpoints defined below.
router = APIRouter()
@router.get("/matchup", response_model=MatchupResponse)
def get_matchup(
    status: str | None = None,
    db: Session = Depends(get_db),
):
    """Return a weighted-random pair of listings for comparison.

    Only listings in the stable sample (elo.sample_listings) are considered.
    Listings with fewer comparisons are more likely to appear, ensuring
    broad coverage across all properties.

    Args:
        status: Optional listing status to filter on; ``None``, an empty
            string or ``"all"`` disables the filter.
        db: Database session.

    Raises:
        HTTPException: 400 when fewer than two distinct listings are available.
    """
    query = LISTING_SELECT + f"""
INNER JOIN {settings.ELO_SCHEMA}.sample_listings s
ON l.global_id = s.global_id
"""
    params: dict = {}
    if status and status != "all":
        query += " WHERE l.status = :status"
        params["status"] = status

    listings = [row_to_listing(row) for row in db.execute(text(query), params)]
    if len(listings) < 2:
        raise HTTPException(
            status_code=400,
            detail="Not enough listings for comparison (need at least 2).",
        )

    # Gather recent pairs to avoid immediate repeats
    recent = db.execute(
        text(
            f"SELECT listing_a_id, listing_b_id "
            f"FROM {settings.ELO_SCHEMA}.comparisons "
            f"ORDER BY created_at DESC LIMIT 20"
        )
    )
    recent_pairs = {frozenset((r.listing_a_id, r.listing_b_id)) for r in recent}

    def weight(listing) -> float:
        # Inverse comparison count: prioritise less-compared houses.
        return 1.0 / (listing.comparison_count + 1) ** 1.5

    first = random.choices(
        listings, weights=[weight(item) for item in listings], k=1
    )[0]

    partners = [item for item in listings if item.global_id != first.global_id]
    if not partners:
        # Every row carried the same global_id, so no real pair exists.
        raise HTTPException(
            status_code=400,
            detail="Not enough listings for comparison (need at least 2).",
        )

    # Filter out recently seen pairs up front rather than retrying random
    # draws; this never falls back to a repeat while a fresh partner exists.
    fresh = [
        item
        for item in partners
        if frozenset((first.global_id, item.global_id)) not in recent_pairs
    ]
    pool = fresh or partners  # all partners were seen recently: allow a repeat
    second = random.choices(pool, weights=[weight(item) for item in pool], k=1)[0]

    return MatchupResponse(listing_a=first, listing_b=second)
def _get_or_create_rating(db: Session, global_id: str) -> EloRating:
    """Return the rating row for ``global_id``, adding and flushing a default-rated row if none exists."""
    rating = db.query(EloRating).filter_by(global_id=global_id).first()
    if not rating:
        rating = EloRating(global_id=global_id, elo_rating=settings.DEFAULT_ELO)
        db.add(rating)
        db.flush()
    return rating


@router.post("/compare", response_model=CompareResponse)
def submit_comparison(body: CompareRequest, db: Session = Depends(get_db)):
    """Store the outcome of one comparison and apply the ELO update.

    Rating rows are created on demand for listings that have none yet. The
    comparison is stored with the winner as listing A and the loser as
    listing B, and everything is committed in a single transaction.

    Raises:
        HTTPException: 400 when winner and loser are the same listing.
    """
    winner_id, loser_id = body.winner_id, body.loser_id
    if winner_id == loser_id:
        raise HTTPException(status_code=400, detail="Winner and loser must differ.")

    winner = _get_or_create_rating(db, winner_id)
    loser = _get_or_create_rating(db, loser_id)

    winner_before, loser_before = winner.elo_rating, loser.elo_rating
    winner_after, loser_after = calculate_elo(
        winner_before, loser_before, settings.K_FACTOR
    )

    # Apply the new ratings and bump the counters.
    winner.elo_rating = winner_after
    winner.comparison_count += 1
    winner.wins += 1

    loser.elo_rating = loser_after
    loser.comparison_count += 1
    loser.losses += 1

    record = Comparison(
        listing_a_id=winner_id,
        listing_b_id=loser_id,
        winner_id=winner_id,
        elo_a_before=winner_before,
        elo_b_before=loser_before,
        elo_a_after=winner_after,
        elo_b_after=loser_after,
    )
    db.add(record)
    db.commit()

    return CompareResponse(
        winner_id=winner_id,
        loser_id=loser_id,
        elo_change=round(winner_after - winner_before, 1),
        new_winner_elo=round(winner_after, 1),
        new_loser_elo=round(loser_after, 1),
    )
@router.get("/history", response_model=list[ComparisonHistoryItem])
def get_history(
    limit: int = 50,
    db: Session = Depends(get_db),
):
    """Return recent comparisons, newest first.

    Each comparison is joined to the listings table three times to resolve
    the titles of listing A, listing B and the winner; a title is ``None``
    when the listing is no longer present.

    Args:
        limit: Maximum number of comparisons to return.
        db: Database session.

    Raises:
        HTTPException: 400 when ``limit`` is negative.
    """
    if limit < 0:
        # A negative LIMIT is rejected by the database and would otherwise
        # surface as an unhandled server error.
        raise HTTPException(status_code=400, detail="limit must not be negative.")

    query = f"""
SELECT
c.*,
a.title AS listing_a_title,
b.title AS listing_b_title,
w.title AS winner_title
FROM {settings.ELO_SCHEMA}.comparisons c
LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} a
ON c.listing_a_id = a.global_id
LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} b
ON c.listing_b_id = b.global_id
LEFT JOIN {settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE} w
ON c.winner_id = w.global_id
ORDER BY c.created_at DESC
LIMIT :limit
"""
    rows = db.execute(text(query), {"limit": limit})
    return [
        ComparisonHistoryItem(
            id=r.id,
            listing_a_title=r.listing_a_title,
            listing_b_title=r.listing_b_title,
            winner_title=r.winner_title,
            listing_a_id=r.listing_a_id,
            listing_b_id=r.listing_b_id,
            winner_id=r.winner_id,
            elo_a_before=round(r.elo_a_before, 1),
            elo_b_before=round(r.elo_b_before, 1),
            elo_a_after=round(r.elo_a_after, 1),
            elo_b_after=round(r.elo_b_after, 1),
            created_at=r.created_at,
        )
        for r in rows
    ]
@router.get("/stats", response_model=StatsResponse)
def get_stats(db: Session = Depends(get_db)):
    """Return aggregate statistics about comparisons and ratings.

    Includes row counts for comparisons, rated listings and all listings,
    the average / highest / lowest rating (``None`` when there are no
    ratings), a histogram of ratings in buckets of 50, and the ten most
    recent comparisons.
    """

    def count_rows(table: str) -> int:
        return db.execute(text(f"SELECT COUNT(*) FROM {table}")).scalar() or 0

    total_comparisons = count_rows(f"{settings.ELO_SCHEMA}.comparisons")
    total_rated = count_rows(f"{settings.ELO_SCHEMA}.ratings")
    total_listings = count_rows(
        f"{settings.LISTINGS_SCHEMA}.{settings.LISTINGS_TABLE}"
    )

    elo_agg = db.execute(
        text(
            f"SELECT AVG(elo_rating), MAX(elo_rating), MIN(elo_rating) "
            f"FROM {settings.ELO_SCHEMA}.ratings"
        )
    ).first()
    # Compare against None explicitly: a legitimate aggregate of 0 is falsy
    # and must not be reported as "no data".
    avg_elo, max_elo, min_elo = (
        round(float(value), 1) if value is not None else None
        for value in (elo_agg if elo_agg is not None else (None, None, None))
    )

    # ELO distribution in buckets of 50
    dist_rows = db.execute(
        text(
            f"SELECT FLOOR(elo_rating / 50) * 50 AS bucket, COUNT(*) AS count "
            f"FROM {settings.ELO_SCHEMA}.ratings "
            f"GROUP BY bucket ORDER BY bucket"
        )
    )
    elo_distribution = [
        {"bucket": f"{int(r.bucket)}-{int(r.bucket) + 49}", "count": r.count}
        for r in dist_rows
    ]

    # Recent comparisons: reuse the history endpoint's query and row mapping
    # instead of keeping a second copy of both here.
    recent_comparisons = get_history(limit=10, db=db)

    return StatsResponse(
        total_comparisons=total_comparisons,
        total_rated_listings=total_rated,
        total_listings=total_listings,
        avg_elo=avg_elo,
        max_elo=max_elo,
        min_elo=min_elo,
        elo_distribution=elo_distribution,
        recent_comparisons=recent_comparisons,
    )

View File

@@ -0,0 +1,37 @@
"""Listing endpoints read-only access to Funda data with ELO ratings."""
from fastapi import APIRouter, Depends, HTTPException
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.database import get_db
from app.queries import LISTING_SELECT, row_to_listing
from app.schemas import ListingResponse
# Router for the listing endpoints defined below.
router = APIRouter()
@router.get("/listings", response_model=list[ListingResponse])
def get_listings(
    status: str | None = None,
    db: Session = Depends(get_db),
):
    """List every listing with its ELO data, highest rating first.

    ``status`` narrows the result to listings with that status; ``None``,
    an empty string or ``"all"`` returns everything.
    """
    use_filter = bool(status) and status != "all"

    parts = [LISTING_SELECT]
    if use_filter:
        parts.append(" WHERE l.status = :status")
    parts.append(" ORDER BY elo_rating DESC")

    bind: dict = {"status": status} if use_filter else {}
    rows = db.execute(text("".join(parts)), bind)
    return list(map(row_to_listing, rows))
@router.get("/listings/{global_id}", response_model=ListingResponse)
def get_listing(global_id: str, db: Session = Depends(get_db)):
    """Look up one listing by ``global_id``.

    Raises:
        HTTPException: 404 when no listing has that id.
    """
    statement = text(LISTING_SELECT + " WHERE l.global_id = :global_id")
    match = db.execute(statement, {"global_id": global_id}).first()
    if match:
        return row_to_listing(match)
    raise HTTPException(status_code=404, detail="Listing not found")

View File

@@ -0,0 +1,36 @@
"""Ranking endpoints listings sorted by ELO."""
from fastapi import APIRouter, Depends
from sqlalchemy import text
from sqlalchemy.orm import Session
from app.database import get_db
from app.queries import LISTING_SELECT, row_to_listing
from app.schemas import RankingResponse
# Router for the rankings endpoint defined below.
router = APIRouter()
@router.get("/rankings", response_model=list[RankingResponse])
def get_rankings(
    status: str | None = None,
    limit: int = 100,
    offset: int = 0,
    db: Session = Depends(get_db),
):
    """Return listings ranked by ELO rating (highest first).

    Ties in rating are ordered by current price, highest first. Ranks are
    1-based and continue across pages (``offset + position``).

    Args:
        status: Optional listing status to filter on; ``None``, an empty
            string or ``"all"`` disables the filter.
        limit: Maximum number of rows to return.
        offset: Number of leading rows to skip.
        db: Database session.

    Raises:
        HTTPException: 400 when ``limit`` or ``offset`` is negative.
    """
    if limit < 0 or offset < 0:
        # Negative LIMIT/OFFSET values are rejected by the database and would
        # otherwise surface as an unhandled server error.
        raise HTTPException(
            status_code=400, detail="limit and offset must not be negative."
        )

    query = LISTING_SELECT
    params: dict = {"limit": limit, "offset": offset}
    if status and status != "all":
        query += " WHERE l.status = :status"
        params["status"] = status
    query += " ORDER BY elo_rating DESC, l.current_price DESC LIMIT :limit OFFSET :offset"

    result = db.execute(text(query), params)
    return [
        RankingResponse(rank=rank, listing=row_to_listing(row))
        for rank, row in enumerate(result, start=offset + 1)
    ]

110
backend/app/schemas.py Normal file
View File

@@ -0,0 +1,110 @@
"""Pydantic schemas for API request/response models."""
from datetime import datetime
from pydantic import BaseModel
class ListingResponse(BaseModel):
    """Funda listing combined with its ELO rating data.

    Only ``global_id`` and the four rating fields are required; every other
    listing attribute is optional and defaults to ``None``.
    """

    # Identity and location
    global_id: str
    tiny_id: str | None = None
    url: str | None = None
    title: str | None = None
    city: str | None = None
    postcode: str | None = None
    province: str | None = None
    neighbourhood: str | None = None
    municipality: str | None = None
    latitude: float | None = None
    longitude: float | None = None
    # Property characteristics
    object_type: str | None = None
    house_type: str | None = None
    offering_type: str | None = None
    construction_type: str | None = None
    construction_year: str | None = None
    energy_label: str | None = None
    living_area: int | None = None
    plot_area: int | None = None
    bedrooms: int | None = None
    rooms: int | None = None
    has_garden: bool | None = None
    has_balcony: bool | None = None
    has_solar_panels: bool | None = None
    has_heat_pump: bool | None = None
    has_roof_terrace: bool | None = None
    is_energy_efficient: bool | None = None
    is_monument: bool | None = None
    # Pricing and publication
    current_price: int | None = None
    status: str | None = None
    price_per_sqm: float | None = None
    publication_date: str | None = None
    # Rating data; LISTING_SELECT fills these with defaults for unrated listings.
    elo_rating: float
    comparison_count: int
    wins: int
    losses: int
class MatchupResponse(BaseModel):
    """A pair of listings to compare, as returned by the matchup endpoint."""

    listing_a: ListingResponse
    listing_b: ListingResponse
class CompareRequest(BaseModel):
    """Submit the result of a pairwise comparison.

    Both fields are listing global_ids; the compare endpoint rejects a
    request in which they are equal.
    """

    winner_id: str
    loser_id: str
class CompareResponse(BaseModel):
    """Result of a comparison submission with ELO changes."""

    winner_id: str
    loser_id: str
    # Winner's new rating minus its previous rating, rounded to one decimal.
    elo_change: float
    # Ratings after the update, rounded to one decimal.
    new_winner_elo: float
    new_loser_elo: float
class RankingResponse(BaseModel):
    """A listing with its rank position."""

    # 1-based position in the ranking; the rankings endpoint adds the page offset.
    rank: int
    listing: ListingResponse
class ComparisonHistoryItem(BaseModel):
    """A single historical comparison record."""

    id: int
    # Titles come from LEFT JOINs on the listings table, so each may be None;
    # the fields are nullable but have no default and must be supplied.
    listing_a_title: str | None
    listing_b_title: str | None
    winner_title: str | None
    listing_a_id: str
    listing_b_id: str
    winner_id: str
    # Ratings of listing A and listing B before and after the comparison.
    elo_a_before: float
    elo_b_before: float
    elo_a_after: float
    elo_b_after: float
    created_at: datetime
class StatsResponse(BaseModel):
    """Overall statistics about comparisons and ratings."""

    total_comparisons: int
    total_rated_listings: int
    total_listings: int
    # Rating aggregates; None when the stats endpoint has no value to report.
    avg_elo: float | None
    max_elo: float | None
    min_elo: float | None
    # One dict per rating bucket, with the keys "bucket" and "count".
    elo_distribution: list[dict]
    recent_comparisons: list[ComparisonHistoryItem]
recent_comparisons: list[ComparisonHistoryItem]