feat: restructure sql ingestion
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
-- Detail record for a listing. Uniqueness is enforced on (global_id, status).
-- No column carries a NOT NULL constraint.
-- NOTE(review): both key columns are nullable and PostgreSQL treats NULLs as
-- distinct in a UNIQUE constraint by default, so rows with a NULL key column
-- never conflict with each other - confirm this is intended.
CREATE TABLE IF NOT EXISTS {{ schema }}.listing_details (
    -- Identifiers and address fields
    global_id            TEXT,
    tiny_id              TEXT,
    title                TEXT,
    city                 TEXT,
    postcode             TEXT,
    province             TEXT,
    neighbourhood        TEXT,
    municipality         TEXT,

    -- Price as a number and as display text
    price                BIGINT,
    price_formatted      TEXT,

    -- Classification fields, all free text
    status               TEXT,
    offering_type        TEXT,
    object_type          TEXT,
    house_type           TEXT,
    construction_type    TEXT,
    construction_year    TEXT,
    energy_label         TEXT,

    -- Integer measurements and counts
    living_area          INTEGER,
    plot_area            INTEGER,
    bedrooms             INTEGER,
    rooms                INTEGER,

    description          TEXT,
    -- Stored as text, not as a date type
    publication_date     TEXT,

    -- Coordinates
    latitude             DOUBLE PRECISION,
    longitude            DOUBLE PRECISION,

    -- Boolean flags
    has_garden           BOOLEAN,
    has_balcony          BOOLEAN,
    has_solar_panels     BOOLEAN,
    has_heat_pump        BOOLEAN,
    has_roof_terrace     BOOLEAN,
    is_energy_efficient  BOOLEAN,
    is_monument          BOOLEAN,

    url                  TEXT,
    photo_count          INTEGER,
    views                INTEGER,
    saves                INTEGER,

    -- JSON document column and row write time
    raw_json             JSONB,
    ingested_at          TIMESTAMPTZ DEFAULT now(),

    UNIQUE (global_id, status)
);
|
||||
@@ -0,0 +1,11 @@
|
||||
-- Price history entries. Uniqueness is enforced on
-- (global_id, date, source, status).
-- The date and timestamp columns are stored as text, not as temporal types.
-- NOTE(review): every key column is nullable and PostgreSQL treats NULLs as
-- distinct in a UNIQUE constraint by default, so rows with a NULL key column
-- never conflict with each other - confirm this is intended.
CREATE TABLE IF NOT EXISTS {{ schema }}.price_history (
    global_id    TEXT,
    price        BIGINT,
    human_price  TEXT,
    date         TEXT,
    timestamp    TEXT,
    source       TEXT,
    status       TEXT,
    ingested_at  TIMESTAMPTZ DEFAULT now(),

    UNIQUE (global_id, date, source, status)
);
|
||||
@@ -0,0 +1,23 @@
|
||||
-- Search result rows. Uniqueness is enforced on global_id.
-- No column carries a NOT NULL constraint.
-- NOTE(review): global_id is nullable and PostgreSQL treats NULLs as distinct
-- in a UNIQUE constraint by default, so rows with a NULL global_id never
-- conflict with each other - confirm this is intended.
CREATE TABLE IF NOT EXISTS {{ schema }}.search_results (
    -- Identifier and address fields
    global_id          TEXT,
    title              TEXT,
    city               TEXT,
    postcode           TEXT,
    province           TEXT,
    neighbourhood      TEXT,

    -- Price and integer measurements
    price              BIGINT,
    living_area        INTEGER,
    plot_area          INTEGER,
    bedrooms           INTEGER,
    rooms              INTEGER,

    -- Classification fields, all free text
    energy_label       TEXT,
    object_type        TEXT,
    offering_type      TEXT,
    construction_type  TEXT,

    -- Stored as text, not as a date type
    publish_date       TEXT,

    broker_id          TEXT,
    broker_name        TEXT,

    -- JSON document column and row write time
    raw_json           JSONB,
    ingested_at        TIMESTAMPTZ DEFAULT now(),

    UNIQUE (global_id)
);
|
||||
@@ -0,0 +1,18 @@
|
||||
-- Deduplicate listing_details and add its UNIQUE constraint if the table has
-- no unique constraint yet.
--
-- Within each group of rows sharing global_id and status (NULL statuses are
-- grouped together) exactly one row survives: the one with the greatest
-- ingested_at. A NULL ingested_at sorts lowest, and rows with the same
-- ingested_at are ordered by their physical row id (ctid).
--
-- The tie-break matters because now() returns the transaction start time, so
-- duplicates written in one transaction share the same ingested_at. Comparing
-- on ingested_at alone would keep all of them and ADD UNIQUE would then fail.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conrelid = '{{ schema }}.listing_details'::regclass
          AND contype = 'u'
    ) THEN
        DELETE FROM {{ schema }}.listing_details AS a
        USING {{ schema }}.listing_details AS b
        WHERE a.global_id = b.global_id
          AND a.status IS NOT DISTINCT FROM b.status
          AND (
              a.ingested_at < b.ingested_at
              -- a row without a timestamp loses to one that has a timestamp
              OR (a.ingested_at IS NULL AND b.ingested_at IS NOT NULL)
              -- equal or both-NULL timestamps: keep the highest ctid
              OR (
                  a.ingested_at IS NOT DISTINCT FROM b.ingested_at
                  AND a.ctid < b.ctid
              )
          );

        ALTER TABLE {{ schema }}.listing_details
        ADD UNIQUE (global_id, status);
    END IF;
END $$;
|
||||
@@ -0,0 +1,20 @@
|
||||
-- Deduplicate price_history and add its UNIQUE constraint if the table has
-- no unique constraint yet.
--
-- Within each group of rows sharing global_id, date, source and status (NULLs
-- in date, source and status are grouped together) exactly one row survives:
-- the one with the greatest ingested_at. A NULL ingested_at sorts lowest, and
-- rows with the same ingested_at are ordered by their physical row id (ctid).
--
-- The tie-break matters because now() returns the transaction start time, so
-- duplicates written in one transaction share the same ingested_at. Comparing
-- on ingested_at alone would keep all of them and ADD UNIQUE would then fail.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conrelid = '{{ schema }}.price_history'::regclass
          AND contype = 'u'
    ) THEN
        DELETE FROM {{ schema }}.price_history AS a
        USING {{ schema }}.price_history AS b
        WHERE a.global_id = b.global_id
          AND a.date IS NOT DISTINCT FROM b.date
          AND a.source IS NOT DISTINCT FROM b.source
          AND a.status IS NOT DISTINCT FROM b.status
          AND (
              a.ingested_at < b.ingested_at
              -- a row without a timestamp loses to one that has a timestamp
              OR (a.ingested_at IS NULL AND b.ingested_at IS NOT NULL)
              -- equal or both-NULL timestamps: keep the highest ctid
              OR (
                  a.ingested_at IS NOT DISTINCT FROM b.ingested_at
                  AND a.ctid < b.ctid
              )
          );

        ALTER TABLE {{ schema }}.price_history
        ADD UNIQUE (global_id, date, source, status);
    END IF;
END $$;
|
||||
@@ -0,0 +1,17 @@
|
||||
-- Deduplicate search_results and add its UNIQUE constraint if the table has
-- no unique constraint yet.
--
-- Within each group of rows sharing a global_id exactly one row survives: the
-- one with the greatest ingested_at. A NULL ingested_at sorts lowest, and rows
-- with the same ingested_at are ordered by their physical row id (ctid).
--
-- The tie-break matters because now() returns the transaction start time, so
-- duplicates written in one transaction share the same ingested_at. Comparing
-- on ingested_at alone would keep all of them and ADD UNIQUE would then fail.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_constraint
        WHERE conrelid = '{{ schema }}.search_results'::regclass
          AND contype = 'u'
    ) THEN
        DELETE FROM {{ schema }}.search_results AS a
        USING {{ schema }}.search_results AS b
        WHERE a.global_id = b.global_id
          AND (
              a.ingested_at < b.ingested_at
              -- a row without a timestamp loses to one that has a timestamp
              OR (a.ingested_at IS NULL AND b.ingested_at IS NOT NULL)
              -- equal or both-NULL timestamps: keep the highest ctid
              OR (
                  a.ingested_at IS NOT DISTINCT FROM b.ingested_at
                  AND a.ctid < b.ctid
              )
          );

        ALTER TABLE {{ schema }}.search_results
        ADD UNIQUE (global_id);
    END IF;
END $$;
|
||||
4
data_platform/assets/ingestion/funda/sql/dml/.sqlfluff
Normal file
4
data_platform/assets/ingestion/funda/sql/dml/.sqlfluff
Normal file
@@ -0,0 +1,4 @@
|
||||
[sqlfluff]
# The statements linted with this config use SQLAlchemy-style named bind
# parameters (a colon followed by the parameter name). sqlfluff misreads
# those as cast operators and reports false LT01 spacing violations, so
# LT01 is excluded here.
exclude_rules = LT01
|
||||
@@ -0,0 +1,59 @@
|
||||
-- Upsert one listing_details row, keyed on (global_id, status).
-- Each bind parameter has the same name as the column it fills.
-- On conflict every non-key column is overwritten with the incoming value
-- and ingested_at is reset to now().
INSERT INTO {{ schema }}.listing_details (
    global_id,
    tiny_id,
    title,
    city,
    postcode,
    province,
    neighbourhood,
    municipality,
    price,
    price_formatted,
    status,
    offering_type,
    object_type,
    house_type,
    construction_type,
    construction_year,
    energy_label,
    living_area,
    plot_area,
    bedrooms,
    rooms,
    description,
    publication_date,
    latitude,
    longitude,
    has_garden,
    has_balcony,
    has_solar_panels,
    has_heat_pump,
    has_roof_terrace,
    is_energy_efficient,
    is_monument,
    url,
    photo_count,
    views,
    saves,
    raw_json
)
VALUES (
    :global_id,
    :tiny_id,
    :title,
    :city,
    :postcode,
    :province,
    :neighbourhood,
    :municipality,
    :price,
    :price_formatted,
    :status,
    :offering_type,
    :object_type,
    :house_type,
    :construction_type,
    :construction_year,
    :energy_label,
    :living_area,
    :plot_area,
    :bedrooms,
    :rooms,
    :description,
    :publication_date,
    :latitude,
    :longitude,
    :has_garden,
    :has_balcony,
    :has_solar_panels,
    :has_heat_pump,
    :has_roof_terrace,
    :is_energy_efficient,
    :is_monument,
    :url,
    :photo_count,
    :views,
    :saves,
    :raw_json
)
ON CONFLICT (global_id, status) DO UPDATE
    SET
        tiny_id = excluded.tiny_id,
        title = excluded.title,
        city = excluded.city,
        postcode = excluded.postcode,
        province = excluded.province,
        neighbourhood = excluded.neighbourhood,
        municipality = excluded.municipality,
        price = excluded.price,
        price_formatted = excluded.price_formatted,
        offering_type = excluded.offering_type,
        object_type = excluded.object_type,
        house_type = excluded.house_type,
        construction_type = excluded.construction_type,
        construction_year = excluded.construction_year,
        energy_label = excluded.energy_label,
        living_area = excluded.living_area,
        plot_area = excluded.plot_area,
        bedrooms = excluded.bedrooms,
        rooms = excluded.rooms,
        description = excluded.description,
        publication_date = excluded.publication_date,
        latitude = excluded.latitude,
        longitude = excluded.longitude,
        has_garden = excluded.has_garden,
        has_balcony = excluded.has_balcony,
        has_solar_panels = excluded.has_solar_panels,
        has_heat_pump = excluded.has_heat_pump,
        has_roof_terrace = excluded.has_roof_terrace,
        is_energy_efficient = excluded.is_energy_efficient,
        is_monument = excluded.is_monument,
        url = excluded.url,
        photo_count = excluded.photo_count,
        views = excluded.views,
        saves = excluded.saves,
        raw_json = excluded.raw_json,
        ingested_at = now()
|
||||
@@ -0,0 +1,11 @@
|
||||
-- Upsert one price_history row, keyed on (global_id, date, source, status).
-- Each bind parameter has the same name as the column it fills.
-- On conflict the non-key columns are overwritten with the incoming values
-- and ingested_at is reset to now().
INSERT INTO {{ schema }}.price_history (
    global_id,
    price,
    human_price,
    date,
    timestamp,
    source,
    status
)
VALUES (
    :global_id,
    :price,
    :human_price,
    :date,
    :timestamp,
    :source,
    :status
)
ON CONFLICT (global_id, date, source, status) DO UPDATE
    SET
        price = excluded.price,
        human_price = excluded.human_price,
        timestamp = excluded.timestamp,
        ingested_at = now()
|
||||
@@ -0,0 +1,32 @@
|
||||
-- Upsert one search_results row, keyed on global_id.
-- Each bind parameter has the same name as the column it fills.
-- On conflict every non-key column is overwritten with the incoming value
-- and ingested_at is reset to now().
INSERT INTO {{ schema }}.search_results (
    global_id,
    title,
    city,
    postcode,
    province,
    neighbourhood,
    price,
    living_area,
    plot_area,
    bedrooms,
    rooms,
    energy_label,
    object_type,
    offering_type,
    construction_type,
    publish_date,
    broker_id,
    broker_name,
    raw_json
)
VALUES (
    :global_id,
    :title,
    :city,
    :postcode,
    :province,
    :neighbourhood,
    :price,
    :living_area,
    :plot_area,
    :bedrooms,
    :rooms,
    :energy_label,
    :object_type,
    :offering_type,
    :construction_type,
    :publish_date,
    :broker_id,
    :broker_name,
    :raw_json
)
ON CONFLICT (global_id) DO UPDATE
    SET
        title = excluded.title,
        city = excluded.city,
        postcode = excluded.postcode,
        province = excluded.province,
        neighbourhood = excluded.neighbourhood,
        price = excluded.price,
        living_area = excluded.living_area,
        plot_area = excluded.plot_area,
        bedrooms = excluded.bedrooms,
        rooms = excluded.rooms,
        energy_label = excluded.energy_label,
        object_type = excluded.object_type,
        offering_type = excluded.offering_type,
        construction_type = excluded.construction_type,
        publish_date = excluded.publish_date,
        broker_id = excluded.broker_id,
        broker_name = excluded.broker_name,
        raw_json = excluded.raw_json,
        ingested_at = now()
|
||||
Reference in New Issue
Block a user