From a7d8dfdbd33869523b5f463af39ae6922f906ac6 Mon Sep 17 00:00:00 2001
From: Rik Veenboer <rik.veenboer@gmail.com>
Date: Mon, 14 Oct 2024 11:24:03 +0200
Subject: [PATCH] update sounds fetch

---
 src/app/vinyl/sounds/fetch.py | 64 ++++++++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 19 deletions(-)

diff --git a/src/app/vinyl/sounds/fetch.py b/src/app/vinyl/sounds/fetch.py
index 7be3185..219d1cb 100644
--- a/src/app/vinyl/sounds/fetch.py
+++ b/src/app/vinyl/sounds/fetch.py
@@ -24,29 +24,55 @@ def get_page_count(html_content):
 
 
 def parse_page(html_content):
+    """Parse a search results page into a DataFrame with one row per product.
+
+    Price stays text (euro sign stripped); optional fields absent are NaN.
+    """
     soup = BeautifulSoup(html_content, "html.parser")
-
-    # Extract the name (artist - album) from the h5 tag
-    names = list(map(lambda x: x.get_text(strip=True), soup.find_all("h5")))
-
-    # Remove 'Telefoon', 'E-mail', 'Facebook'
-    names = list(filter(lambda x: " -" in x, names))
-
-    # Extract the numerical id from the a tag
-    ids = list(map(lambda x: x["rel"][0], soup.find_all("a", rel=True)))
-
-    # Extract the price
-    prices = list(
-        map(
-            lambda x: float(x.get_text(strip=True).split()[1]),
-            soup.find_all("span", class_="product-price"),
+    entries = []
+    for product in soup.find_all("div", {"class": "search-product"}):
+        item_id = product.find("a", rel=True)["rel"][0]
+        name = product.find("h5").text.strip()
+        sep = " -" if " -" in name else "-"  # keep hyphenated names intact
+        artist, _, title = map(str.strip, name.partition(sep))
+        price = (
+            product.find("span", class_="product-price")
+            .text.strip()
+            .replace("€", "")
+            .strip()
         )
+        entry = {
+            "id": item_id,
+            "name": name,
+            "artist": artist,
+            "title": title,
+            "price": price,
+        }
+        if detail := product.find("h6", {"class": "hide-for-small"}):
+            entry["detail"] = detail.text
+        if supply := product.find("div", {"class": "product-voorraad"}):
+            entry["supply"] = supply.text
+        for info in product.find_all("div", {"class": "product-info"}):
+            key, _, value = info.text.partition(":")  # text before/after first ":"
+            if "Genre" in key:
+                entry["genre"] = value.strip()
+            if "Releasedatum" in key:
+                entry["release"] = value.strip()
+        entries.append(entry)  # once per product, not once per info line
+    return pd.DataFrame(entries).reindex(
+        columns=[
+            "id",
+            "name",
+            "artist",
+            "title",
+            "price",
+            "supply",
+            "release",
+            "genre",
+            "detail",
+        ]
     )
 
-    df = pd.DataFrame({"id": ids, "name": names, "price": prices})
-
-    return df
-
 
 def fetch_deals():
     # Get page count