update sounds fetch

This commit is contained in:
2024-10-14 11:24:03 +02:00
parent 0196f8bd27
commit a7d8dfdbd3

View File

@@ -24,29 +24,55 @@ def get_page_count(html_content):
def _split_artist_title(name):
    """Split a product name of the form "artist - title" into two parts.

    The split happens on the first " -" so that hyphens inside the artist
    name (e.g. "Jay-Z - ...") or inside the title are preserved. If the
    name contains no " -", the first bare "-" is used instead.

    Returns:
        An ``(artist, title)`` tuple of stripped strings. ``title`` is the
        empty string when the name contains no separator at all.
    """
    separator = " -" if " -" in name else "-"
    artist, _, title = name.partition(separator)
    return artist.strip(), title.strip()


def _strip_currency_symbol(price_text):
    """Return *price_text* without anything that precedes its first digit.

    NOTE(review): the previous implementation split the price text on
    whitespace and took the second token, so the text presumably looks like
    "<currency symbol> <amount>" -- confirm against the live page.

    Text that contains no digit at all is returned stripped but otherwise
    unchanged.
    """
    price_text = price_text.strip()
    for index, char in enumerate(price_text):
        if char.isdigit():
            return price_text[index:]
    return price_text


def parse_page(html_content):
    """Parse one result page into a DataFrame with one row per product.

    Every ``<div class="search-product">`` element yields a row.

    Args:
        html_content: HTML markup of a single result page.

    Returns:
        A ``pandas.DataFrame`` with the columns ``id``, ``name``,
        ``artist``, ``title``, ``price``, ``supply``, ``release``,
        ``genre`` and ``detail`` (in that order). The optional columns
        (``supply``, ``release``, ``genre``, ``detail``) are NaN for
        products that do not provide them. ``price`` is kept as text.

    Raises:
        TypeError / AttributeError: if a product lacks the ``<a rel=...>``,
            ``<h5>`` or ``<span class="product-price">`` element.
    """
    entries = []
    soup = BeautifulSoup(html_content, "html.parser")
    for product in soup.find_all("div", {"class": "search-product"}):
        # BeautifulSoup exposes "rel" as a list of values; the first one is
        # used as the item id.
        item_id = product.find("a", rel=True)["rel"][0]
        name = product.find("h5").text.strip()
        artist, title = _split_artist_title(name)
        # The earlier `.replace("", "")` was a no-op, so the currency symbol
        # was never actually removed from the price text.
        price = _strip_currency_symbol(
            product.find("span", class_="product-price").text
        )
        entry = {
            "id": item_id,
            "name": name,
            "artist": artist,
            "title": title,
            "price": price,
        }
        if detail := product.find("h6", {"class": "hide-for-small"}):
            entry["detail"] = detail.text
        if supply := product.find("div", {"class": "product-voorraad"}):
            entry["supply"] = supply.text
        for info in product.find_all("div", {"class": "product-info"}):
            # Split on the first colon only, so values that contain a colon
            # stay intact; blocks without a colon carry no value to store.
            label, colon, value = info.text.partition(":")
            if not colon:
                continue
            if "Genre" in label:
                entry["genre"] = value.strip()
            if "Releasedatum" in label:
                entry["release"] = value.strip()
        entries.append(entry)
    # reindex fixes the column order and adds any optional column that no
    # product on this page supplied.
    return pd.DataFrame(entries).reindex(
        columns=[
            "id",
            "name",
            "artist",
            "title",
            "price",
            "supply",
            "release",
            "genre",
            "detail",
        ]
    )
def fetch_deals():
# Get page count