update sounds fetch
This commit is contained in:
@@ -24,29 +24,55 @@ def get_page_count(html_content):
|
||||
|
||||
|
||||
def _split_artist_title(name):
    """Split an ``artist - title`` heading into ``(artist, title)``.

    Returns ``(name, "")`` when the heading contains no hyphen at all.
    """
    # Try the spaced separator first so hyphens inside a name ("Jay-Z") or a
    # title ("B-Sides") are not treated as the artist/title boundary; fall
    # back to a bare hyphen for headings written without surrounding spaces.
    for separator in (" - ", "-"):
        artist, found, title = name.partition(separator)
        if found:
            return artist.strip(), title.strip()
    return name.strip(), ""


def parse_page(html_content):
    """Parse one page of search results into a DataFrame of products.

    Every ``div.search-product`` element yields one row. A product that has
    no ``<a rel=...>`` link or no ``<h5>`` heading is skipped instead of
    aborting the whole page.

    Args:
        html_content: HTML markup of a single result page.

    Returns:
        A ``pandas.DataFrame`` with the columns ``id``, ``name``, ``artist``,
        ``title``, ``price``, ``supply``, ``release``, ``genre`` and
        ``detail``. ``price`` is the text of the price element with the euro
        sign removed (a string, not a number), or ``None`` when the product
        has no price element. Optional fields that are absent from the page
        are left empty by ``reindex``.
    """
    entries = []
    soup = BeautifulSoup(html_content, "html.parser")

    for product in soup.find_all("div", {"class": "search-product"}):
        link = product.find("a", rel=True)
        heading = product.find("h5")
        rel_tokens = link.get("rel") if link is not None else None
        if not rel_tokens or heading is None:
            # Without an id or a name the row is unusable; skip this element
            # rather than raising and losing the rest of the page.
            continue

        # The id is the first token of the link's ``rel`` attribute.
        item_id = rel_tokens[0]
        name = heading.text.strip()
        artist, title = _split_artist_title(name)

        price_tag = product.find("span", class_="product-price")
        price = None
        if price_tag is not None:
            price = price_tag.text.strip().replace("€", "").strip()

        entry = {
            "id": item_id,
            "name": name,
            "artist": artist,
            "title": title,
            "price": price,
        }
        if detail := product.find("h6", {"class": "hide-for-small"}):
            entry["detail"] = detail.text
        if supply := product.find("div", {"class": "product-voorraad"}):
            entry["supply"] = supply.text

        for info in product.find_all("div", {"class": "product-info"}):
            # Split on the first colon only, so a value that itself contains
            # a colon is kept whole; lines without a colon carry no value.
            label, found, value = info.text.partition(":")
            if not found:
                continue
            if "Genre" in label:
                entry["genre"] = value.strip()
            if "Releasedatum" in label:
                entry["release"] = value.strip()

        entries.append(entry)

    # reindex fixes the column order and adds any optional column that no
    # product on this page supplied.
    return pd.DataFrame(entries).reindex(
        columns=[
            "id",
            "name",
            "artist",
            "title",
            "price",
            "supply",
            "release",
            "genre",
            "detail",
        ]
    )
|
||||
|
||||
|
||||
def fetch_deals():
|
||||
# Get page count
|
||||
|
||||
Reference in New Issue
Block a user