update sounds fetch
This commit is contained in:
@@ -24,28 +24,54 @@ def get_page_count(html_content):
|
|||||||
|
|
||||||
|
|
||||||
def parse_page(html_content):
    """Parse one search-result page into a DataFrame of products.

    Every ``div.search-product`` element found in *html_content* becomes
    one row.

    Args:
        html_content: HTML markup of a result page; handed to BeautifulSoup
            with the ``html.parser`` backend.

    Returns:
        A pandas DataFrame with the columns ``id``, ``name``, ``artist``,
        ``title``, ``price``, ``supply``, ``release``, ``genre`` and
        ``detail``, in that order. Optional fields that a product does not
        provide are left as missing values. ``price`` is kept as the text
        that remains after removing the euro sign; it is not converted to
        a number here.
    """
    entries = []
    soup = BeautifulSoup(html_content, "html.parser")

    for product in soup.find_all("div", {"class": "search-product"}):
        # The first value of the anchor's ``rel`` attribute is used as the
        # item id (presumably the shop's numerical product id; verify
        # against the page markup).
        item_id = product.find("a", rel=True)["rel"][0]

        # The h5 tag holds the product name, expected as "artist - title".
        name = product.find("h5").text.strip()

        # Prefer the spaced separator so hyphens inside an artist or title
        # ("Jay-Z - Album") are preserved, and split only once so the rest
        # of the name stays in the title. Fall back to a bare hyphen for
        # names without " - "; with no hyphen at all the whole name becomes
        # the artist and the title is empty instead of raising IndexError.
        separator = " - " if " - " in name else "-"
        artist, _, title = name.partition(separator)
        artist = artist.strip()
        title = title.strip()

        price = (
            product.find("span", class_="product-price")
            .text.strip()
            .replace("€", "")
            .strip()
        )

        entry = {
            "id": item_id,
            "name": name,
            "artist": artist,
            "title": title,
            "price": price,
        }

        # Optional blocks: only recorded when present on the product.
        if detail := product.find("h6", {"class": "hide-for-small"}):
            entry["detail"] = detail.text
        if supply := product.find("div", {"class": "product-voorraad"}):
            entry["supply"] = supply.text

        for info in product.find_all("div", {"class": "product-info"}):
            # Each info line looks like "<label>: <value>". Partition on
            # the first colon only, so values containing a colon are kept
            # whole and a line without a colon yields an empty value
            # rather than an IndexError.
            label, _, value = info.text.partition(":")
            if "Genre" in label:
                entry["genre"] = value.strip()
            if "Releasedatum" in label:
                entry["release"] = value.strip()

        entries.append(entry)

    # reindex fixes the column order and adds any column that no product
    # on this page supplied.
    return pd.DataFrame(entries).reindex(
        columns=[
            "id",
            "name",
            "artist",
            "title",
            "price",
            "supply",
            "release",
            "genre",
            "detail",
        ]
    )
|
||||||
|
|
||||||
|
|
||||||
def fetch_deals():
|
def fetch_deals():
|
||||||
|
|||||||
Reference in New Issue
Block a user