refactor plato logic
This commit is contained in:
52
src/app/vinyl/plato/fetch.py
Executable file
52
src/app/vinyl/plato/fetch.py
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/root/.pyenv/versions/dev/bin/python
|
||||
|
||||
import re
|
||||
from datetime import datetime
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from .scrape import get_soup, scrape_page, scrape_page_links
|
||||
|
||||
|
||||
def _clean_artist(name):
    """Normalise an artist name: flip "Last, First" order and lower-case it.

    The ", "-separated parts are reversed and re-joined with spaces
    (``"Beatles, The"`` -> ``"the beatles"``), and every parenthesised
    fragment is removed together with the whitespace in front of it.
    """
    flipped = " ".join(reversed(name.split(", ")))
    return re.sub(r"\s+\([^)]*\)", "", flipped.lower())


def _parse_price(text):
    """Return the last space-separated token of *text* converted to float."""
    return float(text.split(" ")[-1])


def scrape_plato(get=None):
    """Scrape the Plato vinyl offers and return them as a DataFrame.

    The first offers page is fetched and scraped, then every page link
    found on it is visited in ascending order of the number after the last
    ``=`` in the link, and the scraped articles are accumulated.

    Args:
        get: Forwarded unchanged to ``get_soup`` as its ``get`` argument.

    Returns:
        pandas.DataFrame with the columns ``artist``, ``title``, ``url``,
        ``label``, ``release_date``, ``origin``, ``item_number``, ``ean``,
        ``delivery_info`` and ``price`` (absent fields become NaN), plus:

        * ``_artist`` - normalised artist name (see ``_clean_artist``),
        * ``_price``  - numeric price parsed from ``price``,
        * ``_date``   - ``datetime.now()`` at the time of the call (naive).
    """
    # NOTE(review): `ic` is not imported in this file; presumably it is made
    # available as a builtin elsewhere (e.g. icecream's install()) - confirm.
    ic()
    url = "https://www.platomania.nl/vinyl-aanbiedingen?page=1"

    ic(url)
    soup = get_soup(url=url, get=get)
    articles_info = scrape_page(soup)
    ic(len(articles_info))

    # De-duplicate the pagination links and walk them in page-number order.
    links = sorted(set(scrape_page_links(soup)), key=lambda x: int(x.split("=")[-1]))
    for link in links:
        ic(link)
        soup = get_soup(url=link, get=get)
        tmp = scrape_page(soup)
        ic(len(tmp))
        articles_info.extend(tmp)

    # reindex guarantees a fixed column set; fields missing from the scraped
    # records show up as NaN instead of raising.
    articles_df = pd.DataFrame(articles_info).reindex(
        columns=[
            "artist",
            "title",
            "url",
            "label",
            "release_date",
            "origin",
            "item_number",
            "ean",
            "delivery_info",
            "price",
        ]
    )
    # na_action="ignore" keeps NaN as NaN; without it a single missing
    # artist/price would crash on `.split` (AttributeError on a float).
    articles_df["_artist"] = articles_df["artist"].map(
        _clean_artist, na_action="ignore"
    )
    articles_df["_price"] = articles_df["price"].map(
        _parse_price, na_action="ignore"
    )
    articles_df["_date"] = datetime.now()

    return articles_df
|
||||
Reference in New Issue
Block a user