Spaces:
Sleeping
Sleeping
import playwright | |
import playwright.sync_api | |
from playwright.sync_api import sync_playwright | |
import json | |
from tqdm import tqdm | |
# Load the section records that will be enriched with scraped descriptions.
# JSON files are UTF-8 by specification, so decode explicitly instead of
# relying on the platform's default locale encoding.
with open("sections.json", "r", encoding="utf-8") as f:
    data = json.load(f)
# Visit every activity URL with a single headless Chromium page and store the
# scraped text on the activity under the "description" key.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    page = browser.new_page()

    def fetch_description(url, selector):
        """Return the text content of the first element matching a selector.

        Args:
            url: Address the shared ``page`` navigates to.
            selector: CSS selector of the element whose text is wanted.

        Returns:
            The element's text content, or ``None`` when no element on the
            page matches ``selector``.
        """
        page.goto(url)
        # Fixed 3 s pause before querying the DOM, kept from the original
        # script (presumably to let client-side rendering finish).
        page.wait_for_timeout(3000)
        element = page.query_selector(selector)
        if element is None:
            # query_selector yields None when nothing matches; calling
            # .text_content() on it would abort the whole run.
            return None
        return element.text_content()

    selector_description = "div.s-rte"

    def check_if_url(url, selector):
        """Fetch the description for ``url``, or return ``None`` without one.

        Args:
            url: Address to scrape; ``None`` or an empty string means the
                activity has nothing to fetch.
            selector: CSS selector forwarded to ``fetch_description``.

        Returns:
            The scraped text, or ``None`` when there is no URL or no
            matching element.
        """
        if not url:
            return None
        return fetch_description(url, selector)

    try:
        for item in tqdm(data):
            for activity in item["activities"]:
                # .get() treats a missing "url" key like an explicit null.
                description = check_if_url(
                    activity.get("url"), selector_description
                )
                activity["description"] = description
                print(description)
    finally:
        # Release the browser process even if a page fails mid-run.
        browser.close()
# Persist the records, now carrying a "description" per activity, as JSON.
serialized = json.dumps(data)
with open("sections_with_details.json", "w") as out_file:
    out_file.write(serialized)