Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,198 Bytes
da2da03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import requests
from bs4 import BeautifulSoup
from html_to_markdown import convert_to_markdown
# --- Static Helper Functions (Web Scraping) ---
def get_html(url: str, timeout: float = 10.0) -> str:
    """Fetch the body of *url* as text.

    Args:
        url: Address to request with an HTTP GET.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text, or ``""`` when the request fails or the server
        answers with a 4xx/5xx status. Failures are printed, not raised.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # turn 4xx/5xx answers into HTTPError
        return response.text
    except requests.exceptions.RequestException as e:
        # Best-effort fetch: report the problem and hand back an empty page.
        print(f"Error fetching {url}: {e}")
        return ""
def find_wiki_links(html_content: str) -> list[str]:
    """Collect the ``href`` of every link inside the ``mw-pages`` div.

    Args:
        html_content: HTML markup of the page to scan.

    Returns:
        The ``href`` value of each ``<a>`` tag that has one and sits inside
        ``<div id="mw-pages">``. An empty list is returned when the input is
        empty or the div is absent.
    """
    # Nothing to parse (e.g. a failed download yields ""), so skip the parser.
    if not html_content:
        return []
    soup = BeautifulSoup(html_content, 'html.parser')
    mw_pages_div = soup.find('div', id='mw-pages')
    if mw_pages_div is None:
        return []
    # href=True restricts the search to anchors that carry an href attribute.
    return [a['href'] for a in mw_pages_div.find_all('a', href=True)]
def get_markdown_from_html(html: str) -> str:
    """Convert an HTML string to Markdown.

    Args:
        html: HTML markup to convert.

    Returns:
        ``""`` for empty input; otherwise the result of passing the parsed
        document to ``convert_to_markdown``.
    """
    if not html:
        return ""
    soup = BeautifulSoup(html, 'html.parser')
    return convert_to_markdown(soup)
def get_markdown_from_url(url: str) -> str:
    """Download *url* and return its content converted to Markdown.

    Args:
        url: Address of the page to fetch.

    Returns:
        The output of ``get_markdown_from_html`` applied to the HTML that
        ``get_html`` returns for *url*. A failed fetch yields ``""`` from
        ``get_html``, which converts to ``""`` here as well.
    """
    return get_markdown_from_html(get_html(url))
|