File size: 1,198 Bytes
da2da03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import requests
from bs4 import BeautifulSoup
from html_to_markdown import convert_to_markdown

# --- Static Helper Functions (Web Scraping) ---

def get_html(url: str, timeout: float = 30.0) -> str:
    """Fetch the body of ``url`` as text.

    Args:
        url: Address to request with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on a host that
            never answers.

    Returns:
        The response body as text, or an empty string if the request failed
        (any ``requests`` error, including timeouts and 4xx/5xx statuses).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx statuses into an HTTPError so they take the failure path.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best-effort helper: report the problem and fall back to an empty page.
        print(f"Error fetching {url}: {e}")
        return ""
    return response.text

def find_wiki_links(html_content: str) -> list[str]:
    """Collect the ``href`` of every link inside the ``mw-pages`` div.

    Args:
        html_content: HTML markup to parse.

    Returns:
        The ``href`` values of all ``<a>`` tags that carry an ``href``
        attribute within ``<div id="mw-pages">``, or an empty list when the
        document has no such div.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    mw_pages_div = soup.find('div', id='mw-pages')
    if mw_pages_div is None:
        return []
    # href=True skips anchors that have no href attribute at all.
    return [anchor['href'] for anchor in mw_pages_div.find_all('a', href=True)]

def get_markdown_from_html(html: str) -> str:
    """Convert an HTML string to Markdown.

    Args:
        html: HTML markup; may be empty.

    Returns:
        The output of ``convert_to_markdown`` for the parsed document, or an
        empty string when ``html`` is empty.
    """
    if not html:
        # Nothing to convert; skip parsing entirely.
        return ""

    soup = BeautifulSoup(html, 'html.parser')
    return convert_to_markdown(soup)

def get_markdown_from_url(url: str) -> str:
    """Fetch ``url`` and return its content converted to Markdown.

    Passes the result of ``get_html(url)`` to ``get_markdown_from_html``.

    Args:
        url: Address of the page to download.

    Returns:
        The Markdown produced by ``get_markdown_from_html`` for the fetched
        page.
    """
    return get_markdown_from_html(get_html(url))