From 04b91277c2c2f00a92672a0a40d897627f758a2d Mon Sep 17 00:00:00 2001
From: Gabe Farrell
Date: Sun, 6 Apr 2025 06:24:11 -0400
Subject: [PATCH] I HATE MONOGATARI METADATA

---
 .gitignore                     |   1 +
 monogatari_metadata_fetcher.py | 241 +++++++++++++++++++++++++++++++++
 replace_nfo.py                 |   1 +
 3 files changed, 243 insertions(+)
 create mode 100644 monogatari_metadata_fetcher.py

diff --git a/.gitignore b/.gitignore
index 9be002e..c961b0e 100755
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 my-venv
+monogatari_nfo
\ No newline at end of file
diff --git a/monogatari_metadata_fetcher.py b/monogatari_metadata_fetcher.py
new file mode 100644
index 0000000..3a4a7ad
--- /dev/null
+++ b/monogatari_metadata_fetcher.py
@@ -0,0 +1,241 @@
+import os
+import time
+import xml.dom.minidom
+import xml.etree.ElementTree as ET
+from datetime import datetime
+from io import BytesIO
+
+import requests
+from bs4 import BeautifulSoup
+
+# List of Monogatari seasons with their AniDB IDs
+SEASONS = [
+    {"name": "Bakemonogatari", "id": 6327},
+    {"name": "Nisemonogatari", "id": 8658},
+    {"name": "Nekomonogatari Kuro", "id": 9453},
+    {"name": "Monogatari: Second Season", "id": 9183},
+    {"name": "Hanamonogatari", "id": 10046},
+    {"name": "Tsukimonogatari", "id": 10891},
+    {"name": "Owarimonogatari", "id": 11350},
+    {"name": "Koyomimonogatari", "id": 11827},
+    {"name": "Kizumonogatari", "id": 8357},
+    {"name": "Zoku Owarimonogatari", "id": 13691}
+]
+
+# Browser-like User-Agent sent with every request (AniDB tends to reject
+# obviously scripted requests).
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 mf2389hf890328-fh37h7fd32h7"
+}
+
+def fetch_episode_details(episode_id):
+    """Fetch detailed information for a given episode."""
+    episode_url = f"https://anidb.net/episode/{episode_id}"
+    print(f"[FETCH] {episode_url}")
+
+    try:
+        res = requests.get(episode_url, headers=HEADERS, timeout=30)
+        res.raise_for_status()
+    except Exception as e:
+        print(f"[ERROR] Failed to fetch episode {episode_id}: {e}")
+        return None
+
+    soup = BeautifulSoup(res.text, "html.parser")
+
+    # Find the table with episode details inside the div with class
+    # 'g_definitionlist'; guard each step so a missing div doesn't raise
+    # an AttributeError before the table check below.
+    details_div = soup.find("div", class_="g_definitionlist")
+    table = details_div.find("table") if details_div else None
+    if not table:
+        print(f"[WARN] Table with episode details not found for episode {episode_id}.")
+        return None
+
+    episode_data = {}
+
+    # Extract the episode details from the table (fall back to the table
+    # itself if there is no explicit tbody)
+    tbody = table.find("tbody") or table
+    for row in tbody.find_all("tr"):
+        try:
+            if "romaji" in row.get("class", []):  # Episode title (romaji)
+                episode_data["title_romaji"] = row.find("td").find("span").text.strip()
+
+            elif "official" in row.get("class", []):  # Official (English) title
+                episode_data["title_en"] = row.find("td").find("label").text.strip()
+
+            elif "date" in row.get("class", []):  # Air date, converted to ISO format
+                air_date = row.find("td").find("span").text.strip()
+                episode_data["air_date"] = datetime.strptime(air_date, "%d.%m.%Y").strftime("%Y-%m-%d")
+
+            elif "rating" in row.get("class", []):  # Rating
+                episode_data["rating"] = row.find("td").find("span", class_="value").text.strip()
+
+        except Exception as e:
+            print(f"[SKIP] Error parsing row: {e}")
+            continue
+
+    # Keep the AniDB episode ID so the NFO can reference it
+    episode_data["anidbid"] = episode_id
+
+    # Fetch the plot from the div with class "summary"
+    summary_div = soup.find("div", class_="summary")
+    if summary_div:
+        # The summary div may contain trailing <br>-separated lines; when it
+        # does, keep only the content before the first <br> tag
+        br_tags = summary_div.find_all("br")
+        if len(br_tags) >= 2:
+            first_br = summary_div.contents.index(br_tags[0])
+            plot = "".join(str(c) for c in summary_div.contents[:first_br])
+        else:
+            plot = summary_div.text.strip()
+        episode_data["plot"] = plot.strip()
+    else:
+        episode_data["plot"] = "No plot available"
+
+    return episode_data
+
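+# For reference, fetch_episode_details() returns a dict shaped like the sketch
+# below. The values are illustrative placeholders, not real AniDB data, and
+# keys other than "anidbid" and "plot" appear only when their table rows parse:
+#
+#     {
+#         "title_romaji": "Example Romaji Title",
+#         "title_en": "Example Official Title",
+#         "air_date": "2009-07-03",
+#         "rating": "8.50",
+#         "anidbid": "12345",
+#         "plot": "Example plot text...",
+#     }
+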
+def fetch_episodes(season_id):
+    """Fetch episodes for a specific season based on its AniDB ID and hardcoded table row ranges."""
+    # Hardcoded 1-based row ranges of each season's episode table; rows
+    # outside the range (other entry types such as recaps) are skipped.
+    season_ranges = {
+        6327: range(1, 16),   # Bakemonogatari - first 15 rows
+        8658: range(1, 12),   # Nisemonogatari - first 11 rows
+        9453: range(2, 6),    # Nekomonogatari Kuro - rows 2-5
+        9183: [i for i in range(1, 27) if i not in [6, 11, 16]],  # Monogatari: Second Season - rows 1-26, excluding 6, 11, and 16
+        10046: range(2, 7),   # Hanamonogatari - rows 2-6
+        10891: range(2, 6),   # Tsukimonogatari - rows 2-5
+        11350: range(2, 13),  # Owarimonogatari - rows 2-12, numbering starts at episode 2
+        11827: range(1, 13),  # Koyomimonogatari - first 12 rows
+        8357: range(1, 4),    # Kizumonogatari - first 3 rows
+        13691: range(2, 8),   # Zoku Owarimonogatari - rows 2-7
+    }
+
+    base_url = f"https://anidb.net/anime/{season_id}"
+    print(f"[FETCH] {base_url}")
+    try:
+        res = requests.get(base_url, headers=HEADERS, timeout=30)
+        res.raise_for_status()
+    except Exception as e:
+        print(f"[ERROR] Failed to fetch page: {e}")
+        return []
+
+    soup = BeautifulSoup(res.text, "html.parser")
+    table = soup.find("table", id="eplist", class_="eplist")
+    if not table:
+        print("[WARN] Episode table not found.")
+        return []
+
+    tbody = table.find("tbody")
+    if not tbody:
+        print("[WARN] Table body not found.")
+        return []
+
+    episodes = []
+    rows = tbody.find_all("tr")
+
+    # Get the specific range of rows based on the season_id
+    season_range = season_ranges.get(season_id)
+    if not season_range:
+        print(f"[ERROR] No episode range found for season ID {season_id}.")
+        return []
+
+    episode_number = 1  # Start episode count from 1
+    for idx, row in enumerate(rows):
+        # Only process rows within the specified range for the season
+        if idx + 1 not in season_range:
+            continue
+
+        try:
+            ep_id_cell = row.find("td", class_="id eid")
+            title_cell = row.find("td", class_="title name episode")
+            if not ep_id_cell or not title_cell:
+                continue
+
+            # Get the episode ID from the href of the <a> tag
+            episode_link = ep_id_cell.find("a")["href"]
+            episode_id = episode_link.split("/")[-1]
+
+            episode_details = fetch_episode_details(episode_id)
+            time.sleep(0.5)  # Throttle requests between episode pages
+            if not episode_details:
+                continue
+
+            # Owarimonogatari numbering starts at episode 2, so skip number 1
+            if season_id == 11350 and episode_number == 1:
+                episode_number += 1
+
+            # air_date may be missing if the date row failed to parse, so fall
+            # back gracefully instead of raising a KeyError on the year lookup
+            air_date = episode_details.get("air_date")
+            episodes.append({
+                "number": episode_number,
+                "title_en": episode_details.get("title_en", "Unknown Title"),
+                "title_romaji": episode_details.get("title_romaji", "Unknown Romaji"),
+                "plot": episode_details.get("plot", "No plot available"),
+                "air_date": air_date or "Unknown Date",
+                "rating": episode_details.get("rating", "Unknown Rating"),
+                "year": air_date[:4] if air_date else "Unknown",  # YYYY from the ISO air date
+                "anidbid": episode_details["anidbid"]
+            })
+
+            # Increment episode number after each episode
+            episode_number += 1
+
+        except Exception as e:
+            print(f"[SKIP] Row parse error: {e}")
+            continue
+
+    return episodes
+
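+# Illustrative sanity check for the hardcoded ranges above (run from a REPL;
+# it hits the live site, so expect it to be slow). Bakemonogatari (ID 6327)
+# covers table rows 1-15, so it should yield 15 episodes if every episode
+# page parses:
+#
+#     >>> eps = fetch_episodes(6327)
+#     >>> len(eps)
+#     15
+#     >>> eps[0]["number"]
+#     1
+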
+def pretty_print_xml(element):
+    """Pretty print XML with newlines and indentation."""
+    tree = ET.ElementTree(element)
+
+    # Round-trip through an in-memory buffer so minidom can re-indent the tree
+    rough_string = BytesIO()
+    tree.write(rough_string, encoding="utf-8", xml_declaration=True)
+    rough_string.seek(0)
+    dom = xml.dom.minidom.parse(rough_string)
+    return dom.toprettyxml(indent="  ")
+
+def write_nfo(episode, season_name, season_number):
+    """Writes an NFO file for a given episode."""
+    # Check if anidbid is missing
+    if "anidbid" not in episode:
+        print(f"[WARN] Missing anidbid for episode {episode['number']}. Skipping this episode.")
+        return
+
+    # Use season_number to create the folder, formatted as 'Season 01', 'Season 02', etc.
+    season_folder = f"Season {season_number:02}"
+    season_dir = os.path.join("monogatari_nfo", season_folder)
+    os.makedirs(season_dir, exist_ok=True)
+
+    file_name = f"S{season_number:02}E{episode['number']:02}.nfo"
+    file_path = os.path.join(season_dir, file_name)
+
+    root = ET.Element("episodedetails")
+    ET.SubElement(root, "plot").text = episode["plot"]
+    ET.SubElement(root, "originaltitle").text = episode["title_en"]
+    ET.SubElement(root, "title").text = episode["title_romaji"]
+    ET.SubElement(root, "year").text = episode["year"]
+    ET.SubElement(root, "aired").text = episode["air_date"]
+    ET.SubElement(root, "rating").text = episode["rating"]
+    ET.SubElement(root, "anidbid").text = str(episode["anidbid"])
+    ET.SubElement(root, "episode").text = str(episode["number"])
+    ET.SubElement(root, "season").text = str(season_number)
+
+    pretty_xml = pretty_print_xml(root)
+
+    with open(file_path, "w", encoding="utf-8") as f:
+        f.write(pretty_xml)
+
+    print(f"[WRITE] {file_path}")
+
+def main():
+    for season_number, season in enumerate(SEASONS, start=1):
+        season_name = season["name"]
+        season_id = season["id"]
+        print(f"\n[INFO] Scraping episodes for {season_name} (ID: {season_id})...")
+
+        episodes = fetch_episodes(season_id)
+        if not episodes:
+            print(f"[FAIL] No episodes found for {season_name}.")
+            continue
+
+        # Write .nfo files for each episode in the current season
+        for episode in episodes:
+            write_nfo(episode, season_name, season_number)
+
+    print("\n✅ Done generating .nfo files for all Monogatari seasons.")
+
+if __name__ == "__main__":
+    main()
diff --git a/replace_nfo.py b/replace_nfo.py
index 4ad0c32..20f8fd8 100755
--- a/replace_nfo.py
+++ b/replace_nfo.py
@@ -41,6 +41,7 @@ def process_files(source_dir, target_dir):
             continue
 
         for tgt_file in matching_targets:
+            print(f"Updating file: {tgt_file}")
             target_tree = parse_xml(tgt_file)
             if target_tree is None:
                 continue