parent c5aadb59fd
commit 04b91277c2
@@ -1 +1,2 @@
my-venv
monogatari_nfo

@@ -0,0 +1,241 @@
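# Third-party dependencies: requests and beautifulsoup4
# (pip install requests beautifulsoup4); everything else below is stdlib.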
import os
import requests
from bs4 import BeautifulSoup
import xml.etree.ElementTree as ET
from datetime import datetime
import time

# List of Monogatari seasons with their AniDB IDs
SEASONS = [
    {"name": "Bakemonogatari", "id": 6327},
    {"name": "Nisemonogatari", "id": 8658},
    {"name": "Nekomonogatari Kuro", "id": 9453},
    {"name": "Monogatari: Second Season", "id": 9183},
    {"name": "Hanamonogatari", "id": 10046},
    {"name": "Tsukimonogatari", "id": 10891},
    {"name": "Owarimonogatari", "id": 11350},
    {"name": "Koyomimonogatari", "id": 11827},
    {"name": "Kizumonogatari", "id": 8357},
    {"name": "Zoku Owarimonogatari", "id": 13691}
]
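# Each "id" is the AniDB anime ID: the season's page lives at
# https://anidb.net/anime/<id> (see base_url in fetch_episodes below).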

HEADERS = {
    "User-Agent": "Mozilla/5.0 mf2389hf890328-fh37h7fd32h7"
}
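# NOTE (assumption): AniDB is known to block scrapers, so the distinctive
# User-Agent above plus the time.sleep(0.5) delay between requests (see
# fetch_episodes) keep the crawl polite; this is inferred from the code,
# not from documented AniDB policy.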

def fetch_episode_details(episode_id):
    """Fetch detailed information for a given episode."""
    episode_url = f"https://anidb.net/episode/{episode_id}"
    print(f"[FETCH] {episode_url}")

    try:
        res = requests.get(episode_url, headers=HEADERS, timeout=30)
        res.raise_for_status()
    except Exception as e:
        print(f"[ERROR] Failed to fetch episode {episode_id}: {e}")
        return None

    soup = BeautifulSoup(res.text, "html.parser")

    # Find the table with episode details inside the div with class 'g_definitionlist'
    details_div = soup.find("div", class_="g_definitionlist")
    table = details_div.find("table") if details_div else None
    if not table:
        print(f"[WARN] Table with episode details not found for episode {episode_id}.")
        return None

    episode_data = {}

    # Extract the episode details from the table
    for row in table.find("tbody").find_all("tr"):
        try:
            if "romaji" in row.get("class", []):  # Episode title (romaji)
                episode_data["title_romaji"] = row.find("td").find("span").text.strip()

            elif "official" in row.get("class", []):  # Official (English) title
                episode_data["title_en"] = row.find("td").find("label").text.strip()

            elif "date" in row.get("class", []):  # Air date
                air_date = row.find("td").find("span").text.strip()
                episode_data["air_date"] = datetime.strptime(air_date, "%d.%m.%Y").strftime("%Y-%m-%d")

            elif "rating" in row.get("class", []):  # Rating
                episode_data["rating"] = row.find("td").find("span", class_="value").text.strip()

        except Exception as e:
            print(f"[SKIP] Error parsing row: {e}")
            continue

    # Keep the AniDB episode ID for this episode
    episode_data["anidbid"] = episode_id

    # Extract the plot from the div with class "summary"
    summary_div = soup.find("div", class_="summary")
    if summary_div:
        plot = ""
        # When the summary contains two or more <br> tags, keep only the
        # content before the first one; otherwise take the whole text
        br_tags = summary_div.find_all("br")
        if len(br_tags) >= 2:
            plot = "".join(str(summary_div.contents[i]) for i in range(summary_div.contents.index(br_tags[0])))
        else:
            plot = summary_div.text.strip()
        episode_data["plot"] = plot.strip()
    else:
        episode_data["plot"] = "No plot available"

    return episode_data
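
# For reference, fetch_episode_details returns a dict shaped like this
# (placeholder values; a key may be absent if its row fails to parse):
# {
#     "title_romaji": "...",     # from the "romaji" row
#     "title_en": "...",         # from the "official" row
#     "air_date": "YYYY-MM-DD",  # from the "date" row, reformatted
#     "rating": "...",           # from the "rating" row
#     "anidbid": "<episode id>",
#     "plot": "...",             # from the "summary" div
# }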

def fetch_episodes(season_id):
    """Fetch episodes for a specific season based on its AniDB ID and hardcoded table item ranges."""
    season_ranges = {
        6327: range(1, 16),  # Bakemonogatari - first 15 items
        8658: range(1, 12),  # Nisemonogatari - first 11 items
        9453: range(2, 6),  # Nekomonogatari Kuro - items 2-5
        9183: [i for i in range(1, 27) if i not in [6, 11, 16]],  # Monogatari: Second Season - items 1-26, excluding 6, 11, and 16
        10046: range(2, 7),  # Hanamonogatari - items 2-6
        10891: range(2, 6),  # Tsukimonogatari - items 2-5
        11350: range(2, 13),  # Owarimonogatari - items 2-12, starting at episode 2
        11827: range(1, 13),  # Koyomimonogatari - first 12 items
        8357: range(1, 4),  # Kizumonogatari - first 3 items
        13691: range(2, 8),  # Zoku Owarimonogatari - items 2-7
    }
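
    # These are 1-based row indices into AniDB's episode table; rows outside
    # the range (presumably specials, trailers, or other non-episode entries)
    # are skipped below via `if idx + 1 not in season_range`.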

    base_url = f"https://anidb.net/anime/{season_id}"
    print(f"[FETCH] {base_url}")
    try:
        res = requests.get(base_url, headers=HEADERS, timeout=30)
        res.raise_for_status()
    except Exception as e:
        print(f"[ERROR] Failed to fetch page: {e}")
        return []

    soup = BeautifulSoup(res.text, "html.parser")
    table = soup.find("table", id="eplist", class_="eplist")
    if not table:
        print("[WARN] Episode table not found.")
        return []

    tbody = table.find("tbody")
    if not tbody:
        print("[WARN] Table body not found.")
        return []

    episodes = []
    rows = tbody.find_all("tr")

    # Get the specific range of rows based on the season_id
    season_range = season_ranges.get(season_id)
    if not season_range:
        print(f"[ERROR] No episode range found for season ID {season_id}.")
        return []

    episode_number = 1  # Start episode count from 1
    for idx, row in enumerate(rows):
        # Only process rows within the specified range for the season
        if idx + 1 not in season_range:
            continue

        try:
            ep_id_cell = row.find("td", class_="id eid")
            title_cell = row.find("td", class_="title name episode")
            if not ep_id_cell or not title_cell:
                continue

            # Get the episode ID from the href of the <a> tag
            episode_link = ep_id_cell.find("a")["href"]
            episode_id = episode_link.split("/")[-1]

            episode_details = fetch_episode_details(episode_id)
            time.sleep(0.5)  # Be polite to AniDB between requests
            if not episode_details:
                continue

            # For Owarimonogatari, start counting from episode 2
            if season_id == 11350 and episode_number == 1:
                episode_number += 1  # Skip episode 1

            episodes.append({
                "number": episode_number,
                "title_en": episode_details.get("title_en", "Unknown Title"),
                "title_romaji": episode_details.get("title_romaji", "Unknown Romaji"),
                "plot": episode_details.get("plot", "No plot available"),
                "air_date": episode_details.get("air_date", "Unknown Date"),
                "rating": episode_details.get("rating", "Unknown Rating"),
                "year": episode_details.get("air_date", "")[:4] or "Unknown",  # Year (YYYY) from the air date
                "anidbid": episode_details["anidbid"]  # Set in fetch_episode_details
            })

            # Increment episode number after each episode
            episode_number += 1

        except Exception as e:
            print(f"[SKIP] Row parse error: {e}")
            continue

    return episodes


def pretty_print_xml(element):
    """Pretty print XML with newlines and indentation."""
    tree = ET.ElementTree(element)
    from io import BytesIO
    import xml.dom.minidom

    rough_string = BytesIO()
    tree.write(rough_string, encoding="utf-8", xml_declaration=True)
    rough_string.seek(0)
    dom = xml.dom.minidom.parse(rough_string)
    return dom.toprettyxml(indent=" ")
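
# Example (illustrative): for root = ET.Element("episodedetails") with a
# single child <title>Example</title>, pretty_print_xml(root) returns roughly:
#   <?xml version="1.0" ?>
#   <episodedetails>
#    <title>Example</title>
#   </episodedetails>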

def write_nfo(episode, season_name, season_number):
    """Writes an NFO file for a given episode."""
    # Check if anidbid is missing
    if "anidbid" not in episode:
        print(f"[WARN] Missing anidbid for episode {episode['number']}. Skipping this episode.")
        return

    # Use season_number to create the folder
    season_folder = f"Season {season_number:02}"  # Format as 'Season 01', 'Season 02', etc.
    season_dir = os.path.join("monogatari_nfo", season_folder)
    os.makedirs(season_dir, exist_ok=True)

    file_name = f"S{season_number:02}E{episode['number']:02}.nfo"
    file_path = os.path.join(season_dir, file_name)

    root = ET.Element("episodedetails")
    ET.SubElement(root, "plot").text = episode["plot"]
    ET.SubElement(root, "originaltitle").text = episode["title_en"]
    ET.SubElement(root, "title").text = episode["title_romaji"]
    ET.SubElement(root, "year").text = episode["year"]
    ET.SubElement(root, "aired").text = episode["air_date"]
    ET.SubElement(root, "rating").text = episode["rating"]
    ET.SubElement(root, "anidbid").text = str(episode["anidbid"])
    ET.SubElement(root, "episode").text = str(episode["number"])
    ET.SubElement(root, "season").text = str(season_number)

    pretty_xml = pretty_print_xml(root)

    with open(file_path, "w", encoding="utf-8") as f:
        f.write(pretty_xml)

    print(f"[WRITE] {file_path}")


def main():
    for season_number, season in enumerate(SEASONS, start=1):
        season_name = season["name"]
        season_id = season["id"]
        print(f"\n[INFO] Scraping episodes for {season_name} (ID: {season_id})...")

        episodes = fetch_episodes(season_id)
        if not episodes:
            print(f"[FAIL] No episodes found for {season_name}.")
            continue

        # Write .nfo files for each episode in the current season
        for episode in episodes:
            write_nfo(episode, season_name, season_number)

    print("\n✅ Done generating .nfo files for all Monogatari seasons.")


if __name__ == "__main__":
    main()
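
# Usage: run the script directly (it takes no arguments); .nfo files are
# written under ./monogatari_nfo/ relative to the working directory.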