minecraft-crawler/collect-mod-descriptions.py
hiina dd61e75c27
All checks were successful
/ deploy (push) Successful in 3s
improve website/regen mod list
added the mcstatus image, navbar to mod list.

still would like images/webums, later.
2024-08-22 16:41:57 -06:00

694 lines
25 KiB
Python

#!/usr/bin/env python
"""
From the packwiz toml files, look up the mod descriptions from the modrinth API
and collect them into an html page using yattag.
The toml files look like:
```
name = "almostunified-fabric-1.20.1-0.9.4"
filename = "almostunified-fabric-1.20.1-0.9.4.jar"
side = "both"
[download]
url = "https://cdn.modrinth.com/data/sdaSaQEz/versions/iVBf0ICr/almostunified-fabric-1.20.1-0.9.4.jar"
hash = "ec47335d9d8b98c107a2b4cb4bada845669728f78c65df2ef2ee5e06d9ac866d276d09892896c216e30eb028a6fdd0a6cc92a8741eee1c14fa3d0ca24444cbdb"
hash-format = "sha512"
mode = "url"
[option]
optional = false
default = false
[update.modrinth]
mod-id = "sdaSaQEz"
version = "iVBf0ICr"
```
The `mod-id` under `[update.modrinth]` is the project ID that gets looked up via the Modrinth API.
"""
import os
import toml
from yattag import Doc, indent
import requests_cache
import logging
from tqdm import tqdm
import os
import markdown
from bs4 import BeautifulSoup
from ordered_set import OrderedSet
from dataclasses import dataclass, field
from typing import List, Dict
# Log at INFO and above; the logger.debug() calls in this module are
# therefore silent unless this level is lowered.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Shared HTTP session used for every Modrinth API request in this script.
# Responses are cached under the name "collectmoddescriptions";
# cache_control=True presumably makes requests_cache honor the server's
# Cache-Control headers for expiry — confirm against the requests_cache docs.
session = requests_cache.CachedSession("collectmoddescriptions", cache_control=True)
@dataclass
class ModCategory:
    """A titled group of mods shown as one section of the generated mod list.

    Categories nest through ``subcategories``; the HTML renderer walks them
    recursively.
    """

    # Heading text of the section.
    title: str
    # Short blurb rendered in a paragraph below the heading.
    description: str
    # Project slugs in display order; matched against the ``slug`` field of
    # the fetched project records.
    mod_slugs: OrderedSet[str]
    # Nested categories rendered inside this one.
    subcategories: List["ModCategory"] = field(default_factory=list)
    # When true, the category's content is wrapped in a <details> element
    # with a "show mods" summary instead of being rendered directly.
    hide_by_default: bool = False
# Hand-maintained layout of the generated mod list: top-level categories in
# display order, each optionally holding subcategories. Slugs are matched
# against the ``slug`` field of the fetched project data; main() warns about
# slugs that are in the pack but not listed here, and vice versa.
mod_categories = [
    ModCategory(
        title="Centerpieces",
        description="Mods that really tie the pack together.",
        # No mods directly at this level; everything lives in subcategories.
        mod_slugs=OrderedSet(),
        subcategories=[
            ModCategory(
                title="VR",
                description="Make the game an actual VR game.",
                mod_slugs=OrderedSet(
                    [
                        "vivecraft",
                        "immersivemc",
                        "vrjesterapi",
                        "simple-voice-chat",
                    ]
                ),
            ),
            ModCategory(
                title="Crawlin'",
                description="Make the murder-hobo dungeon crawling lifestyle viable and fun.",
                mod_slugs=OrderedSet(
                    [
                        "when-dungeons-arise",
                        "paladins-and-priests",
                        "rogues-and-warriors",
                        "wizards",
                        "archers",
                        "simply-skills",
                        "combat-roll",
                        "wall-jump-txf",
                        "myloot",
                    ]
                ),
            ),
        ],
    ),
    ModCategory(
        title="Quality of Life",
        description="Make the basics less tedious.",
        mod_slugs=OrderedSet(
            [
                "cadmus",
                "jei",
                "jade",
                "mob-plaques",
                "inventory-sorting",
                "reacharound",
                "xaeros-world-map",
                "xaeros-minimap",
                "simple-voice-radio",
                "trashslot",
            ]
        ),
    ),
    ModCategory(
        title="New Content (Spoilers)",
        description="New stuff you'll come across but is usually self-explanatory or has in-game guides. Show only if you want a preview of what's in the pack.",
        # No mods directly at this level; everything lives in subcategories.
        mod_slugs=OrderedSet(),
        subcategories=[
            ModCategory(
                title="Exploration",
                description="Make traversal more interesting.",
                mod_slugs=OrderedSet(
                    [
                        "lets-do-camping",
                        "gliders",
                        "grappling-hook-mod-fabric",
                        "small-ships",
                        "mythic-mounts",
                        "fwaystones",
                        "explorers-compass",
                        "natures-compass",
                        "portable-base-(move-your-base-around)",
                    ]
                ),
            ),
            ModCategory(
                title="Combat",
                description="More ways to kill.",
                mod_slugs=OrderedSet(
                    [
                        "better-combat",
                        "mythquest",
                        "mariums-soulslike-weaponry",
                        "epic-knights-shields-armor-and-weapons",
                        "epic-knightsnmages-fabric",
                        "simply-swords",
                        "basic-weapons",
                        "jewelry",
                        "artifacts",
                        "mythic-upgrades",
                        "tieredz",
                        "kevs-tieredz-modifiers",
                        "kevs-equipment-sets",
                        "immersive-armors",
                    ]
                ),
            ),
            ModCategory(
                title="Mobs",
                description="More stuff to kill.",
                mod_slugs=OrderedSet(
                    [
                        "guard-villagers-(fabricquilt)",
                        "bosses-of-mass-destruction",
                        "cave-dweller-fabric",
                        "friends-and-foes",
                        "mobs-of-mythology",
                        "mutant-monsters",
                    ]
                ),
            ),
            ModCategory(
                title="Magic",
                description="Complicated ways to do stuff.",
                mod_slugs=OrderedSet(
                    [
                        "archon",
                        "invocations",
                        "runes",
                        "more-totems-of-undying",
                        "zephyr-mod",
                        "vein-mining",
                        "zenith",
                        "revive",
                    ]
                ),
            ),
            ModCategory(
                title="Structures",
                description="Places to crawl.",
                mod_slugs=OrderedSet(
                    [
                        "minecells",
                        "when-dungeons-arise-seven-seas",
                        "dungeon-now-loading",
                        "dungeons-and-taverns",
                        "wabi-sabi-structures",
                        "immersive-structures",
                        "immersive-structures-ii",
                        "the-graveyard-fabric",
                        "the-lost-castle",
                        "kobold-outposts",
                        "repurposed-structures-fabric",
                        "adventurez",
                        "better-archeology",
                        "aquamirae",
                        "villagersplus",
                        "villages-and-pillages",
                        "ct-overhaul-village",
                        "friends-and-foes-beekeeper-hut-fabric",
                        "friends-and-foes-flowery-mooblooms-fabric",
                        "gazebos",
                    ]
                ),
            ),
            ModCategory(
                title="Biomes",
                description="More nature.",
                mod_slugs=OrderedSet(
                    [
                        "terralith",
                        "bingusandfloppa",
                        "promenade",
                        "eldritch-end",
                        "naturalist",
                        "more-mob-variants",
                        "valentines-blessing-lilypads-roses",
                        "nyctophobia",
                    ]
                ),
            ),
            ModCategory(
                title="Ambiance",
                description="More pleasant sights and sounds.",
                mod_slugs=OrderedSet(
                    [
                        "ambientsounds",
                        "dynamic-lights",
                        "presence-footsteps",
                        "sound-physics-remastered",
                        "nicer-skies",
                        "distanthorizons",
                        "true-darkness-fabric",
                    ]
                ),
            ),
            ModCategory(
                title="Vanilla Overhauls",
                description="Expanded vanilla content.",
                mod_slugs=OrderedSet(
                    [
                        "mcda",
                        "mcdw",
                        "oxidized",
                        "hellions-sniffer+",
                        "geophilic",
                        "betternether",
                        "betterend",
                        "deeperdarker",
                        "nether-depths-upgrade",
                        "yungs-better-desert-temples",
                        "yungs-better-dungeons",
                        "yungs-better-end-island",
                        "yungs-better-jungle-temples",
                        "yungs-better-mineshafts",
                        "yungs-better-nether-fortresses",
                        "yungs-better-ocean-monuments",
                        "yungs-better-strongholds",
                        "yungs-better-witch-huts",
                        "yungs-bridges",
                        "yungs-extras",
                        "endrem",
                        "macaws-bridges",
                        "block-runner",
                        "creeper-overhaul",
                    ]
                ),
            ),
            ModCategory(
                title="Decoration",
                description="More blocks for structures to be built with (your own or otherwise).",
                mod_slugs=OrderedSet(
                    [
                        "convenient-decor",
                        "decorative-blocks",
                        "double-doors",
                        "handcrafted",
                        "macaws-doors",
                        "macaws-fences-and-walls",
                        "macaws-furniture",
                        "macaws-lights-and-lamps",
                        "macaws-paintings",
                        "macaws-paths-and-pavings",
                        "macaws-roofs",
                        "macaws-trapdoors",
                        "macaws-windows",
                    ]
                ),
            ),
        ],
        # Collapsed behind a "(Show mods)" <details> toggle in the HTML output.
        hide_by_default=True,
    ),
    ModCategory(
        title="Internals",
        description="Mods that make the pack work, but you don't have to be aware of, unless you're really curious.",
        # No mods directly at this level; everything lives in subcategories.
        mod_slugs=OrderedSet(),
        subcategories=[
            ModCategory(
                title="Balancing",
                description="Tweak the balance for multiplayer crawlin'",
                mod_slugs=OrderedSet(
                    [
                        "village-spawn-point",
                        "dungeon-difficulty",
                        "rebalance",
                        "protection-balancer",
                        "starter-kit",
                        "yigd",
                        "fallingtree",
                    ]
                ),
            ),
            ModCategory(
                title="Optimization",
                description="Make the game run faster",
                mod_slugs=OrderedSet(
                    [
                        "badoptimizations",
                        "clumps",
                        "ebe",
                        "entityculling",
                        "entitytexturefeatures",
                        "immediatelyfast",
                        "indium",
                        "iris",
                        "lithium",
                        "memoryleakfix",
                        "moreculling",
                        "sodium",
                        "starlight",
                        "ferrite-core",
                        "deuf-refabricated",
                        "spark",
                        "modernfix",
                        "chunky",
                    ]
                ),
            ),
            ModCategory(
                title="Fixes",
                description="just bugfixes or annoyances.",
                mod_slugs=OrderedSet(
                    [
                        "dimensional-sync-fixes",
                        "yungs-menu-tweaks",
                        "too-fast",
                        "no-chat-reports",
                        "netherportalfix",
                        "neruina",
                        "debugify",
                        "packet-fixer",
                        "remove-terralith-intro-message",
                    ]
                ),
            ),
            ModCategory(
                title="Libraries",
                description="Dependencies that are used by other mods.",
                mod_slugs=OrderedSet(
                    [
                        "skills",
                        "heracles",
                        "zenith-attributes",
                        "attributes",
                        "way2wayfabric",
                        "owo-lib",
                        "spell-power",
                        "spell-engine",
                        "spoornpacks",
                        "projectile-damage-attribute",
                        "loot-patcher",
                        "libz",
                        "legendary-tooltips",
                        "just-enough-resources-jer",
                        "geckoanimfix",
                        "forge-config-screens",
                        "obscure-api",
                        "vr-combat",
                        "prism-lib",
                        "yungs-api",
                        "kevs-library",
                        "lithostitched",
                        "load-my-resources",
                        "questbind",
                        "lmft",
                        "autotag",
                        "almost-unified",
                        "architectury-api",
                        "attributefix",
                        "azurelib",
                        "azurelib-armor",
                        "balm",
                        "bclib",
                        "bookshelf-lib",
                        "cardinal-components-api",
                        "cloth-config",
                        "collective",
                        "creativecore",
                        "cristel-lib",
                        "dawn",
                        "terrablender",
                        "mc-vr-api",
                        "smartbrainlib",
                        "ranged-weapon-api",
                        "emi",
                        "entity-model-features",
                        "fabric-api",
                        "fabric-language-kotlin",
                        "fakerlib",
                        "forge-config-api-port",
                        "fzzy-core",
                        "gear-core",
                        "geckolib",
                        "iceberg",
                        "modmenu",
                        "necronomicon",
                        "patchouli",
                        "playeranimator",
                        "polymorph",
                        "puzzles-lib",
                        "resourceful-config",
                        "resourceful-lib",
                        "sodium-extra",
                        "trinkets",
                        "yacl",
                        # NOTE(review): the next three slugs already appear
                        # earlier in this list; presumably the OrderedSet
                        # collapses the repeats — candidates for removal.
                        "azurelib",
                        "azurelib-armor",
                        "autotag",
                        "argonauts",
                        "globalpacks",
                        "wasabiwhisper-harmonia",
                        "lexiconfig",
                    ]
                ),
            ),
        ],
        # Collapsed behind a "(Show mods)" <details> toggle in the HTML output.
        hide_by_default=True,
    ),
]
def collect_mod_info(directory):
    """Fetch Modrinth project data for every packwiz mod file in *directory*.

    Each ``*.toml`` file is parsed and, when it carries an
    ``[update.modrinth]`` table with a ``mod-id``, the matching project is
    requested from the Modrinth v2 API through the module-level ``session``.
    Files without a Modrinth id are skipped.

    Args:
        directory: Path of the folder holding the packwiz ``.toml`` files.

    Returns:
        dict mapping each Modrinth project id to the decoded JSON project
        record returned by the API.

    Raises:
        RuntimeError: If the API answers with anything other than HTTP 200.
    """
    mods = {}
    # Sorted so the result order (and any output derived from it) is
    # reproducible; os.listdir() returns entries in arbitrary order.
    toml_files = sorted(
        name for name in os.listdir(directory) if name.endswith(".toml")
    )
    for filename in tqdm(toml_files, desc="Processing mod files", unit="file"):
        file_path = os.path.join(directory, filename)
        logger.debug("Processing file: %s", file_path)
        # TOML is UTF-8 by specification; do not depend on the locale default.
        with open(file_path, "r", encoding="utf-8") as file:
            data = toml.load(file)

        # The file is already closed here, before any network I/O happens.
        if "update" not in data or "modrinth" not in data["update"]:
            continue
        mod_id = data["update"]["modrinth"].get("mod-id")
        if not mod_id:
            continue

        url = f"https://api.modrinth.com/v2/project/{mod_id}"
        response = session.get(url)
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to fetch data for mod ID: {mod_id} "
                f"(HTTP {response.status_code})"
            )
        mods[mod_id] = response.json()
    return mods
def get_modrinth_url(slug):
    """Return the modrinth.com project page URL for the mod with *slug*."""
    return "https://modrinth.com/mod/{}".format(slug)
def make_mod_descriptions_html(info, doc, tag, text):
    """Append one collapsible ``<article>`` describing a mod to *doc*.

    The ``<summary>`` holds a link to the mod's Modrinth page (optional icon
    plus title) followed by the short description. The expanded part holds
    the long description, converted from markdown to HTML.

    The long description comes from a remote API and is written into the
    page without escaping, so it is cleaned first: ``<iframe>`` and
    ``<script>`` elements are removed and inline ``on*`` event-handler
    attributes are stripped. This is best-effort hardening, not a complete
    HTML sanitizer (``javascript:`` URLs, for example, are not handled).

    Args:
        info: Project record; reads ``slug``, ``title``, ``description``,
            ``body`` and the optional ``icon_url``.
        doc: yattag document receiving the markup.
        tag: yattag ``tag`` context-manager factory bound to *doc*.
        text: yattag ``text`` function bound to *doc*.
    """
    with tag("article", klass="mod-description"):
        with tag("details", klass="mod-description-details"):
            with tag("summary"):
                with tag("span"):
                    with tag("a", href=get_modrinth_url(info["slug"])):
                        icon_url = info.get("icon_url")
                        if icon_url:
                            doc.stag("img", src=icon_url, klass="mod-icon")
                        text(info["title"])
                    text(" : ")
                with tag("span", klass="mod-summary"):
                    text(info["description"])
            with tag("div", klass="full-description"):
                bodydoc = BeautifulSoup(
                    markdown.markdown(info["body"]),
                    features="html.parser",
                )
                # Lazy-load images so collapsed descriptions cost nothing.
                for img in bodydoc.find_all("img"):
                    img["loading"] = "lazy"
                # Drop embeds and scripts. Re-query after each removal so an
                # element nested inside an already removed one is never
                # touched again.
                unwanted = bodydoc.find(["iframe", "script"])
                while unwanted is not None:
                    unwanted.decompose()
                    unwanted = bodydoc.find(["iframe", "script"])
                # Strip inline event handlers (onclick, onerror, ...).
                for element in bodydoc.find_all(True):
                    handler_names = [
                        name
                        for name in element.attrs
                        if name.lower().startswith("on")
                    ]
                    for name in handler_names:
                        del element.attrs[name]
                doc.asis(bodydoc.prettify())
def _render_category_content(category, mod_info_by_slug, doc, tag, text):
    """Render the mods of *category*, then each subcategory, recursively.

    Slugs listed in the category but absent from *mod_info_by_slug* are
    skipped without comment. Every subcategory becomes a
    ``<section class="mod-subcategory">`` with a heading and a description;
    when it is flagged ``hide_by_default`` its content is wrapped in a
    ``<details>`` element.

    Args:
        category: Category whose ``mod_slugs`` and ``subcategories`` are read.
        mod_info_by_slug: dict mapping slug to project record.
        doc: yattag document receiving the markup.
        tag: yattag ``tag`` context-manager factory bound to *doc*.
        text: yattag ``text`` function bound to *doc*.
    """
    for mod_slug in category.mod_slugs:
        try:
            mod_record = mod_info_by_slug[mod_slug]
        except KeyError:
            # Categorized manually but not part of the pack.
            continue
        make_mod_descriptions_html(mod_record, doc, tag, text)

    for child in category.subcategories:
        with tag("section", klass="mod-subcategory"):
            with tag("h3"):
                text(child.title)
            with tag("p"):
                text(child.description)
            if not child.hide_by_default:
                _render_category_content(child, mod_info_by_slug, doc, tag, text)
            else:
                with tag("details"):
                    with tag("summary"):
                        text("Show mods")
                    _render_category_content(
                        child, mod_info_by_slug, doc, tag, text
                    )
def generate_html(mod_info):
    """Build the complete mod-list HTML page and return it as a string.

    The page consists of a ``<head>`` with two stylesheets and a ``<main>``
    container holding a navigation header followed by one
    ``<section class="mod-category">`` per entry of ``mod_categories``.
    Categories flagged ``hide_by_default`` are wrapped in ``<details>``.

    Args:
        mod_info: dict mapping project id to project record; only the
            records (re-indexed by their ``slug``) are used.

    Returns:
        The HTML document as produced by yattag's ``getvalue()``.
    """
    doc, tag, text = Doc().tagtext()
    # Categories refer to mods by slug, so index the records that way.
    mod_info_by_slug = {record["slug"]: record for record in mod_info.values()}

    doc.asis("<!DOCTYPE html>")
    with tag("html"):
        with tag("head"):
            doc.stag("meta", charset="utf-8")
            doc.line("title", "/vrg/ Crawler: Mod List")
            doc.stag("link", rel="stylesheet", href="pico.red.min.css")
            doc.stag("link", rel="stylesheet", href="style.css")
        with tag("main", klass="container"):
            with tag("header"):
                with tag("nav"):
                    # Left side: site title and current page.
                    with tag("ul"):
                        with tag("li"):
                            doc.line("h1", "/vrg/ Crawler")
                        doc.line("li", "Mod List")
                    # Right side: links to the other pages.
                    with tag("ul"):
                        with tag("li"):
                            doc.line("a", "About", href="index.html")
            for category in mod_categories:
                with tag("section", klass="mod-category"):
                    doc.line("h2", category.title)
                    doc.line("p", category.description)
                    if not category.hide_by_default:
                        _render_category_content(
                            category, mod_info_by_slug, doc, tag, text
                        )
                    else:
                        with tag("details"):
                            doc.line("summary", "(Show mods)")
                            _render_category_content(
                                category, mod_info_by_slug, doc, tag, text
                            )
    return doc.getvalue()
def _dot_escape(value):
    """Escape backslashes and double quotes for a double-quoted DOT string."""
    return str(value).replace("\\", "\\\\").replace('"', '\\"')


def generate_dot_graph(mod_info, dependency_tree):
    """Return a Graphviz DOT digraph of the mods and their dependencies.

    Every entry of *mod_info* becomes a box-shaped node labelled with the mod
    title and linked to its Modrinth page. Every entry of *dependency_tree*
    contributes one edge per listed dependency. Ids, titles and URLs are
    escaped, so a double quote in a mod title no longer produces invalid DOT.

    Args:
        mod_info: dict mapping project id to project record; reads ``slug``
            and ``title``.
        dependency_tree: dict mapping project id to a dict whose
            ``"dependencies"`` entry is an iterable of project ids.

    Returns:
        The DOT source as a single string without a trailing newline.

    Raises:
        KeyError: If a *dependency_tree* value has no ``"dependencies"`` key.
    """
    lines = ["digraph ModDependencies {", " node [shape=box];"]
    for mod_id, info in mod_info.items():
        url = get_modrinth_url(info["slug"])
        lines.append(
            f' "{_dot_escape(mod_id)}"'
            f' [label="{_dot_escape(info["title"])}", URL="{_dot_escape(url)}"];'
        )
    for mod_id, deps in dependency_tree.items():
        for dep in deps["dependencies"]:
            lines.append(f' "{_dot_escape(mod_id)}" -> "{_dot_escape(dep)}";')
    lines.append("}")
    return "\n".join(lines)
def generate_text(mod_info):
    """Return a plain-text summary with one entry per mod.

    Each entry lists title, slug, categories and short description, followed
    by the first 200 characters of the long description after it has been
    converted from markdown to HTML and reduced to its text content.

    Args:
        mod_info: dict mapping project id to project record; reads ``title``,
            ``slug``, ``categories``, ``description`` and ``body``.

    Returns:
        All entries concatenated; the empty string when *mod_info* is empty.
    """
    entries = []
    for record in mod_info.values():
        header = (
            f"Title: {record['title']}\n"
            f"Slug: {record['slug']}\n"
            f"Categories: {', '.join(record['categories'])}\n"
            f"Summary: {record['description']}\n"
        )
        # Markdown -> HTML -> visible text only.
        rendered = markdown.markdown(record["body"])
        soup = BeautifulSoup(rendered, features="html.parser")
        plain = soup.get_text(separator="\n", strip=True)
        entries.append(f"{header}Description (truncated):\n{plain[:200]}\n\n")
    return "".join(entries)
def _walk_categories(categories, parent_path=()):
    """Yield ``(display_path, category)`` for every category at any depth."""
    for category in categories:
        path = parent_path + (category.title,)
        yield " / ".join(path), category
        yield from _walk_categories(category.subcategories, path)


def main(directory, output_filename, output_format="html"):
    """Collect mod data, report categorization gaps and write the output file.

    Args:
        directory: Folder containing the packwiz ``.toml`` files.
        output_filename: Path of the file to write (UTF-8).
        output_format: ``"html"`` or ``"text"``.

    Raises:
        ValueError: If *output_format* is not supported. This is checked
            before any mod data is fetched.
    """
    generators = {"html": generate_html, "text": generate_text}
    # Fail fast: collecting the mod data is the slow part of the run.
    if output_format not in generators:
        raise ValueError(
            f"Invalid output format: {output_format!r} "
            f"(expected one of: {', '.join(generators)})"
        )

    mod_info = collect_mod_info(directory)

    # Compare the slugs present in the pack with the manual categorization.
    pack_slugs = {info["slug"] for info in mod_info.values()}
    category_paths_by_slug = {}
    # Walk every nesting level, like the renderer does, and record the
    # category the slug is actually listed in (not just its top-level parent).
    for path, category in _walk_categories(mod_categories):
        for slug in category.mod_slugs:
            category_paths_by_slug.setdefault(slug, []).append(path)

    for slug, paths in category_paths_by_slug.items():
        if len(paths) > 1:
            logger.warning(
                "Slug '%s' is present in multiple categories: %s",
                slug,
                ", ".join(paths),
            )

    categorized_slugs = set(category_paths_by_slug)
    # Sorted so the warnings are stable from run to run.
    uncategorized = sorted(pack_slugs - categorized_slugs)
    not_in_pack = sorted(categorized_slugs - pack_slugs)
    if uncategorized:
        logger.warning(
            "Mods not in manual categories:\n%s", ",\n".join(uncategorized)
        )
    if not_in_pack:
        logger.warning(
            "Mods manually categorized but not in pack:\n%s",
            ",\n".join(not_in_pack),
        )

    output_content = generators[output_format](mod_info)
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(output_content)
    print(f"File with mod descriptions has been generated: {output_filename}")
if __name__ == "__main__":
    import sys

    # Only formats that main() can actually produce are accepted here.
    # ".json" and ".dot" used to be accepted as well, but main() has no
    # handling for them, so such runs always ended in a ValueError.
    formats_by_suffix = {".html": "html", ".txt": "text"}

    if len(sys.argv) != 3:
        print(
            "Usage: python script.py <directory> <output_filename>",
            file=sys.stderr,
        )
        print(
            "output_filename should end with '.html' or '.txt'",
            file=sys.stderr,
        )
        sys.exit(1)

    directory = sys.argv[1]
    output_filename = sys.argv[2]

    # The suffix check is case-insensitive, e.g. "MODS.HTML" is accepted.
    lowered_filename = output_filename.lower()
    output_format = next(
        (
            fmt
            for suffix, fmt in formats_by_suffix.items()
            if lowered_filename.endswith(suffix)
        ),
        None,
    )
    if output_format is None:
        print(
            "Error: output_filename must end with '.html' or '.txt'",
            file=sys.stderr,
        )
        sys.exit(1)

    main(directory, output_filename, output_format)