# Step 2 (manual preparation): convert all 17 HTML files from GBK to UTF-8, and replace <meta charset="gbk" /> with <meta charset="utf-8" /> in each of them.
import os
from bs4 import BeautifulSoup
from ebooklib import epub
# 1. Chapter order and structure.
# Each entry maps a source HTML file (looked up inside BASE_DIR) to the title
# used for that chapter; the list order is the reading order of the book.
CHAPTERS = [
    {"file": file_name, "title": title}
    for file_name, title in (
        ("index.html", "首页"),
        ("introduce.html", "游戏介绍"),
        ("system.html", "系统介绍"),
        ("system-a.html", "系统:言语 (关键字)"),
        ("system-b.html", "系统:命运量"),
        ("system-c.html", "系统:流程地图"),
        ("character.html", "登场人物:Saki"),
        ("character_0.html", "登场人物:名波行人"),
        ("character_2.html", "登场人物:相羽 和奏"),
        ("character_3.html", "登场人物:姬野 美月"),
        ("character_4.html", "登场人物:桐原 一叶"),
        ("character_5.html", "登场人物:姬野 美星"),
        ("character_6.html", "登场人物:古宫 舞"),
        ("character_7.html", "登场人物:桥仓 卓也"),
        ("character_8.html", "登场人物:杂贺 恭平"),
        ("guide.html", "游戏攻略"),
        ("staff.html", "制作人员"),
    )
]

# Directory (relative to the working directory) that holds the HTML pages and images.
BASE_DIR = "中文手册"
# 2. Adaptive EPUB stylesheet (includes the misspelling patch, e-ink friendly colours, etc.).
# This text becomes the content of the "style/default.css" item created in create_epub().
# Several selectors target class names produced by the DOM clean-up functions in this file
# rather than classes of the source pages: .epub-link, .story-card and .event-line.
# NOTE(review): ".unknwon" looks intentionally misspelled — the in-stylesheet comment says this
# section patches spelling mistakes in the source pages; confirm against the HTML before "fixing" it.
EPUB_CSS = """
body { font-family: sans-serif; line-height: 1.6; margin: 0; padding: 2%; color: #333; }
img { max-width: 100%; height: auto; display: block; margin: 0.8em auto; border-radius: 4px; }
h1, h2, h3 { color: #222; border-bottom: 2px solid #ccc; padding-bottom: 5px; margin-top: 1.5em; }
p { margin: 0.8em 0; text-align: justify; }
a { color: #3498db; text-decoration: none; }
/* 页面特殊元素重构 */
.epub-link { display: inline-block; margin-top: 10px; padding: 6px 12px; background: #e8f4f8; border: 1px solid #bce8f1; border-radius: 4px; color: #31708f; font-weight: bold; }
#index_first { text-align: center; margin-top: 10vh; font-size: 1.2em; }
/* 介绍页 (Introduce) */
.introduce img { max-width: 60%; margin: 0 auto; box-shadow: 2px 2px 10px rgba(0,0,0,0.2); }
.introduce ul { list-style: none; padding: 0; text-align: center; color: #666; font-size: 0.9em; background: #f9f9f9; padding: 10px; border-radius: 6px; }
/* 系统介绍页 (System) */
.system .block { border: 1px solid #ddd; padding: 15px; margin-bottom: 20px; border-radius: 6px; background: #fdfdfd; box-shadow: 0 1px 3px rgba(0,0,0,0.05); }
/* 角色介绍卡片化 */
.character #cmain { display: flex; flex-direction: column; align-items: center; text-align: center; }
.character #cmain img { max-width: 55%; margin-bottom: 15px; border-radius: 8px; }
.character #texts { width: 100%; text-align: left; }
.character ul { background: #f5f6fa; padding: 12px 20px; border-radius: 5px; border-left: 4px solid #7f8fa6; list-style-type: square; }
/* Staff 页面列表 */
.staff ul { list-style-type: none; padding-left: 0; }
.staff > ul > li { background: #f4f4f4; margin-bottom: 10px; padding: 10px; border-radius: 5px; border-left: 4px solid #888; }
.staff ul ul { margin-top: 8px; padding-left: 15px; background: #fff; padding: 8px; border-radius: 4px; }
.staff ul ul li { border-bottom: 1px dashed #eee; padding: 4px 0; }
/* 攻略页 (Guide) 扁平化 UI */
.guide { margin-top: 1em; }
.guide h3 { background: #eee; padding: 8px 12px; border-left: 5px solid #555; border-bottom: none; border-radius: 4px; }
.story-card { margin-bottom: 1.2em; padding: 12px; border: 1px solid #ddd; border-radius: 6px; background: #fff; box-shadow: 0 1px 3px rgba(0,0,0,0.05); }
.story-card > b { font-size: 1.15em; color: #111; display: block; margin-bottom: 8px; border-bottom: 1px dashed #eee; padding-bottom: 4px; }
.event-line { margin: 8px 0; line-height: 1.8; }
.event-line > b { display: inline-block; padding: 2px 8px; margin-right: 8px; border-radius: 4px; font-size: 0.9em; font-weight: bold; color: white; vertical-align: middle; }
.collect > b { background-color: #3498db; }
.use > b { background-color: #27ae60; }
.ending > b { background-color: #c0392b; }
/* 边缘情况及汉化组源码拼写错误修复 */
.unknwon { background-color: #f8f9fa; border: 1px dashed #7f8c8d; padding: 8px; color: #7f8c8d; margin: 8px 0; font-size: 0.9em; font-style: italic; }
.hint { color: #555; font-size: 0.9em; border-left: 3px solid #bdc3c7; margin: 6px 0; display: block; background: #fafafa; padding: 6px 10px; }
.jump { color: #d35400; font-size: 0.9em; font-weight: bold; display: block; margin: 6px 0; }
.jump::before { content: "➤ 跳转:"; }
.important, .tip { background-color: #fdf2e9; border: 1px dashed #e67e22; padding: 8px; color: #d35400; margin: 8px 0; font-size: 0.9em; border-radius: 4px; }
.collect span, .use span, .ending span, .unknwon span, .tip span { display: inline-block; background: #f1f2f6; color: #2f3542; padding: 0 8px; margin: 2px 6px 2px 0; border-radius: 12px; font-size: 0.85em; border: 1px solid #ced6e0; }
"""
def process_guide_dom(main_content):
    """Flatten the guide page's list markup into EPUB-safe ``<div>`` blocks.

    The tree rooted at *main_content* is modified in place:

    * the ``<div id="hintBox">`` element is removed;
    * every ``<a name="...">`` anchor hands its name over as the ``id`` of the
      next ``<h3>``/``<ul>``/``<div>`` sibling and is then removed; an anchor
      with no such sibling is kept, with ``name`` replaced by ``id``;
    * ``<li class="story">`` becomes ``<div class="story-card">`` and the
      ``<ul>`` that directly contains it becomes ``<div class="story-list">``;
    * a story's ``<ul class="events">`` becomes ``<div class="events-group">``
      (or is removed when it has no child elements) and each ``<li>`` inside
      it becomes ``<div class="event-line ...">``, keeping its old classes;
    * ``<span class="jump">`` becomes a ``<div>``;
    * ``<ul>`` elements with neither child elements nor text are removed;
    * an ``<h3>`` with no following sibling element, or followed by a
      content-less ``<ul>``, is removed.

    Args:
        main_content: BeautifulSoup ``Tag`` holding the guide page content.

    Returns:
        None. All changes are applied to *main_content* directly.
    """
    hint_box = main_content.find('div', id='hintBox')
    if hint_box is not None:
        hint_box.decompose()

    # Move each legacy <a name="..."> target onto the block that follows it.
    for anchor in main_content.find_all('a', attrs={"name": True}):
        anchor_name = anchor.get('name')
        next_node = anchor.find_next_sibling(['h3', 'ul', 'div'])
        if next_node is not None:
            next_node['id'] = anchor_name
            anchor.decompose()
        else:
            # Nothing to hand the target to: keep the anchor so links to it
            # still resolve, but swap the obsolete `name` attribute for `id`.
            del anchor['name']
            anchor['id'] = anchor_name

    # Turn story <li> items into cards and repair their containers.
    for story_li in main_content.find_all('li', class_='story'):
        # Look at the direct parent only. Searching upwards for any <ul>
        # ancestor would, once the real parent has been renamed by an earlier
        # sibling, convert an unrelated outer list instead.
        container = story_li.parent
        story_li.name = 'div'
        story_li['class'] = ['story-card']
        if container is not None and container.name == 'ul':
            container.name = 'div'
            container['class'] = ['story-list']

        events_ul = story_li.find('ul', class_='events')
        if events_ul is None:
            continue
        events_ul.name = 'div'
        events_ul['class'] = ['events-group']
        if not events_ul.find_all(recursive=False):
            # An events list without any child element carries nothing to show.
            events_ul.decompose()
            continue
        for event_li in events_ul.find_all('li'):
            previous_classes = event_li.get('class', [])
            if isinstance(previous_classes, str):
                # Some parsers expose `class` as one string instead of a list.
                previous_classes = previous_classes.split()
            event_li.name = 'div'
            event_li['class'] = ['event-line'] + list(previous_classes)

    # A jump marker is styled as a block, so make it a block-level element.
    for span_jump in main_content.find_all('span', class_='jump'):
        span_jump.name = 'div'

    # Drop lists that ended up completely empty.
    for ul in main_content.find_all('ul'):
        if not ul.find_all(recursive=False) and not ul.text.strip():
            ul.decompose()

    # Drop headings that no longer introduce anything.
    for h3 in main_content.find_all('h3'):
        next_node = h3.find_next_sibling()
        if next_node is None or (next_node.name == 'ul' and not next_node.contents):
            h3.decompose()
def clean_html_dom(soup, chapter_file):
    """Extract and sanitise the main content container of one manual page.

    The ``<div id="right">`` element of *soup* is cleaned in place:
    scripts, inline ``style`` attributes, ``width``/``height`` on images, the
    layout classes ``fl``/``fr``/``xr``, elements with class ``charlst`` or
    ``cc`` and "返回" buttons are removed; remaining buttons get the
    ``epub-link`` class. ``index.html`` gets an ``<img src="first.png">``
    inserted at the start of ``#index_first``; ``staff.html`` has every
    ``<style>`` element of the document removed; ``guide.html`` is
    additionally passed through ``process_guide_dom``. Finally the container
    loses its ``id`` and receives a single class derived from the file name.

    Args:
        soup: Parsed BeautifulSoup document of the page.
        chapter_file: File name of the page, e.g. ``"character_3.html"``.

    Returns:
        The cleaned container ``Tag``, or ``None`` when the page has no
        ``<div id="right">``.
    """
    main_content = soup.find('div', id='right')
    if main_content is None:
        return None

    # Remove every script inside the content area.
    for script in main_content.find_all('script'):
        script.decompose()

    # Rescue first.png for the landing page by inserting it as a real <img>.
    if chapter_file == "index.html":
        index_first = main_content.find('div', id='index_first')
        if index_first is not None:
            img_tag = soup.new_tag('img', src='first.png', alt='首页配图')
            index_first.insert(0, img_tag)

    # 1. Strip presentational attributes. find_all() only yields descendants,
    #    so the container's own inline style is handled separately.
    if main_content.has_attr('style'):
        del main_content['style']
    layout_classes = ('fl', 'fr', 'xr')
    for tag in main_content.find_all(True):
        if tag.has_attr('style'):
            del tag['style']
        if tag.name == 'img':
            for size_attr in ('width', 'height'):
                if tag.has_attr(size_attr):
                    del tag[size_attr]
        if tag.has_attr('class'):
            kept = [c for c in tag['class'] if c not in layout_classes]
            if kept:
                tag['class'] = kept
            else:
                # Avoid leaving an empty class="" attribute behind.
                del tag['class']

    # 2. Drop leftover navigation fragments.
    for unwanted in ['charlst', 'cc']:
        for el in main_content.find_all(class_=unwanted):
            el.decompose()

    # 3. Buttons: "返回" (back) links are removed, the rest are restyled.
    for btn in main_content.find_all('a', class_=lambda c: c and 'btn' in c):
        if '返回' in btn.text:
            btn.decompose()
        else:
            btn['class'] = ['epub-link']

    # 4. Remove the <style> elements of the staff page.
    if chapter_file == "staff.html":
        for style_tag in soup.find_all('style'):
            style_tag.decompose()

    # 5. The guide page needs its own structural flattening.
    if chapter_file == "guide.html":
        process_guide_dom(main_content)

    # Give the container one class named after the page family and drop the
    # layout id. Both "_" and "-" separate the family from the page suffix,
    # so "character_3.html" -> "character" and "system-a.html" -> "system";
    # the stylesheet keys its rules on these family names.
    stem = chapter_file.split('.')[0]
    base_class = stem.replace('-', '_').split('_')[0]
    main_content['class'] = [base_class]
    if main_content.has_attr('id'):
        del main_content['id']
    return main_content
def _add_images(book, skip_names):
    """Add every PNG/JPEG file found in BASE_DIR to *book*, except *skip_names*."""
    # Sorted so the manifest order does not depend on the file system.
    for filename in sorted(os.listdir(BASE_DIR)):
        lowered = filename.lower()
        if not lowered.endswith(('.png', '.jpg', '.jpeg')):
            continue
        if filename in skip_names:
            continue
        with open(os.path.join(BASE_DIR, filename), 'rb') as f:
            data = f.read()
        mime = "image/jpeg" if lowered.endswith(('jpg', 'jpeg')) else "image/png"
        img_item = epub.EpubImage(uid=filename, file_name=filename, media_type=mime, content=data)
        try:
            book.add_item(img_item)
        except ValueError as exc:
            # Still best-effort, but no longer silent.
            print(f"图片添加失败: {filename} ({exc}),跳过...")


def _render_chapter_html(title, body_html, with_heading):
    """Return the XHTML document text for one chapter."""
    heading = f"<h1>{title}</h1>" if with_heading else ""
    return "\n".join([
        "<!DOCTYPE html>",
        '<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops" lang="zh-CN" xml:lang="zh-CN">',
        "<head>",
        '<meta charset="utf-8" />',
        f"<title>{title}</title>",
        '<link rel="stylesheet" href="style/default.css" type="text/css" />',
        "</head>",
        "<body>",
        heading,
        # Void elements must be self-closed in XHTML.
        body_html.replace('<br>', '<br />').replace('<hr>', '<hr />'),
        "</body>",
        "</html>",
        "",
    ])


def create_epub():
    """Build the EPUB from the pages listed in CHAPTERS and write it to disk.

    Reads the HTML pages, the optional ``cover.jpg`` and all other images from
    BASE_DIR, cleans every page with ``clean_html_dom``, and writes
    ``古色迷宫轮舞曲_攻略手册_终极重构版.epub`` to the current working directory.
    Pages that are missing or have no content container are reported and
    skipped. The ``<h3>`` headings of ``guide.html`` become second-level
    table-of-contents entries.

    Returns:
        None.
    """
    book = epub.EpubBook()
    book.set_identifier('id_furuiro_maze_guide_final')
    book.set_title('古色迷宫轮舞曲 攻略手册')
    book.set_language('zh-CN')
    book.add_author('Yatagarasu / KDays汉化组')

    # UI-only images that are never referenced by the cleaned pages.
    skip_images = {'checkmark.png', 'icon.png', 'icon-gray.png', 'icon-white.png', 'nav.png'}

    # Native EPUB cover.
    cover_path = os.path.join(BASE_DIR, "cover.jpg")
    if os.path.exists(cover_path):
        with open(cover_path, 'rb') as c:
            book.set_cover("cover.jpg", c.read())
        # set_cover() already registers cover.jpg; adding it again from the
        # image scan would create a second item with the same file name.
        skip_images.add("cover.jpg")

    default_style = epub.EpubItem(uid="style_default", file_name="style/default.css", media_type="text/css", content=EPUB_CSS)
    book.add_item(default_style)

    _add_images(book, skip_images)

    epub_chapters = []
    guide_sub_toc_items = []  # second-level TOC entries collected from the guide page
    for chapter_info in CHAPTERS:
        file_name = chapter_info["file"]
        title = chapter_info["title"]
        filepath = os.path.join(BASE_DIR, file_name)
        if not os.path.exists(filepath):
            print(f"缺失文件: {filepath},跳过...")
            continue
        with open(filepath, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f.read(), 'html.parser')
        main_content = clean_html_dom(soup, file_name)
        if main_content is None:
            print(f"未找到正文容器: {filepath},跳过...")
            continue

        # Collect the guide's headings as sub-entries, giving an id to any
        # heading that did not inherit one from an anchor.
        if file_name == "guide.html":
            for idx, h3 in enumerate(main_content.find_all('h3')):
                heading_text = h3.get_text(strip=True)
                if not h3.has_attr('id'):
                    h3['id'] = f"guide_auto_{idx}"
                guide_sub_toc_items.append(epub.Link(f"guide.html#{h3['id']}", heading_text, h3['id']))

        chapter = epub.EpubHtml(title=title, file_name=file_name, lang='zh-CN')
        # The landing page carries no <h1>; every other chapter shows its title.
        chapter.content = _render_chapter_html(title, str(main_content), file_name != "index.html")
        chapter.add_item(default_style)
        book.add_item(chapter)
        epub_chapters.append(chapter)

    # Global table of contents.
    toc_items = []
    for chapter in epub_chapters:
        if chapter.file_name == "guide.html" and guide_sub_toc_items:
            # Give the section its own href so the entry is a working link
            # and the NCX navPoint does not end up with an empty src.
            guide_section = epub.Section(chapter.title, href=chapter.file_name)
            toc_items.append((guide_section, tuple(guide_sub_toc_items)))
        else:
            # Also covers a guide page without headings: a section with no
            # children would produce an empty nested list.
            toc_items.append(chapter)
    book.toc = tuple(toc_items)

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    book.spine = ['nav'] + epub_chapters

    output_name = '古色迷宫轮舞曲_攻略手册_终极重构版.epub'
    epub.write_epub(output_name, book, {})
    print(f"EPUB 完美生成完毕:{output_name}")
# Build the EPUB only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    create_epub()