1. 读取 epub 电子书的内容
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
def get_html(book):
    """Collect the document items of an EPUB book into a list.

    Args:
        book: The book object to inspect (the script below passes the
            result of ``epub.read_epub``).

    Returns:
        A list of every item that ``book.get_items_of_type`` yields for
        ``ebooklib.ITEM_DOCUMENT``. Per the original note, each item is an
        ``EpubHtml`` object.
    """
    return list(book.get_items_of_type(ebooklib.ITEM_DOCUMENT))
filename = "abc.epub"
book = epub.read_epub(filename)
htmls = get_html(book)

# Print the title and file name of every HTML document in the book.
for h in htmls:
    print(h.title, h.file_name)

# Modify the HTML content: prefix each document's <title> text with the
# document's 1-based position in the list, e.g. "3.<old title>".
count = 0
for h in htmls:
    count += 1
    soup = BeautifulSoup(h.content, "html.parser")
    title = soup.find("title")
    if title is None:
        # find() returns None when the document has no <title> element;
        # there is nothing to renumber, so leave this document untouched.
        continue
    # get_text() already returns a plain str, so it is used directly.
    title.string = str(count) + "." + title.get_text()
    # The soup is a separate parse tree: the change only reaches the book
    # once the serialized markup is stored back on the item.
    # NOTE(review): ebooklib may rebuild each document's <head> from h.title
    # when writing - confirm the new title survives write_epub, and set
    # h.title as well if it does not.
    h.content = soup.encode("utf-8")

epub.write_epub("bcd.epub", book, {})
2. 创建电子书
来源:https://pypi.org/project/EbookLib/
from ebooklib import epub
# Build a minimal sample EPUB (one chapter, one image, one stylesheet) and
# write it to "test.epub".
book = epub.EpubBook()

# Set metadata.
book.set_identifier("id123456")
book.set_title("Sample book")
book.set_language("en")
book.add_author("Author Authorowski")
book.add_author(
    "Danko Bananko",
    file_as="Gospodin Danko Bananko",
    role="ill",
    uid="coauthor",
)

# Create the chapter; its markup references the image added below.
c1 = epub.EpubHtml(title="Intro", file_name="chap_01.xhtml", lang="hr")
c1.content = (
    "<h1>Intro heading</h1>"
    "<p>Zaba je skocila u baru.</p>"
    '<p><img alt="[ebook logo]" src="static/ebooklib.gif"/><br/></p>'
)

# Create the image item from a local file. The context manager closes the
# file handle as soon as the bytes have been read.
with open("ebooklib.gif", "rb") as image_file:
    image_content = image_file.read()
img = epub.EpubImage(
    uid="image_1",
    file_name="static/ebooklib.gif",
    media_type="image/gif",
    content=image_content,
)

# Add the chapter.
book.add_item(c1)
# Add the image.
book.add_item(img)

# Define the table of contents: a direct link plus a section holding c1.
book.toc = (
    epub.Link("chap_01.xhtml", "Introduction", "intro"),
    (epub.Section("Simple book"), (c1,)),
)

# Add the default NCX and Nav files.
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())

# Define the CSS style.
style = "BODY {color: white;}"
nav_css = epub.EpubItem(
    uid="style_nav",
    file_name="style/nav.css",
    media_type="text/css",
    content=style,
)
# Add the CSS file.
book.add_item(nav_css)

# Basic spine: the navigation document followed by the chapter.
book.spine = ["nav", c1]

# Write the book to disk.
epub.write_epub("test.epub", book, {})
- book.spine:书籍的内容列表。每一个章节都要 append 到这个变量中,否则在电子书阅读器上无法看到该章节的内容。
- book.toc:书籍的目录。通过目录可以跳转到指定的章节。目录项可以全部是 epub.Link。