I wrote the following script to parse an SEP article and call pandoc to convert it to EPUB. I'd love your feedback.
The script defines no functions; I didn't think adding any was worthwhile. There is also no `if __name__ == "__main__":` check to tell whether the file is being executed or imported, but since it is meant to be run as an executable, I don't consider this a problem.
"""
sep2epub
"""
import argparse
import os
import subprocess

from bs4 import BeautifulSoup
import requests
import yaml
# Directory holding the intermediate HTML and metadata files handed to pandoc.
TMP_DIR = "tmp"
# ids of the <div> sections that are removed from the article before export.
UNWANTED_SECTION_IDS = ("academic-tools", "related-entries")
# Seconds to wait for the SEP server before giving up instead of hanging.
REQUEST_TIMEOUT = 30


def entry_url(article_id):
    """Return the SEP URL of the entry identified by *article_id*."""
    return "https://plato.stanford.edu/entries/" + article_id


def pandoc_command(article_id):
    """Return the pandoc invocation for *article_id* as an argument list.

    The command is a list rather than a shell string so that the identifier
    is always passed to pandoc as literal text: no shell is involved, hence
    spaces or shell metacharacters in the identifier cannot alter the command.
    """
    return [
        "pandoc",
        f"{TMP_DIR}/{article_id}.html",
        f"--metadata-file={TMP_DIR}/metadata.yaml",
        "-o",
        f"{article_id}.epub",
    ]


def main():
    """Download an SEP article and convert it to EPUB with pandoc.

    Reads the article identifier from the command line, writes the cleaned
    article HTML and a YAML metadata file into ``tmp/``, then runs pandoc to
    produce ``<id>.epub`` in the current directory.

    Returns:
        The exit status of pandoc (0 on success).

    Raises:
        SystemExit: if the page cannot be downloaded, does not contain the
            expected article markup, or pandoc is not installed.
    """
    # Parse command line arguments.
    # The text must be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog`` (the program name in usage).
    parser = argparse.ArgumentParser(
        description="Create an epub file from an article in the "
        "Stanford Encyclopedia of Philosophy"
    )
    parser.add_argument("id", help="Identifier of the article")
    args = parser.parse_args()
    url = entry_url(args.id)

    # Request the HTML page and parse it with BeautifulSoup.
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Turn 4xx/5xx answers (e.g. an unknown identifier) into an error
        # here rather than parsing an error page below.
        response.raise_for_status()
    except requests.RequestException as err:
        raise SystemExit(f"Could not download {url}: {err}") from None

    soup = BeautifulSoup(response.text, "html.parser")
    article_content = soup.find("div", id="article-content")
    if article_content is None:
        raise SystemExit(f"No article content found at {url}")

    heading = article_content.find("h1")
    # Fall back to the identifier so the ebook always has a title.
    article_title = heading.text.strip() if heading is not None else args.id

    # find() returns None for a missing section; only remove what exists.
    for section_id in UNWANTED_SECTION_IDS:
        section = article_content.find("div", id=section_id)
        if section is not None:
            section.decompose()

    # Export HTML and metadata.
    # Explicit UTF-8 keeps non-ASCII text intact whatever the locale default.
    os.makedirs(TMP_DIR, exist_ok=True)
    with open(f"{TMP_DIR}/{args.id}.html", "w", encoding="utf-8") as f:
        f.write(str(article_content))

    metadata = {
        "title": article_title,
        "publisher": "Stanford Encyclopedia of Philosophy",
    }
    with open(f"{TMP_DIR}/metadata.yaml", "w", encoding="utf-8") as f:
        yaml.dump(metadata, f, explicit_start=True, explicit_end=True)

    # Use pandoc to generate the epub file.
    try:
        completed = subprocess.run(pandoc_command(args.id), check=False)
    except FileNotFoundError:
        raise SystemExit(
            "pandoc executable not found; install pandoc and make sure it is on PATH"
        ) from None
    # Propagate pandoc's status so callers can detect a failed conversion.
    return completed.returncode


if __name__ == "__main__":
    raise SystemExit(main())