EPUB TOC
========

::

  from bs4 import BeautifulSoup
  import re
  import argparse

  # Command line: a single positional argument naming the HTML file to scan.
  parser = argparse.ArgumentParser(description="EPUB TOC")
  parser.add_argument("html_name", help="HTML file extracted from EPUB")
  args = parser.parse_args()

  # Open and read the HTML file (decoded as UTF-8)
  with open(args.html_name, 'r', encoding='utf-8') as file:
      html_content = file.read()

  # Parse the HTML content using BeautifulSoup
  soup = BeautifulSoup(html_content, 'html.parser')

  # Find all header tags (h1, h2, ..., h5)
  # Note: h6 is reserved for notes and figures
  headers = soup.find_all(re.compile(r'^h[1-5]$'))

  # Iterate over each header and print it with indentation
  for header in headers:
      # Extract header level from the tag name (e.g., 'h2' -> level 2)
      level = int(header.name[1])
      # Calculate indentation (2 spaces per level beyond 1)
      indent = '  ' * (level - 1)
      # Get the header text with whitespace runs collapsed to single spaces.
      # get_text(strip=True) is avoided on purpose: it strips every text
      # fragment and joins them with no separator, so a heading such as
      # "The <em>Great</em> Gatsby" would come out as "TheGreatGatsby".
      text = ' '.join(header.get_text().split())
      print(f"{indent}{text}")