#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import argparse

"""tocgen.py

    Erstellt aus einem vordefinierten Verzeichnis mit Markdown-Dateien ein Inhaltsverzeichnis.

    (C) 2025 - Adam Skotarczak - Version: 1.0.0 - Stand 20/05/2025
"""

MANUSKRIPT_DIR  = "manuscript"
OUTPUT_PATH     = "INHALT2.md"

from pathlib import Path
import re

def slugify(text):
    """Turn a heading title into a link anchor.

    The title is lower-cased, every character that is not a word
    character, hyphen or space is removed, surrounding whitespace is
    stripped and each remaining space becomes a hyphen.
    """
    lowered = text.lower()
    # Keep only word characters, hyphens and spaces.
    cleaned = re.sub(r"[^\w\- ]", "", lowered)
    trimmed = cleaned.strip()
    return trimmed.replace(" ", "-")

def parse_headings(md_file: Path, max_level=3):
    """Collect the ATX headings (``#`` style) of a Markdown file.

    Lines inside code blocks fenced with triple backticks are skipped, so
    ``#`` comments in code samples are not mistaken for headings.

    Args:
        md_file: Path of the Markdown file; it is read as UTF-8.
        max_level: Deepest heading level to include. Any ATX level (1-6)
            is honoured; the default of 3 keeps ``#`` to ``###``.

    Returns:
        A list of ``(level, title, anchor)`` tuples in document order,
        where ``anchor`` is ``slugify(title)``.
    """
    lines = md_file.read_text(encoding="utf-8").splitlines()
    headings = []
    in_codeblock = False

    for line in lines:
        # Toggle on ```, regardless of whether a language tag follows.
        if line.strip().startswith("```"):
            in_codeblock = not in_codeblock
            continue
        if in_codeblock:
            continue

        # Match every ATX level (1-6) and let max_level do the filtering;
        # a pattern capped at three hashes would make max_level > 3 a no-op.
        match = re.match(r"^(#{1,6})\s+(.*)", line)
        if match:
            level = len(match.group(1))
            if level <= max_level:
                title = match.group(2).strip()
                anchor = slugify(title)
                headings.append((level, title, anchor))
    return headings

def main():
    """Parse the command-line options and generate the table of contents.

    Options:
        -d/--dir: directory scanned for Markdown files
            (defaults to MANUSKRIPT_DIR).
        -o/--output: path of the generated file (defaults to OUTPUT_PATH).
    """
    parser = argparse.ArgumentParser(
        description="Erstellt aus einem Verzeichnis mit Markdown-Dateien ein Inhaltsverzeichnis."
    )
    # %(default)s lets argparse print the real default, so the help text
    # cannot drift away from the module constants.
    parser.add_argument(
        "-d", "--dir", default=MANUSKRIPT_DIR,
        help="Verzeichnis mit Markdown-Dateien (Standard: %(default)s)",
    )
    parser.add_argument(
        "-o", "--output", default=OUTPUT_PATH,
        help="Pfad für die Ausgabedatei (Standard: %(default)s)",
    )
    args = parser.parse_args()

    generate_toc(args.dir, args.output)

def generate_toc(manuskript_dir=MANUSKRIPT_DIR, output_path=OUTPUT_PATH):
    """Write a Markdown table of contents for all ``*.md`` files in a directory.

    The files directly inside ``manuskript_dir`` are processed in sorted
    order. Each heading returned by ``parse_headings`` becomes one bullet,
    indented by two spaces per level below the first. Level-1 headings
    link to the file itself; deeper headings link to ``file#anchor``.
    The result is written to ``output_path`` as UTF-8.
    """
    source_dir = Path(manuskript_dir)
    destination = Path(output_path)
    entries = ["# **Inhalt**\n"]

    for chapter in sorted(source_dir.glob("*.md")):
        target = chapter.as_posix()
        for depth, caption, slug in parse_headings(chapter):
            # Top-level headings point at the file, the rest at their anchor.
            if depth > 1:
                href = f"{target}#{slug}"
            else:
                href = target
            padding = "  " * (depth - 1)
            entries.append(f"{padding}- [{caption}]({href})")

    content = "\n".join(entries) + "\n"
    destination.write_text(content, encoding="utf-8")

if __name__ == "__main__":
    main()