adminslog/tools/collector/link_collector.py

146 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import os
import sys
import json
import shutil
from pathlib import Path
def script_dir():
    """Return the absolute, resolved directory that contains this script file."""
    this_file = Path(__file__)
    containing_dir = this_file.parent
    return containing_dir.resolve()
def load_config(filename="config.jsonc"):
    """Load the JSON configuration file stored next to this script.

    Lines whose first non-whitespace characters are ``//`` are dropped before
    the text is handed to ``json.loads``. Trailing ``//`` comments and
    ``/* ... */`` blocks are not removed.

    Args:
        filename: Name of the configuration file inside ``script_dir()``.

    Returns:
        The parsed configuration as produced by ``json.loads``.

    Prints a warning and exits the process with status 1 when the file does
    not exist.
    """
    config_path = script_dir() / filename
    if config_path.exists():
        with open(config_path, encoding="utf-8") as handle:
            # Keep every line that is not a full-line "//" comment.
            kept_lines = [raw for raw in handle if not raw.lstrip().startswith("//")]
        return json.loads("".join(kept_lines))

    print(" ⚠ Konfigurationsdatei nicht gefunden:", config_path)
    sys.exit(1)
def _split_csv(value):
    """Split a comma-separated string into its non-empty, stripped entries."""
    return [entry.strip() for entry in value.split(",") if entry.strip()]


def parse_args(argv=None):
    """Parse the command-line options into a plain dict.

    Args:
        argv: Option strings to parse, without the program name. Defaults to
            ``sys.argv[1:]`` so callers that pass nothing behave as before.

    Returns:
        A dict with the keys:
            "scan":   list of directories from -s/--scan, or None if not given
            "ignore": list of directories from -x/--ignore (default: [])
            "reset":  True when --reset was given
            "hilfe":  True when -h/--hilfe was given or parsing failed
    """
    pending = sys.argv[1:] if argv is None else list(argv)
    parsed = {
        "scan": None,
        "ignore": [],
        "reset": False,
        "hilfe": False,
    }
    # Options that take one comma-separated value, mapped to their result key.
    list_options = {
        "-s": "scan",
        "--scan": "scan",
        "-x": "ignore",
        "--ignore": "ignore",
    }

    remaining = iter(pending)
    for arg in remaining:
        if arg in ("-h", "--hilfe"):
            parsed["hilfe"] = True
        elif arg == "--reset":
            parsed["reset"] = True
        elif arg in list_options:
            value = next(remaining, None)
            if value is None:
                # A known option without its value is not an "unknown
                # parameter"; report what is actually wrong.
                print(f" ⚠ Fehlender Wert für Parameter: {arg}")
                parsed["hilfe"] = True
                break
            parsed[list_options[arg]] = _split_csv(value)
        else:
            print(f" ⚠ Unbekannter Parameter: {arg}")
            parsed["hilfe"] = True
            break
    return parsed
def show_help():
    """Print the (German) usage text for the command-line interface to stdout."""
    print("""
(C) 2025 - Adam Skotarczak (ionivation.com)
🛈 Tool das Verzeichnisse nach Markdown-Dateien durchsucht und diese an eine definierte Liste anhängt als Markdown-Link.
Verwendung: python3 link_collector.py [OPTIONEN]
-s, --scan Kommagetrennte Liste von Verzeichnissen zum Durchsuchen (relativ zum Aufrufpfad)
-x, --ignore Kommagetrennte Liste von Verzeichnissen, die ignoriert werden sollen
--reset Löscht das Logfile 'processed.log' und beendet sich
-h, --hilfe Zeigt diese Hilfe
Beispiel:
python3 link_collector.py -s docs,notes -x "docs/alt"
""")
def find_md_files(root_dirs, ignore_dirs, extensions):
    """Yield every file below *root_dirs* whose name ends with one of *extensions*.

    Args:
        root_dirs: Directories to walk recursively.
        ignore_dirs: Directories to skip together with everything below them.
            They are matched against the walked paths component by component,
            so they must be given in the same form as *root_dirs* (for
            example both resolved to absolute paths).
        extensions: File-name suffixes to accept, e.g. ``[".md"]``.

    Yields:
        A ``pathlib.Path`` for each matching file. Directories and files are
        visited in sorted order so the result is the same on every run.
    """
    ignored = {Path(entry) for entry in ignore_dirs}
    suffixes = tuple(extensions)  # str.endswith accepts a tuple of options

    for root in root_dirs:
        root_path = Path(root)
        # Skip a root that is itself ignored or lies inside an ignored directory.
        if root_path in ignored or not ignored.isdisjoint(root_path.parents):
            continue
        for dirpath, dirnames, filenames in os.walk(root_path):
            current = Path(dirpath)
            # Prune ignored directories in place so os.walk never descends
            # into them. Comparing whole paths (not string prefixes) keeps
            # "docs/alternative" when only "docs/alt" is ignored.
            dirnames[:] = sorted(
                name for name in dirnames if current / name not in ignored
            )
            for fname in sorted(filenames):
                if fname.endswith(suffixes):
                    yield current / fname
def extract_title(filepath):
    """Return the text of the first non-empty Markdown heading in *filepath*.

    A heading is any line whose first non-whitespace character is ``#``. The
    leading ``#`` characters and surrounding whitespace are removed; the rest
    of the line is kept as is, so a title such as ``C#`` keeps its trailing
    ``#``.

    Args:
        filepath: Path of the file to read (``str`` or ``pathlib.Path``).

    Returns:
        The heading text, or the file name without its suffix when the file
        has no usable heading or cannot be read (a warning is printed in the
        latter case).
    """
    filepath = Path(filepath)
    try:
        # "utf-8-sig" decodes plain UTF-8 and drops a leading BOM, which would
        # otherwise hide a heading on the very first line.
        with open(filepath, encoding="utf-8-sig") as f:
            for line in f:
                stripped = line.strip()
                if not stripped.startswith("#"):
                    continue
                # Remove only the leading heading markers, never trailing '#'.
                title = stripped.lstrip("#").strip()
                if title:
                    return title
                # An empty heading ("#" alone) is useless as link text; keep looking.
    except (OSError, UnicodeError) as e:
        print(f"⚠ Fehler beim Lesen von {filepath}: {e}")
    return filepath.stem
def load_processed(logfile):
    """Read the entries recorded in *logfile*.

    Args:
        logfile: ``pathlib.Path`` of the log file, one entry per line.

    Returns:
        A set containing every line stripped of surrounding whitespace, or an
        empty set when the file does not exist.
    """
    if logfile.exists():
        with open(logfile, encoding="utf-8") as handle:
            return {entry.strip() for entry in handle}
    return set()
def append_to_output(output_path, links):
    """Append every entry of *links* to *output_path*, one per line, as UTF-8.

    The file is opened in append mode, so existing content is kept and the
    file is created if it does not exist yet.
    """
    with open(output_path, "a", encoding="utf-8") as out:
        out.writelines(entry + "\n" for entry in links)
def update_processed(log_path, new_paths):
    """Append the string form of each entry in *new_paths* to *log_path*.

    One entry is written per line (UTF-8). The file is opened in append mode,
    so earlier entries are kept.
    """
    with open(log_path, "a", encoding="utf-8") as log:
        for entry in new_paths:
            # print() writes str(entry) followed by a newline.
            print(entry, file=log)
def _relative_link_path(md_file, base):
    """Return *md_file* relative to *base*, using ".." segments if it lies outside."""
    try:
        return md_file.relative_to(base)
    except ValueError:
        # relative_to() refuses paths outside *base* (e.g. "-s ../other").
        pass
    try:
        return Path(os.path.relpath(md_file, base))
    except ValueError:
        # os.path.relpath raises on Windows when the drives differ; the
        # absolute path is the only usable link target then.
        return md_file


def main():
    """Collect new Markdown files and append them as links to the output file.

    Steps:
      1. Parse the command line. Help is shown before the configuration is
         loaded, so it works even when the config file is missing.
      2. Load the configuration. With --reset, delete the processed-log
         (stored next to the script) and return.
      3. Walk the scan directories (-s/--scan, else config "root_dirs"),
         skipping ignored directories, the output file itself, and every path
         already recorded in the processed-log or already seen in this run.
      4. Append one "- [title](path)" line per new file to the output file in
         the current working directory and record the paths in the log.
    """
    args = parse_args()
    if args["hilfe"]:
        show_help()
        return

    config = load_config()
    log_path = script_dir() / config.get("processed_log", "processed.log")
    if args["reset"]:
        if log_path.exists():
            log_path.unlink()
            print("🧹 Logfile gelöscht:", log_path)
        else:
            print(" Logfile existierte nicht:", log_path)
        return

    cwd = Path.cwd()
    output_file = cwd / config.get("output_file", "output.md")
    output_resolved = output_file.resolve()  # loop-invariant, resolve once
    root_dirs = args["scan"] or config.get("root_dirs", [])
    root_dirs = [Path(d).resolve() for d in root_dirs if d.strip()]
    ignore_dirs = [Path(x).resolve() for x in args["ignore"]]
    extensions = config.get("extensions", [".md"])

    # Copy into a local set so paths found in this run can be added too;
    # overlapping scan roots would otherwise yield the same file twice.
    seen = set(load_processed(log_path))
    new_links = []
    new_processed = []
    for md_file in find_md_files(root_dirs, ignore_dirs, extensions):
        # Never link the output file to itself.
        if md_file.resolve() == output_resolved:
            continue
        rel_path = _relative_link_path(md_file, cwd)
        key = str(rel_path)
        if key in seen:
            continue
        seen.add(key)
        title = extract_title(md_file)
        new_links.append(f"- [{title}]({rel_path.as_posix()})")
        new_processed.append(rel_path)

    if new_links:
        append_to_output(output_file, new_links)
        update_processed(log_path, new_processed)
        print(f"{len(new_links)} neue Links hinzugefügt.")
    else:
        print(" Keine neuen Dateien gefunden.")
# Run the collector only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()