Suisa-Listen/suisa-convert-acr-v4.py

#!/usr/bin/env python3

# Format ACRCloud CSV/XLSX reports (Radio Stadtfilter) in das SUISA‑Layout
# mit korrekter Spaltenreihenfolge, automatischer Spaltenbreite,
# Fonts/Ausrichtung und sauberem Zeitformat.
# Usage:   python format_radio_report.py <input.xlsx> [-o <output.xlsx>]
# Abhängigkeiten:
# pip install pandas openpyxl

from __future__ import annotations

import argparse
import sys
from pathlib import Path
from typing import Optional

import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment, Font, Border, Side

# ---------------------------------------------------------------------------
# Konfiguration
# ---------------------------------------------------------------------------
HEADER_ORDER = [
    "Titel", "Komponist", "Interpret", "Sender", "Sendedatum", "Sendedauer",
    "Sendezeit", "ISWC", "ISRC", "Label", "Albumtitel", "Release Date",
    "Lyricists", "Creators", "UPC", "ACRID",
]

SOURCE_MAP = {
    "Titel": "Title", "Komponist": "Composers", "Interpret": "Artist",
    "Sender": "Stream Name", "ISWC": "ISWC", "ISRC": "ISRC", "Label": "Label",
    "Albumtitel": "Album", "UPC": "UPC", "ACRID": "ACRID",
}

RIGHT_ALIGN_COLS = {"Sendedatum", "Sendedauer", "Sendezeit"}

# ---------------------------------------------------------------------------
# Hilfsfunktionen
# ---------------------------------------------------------------------------

def _fmt_duration(seconds: Optional[float | int]) -> str:
    """Wandelt Sekunden (float|int|NaN) in **HH:MM:SS** um.
    Excel interpretiert dann korrekt. Für Kurzzeiten → 00:MM:SS"""
    if seconds is None or pd.isna(seconds):
        seconds = 0
    total = int(round(float(seconds)))
    hours, rem = divmod(total, 3600)
    mins, secs = divmod(rem, 60)
    return f"{hours:02d}:{mins:02d}:{secs:02d}"


def _build_dataframe(src: Path) -> pd.DataFrame:
    try:
        df_orig = pd.read_excel(src)
    except FileNotFoundError:
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {src}")

    df_new = pd.DataFrame()

    # Einfaches Mapping
    for tgt, src_col in SOURCE_MAP.items():
        df_new[tgt] = df_orig.get(src_col, "")

    # Datum/Zeit
    ts = pd.to_datetime(df_orig.get("Timestamp(UTC+01:00)"), errors="coerce")
    df_new["Sendedatum"] = ts.dt.strftime("%Y%m%d")
    df_new["Sendezeit"] = ts.dt.strftime("%H:%M:%S")

    # Dauer in HH:MM:SS
    df_new["Sendedauer"] = df_orig.get("Played Duration", 0).apply(_fmt_duration)

    # Release Date
    rel = pd.to_datetime(df_orig.get("Release Date"), errors="coerce")
    df_new["Release Date"] = rel.dt.strftime("%Y%m%d")

    # Leerspalten
    df_new["Lyricists"] = ""
    df_new["Creators"] = ""

    # Richtige Reihenfolge
    df_new = df_new[HEADER_ORDER]
    return df_new

# ---------------------------------------------------------------------------
# Excel‑Nachformatierung
# ---------------------------------------------------------------------------

def _autofit_and_align(xlsx: Path) -> None:
    wb = load_workbook(xlsx)
    ws = wb.active

    # Spaltenbreiten
    for col_idx, header_cell in enumerate(ws[1], 1):
        letter = get_column_letter(col_idx)
        max_len = len(str(header_cell.value)) if header_cell.value else 0
        for cell in ws[letter][1:]:  # skip header
            if cell.value is not None:
                max_len = 40
        ws.column_dimensions[letter].width = max_len + 2

    # Ausrichtung
    right_cols_idx = {idx for idx, cell in enumerate(ws[1], 1) if cell.value in RIGHT_ALIGN_COLS}

    for row in ws.iter_rows(min_row=2):
        for cell in row:
            cell.alignment = Alignment(horizontal="right" if cell.column in right_cols_idx else "left")

    # Header fett, links, ohne Rahmen
    no_border = Border(left=Side(border_style=None), right=Side(border_style=None),
                       top=Side(border_style=None), bottom=Side(border_style=None))
    for cell in ws[1]:
        cell.alignment = Alignment(horizontal="left")
        #cell.font = Font(bold=True)
        cell.border = no_border

    wb.save(xlsx)

# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------

def format_report(input_path: Path, output_path: Path) -> None:
    df = _build_dataframe(input_path)
    df.to_excel(output_path, index=False)
    _autofit_and_align(output_path)
    print(f"Formatiertes Reporting gespeichert → {output_path}")

# ---------------------------------------------------------------------------
# CLI‑Entry
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Format ACRCloud XLSX für SUISA")
    parser.add_argument("input", help="Pfad zur Original‑XLSX (ohne/mit .xlsx)")
    parser.add_argument("-o", "--output", help="Pfad der Ziel‑XLSX")
    args = parser.parse_args()

    in_path = Path(args.input)
    if not in_path.exists() and in_path.suffix == "":
        alt = in_path.with_suffix(".xlsx")
        if alt.exists():
            in_path = alt
        else:
            sys.exit(f"Datei nicht gefunden: {in_path} (auch nicht {alt})")
    elif not in_path.exists():
        sys.exit(f"Datei nicht gefunden: {in_path}")

    out_path = Path(args.output) if args.output else in_path.with_name(in_path.stem + "_formatiert.xlsx")
    format_report(in_path, out_path)