159 lines
5.6 KiB
Python
159 lines
5.6 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Format ACRCloud CSV/XLSX reports (Radio Stadtfilter) in das SUISA‑Layout
|
||
mit korrekter Spaltenreihenfolge, automatischer Spaltenbreite,
|
||
Fonts/Ausrichtung und sauberem Zeitformat.
|
||
|
||
ÄNDERUNGEN (v0.4):
|
||
• Header jetzt fett
|
||
• Sendedauer wird als **HH:MM:SS** ausgegeben (Excel interpretiert korrekt)
|
||
– für Zeiten < 1 h wird 00:MM:SS geschrieben, Excel‑Format bleibt "hh:mm:ss".
|
||
|
||
Usage:
|
||
python format_radio_report.py <input.xlsx> [-o <output.xlsx>]
|
||
|
||
Abhängigkeiten:
|
||
pip install pandas openpyxl
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import sys
|
||
from pathlib import Path
|
||
from typing import Optional
|
||
|
||
import pandas as pd
|
||
from openpyxl import load_workbook
|
||
from openpyxl.utils import get_column_letter
|
||
from openpyxl.styles import Alignment, Font, Border, Side
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Konfiguration
|
||
# ---------------------------------------------------------------------------
|
||
HEADER_ORDER = [
|
||
"Titel", "Komponist", "Interpret", "Sender", "Sendedatum", "Sendedauer",
|
||
"Sendezeit", "ISWC", "ISRC", "Label", "Albumtitel", "Release Date",
|
||
"Lyricists", "Creators", "UPC", "ACRID",
|
||
]
|
||
|
||
SOURCE_MAP = {
|
||
"Titel": "Title", "Komponist": "Composers", "Interpret": "Artist",
|
||
"Sender": "Stream Name", "ISWC": "ISWC", "ISRC": "ISRC", "Label": "Label",
|
||
"Albumtitel": "Album", "UPC": "UPC", "ACRID": "ACRID",
|
||
}
|
||
|
||
RIGHT_ALIGN_COLS = {"Sendedatum", "Sendedauer", "Sendezeit"}
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Hilfsfunktionen
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _fmt_duration(seconds: Optional[float | int]) -> str:
|
||
"""Wandelt Sekunden (float|int|NaN) in **HH:MM:SS** um.
|
||
Excel interpretiert dann korrekt. Für Kurzzeiten → 00:MM:SS"""
|
||
if seconds is None or pd.isna(seconds):
|
||
seconds = 0
|
||
total = int(round(float(seconds)))
|
||
hours, rem = divmod(total, 3600)
|
||
mins, secs = divmod(rem, 60)
|
||
return f"{hours:02d}:{mins:02d}:{secs:02d}"
|
||
|
||
|
||
def _build_dataframe(src: Path) -> pd.DataFrame:
|
||
try:
|
||
df_orig = pd.read_excel(src)
|
||
except FileNotFoundError:
|
||
raise FileNotFoundError(f"Eingabedatei nicht gefunden: {src}")
|
||
|
||
df_new = pd.DataFrame()
|
||
|
||
# Einfaches Mapping
|
||
for tgt, src_col in SOURCE_MAP.items():
|
||
df_new[tgt] = df_orig.get(src_col, "")
|
||
|
||
# Datum/Zeit
|
||
ts = pd.to_datetime(df_orig.get("Timestamp(UTC+01:00)"), errors="coerce")
|
||
df_new["Sendedatum"] = ts.dt.strftime("%Y%m%d")
|
||
df_new["Sendezeit"] = ts.dt.strftime("%H:%M:%S")
|
||
|
||
# Dauer in HH:MM:SS
|
||
df_new["Sendedauer"] = df_orig.get("Played Duration", 0).apply(_fmt_duration)
|
||
|
||
# Release Date
|
||
rel = pd.to_datetime(df_orig.get("Release Date"), errors="coerce")
|
||
df_new["Release Date"] = rel.dt.strftime("%Y%m%d")
|
||
|
||
# Leerspalten
|
||
df_new["Lyricists"] = ""
|
||
df_new["Creators"] = ""
|
||
|
||
# Richtige Reihenfolge
|
||
df_new = df_new[HEADER_ORDER]
|
||
return df_new
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Excel‑Nachformatierung
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _autofit_and_align(xlsx: Path) -> None:
|
||
wb = load_workbook(xlsx)
|
||
ws = wb.active
|
||
|
||
# Spaltenbreiten
|
||
for col_idx, header_cell in enumerate(ws[1], 1):
|
||
letter = get_column_letter(col_idx)
|
||
max_len = len(str(header_cell.value)) if header_cell.value else 0
|
||
for cell in ws[letter][1:]: # skip header
|
||
if cell.value is not None:
|
||
max_len = 40
|
||
ws.column_dimensions[letter].width = max_len + 2
|
||
|
||
# Ausrichtung
|
||
right_cols_idx = {idx for idx, cell in enumerate(ws[1], 1) if cell.value in RIGHT_ALIGN_COLS}
|
||
|
||
for row in ws.iter_rows(min_row=2):
|
||
for cell in row:
|
||
cell.alignment = Alignment(horizontal="right" if cell.column in right_cols_idx else "left")
|
||
|
||
# Header fett, links, ohne Rahmen
|
||
no_border = Border(left=Side(border_style=None), right=Side(border_style=None),
|
||
top=Side(border_style=None), bottom=Side(border_style=None))
|
||
for cell in ws[1]:
|
||
cell.alignment = Alignment(horizontal="left")
|
||
#cell.font = Font(bold=True)
|
||
cell.border = no_border
|
||
|
||
wb.save(xlsx)
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Pipeline
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def format_report(input_path: Path, output_path: Path) -> None:
|
||
df = _build_dataframe(input_path)
|
||
df.to_excel(output_path, index=False)
|
||
_autofit_and_align(output_path)
|
||
print(f"Formatiertes Reporting gespeichert → {output_path}")
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# CLI‑Entry
|
||
# ---------------------------------------------------------------------------
|
||
if __name__ == "__main__":
|
||
parser = argparse.ArgumentParser(description="Format ACRCloud XLSX für SUISA")
|
||
parser.add_argument("input", help="Pfad zur Original‑XLSX (ohne/mit .xlsx)")
|
||
parser.add_argument("-o", "--output", help="Pfad der Ziel‑XLSX")
|
||
args = parser.parse_args()
|
||
|
||
in_path = Path(args.input)
|
||
if not in_path.exists() and in_path.suffix == "":
|
||
alt = in_path.with_suffix(".xlsx")
|
||
if alt.exists():
|
||
in_path = alt
|
||
else:
|
||
sys.exit(f"Datei nicht gefunden: {in_path} (auch nicht {alt})")
|
||
elif not in_path.exists():
|
||
sys.exit(f"Datei nicht gefunden: {in_path}")
|
||
|
||
out_path = Path(args.output) if args.output else in_path.with_name(in_path.stem + "_formatiert.xlsx")
|
||
format_report(in_path, out_path)
|