Files
Suisa-Listen/suisa-convert-acr-v26.py
2026-03-18 10:03:04 +01:00

162 lines
5.8 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from typing import Optional
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment, Font, Border, Side
# ---------------------------------------------------------------------------
# Konfiguration
# ---------------------------------------------------------------------------
HEADER_ORDER = [
"Sender", "Titel des Musikwerks", "Name des Komponisten", "Interpret(en)", "Sendedatum", "Sendedauer",
"Sendezeit", "ISRC", "Label", "Identifikationsnummer", "Eigenaufnahmen", "EAN/GTIN", "Albumtitel",
"Aufnahmedatum", "Aufnahmeland", "Erstveröffentlichungsdatum", "Katalog-Nummer",
"Werkverzeichnisangaben", "Bestellnummer", "Veröffentlichungsland", "Liveaufnahme",
]
SOURCE_MAP = {
"Sender": "Stream Name","Titel des Musikwerks": "Title", "Name des Komponisten": "Composers", "Interpret(en)": "Artist",
"ISRC": "ISRC", "Label": "Label", "Identifikationsnummer": "ACRID", "EAN/GTIN": "UPC",
"Albumtitel": "Album", "Werkverzeichnisangaben": "ISWC", "Eigenaufnahmen": "Program Title", "Aufnahmedatum": "Tag",
"Aufnahmeland": "Deezer", "Katalog-Nummer": "Spotify", "Bestellnummer": "Youtube", "Veröffentlichungsland": "Creators", "Liveaufnahme":"Bucket ID",
}
RIGHT_ALIGN_COLS = {"Sendedatum", "Sendedauer", "Sendezeit"}
possible_cols = [
"Timestamp(UTC+01:00)",
"Timestamp(UTC+02:00)",
]
# ---------------------------------------------------------------------------
# Hilfsfunktionen
# ---------------------------------------------------------------------------
def _fmt_duration(seconds: Optional[float | int]) -> str:
"""Wandelt Sekunden (float|int|NaN) in **HH:MM:SS** um.
Excel interpretiert dann korrekt. Fuer Kurzzeiten 00:MM:SS"""
if seconds is None or pd.isna(seconds):
seconds = 0
total = int(round(float(seconds)))
hours, rem = divmod(total, 3600)
mins, secs = divmod(rem, 60)
return f"{hours:02d}:{mins:02d}:{secs:02d}"
def _build_dataframe(src: Path) -> pd.DataFrame:
try:
df_orig = pd.read_excel(src)
except FileNotFoundError:
raise FileNotFoundError(f"Eingabedatei nicht gefunden: {src}")
df_new = pd.DataFrame()
# Einfaches Mapping
for tgt, src_col in SOURCE_MAP.items():
df_new[tgt] = df_orig.get(src_col, "")
# Datum/Zeiti
ts_col = next(col for col in possible_cols if col in df_orig.columns)
ts = pd.to_datetime(
pd.Series(df_orig[ts_col]),
errors="coerce"
)
df_new["Sendedatum"] = ts.dt.strftime("%Y%m%d")
df_new["Sendezeit"] = ts.dt.strftime("%H:%M:%S")
# Dauer in HH:MM:SS
df_new["Sendedauer"] = df_orig.get("Played Duration", 0).apply(_fmt_duration)
# Release Date
rel = pd.to_datetime(df_orig.get("Release Date"), errors="coerce")
df_new["Erstveröffentlichungsdatum"] = rel.dt.strftime("%Y%m%d")
# Leerspalten
df_new["Eigenaufnahmen"] = ""
df_new["Aufnahmedatum"] = ""
df_new["Aufnahmeland"] = ""
df_new["Bestellnummer"] = ""
df_new["Veröffentlichungsland"] = ""
df_new["Liveaufnahme"] = ""
# Richtige Reihenfolge
df_new = df_new[HEADER_ORDER]
return df_new
# ---------------------------------------------------------------------------
# Excel Nachformatierung
# ---------------------------------------------------------------------------
def _autofit_and_align(xlsx: Path) -> None:
wb = load_workbook(xlsx)
ws = wb.active
# Spaltenbreiten
for col_idx, header_cell in enumerate(ws[1], 1):
letter = get_column_letter(col_idx)
max_len = len(str(header_cell.value)) if header_cell.value else 0
for cell in ws[letter][1:]: # skip header
if cell.value is not None:
max_len = 40
ws.column_dimensions[letter].width = max_len + 2
# Ausrichtung
right_cols_idx = {idx for idx, cell in enumerate(ws[1], 1) if cell.value in RIGHT_ALIGN_COLS}
for row in ws.iter_rows(min_row=2):
for cell in row:
cell.alignment = Alignment(horizontal="right" if cell.column in right_cols_idx else "left")
# Header fett, links, ohne Rahmen
no_border = Border(left=Side(border_style=None), right=Side(border_style=None),
top=Side(border_style=None), bottom=Side(border_style=None))
for cell in ws[1]:
cell.alignment = Alignment(horizontal="left")
#cell.font = Font(bold=True)
cell.border = no_border
wb.save(xlsx)
# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------
def format_report(input_path: Path, output_path: Path) -> None:
df = _build_dataframe(input_path)
df.to_excel(output_path, index=False)
_autofit_and_align(output_path)
print(f"Formatiertes Reporting gespeichert_{output_path}")
# ---------------------------------------------------------------------------
# CLI Entry
# ---------------------------------------------------------------------------
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Format ACRCloud XLSX fuer SUISA")
parser.add_argument("input", help="Pfad zur Original XLSX (ohne/mit .xlsx)")
parser.add_argument("-o", "--output", help="Pfad der ZielXLSX")
args = parser.parse_args()
in_path = Path(args.input)
if not in_path.exists() and in_path.suffix == "":
alt = in_path.with_suffix(".xlsx")
if alt.exists():
in_path = alt
else:
sys.exit(f"Datei nicht gefunden: {in_path} (auch nicht {alt})")
elif not in_path.exists():
sys.exit(f"Datei nicht gefunden: {in_path}")
out_path = Path(args.output) if args.output else in_path.with_name(in_path.stem + "_formatiert.xlsx")
format_report(in_path, out_path)