Files
Suisa-Listen/suisa-convert-acr-v4.py

151 lines
5.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
# Formatiert ACRCloud-CSV/XLSX-Reports (Radio Stadtfilter) in das SUISA-Layout
# mit korrekter Spaltenreihenfolge, automatischer Spaltenbreite,
# Fonts/Ausrichtung und sauberem Zeitformat.
# Usage: python suisa-convert-acr-v4.py <input.xlsx> [-o <output.xlsx>]
# Abhängigkeiten:
# pip install pandas openpyxl
from __future__ import annotations
import argparse
import sys
from pathlib import Path
from typing import Optional
import pandas as pd
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment, Font, Border, Side
# ---------------------------------------------------------------------------
# Konfiguration
# ---------------------------------------------------------------------------
# Column order of the generated sheet, left to right.
HEADER_ORDER = [
    "Titel",
    "Komponist",
    "Interpret",
    "Sender",
    "Sendedatum",
    "Sendedauer",
    "Sendezeit",
    "ISWC",
    "ISRC",
    "Label",
    "Albumtitel",
    "Release Date",
    "Lyricists",
    "Creators",
    "UPC",
    "ACRID",
]

# Target column -> source column for fields that are copied over unchanged.
SOURCE_MAP = {
    "Titel": "Title",
    "Komponist": "Composers",
    "Interpret": "Artist",
    "Sender": "Stream Name",
    "ISWC": "ISWC",
    "ISRC": "ISRC",
    "Label": "Label",
    "Albumtitel": "Album",
    "UPC": "UPC",
    "ACRID": "ACRID",
}

# Target columns whose data cells are right-aligned in the output sheet.
RIGHT_ALIGN_COLS = {
    "Sendedatum",
    "Sendedauer",
    "Sendezeit",
}
# ---------------------------------------------------------------------------
# Hilfsfunktionen
# ---------------------------------------------------------------------------
def _fmt_duration(seconds: Optional[float | int]) -> str:
"""Wandelt Sekunden (float|int|NaN) in **HH:MM:SS** um.
Excel interpretiert dann korrekt. Für Kurzzeiten → 00:MM:SS"""
if seconds is None or pd.isna(seconds):
seconds = 0
total = int(round(float(seconds)))
hours, rem = divmod(total, 3600)
mins, secs = divmod(rem, 60)
return f"{hours:02d}:{mins:02d}:{secs:02d}"
def _build_dataframe(src: Path) -> pd.DataFrame:
    """Read a report file and remap it to the target column layout.

    Files with a ``.csv`` suffix are read with ``pandas.read_csv``; everything
    else is read with ``pandas.read_excel``. Source columns that are absent
    from the input are replaced by defaults (empty text, missing date, zero
    duration) instead of aborting the conversion.

    Args:
        src: Path of the report to read.

    Returns:
        A new DataFrame whose columns are exactly ``HEADER_ORDER``, in that
        order, with one row per input row.

    Raises:
        FileNotFoundError: If ``src`` does not exist.
    """
    try:
        if src.suffix.lower() == ".csv":
            df_orig = pd.read_csv(src)
        else:
            df_orig = pd.read_excel(src)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"Eingabedatei nicht gefunden: {src}") from err

    # Start from the source index so that scalar defaults for absent columns
    # are broadcast to every row instead of producing an empty column.
    df_new = pd.DataFrame(index=df_orig.index)

    # Plain one-to-one column copies.
    for target, source in SOURCE_MAP.items():
        df_new[target] = _column_or_default(df_orig, source, "")

    # Broadcast date and time, both derived from the timestamp column.
    # Unparseable or missing timestamps become NaT and end up as empty cells.
    ts = pd.to_datetime(
        _column_or_default(df_orig, _timestamp_column(df_orig), pd.NaT),
        errors="coerce",
    )
    df_new["Sendedatum"] = ts.dt.strftime("%Y%m%d")
    df_new["Sendezeit"] = ts.dt.strftime("%H:%M:%S")

    # Played duration (seconds) rendered as HH:MM:SS.
    played = _column_or_default(df_orig, "Played Duration", 0)
    df_new["Sendedauer"] = played.apply(_fmt_duration)

    # Release date as a compact YYYYMMDD string.
    released = pd.to_datetime(
        _column_or_default(df_orig, "Release Date", pd.NaT), errors="coerce"
    )
    df_new["Release Date"] = released.dt.strftime("%Y%m%d")

    # Columns that exist in the target layout but are always left empty.
    df_new["Lyricists"] = ""
    df_new["Creators"] = ""

    return df_new[HEADER_ORDER]


def _column_or_default(df: pd.DataFrame, name: str, default: object) -> pd.Series:
    """Return column ``name`` of ``df``, or a Series of ``default`` if absent."""
    if name in df.columns:
        return df[name]
    return pd.Series(default, index=df.index)


def _timestamp_column(df: pd.DataFrame) -> str:
    """Return the name of the timestamp column to read from ``df``.

    The exact header ``Timestamp(UTC+01:00)`` is preferred. If it is absent,
    the first column whose name starts with ``Timestamp`` is used instead.
    NOTE(review): this assumes exports made with another UTC offset only
    differ in the offset suffix of that header -- confirm against a real file.
    """
    preferred = "Timestamp(UTC+01:00)"
    if preferred in df.columns:
        return preferred
    return next(
        (str(col) for col in df.columns if str(col).startswith("Timestamp")),
        preferred,
    )
# ---------------------------------------------------------------------------
# Excel-Nachformatierung
# ---------------------------------------------------------------------------
def _autofit_and_align(xlsx: Path, max_width: int = 40) -> None:
    """Adjust column widths, cell alignment and header borders in place.

    The active sheet of the workbook at ``xlsx`` is modified and the file is
    saved back to the same path.

    * Each column is sized to its longest value (header included), measured
      as ``len(str(value))``, capped at ``max_width`` and padded by 2.
    * Data cells in the columns named in ``RIGHT_ALIGN_COLS`` are
      right-aligned; all other data cells are left-aligned.
    * Header cells are left-aligned and their borders are cleared. The header
      font is not changed here.

    Args:
        xlsx: Path of the workbook to restyle.
        max_width: Upper bound for the measured content length of a column.
            The default of 40 keeps the widest possible column at the width
            this function previously applied to every non-empty column.
    """
    wb = load_workbook(xlsx)
    ws = wb.active

    # Column widths: measure the real content instead of using a fixed value.
    for col_idx, column_cells in enumerate(ws.iter_cols(), 1):
        longest = max(
            (len(str(cell.value)) for cell in column_cells if cell.value is not None),
            default=0,
        )
        letter = get_column_letter(col_idx)
        ws.column_dimensions[letter].width = min(longest, max_width) + 2

    # Alignment of data rows; the two Alignment objects are shared by all cells.
    align_left = Alignment(horizontal="left")
    align_right = Alignment(horizontal="right")
    right_cols_idx = {
        idx for idx, cell in enumerate(ws[1], 1) if cell.value in RIGHT_ALIGN_COLS
    }
    for row in ws.iter_rows(min_row=2):
        for cell in row:
            cell.alignment = align_right if cell.column in right_cols_idx else align_left

    # Header row: left-aligned, without borders.
    blank = Side(border_style=None)
    no_border = Border(left=blank, right=blank, top=blank, bottom=blank)
    for cell in ws[1]:
        cell.alignment = align_left
        cell.border = no_border

    wb.save(xlsx)
# ---------------------------------------------------------------------------
# Pipeline
# ---------------------------------------------------------------------------
def format_report(input_path: Path, output_path: Path) -> None:
    """Convert one report file and write the styled result.

    The input is remapped with ``_build_dataframe``, written to
    ``output_path`` without the index column, restyled in place with
    ``_autofit_and_align``, and a confirmation line is printed.

    Args:
        input_path: Path of the source report.
        output_path: Path of the workbook to create.
    """
    _build_dataframe(input_path).to_excel(output_path, index=False)
    _autofit_and_align(output_path)
    print(f"Formatiertes Reporting gespeichert → {output_path}")
# ---------------------------------------------------------------------------
# CLI-Entry
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Command line: one positional input path and an optional output path.
    cli = argparse.ArgumentParser(description="Format ACRCloud XLSX für SUISA")
    cli.add_argument("input", help="Pfad zur OriginalXLSX (ohne/mit .xlsx)")
    cli.add_argument("-o", "--output", help="Pfad der ZielXLSX")
    args = cli.parse_args()

    # Resolve the input file. A path given without any suffix is retried
    # with ".xlsx" appended before giving up.
    in_path = Path(args.input)
    if not in_path.exists():
        if in_path.suffix != "":
            sys.exit(f"Datei nicht gefunden: {in_path}")
        alt = in_path.with_suffix(".xlsx")
        if not alt.exists():
            sys.exit(f"Datei nicht gefunden: {in_path} (auch nicht {alt})")
        in_path = alt

    # Without -o the result is written next to the input as
    # "<stem>_formatiert.xlsx".
    if args.output:
        out_path = Path(args.output)
    else:
        out_path = in_path.with_name(in_path.stem + "_formatiert.xlsx")

    format_report(in_path, out_path)