#!/usr/bin/env python3 from __future__ import annotations import argparse import sys from pathlib import Path from typing import Optional import pandas as pd from openpyxl import load_workbook from openpyxl.utils import get_column_letter from openpyxl.styles import Alignment, Font, Border, Side # --------------------------------------------------------------------------- # Konfiguration # --------------------------------------------------------------------------- HEADER_ORDER = [ "Titel", "Komponist", "Interpret", "Sender", "Sendedatum", "Sendedauer", "Sendezeit", "ISWC", "ISRC", "Label", "Albumtitel", "Release Date", "Lyricists", "Creators", "UPC", "ACRID", ] SOURCE_MAP = { "Titel": "Title", "Komponist": "Composers", "Interpret": "Artist", "Sender": "Stream Name", "ISWC": "ISWC", "ISRC": "ISRC", "Label": "Label", "Albumtitel": "Album", "UPC": "UPC", "ACRID": "ACRID", } RIGHT_ALIGN_COLS = {"Sendedatum", "Sendedauer", "Sendezeit"} possible_cols = [ "Timestamp(UTC+01:00)", "Timestamp(UTC+02:00)", ] # --------------------------------------------------------------------------- # Hilfsfunktionen # --------------------------------------------------------------------------- def _fmt_duration(seconds: Optional[float | int]) -> str: """Wandelt Sekunden (float|int|NaN) in **HH:MM:SS** um. Excel interpretiert dann korrekt. Fuer Kurzzeiten 00:MM:SS""" if seconds is None or pd.isna(seconds): seconds = 0 total = int(round(float(seconds))) hours, rem = divmod(total, 3600) mins, secs = divmod(rem, 60) return f"{hours:02d}:{mins:02d}:{secs:02d}" def _build_dataframe(src: Path) -> pd.DataFrame: try: df_orig = pd.read_excel(src) except FileNotFoundError: raise FileNotFoundError(f"Eingabedatei nicht gefunden: {src}") df_new = pd.DataFrame() # Einfaches Mapping for tgt, src_col in SOURCE_MAP.items(): df_new[tgt] = df_orig.get(src_col, "") # Datum/Zeiti ts_col = next(col for col in possible_cols if col in df_orig.columns) ts = pd.to_datetime( pd.Series(df_orig[ts_col]), errors="coerce" ) df_new["Sendedatum"] = ts.dt.strftime("%Y%m%d") df_new["Sendezeit"] = ts.dt.strftime("%H:%M:%S") # Dauer in HH:MM:SS df_new["Sendedauer"] = df_orig.get("Played Duration", 0).apply(_fmt_duration) # Release Date rel = pd.to_datetime(df_orig.get("Release Date"), errors="coerce") df_new["Release Date"] = rel.dt.strftime("%Y%m%d") # Leerspalten df_new["Lyricists"] = "" df_new["Creators"] = "" # Richtige Reihenfolge df_new = df_new[HEADER_ORDER] return df_new # --------------------------------------------------------------------------- # Excel Nachformatierung # --------------------------------------------------------------------------- def _autofit_and_align(xlsx: Path) -> None: wb = load_workbook(xlsx) ws = wb.active # Spaltenbreiten for col_idx, header_cell in enumerate(ws[1], 1): letter = get_column_letter(col_idx) max_len = len(str(header_cell.value)) if header_cell.value else 0 for cell in ws[letter][1:]: # skip header if cell.value is not None: max_len = 40 ws.column_dimensions[letter].width = max_len + 2 # Ausrichtung right_cols_idx = {idx for idx, cell in enumerate(ws[1], 1) if cell.value in RIGHT_ALIGN_COLS} for row in ws.iter_rows(min_row=2): for cell in row: cell.alignment = Alignment(horizontal="right" if cell.column in right_cols_idx else "left") # Header fett, links, ohne Rahmen no_border = Border(left=Side(border_style=None), right=Side(border_style=None), top=Side(border_style=None), bottom=Side(border_style=None)) for cell in ws[1]: cell.alignment = Alignment(horizontal="left") #cell.font = Font(bold=True) cell.border = no_border wb.save(xlsx) # --------------------------------------------------------------------------- # Pipeline # --------------------------------------------------------------------------- def format_report(input_path: Path, output_path: Path) -> None: df = _build_dataframe(input_path) df.to_excel(output_path, index=False) _autofit_and_align(output_path) print(f"Formatiertes Reporting gespeichert_{output_path}") # --------------------------------------------------------------------------- # CLI Entry # --------------------------------------------------------------------------- if __name__ == "__main__": parser = argparse.ArgumentParser(description="Format ACRCloud XLSX fuer SUISA") parser.add_argument("input", help="Pfad zur Original XLSX (ohne/mit .xlsx)") parser.add_argument("-o", "--output", help="Pfad der ZielXLSX") args = parser.parse_args() in_path = Path(args.input) if not in_path.exists() and in_path.suffix == "": alt = in_path.with_suffix(".xlsx") if alt.exists(): in_path = alt else: sys.exit(f"Datei nicht gefunden: {in_path} (auch nicht {alt})") elif not in_path.exists(): sys.exit(f"Datei nicht gefunden: {in_path}") out_path = Path(args.output) if args.output else in_path.with_name(in_path.stem + "_formatiert.xlsx") format_report(in_path, out_path)