#!/usr/bin/env python3
from __future__ import annotations

import json
import sys
from datetime import datetime, timezone
from pathlib import Path

import openpyxl
from openpyxl.utils import get_column_letter


def clean_value(value):
    if value is None:
        return None
    if hasattr(value, 'isoformat'):
        return value.isoformat()
    return value


def main() -> int:
    project_root = Path(__file__).resolve().parents[1]
    source = project_root / 'source' / '万物有灵项目利润分配表2026_原始参考.xlsx'
    output = project_root / 'data' / 'workbook-data.json'
    output.parent.mkdir(parents=True, exist_ok=True)

    wb_formula = openpyxl.load_workbook(source, data_only=False, read_only=False)
    wb_values = openpyxl.load_workbook(source, data_only=True, read_only=False)

    sheets = []
    total_cells = 0
    total_formulas = 0

    for ws_formula in wb_formula.worksheets:
        ws_values = wb_values[ws_formula.title]
        cells = []
        used_rows = set()
        used_cols = set()
        formula_count = 0

        for row in ws_formula.iter_rows():
            for cell in row:
                value = clean_value(cell.value)
                if value is None:
                    continue
                cached = clean_value(ws_values[cell.coordinate].value)
                is_formula = isinstance(value, str) and value.startswith('=')
                if is_formula:
                    formula_count += 1
                used_rows.add(cell.row)
                used_cols.add(cell.column)
                cells.append({
                    'address': cell.coordinate,
                    'row': cell.row,
                    'column': cell.column,
                    'columnLetter': get_column_letter(cell.column),
                    'value': value,
                    'displayValue': cached if cached is not None else value,
                    'isFormula': is_formula,
                    'numberFormat': cell.number_format,
                })

        total_cells += len(cells)
        total_formulas += formula_count
        sheets.append({
            'name': ws_formula.title,
            'maxRow': max(used_rows) if used_rows else 0,
            'maxColumn': max(used_cols) if used_cols else 0,
            'minRow': min(used_rows) if used_rows else 0,
            'minColumn': min(used_cols) if used_cols else 0,
            'usedRows': sorted(used_rows),
            'usedColumns': [{'index': c, 'letter': get_column_letter(c)} for c in sorted(used_cols)],
            'nonEmptyCells': len(cells),
            'formulaCells': formula_count,
            'cells': cells,
        })

    data = {
        'sourceFile': source.name,
        'extractedAt': datetime.now(timezone.utc).isoformat(),
        'summary': {
            'totalSheets': len(sheets),
            'totalNonEmptyCells': total_cells,
            'totalFormulaCells': total_formulas,
        },
        'sheets': sheets,
    }

    output.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding='utf-8')
    print(json.dumps({
        'output': str(output),
        'totalSheets': len(sheets),
        'totalNonEmptyCells': total_cells,
        'totalFormulaCells': total_formulas,
        'sheetNames': [s['name'] for s in sheets],
    }, ensure_ascii=False, indent=2))
    return 0


if __name__ == '__main__':
    raise SystemExit(main())
