import openpyxl, zipfile, os, json, math
# Workbook under verification (absolute path to the generated .xlsx file).
out='/Users/qianliyun/Documents/aiwork/finance/万物有灵项目利润分配表_智能简化版_2026.5.14.xlsx'
# Raise explicitly rather than `assert`: assertions are stripped when Python
# runs with -O, which would silently skip both checks below.
if not os.path.exists(out):
    raise FileNotFoundError(out)
# Zip integrity: testzip() returns the name of the first archive member that
# fails its check, or None when every member reads back cleanly.
with zipfile.ZipFile(out,'r') as z:
    bad=z.testzip()
if bad is not None:
    raise zipfile.BadZipFile(f'corrupt archive member: {bad}')
# Load the workbook with data_only=False (formulas are kept as formulas
# instead of being replaced by their cached results).
wb=openpyxl.load_workbook(out, data_only=False)
sheet_names=wb.sheetnames
# Top-level summary that is printed as JSON at the end of the script.
info={
    'file':out,
    'size_bytes':os.path.getsize(out),
    'sheets':sheet_names,
    'sheet_count':len(sheet_names),
}
# Per-sheet summary for the key sheets: used cell range and the names of the
# tables defined on the sheet. A missing sheet raises KeyError from wb[...].
key_sheets=('录入_项目分配','录入_公共费用','录入_对象收支','录入_账户流水','总览','校验中心')
for title in key_sheets:
    sheet=wb[title]
    info[title]={
        'dimensions':sheet.dimensions,
        'tables':list(sheet.tables.keys()),
    }
# Read the project-allocation sheet as records keyed by the header row (row 1),
# keeping only the rows whose '项目/款项' value is truthy.
ws=wb['录入_项目分配']
headers=[cell.value for cell in next(ws.iter_rows(min_row=1,max_row=1))]
# One dict per data row (row 2 onward); if two columns share a header, the
# right-most column's value wins.
records=(dict(zip(headers,values)) for values in ws.iter_rows(min_row=2,values_only=True))
rows=[rec for rec in records if rec.get('项目/款项')]
info['project_nonblank_rows']=len(rows)
# Report a fixed set of columns for the first 8 non-blank rows only.
wanted=['月份','款项类型','项目/款项','收入金额','苏薇','蒋家宁','朱潇潇/木雨','公共可支配','自研产品组']
info['project_items']=[{k:rec.get(k) for k in wanted} for rec in rows[:8]]
# Count formula cells across every worksheet.
# The cell's data type ('f') is used instead of a "string starting with '='"
# test: newer openpyxl releases load array formulas as ArrayFormula objects
# (not str), which the string test misses, and literal text that merely begins
# with '=' is not a formula but would be counted by the string test.
formula_count=sum(
    1
    for sheet in wb.worksheets  # separate name so the module-level `ws` is not rebound
    for row in sheet.iter_rows()
    for cell in row
    if cell.data_type=='f'
)
info['formula_count']=formula_count
# default=str stringifies any value json cannot encode natively
# (presumably date/time cell values, if the sampled rows contain any).
print(json.dumps(info, ensure_ascii=False, indent=2, default=str))
