from pathlib import Path
from openpyxl import load_workbook, Workbook
from openpyxl.cell import WriteOnlyCell
from openpyxl.styles import PatternFill, Font, Alignment
from decimal import Decimal
import sys

src=Path('/Users/qianliyun/.hermes/profiles/feishu8/cache/documents/doc_fa0466c57339_1-5全量发票查询导出结果.xlsx')
out=Path('/Users/qianliyun/Documents/aiwork/finance/1-5全量发票查询导出结果_已标记飞书提交发票.xlsx')
out.parent.mkdir(parents=True, exist_ok=True)
normal={
'26352000001089216766','26952000001813060471','26342000001355563636','26332000003696515536','26952000001829122936','26952000001831791901','26332000003726574261','26312000002785821421','26312000002786802511','26332000003778293946','26442000005021228116','26952000001877577676','26342000001407857386','26952000001883108221','26952000001882921516','26442000005106306856','26342000001432786741','26442000005152214611','26332000003924451051','26442000005213506156'}
dup={'26312000002925338191'}
submitted=normal.union(dup)

def log(*args):
    print(*args, flush=True)

green=PatternFill('solid', fgColor='C6EFCE')
red=PatternFill('solid', fgColor='FFC7CE')
blue=PatternFill('solid', fgColor='1F4E78')
yellow=PatternFill('solid', fgColor='FFF2CC')
whitebold=Font(color='FFFFFF', bold=True)

def styled_cell(ws, value, fill=None, header=False, numfmt=None):
    c=WriteOnlyCell(ws, value=value)
    if fill: c.fill=fill
    if header:
        c.font=whitebold
        c.alignment=Alignment(horizontal='center', vertical='center', wrap_text=True)
    else:
        c.alignment=Alignment(vertical='top', wrap_text=True)
    if numfmt: c.number_format=numfmt
    return c

log('loading source')
src_wb=load_workbook(src, read_only=True, data_only=True)
out_wb=Workbook(write_only=True)
base_records={}
marked_rows=0
log('copying sheets', src_wb.sheetnames)
for src_ws in src_wb.worksheets:
    log('sheet', src_ws.title)
    ws=out_wb.create_sheet(src_ws.title)
    it=src_ws.iter_rows(values_only=True)
    try:
        headers=list(next(it))
    except StopIteration:
        continue
    colmap={h:i for i,h in enumerate(headers) if h is not None}
    num_idxs=[colmap[x] for x in ('发票号码','数电发票号码') if x in colmap]
    amt_idx=colmap.get('价税合计')
    seller_idx=colmap.get('销方名称')
    date_idx=colmap.get('开票日期')
    new_headers=headers+['飞书审批提交标记','重复提交核查'] if num_idxs else headers
    ws.append([styled_cell(ws,h,blue,True) for h in new_headers])
    count=0
    for ridx,row in enumerate(it, start=2):
        row=list(row)
        hit=[]
        if num_idxs:
            nums=[str(row[i]).strip() for i in num_idxs if i < len(row) and row[i] not in (None,'')]
            hit=[x for x in nums if x in submitted]
        if hit:
            inv=hit[0]
            isdup=inv in dup
            fill=red if isdup else green
            row2=row+['已提交','疑似重复提交' if isdup else '未发现重复']
            ws.append([styled_cell(ws,v,fill) for v in row2])
            marked_rows += 1
            if src_ws.title=='发票基础信息':
                base_records[inv]={
                    '发票号码':inv,
                    '销方名称':row[seller_idx] if seller_idx is not None and seller_idx < len(row) else '',
                    '开票日期':row[date_idx] if date_idx is not None and date_idx < len(row) else '',
                    '价税合计':row[amt_idx] if amt_idx is not None and amt_idx < len(row) else 0,
                    '重复':isdup,
                }
        else:
            row2=row+['',''] if num_idxs else row
            ws.append([styled_cell(ws,v) for v in row2])
        count += 1
    log('rows copied', count)

log('creating stats')
stat=out_wb.create_sheet('飞书提交统计')
headers=['发票号码','销方名称','开票日期','价税合计','重复提交核查']
stat.append([styled_cell(stat,h,blue,True) for h in headers])
unique_total=Decimal('0')
dup_total=Decimal('0')
for inv,rec in sorted(base_records.items(), key=lambda kv: str(kv[1]['开票日期'])):
    amt=Decimal(str(rec['价税合计'] or 0)).quantize(Decimal('0.01'))
    unique_total += amt
    if rec['重复']:
        dup_total += amt
    fill=red if rec['重复'] else green
    vals=[inv,rec['销方名称'],rec['开票日期'],float(amt),'疑似重复提交' if rec['重复'] else '未发现重复']
    stat.append([styled_cell(stat,v,fill,numfmt='#,##0.00' if i==3 else None) for i,v in enumerate(vals)])
stat.append([])
summary=[('唯一发票张数',len(base_records)),('唯一发票合计金额',float(unique_total)),('疑似重复提交发票张数',sum(1 for r in base_records.values() if r['重复'])),('疑似重复提交金额',float(dup_total)),('如按提交次数重复计入合计',float(unique_total+dup_total)),('说明','绿色=已提交；红色=同一发票PDF近三天出现2次，疑似重复提交；合计按发票基础信息去重统计，避免多商品明细重复计数。')]
for k,v in summary:
    stat.append([styled_cell(stat,k,yellow), styled_cell(stat,v,yellow, numfmt='#,##0.00' if isinstance(v,float) else None)])
log('saving', out)
out_wb.save(out)
log('DONE', out, out.stat().st_size)
log('匹配唯一发票', len(base_records))
log('唯一发票合计', unique_total)
log('疑似重复提交金额', dup_total)
log('按提交次数重复计入金额', unique_total + dup_total)
log('标记行数', marked_rows)
