from pathlib import Path
import json, traceback
import openpyxl, xlrd
# Directory whose .xls/.xlsx files are inventoried by the scan loop in this script.
base=Path('/Users/bot1/Volumes/root_for_ai/AI工作区/良渚文化_月报_2026年5月_20260601_1123/source')

def cell_str(x):
    """Return ``str(x)`` capped at 80 characters; ``None`` passes through as ``None``."""
    return None if x is None else str(x)[:80]

def _preview_row(row_number, raw_values):
    """Build one preview record (row number, non-empty count, values) from raw cells."""
    vals = [cell_str(v) for v in raw_values]
    # Drop trailing blanks (None or '') so both reader branches report rows
    # identically; interior blanks are kept to preserve column positions.
    while vals and vals[-1] in (None, ''):
        vals.pop()
    return {
        'row': row_number,
        'nonempty': sum(1 for v in vals if v not in (None, '')),
        'values': vals,
    }


def read_preview(path, *, max_sheets=3, max_rows=12, max_cols=15):
    """Read a small preview of the leading sheets of an Excel workbook.

    Files with a ``.xlsx`` suffix are read with openpyxl; everything else is
    handed to xlrd.

    Args:
        path: Workbook location (``Path`` or string).
        max_sheets: Number of leading sheets to preview.
        max_rows: Number of leading rows to preview per sheet.
        max_cols: Number of leading columns to preview per row.

    Returns:
        A ``(names, sheets)`` tuple. ``names`` lists every sheet name in the
        workbook. ``sheets`` holds one dict per previewed sheet with the keys
        ``'sheet'``, ``'preview'`` (list of row records with ``'row'``,
        ``'nonempty'`` and ``'values'``), ``'max_row'`` and ``'max_col'``
        (dimensions as reported by the reader library).

    Raises:
        Any exception raised by openpyxl or xlrd while opening or reading the
        workbook propagates to the caller.
    """
    path = Path(path)
    sheets = []
    if path.suffix.lower() == '.xlsx':
        wb = openpyxl.load_workbook(path, data_only=True, read_only=True)
        # A read-only workbook must be closed explicitly; do it even when
        # reading a sheet fails so the file handle is not leaked.
        try:
            names = wb.sheetnames
            for sh in names[:max_sheets]:
                ws = wb[sh]
                rows = [
                    _preview_row(i, row[:max_cols])
                    for i, row in enumerate(
                        ws.iter_rows(min_row=1, max_row=max_rows, values_only=True),
                        start=1,
                    )
                ]
                sheets.append({
                    'sheet': sh,
                    'preview': rows,
                    'max_row': ws.max_row,
                    'max_col': ws.max_column,
                })
        finally:
            wb.close()
    else:
        book = xlrd.open_workbook(path)
        names = book.sheet_names()
        for sh in names[:max_sheets]:
            ws = book.sheet_by_name(sh)
            col_count = min(max_cols, ws.ncols)
            rows = [
                # xlrd indexes rows from 0; records are numbered from 1 to
                # match the openpyxl branch.
                _preview_row(i + 1, [ws.cell_value(i, j) for j in range(col_count)])
                for i in range(min(max_rows, ws.nrows))
            ]
            sheets.append({
                'sheet': sh,
                'preview': rows,
                'max_row': ws.nrows,
                'max_col': ws.ncols,
            })
    return names, sheets
# Build one inventory record per Excel workbook found directly under `base`,
# in sorted path order.
res = []
for p in sorted(base.iterdir()):
    if p.suffix.lower() not in ('.xls', '.xlsx'):
        continue
    item = {'file': p.name, 'suffix': p.suffix, 'size': p.stat().st_size}
    try:
        names, sheets = read_preview(p)
    except Exception as e:
        # Best-effort inventory: an unreadable workbook is recorded with its
        # error and the tail of the traceback instead of aborting the scan.
        item['error'] = repr(e)
        item['trace'] = traceback.format_exc()[-1000:]
    else:
        item['sheet_names'] = names
        item['sheets'] = sheets
    res.append(item)

out = Path('/Users/bot1/Volumes/root_for_ai/AI工作区/良渚文化_月报_2026年5月_20260601_1123/work/source_inventory.json')
# Create the output directory if it is missing so the collected inventory is
# not lost to a FileNotFoundError at write time.
out.parent.mkdir(parents=True, exist_ok=True)
out.write_text(json.dumps(res, ensure_ascii=False, indent=2), encoding='utf-8')

# Console summary: for each file, its sheet names / error, then up to 8 preview
# rows (first 12 values each) of the first previewed sheet.
for it in res:
    print('\nFILE:',it['file'])
    print('sheets:',it.get('sheet_names'), 'error:', it.get('error'))
    for s in it.get('sheets',[])[:1]:
        print(' sheet',s.get('sheet'),'rows/cols',s.get('max_row'),s.get('max_col'))
        for r in s.get('preview',[])[:8]:
            print('  ',r['row'], r['values'][:12])
print('\nWROTE',out)
