#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from pathlib import Path
import hashlib, json, zipfile, os, time
from PIL import Image, ImageDraw, ImageFont

# Workspace layout: everything lives under ROOT; output folders are created
# up front so later writes cannot fail on a missing directory.
ROOT = Path('/Users/bot1/Volumes/root_for_ai/AI工作区/国博_图片素材_馆藏八大件_20260612_1857')
IMG_ROOT = ROOT / 'images'
DOCS = ROOT / 'docs'
SHEETS = ROOT / 'contact_sheets'
DEL = ROOT / 'deliverables'
for p in (DOCS, SHEETS, DEL):
    p.mkdir(parents=True, exist_ok=True)

# Item folder names under IMG_ROOT, in display order ("<index>_<name>").
ITEMS = [
    '01_陶鹰鼎',
    '02_后母戊鼎',
    '03_青铜冰鉴',
    '04_击鼓说唱俑',
    '05_青瓷莲花尊',
    '06_载乐骆驼',
    '07_绿釉鸱吻',
    '08_海晏河清尊',
]

def font(size):
    """Load the first available system font at the given size.

    The candidates are macOS system font files, tried in order; they are
    used for the (Chinese) titles and captions drawn on the contact sheets.

    Args:
        size: Font size passed to ``ImageFont.truetype``.

    Returns:
        The loaded font object, or ``None`` when none of the candidate
        files could be opened. Callers pass the result directly as the
        ``font=`` argument of ``ImageDraw.text``.
    """
    candidates = (
        '/System/Library/Fonts/PingFang.ttc',
        '/System/Library/Fonts/STHeiti Light.ttc',
        '/System/Library/Fonts/Supplemental/Arial Unicode.ttf',
    )
    for fp in candidates:
        try:
            return ImageFont.truetype(fp, size)
        except OSError:
            # A missing or unreadable font file is expected here; move on to
            # the next candidate. Other errors (e.g. an invalid size) are
            # programming mistakes and are allowed to surface.
            continue
    return None
# Shared fonts: sheet titles, primary captions, and small captions/errors.
F_TITLE = font(18)
F = font(12)
FS = font(10)

def scan_records(items=None, img_root=None, root=None):
    """Collect one metadata record per readable image file, grouped by item.

    Args:
        items: Item folder names of the form ``"<index>_<name>"``.
            Defaults to the module-level ``ITEMS``.
        img_root: Directory containing one sub-folder per item.
            Defaults to the module-level ``IMG_ROOT``.
        root: Base directory that the ``file`` field is made relative to.
            Defaults to the module-level ``ROOT``.

    Returns:
        A list of dicts with the keys ``item_id``, ``item``, ``file``,
        ``width``, ``height``, ``bytes`` and ``sha256``, ordered by item and
        then by sorted file path. Missing item folders, non-files, files
        with other suffixes, and files Pillow cannot open are skipped.
    """
    items = ITEMS if items is None else items
    img_root = IMG_ROOT if img_root is None else img_root
    root = ROOT if root is None else root
    image_suffixes = ('.jpg', '.jpeg', '.png', '.webp')

    recs = []
    for item in items:
        item_dir = img_root / item
        # is_dir() rather than exists(): a stray regular file with the item's
        # name must not reach iterdir(), which would raise.
        if not item_dir.is_dir():
            continue
        for f in sorted(item_dir.iterdir()):
            if not f.is_file() or f.suffix.lower() not in image_suffixes:
                continue
            try:
                # The context manager releases the file handle immediately;
                # only the header is needed to read the dimensions.
                with Image.open(f) as im:
                    w, h = im.size
            except Exception:
                # Deliberately best-effort: anything Pillow cannot open is
                # left out of the inventory instead of aborting the scan.
                continue
            data = f.read_bytes()
            recs.append({
                'item_id': item,
                'item': item.split('_', 1)[1],
                'file': str(f.relative_to(root)),
                'width': w,
                'height': h,
                'bytes': len(data),
                'sha256': hashlib.sha256(data).hexdigest(),
            })
    return recs

def make_sheet(item, files, out):
    """Render a thumbnail contact sheet for one item and save it to *out*.

    The sheet has a title line followed by a 4-column grid. Each cell holds
    a thumbnail plus two caption lines: a running number with the original
    pixel size, and the file name truncated to 30 characters. A file that
    cannot be rendered gets a red ``ERR ...`` note in its cell instead.

    Args:
        item: Item folder name of the form ``"<index>_<name>"``; the part
            after the first underscore is used in the title.
        files: Image paths to include, in display order.
        out: Destination passed to ``Image.save`` (with ``quality=92``).

    Returns:
        None. Nothing is written when *files* is empty.
    """
    if not files:
        return
    tw, th, lh = 230, 180, 54  # cell width, thumbnail area height, caption height
    cols = 4
    rows = (len(files) + cols - 1) // cols  # ceiling division
    W, H = cols * tw, rows * (th + lh) + 58
    sheet = Image.new('RGB', (W, H), 'white')
    d = ImageDraw.Draw(sheet)
    d.text((10, 10), f'{item.split("_",1)[1]} 缩略图总览（{len(files)}张）', fill='black', font=F_TITLE)
    for idx, f in enumerate(files):
        x = (idx % cols) * tw
        y = 48 + (idx // cols) * (th + lh)
        try:
            # convert() returns an independent in-memory copy, so the source
            # file handle can be released right away.
            with Image.open(f) as src:
                im = src.convert('RGB')
            w, h = im.size  # original size, captured before thumbnail() shrinks it
            im.thumbnail((tw - 12, th - 12), Image.LANCZOS)
            sheet.paste(im, (x + (tw - im.width) // 2, y + (th - im.height) // 2))
            d.text((x + 6, y + th + 2), f'{idx+1:02d}  {w}×{h}', fill='black', font=F)
            d.text((x + 6, y + th + 22), f.name[:30], fill=(70, 70, 70), font=FS)
        except Exception as e:
            # Keep going: one bad file must not abort the whole sheet.
            d.text((x + 6, y + 10), f'ERR {e}', fill='red', font=FS)
    sheet.save(out, quality=92)

def make_master(recs, out):
    """Render one overview sheet covering all items and save it to *out*.

    For every entry of ``ITEMS`` the (up to) eight records with the largest
    long edge are shown, largest first. Cells are laid out in an 8-column
    grid and filled continuously, so an item with fewer than eight records
    does not start a new row.

    Args:
        recs: Records shaped like the output of ``scan_records``; the keys
            ``item_id``, ``item``, ``file``, ``width`` and ``height`` are
            read here.
        out: Destination passed to ``Image.save`` (with ``quality=92``).

    Returns:
        None.
    """
    # Up to 8 per item, highest resolution first, for quick confirmation.
    chosen = []
    for item in ITEMS:
        item_recs = [r for r in recs if r['item_id'] == item]
        item_recs.sort(key=lambda r: max(r['width'], r['height']), reverse=True)
        chosen.extend(item_recs[:8])

    tw, th, lh = 180, 145, 44  # cell width, thumbnail area height, caption height
    cols = 8
    rows = (len(chosen) + cols - 1) // cols  # ceiling division
    W, H = cols * tw, rows * (th + lh) + 70
    sheet = Image.new('RGB', (W, H), 'white')
    d = ImageDraw.Draw(sheet)
    d.text((10, 10), '中国国家博物馆馆藏八大件：图片素材初筛总览（每件优先展示高清图）', fill='black', font=F_TITLE)
    for idx, r in enumerate(chosen):
        x = (idx % cols) * tw
        y = 58 + (idx // cols) * (th + lh)
        f = ROOT / r['file']
        try:
            # convert() returns an independent in-memory copy, so the source
            # file handle can be released right away.
            with Image.open(f) as src:
                im = src.convert('RGB')
            im.thumbnail((tw - 10, th - 10), Image.LANCZOS)
            sheet.paste(im, (x + (tw - im.width) // 2, y + (th - im.height) // 2))
        except Exception as e:
            # Same convention as make_sheet: mark the cell instead of leaving
            # it silently blank, and keep rendering the remaining cells.
            d.text((x + 4, y + 10), f'ERR {e}', fill='red', font=FS)
        d.text((x + 4, y + th + 1), f"{r['item']} {r['width']}×{r['height']}", fill='black', font=FS)
        d.text((x + 4, y + th + 18), Path(r['file']).name[:24], fill=(70, 70, 70), font=FS)
    sheet.save(out, quality=92)

recs = scan_records()
# dedupe report: the first file seen for a digest is kept as the reference;
# every later file with the same digest is listed as (duplicate, first_seen).
seen = {}
dup = []
for r in recs:
    digest = r['sha256']
    if digest not in seen:
        seen[digest] = r['file']
        continue
    dup.append((r['file'], seen[digest]))
# sheets: one full contact sheet per item folder, then the combined overview.
image_exts = ['.jpg', '.jpeg', '.png', '.webp']
for item in ITEMS:
    item_dir = IMG_ROOT / item
    candidates = sorted(item_dir.glob('*')) if item_dir.exists() else []
    files = [c for c in candidates if c.suffix.lower() in image_exts]
    make_sheet(item, files, SHEETS / f'{item}_缩略图总览_全量.jpg')
make_master(recs, SHEETS / '00_八大件初筛总览_每件8张.jpg')
# docs: tab-separated inventory with one row per scanned image.
columns = ['item', 'file', 'width', 'height', 'bytes', 'sha256']
with (DOCS / '图片尺寸清单.tsv').open('w', encoding='utf-8') as f:
    f.write('item\tfile\twidth\theight\tbytes\tsha256\n')
    f.writelines('\t'.join(str(r[k]) for k in columns) + '\n' for r in recs)
# Per-item summary tuples: (name, image count, count with long edge >= 1500,
# largest long edge or 0 when the item has no images).
stats = []
for item in ITEMS:
    long_edges = [max(r['width'], r['height']) for r in recs if r['item_id'] == item]
    hi_res = sum(1 for edge in long_edges if edge >= 1500)
    stats.append((item.split('_', 1)[1], len(long_edges), hi_res, max(long_edges or [0])))
# README: fixed header, one statistics table row per item, then usage notes.
readme = '''# 中国国家博物馆馆藏八大件图片素材初筛包\n\n本包按文物分组搜集公开可访问图片素材，优先保留高清、多角度、无明显平台 logo/水印的候选图；另含缩略图总览，便于快速确认文物形象是否准确。\n\n## 文物范围\n1. 陶鹰鼎\n2. 后母戊鼎\n3. 青铜冰鉴\n4. 击鼓说唱俑\n5. 青瓷莲花尊\n6. 载乐骆驼\n7. 绿釉鸱吻\n8. 海晏河清尊\n\n## 目录说明\n- `images/`：按文物分组的图片素材。\n- `contact_sheets/`：每件文物缩略图总览，以及八大件总览。\n- `docs/图片尺寸清单.tsv`：图片尺寸、文件大小、SHA256。\n- `sources/`：检索候选来源 JSON，含候选图片 URL / 来源页 / 标题等机器记录，供回溯。\n\n## 初筛统计\n\n| 文物 | 图片数 | 长边≥1500px | 最大长边 |\n|---|---:|---:|---:|\n'''
readme += ''.join(f'| {name} | {c} | {hi} | {mx}px |\n' for name, c, hi, mx in stats)
readme += '''\n## 使用提醒\n- 这些图片来自公开网页/官方页面/图片搜索候选，适合作为内部设计参考素材；若用于商业量产、出版、广告或大面积传播，建议进一步向中国国家博物馆或原图权利方确认授权。\n- `绿釉鸱吻`、`海晏河清尊`公开高清图相对少，已保留多角度候选；如需严格定稿，请优先看缩略图总览确认是否为目标文物。\n'''
readme_path = ROOT / 'README.md'
readme_path.write_text(readme, encoding='utf-8')

# Machine-readable summary: totals, per-item stats, and duplicate pairs.
summary = {'total': len(recs), 'stats': stats, 'duplicates': dup}
summary_path = DOCS / '初筛统计.json'
summary_path.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding='utf-8')
# zip selected clean package: README plus the listed folders, stored with
# paths relative to ROOT. The archive itself lives in DEL, which is not one
# of the packaged folders.
zip_path = DEL / '国博馆藏八大件_高清图片素材初筛包.zip'
if zip_path.exists():
    zip_path.unlink()
with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED, compresslevel=6) as z:
    for rel_root in ['README.md', 'images', 'contact_sheets', 'docs', 'sources']:
        p = ROOT / rel_root
        if p.is_file():
            z.write(p, p.name)
        elif p.is_dir():
            # rglob() yields entries in filesystem order; sorting keeps the
            # member order of the archive identical from run to run.
            for f in sorted(p.rglob('*')):
                if f.is_file():
                    z.write(f, f.relative_to(ROOT))

# Final machine-readable report on stdout.
print(json.dumps({
    'zip': str(zip_path),
    'total_images': len(recs),
    'stats': stats,
    'master_sheet': str(SHEETS / '00_八大件初筛总览_每件8张.jpg'),
    'zip_size': zip_path.stat().st_size,
}, ensure_ascii=False, indent=2))
