import json, sys, os
# Dump the OCR results stored in work/ocr_before.json to a plain-text file:
# one recognised item per line, grouped under a "===== PAGE n =====" header
# for each page, then print the path of the file that was written.
proj='/Users/bot1/Volumes/root_for_ai/AI工作区/良渚_IP授权PDF直接修改_20260611_1117'
ocr_json_path=f'{proj}/work/ocr_before.json'
ocr_text_path=f'{proj}/work/ocr_text_by_page.txt'

# Load the JSON inside a context manager so the input handle is closed
# immediately instead of being left open until garbage collection.
with open(ocr_json_path,encoding='utf-8') as src:
    data=json.load(src)

with open(ocr_text_path,'w',encoding='utf-8') as f:
    for p in data['pages']:
        f.write(f"\n\n===== PAGE {p['page']} =====\n")
        # The y origin is at the bottom of the page (Vision coordinates), so
        # a larger y is higher up: sort by descending y, then ascending x, to
        # get top-to-bottom, left-to-right reading order.
        # NOTE(review): y values are compared exactly; items on the same
        # visual line with slightly different y will not be ordered by x —
        # confirm whether line clustering is needed.
        items=sorted(p['items'], key=lambda it:(-it['y'], it['x']))
        for it in items:
            # Flatten embedded newlines so every item occupies exactly one line.
            f.write(it['text'].replace('\n',' ')+'\n')

# Report where the text dump was written.
print(ocr_text_path)
