#!/usr/bin/env python3
"""Export existing ledger Excel/CSV data into normalized CSV files for PostgreSQL import."""
from __future__ import annotations

import csv
import hashlib
import json
import re
import shutil
import uuid
from collections import defaultdict
from datetime import datetime, date
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import openpyxl

ROOT = Path('/Users/bot1/Volumes/root_for_ai/AI工作区')
PROJECT_DIR = ROOT / '通用_货品台账系统_数据库表结构设计_20260612_0134'
OUT = PROJECT_DIR / 'data' / 'normalized_v1'

GUOBO = ROOT / '国博_合同台账_供应商采购补货梳理_20260610_2117'
SAMPLE = ROOT / '通用_公司产品样品库_秘色破茧云汉寻真_20260611_1035'
SIGNOFF = ROOT / '良渚_云汉寻真_3D悬浮手机贴西泠发货签收_20260611_0919'

FILES = {
    'downstream': GUOBO / 'deliverables/下游供应商采购表_四川极深奇乐_博航手信.xlsx',
    'upstream': GUOBO / 'deliverables/国博上游采购表_含渠道数量.xlsx',
    'recon': GUOBO / 'work/extraction_tables/供应商采购_vs_国博需求核对_提取.csv',
    'sample': SAMPLE / 'deliverables/公司产品样品库_秘色破茧_云汉寻真_20260611.xlsx',
    'signoff': SIGNOFF / 'deliverables/云汉寻真_3D悬浮手机贴_西泠印社签收表_20260611.xlsx',
}

TABLE_COLUMNS: Dict[str, List[str]] = {
    'parties': ['id','party_code','display_name','legal_name','short_name','party_type','contact_masked','notes','created_at','updated_at'],
    'projects': ['id','project_code','name','ip_name','owner_party_id','status','notes','created_at','updated_at'],
    'products': ['id','sku_code','standard_name','raw_name_default','category','spec','default_unit','barcode_69','guobo_code','status','notes','created_at','updated_at'],
    'source_files': ['id','original_filename','stored_path','source_cache_path','sha256','byte_size','mime_type','material_type','received_at','extraction_status','sensitivity_level','notes','created_at','updated_at'],
    'product_aliases': ['id','product_id','alias_name','alias_type','source_file_id','confidence','verification_status','notes','created_at','updated_at'],
    'business_documents': ['id','document_code','document_type','source_file_id','project_id','own_party_id','counterparty_id','document_no','document_date','title','status','raw_payload','notes','created_at','updated_at'],
    'purchase_orders': ['id','order_code','document_id','project_id','order_side','order_type','buyer_party_id','seller_party_id','order_no','order_date','settlement_method','expected_arrival','status','raw_payload','notes','created_at','updated_at'],
    'order_lines': ['id','order_id','line_no','product_id','product_name_raw','product_name_standard_draft','quantity','unit','unit_price','amount','material','evidence_note','verification_status','raw_payload','notes','created_at','updated_at'],
    'channel_allocations': ['id','order_line_id','channel_party_id','raw_channel','standard_channel','original_quantity','adjustment_quantity','adjustment_reason','evidence_note','notes','created_at','updated_at'],
    'inventory_accounts': ['id','account_code','owner_party_id','project_id','account_type','warehouse_party_id','status','notes','created_at','updated_at'],
    'stock_items': ['id','stock_item_code','inventory_account_id','product_id','display_name','spec','unit','unit_cost_tax_included','source_policy','status','notes','created_at','updated_at'],
    'stock_movements': ['id','movement_code','stock_item_id','movement_date','direction','movement_type','quantity','unit','handler_party_id','counterparty_id','purpose','returnable','unit_cost_tax_included','source_document_id','evidence_file_id','notes','created_at','updated_at'],
    'shipment_signoffs': ['id','signoff_code','project_id','shipper_party_id','receiver_party_id','ship_date','purpose','status','source_document_id','notes','created_at','updated_at'],
    'shipment_signoff_lines': ['id','signoff_id','line_no','product_id','product_name','quantity','unit','stock_movement_id','sign_status','notes','created_at','updated_at'],
    'reconciliation_runs': ['id','run_code','project_id','reconciliation_type','policy_version','input_scope','run_status','run_at','notes','created_at','updated_at'],
    'reconciliation_lines': ['id','run_id','product_id','product_name_standard_draft','supplier_purchase_quantity_total','upstream_demand_quantity_total','formal_order_quantity_total','supplement_demand_quantity_total','difference_supplier_minus_demand','self_stock_candidate_qty','shortage_qty','difference_status','related_supplier_sources','related_upstream_sources','raw_payload','notes','created_at','updated_at'],
    'import_batches': ['id','batch_code','source_file_id','import_type','profile_name','status','started_at','finished_at','summary','notes','created_at','updated_at'],
    'validation_issues': ['id','issue_type','severity','related_table','related_id','field_name','message','status','source_file_id','created_at','resolved_at'],
    'agent_write_audit': ['id','agent_profile','tool_name','operation','target_table','target_id','request_payload','result_status','result_message','created_at'],
}

now = datetime.now().astimezone().isoformat(timespec='seconds')

rows: Dict[str, List[Dict[str, Any]]] = {t: [] for t in TABLE_COLUMNS}

party_by_name: Dict[Tuple[str,str], str] = {}
project_by_name: Dict[str, str] = {}
product_by_name: Dict[str, str] = {}
source_by_path: Dict[str, str] = {}
doc_by_key: Dict[str, str] = {}
order_by_key: Dict[str, str] = {}
stock_by_code: Dict[str, str] = {}
movement_by_code: Dict[str, str] = {}
downstream_line_by_key: Dict[Tuple[str, str, str], str] = {}


def uid() -> str:
    return str(uuid.uuid4())


def clean(v: Any) -> str:
    if v is None:
        return ''
    if isinstance(v, datetime):
        return v.date().isoformat()
    if isinstance(v, date):
        return v.isoformat()
    s = str(v).strip()
    if s in {'None', 'nan'}:
        return ''
    return s


def num(v: Any) -> str:
    s = clean(v).replace(',', '')
    if not s:
        return '0'
    try:
        return str(float(s)).rstrip('0').rstrip('.')
    except Exception:
        m = re.search(r'-?\d+(?:\.\d+)?', s)
        return m.group(0) if m else '0'


def jsons(obj: Any) -> str:
    return json.dumps(obj, ensure_ascii=False, default=str)


def code(prefix: str, n: int) -> str:
    return f'{prefix}-{n:04d}'


def sha256_file(p: Path) -> str:
    if not p.exists() or not p.is_file():
        return ''
    h = hashlib.sha256()
    with p.open('rb') as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            h.update(chunk)
    return h.hexdigest()


def add_party(name: str, party_type: str = 'other', notes: str = '') -> Optional[str]:
    name = clean(name)
    if not name or name in {'待确认', '空/原单未填', '原单未填'}:
        return None
    key = (name, party_type)
    if key in party_by_name:
        return party_by_name[key]
    pid = uid()
    party_by_name[key] = pid
    rows['parties'].append({
        'id': pid,
        'party_code': code('PT', len(rows['parties']) + 1),
        'display_name': name,
        'legal_name': name if len(name) > 4 else '',
        'short_name': name if len(name) <= 8 else '',
        'party_type': party_type,
        'contact_masked': '',
        'notes': notes,
        'created_at': now,
        'updated_at': now,
    })
    return pid


def add_project(name: str, ip_name: str, owner_party_id: Optional[str], notes: str = '') -> str:
    if name in project_by_name:
        return project_by_name[name]
    pid = uid()
    project_by_name[name] = pid
    rows['projects'].append({
        'id': pid,
        'project_code': code('PRJ', len(rows['projects']) + 1),
        'name': name,
        'ip_name': ip_name,
        'owner_party_id': owner_party_id or '',
        'status': 'active',
        'notes': notes,
        'created_at': now,
        'updated_at': now,
    })
    return pid


def add_product(std_name: str, raw_name: str = '', unit: str = '', spec: str = '', notes: str = '') -> Optional[str]:
    std_name = clean(std_name) or clean(raw_name)
    if not std_name:
        return None
    if std_name in product_by_name:
        existing_id = product_by_name[std_name]
        if raw_name and clean(raw_name) != std_name:
            add_alias(existing_id, raw_name, 'raw_name')
        return existing_id
    pid = uid()
    product_by_name[std_name] = pid
    rows['products'].append({
        'id': pid,
        'sku_code': code('SKU', len(rows['products']) + 1),
        'standard_name': std_name,
        'raw_name_default': clean(raw_name) or std_name,
        'category': '',
        'spec': clean(spec),
        'default_unit': clean(unit),
        'barcode_69': '',
        'guobo_code': '',
        'status': 'active',
        'notes': notes,
        'created_at': now,
        'updated_at': now,
    })
    if raw_name and clean(raw_name) != std_name:
        add_alias(pid, raw_name, 'raw_name')
    return pid


def add_alias(product_id: str, alias: str, alias_type: str = 'raw_name', source_file_id: str = '') -> None:
    alias = clean(alias)
    if not alias:
        return
    # lightweight duplicate guard
    for r in rows['product_aliases']:
        if r['product_id'] == product_id and r['alias_name'] == alias and r['alias_type'] == alias_type:
            return
    rows['product_aliases'].append({
        'id': uid(),
        'product_id': product_id,
        'alias_name': alias,
        'alias_type': alias_type,
        'source_file_id': source_file_id,
        'confidence': '0.8',
        'verification_status': 'pending' if alias_type == 'raw_name' else 'confirmed',
        'notes': '',
        'created_at': now,
        'updated_at': now,
    })


def add_source_file(path: Path, material_type: str, notes: str = '', received_at: str = '') -> Optional[str]:
    path = Path(path)
    key = str(path)
    if key in source_by_path:
        return source_by_path[key]
    sid = uid()
    source_by_path[key] = sid
    exists = path.exists() and path.is_file()
    rows['source_files'].append({
        'id': sid,
        'original_filename': path.name,
        'stored_path': str(path),
        'source_cache_path': '',
        'sha256': sha256_file(path) if exists else '',
        'byte_size': path.stat().st_size if exists else '',
        'mime_type': '',
        'material_type': material_type,
        'received_at': received_at or now,
        'extraction_status': 'parsed' if exists else 'needs_review',
        'sensitivity_level': 'sensitive' if material_type in {'contract','purchase_order'} else 'internal',
        'notes': notes,
        'created_at': now,
        'updated_at': now,
    })
    return sid


def load_sheet(path: Path, sheet: str) -> List[Dict[str, Any]]:
    wb = openpyxl.load_workbook(path, data_only=True, read_only=True)
    ws = wb[sheet]
    values = list(ws.iter_rows(values_only=True))
    headers = [clean(x) for x in values[0]]
    out = []
    for row in values[1:]:
        d = {headers[i]: row[i] if i < len(row) else None for i in range(len(headers)) if headers[i]}
        if any(clean(v) for v in d.values()):
            out.append(d)
    return out


def load_csv(path: Path) -> List[Dict[str, Any]]:
    with path.open(encoding='utf-8-sig', errors='replace', newline='') as f:
        return list(csv.DictReader(f))

# baseline parties/projects
pt_mspj = add_party('秘色破茧', 'own_company')
pt_wwyl = add_party('万物有灵', 'own_company')
pt_yunhan = add_party('杭州云汉寻真文化创意有限公司', 'own_company')
pt_guobo = add_party('国博（北京）文化产业发展有限公司', 'customer')
pt_xiling = add_party('西泠印社', 'channel')
pt_qly = add_party('钱丽云', 'person')
pt_sample_wh = add_party('公司样品库', 'warehouse')

prj_guobo = add_project('国博项目', '国博', pt_mspj, '由国博上下游采购台账迁移')
prj_liangzhu = add_project('良渚项目手机贴', '良渚', pt_yunhan, '云汉寻真3D悬浮手机贴样品与签收')

# source files: project sources and key deliverables
for p in sorted((GUOBO / 'source/word_contracts').glob('*')):
    add_source_file(p, 'contract', '国博下游合同/采购合同源文件')
for p in sorted((GUOBO / 'source/upstream_guobo_orders').glob('*')):
    add_source_file(p, 'purchase_order', '国博/元浪上游采购订单源文件')
for p, mt, note in [
    (FILES['downstream'], 'ledger', '下游供应商采购表'),
    (FILES['upstream'], 'ledger', '国博上游采购表'),
    (FILES['sample'], 'ledger', '公司产品样品库'),
    (FILES['signoff'], 'shipment_signoff', '云汉寻真西泠签收表'),
]:
    add_source_file(p, mt, note)

# downstream purchase rows
for rec in load_sheet(FILES['downstream'], '下游供应商采购表'):
    supplier_name = clean(rec.get('下游公司'))
    own_name = clean(rec.get('我方签约主体'))
    doc_name = clean(rec.get('合同/订单文件'))
    product_raw = clean(rec.get('原始商品名称')) or clean(rec.get('商品名称'))
    product_std = clean(rec.get('商品名称')) or product_raw
    supplier_id = add_party(supplier_name, 'supplier')
    own_id = add_party(own_name, 'own_company') or pt_mspj
    product_id = add_product(product_std, product_raw, clean(rec.get('单位')), clean(rec.get('材质/工艺')))
    order_key = f'downstream::{doc_name}::{supplier_name}::{own_name}'
    if order_key not in order_by_key:
        source_id = ''
        # find matching source by filename starts/contains
        for path, sid in source_by_path.items():
            if doc_name and doc_name in Path(path).stem:
                source_id = sid; break
        doc_id = uid()
        doc_by_key[order_key] = doc_id
        rows['business_documents'].append({
            'id': doc_id,
            'document_code': code('DOC', len(rows['business_documents']) + 1),
            'document_type': 'supplier_contract',
            'source_file_id': source_id,
            'project_id': prj_guobo,
            'own_party_id': own_id or '',
            'counterparty_id': supplier_id or '',
            'document_no': '',
            'document_date': clean(rec.get('合同日期')),
            'title': doc_name,
            'status': 'confirmed',
            'raw_payload': jsons({'source_sheet': '下游供应商采购表', 'document_type': clean(rec.get('文件类型'))}),
            'notes': clean(rec.get('备注')),
            'created_at': now,
            'updated_at': now,
        })
        oid = uid()
        order_by_key[order_key] = oid
        rows['purchase_orders'].append({
            'id': oid,
            'order_code': code('PO', len(rows['purchase_orders']) + 1),
            'document_id': doc_id,
            'project_id': prj_guobo,
            'order_side': 'downstream_supplier_purchase',
            'order_type': 'contract_order',
            'buyer_party_id': own_id or '',
            'seller_party_id': supplier_id or '',
            'order_no': '',
            'order_date': clean(rec.get('合同日期')),
            'settlement_method': '',
            'expected_arrival': '',
            'status': 'confirmed',
            'raw_payload': jsons({'source_file_name': doc_name}),
            'notes': '',
            'created_at': now,
            'updated_at': now,
        })
    oid = order_by_key[order_key]
    line_no = sum(1 for r in rows['order_lines'] if r['order_id'] == oid) + 1
    line_id = uid()
    rows['order_lines'].append({
        'id': line_id,
        'order_id': oid,
        'line_no': line_no,
        'product_id': product_id or '',
        'product_name_raw': product_raw,
        'product_name_standard_draft': product_std,
        'quantity': num(rec.get('采购数量')),
        'unit': clean(rec.get('单位')),
        'unit_price': num(rec.get('采购单价')),
        'amount': num(rec.get('采购金额小计')),
        'material': clean(rec.get('材质/工艺')),
        'evidence_note': doc_name,
        'verification_status': 'confirmed',
        'raw_payload': jsons(rec),
        'notes': clean(rec.get('备注')),
        'created_at': now,
        'updated_at': now,
    })
    downstream_line_by_key[(doc_name, product_raw, product_std)] = line_id

# downstream supplier contract channel allocation rows, from initial extraction table.
# These are distinct from upstream 国博 channel demand allocations.
for rec in load_csv(GUOBO / 'work/extraction_tables/渠道分配明细_初提取.csv'):
    doc_name = clean(rec.get('contract_file'))
    product_raw = clean(rec.get('product_name_raw'))
    product_std = clean(rec.get('product_name_standard_draft')) or product_raw
    line_id = downstream_line_by_key.get((doc_name, product_raw, product_std))
    if not line_id:
        # Fallback by product and contract substring.
        for (d, raw, std), lid in downstream_line_by_key.items():
            if d == doc_name and (raw == product_raw or std == product_std):
                line_id = lid
                break
    if not line_id:
        rows['validation_issues'].append({'id':uid(),'issue_type':'other','severity':'warning','related_table':'channel_allocations','related_id':'','field_name':'order_line_id','message':f"下游渠道分配未匹配订单明细: {doc_name} / {product_raw}",'status':'open','source_file_id':'','created_at':now,'resolved_at':''})
        continue
    ch = clean(rec.get('channel'))
    ch_id = add_party(ch, 'channel')
    rows['channel_allocations'].append({
        'id': uid(),
        'order_line_id': line_id,
        'channel_party_id': ch_id or '',
        'raw_channel': ch,
        'standard_channel': ch,
        'original_quantity': num(rec.get('allocated_quantity')),
        'adjustment_quantity': '0',
        'adjustment_reason': '',
        'evidence_note': clean(rec.get('quantity_text')) or clean(rec.get('receiving_info')),
        'notes': '下游供应商采购合同渠道分配',
        'created_at': now,
        'updated_at': now,
    })

# upstream orders and channel allocations
channel_fields = ['馆内/线下', '天猫', '京东', '抖音']
for rec in load_sheet(FILES['upstream'], '国博上游采购表'):
    source_file = clean(rec.get('采购单文件'))
    product_raw = clean(rec.get('原始商品名称')) or clean(rec.get('商品名称'))
    product_std = clean(rec.get('商品名称')) or product_raw
    customer_id = add_party(clean(rec.get('上游公司/客户')) or '国博（北京）文化产业发展有限公司', 'customer') or pt_guobo
    seller_id = add_party(clean(rec.get('采购单显示供货方')), 'own_company') or pt_mspj
    product_id = add_product(product_std, product_raw, clean(rec.get('单位')))
    order_no = clean(rec.get('采购单编号'))
    order_key = f'upstream::{source_file}::{order_no}'
    if order_key not in order_by_key:
        source_id = ''
        for path, sid in source_by_path.items():
            if source_file and Path(path).name == source_file:
                source_id = sid; break
        doc_id = uid()
        doc_by_key[order_key] = doc_id
        rows['business_documents'].append({
            'id': doc_id,
            'document_code': code('DOC', len(rows['business_documents']) + 1),
            'document_type': 'upstream_purchase_order',
            'source_file_id': source_id,
            'project_id': prj_guobo,
            'own_party_id': seller_id or '',
            'counterparty_id': customer_id or '',
            'document_no': order_no,
            'document_date': clean(rec.get('采购单日期')),
            'title': source_file,
            'status': 'confirmed',
            'raw_payload': jsons({'source_sheet': '国博上游采购表'}),
            'notes': '',
            'created_at': now,
            'updated_at': now,
        })
        oid = uid()
        order_by_key[order_key] = oid
        rows['purchase_orders'].append({
            'id': oid,
            'order_code': code('PO', len(rows['purchase_orders']) + 1),
            'document_id': doc_id,
            'project_id': prj_guobo,
            'order_side': 'upstream_customer_order',
            'order_type': 'formal_order',
            'buyer_party_id': customer_id or '',
            'seller_party_id': seller_id or '',
            'order_no': order_no,
            'order_date': clean(rec.get('采购单日期')),
            'settlement_method': '',
            'expected_arrival': clean(rec.get('到货时间')),
            'status': 'confirmed',
            'raw_payload': jsons({'source_file_name': source_file}),
            'notes': clean(rec.get('备注')),
            'created_at': now,
            'updated_at': now,
        })
    oid = order_by_key[order_key]
    line_id = uid()
    line_no = sum(1 for r in rows['order_lines'] if r['order_id'] == oid) + 1
    rows['order_lines'].append({
        'id': line_id,
        'order_id': oid,
        'line_no': line_no,
        'product_id': product_id or '',
        'product_name_raw': product_raw,
        'product_name_standard_draft': product_std,
        'quantity': num(rec.get('单品采购总量')),
        'unit': clean(rec.get('单位')),
        'unit_price': num(rec.get('国博采购单价')),
        'amount': num(rec.get('国博采购金额小计')),
        'material': '',
        'evidence_note': f"{source_file} {order_no}",
        'verification_status': 'confirmed' if clean(rec.get('渠道校验结果')) in {'匹配', 'OK', '通过'} else 'pending',
        'raw_payload': jsons(rec),
        'notes': clean(rec.get('备注')),
        'created_at': now,
        'updated_at': now,
    })
    for ch in channel_fields:
        q = num(rec.get(ch))
        if float(q or 0) == 0:
            continue
        ch_id = add_party(ch, 'channel')
        rows['channel_allocations'].append({
            'id': uid(),
            'order_line_id': line_id,
            'channel_party_id': ch_id or '',
            'raw_channel': ch,
            'standard_channel': ch,
            'original_quantity': q,
            'adjustment_quantity': '0',
            'adjustment_reason': '',
            'evidence_note': f"渠道合计={clean(rec.get('渠道合计'))}; 差异={clean(rec.get('渠道校验差异'))}",
            'notes': '',
            'created_at': now,
            'updated_at': now,
        })

# inventory account and stock items/movements
acct_guobo = uid()
rows['inventory_accounts'].append({'id': acct_guobo,'account_code':'INV-GB-0001','owner_party_id':pt_mspj or '', 'project_id':prj_guobo,'account_type':'sample_stock','warehouse_party_id':pt_sample_wh or '', 'status':'active','notes':'秘色破茧国博项目样品库','created_at':now,'updated_at':now})
acct_yunhan = uid()
rows['inventory_accounts'].append({'id': acct_yunhan,'account_code':'INV-YH-0001','owner_party_id':pt_yunhan or '', 'project_id':prj_liangzhu,'account_type':'sample_stock','warehouse_party_id':pt_sample_wh or '', 'status':'active','notes':'云汉寻真良渚手机贴样品库','created_at':now,'updated_at':now})

for rec in load_sheet(FILES['sample'], '样品库存总表'):
    stock_code = clean(rec.get('样品ID'))
    product_name = clean(rec.get('产品名称'))
    company = clean(rec.get('公司主体'))
    project_name = clean(rec.get('项目/IP'))
    acct = acct_yunhan if '云汉' in company or '良渚' in project_name else acct_guobo
    product_id = add_product(product_name, product_name, clean(rec.get('单位')), clean(rec.get('规格/款式')))
    sid = uid()
    stock_by_code[stock_code] = sid
    rows['stock_items'].append({
        'id': sid,
        'stock_item_code': stock_code,
        'inventory_account_id': acct,
        'product_id': product_id or '',
        'display_name': product_name,
        'spec': clean(rec.get('规格/款式')),
        'unit': clean(rec.get('单位')),
        'unit_cost_tax_included': num(rec.get('单位成本含税')) if clean(rec.get('单位成本含税')) else '',
        'source_policy': clean(rec.get('来源口径')),
        'status': 'active',
        'notes': clean(rec.get('备注')),
        'created_at': now,
        'updated_at': now,
    })

for rec in load_sheet(FILES['sample'], '出入库流水'):
    code_mv = clean(rec.get('流水ID'))
    stock_code = clean(rec.get('样品ID'))
    stock_id = stock_by_code.get(stock_code)
    if not stock_id:
        product_name = clean(rec.get('产品名称'))
        product_id = add_product(product_name)
        acct = acct_yunhan if '云汉' in clean(rec.get('公司主体')) or '良渚' in clean(rec.get('项目/IP')) else acct_guobo
        stock_id = uid(); stock_by_code[stock_code or code('STK', len(stock_by_code)+1)] = stock_id
        rows['stock_items'].append({'id':stock_id,'stock_item_code':stock_code or code('STK', len(stock_by_code)+1),'inventory_account_id':acct,'product_id':product_id or '', 'display_name':product_name,'spec':'','unit':clean(rec.get('单位')),'unit_cost_tax_included':'','source_policy':'由流水补建','status':'active','notes':'','created_at':now,'updated_at':now})
    direction_cn = clean(rec.get('方向'))
    direction = 'in' if direction_cn in {'入库','归还入库'} else 'out'
    movement_type = {
        '入库':'self_stock_in', '出库':'shipment_out', '借出':'claim_out', '归还入库':'return_in', '礼赠':'gift_out'
    }.get(direction_cn, 'adjustment')
    handler = add_party(clean(rec.get('出库人/经办人')), 'person')
    counterparty_name = clean(rec.get('去处/对象'))
    counterparty_type = 'channel' if counterparty_name in {'西泠印社'} else ('warehouse' if counterparty_name == '公司样品库' else 'person')
    counterparty = add_party(counterparty_name, counterparty_type)
    source_path = clean(rec.get('证据/来源'))
    evidence_id = add_source_file(Path(source_path), 'ledger', '库存流水证据') if source_path and source_path.startswith('/') else ''
    rows['stock_movements'].append({
        'id': uid(),
        'movement_code': code_mv or code('MV', len(rows['stock_movements'])+1),
        'stock_item_id': stock_id,
        'movement_date': clean(rec.get('日期')) or now[:10],
        'direction': direction,
        'movement_type': movement_type,
        'quantity': num(rec.get('数量')),
        'unit': clean(rec.get('单位')),
        'handler_party_id': handler or '',
        'counterparty_id': counterparty or '',
        'purpose': clean(rec.get('用途/场景')),
        'returnable': {'是':'true','否':'false','True':'true','False':'false'}.get(clean(rec.get('是否可退回')), ''),
        'unit_cost_tax_included': num(rec.get('成本含税')) if clean(rec.get('成本含税')) else '',
        'source_document_id': '',
        'evidence_file_id': evidence_id or '',
        'notes': clean(rec.get('备注')),
        'created_at': now,
        'updated_at': now,
    })
    movement_by_code[code_mv] = rows['stock_movements'][-1]['id']

# shipment signoff from signoff workbook
ship_source = add_source_file(FILES['signoff'], 'shipment_signoff', '云汉寻真发西泠印社签收表')
ship_doc = uid()
rows['business_documents'].append({'id':ship_doc,'document_code':code('DOC',len(rows['business_documents'])+1),'document_type':'shipment_signoff','source_file_id':ship_source or '', 'project_id':prj_liangzhu,'own_party_id':pt_yunhan or '', 'counterparty_id':pt_xiling or '', 'document_no':'','document_date':'2026-06-11','title':'云汉寻真3D悬浮手机贴发货签收表','status':'confirmed','raw_payload':jsons({'source_sheet':'西泠签收单'}),'notes':'由现有签收表导入','created_at':now,'updated_at':now})
signoff_id = uid()
rows['shipment_signoffs'].append({'id':signoff_id,'signoff_code':'SO-YH-20260611-0001','project_id':prj_liangzhu,'shipper_party_id':pt_yunhan or '', 'receiver_party_id':pt_xiling or '', 'ship_date':'2026-06-11','purpose':'销售签收','status':'sent','source_document_id':ship_doc,'notes':'云汉寻真3D悬浮手机贴发西泠印社','created_at':now,'updated_at':now})
try:
    sign_rows = load_sheet(FILES['signoff'], '内部成本核对')
except Exception:
    sign_rows = []
line_no = 1
for rec in sign_rows:
    product_name = clean(rec.get('产品名称/图案'))
    q = num(rec.get('本次发出数量'))
    if not product_name or float(q or 0) == 0:
        continue
    product_id = add_product(product_name.replace('3D悬浮手机贴-', '3D悬浮手机贴-'), product_name, '个')
    rows['shipment_signoff_lines'].append({'id':uid(),'signoff_id':signoff_id,'line_no':line_no,'product_id':product_id or '', 'product_name':product_name,'quantity':q,'unit':'个','stock_movement_id':'','sign_status':'pending','notes':clean(rec.get('备注')),'created_at':now,'updated_at':now})
    line_no += 1

# reconciliation snapshot
run_id = uid()
rows['reconciliation_runs'].append({'id':run_id,'run_code':'REC-GB-20260610-0001','project_id':prj_guobo,'reconciliation_type':'supplier_vs_upstream_demand','policy_version':'v1-from-existing-excel','input_scope':jsons({'source':'供应商采购_vs_国博需求核对_提取.csv'}),'run_status':'completed','run_at':now,'notes':'由现有国博核对表迁移','created_at':now,'updated_at':now})
for rec in load_csv(FILES['recon']):
    pname = clean(rec.get('product_name_standard_draft'))
    pid = add_product(pname, pname)
    rows['reconciliation_lines'].append({
        'id':uid(),'run_id':run_id,'product_id':pid or '', 'product_name_standard_draft':pname,
        'supplier_purchase_quantity_total':num(rec.get('supplier_purchase_quantity_total')),
        'upstream_demand_quantity_total':num(rec.get('guobo_total_demand_quantity')),
        'formal_order_quantity_total':num(rec.get('formal_order_quantity_total')),
        'supplement_demand_quantity_total':num(rec.get('supplement_demand_quantity_total')),
        'difference_supplier_minus_demand':num(rec.get('difference_supplier_minus_guobo')),
        'self_stock_candidate_qty':num(rec.get('self_stock_candidate_qty')),
        'shortage_qty':num(rec.get('shortage_qty')),
        'difference_status':clean(rec.get('difference_status')),
        'related_supplier_sources':clean(rec.get('related_supplier_contracts')),
        'related_upstream_sources':clean(rec.get('related_guobo_sources')),
        'raw_payload':jsons(rec),
        'notes':clean(rec.get('remarks')),
        'created_at':now,'updated_at':now,
    })

# validation issues from problem sheets
try:
    wb = openpyxl.load_workbook(GUOBO / 'deliverables/国博供应商采购补货台账_初版.xlsx', data_only=True, read_only=True)
    for sheet in ['上游核算问题清单']:
        ws = wb[sheet]
        vals = list(ws.iter_rows(values_only=True))
        headers = [clean(x) for x in vals[0]]
        for row in vals[1:]:
            rec = {headers[i]: row[i] if i < len(row) else None for i in range(len(headers)) if headers[i]}
            if not any(clean(v) for v in rec.values()): continue
            typ = 'quantity_mismatch' if '不足' in clean(rec.get('问题类型')) else 'other'
            rows['validation_issues'].append({'id':uid(),'issue_type':typ,'severity':'warning','related_table':'reconciliation_lines','related_id':'','field_name':'quantity','message':f"{clean(rec.get('问题类型'))}: {clean(rec.get('产品'))} 数量 {clean(rec.get('数量'))}。{clean(rec.get('说明'))}",'status':'open','source_file_id':'','created_at':now,'resolved_at':''})
except Exception as e:
    rows['validation_issues'].append({'id':uid(),'issue_type':'other','severity':'info','related_table':'import_batches','related_id':'','field_name':'','message':f'读取上游核算问题清单失败: {e}','status':'open','source_file_id':'','created_at':now,'resolved_at':''})

# import batch and audit
batch_id = uid()
summary = {t: len(rows[t]) for t in rows if rows[t]}
rows['import_batches'].append({'id':batch_id,'batch_code':'IMP-LEDGER-20260612-0001','source_file_id':'','import_type':'existing_excel_csv_snapshot','profile_name':'it','status':'completed','started_at':now,'finished_at':now,'summary':jsons(summary),'notes':'现有国博/样品库/签收表数据归档同步批次','created_at':now,'updated_at':now})
rows['agent_write_audit'].append({'id':uid(),'agent_profile':'it','tool_name':'local_export_script','operation':'import','target_table':'import_batches','target_id':batch_id,'request_payload':jsons({'project_dir':str(PROJECT_DIR)}),'result_status':'success','result_message':'normalized CSV generated','created_at':now})

# write CSVs
if OUT.exists():
    shutil.rmtree(OUT)
OUT.mkdir(parents=True, exist_ok=True)
for table, cols in TABLE_COLUMNS.items():
    path = OUT / f'{table}.csv'
    with path.open('w', encoding='utf-8', newline='') as f:
        w = csv.DictWriter(f, fieldnames=cols, extrasaction='ignore')
        w.writeheader()
        for r in rows[table]:
            w.writerow({c: r.get(c, '') for c in cols})

# write psql import script
order = [
    'parties','projects','products','source_files','product_aliases','business_documents','purchase_orders','order_lines','channel_allocations','inventory_accounts','stock_items','stock_movements','shipment_signoffs','shipment_signoff_lines','reconciliation_runs','reconciliation_lines','import_batches','validation_issues','agent_write_audit'
]
import_sql = OUT / 'import_normalized_v1.sql'
with import_sql.open('w', encoding='utf-8') as f:
    f.write('BEGIN;\n')
    f.write('SET session_replication_role = replica;\n')
    f.write('TRUNCATE TABLE agent_write_audit, validation_issues, import_batches, reconciliation_lines, reconciliation_runs, shipment_signoff_lines, shipment_signoffs, stock_movements, stock_items, inventory_accounts, channel_allocations, order_lines, purchase_orders, business_documents, product_aliases, source_files, products, projects, parties RESTART IDENTITY CASCADE;\n')
    f.write('SET session_replication_role = origin;\n')
    for table in order:
        cols = ','.join(TABLE_COLUMNS[table])
        f.write(f"\\copy {table} ({cols}) FROM '/srv/goods-ledger/import/normalized_v1/{table}.csv' WITH (FORMAT csv, HEADER true, NULL '');\n")
    f.write('COMMIT;\n')

# write summary
with (OUT / 'export_summary.json').open('w', encoding='utf-8') as f:
    json.dump({'generated_at': now, 'counts': summary, 'output_dir': str(OUT)}, f, ensure_ascii=False, indent=2)

print(json.dumps({'ok': True, 'output_dir': str(OUT), 'counts': summary}, ensure_ascii=False, indent=2))
