#!/usr/bin/env python3
"""阶段一回填：把 raw_import_rows 里上游采购单的字段提升到业务列。

范围（仅上游 upstream_customer_order）：
  A. settlement_method  -> purchase_orders.settlement_method   （源头恒为"代销"，当前为空）
  B. arrival_date_or_period -> order_lines.expected_arrival_text（行级到货周期文本，需先增列）

原则：
  - 只填空，不覆盖已有值（NULL / '' 才写）。
  - 全部在一个事务里；--apply 才落库，默认 dry-run 只打印。
  - 落库时写 agent_write_audit。
  - 按 product_name_raw 在上游范围内匹配（已验证全局唯一），并报告任何对不上的行。

用法：
  python3 backfill_upstream_fields.py            # dry-run
  python3 backfill_upstream_fields.py --apply    # 落库
"""
from __future__ import annotations

import json
import sys
from pathlib import Path

sys.path.insert(0, str(Path(__file__).resolve().parents[1] / "backend"))
from app import db  # noqa: E402

APPLY = "--apply" in sys.argv
SKIP_ARRIVAL = "--no-arrival" in sys.argv  # 跳过 Part B（需 DDL 权限增列）
ACTOR = "backfill-script/phase1"
CSV_SHEET = "国博上游采购订单_提取.csv"


def load_raw_csv(cur):
    cur.execute(
        """
        select row_payload->>'product_name_raw' as pname,
               row_payload->>'settlement_method' as settlement,
               row_payload->>'arrival_date_or_period' as arrival,
               row_payload->>'order_no' as order_no,
               row_payload->>'source_file' as source_file
        from raw_import_rows
        where sheet_or_table = %s
        """,
        (CSV_SHEET,),
    )
    return cur.fetchall()


def column_exists(cur, table, column):
    cur.execute(
        "select 1 from information_schema.columns where table_name=%s and column_name=%s",
        (table, column),
    )
    return cur.fetchone() is not None


def main():
    conn = db.connect()
    cur = conn.cursor()

    raw = load_raw_csv(cur)
    by_name = {r["pname"]: r for r in raw}
    # settlement per order_no (constant) and per source_file fallback
    print(f"=== 源 CSV: {len(raw)} 行, 去重产品名 {len(by_name)} 个 ===")

    # ---------- Part A: settlement_method on purchase_orders ----------
    print("\n=== A. settlement_method -> purchase_orders (上游, 仅填空) ===")
    cur.execute(
        """
        select po.id, po.order_no,
               (select string_agg(distinct ol.product_name_raw, '|')
                  from order_lines ol where ol.order_id = po.id) as sample_products,
               po.settlement_method
        from purchase_orders po
        where po.order_side = 'upstream_customer_order'
        order by po.order_no
        """
    )
    po_rows = cur.fetchall()
    a_updates = []
    for po in po_rows:
        cur_val = (po["settlement_method"] or "").strip()
        if cur_val:
            print(f"  [skip] {po['order_no']}: 已有 settlement_method={cur_val!r}")
            continue
        # derive settlement from any matching raw line of this order's products
        derived = None
        prods = (po["sample_products"] or "").split("|")
        for p in prods:
            r = by_name.get(p)
            if r and (r["settlement"] or "").strip():
                derived = r["settlement"].strip()
                break
        if derived:
            a_updates.append((po["id"], po["order_no"], derived))
            print(f"  [fill] {po['order_no']}: settlement_method = {derived!r}")
        else:
            print(f"  [none] {po['order_no']}: 源里找不到结算方式, 跳过")

    # ---------- Part B: line-level arrival text on order_lines ----------
    print("\n=== B. arrival_date_or_period -> order_lines.expected_arrival_text ===")
    has_col = column_exists(cur, "order_lines", "expected_arrival_text")
    print(f"  order_lines.expected_arrival_text 列存在: {has_col}"
          + ("" if has_col else "  -> 落库时将 ALTER TABLE ADD COLUMN text"))

    cur.execute(
        """
        select ol.id, po.order_no, ol.line_no, ol.product_name_raw
        from order_lines ol
        join purchase_orders po on po.id = ol.order_id
        where po.order_side = 'upstream_customer_order'
        order by po.order_no, ol.line_no
        """
    )
    ol_rows = cur.fetchall()
    b_updates = []
    unmatched_lines = []
    for ol in ol_rows:
        r = by_name.get(ol["product_name_raw"])
        arr = (r["arrival"] or "").strip() if r else ""
        if r and arr:
            b_updates.append((ol["id"], ol["order_no"], ol["line_no"], ol["product_name_raw"], arr))
        else:
            unmatched_lines.append((ol["order_no"], ol["line_no"], ol["product_name_raw"]))

    print(f"  匹配到到货周期的明细行: {len(b_updates)} / {len(ol_rows)}")
    for u in b_updates:
        print(f"    [fill] {u[1]} line{u[2]} {u[3][:18]} -> {u[4]!r}")
    if unmatched_lines:
        print(f"  未匹配明细行: {len(unmatched_lines)}")
        for u in unmatched_lines:
            print(f"    [no-arrival] {u[0]} line{u[1]} {u[2][:18]}")
    # raw CSV rows that found no order_line (反向检查)
    matched_names = {u[3] for u in b_updates}
    orphan_csv = [r["pname"] for r in raw if r["pname"] not in {ol["product_name_raw"] for ol in ol_rows}]
    if orphan_csv:
        print(f"  源CSV有但order_lines无对应产品: {sorted(set(orphan_csv))}")

    # ---------- summary ----------
    print("\n=== 计划汇总 ===")
    print(f"  A. purchase_orders.settlement_method 将更新: {len(a_updates)} 行")
    print(f"  B. order_lines.expected_arrival_text 将更新: {len(b_updates)} 行"
          + ("" if has_col else " (+新增1列)"))

    if not APPLY:
        print("\n[DRY-RUN] 未落库。确认后加 --apply 执行。")
        conn.rollback()
        return

    # ---------- apply ----------
    print("\n[APPLY] 开始事务落库 ...")
    do_arrival = not SKIP_ARRIVAL
    if do_arrival and not has_col:
        cur.execute("alter table order_lines add column expected_arrival_text text")
        print("  + 已增列 order_lines.expected_arrival_text")
    for po_id, ono, val in a_updates:
        cur.execute(
            "update purchase_orders set settlement_method=%s "
            "where id=%s and coalesce(nullif(trim(settlement_method),''),'') = ''",
            (val, po_id),
        )
    if do_arrival:
        for ol_id, ono, ln, pn, arr in b_updates:
            cur.execute(
                "update order_lines set expected_arrival_text=%s "
                "where id=%s and coalesce(nullif(trim(expected_arrival_text),''),'') = ''",
                (arr, ol_id),
            )
    else:
        print("  [skip B] --no-arrival: 跳过行级到货回填(待 DDL 权限增列后再做)")
    cur.execute(
        """
        insert into agent_write_audit(agent_profile, tool_name, operation, target_table, request_payload, result_status)
        values (%s,%s,%s,%s,%s,%s)
        """,
        (ACTOR, "backfill_upstream_fields", "update", "purchase_orders+order_lines",
         json.dumps({"settlement_updates": len(a_updates),
                     "arrival_updates": (0 if SKIP_ARRIVAL else len(b_updates)),
                     "added_column": (do_arrival and not has_col)}, ensure_ascii=False),
         "success"),
    )
    conn.commit()
    print(f"  已提交: settlement={len(a_updates)} arrival={len(b_updates)} 审计已写入。")


if __name__ == "__main__":
    main()
