from __future__ import annotations

import json
import os
import re
import shutil
import sys
import zipfile
from pathlib import Path
from typing import Any
from urllib.parse import parse_qsl, unquote, urlparse

import click
import requests

# Lower-cased header names that carry login state. _headers_dict drops them
# unless it is called with include_sensitive=True.
SENSITIVE_HEADERS = {"cookie", "authorization", "x-csrf-token", "x-xsrf-token"}
# URL path fragments used to locate the report-list and export requests among
# HAR entries, and to build the default https://sz.jd.com endpoint URLs.
LIST_PATH_HINT = "/sz/api/selfHelpAnalysis/getReportList.ajax"
EXPORT_PATH_HINT = "/sz/api/selfHelpAnalysis/exportPreviewList.ajax"
# Default directory for cookie.json, *_headers.json and form_identity.json.
# NOTE(review): this is an absolute, machine-specific path — confirm it is
# intended for every host this CLI runs on (commands accept an override option).
DEFAULT_CREDENTIAL_DIR = Path("/Users/bot1/.hermes/profiles/it/secrets/jd-report-exporter")


def _json_print(payload: Any) -> None:
    """Echo *payload* as 2-space-indented JSON, leaving non-ASCII text unescaped."""
    rendered = json.dumps(payload, indent=2, ensure_ascii=False)
    click.echo(rendered)


def _load_json(path: str | Path) -> Any:
    return json.loads(Path(path).expanduser().read_text(encoding="utf-8"))


def _write_json(path: Path, payload: Any, mode: int | None = None) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    if mode is not None:
        os.chmod(path, mode)


def _headers_dict(headers: list[dict[str, str]] | dict[str, str], *, include_sensitive: bool = False) -> dict[str, str]:
    if isinstance(headers, dict):
        pairs = headers.items()
    else:
        pairs = ((h.get("name", ""), h.get("value", "")) for h in headers)
    out: dict[str, str] = {}
    for key, value in pairs:
        if not key:
            continue
        lower = key.lower()
        if lower.startswith(":") or lower in {"content-length", "host", "accept-encoding"}:
            continue
        if not include_sensitive and lower in SENSITIVE_HEADERS:
            continue
        out[key] = value
    return out


def _find_entry(entries: list[dict[str, Any]], path_hint: str, *, method: str | None = None) -> dict[str, Any] | None:
    for entry in entries:
        req = entry.get("request", {})
        url = req.get("url", "")
        if path_hint in url and (method is None or (req.get("method") or "").upper() == method.upper()):
            return entry
    return None


def _find_cookie_header(entry: dict[str, Any]) -> str | None:
    for h in entry.get("request", {}).get("headers", []):
        if h.get("name", "").lower() == "cookie" and h.get("value"):
            return h["value"]
    return None


def _parse_form(text: str | None) -> dict[str, str]:
    if not text:
        return {}
    return dict(parse_qsl(text, keep_blank_values=True))


def _decode_response_text(content: dict[str, Any]) -> str:
    text = content.get("text") or ""
    if content.get("encoding") == "base64":
        import base64
        return base64.b64decode(text).decode("utf-8", "replace")
    return text


def _reports_from_list_payload(payload: Any, *, list_url: str | None = None, export_url: str | None = None) -> list[dict[str, Any]]:
    page = (payload or {}).get("content", {}).get("pageList", {}) if isinstance(payload, dict) else {}
    rows = page.get("data", [])
    meta = page.get("metaIndex", {})
    reports = []
    for row in rows:
        if not isinstance(row, list):
            continue
        def get(key: str, default: Any = None) -> Any:
            idx = meta.get(key)
            if isinstance(idx, int) and 0 <= idx < len(row):
                return row[idx]
            return default
        reports.append({
            "report_name": get("ReportName", ""),
            "update_num": get("UpdateNum"),
            "create_time": get("CreateTime"),
            "report_dim": str(get("ReportDim", "")),
            "query_time": get("QueryTime"),
            "indicators": get("Indicators", []),
            "report_id": str(get("ReportId", "")),
            "start_date": str(get("StartDate", "")),
            "end_date": str(get("EndDate", "")),
            "sku_id": str(get("SkuId", "")),
            "attributions": get("Attributions", []),
            "list_url": list_url,
            "export_url": export_url or "https://sz.jd.com/sz/api/selfHelpAnalysis/exportPreviewList.ajax",
        })
    return reports


def _load_cookie_header(cookie_file: Path | None, credential_dir: Path | None) -> str | None:
    path = cookie_file or ((credential_dir or DEFAULT_CREDENTIAL_DIR) / "cookie.json")
    if not path.exists():
        return None
    text = path.read_text(encoding="utf-8").strip()
    if not text:
        return None
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        return text
    if isinstance(data, dict):
        for key in ("Cookie", "cookie"):
            if isinstance(data.get(key), str):
                return data[key]
        if all(isinstance(v, str) for v in data.values()):
            return "; ".join(f"{k}={v}" for k, v in data.items())
    if isinstance(data, list):
        return "; ".join(f"{x['name']}={x['value']}" for x in data if isinstance(x, dict) and x.get("name") and x.get("value") is not None)
    raise click.ClickException(f"Unsupported cookie file format: {path}")


def _load_optional_json(path: Path | None, default_path: Path) -> dict[str, Any]:
    p = path or default_path
    if p.exists():
        data = _load_json(p)
        if isinstance(data, dict):
            return data
    return {}


def _safe_filename(name: str) -> str:
    return re.sub(r"[\\/:*?\"<>|]+", "_", name).strip()[:180] or "jd_report"


def _decode_disposition_filename(dispo: str, fallback: str) -> str:
    match = re.search(r'filename\*?=(?:UTF-8\'\')?"?([^";]+)', dispo, re.I)
    if not match:
        return fallback
    raw = unquote(match.group(1)).strip().strip('"')
    if not raw:
        return fallback
    # JD often emits UTF-8 bytes interpreted as latin1 in Content-Disposition.
    for decoder in (
        lambda s: s.encode("latin1").decode("utf-8"),
        lambda s: s.encode("latin1").decode("gbk"),
        lambda s: s,
    ):
        try:
            decoded = decoder(raw)
            if decoded:
                return _safe_filename(decoded)
        except Exception:
            pass
    return _safe_filename(raw)


# Root click command group; the sub-commands in this file register on it via
# @main.command. `-h` is accepted as an alias for `--help`.
# The docstring is kept verbatim because click shows it as the CLI help text.
# English gloss: "JD / Shangzhi self-service analysis report export CLI harness."
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
def main() -> None:
    """京东/商智自助分析报表导出 CLI harness."""


@main.command()
def doctor() -> None:
    """检查当前 CLI 运行环境。"""
    # The docstring above doubles as click's --help text, so it is kept verbatim.
    # Prints a small JSON report: Python version, requests version and the
    # default credential directory.
    python_version = sys.version.split()[0]
    report = {
        "ok": True,
        "python": python_version,
        "requests": requests.__version__,
        "default_credential_dir": str(DEFAULT_CREDENTIAL_DIR),
    }
    _json_print(report)


@main.command("scan-har")
@click.argument("har", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--out", "out_path", type=click.Path(path_type=Path), help="输出不含 Cookie 的 manifest JSON。")
def scan_har(har: Path, out_path: Path | None) -> None:
    """扫描京东 HAR，识别报表列表/导出接口和 HAR 内列表快照。"""
    # The docstring above doubles as click's --help text, so it is kept verbatim.
    # Builds a manifest (endpoints, report snapshot, sample export form) from
    # the HAR, then writes it to --out or prints it as JSON.
    entries = _load_json(har).get("log", {}).get("entries", [])
    list_entry = _find_entry(entries, LIST_PATH_HINT, method="GET")
    export_entry = _find_entry(entries, EXPORT_PATH_HINT, method="POST")

    # A matched entry always has a "request" with a URL (that is how it matched).
    list_endpoint = list_entry["request"].get("url") if list_entry else None
    export_endpoint = export_entry["request"].get("url") if export_entry else None

    reports = []
    if list_entry:
        body = _decode_response_text(list_entry.get("response", {}).get("content", {}))
        try:
            reports = _reports_from_list_payload(json.loads(body), list_url=list_endpoint, export_url=export_endpoint)
        except Exception:
            # Best effort: an unparsable list snapshot just means "no reports".
            reports = []

    if export_entry:
        export_form = _parse_form(export_entry.get("request", {}).get("postData", {}).get("text"))
    else:
        export_form = {}

    identity_keys = ("User-mup", "User-mnp", "uuid")
    captured_sample = {
        "report_name": export_form.get("ReportName"),
        "start_date": export_form.get("startDate"),
        "end_date": export_form.get("endDate"),
        "report_dim": export_form.get("ReportDim"),
        "sku_id": export_form.get("SkuId"),
        "has_identity_fields": all(export_form.get(k) for k in identity_keys),
    }
    result = {
        "source_har": str(har),
        "total_entries": len(entries),
        "list_endpoint": list_endpoint,
        "export_endpoint": export_endpoint,
        "report_count_in_har_snapshot": len(reports),
        "reports": reports,
        "captured_export_sample": captured_sample,
        "notes": ["Cookie/Authorization are not embedded; use extract-credentials to save them locally.", "Export endpoint returns a ZIP containing one XLS file in smoke tests."],
    }

    if not out_path:
        _json_print(result)
        return
    _write_json(out_path, result)
    click.echo(str(out_path))


@main.command("extract-credentials")
@click.argument("har", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--out-dir", type=click.Path(path_type=Path), default=DEFAULT_CREDENTIAL_DIR, show_default=True)
def extract_credentials(har: Path, out_dir: Path) -> None:
    """从 HAR 提取 Cookie、非敏感请求头、导出表单身份字段到本地 secrets 目录。"""
    # The docstring above doubles as click's --help text, so it is kept verbatim.
    # Writes cookie.json, export_headers.json, list_headers.json,
    # form_identity.json and README.md into out_dir (dir 0700, files 0600).
    entries = _load_json(har).get("log", {}).get("entries", [])
    list_entry = _find_entry(entries, LIST_PATH_HINT, method="GET")
    export_entry = _find_entry(entries, EXPORT_PATH_HINT, method="POST")
    if not export_entry:
        raise click.ClickException("HAR 中没有找到 exportPreviewList.ajax POST 请求；请重新采集包含一次导出的 HAR。")

    # Prefer the export request's cookie; fall back to the list request's.
    cookie = _find_cookie_header(export_entry)
    if not cookie and list_entry:
        cookie = _find_cookie_header(list_entry)
    if not cookie:
        raise click.ClickException("HAR 中没有 Cookie；请导出包含请求头的 HAR。")

    out_dir.mkdir(parents=True, exist_ok=True)
    os.chmod(out_dir, 0o700)

    export_request = export_entry.get("request", {})
    export_headers = _headers_dict(export_request.get("headers", []), include_sensitive=False)
    list_headers = {}
    if list_entry:
        list_headers = _headers_dict(list_entry.get("request", {}).get("headers", []), include_sensitive=False)

    form = _parse_form(export_request.get("postData", {}).get("text"))
    identity = {}
    for field in ("User-mup", "User-mnp", "uuid"):
        if field in form:
            identity[field] = form[field]

    json_files = {
        "cookie.json": {"Cookie": cookie},
        "export_headers.json": export_headers,
        "list_headers.json": list_headers,
        "form_identity.json": identity,
    }
    for filename, content in json_files.items():
        _write_json(out_dir / filename, content, 0o600)

    readme = out_dir / "README.md"
    readme.write_text("京东/商智 jd-export 本地登录态目录。不要提交、不要发聊天、不要同步到 NAS。Cookie 过期或风控时重新采集 HAR。\n", encoding="utf-8")
    os.chmod(readme, 0o600)

    _json_print({
        "out_dir": str(out_dir),
        "files": [*json_files, "README.md"],
        "cookie_chars": len(cookie),
        "identity_fields": sorted(identity),
    })


@main.command("fetch-list")
@click.argument("manifest", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--credential-dir", type=click.Path(path_type=Path), default=DEFAULT_CREDENTIAL_DIR, show_default=True)
@click.option("--cookie-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--headers-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--out", "out_path", type=click.Path(path_type=Path), required=True)
def fetch_list(manifest: Path, credential_dir: Path, cookie_file: Path | None, headers_file: Path | None, out_path: Path) -> None:
    """用本地 Cookie 重放 getReportList.ajax，保存规范化报表列表。"""
    # The docstring above doubles as click's --help text, so it is kept verbatim.
    # GETs the list endpoint with the stored headers and cookie, normalizes the
    # response into report dicts, writes them to --out and prints a summary.
    manifest_data = _load_json(manifest)
    list_url = manifest_data.get("list_endpoint") or f"https://sz.jd.com{LIST_PATH_HINT}"
    export_url = manifest_data.get("export_endpoint") or f"https://sz.jd.com{EXPORT_PATH_HINT}"

    headers = _load_optional_json(headers_file, credential_dir / "list_headers.json")
    cookie = _load_cookie_header(cookie_file, credential_dir)
    if cookie:
        headers["Cookie"] = cookie

    resp = requests.get(list_url, headers=headers, timeout=90)
    try:
        payload = resp.json()
    except Exception as exc:
        raise click.ClickException(f"列表接口没有返回 JSON：status={resp.status_code}, content-type={resp.headers.get('content-type')}, error={exc}") from exc

    reports = _reports_from_list_payload(payload, list_url=list_url, export_url=export_url)
    report_count = len(reports)
    result = {
        "source": "jd_getReportList.ajax",
        "status_code": resp.status_code,
        "list_endpoint": list_url,
        "export_endpoint": export_url,
        "report_count": report_count,
        "reports": reports,
    }
    _write_json(out_path, result)

    report_names = [report["report_name"] for report in reports]
    _json_print({
        "out": str(out_path),
        "status": resp.status_code,
        "report_count": report_count,
        "report_names": report_names,
    })


def _unique_path(directory: Path, filename: str, taken: set[str]) -> Path:
    """Return ``directory / filename``, adding ``_2``, ``_3``… if this run already used it.

    *taken* holds case-folded path strings and is updated in place. Files
    left over from earlier runs are not considered, so re-running an export
    still overwrites its previous output.
    """
    stem, suffix = Path(filename).stem, Path(filename).suffix
    candidate = directory / filename
    counter = 2
    # Case-folded so names differing only by case do not collide on
    # case-insensitive filesystems.
    while str(candidate).casefold() in taken:
        candidate = directory / f"{stem}_{counter}{suffix}"
        counter += 1
    taken.add(str(candidate).casefold())
    return candidate


def _zip_member_name(member: zipfile.ZipInfo) -> str:
    """Return a safe base file name for a ZIP member, repairing legacy-encoded names."""
    name = member.filename
    # Bit 11 (0x800) of the general-purpose flags marks a UTF-8 name. Without
    # it zipfile decodes the raw bytes as cp437, which garbles Chinese names;
    # recover the bytes and retry with UTF-8, then GBK. ASCII names are
    # unaffected. NOTE(review): a genuine cp437 name with accented characters
    # could be misread — assumed not to occur in these exports.
    if not member.flag_bits & 0x800:
        try:
            raw = name.encode("cp437")
        except UnicodeEncodeError:
            raw = None
        if raw is not None:
            for codec in ("utf-8", "gbk"):
                try:
                    name = raw.decode(codec)
                    break
                except UnicodeDecodeError:
                    continue
    # Keep only the base name so members cannot escape the target directory.
    return _safe_filename(Path(name).name)


def _extract_zip(zip_path: Path, dest_dir: Path, taken: set[str]) -> list[str]:
    """Extract every file member of *zip_path* flat into *dest_dir*; return the written paths."""
    extracted: list[str] = []
    with zipfile.ZipFile(zip_path) as archive:
        for member in archive.infolist():
            if member.is_dir():
                continue
            target = _unique_path(dest_dir, _zip_member_name(member), taken)
            with archive.open(member) as src, target.open("wb") as dst:
                shutil.copyfileobj(src, dst)
            extracted.append(str(target))
    return extracted


@main.command("export-all")
@click.argument("report_list", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--credential-dir", type=click.Path(path_type=Path), default=DEFAULT_CREDENTIAL_DIR, show_default=True)
@click.option("--cookie-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--headers-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--identity-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--out-dir", type=click.Path(path_type=Path), default=Path("exports"), show_default=True)
@click.option("--dry-run", is_flag=True)
@click.option("--limit", type=int, default=0, show_default=True)
@click.option("--report-name", multiple=True, help="只导出指定报表名，可重复传。")
@click.option("--extract/--no-extract", default=True, show_default=True, help="导出 ZIP 后是否解压其中 XLS 文件。")
def export_all(report_list: Path, credential_dir: Path, cookie_file: Path | None, headers_file: Path | None, identity_file: Path | None, out_dir: Path, dry_run: bool, limit: int, report_name: tuple[str, ...], extract: bool) -> None:
    """按报表列表逐个 POST exportPreviewList.ajax，保存 ZIP/XLS。"""
    # The docstring above doubles as click's --help text, so it is kept verbatim.
    # Flow: load the report list, filter by --report-name and --limit, then
    # POST one export per report. ZIPs go to <out-dir>/zip and, unless
    # --no-extract, their members go to <out-dir>/xls. Per-report failures are
    # collected in the JSON summary rather than aborting the run.
    # Raises click.ClickException when nothing is left to export or when the
    # identity form fields are missing.
    data = _load_json(report_list)
    reports = list(data.get("reports", []))
    if report_name:
        names = set(report_name)
        reports = [r for r in reports if r.get("report_name") in names]
    if limit > 0:
        reports = reports[:limit]
    plan = [
        {"index": i, "report_name": r.get("report_name"), "start_date": r.get("start_date"), "end_date": r.get("end_date"), "report_dim": r.get("report_dim")}
        for i, r in enumerate(reports, 1)
    ]
    if dry_run:
        _json_print({"dry_run": True, "count": len(plan), "plan": plan})
        return
    if not reports:
        raise click.ClickException("没有可导出的报表。")

    headers = _load_optional_json(headers_file, credential_dir / "export_headers.json")
    cookie = _load_cookie_header(cookie_file, credential_dir)
    if cookie:
        headers["Cookie"] = cookie
    identity = _load_optional_json(identity_file, credential_dir / "form_identity.json")
    missing = [k for k in ("User-mup", "User-mnp", "uuid") if not identity.get(k)]
    if missing:
        raise click.ClickException(f"缺少导出身份字段：{missing}；请从包含一次导出的 HAR 运行 extract-credentials。")

    out_dir.mkdir(parents=True, exist_ok=True)
    zip_dir = out_dir / "zip"
    xls_dir = out_dir / "xls"
    zip_dir.mkdir(exist_ok=True)
    if extract:
        xls_dir.mkdir(exist_ok=True)

    saved = []
    failed = []
    # Paths written during this run. Two reports resolving to the same file
    # name must not silently overwrite each other.
    used_paths: set[str] = set()
    for idx, r in enumerate(reports, 1):
        export_url = r.get("export_url") or data.get("export_endpoint") or f"https://sz.jd.com{EXPORT_PATH_HINT}"
        payload = {
            "SkuId": r.get("sku_id", ""),
            "ReportDim": r.get("report_dim", ""),
            "Indicators": json.dumps(r.get("indicators") or [], ensure_ascii=False, separators=(",", ":")),
            "Attributions": json.dumps(r.get("attributions") or [], ensure_ascii=False, separators=(",", ":")),
            "startDate": r.get("start_date", ""),
            "endDate": r.get("end_date", ""),
            "ReportName": r.get("report_name", f"report_{idx}"),
            # Identity fields come last so they win over any same-named key above.
            **identity,
        }
        # Deliberately broad: any failure for one report is recorded and the
        # loop moves on to the next report.
        try:
            resp = requests.post(export_url, headers=headers, data=payload, timeout=120)
            ctype = resp.headers.get("content-type", "")
            # Accept the body as a ZIP on either the "PK" magic bytes or the content type.
            if resp.status_code != 200 or not (resp.content[:2] == b"PK" or "zip" in ctype.lower()):
                failed.append({"index": idx, "report_name": r.get("report_name"), "status": resp.status_code, "content_type": ctype, "message": "response is not ZIP"})
                continue
            fallback = f"{_safe_filename(str(r.get('report_name') or 'jd_report'))}_{r.get('start_date')}_{r.get('end_date')}.zip"
            filename = _decode_disposition_filename(resp.headers.get("content-disposition", ""), fallback)
            if not filename.lower().endswith(".zip"):
                filename += ".zip"
            zip_path = _unique_path(zip_dir, filename, used_paths)
            zip_path.write_bytes(resp.content)
            record = {"index": idx, "report_name": r.get("report_name"), "zip": str(zip_path), "zip_bytes": zip_path.stat().st_size, "extracted": []}
            if extract:
                try:
                    record["extracted"] = _extract_zip(zip_path, xls_dir, used_paths)
                except Exception as exc:
                    # The ZIP stays on disk; only the extraction is reported as failed.
                    failed.append({"index": idx, "report_name": r.get("report_name"), "zip": str(zip_path), "error": f"zip extract failed: {exc!r}"})
                    continue
            saved.append(record)
        except Exception as exc:
            failed.append({"index": idx, "report_name": r.get("report_name"), "error": repr(exc)})
    _json_print({"saved_count": len(saved), "failed_count": len(failed), "saved": saved, "failed": failed})


# Run the click command group when this file is executed as a script.
if __name__ == "__main__":
    main()
