from __future__ import annotations

import json
import os
import re
import shutil
import sys
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Any
from urllib.parse import parse_qsl, unquote, urlparse

import click
import requests

# Lower-cased header names that carry login state; _headers_dict drops them
# unless called with include_sensitive=True.
SENSITIVE_HEADERS = {"cookie", "authorization", "x-csrf-token", "x-xsrf-token"}
# URL path fragments used to spot the report-list and export requests inside a
# HAR; they are also appended to https://sz.jd.com to build fallback endpoints.
LIST_PATH_HINT = "/sz/api/selfHelpAnalysis/getReportList.ajax"
EXPORT_PATH_HINT = "/sz/api/selfHelpAnalysis/exportPreviewList.ajax"
# Default directory for the extracted cookie/header/identity JSON files.
# NOTE(review): this is a machine-specific absolute path; the commands that use
# it expose an option (--credential-dir / --out-dir) to override it.
DEFAULT_CREDENTIAL_DIR = Path("/Users/bot1/.hermes/profiles/it/secrets/jd-report-exporter")


def _json_print(payload: Any) -> None:
    """Echo *payload* as 2-space-indented JSON, leaving non-ASCII text unescaped."""
    rendered = json.dumps(payload, indent=2, ensure_ascii=False)
    click.echo(rendered)


def _load_json(path: str | Path) -> Any:
    return json.loads(Path(path).expanduser().read_text(encoding="utf-8"))


def _write_json(path: Path, payload: Any, mode: int | None = None) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    if mode is not None:
        os.chmod(path, mode)


def _headers_dict(headers: list[dict[str, str]] | dict[str, str], *, include_sensitive: bool = False) -> dict[str, str]:
    if isinstance(headers, dict):
        pairs = headers.items()
    else:
        pairs = ((h.get("name", ""), h.get("value", "")) for h in headers)
    out: dict[str, str] = {}
    for key, value in pairs:
        if not key:
            continue
        lower = key.lower()
        if lower.startswith(":") or lower in {"content-length", "host", "accept-encoding"}:
            continue
        if not include_sensitive and lower in SENSITIVE_HEADERS:
            continue
        out[key] = value
    return out


def _find_entry(entries: list[dict[str, Any]], path_hint: str, *, method: str | None = None) -> dict[str, Any] | None:
    for entry in entries:
        req = entry.get("request", {})
        url = req.get("url", "")
        if path_hint in url and (method is None or (req.get("method") or "").upper() == method.upper()):
            return entry
    return None


def _find_cookie_header(entry: dict[str, Any]) -> str | None:
    for h in entry.get("request", {}).get("headers", []):
        if h.get("name", "").lower() == "cookie" and h.get("value"):
            return h["value"]
    return None


def _parse_form(text: str | None) -> dict[str, str]:
    if not text:
        return {}
    return dict(parse_qsl(text, keep_blank_values=True))


def _decode_response_text(content: dict[str, Any]) -> str:
    """Return the body text of a HAR ``response.content`` object.

    Bodies marked ``encoding == "base64"`` are base64-decoded and then read as
    UTF-8 with undecodable bytes replaced; anything else is returned as stored
    (a missing or null body yields ``""``).
    """
    body = content.get("text") or ""
    if content.get("encoding") != "base64":
        return body
    import base64
    raw = base64.b64decode(body)
    return raw.decode("utf-8", "replace")


def _reports_from_list_payload(payload: Any, *, list_url: str | None = None, export_url: str | None = None) -> list[dict[str, Any]]:
    page = (payload or {}).get("content", {}).get("pageList", {}) if isinstance(payload, dict) else {}
    rows = page.get("data", [])
    meta = page.get("metaIndex", {})
    reports = []
    for row in rows:
        if not isinstance(row, list):
            continue
        def get(key: str, default: Any = None) -> Any:
            idx = meta.get(key)
            if isinstance(idx, int) and 0 <= idx < len(row):
                return row[idx]
            return default
        reports.append({
            "report_name": get("ReportName", ""),
            "update_num": get("UpdateNum"),
            "create_time": get("CreateTime"),
            "report_dim": str(get("ReportDim", "")),
            "query_time": get("QueryTime"),
            "indicators": get("Indicators", []),
            "report_id": str(get("ReportId", "")),
            "start_date": str(get("StartDate", "")),
            "end_date": str(get("EndDate", "")),
            "sku_id": str(get("SkuId", "")),
            "attributions": get("Attributions", []),
            "list_url": list_url,
            "export_url": export_url or "https://sz.jd.com/sz/api/selfHelpAnalysis/exportPreviewList.ajax",
        })
    return reports


def _load_cookie_header(cookie_file: Path | None, credential_dir: Path | None) -> str | None:
    path = cookie_file or ((credential_dir or DEFAULT_CREDENTIAL_DIR) / "cookie.json")
    if not path.exists():
        return None
    text = path.read_text(encoding="utf-8").strip()
    if not text:
        return None
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        return text
    if isinstance(data, dict):
        for key in ("Cookie", "cookie"):
            if isinstance(data.get(key), str):
                return data[key]
        if all(isinstance(v, str) for v in data.values()):
            return "; ".join(f"{k}={v}" for k, v in data.items())
    if isinstance(data, list):
        return "; ".join(f"{x['name']}={x['value']}" for x in data if isinstance(x, dict) and x.get("name") and x.get("value") is not None)
    raise click.ClickException(f"Unsupported cookie file format: {path}")


def _load_optional_json(path: Path | None, default_path: Path) -> dict[str, Any]:
    p = path or default_path
    if p.exists():
        data = _load_json(p)
        if isinstance(data, dict):
            return data
    return {}


def _safe_filename(name: str) -> str:
    """Make *name* usable as a single file name.

    Runs of ``\\ / : * ? " < > |`` collapse to one underscore, surrounding
    whitespace is stripped and the result is capped at 180 characters; an
    empty result becomes ``"jd_report"``.
    """
    cleaned = re.sub(r"[\\/:*?\"<>|]+", "_", name)
    trimmed = cleaned.strip()[:180]
    if trimmed:
        return trimmed
    return "jd_report"


def _decode_disposition_filename(dispo: str, fallback: str) -> str:
    """Extract a sanitized file name from a Content-Disposition header value.

    Returns *fallback* unchanged when the header carries no usable
    ``filename``/``filename*`` parameter. Otherwise the percent-decoded value
    is repaired for mojibake and passed through ``_safe_filename``.
    """
    found = re.search(r'filename\*?=(?:UTF-8\'\')?"?([^";]+)', dispo, re.I)
    raw = unquote(found.group(1)).strip().strip('"') if found else ""
    if not raw:
        return fallback
    # JD often emits UTF-8 bytes interpreted as latin1 in Content-Disposition:
    # try to undo that as UTF-8, then as GBK, and finally take the text as-is
    # (codec None).
    for codec in ("utf-8", "gbk", None):
        try:
            candidate = raw if codec is None else raw.encode("latin1").decode(codec)
            if candidate:
                return _safe_filename(candidate)
        except Exception:
            pass
    return _safe_filename(raw)


# Root click command group; the commands in this file attach to it through
# @main.command. Both -h and --help are accepted as help flags.
# The docstring is left untranslated because click uses it as the help text.
@click.group(context_settings={"help_option_names": ["-h", "--help"]})
def main() -> None:
    """京东/商智自助分析报表导出 CLI harness."""


# Environment self-check: prints the Python and requests versions plus the
# default credential directory as JSON.
@main.command()
def doctor() -> None:
    """检查当前 CLI 运行环境。"""
    report = {
        "ok": True,
        "python": sys.version.split()[0],
        "requests": requests.__version__,
        "default_credential_dir": str(DEFAULT_CREDENTIAL_DIR),
    }
    _json_print(report)


# Scans a HAR capture for the report-list (GET) and export (POST) requests and
# emits a cookie-free manifest: the two endpoint URLs, the report list found in
# the captured list response, and a sample of the captured export form.
@main.command("scan-har")
@click.argument("har", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--out", "out_path", type=click.Path(path_type=Path), help="输出不含 Cookie 的 manifest JSON。")
def scan_har(har: Path, out_path: Path | None) -> None:
    """扫描京东 HAR，识别报表列表/导出接口和 HAR 内列表快照。"""
    entries = _load_json(har).get("log", {}).get("entries", [])
    list_entry = _find_entry(entries, LIST_PATH_HINT, method="GET")
    export_entry = _find_entry(entries, EXPORT_PATH_HINT, method="POST")
    list_url = list_entry["request"].get("url") if list_entry else None
    export_url = export_entry["request"].get("url") if export_entry else None

    reports: list[dict[str, Any]] = []
    if list_entry:
        content = list_entry.get("response", {}).get("content", {})
        body_text = _decode_response_text(content)
        try:
            reports = _reports_from_list_payload(json.loads(body_text), list_url=list_url, export_url=export_url)
        except Exception:
            # Best effort: an unparsable list snapshot just means no reports in the manifest.
            reports = []

    if export_entry:
        post_data = export_entry.get("request", {}).get("postData", {})
        export_form = _parse_form(post_data.get("text"))
    else:
        export_form = {}

    # Manifest key -> form field name of the captured export request.
    sample_fields = {
        "report_name": "ReportName",
        "start_date": "startDate",
        "end_date": "endDate",
        "report_dim": "ReportDim",
        "sku_id": "SkuId",
    }
    sample: dict[str, Any] = {label: export_form.get(field) for label, field in sample_fields.items()}
    sample["has_identity_fields"] = all(export_form.get(k) for k in ("User-mup", "User-mnp", "uuid"))

    result = {
        "source_har": str(har),
        "total_entries": len(entries),
        "list_endpoint": list_url,
        "export_endpoint": export_url,
        "report_count_in_har_snapshot": len(reports),
        "reports": reports,
        "captured_export_sample": sample,
        "notes": ["Cookie/Authorization are not embedded; use extract-credentials to save them locally.", "Export endpoint returns a ZIP containing one XLS file in smoke tests."],
    }
    if not out_path:
        _json_print(result)
        return
    _write_json(out_path, result)
    click.echo(str(out_path))


# Pulls the login state out of a HAR capture into a private directory:
# cookie.json, export_headers.json, list_headers.json, form_identity.json and a
# README. The directory is set to 0o700 and every file to 0o600.
@main.command("extract-credentials")
@click.argument("har", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--out-dir", type=click.Path(path_type=Path), default=DEFAULT_CREDENTIAL_DIR, show_default=True)
def extract_credentials(har: Path, out_dir: Path) -> None:
    """从 HAR 提取 Cookie、非敏感请求头、导出表单身份字段到本地 secrets 目录。"""
    entries = _load_json(har).get("log", {}).get("entries", [])
    list_entry = _find_entry(entries, LIST_PATH_HINT, method="GET")
    export_entry = _find_entry(entries, EXPORT_PATH_HINT, method="POST")
    if not export_entry:
        raise click.ClickException("HAR 中没有找到 exportPreviewList.ajax POST 请求；请重新采集包含一次导出的 HAR。")

    # Prefer the cookie sent with the export request; fall back to the list request.
    cookie = _find_cookie_header(export_entry)
    if not cookie and list_entry:
        cookie = _find_cookie_header(list_entry)
    if not cookie:
        raise click.ClickException("HAR 中没有 Cookie；请导出包含请求头的 HAR。")

    out_dir.mkdir(parents=True, exist_ok=True)
    os.chmod(out_dir, 0o700)

    export_request = export_entry.get("request", {})
    export_headers = _headers_dict(export_request.get("headers", []), include_sensitive=False)
    if list_entry:
        list_headers = _headers_dict(list_entry.get("request", {}).get("headers", []), include_sensitive=False)
    else:
        list_headers = {}
    form = _parse_form(export_request.get("postData", {}).get("text"))
    identity = {}
    for field in ("User-mup", "User-mnp", "uuid"):
        if field in form:
            identity[field] = form[field]

    # Written in this order; the same order is reported in the "files" summary.
    json_files = {
        "cookie.json": {"Cookie": cookie},
        "export_headers.json": export_headers,
        "list_headers.json": list_headers,
        "form_identity.json": identity,
    }
    for filename, content in json_files.items():
        _write_json(out_dir / filename, content, 0o600)

    readme = out_dir / "README.md"
    readme.write_text("京东/商智 jd-export 本地登录态目录。不要提交、不要发聊天、不要同步到 NAS。Cookie 过期或风控时重新采集 HAR。\n", encoding="utf-8")
    os.chmod(readme, 0o600)

    _json_print({
        "out_dir": str(out_dir),
        "files": [*json_files, "README.md"],
        "cookie_chars": len(cookie),
        "identity_fields": sorted(identity),
    })


# Replays the report-list request with the locally stored cookie/headers and
# writes the normalized report list to --out. Endpoints come from the manifest,
# falling back to the sz.jd.com defaults.
# Fails with click.ClickException when the request cannot be completed or the
# response body is not JSON.
@main.command("fetch-list")
@click.argument("manifest", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--credential-dir", type=click.Path(path_type=Path), default=DEFAULT_CREDENTIAL_DIR, show_default=True)
@click.option("--cookie-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--headers-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--out", "out_path", type=click.Path(path_type=Path), required=True)
def fetch_list(manifest: Path, credential_dir: Path, cookie_file: Path | None, headers_file: Path | None, out_path: Path) -> None:
    """用本地 Cookie 重放 getReportList.ajax，保存规范化报表列表。"""
    data = _load_json(manifest)
    list_url = data.get("list_endpoint") or f"https://sz.jd.com{LIST_PATH_HINT}"
    export_url = data.get("export_endpoint") or f"https://sz.jd.com{EXPORT_PATH_HINT}"
    headers = _load_optional_json(headers_file, credential_dir / "list_headers.json")
    cookie = _load_cookie_header(cookie_file, credential_dir)
    if cookie:
        headers["Cookie"] = cookie
    try:
        resp = requests.get(list_url, headers=headers, timeout=90)
    except requests.RequestException as exc:
        # Connection errors and timeouts become a CLI error message instead of a traceback.
        raise click.ClickException(f"列表接口请求失败：url={list_url}, error={exc!r}") from exc
    try:
        payload = resp.json()
    except Exception as exc:
        raise click.ClickException(f"列表接口没有返回 JSON：status={resp.status_code}, content-type={resp.headers.get('content-type')}, error={exc}") from exc
    reports = _reports_from_list_payload(payload, list_url=list_url, export_url=export_url)
    result = {"source": "jd_getReportList.ajax", "status_code": resp.status_code, "list_endpoint": list_url, "export_endpoint": export_url, "report_count": len(reports), "reports": reports}
    _write_json(out_path, result)
    _json_print({"out": str(out_path), "status": resp.status_code, "report_count": len(reports), "report_names": [r["report_name"] for r in reports]})


def _claim_output_path(directory: Path, filename: str, claimed: set[str]) -> Path:
    """Return ``directory / filename``, adding ``_2``, ``_3``, ... if this run already used that name."""
    probe = Path(filename)
    stem, suffix = probe.stem, probe.suffix
    name = filename
    serial = 1
    # Compare case-folded so names differing only by case cannot clobber each
    # other on case-insensitive filesystems.
    while name.casefold() in claimed:
        serial += 1
        name = f"{stem}_{serial}{suffix}"
    claimed.add(name.casefold())
    return directory / name


def _extract_zip_members(zip_path: Path, dest_dir: Path, claimed: set[str]) -> list[str]:
    """Extract every file member of *zip_path* flat into *dest_dir* and return the written paths."""
    written: list[str] = []
    with zipfile.ZipFile(zip_path) as archive:
        for member in archive.infolist():
            if member.is_dir():
                continue
            # Only the sanitized base name is used; directory parts of the member path are discarded.
            member_name = _safe_filename(Path(member.filename).name)
            target = _claim_output_path(dest_dir, member_name, claimed)
            with archive.open(member) as src, target.open("wb") as dst:
                shutil.copyfileobj(src, dst)
            written.append(str(target))
    return written


# Posts one export request per report in REPORT_LIST and stores each ZIP under
# <out-dir>/zip, optionally extracting its members into <out-dir>/xls.
# --dry-run only prints the plan. Per-report failures are collected in the
# "failed" list of the JSON summary instead of aborting the run.
# Names already written during this run get a numeric suffix, so two reports
# with the same file name no longer overwrite each other; files left by earlier
# runs are still overwritten as before.
@main.command("export-all")
@click.argument("report_list", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--credential-dir", type=click.Path(path_type=Path), default=DEFAULT_CREDENTIAL_DIR, show_default=True)
@click.option("--cookie-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--headers-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--identity-file", type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--out-dir", type=click.Path(path_type=Path), default=Path("exports"), show_default=True)
@click.option("--dry-run", is_flag=True)
@click.option("--limit", type=int, default=0, show_default=True)
@click.option("--report-name", multiple=True, help="只导出指定报表名，可重复传。")
@click.option("--extract/--no-extract", default=True, show_default=True, help="导出 ZIP 后是否解压其中 XLS 文件。")
def export_all(report_list: Path, credential_dir: Path, cookie_file: Path | None, headers_file: Path | None, identity_file: Path | None, out_dir: Path, dry_run: bool, limit: int, report_name: tuple[str, ...], extract: bool) -> None:
    """按报表列表逐个 POST exportPreviewList.ajax，保存 ZIP/XLS。"""
    data = _load_json(report_list)
    reports = list(data.get("reports", []))
    if report_name:
        names = set(report_name)
        reports = [r for r in reports if r.get("report_name") in names]
    if limit and limit > 0:
        reports = reports[:limit]
    plan = [{"index": i, "report_name": r.get("report_name"), "start_date": r.get("start_date"), "end_date": r.get("end_date"), "report_dim": r.get("report_dim")} for i, r in enumerate(reports, 1)]
    if dry_run:
        _json_print({"dry_run": True, "count": len(plan), "plan": plan})
        return
    if not reports:
        raise click.ClickException("没有可导出的报表。")
    headers = _load_optional_json(headers_file, credential_dir / "export_headers.json")
    cookie = _load_cookie_header(cookie_file, credential_dir)
    if cookie:
        headers["Cookie"] = cookie
    identity = _load_optional_json(identity_file, credential_dir / "form_identity.json")
    missing = [k for k in ("User-mup", "User-mnp", "uuid") if not identity.get(k)]
    if missing:
        raise click.ClickException(f"缺少导出身份字段：{missing}；请从包含一次导出的 HAR 运行 extract-credentials。")
    out_dir.mkdir(parents=True, exist_ok=True)
    zip_dir = out_dir / "zip"
    xls_dir = out_dir / "xls"
    zip_dir.mkdir(exist_ok=True)
    if extract:
        xls_dir.mkdir(exist_ok=True)
    saved = []
    failed = []
    # File names already written during this run, tracked per output directory.
    claimed_zip: set[str] = set()
    claimed_xls: set[str] = set()
    for idx, r in enumerate(reports, 1):
        export_url = r.get("export_url") or data.get("export_endpoint") or f"https://sz.jd.com{EXPORT_PATH_HINT}"
        payload = {
            "SkuId": r.get("sku_id", ""),
            "ReportDim": r.get("report_dim", ""),
            "Indicators": json.dumps(r.get("indicators") or [], ensure_ascii=False, separators=(",", ":")),
            "Attributions": json.dumps(r.get("attributions") or [], ensure_ascii=False, separators=(",", ":")),
            "startDate": r.get("start_date", ""),
            "endDate": r.get("end_date", ""),
            "ReportName": r.get("report_name", f"report_{idx}"),
            **identity,
        }
        try:
            resp = requests.post(export_url, headers=headers, data=payload, timeout=120)
            ctype = resp.headers.get("content-type", "")
            # Accept the body as a ZIP if it starts with the "PK" magic bytes or the content type says zip.
            if resp.status_code != 200 or not (resp.content[:2] == b"PK" or "zip" in ctype.lower()):
                failed.append({"index": idx, "report_name": r.get("report_name"), "status": resp.status_code, "content_type": ctype, "message": "response is not ZIP"})
                continue
            fallback = f"{_safe_filename(str(r.get('report_name') or 'jd_report'))}_{r.get('start_date')}_{r.get('end_date')}.zip"
            filename = _decode_disposition_filename(resp.headers.get("content-disposition", ""), fallback)
            if not filename.lower().endswith(".zip"):
                filename += ".zip"
            zip_path = _claim_output_path(zip_dir, filename, claimed_zip)
            zip_path.write_bytes(resp.content)
            record = {"index": idx, "report_name": r.get("report_name"), "zip": str(zip_path), "zip_bytes": zip_path.stat().st_size, "extracted": []}
            if extract:
                try:
                    record["extracted"] = _extract_zip_members(zip_path, xls_dir, claimed_xls)
                except Exception as exc:
                    # The ZIP stays on disk; the report is listed as failed, not saved.
                    failed.append({"index": idx, "report_name": r.get("report_name"), "zip": str(zip_path), "error": f"zip extract failed: {exc!r}"})
                    continue
            saved.append(record)
        except Exception as exc:
            # Deliberately broad: one failing report must not abort the remaining exports.
            failed.append({"index": idx, "report_name": r.get("report_name"), "error": repr(exc)})
    _json_print({"saved_count": len(saved), "failed_count": len(failed), "saved": saved, "failed": failed})


# Invoke the click command group when this file is executed as a script.
if __name__ == "__main__":
    main()
