from pathlib import Path
import datetime as dt
import re
import math
import xlrd
import openpyxl
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

PROJECT = Path('/Users/bot1/Volumes/root_for_ai/AI工作区/良渚_周报_20260608-0614_20260615_0835')
SRC = PROJECT / 'source'
OUT = PROJECT / 'deliverables' / '良渚周报_2026-06-08至2026-06-14.xlsx'
CUR_START, CUR_END = dt.date(2026,6,8), dt.date(2026,6,14)
PREV_START, PREV_END = dt.date(2026,6,1), dt.date(2026,6,7)

WEEK_ACTIONS = [
    '热销产品详情页更新',
    '618推广计划里的商品更新',
    '推广图更新',
    '已规划的产品继续开发',
]


def to_num(x):
    if x is None:
        return 0.0
    if isinstance(x, (int, float)):
        if math.isnan(x) if isinstance(x, float) else False:
            return 0.0
        return float(x)
    s = str(x).strip().replace(',', '')
    if s in ('', '-', '--', '—', 'nan', 'None'):
        return 0.0
    if s.endswith('%'):
        s = s[:-1]
        try:
            return float(s) / 100
        except Exception:
            return 0.0
    try:
        return float(s)
    except Exception:
        return 0.0


def to_date(x):
    if x is None or x == '':
        return None
    if isinstance(x, dt.datetime):
        return x.date()
    if isinstance(x, dt.date):
        return x
    if isinstance(x, (int, float)):
        if x > 20000000:
            return dt.datetime.strptime(str(int(x)), '%Y%m%d').date()
        try:
            return xlrd.xldate.xldate_as_datetime(x, 0).date()
        except Exception:
            return None
    s = str(x).strip().split()[0]
    if re.fullmatch(r'\d{8}\.0', s):
        s = s[:-2]
    for fmt in ('%Y-%m-%d', '%Y/%m/%d', '%Y%m%d'):
        try:
            return dt.datetime.strptime(s, fmt).date()
        except Exception:
            pass
    return None


def read_xlsx(path):
    wb = openpyxl.load_workbook(path, read_only=True, data_only=True)
    ws = wb[wb.sheetnames[0]]
    rows = list(ws.iter_rows(values_only=True))
    headers = [str(x).strip() if x is not None else '' for x in rows[0]]
    out = []
    for r in rows[1:]:
        if any(v is not None and str(v).strip() != '' for v in r):
            out.append(dict(zip(headers, r)))
    return headers, out


def read_xls(path):
    wb = xlrd.open_workbook(path)
    sh = wb.sheet_by_index(0)
    header_i = 0
    for i in range(min(20, sh.nrows)):
        vals = [str(sh.cell_value(i, j)).strip() for j in range(sh.ncols)]
        if any(v in ('统计日期', '日期', '时间') for v in vals):
            header_i = i
            break
    headers = [str(sh.cell_value(header_i, j)).strip() for j in range(sh.ncols)]
    out = []
    for i in range(header_i + 1, sh.nrows):
        vals = [sh.cell_value(i, j) for j in range(sh.ncols)]
        if any(str(v).strip() != '' for v in vals):
            out.append(dict(zip(headers, vals)))
    return headers, out


def file_contains(key):
    matches = [p for p in SRC.iterdir() if key in p.name]
    if not matches:
        raise FileNotFoundError(key)
    return matches[0]


def period_name(d):
    if CUR_START <= d <= CUR_END:
        return '本周'
    if PREV_START <= d <= PREV_END:
        return '上周'
    return None


def empty_metrics(platform, store, date=None, period=None):
    return {
        '周期': period or '', '日期': date, '平台': platform, '店铺': store,
        '访客数': 0.0, '浏览量': 0.0,
        '支付/成交金额': 0.0, '退款金额': 0.0, '实际成交金额': 0.0,
        '支付/成交买家数': 0.0, '支付/成交件数': 0.0,
        '加购人数/客户数': 0.0, '加购件数': 0.0, '收藏买家数': 0.0,
    }


def read_tm_tb_store(platform, store, key):
    _, rows = read_xlsx(file_contains(key))
    daily = []
    for r in rows:
        d = to_date(r.get('统计日期'))
        pn = period_name(d) if d else None
        if not pn:
            continue
        m = empty_metrics(platform, store, d, pn)
        m['访客数'] = to_num(r.get('访客数'))
        m['浏览量'] = to_num(r.get('浏览量'))
        m['支付/成交金额'] = to_num(r.get('支付金额'))
        m['退款金额'] = to_num(r.get('成功退款金额'))
        m['实际成交金额'] = m['支付/成交金额'] - m['退款金额']
        m['支付/成交买家数'] = to_num(r.get('支付买家数'))
        m['支付/成交件数'] = to_num(r.get('支付件数'))
        m['加购人数/客户数'] = to_num(r.get('加购人数'))
        m['加购件数'] = to_num(r.get('加购件数'))
        m['收藏买家数'] = to_num(r.get('商品收藏买家数'))
        daily.append(m)
    return daily


def read_jd_store():
    _, trans = read_xlsx(file_contains('交易概况'))
    _, traffic = read_xls(file_contains('京东分天'))
    by_date = {}
    for r in trans:
        d = to_date(r.get('时间'))
        pn = period_name(d) if d else None
        if not pn:
            continue
        m = by_date.setdefault(d, empty_metrics('京东', '良渚京东店', d, pn))
        m['支付/成交金额'] = to_num(r.get('成交金额'))
        m['退款金额'] = to_num(r.get('退款金额'))
        m['实际成交金额'] = m['支付/成交金额'] - m['退款金额']
        m['支付/成交买家数'] = to_num(r.get('成交客户数'))
        m['支付/成交件数'] = to_num(r.get('成交商品件数'))
        m['加购人数/客户数'] = to_num(r.get('加购客户数'))
        m['加购件数'] = to_num(r.get('加购商品件数'))
    for r in traffic:
        d = to_date(r.get('日期'))
        pn = period_name(d) if d else None
        if not pn:
            continue
        m = by_date.setdefault(d, empty_metrics('京东', '良渚京东店', d, pn))
        # 用户说明该文件名日期不准确，以表内日期为准；流量字段使用这份店铺交易流量表。
        m['访客数'] = to_num(r.get('访客数-全部渠道'))
        m['浏览量'] = to_num(r.get('浏览量-全部渠道'))
        if not m['支付/成交金额']:
            m['支付/成交金额'] = to_num(r.get('成交金额-全部渠道'))
            m['实际成交金额'] = m['支付/成交金额'] - m['退款金额']
    return list(by_date.values())


def aggregate(rows, period, platform=None):
    agg = empty_metrics(platform or '三店合计', platform or '三店合计', None, period)
    rows = [r for r in rows if r['周期'] == period and (platform is None or r['平台'] == platform)]
    if not rows:
        return agg
    agg['平台'] = platform or '三店合计'
    agg['店铺'] = rows[0]['店铺'] if platform else '三店合计'
    for f in ['访客数','浏览量','支付/成交金额','退款金额','实际成交金额','支付/成交买家数','支付/成交件数','加购人数/客户数','加购件数','收藏买家数']:
        agg[f] = sum(r.get(f,0) for r in rows)
    return agg


def derived(m):
    pay = m['支付/成交金额']
    visitors = m['访客数']
    buyers = m['支付/成交买家数']
    return {
        '客单价': pay / buyers if buyers else None,
        '支付/成交转化率': buyers / visitors if visitors else None,
        'UV价值': pay / visitors if visitors else None,
        '退款率': m['退款金额'] / pay if pay else None,
    }


def change(cur, prev):
    if cur is None or prev is None:
        return None, None
    diff = cur - prev
    rate = diff / prev if prev not in (0, None) else None
    return diff, rate


def read_product(platform, key):
    _, rows = read_xls(file_contains(key))
    grouped = {}
    for r in rows:
        name = str(r.get('商品名称') or '').strip()
        if not name:
            continue
        gid = str(r.get('商品ID') or '').strip().replace('.0','')
        k = (gid, name)
        g = grouped.setdefault(k, {
            '平台': platform, '商品ID': gid, '商品名称': name,
            '货号': str(r.get('货号') or '').strip(), '商品状态': str(r.get('商品状态') or '').strip(),
            '商品访客数': 0.0, '商品浏览量': 0.0, '支付买家数': 0.0, '支付件数': 0.0,
            '支付金额': 0.0, '成功退款金额': 0.0, '实际成交金额': 0.0,
            '商品收藏人数': 0.0, '商品加购件数': 0.0,
        })
        for src_f, dst_f in [('商品访客数','商品访客数'),('商品浏览量','商品浏览量'),('支付买家数','支付买家数'),('支付件数','支付件数'),('支付金额','支付金额'),('成功退款金额','成功退款金额'),('商品收藏人数','商品收藏人数'),('商品加购件数','商品加购件数')]:
            g[dst_f] += to_num(r.get(src_f))
        g['实际成交金额'] = g['支付金额'] - g['成功退款金额']
    products = list(grouped.values())
    products = [p for p in products if p['支付金额'] > 0 or p['实际成交金额'] > 0]
    products.sort(key=lambda x: (x['实际成交金额'], x['支付金额']), reverse=True)
    for p in products:
        p['商品支付转化率'] = p['支付买家数'] / p['商品访客数'] if p['商品访客数'] else None
        p['访客平均价值'] = p['支付金额'] / p['商品访客数'] if p['商品访客数'] else None
    return products


def pct(x):
    return '' if x is None else x


def money(x):
    return round(x or 0, 2)


def short_name(s, max_len=32):
    s = str(s)
    return s if len(s) <= max_len else s[:max_len] + '…'

# Load and aggregate data
store_daily = []
store_daily += read_tm_tb_store('天猫', '良渚文化旗舰店', '天猫分天')
store_daily += read_tm_tb_store('淘宝', '良渚博物院官方店', '淘宝分天')
store_daily += read_jd_store()
store_daily.sort(key=lambda r: (r['周期'], r['平台'], r['日期'] or dt.date.min))
platforms = ['天猫', '淘宝', '京东']

store_rows = []
for platform in platforms + [None]:
    cur = aggregate(store_daily, '本周', platform)
    prev = aggregate(store_daily, '上周', platform)
    cder, pder = derived(cur), derived(prev)
    row = {'平台': platform or '三店合计', '店铺': cur['店铺']}
    for f in ['访客数','浏览量','支付/成交金额','退款金额','实际成交金额','支付/成交买家数','支付/成交件数','加购人数/客户数','加购件数','收藏买家数']:
        row[f'本周{f}'] = cur[f]
        row[f'上周{f}'] = prev[f]
        diff, rate = change(cur[f], prev[f])
        row[f'{f}变化'] = diff
        row[f'{f}环比'] = rate
    for f in ['客单价','支付/成交转化率','UV价值','退款率']:
        row[f'本周{f}'] = cder[f]
        row[f'上周{f}'] = pder[f]
        diff, rate = change(cder[f], pder[f])
        row[f'{f}变化'] = diff
        row[f'{f}环比'] = rate
    store_rows.append(row)

products = read_product('天猫', '天猫-周') + read_product('淘宝', '淘宝-周')
# top 15 each platform
product_top = []
for platform in ['天猫', '淘宝']:
    rows = [p for p in products if p['平台'] == platform]
    for i, p in enumerate(rows[:15], 1):
        q = dict(p)
        q['排名'] = i
        product_top.append(q)

# Conclusions
summary = next(r for r in store_rows if r['平台'] == '三店合计')
tmall = next(r for r in store_rows if r['平台'] == '天猫')
taobao = next(r for r in store_rows if r['平台'] == '淘宝')
jd = next(r for r in store_rows if r['平台'] == '京东')

def fmt_money(x):
    return f'{x:,.2f}'

def fmt_pct(x):
    return '—' if x is None else f'{x*100:.2f}%'

def signed_pct(x):
    if x is None:
        return '—'
    sign = '+' if x >= 0 else ''
    return f'{sign}{x*100:.2f}%'

def signed_num(x):
    if x is None:
        return '—'
    sign = '+' if x >= 0 else ''
    return f'{sign}{x:,.2f}'

top_tmall = [p for p in product_top if p['平台']=='天猫'][:3]
top_taobao = [p for p in product_top if p['平台']=='淘宝'][:3]
conclusions = [
    ['店铺数据增长点', f"三店本周实际成交金额 {fmt_money(summary['本周实际成交金额'])} 元，上周 {fmt_money(summary['上周实际成交金额'])} 元，环比 {signed_pct(summary['实际成交金额环比'])}；本周访客数 {int(summary['本周访客数'])}，环比 {signed_pct(summary['访客数环比'])}。"],
    ['店铺数据增长点', f"天猫本周实际成交金额 {fmt_money(tmall['本周实际成交金额'])} 元，环比 {signed_pct(tmall['实际成交金额环比'])}，是本周整体增长的主要来源；淘宝本周实际成交金额 {fmt_money(taobao['本周实际成交金额'])} 元，环比 {signed_pct(taobao['实际成交金额环比'])}。"],
    ['风险/关注', f"京东本周实际成交金额 {fmt_money(jd['本周实际成交金额'])} 元，环比 {signed_pct(jd['实际成交金额环比'])}；建议继续补充京东商品明细，便于判断成交下滑来自商品结构还是流量承接。"],
    ['商品销售增长点', '天猫TOP商品：' + '；'.join([f"{short_name(p['商品名称'])}（实际成交{fmt_money(p['实际成交金额'])}元）" for p in top_tmall])],
    ['商品销售增长点', '淘宝TOP商品：' + '；'.join([f"{short_name(p['商品名称'])}（实际成交{fmt_money(p['实际成交金额'])}元）" for p in top_taobao])],
    ['主要优化动作', '；'.join(WEEK_ACTIONS) + '。'],
    ['下周建议', '继续围绕本周高成交商品做详情页承接与618推广素材迭代；重点跟踪天猫高流量商品转化、淘宝成交稳定性，以及京东商品侧数据补齐后的商品结构调整。'],
]

# Workbook writing
wb = Workbook()
ws = wb.active
ws.title = '00_口径说明'

header_fill = PatternFill('solid', fgColor='1F4E78')
sub_fill = PatternFill('solid', fgColor='D9EAF7')
thin = Side(style='thin', color='D9D9D9')
border = Border(left=thin, right=thin, top=thin, bottom=thin)


def style_sheet(ws):
    for row in ws.iter_rows():
        for cell in row:
            cell.border = border
            cell.alignment = Alignment(vertical='top', wrap_text=True)
    ws.freeze_panes = 'A2'
    ws.auto_filter.ref = ws.dimensions
    for col in range(1, ws.max_column + 1):
        max_len = 8
        letter = get_column_letter(col)
        for cell in ws[letter]:
            if cell.value is not None:
                max_len = max(max_len, min(42, len(str(cell.value)) + 2))
        ws.column_dimensions[letter].width = max_len


def write_rows(ws, rows):
    for r in rows:
        ws.append(r)
    for cell in ws[1]:
        cell.font = Font(bold=True, color='FFFFFF')
        cell.fill = header_fill
        cell.alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)

# 00口径
source_files = sorted([p.name for p in SRC.iterdir()])
write_rows(ws, [
    ['项目', '内容'],
    ['报表周期', f'{CUR_START} 至 {CUR_END}'],
    ['对比周期', f'{PREV_START} 至 {PREV_END}'],
    ['店铺范围', '天猫：良渚文化旗舰店；淘宝：良渚博物院官方店；京东：良渚京东店'],
    ['核心公式', '实际成交金额 = 支付/成交金额 - 退款金额；客单价、转化率、UV价值、退款率均按汇总后分子/分母重新计算'],
    ['京东流量文件口径', '文件名日期不准确，按用户说明以表内日期筛选 2026-06-08 至 2026-06-14'],
    ['商品TOP口径', '按实际成交金额排序；京东商品明细未提供，本次商品TOP仅含天猫、淘宝'],
    ['本周运营动作', '；'.join(WEEK_ACTIONS)],
])
ws.append([])
ws.append(['源文件清单'])
ws['A10'].font = Font(bold=True)
for f in source_files:
    ws.append([f])
style_sheet(ws)

# 01店铺核心指标
ws = wb.create_sheet('01_店铺核心指标')
cols = ['平台','店铺',
        '本周访客数','上周访客数','访客数变化','访客数环比',
        '本周浏览量','上周浏览量','浏览量变化','浏览量环比',
        '本周支付/成交金额','上周支付/成交金额','支付/成交金额变化','支付/成交金额环比',
        '本周退款金额','上周退款金额','退款金额变化','退款金额环比',
        '本周实际成交金额','上周实际成交金额','实际成交金额变化','实际成交金额环比',
        '本周支付/成交买家数','上周支付/成交买家数','支付/成交买家数变化','支付/成交买家数环比',
        '本周支付/成交件数','上周支付/成交件数','支付/成交件数变化','支付/成交件数环比',
        '本周客单价','上周客单价','客单价变化','客单价环比',
        '本周支付/成交转化率','上周支付/成交转化率','支付/成交转化率变化','支付/成交转化率环比',
        '本周UV价值','上周UV价值','UV价值变化','UV价值环比',
        '本周退款率','上周退款率','退款率变化','退款率环比',
        '本周加购人数/客户数','本周加购件数','本周收藏买家数']
write_rows(ws, [cols])
for r in store_rows:
    ws.append([r.get(c, '') for c in cols])
style_sheet(ws)
for row in ws.iter_rows(min_row=2):
    for cell in row:
        if isinstance(cell.value, float):
            if '率' in ws.cell(1, cell.column).value or '转化' in ws.cell(1, cell.column).value:
                cell.number_format = '0.00%'
            else:
                cell.number_format = '#,##0.00'

# 02商品TOP
ws = wb.create_sheet('02_商品TOP')
cols = ['平台','排名','商品ID','商品名称','货号','商品状态','商品访客数','商品浏览量','支付买家数','支付件数','支付金额','成功退款金额','实际成交金额','商品支付转化率','访客平均价值','商品收藏人数','商品加购件数']
write_rows(ws, [cols])
for p in product_top:
    ws.append([p.get(c,'') for c in cols])
style_sheet(ws)
for row in ws.iter_rows(min_row=2):
    for cell in row:
        head = ws.cell(1, cell.column).value
        if isinstance(cell.value, float):
            if '率' in head:
                cell.number_format = '0.00%'
            elif any(k in head for k in ['金额','价值']):
                cell.number_format = '#,##0.00'
            else:
                cell.number_format = '#,##0'

# 03平台店铺对比（日维度）
ws = wb.create_sheet('03_平台店铺对比')
cols = ['周期','日期','平台','店铺','访客数','浏览量','支付/成交金额','退款金额','实际成交金额','支付/成交买家数','支付/成交件数','支付/成交转化率','客单价','UV价值','加购人数/客户数','加购件数','收藏买家数']
write_rows(ws, [cols])
for r in sorted(store_daily, key=lambda x: (x['周期'], x['日期'], x['平台'])):
    d = derived(r)
    ws.append([r['周期'], r['日期'], r['平台'], r['店铺'], r['访客数'], r['浏览量'], r['支付/成交金额'], r['退款金额'], r['实际成交金额'], r['支付/成交买家数'], r['支付/成交件数'], d['支付/成交转化率'], d['客单价'], d['UV价值'], r['加购人数/客户数'], r['加购件数'], r['收藏买家数']])
style_sheet(ws)
for row in ws.iter_rows(min_row=2):
    for cell in row:
        head = ws.cell(1, cell.column).value
        if isinstance(cell.value, dt.date):
            cell.number_format = 'yyyy-mm-dd'
        elif isinstance(cell.value, float):
            if '率' in head:
                cell.number_format = '0.00%'
            elif any(k in head for k in ['金额','客单价','UV价值']):
                cell.number_format = '#,##0.00'
            else:
                cell.number_format = '#,##0'

# 04运营结论
ws = wb.create_sheet('04_运营结论')
write_rows(ws, [['模块', '结论/建议']])
for row in conclusions:
    ws.append(row)
style_sheet(ws)
ws.column_dimensions['A'].width = 18
ws.column_dimensions['B'].width = 100

# 05原始汇总明细
ws = wb.create_sheet('05_原始汇总明细')
cols = ['周期','日期','平台','店铺','访客数','浏览量','支付/成交金额','退款金额','实际成交金额','支付/成交买家数','支付/成交件数','加购人数/客户数','加购件数','收藏买家数']
write_rows(ws, [cols])
for r in sorted(store_daily, key=lambda x: (x['周期'], x['平台'], x['日期'])):
    ws.append([r.get(c,'') for c in cols])
style_sheet(ws)
for row in ws.iter_rows(min_row=2):
    for cell in row:
        head = ws.cell(1, cell.column).value
        if isinstance(cell.value, dt.date):
            cell.number_format = 'yyyy-mm-dd'
        elif isinstance(cell.value, float):
            if any(k in head for k in ['金额']):
                cell.number_format = '#,##0.00'
            else:
                cell.number_format = '#,##0'

# workbook final formatting
for ws in wb.worksheets:
    ws.sheet_view.showGridLines = False
    if ws.max_row >= 1:
        ws.row_dimensions[1].height = 28

OUT.parent.mkdir(parents=True, exist_ok=True)
wb.save(OUT)

# Validation reopen
vwb = load_workbook(OUT, data_only=False)
expected = ['00_口径说明','01_店铺核心指标','02_商品TOP','03_平台店铺对比','04_运营结论','05_原始汇总明细']
missing = [s for s in expected if s not in vwb.sheetnames]
if missing:
    raise RuntimeError('Missing sheets: ' + ', '.join(missing))
if OUT.stat().st_size <= 0:
    raise RuntimeError('Output file is empty')
# Check representative totals
ws1 = vwb['01_店铺核心指标']
headers = [c.value for c in ws1[1]]
idx = {h:i+1 for i,h in enumerate(headers)}
found_total = False
for row in range(2, ws1.max_row+1):
    if ws1.cell(row, idx['平台']).value == '三店合计':
        val = ws1.cell(row, idx['本周实际成交金额']).value
        if abs(val - summary['本周实际成交金额']) > 0.01:
            raise RuntimeError('Total mismatch')
        found_total = True
if not found_total:
    raise RuntimeError('Total row not found')

print(str(OUT))
print('size', OUT.stat().st_size)
print('sheets', ','.join(vwb.sheetnames))
print('total_actual', round(summary['本周实际成交金额'],2))
print('total_actual_prev', round(summary['上周实际成交金额'],2))
print('total_actual_rate', summary['实际成交金额环比'])
print('tmall_actual', round(tmall['本周实际成交金额'],2), 'rate', tmall['实际成交金额环比'])
print('taobao_actual', round(taobao['本周实际成交金额'],2), 'rate', taobao['实际成交金额环比'])
print('jd_actual', round(jd['本周实际成交金额'],2), 'rate', jd['实际成交金额环比'])
print('top_tmall', [(short_name(p['商品名称'],20), round(p['实际成交金额'],2)) for p in top_tmall])
print('top_taobao', [(short_name(p['商品名称'],20), round(p['实际成交金额'],2)) for p in top_taobao])
