# -*- coding: utf-8 -*-
"""
log_parser.py — 公共日志解析模块
所有可视化工具共用同一套正则和解析函数，避免重复维护。
"""
import re, math, os

# Directory one level above the folder that contains this module.
_ROOT = os.path.dirname(
    os.path.dirname(
        os.path.abspath(__file__)
    )
)
# Log file used when a parser is called without an explicit path.
DEFAULT_LOG = os.path.join(_ROOT, "my_trader.log")

# Short codes of the 15 factors.
# NOTE: this order is not the field order of the factor log line
# (the log prints FR before TK/OI).
FK = [
    'TR', 'OB', 'TK', 'OI', 'FR',
    'MP', 'VD', 'BTC', 'GM', 'IV',
    'EX', 'LC', 'MR', 'SM', 'CT',
]

# Factor code -> display name (runtime strings, intentionally left as-is).
FN = {
    'TR': '趋势',
    'OB': '挂单',
    'TK': '成交',
    'OI': '持仓量',
    'FR': '费率',
    'MP': '痛点',
    'VD': '量变',
    'BTC': 'BTC',
    'GM': 'Gamma',
    'IV': '波动率',
    'EX': '衰竭',
    'LC': '清算',
    'MR': '回归',
    'SM': '聪明钱',
    'CT': '跟单',
}

# Factor code -> hex colour string.
FC = {
    'TR': '#ff6b6b',
    'OB': '#ffa502',
    'TK': '#2ed573',
    'OI': '#1e90ff',
    'FR': '#a55eea',
    'MP': '#ff4757',
    'VD': '#7bed9f',
    'BTC': '#eccc68',
    'GM': '#e056fd',
    'IV': '#686de0',
    'EX': '#ff6348',
    'LC': '#3ae374',
    'MR': '#18dcff',
    'SM': '#7d5fff',
    'CT': '#ffd700',
}

# ── Regular expressions ───────────────────────────────────────
# Every pattern captures the line timestamp "YYYY-MM-DD HH:MM:SS" as group 1.

# ETH price line: group 2 is the price text after "[ETH] $" (may contain
# thousands separators), terminated by " |".
RE_PRICE = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+ \[MyTrader\] INFO: \[ETH\] \$([0-9,.]+) \|'
)
# ETH factor line. Groups 2..16 are the factor values in log order
# (TR OB FR TK OI MP VD BTC GM IV EX LC MR SM CT); "mom" and "flip" are
# matched but not captured; group 17 is the number after "=>" and group 18
# the word after "->". Fields must be separated by exactly one space.
RE_FACTOR = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+ \[MyTrader\] INFO: \[ETH\] '
    r'TR:([+-]?\d+\.?\d*) OB:([+-]?\d+\.?\d*) FR:([+-]?\d+\.?\d*) '
    r'TK:([+-]?\d+\.?\d*) OI:([+-]?\d+\.?\d*) MP:([+-]?\d+\.?\d*) '
    r'VD:([+-]?\d+\.?\d*) BTC:([+-]?\d+\.?\d*) GM:([+-]?\d+\.?\d*) '
    r'IV:([+-]?\d+\.?\d*) EX:([+-]?\d+\.?\d*) LC:([+-]?\d+\.?\d*) '
    r'MR:([+-]?\d+\.?\d*) SM:([+-]?\d+\.?\d*) CT:([+-]?\d+\.?\d*) '
    r'mom:[+-]?\d+\.?\d* flip:[+-]?\d+\.?\d* => ([+-]?\d+\.?\d*) -> (\w+)'
)
# "[News] DeepSeek:" line: group 2 is the text up to the first " ~".
RE_NEWS_DS = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+ \[MyTrader\] INFO: \[News\] DeepSeek: (.+?) ~'
)
# Fully structured DeepSeek news line: group 2 = number after "AI判断:",
# group 3 = percentage after "把握", group 4 = integer after "恐贪:",
# group 5 = free text, group 6 = text inside the following [...] pair.
RE_NEWS_FULL = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+ \[MyTrader\] INFO: \[News\] DeepSeek: '
    r'AI判断:([+-]?\d+\.?\d*)\(把握(\d+)%[^)]*\) 恐贪:(\d+) (.+?) \[(.+?)\]'
)
# "[News]" line that does NOT start with "DeepSeek" and contains "=> <int>":
# group 2 is that integer (the greedy ".+" makes it the last "=>" on the line).
RE_NEWS_SCORE = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+ \[MyTrader\] INFO: \[News\] (?!DeepSeek).+=>\s*([+-]?\d+)'
)
# "[FLASH] ★<n> <text>" line: group 2 = the integer after the star, group 3 = text.
RE_FLASH = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+ \[MyTrader\] INFO: \[FLASH\] ★(\d+) (.+)'
)
# Position-open line: group 2 = LONG|SHORT, group 3 = price after "@ $",
# group 4 = TP price, group 5 = SL price.
RE_TRADE_OPEN = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*ETH (LONG|SHORT) [\d.]+\S* @ \$([\d.]+) TP:\$([\d.]+) SL:\$([\d.]+)'
)
# Line containing "[止盈]": group 2 is the numeric text after the last "$"
# that is followed by digits/"-"/"." (the ".*" before it is greedy).
RE_TRADE_TP = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*\[止盈\].*\$([-\d.]+)'
)
# Line containing "[止损]": same capture layout as RE_TRADE_TP.
RE_TRADE_SL = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*\[止损\].*\$([-\d.]+)'
)
# Other close lines: group 2 is whichever close keyword/phrase matched.
RE_TRADE_CLOSE = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*(锁利|因子反向.*锁|强制平仓|决策树.*平)'
)


def _parse_factor_row(m):
    """Convert a factor-line regex match into a dict of parsed values.

    Args:
        m: a match object whose groups 2..16 hold the factor values in log
           order, group 17 the total and group 18 the direction word.

    Returns:
        dict with one float per factor code, plus 'total' (float) and
        'dir' (str), inserted in capture-group order.
    """
    # Factor codes in the order their capture groups appear on the log line.
    ordered_keys = (
        'TR', 'OB', 'FR', 'TK', 'OI', 'MP', 'VD', 'BTC',
        'GM', 'IV', 'EX', 'LC', 'MR', 'SM', 'CT',
    )
    parsed = {
        key: float(m.group(group_no))
        for group_no, key in enumerate(ordered_keys, start=2)
    }
    parsed['total'] = float(m.group(17))
    parsed['dir'] = m.group(18)
    return parsed


def _to_float(text):
    """Parse *text* as a float, ignoring thousands separators.

    Returns None instead of raising when the text is not a valid number;
    the price patterns are loose enough to accept runs such as "1.2.3"
    or a lone "-".
    """
    try:
        return float(text.replace(',', ''))
    except ValueError:
        return None


def parse_rows(log_path=None, max_rows=6000):
    """Parse the trader log into factor rows and trade events.

    Args:
        log_path: path of the log file; falls back to DEFAULT_LOG when falsy.
        max_rows: keep only the last ``max_rows`` priced rows
            (applied as the slice ``rows[-max_rows:]``).

    Returns:
        A ``(rows, events)`` tuple. ``([], [])`` is returned when the file
        cannot be opened or read.

        Each row is a dict:
          {'ts', 'price', 'news': {...}|None, 'TR'..'CT', 'total', 'dir'}
        where 'news' is the most recent structured DeepSeek news seen before
        the row ({'ai', 'conf', 'fg', 'summary', 'headline'}).

        Each event is a dict:
          {'ts', 'type': 'open'|'tp'|'sl'|'close', 'price', 'label',
           'color', 'dir'}
        and 'open' events additionally carry 'tp' and 'sl'.

    Lines whose numeric fields cannot be converted to float are skipped
    instead of aborting the whole parse.
    """
    import bisect  # local import so this function does not depend on file-level imports

    path = log_path or DEFAULT_LOG
    try:
        # Context manager guarantees the handle is closed even if reading fails.
        with open(path, encoding='utf-8', errors='replace') as fh:
            lines = fh.readlines()
    except (OSError, ValueError):
        # Missing/unreadable file or an invalid path string: best-effort empty result.
        return [], []

    prices_map = {}   # timestamp -> price from the price line
    rows = []
    events = []
    last_news = None

    for line in lines:
        m = RE_PRICE.search(line)
        if m:
            px = _to_float(m.group(2))
            if px is not None:
                prices_map[m.group(1)] = px

        m = RE_NEWS_FULL.search(line)
        if m:
            last_news = {
                'ai':      float(m.group(2)),
                'conf':    int(m.group(3)),
                'fg':      int(m.group(4)),
                'summary': m.group(5).strip()[:80],
                'headline': m.group(6).strip()[:100],
            }

        m = RE_FACTOR.search(line)
        if m:
            ts = m.group(1)
            # Price is taken from a price line with the identical timestamp
            # seen so far; gaps are forward-filled after the loop.
            row = {'ts': ts, 'price': prices_map.get(ts), 'news': last_news}
            row.update(_parse_factor_row(m))
            rows.append(row)

        m = RE_TRADE_OPEN.search(line)
        if m:
            side = m.group(2)
            entry, tp, sl = (_to_float(m.group(i)) for i in (3, 4, 5))
            if entry is not None and tp is not None and sl is not None:
                events.append({
                    'ts': m.group(1), 'type': 'open',
                    'price': entry,
                    'label': f"{'▲' if side == 'LONG' else '▼'}{side[0]} @{entry:.0f}",
                    'color': '#2ed573' if side == 'LONG' else '#ff4757',
                    'dir': side, 'tp': tp, 'sl': sl,
                })

        m = RE_TRADE_TP.search(line)
        if m:
            pnl = _to_float(m.group(2))
            if pnl is not None:
                events.append({'ts': m.group(1), 'type': 'tp', 'price': 0,
                               'label': f"止盈 ${pnl:+.4f}",
                               'color': '#f0c040', 'dir': ''})

        m = RE_TRADE_SL.search(line)
        if m:
            pnl = _to_float(m.group(2))
            if pnl is not None:
                events.append({'ts': m.group(1), 'type': 'sl', 'price': 0,
                               'label': f"止损 ${pnl:+.4f}",
                               'color': '#ff6348', 'dir': ''})

        m = RE_TRADE_CLOSE.search(line)
        if m:
            events.append({'ts': m.group(1), 'type': 'close', 'price': 0,
                           'label': f"平仓:{m.group(2)[:6]}",
                           'color': '#aaa', 'dir': ''})

    # Forward-fill missing row prices with the last known price.
    last = None
    for r in rows:
        if r['price'] is None:
            r['price'] = last
        else:
            last = r['price']
    # Rows before the first known price stay None and are dropped.
    rows = [r for r in rows if r['price'] is not None][-max_rows:]

    # Keep only events inside the retained row window and give price-less
    # events the price of the latest row at or before the event.
    # Timestamps are fixed-width strings, so string order is time order.
    if rows:
        ts_map = {r['ts']: r['price'] for r in rows}
        all_ts = sorted(ts_map)
        t0, t1 = rows[0]['ts'], rows[-1]['ts']
        events = [e for e in events if t0 <= e['ts'] <= t1]
        for ev in events:
            if not ev['price']:
                pos = bisect.bisect_right(all_ts, ev['ts'])
                nearest = all_ts[pos - 1] if pos else all_ts[0]
                ev['price'] = ts_map[nearest]

    return rows, events


def parse_rows_simple(log_path=None, max_rows=2000):
    """Return only the factor rows of ``parse_rows`` (used by factor_correlation).

    This delegates to ``parse_rows`` and discards the events half of its
    result; the events are still parsed internally.
    """
    return parse_rows(log_path, max_rows)[0]


def parse_news_events(log_path=None):
    """Parse news and flash events from the log (used by factor_chart).

    Args:
        log_path: path of the log file; falls back to DEFAULT_LOG when falsy.

    Returns:
        A ``(news_events, flash_events)`` tuple, or ``([], [])`` when the
        file cannot be opened or read.

        news_events:  [{'ts', 'score': int, 'summary': str}, ...]
            'summary' is the DeepSeek text (at most 120 chars) from an
            earlier line with the identical timestamp, or '' if none.
        flash_events: [{'ts', 'score': int, 'text': str}, ...]
            'text' is truncated to 100 chars.
    """
    path = log_path or DEFAULT_LOG
    try:
        # Context manager guarantees the handle is closed even if reading fails.
        with open(path, encoding='utf-8', errors='replace') as fh:
            lines = fh.readlines()
    except (OSError, ValueError):
        # Missing/unreadable file or an invalid path string: best-effort empty result.
        return [], []

    news_events, flash_events = [], []
    ds_cache = {}   # timestamp -> latest DeepSeek summary seen for that second
    for line in lines:
        m = RE_NEWS_DS.search(line)
        if m:
            ds_cache[m.group(1)] = m.group(2)[:120]

        m = RE_NEWS_SCORE.search(line)
        if m:
            ts = m.group(1)
            news_events.append({'ts': ts, 'score': int(m.group(2)),
                                'summary': ds_cache.get(ts, '')})

        m = RE_FLASH.search(line)
        if m:
            flash_events.append({'ts': m.group(1), 'score': int(m.group(2)),
                                 'text': m.group(3)[:100]})
    return news_events, flash_events


def pearson(x, y):
    """Return the Pearson correlation coefficient of *x* and *y*, rounded to 4 places.

    Args:
        x, y: iterables of numbers. If their lengths differ, only the
            common leading part is used, so both means are computed over
            the same number of samples.

    Returns:
        The coefficient as a float, or 0.0 when fewer than 3 paired
        samples are available or either series is (near-)constant.
    """
    xs, ys = list(x), list(y)
    n = min(len(xs), len(ys))
    if n < 3:
        return 0.0
    # Pair the samples explicitly; zip() alone would truncate the products
    # but leave the means computed over mismatched lengths.
    xs, ys = xs[:n], ys[:n]
    mx, my = sum(xs) / n, sum(ys) / n
    num = sum((a - mx) * (b - my) for a, b in zip(xs, ys))
    dx = math.sqrt(sum((a - mx) ** 2 for a in xs))
    dy = math.sqrt(sum((b - my) ** 2 for b in ys))
    # Guard against division by (almost) zero for constant series.
    if dx > 1e-10 and dy > 1e-10:
        return round(num / (dx * dy), 4)
    return 0.0


def normalize_prices(rows):
    """Return the row prices rescaled linearly to the range [-1, 1].

    Args:
        rows: sequence of dicts that each have a numeric 'price' entry.

    Returns:
        A list of floats, one per row: the minimum price maps to -1 and the
        maximum to 1. When all prices are equal every value is 0.0.
        An empty input yields an empty list.
    """
    prices = [r['price'] for r in rows]
    if not prices:
        # min()/max() would raise ValueError on an empty sequence.
        return []
    lo, hi = min(prices), max(prices)
    # Fall back to a span of 1 for a flat series to avoid dividing by zero.
    half_span = ((hi - lo) if hi != lo else 1) / 2
    mid = (hi + lo) / 2
    return [(p - mid) / half_span for p in prices]


# ═══════════════════════════════════════════════════════════════
# BTC log parser — history extraction for strategy A
# Format: [BTC] $79,246.00 | ...   (price line)
#         [BTC] TR:-1 OB:-0.10 ... (factor line, right after the price line)
#         [News] DeepSeek: ...      (news line)
# ═══════════════════════════════════════════════════════════════

# BTC price line: group 1 = timestamp, group 2 = price text after "[BTC] $"
# (may contain thousands separators).
RE_BTC_PRICE = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+ \[MyTrader\].*\[BTC\] \$([0-9,.]+)'
)
# BTC factor line: same capture layout as the ETH factor pattern
# (group 1 = timestamp, groups 2..16 = TR OB FR TK OI MP VD BTC GM IV EX LC
# MR SM CT, group 17 = total after "=>", group 18 = word after "->"),
# but any run of whitespace is accepted between fields.
RE_BTC_FACTOR = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+ \[MyTrader\].*\[BTC\] '
    r'TR:([+-]?\d+\.?\d*)\s+OB:([+-]?\d+\.?\d*)\s+FR:([+-]?\d+\.?\d*)\s+'
    r'TK:([+-]?\d+\.?\d*)\s+OI:([+-]?\d+\.?\d*)\s+MP:([+-]?\d+\.?\d*)\s+'
    r'VD:([+-]?\d+\.?\d*)\s+BTC:([+-]?\d+\.?\d*)\s+GM:([+-]?\d+\.?\d*)\s+'
    r'IV:([+-]?\d+\.?\d*)\s+EX:([+-]?\d+\.?\d*)\s+LC:([+-]?\d+\.?\d*)\s+'
    r'MR:([+-]?\d+\.?\d*)\s+SM:([+-]?\d+\.?\d*)\s+CT:([+-]?\d+\.?\d*)\s+'
    r'mom:[+-]?\d+\.?\d*\s+flip:[+-]?\d+\.?\d*\s*=>\s*([+-]?\d+\.?\d*)\s*->\s*(\w+)'
)
# Generic "[News]" line: group 1 = timestamp, group 2 = text (an optional
# leading "DeepSeek:" is skipped), group 3 = optional integer after "=>".
# NOTE(review): not referenced by any function visible in this file — confirm
# whether other modules import it.
RE_BTC_NEWS = re.compile(
    r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}),\d+ \[MyTrader\].*\[News\]\s*'
    r'(?:DeepSeek:\s*)?(.*?)(?:=>\s*([+-]?\d+))?\s*$'
)


# Factor codes in the order of capture groups 2..16 of the BTC factor pattern.
_BTC_FACTOR_KEYS = (
    'TR', 'OB', 'FR', 'TK', 'OI', 'MP', 'VD', 'BTC',
    'GM', 'IV', 'EX', 'LC', 'MR', 'SM', 'CT',
)
# Patterns for the news handling in parse_rows_btc, compiled once.
_RE_BTC_LINE_TS = re.compile(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})')
_RE_BTC_NEWS_DEEPSEEK = re.compile(
    r'\[News\]\s*DeepSeek:\s*AI判断:([+-]?\d+\.?\d*)\(.*?\)\s+恐贪:(\d+)\s+(.*?)\s+\[(.+?)\]'
)
_RE_BTC_NEWS_PLAIN = re.compile(r'\[News\]\s*(.+?)(?:=>\s*([+-]?\d+))?\s*$')


def parse_rows_btc(log_path=None, max_rows=1000):
    """Parse a BTC-format log and return ``(rows, news_events)``.

    Args:
        log_path: path of the log file; falls back to DEFAULT_LOG when falsy.
        max_rows: keep only the last ``max_rows`` priced rows
            (applied as the slice ``rows[-max_rows:]``).

    Returns:
        ``([], [])`` when the file cannot be opened or read, otherwise:

        rows: [{'ts', 'price', 'TR', 'OB', ..., 'CT', 'total', 'dir'}, ...]
            'price' comes from the latest price line whose timestamp is at
            or before the factor line's timestamp (anywhere in the file).
        news_events: [{'ts', 'ai_score', 'fear_greed', 'summary',
                       'headline', 'score'}, ...]
            restricted to the time span of the returned rows when there
            are any rows.
    """
    import bisect  # local import so this function does not depend on file-level imports

    path = log_path or DEFAULT_LOG
    try:
        # Context manager guarantees the handle is closed even if reading fails.
        with open(path, encoding='utf-8', errors='replace') as fh:
            lines = fh.readlines()
    except (OSError, ValueError):
        # Missing/unreadable file or an invalid path string: best-effort empty result.
        return [], []

    # Pass 1: collect timestamp -> price.
    price_map = {}
    for line in lines:
        m = RE_BTC_PRICE.search(line)
        if m:
            try:
                price_map[m.group(1)] = float(m.group(2).replace(',', ''))
            except ValueError:
                # The price pattern also accepts malformed runs like "1.2.3".
                pass
    # Sorted once so each factor line needs only a binary search.
    # Timestamps are fixed-width strings, so string order is time order.
    price_ts = sorted(price_map)

    # Pass 2: factor rows and news lines.
    rows = []
    news_events = []
    for line in lines:
        # --- factor line ---
        m = RE_BTC_FACTOR.search(line)
        if m:
            ts = m.group(1)
            pos = bisect.bisect_right(price_ts, ts)
            price = price_map[price_ts[pos - 1]] if pos else None

            row = {'ts': ts, 'price': price}
            for group_no, key in enumerate(_BTC_FACTOR_KEYS, start=2):
                row[key] = float(m.group(group_no))
            row['total'] = float(m.group(17))
            row['dir'] = m.group(18)
            rows.append(row)

        # --- news line ---
        if '[News]' not in line or '[MyTrader]' not in line:
            continue
        ts_match = _RE_BTC_LINE_TS.match(line)   # timestamp must start the line
        if not ts_match:
            continue
        ts = ts_match.group(1)

        ds = _RE_BTC_NEWS_DEEPSEEK.search(line)
        if ds:
            # Structured DeepSeek format.
            news_events.append({
                'ts': ts,
                'ai_score': float(ds.group(1)),
                'fear_greed': int(ds.group(2)),
                'summary': ds.group(3).strip()[:120],
                'headline': ds.group(4).strip()[:100],
                'score': 0,  # placeholder; not populated by this function
            })
            continue

        # Any other news line, with an optional "=> <int>" score at the end.
        nx = _RE_BTC_NEWS_PLAIN.search(line)
        if nx:
            score_str = nx.group(2)
            news_events.append({
                'ts': ts,
                'ai_score': 0,
                'fear_greed': 0,
                'summary': nx.group(1).strip()[:120],
                'headline': '',
                'score': int(score_str) if score_str else 0,
            })

    # Forward-fill rows that found no price with the last priced row's value.
    last_px = None
    for r in rows:
        if r['price'] is not None:
            last_px = r['price']
        else:
            r['price'] = last_px

    # Drop rows that still have no price, then keep the most recent ones.
    rows = [r for r in rows if r['price'] is not None]
    rows = rows[-max_rows:]

    # Restrict news to the time span covered by the retained rows.
    if rows:
        t_min, t_max = rows[0]['ts'], rows[-1]['ts']
        news_events = [n for n in news_events if t_min <= n['ts'] <= t_max]

    return rows, news_events
