import json
import os
import re
from typing import Dict, List, Optional

import gradio as gr
import pandas as pd

# Verified badge HTML for OpenAI models
VERIFIED_BADGE = (
    '<span style="display: inline-block; width: 12px; height: 12px; '
    "background-color: #00bfa5; border-radius: 50%; color: white; "
    "text-align: center; line-height: 12px; font-size: 8px; font-weight: bold; "
    'margin-left: 4px; vertical-align: middle;">✓</span>'
)

# Unified data structure - each model defined once with both scores
# Format: List of dictionaries with model info and both Full Set and Text-Only scores
LEADERBOARD_DATA = [
    {
        "Model": f"Zoom Federated AI {VERIFIED_BADGE}",
        "Organization": "Zoom",
        "Open Source": "No",
        "Publish Date": "2025-12-29",
        "Full Set Score": 53.0,
        "Full Set Reference": "https://www.zoom.com/en/blog/zoom-ai-redefining-agentic-federated-intelligence/",
        "Text-Only Score": 55.2,
        "Text-Only Reference": "https://tinyurl.com/sileixu-hle-linkedin-53",
    },
    {
        "Model": f"GPT-5.2 Pro {VERIFIED_BADGE}",
        "Organization": "OpenAI",
        "Open Source": "No",
        "Publish Date": "2025-12-11",
        "Full Set Score": 50.0,
        "Full Set Reference": "https://openai.com/index/introducing-gpt-5-2/",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": f"GPT-5.2 Thinking {VERIFIED_BADGE}",
        "Organization": "OpenAI",
        "Open Source": "No",
        "Publish Date": "2025-12-11",
        "Full Set Score": 45.5,
        "Full Set Reference": "https://openai.com/index/introducing-gpt-5-2/",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": f"GPT-5 Pro {VERIFIED_BADGE}",
        "Organization": "OpenAI",
        "Open Source": "No",
        "Publish Date": "2025-12-11",
        "Full Set Score": 42.0,
        "Full Set Reference": "https://openai.com/index/introducing-gpt-5/",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": f"GPT-5 {VERIFIED_BADGE}",
        "Organization": "OpenAI",
        "Open Source": "No",
        "Publish Date": "2025-12-11",
        "Full Set Score": 35.2,
        "Full Set Reference": "https://openai.com/index/introducing-gpt-5/",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": f"ChatGPT Agent {VERIFIED_BADGE}",
        "Organization": "OpenAI",
        "Open Source": "No",
        "Publish Date": "2025-07-17",
        "Full Set Score": 41.6,
        "Full Set Reference": "https://openai.com/index/introducing-chatgpt-agent/",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": "OpenAI Deep Research",
        "Organization": "OpenAI",
        "Open Source": "No",
        "Publish Date": "2025-02-02",
        "Full Set Score": 26.6,
        "Full Set Reference": "https://openai.com/index/introducing-deep-research/",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": "Gemini Deep Research",
        "Organization": "Google",
        "Open Source": "No",
        "Publish Date": "2025-12-11",
        "Full Set Score": 46.4,
        "Full Set Reference": "https://blog.google/technology/developers/deep-research-agent-gemini-api/",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": "Gemini 3 Pro",
        "Organization": "Google",
        "Open Source": "No",
        "Publish Date": "2025-11-18",
        "Full Set Score": 45.8,
        "Full Set Reference": "https://blog.google/products/gemini/gemini-3/",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": "Gemini 3 Flash",
        "Organization": "Google",
        "Open Source": "No",
        "Publish Date": "2025-12-17",
        "Full Set Score": 43.5,
        "Full Set Reference": "https://blog.google/products/gemini/gemini-3-flash/",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": f"Kimi K2 Thinking {VERIFIED_BADGE}",
        "Organization": "Moonshot AI",
        "Open Source": "Yes",
        "Publish Date": "2025-11-06",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 44.9,
        "Text-Only Reference": "https://huggingface.co/moonshotai/Kimi-K2-Thinking",
    },
    {
        "Model": f"Kimi K2 Thinking (Heavy) {VERIFIED_BADGE}",
        "Organization": "Moonshot AI",
        "Open Source": "Yes",
        "Publish Date": "2025-11-06",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 51.0,
        "Text-Only Reference": "https://huggingface.co/moonshotai/Kimi-K2-Thinking",
    },
    {
        "Model": "Grok 4",
        "Organization": "xAI",
        "Open Source": "No",
        "Publish Date": "2025-07-09",
        "Full Set Score": 38.6,
        "Full Set Reference": "https://x.ai/news/grok-4",
        "Text-Only Score": "-",
        "Text-Only Reference": None,
    },
    {
        "Model": "Grok 4 (Heavy)",
        "Organization": "xAI",
        "Open Source": "No",
        "Publish Date": "2025-07-09",
        "Full Set Score": 44.4,
        "Full Set Reference": "https://x.ai/news/grok-4",
        "Text-Only Score": 50.7,
        "Text-Only Reference": "https://x.ai/news/grok-4",
    },
    {
        "Model": "GLM 4.7",
        "Organization": "Z.ai",
        "Open Source": "Yes",
        "Publish Date": "2025-12-22",
        "Full Set Score": 42.8,
        "Full Set Reference": "https://z.ai/blog/glm-4.7",
        "Text-Only Score": None,
        "Text-Only Reference": None,
    },
    {
        "Model": f"Claude Opus 4.5 {VERIFIED_BADGE}",
        "Organization": "Anthropic",
        "Open Source": "No",
        "Publish Date": "2025-12-22",
        "Full Set Score": 43.2,
        "Full Set Reference": "https://assets.anthropic.com/m/64823ba7485345a7/Claude-Opus-4-5-System-Card.pdf",
        "Text-Only Score": None,
        "Text-Only Reference": None,
    },
    {
        "Model": f"Claude Sonnet 4.5 {VERIFIED_BADGE}",
        "Organization": "Anthropic",
        "Open Source": "No",
        "Publish Date": "2025-09-29",
        "Full Set Score": 28.4,
        "Full Set Reference": "https://assets.anthropic.com/m/64823ba7485345a7/Claude-Opus-4-5-System-Card.pdf",
        "Text-Only Score": None,
        "Text-Only Reference": None,
    },
    {
        "Model": "DeepWriter",
        "Organization": "Deepwriter AI",
        "Open Source": "No",
        "Publish Date": "2025-11-26",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 50.9,
        "Text-Only Reference": "https://deepwriter.com/blog/small-team-beats-worlds-top-ai-labs-at-hle/",
    },
    {
        "Model": "Seed1.8",
        "Organization": "ByteDance",
        "Open Source": "No",
        "Publish Date": "2025-12-18",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 41.7,
        "Text-Only Reference": "https://lf3-static.bytednsdoc.com/obj/eden-cn/lapzild-tss/ljhwZthlaukjlkulzlp/research/Seed-1.8-Modelcard.pdf",
    },
    {
        "Model": f"MiroThinker-v1.5-235B {VERIFIED_BADGE}",
        "Organization": "MiroMind AI",
        "Open Source": "Yes",
        "Publish Date": "2026-01-04",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 39.2,
        "Text-Only Reference": "https://huggingface.co/miromind-ai/MiroThinker-v1.5-235B",
    },
    {
        "Model": f"MiroThinker-v1.5-30B {VERIFIED_BADGE}",
        "Organization": "MiroMind AI",
        "Open Source": "Yes",
        "Publish Date": "2026-01-04",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 31.0,
        "Text-Only Reference": "https://huggingface.co/miromind-ai/MiroThinker-v1.5-30B",
    },
    {
        "Model": f"MiroThinker-v1.0-72B {VERIFIED_BADGE}",
        "Organization": "MiroMind AI",
        "Open Source": "Yes",
        "Publish Date": "2025-11-14",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 37.7,
        "Text-Only Reference": "https://arxiv.org/pdf/2511.11793",
    },
    {
        "Model": f"MiroThinker-v1.0-30B {VERIFIED_BADGE}",
        "Organization": "MiroMind AI",
        "Open Source": "Yes",
        "Publish Date": "2025-11-14",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 33.4,
        "Text-Only Reference": "https://arxiv.org/pdf/2511.11793",
    },
    {
        "Model": f"MiroThinker-v1.0-8B {VERIFIED_BADGE}",
        "Organization": "MiroMind AI",
        "Open Source": "Yes",
        "Publish Date": "2025-11-14",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 21.5,
        "Text-Only Reference": "https://arxiv.org/pdf/2511.11793",
    },
    {
        "Model": "Tongyi-DeepResearch-30B-A3B",
        "Organization": "Alibaba",
        "Open Source": "Yes",
        "Publish Date": "2025-11-04",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 32.9,
        "Text-Only Reference": "https://arxiv.org/pdf/2510.24701",
    },
    {
        "Model": "Tongyi-DeepResearch-30B-A3B (Heavy)",
        "Organization": "Alibaba",
        "Open Source": "Yes",
        "Publish Date": "2025-11-04",
        "Full Set Score": "-",
        "Full Set Reference": None,
        "Text-Only Score": 38.3,
        "Text-Only Reference": "https://arxiv.org/pdf/2510.24701",
    },
    {
        "Model": "Perplexity Deep Research",
        "Organization": "Perplexity",
        "Open Source": "No",
        "Publish Date": "2025-02-14",
        "Full Set Score": 21.1,
        "Full Set Reference": "https://www.perplexity.ai/hub/blog/introducing-perplexity-deep-research",
        "Text-Only Score": None,
        "Text-Only Reference": None,
    },
    {
        "Model": "MiniMax-M2",
        "Organization": "MiniMax AI",
        "Open Source": "Yes",
        "Publish Date": "2025-10-27",
        "Full Set Score": None,
        "Full Set Reference": None,
        "Text-Only Score": 31.8,
        "Text-Only Reference": "https://huggingface.co/MiniMaxAI/MiniMax-M2",
    },
    {
        "Model": "ToolOrchestra",
        "Organization": "NVIDIA",
        "Open Source": "Yes",
        "Publish Date": "2025-11-26",
        "Full Set Score": None,
        "Full Set Reference": None,
        "Text-Only Score": 37.1,
        "Text-Only Reference": "https://arxiv.org/pdf/2511.21689",
    },
]

# Legacy support - kept for backward compatibility with file loading
FULL_SET_DATA = []
TEXT_ONLY_DATA = []


def load_data_from_file(filepath: str, data_type: str = "csv") -> Optional[List[Dict]]:
    """Load data from CSV or JSON file"""
    if not os.path.exists(filepath):
        return None

    try:
        if data_type == "csv":
            df = pd.read_csv(filepath)
            return df.to_dict("records")
        elif data_type == "json":
            with open(filepath, "r") as f:
                return json.load(f)
    except Exception as e:
        print(f"Error loading data from {filepath}: {e}")
        return None

    return None


def get_data():
    """Get data from files if they exist, otherwise use default unified data"""
    # Try to load unified data from file first
    unified_data = load_data_from_file("data/leaderboard.json", "json")
    if unified_data is None:
        unified_data = load_data_from_file("data/leaderboard.csv", "csv")

    # If unified data exists, split it into full_set and text_only format
    if unified_data:
        full_set = []
        text_only = []
        for entry in unified_data:
            # Extract common fields
            model_info = {
                "Model": entry.get("Model", ""),
                "Organization": entry.get("Organization", ""),
                "Open Source": entry.get("Open Source", ""),
                "Publish Date": entry.get("Publish Date", ""),
            }

            # Add full set data if available
            if entry.get("Full Set Score") is not None:
                full_set_entry = model_info.copy()
                full_set_entry["Score"] = entry.get("Full Set Score")
                if entry.get("Full Set Reference"):
                    full_set_entry["Reference"] = entry.get("Full Set Reference")
                full_set.append(full_set_entry)

            # Add text-only data if available
            if entry.get("Text-Only Score") is not None:
                text_only_entry = model_info.copy()
                text_only_entry["Score"] = entry.get("Text-Only Score")
                if entry.get("Text-Only Reference"):
                    text_only_entry["Reference"] = entry.get("Text-Only Reference")
                text_only.append(text_only_entry)

        return full_set, text_only

    # Fallback: Try to load separate files (backward compatibility)
    full_set = load_data_from_file("data/full_set.csv", "csv")
    if full_set is None:
        full_set = load_data_from_file("data/full_set.json", "json")
    if full_set is None:
        # Convert unified LEADERBOARD_DATA to separate formats
        full_set = []
        text_only = []
        for entry in LEADERBOARD_DATA:
            model_info = {
                "Model": entry.get("Model", ""),
                "Organization": entry.get("Organization", ""),
                "Open Source": entry.get("Open Source", ""),
                "Publish Date": entry.get("Publish Date", ""),
            }

            if entry.get("Full Set Score") is not None:
                full_set_entry = model_info.copy()
                full_set_entry["Score"] = entry.get("Full Set Score")
                if entry.get("Full Set Reference"):
                    full_set_entry["Reference"] = entry.get("Full Set Reference")
                full_set.append(full_set_entry)

            if entry.get("Text-Only Score") is not None:
                text_only_entry = model_info.copy()
                text_only_entry["Score"] = entry.get("Text-Only Score")
                if entry.get("Text-Only Reference"):
                    text_only_entry["Reference"] = entry.get("Text-Only Reference")
                text_only.append(text_only_entry)
        return full_set, text_only

    text_only = load_data_from_file("data/text_only.csv", "csv")
    if text_only is None:
        text_only = load_data_from_file("data/text_only.json", "json")
    if text_only is None:
        text_only = TEXT_ONLY_DATA

    return full_set, text_only


def create_leaderboard_table(data: List[Dict], title: str) -> pd.DataFrame:
    """Convert data list to pandas DataFrame for display"""
    df = pd.DataFrame(data)
    # Remove Rank and Tools columns if they exist
    columns_to_remove = ["Rank", "Tools"]
    for col in columns_to_remove:
        if col in df.columns:
            df = df.drop(columns=[col])
    # Sort by Score descending if Score column exists
    if "Score" in df.columns:
        df = df.sort_values("Score", ascending=False).reset_index(drop=True)
    return df


def dataframe_to_html(df: pd.DataFrame) -> str:
    """Convert DataFrame to HTML table with proper styling"""
    html = "<div>"
    html += '<table style="border-collapse: collapse; border: 2px solid #95d5b2; margin: 0 auto;">'
    # Header
    html += "<thead><tr>"
    for col in df.columns:
        html += f'<th style="padding: 10px; border: 1px solid #95d5b2; background-color: #95d5b2; color: #1a1a1a; text-align: center;">{col}</th>'
    html += "</tr></thead>"
    # Body
    html += "<tbody>"
    for idx, row in df.iterrows():
        html += "<tr>"
        for col in df.columns:
            cell_value = str(row[col]) if pd.notna(row[col]) else ""
            # Check if cell contains HTML link
            if "<a href=" in cell_value:
                html += f'<td style="padding: 10px; border: 1px solid #ddd; text-align: center;">{cell_value}</td>'
            else:
                html += f'<td style="padding: 10px; border: 1px solid #ddd; text-align: center;">{cell_value}</td>'
        html += "</tr>"
    html += "</tbody></table>"
    html += "</div>"
    return html


def format_leaderboard_table(data: List[Dict], score_column_name: str) -> pd.DataFrame:
    """Format a single leaderboard table with proper score formatting and links"""
    if not data:
        return pd.DataFrame()

    # Filter out models without scores (None, empty string, or "-")
    filtered_data = []
    for row in data:
        score = row.get("Score", "")
        # Skip if score is None, empty, or "-"
        if score is None or str(score).strip() == "" or str(score).strip() == "-":
            continue
        filtered_data.append(row)

    if not filtered_data:
        return pd.DataFrame()

    # Create DataFrame from filtered data
    df = pd.DataFrame(filtered_data)

    # Format scores with links and tooltips
    formatted_data = []
    for _, row in df.iterrows():
        formatted_row = {
            "Agent / Model": row.get("Model", ""),
            "Organization": row.get("Organization", ""),
            "Open Source": row.get("Open Source", ""),
            "Publish Date": row.get("Publish Date", ""),
        }

        score = row.get("Score", "")
        reference = row.get("Reference", "")

        # Format score with reference link if available
        if score and reference:
            # Format as HTML link with custom tooltip
            formatted_row[score_column_name] = (
                f'<span class="tooltip"><a href="{reference}" target="_blank">'
                f'<strong>{score}</strong></a><span class="tooltiptext">{reference}</span></span>'
            )
        elif score:
            # Format score (numeric values)
            try:
                score_val = float(score)
                formatted_row[score_column_name] = f"<strong>{score_val:.2f}</strong>"
            except (ValueError, TypeError):
                formatted_row[score_column_name] = f"<strong>{score}</strong>"
        else:
            formatted_row[score_column_name] = ""

        formatted_data.append(formatted_row)

    result_df = pd.DataFrame(formatted_data)

    # Sort by score descending
    def extract_score_for_sorting(score_str):
        """Extract numeric score from string"""
        if not score_str or str(score_str).strip() == "" or str(score_str).strip() == "-":
            return None
        # Extract from HTML if present
        match = re.search(r"<strong>([\d.]+)</strong>", str(score_str))
        if match:
            try:
                return float(match.group(1))
            except (ValueError, TypeError):
                pass
        # Try direct conversion
        try:
            return float(score_str)
        except (ValueError, TypeError):
            return None

    if score_column_name in result_df.columns:
        result_df["_sort_score"] = result_df[score_column_name].apply(extract_score_for_sorting)
        result_df = result_df.sort_values("_sort_score", ascending=False, na_position="last")
        result_df = result_df.drop(columns=["_sort_score"])

    # Reorder columns
    column_order = ["Agent / Model", "Organization", "Open Source", "Publish Date", score_column_name]
    existing_columns = [col for col in column_order if col in result_df.columns]
    other_columns = [col for col in result_df.columns if col not in column_order]
    result_df = result_df[existing_columns + other_columns]

    return result_df.reset_index(drop=True)


def merge_leaderboard_tables(full_set_data: List[Dict], text_only_data: List[Dict]) -> pd.DataFrame:
    """Merge full set and text-only data into a single table"""
    # Create dictionaries keyed by Model name for easier merging
    full_set_dict = {}
    for row in full_set_data:
        model = row.get("Model", "")
        if model:
            full_set_dict[model] = row

    text_only_dict = {}
    for row in text_only_data:
        model = row.get("Model", "")
        if model:
            text_only_dict[model] = row

    # Get all unique models
    all_models = set(full_set_dict.keys()) | set(text_only_dict.keys())

    # Build merged data
    merged_data = []

    def get_sort_key(model):
        """Get sort key for initial model ordering"""
        full_score = full_set_dict.get(model, {}).get("Score", "")
        text_score = text_only_dict.get(model, {}).get("Score", "")

        # Try to get full set score
        if full_score and str(full_score).strip() != "-":
            try:
                return -float(full_score)
            except (ValueError, TypeError):
                pass

        # If no valid full score, use text-only score minus 3%
        if text_score and str(text_score).strip() != "-":
            try:
                return -float(text_score) + 3
            except (ValueError, TypeError):
                pass

        # Default to 0 if no valid scores
        return 0

    for model in sorted(all_models, key=get_sort_key):
        row = {"Model": model}

        # Add full set score with reference link
        if model in full_set_dict:
            full_score = full_set_dict[model].get("Score", "")
            full_ref = full_set_dict[model].get("Reference", "")
            if full_score and full_ref:
                # Format as HTML link with custom tooltip
                row["Full Set Score"] = (
                    f'<span class="tooltip"><a href="{full_ref}" target="_blank">'
                    f'<strong>{full_score}</strong></a><span class="tooltiptext">{full_ref}</span></span>'
                )
            elif full_score:
                row["Full Set Score"] = f"<strong>{full_score}</strong>"
            else:
                row["Full Set Score"] = ""
        else:
            row["Full Set Score"] = ""

        # Add text-only score with reference link
        if model in text_only_dict:
            text_score = text_only_dict[model].get("Score", "")
            text_ref = text_only_dict[model].get("Reference", "")
            if text_score and text_ref:
                # Format as HTML link with custom tooltip
                row["Text-Only Score"] = (
                    f'<span class="tooltip"><a href="{text_ref}" target="_blank">'
                    f'<strong>{text_score}</strong></a><span class="tooltiptext">{text_ref}</span></span>'
                )
            elif text_score:
                row["Text-Only Score"] = f"<strong>{text_score}</strong>"
            else:
                row["Text-Only Score"] = ""
        else:
            row["Text-Only Score"] = ""

        # Add Organization (from data if available, otherwise use placeholder)
        if model in full_set_dict and "Organization" in full_set_dict[model]:
            row["Organization"] = full_set_dict[model]["Organization"]
        elif model in text_only_dict and "Organization" in text_only_dict[model]:
            row["Organization"] = text_only_dict[model]["Organization"]
        else:
            row["Organization"] = ""  # Will be filled from data if available

        # Add Open Source (from data if available, otherwise use placeholder)
        if model in full_set_dict and "Open Source" in full_set_dict[model]:
            row["Open Source"] = full_set_dict[model]["Open Source"]
        elif model in full_set_dict and "OpenSource" in full_set_dict[model]:
            row["Open Source"] = full_set_dict[model]["OpenSource"]
        elif model in text_only_dict and "Open Source" in text_only_dict[model]:
            row["Open Source"] = text_only_dict[model]["Open Source"]
        elif model in text_only_dict and "OpenSource" in text_only_dict[model]:
            row["Open Source"] = text_only_dict[model]["OpenSource"]
        else:
            row["Open Source"] = ""  # Will be filled from data if available

        # Add Publish Date (from data if available)
        if model in full_set_dict and "Publish Date" in full_set_dict[model]:
            row["Publish Date"] = full_set_dict[model]["Publish Date"]
        elif model in text_only_dict and "Publish Date" in text_only_dict[model]:
            row["Publish Date"] = text_only_dict[model]["Publish Date"]
        else:
            row["Publish Date"] = ""

        merged_data.append(row)

    # Create DataFrame
    merged_df = pd.DataFrame(merged_data)

    # Extract numeric scores for sorting (before formatting as links)
    # We need to sort before converting to links, so let's extract the numeric values
    def extract_score_for_sorting(score_str):
        """Extract numeric score from string (handles both plain numbers and HTML links)"""
        if not score_str or str(score_str).strip() == "" or str(score_str).strip() == "-":
            return None
        # If it's an HTML link like <a href="url">85.2</a>, extract 85.2
        match = re.search(r"<a[^>]*>([\d.]+)</a>", str(score_str))
        if match:
            return float(match.group(1))
        # Otherwise try to convert directly
        try:
            return float(score_str)
        except Exception:
            return None

    # Sort by Full Set Score descending (or Text-Only Score minus 3% if Full Set Score is None or "-")
    if "Full Set Score" in merged_df.columns and "Text-Only Score" in merged_df.columns:
        merged_df["_full_score"] = merged_df["Full Set Score"].apply(extract_score_for_sorting)
        merged_df["_text_score"] = merged_df["Text-Only Score"].apply(extract_score_for_sorting)

        # Calculate sort score: use Full Set Score if available, otherwise Text-Only Score * 0.97
        def calculate_sort_score(row):
            full_score = row["_full_score"]
            text_score = row["_text_score"]
            if full_score is not None:
                return full_score
            elif text_score is not None:
                return text_score * 0.97  # Minus 3%
            else:
                return 0

        merged_df["_sort_score"] = merged_df.apply(calculate_sort_score, axis=1)
        merged_df = merged_df.sort_values("_sort_score", ascending=False, na_position="last")
        merged_df = merged_df.drop(columns=["_sort_score", "_full_score", "_text_score"])
    elif "Full Set Score" in merged_df.columns:
        merged_df["_sort_score"] = merged_df["Full Set Score"].apply(extract_score_for_sorting)
        merged_df = merged_df.sort_values("_sort_score", ascending=False, na_position="last")
        merged_df = merged_df.drop(columns=["_sort_score"])

        # Format scores that aren't already links
        def format_score_with_ref(score_str):
            """Format score, ensuring it has 2 decimal places if it's a link"""
            if not score_str:
                return ""
            # If already an HTML link with tooltip and bold, format the number inside
            tooltip_pattern = (
                r'<span class="tooltip"><a href="([^"]+)"[^>]*><strong>([\d.]+)</strong></a>' r'<span class="tooltiptext">([^<]+)</span></span>'
            )
            match = re.match(tooltip_pattern, str(score_str))
            if match:
                url = match.group(1)
                score_val = float(match.group(2))
                tooltip_text = match.group(3)
                return (
                    f'<span class="tooltip"><a href="{url}" target="_blank">'
                    f'<strong>{score_val:.2f}</strong></a><span class="tooltiptext">{tooltip_text}</span></span>'
                )
            # If already an HTML link with tooltip (without bold), format the number inside
            tooltip_pattern = r'<span class="tooltip"><a href="([^"]+)"[^>]*>([\d.]+)</a>' r'<span class="tooltiptext">([^<]+)</span></span>'
            match = re.match(tooltip_pattern, str(score_str))
            if match:
                url = match.group(1)
                score_val = float(match.group(2))
                tooltip_text = match.group(3)
                return (
                    f'<span class="tooltip"><a href="{url}" target="_blank">'
                    f'<strong>{score_val:.2f}</strong></a><span class="tooltiptext">{tooltip_text}</span></span>'
                )
            # If it's a simple link without tooltip wrapper, add tooltip wrapper
            match = re.match(r'<a href="([^"]+)"[^>]*>([\d.]+)</a>', str(score_str))
            if match:
                url = match.group(1)
                score_val = float(match.group(2))
                return (
                    f'<span class="tooltip"><a href="{url}" target="_blank">'
                    f'<strong>{score_val:.2f}</strong></a><span class="tooltiptext">{url}</span></span>'
                )
            # If it's already bold but not a link
            match = re.match(r"<strong>([\d.]+)</strong>", str(score_str))
            if match:
                score_val = float(match.group(1))
                return f"<strong>{score_val:.2f}</strong>"
            # Otherwise format as plain number with bold
            try:
                score_val = float(score_str)
                return f"<strong>{score_val:.2f}</strong>"
            except Exception:
                # If it's already a string with special characters (like "-"), keep it as is but make bold
                return f"<strong>{score_str}</strong>"

        merged_df["Full Set Score"] = merged_df["Full Set Score"].apply(format_score_with_ref)

    if "Text-Only Score" in merged_df.columns:

        def format_score_with_ref(score_str):
            """Format score, ensuring it has 2 decimal places if it's a link"""
            if not score_str:
                return ""
            # If already an HTML link with tooltip, format the number inside
            tooltip_pattern = r'<span class="tooltip"><a href="([^"]+)"[^>]*>([\d.]+)</a>' r'<span class="tooltiptext">([^<]+)</span></span>'
            match = re.match(tooltip_pattern, str(score_str))
            if match:
                url = match.group(1)
                score_val = float(match.group(2))
                tooltip_text = match.group(3)
                return (
                    f'<span class="tooltip"><a href="{url}" target="_blank">'
                    f'{score_val:.2f}</a><span class="tooltiptext">{tooltip_text}</span></span>'
                )
            # If it's a simple link without tooltip wrapper, add tooltip wrapper
            match = re.match(r'<a href="([^"]+)"[^>]*>([\d.]+)</a>', str(score_str))
            if match:
                url = match.group(1)
                score_val = float(match.group(2))
                return (
                    f'<span class="tooltip"><a href="{url}" target="_blank">'
                    f'<strong>{score_val:.2f}</strong></a><span class="tooltiptext">{url}</span></span>'
                )
            # Otherwise format as plain number
            try:
                score_val = float(score_str)
                return f"{score_val:.2f}"
            except Exception:
                return str(score_str)

        merged_df["Text-Only Score"] = merged_df["Text-Only Score"].apply(format_score_with_ref)

    # Ensure Organization, Open Source, and Publish Date columns exist
    if "Organization" not in merged_df.columns:
        merged_df["Organization"] = ""
    if "Open Source" not in merged_df.columns:
        merged_df["Open Source"] = ""
    if "Publish Date" not in merged_df.columns:
        merged_df["Publish Date"] = ""

    # Rename Model column to "Agent / Model"
    if "Model" in merged_df.columns:
        merged_df = merged_df.rename(columns={"Model": "Agent / Model"})

    # Reorder columns: Agent / Model, Organization, Open Source, Publish Date, Full Set Score, Text-Only Score
    column_order = ["Agent / Model", "Organization", "Open Source", "Publish Date", "Full Set Score", "Text-Only Score"]
    existing_columns = [col for col in column_order if col in merged_df.columns]
    other_columns = [col for col in merged_df.columns if col not in column_order]
    merged_df = merged_df[existing_columns + other_columns]

    return merged_df.reset_index(drop=True)


def markdown_to_html(text: str) -> str:
    """Convert markdown text to HTML while preserving HTML badges"""
    import re

    lines = text.split("\n")
    html_lines = []
    in_list = False
    in_paragraph = False
    paragraph_lines = []

    def process_inline_markdown(line: str) -> str:
        """Process inline markdown (bold, links) in a line"""
        # Convert markdown links [text](url) to HTML links
        line = re.sub(r"\[([^\]]+)\]\(([^\)]+)\)", r'<a href="\2" target="_blank">\1</a>', line)
        # Convert markdown bold **text** to HTML bold
        line = re.sub(r"\*\*([^\*]+)\*\*", r"<strong>\1</strong>", line)
        return line

    for line in lines:
        stripped = line.strip()

        # Handle headers
        if stripped.startswith("#### "):
            if in_list:
                html_lines.append("</ul>")
                in_list = False
            if in_paragraph:
                html_lines.append("<p>" + " ".join(paragraph_lines) + "</p>")
                paragraph_lines = []
                in_paragraph = False
            header_text = process_inline_markdown(stripped[5:])
            html_lines.append(f"<h4>{header_text}</h4>")
        elif stripped.startswith("### "):
            if in_list:
                html_lines.append("</ul>")
                in_list = False
            if in_paragraph:
                html_lines.append("<p>" + " ".join(paragraph_lines) + "</p>")
                paragraph_lines = []
                in_paragraph = False
            header_text = process_inline_markdown(stripped[4:])
            html_lines.append(f"<h3>{header_text}</h3>")
        # Handle list items
        elif stripped.startswith("- "):
            if in_paragraph:
                html_lines.append("<p>" + " ".join(paragraph_lines) + "</p>")
                paragraph_lines = []
                in_paragraph = False
            if not in_list:
                html_lines.append("<ul>")
                in_list = True
            list_item = process_inline_markdown(stripped[2:])
            html_lines.append(f"<li>{list_item}</li>")
        # Handle empty lines
        elif not stripped:
            if in_list:
                html_lines.append("</ul>")
                in_list = False
            if in_paragraph:
                html_lines.append("<p>" + " ".join(paragraph_lines) + "</p>")
                paragraph_lines = []
                in_paragraph = False
            html_lines.append("<br>")
        # Handle regular paragraphs
        else:
            if in_list:
                html_lines.append("</ul>")
                in_list = False
            processed_line = process_inline_markdown(stripped)
            paragraph_lines.append(processed_line)
            in_paragraph = True

    # Close any open structures
    if in_list:
        html_lines.append("</ul>")
    if in_paragraph:
        html_lines.append("<p>" + " ".join(paragraph_lines) + "</p>")

    return "\n".join(html_lines)


def create_motivation_section():
    """Create the motivation section content as HTML"""
    motivation = f"""### About Humanity's Last Exam (HLE)
    [Humanity's Last Exam (HLE)](https://agi.safe.ai/) is a rigorous, multi-modal AI benchmark created by the Center for AI Safety in collaboration with Scale AI, 
    designed to push large language models beyond saturated tests by evaluating reasoning and expert-level knowledge across thousands of 
    challenging questions spanning mathematics, natural sciences, and the humanities.

    ### Why another leaderboard?
    While leaderboards existed for HLE, they fall short in several ways, leading to widespread confusion about true state-of-the-art results.
    In fact, if you ask ChatGPT, Gemini, and Claude "What's the SOTA for Humanity's Last Exam" today, they will all get the answer wrong.
    - **Tool Exclusion**: The [official leaderboard by Scale AI](https://scale.com/leaderboard/humanitys_last_exam) and many other leaderboards focus on models without tool use.
    - **Lack of separation**: Scores for the full HLE benchmark and the text-only subset are often mixed together, leading to unfair comparisons - score on full set is generally lower than the text only subset.
    - **Data Contamination**: Since copies of HLE, blogs and papers discussing HLE have been indexed by search engines, scores might be artificially inflated for agents without filtering. In this leaderboard, we add a{VERIFIED_BADGE} badge to indicate that some form of filtering is mentioned for the agent or its previous versions.
    """
    return markdown_to_html(motivation)


def build_leaderboard():
    """Build the Gradio interface"""

    # Get data (from files or defaults)
    full_set_data, text_only_data = get_data()

    # Create separate tables for Full Set and Text-Only
    full_set_df = format_leaderboard_table(full_set_data, "Full Set Score")
    text_only_df = format_leaderboard_table(text_only_data, "Text-Only Score")

    custom_css = """
    .gradio-container {
        max-width: 1200px;
        width: 100%;
        margin: 0 auto;
        padding: 0 10px;
        overflow: visible !important;
    }
    .gradio-row {
        justify-content: center;
        overflow: visible !important;
    }
    .gradio-column {
        text-align: left;
        max-width: 100%;
        overflow: visible !important;
    }
    h1, h2, h3 {
        text-align: center !important;
    }
    .markdown-text h1, .markdown-text h2, .markdown-text h3 {
        text-align: center !important;
    }
    hr {
        border-color: #95d5b2 !important;
        border-width: 2px !important;
    }
    .markdown-text, .markdown-text p, .markdown-text ul, .markdown-text ol, .markdown-text li {
        text-align: left !important;
    }
    /* Style verified badge in markdown - restore badge styling for all spans if inline styles are stripped */
    .markdown-text span {
        display: inline-block !important;
        width: 12px !important;
        height: 12px !important;
        background-color: #00bfa5 !important;
        border-radius: 50% !important;
        color: white !important;
        text-align: center !important;
        line-height: 12px !important;
        font-size: 8px !important;
        font-weight: bold !important;
        margin-left: 4px !important;
        vertical-align: middle !important;
    }
    table {
        border: 2px solid #95d5b2 !important;
        border-collapse: collapse;
        margin: 0 auto;
    }
    table th {
        background-color: #95d5b2 !important;
        color: #1a1a1a !important;
        text-align: center;
    }
    table td {
        text-align: center;
    }
    table tr:hover {
        background-color: rgba(149, 213, 178, 0.1) !important;
    }
    table a {
        color: inherit !important;
        text-decoration: none !important;
    }
    table a:hover {
        color: #00bfa5 !important;
    }
    /* Make Agent / Model column bold */
    table td:first-child, table th:first-child {
        font-weight: bold !important;
    }
    /* Make scores green */
    table td strong {
        color: #00bfa5 !important;
    }
    table td a strong {
        color: #00bfa5 !important;
    }
    /* Custom tooltip styling */
    .tooltip {
        position: relative;
        display: inline-block;
    }
    .tooltip .tooltiptext {
        visibility: hidden;
        opacity: 0;
        background-color: rgba(60, 60, 60, 0.95);
        color: #ffffff;
        text-align: center;
        border-radius: 0;
        padding: 14px 18px;
        position: absolute;
        z-index: 9999;
        bottom: 125%;
        left: 50%;
        transform: translateX(-50%);
        font-size: 13px;
        box-shadow: 0 6px 16px rgba(0, 0, 0, 0.3);
        transition: opacity 0.05s ease-in, visibility 0.05s;
        pointer-events: none;
        margin-bottom: 15px;
        word-wrap: break-word;
        white-space: nowrap;
        line-height: 1.5;
        border: none;
    }
    .tooltip .tooltiptext::after {
        content: "";
        position: absolute;
        top: 100%;
        left: 50%;
        margin-left: -6px;
        border-width: 6px;
        border-style: solid;
        border-color: rgba(60, 60, 60, 0.95) transparent transparent transparent;
    }
    .tooltip:hover .tooltiptext {
        visibility: visible;
        opacity: 1;
        transition: opacity 0.05s ease-in, visibility 0s;
    }
    /* Mobile responsive styles */
    @media screen and (max-width: 768px) {
        .gradio-container {
            max-width: 100%;
            padding: 0 5px;
        }
        table {
            font-size: 12px;
        }
        table td, table th {
            padding: 6px 4px !important;
            font-size: 11px;
        }
        h1 {
            font-size: 24px !important;
        }
        h2 {
            font-size: 20px !important;
        }
        h3 {
            font-size: 18px !important;
        }
        .markdown-text {
            font-size: 14px !important;
        }
        .tooltip .tooltiptext {
            font-size: 11px;
            padding: 10px 12px;
            white-space: normal;
        }
    }
    @media screen and (max-width: 480px) {
        .gradio-container {
            padding: 0 3px;
        }
        table {
            font-size: 10px;
        }
        table td, table th {
            padding: 4px 2px !important;
            font-size: 10px;
        }
        h1 {
            font-size: 20px !important;
        }
        h2 {
            font-size: 18px !important;
        }
        h3 {
            font-size: 16px !important;
        }
        .markdown-text {
            font-size: 13px !important;
        }
    }
    """

    with gr.Blocks(title="HLE Leaderboard for Agents with Tools", theme="JohnSmith9982/small_and_pretty", css=custom_css) as demo:
        gr.Markdown(
            """
            # 🏆 Humanity's Last Exam Leaderboard for Agents with Tools
            """
        )

        gr.Markdown("---")

        # Motivation Section
        with gr.Row():
            with gr.Column():
                motivation_text = create_motivation_section()
                gr.HTML(motivation_text)

        gr.Markdown("---")
        gr.Markdown("<br>")

        # Full Set Leaderboard
        gr.Markdown("## 📊 Full Set Leaderboard")
        gr.Markdown(
            """
            Results on the complete set of tasks (2500 examples).
            """
        )
        # Convert DataFrame to HTML with clickable links
        full_set_html = dataframe_to_html(full_set_df)

        # Display as HTML to support clickable links
        gr.HTML(value=full_set_html)

        # Text-Only Leaderboard
        gr.Markdown("<br>")
        gr.Markdown("## 📊 Text-Only Leaderboard")
        gr.Markdown(
            """
            Results on the text-only subset of tasks (2158 examples).
            """
        )
        # Convert DataFrame to HTML with clickable links
        text_only_html = dataframe_to_html(text_only_df)

        # Display as HTML to support clickable links
        gr.HTML(value=text_only_html)

        # Add JavaScript for tooltip positioning (injected after page load)
        gr.HTML(
            value="""<script>
            window.addEventListener('load', function() {
                setTimeout(function() {
                    var tooltips = document.querySelectorAll('.tooltip');
                    tooltips.forEach(function(tooltip) {
                        var tooltipText = tooltip.querySelector('.tooltiptext');
                        if (!tooltipText || tooltip.dataset.init) return;
                        tooltip.dataset.init = 'true';
                        
                        tooltip.addEventListener('mouseenter', function() {
                            var rect = tooltip.getBoundingClientRect();
                            var vw = window.innerWidth;
                            var centerX = rect.left + rect.width / 2;
                            var tooltipWidth = Math.min(1000, vw - 20);
                            
                            tooltipText.style.position = 'fixed';
                            tooltipText.style.zIndex = '99999';
                            tooltipText.style.maxWidth = tooltipWidth + 'px';
                            
                            var top = rect.top - tooltipText.offsetHeight - 15;
                            if (top < 10) top = rect.bottom + 15;
                            
                            var left = centerX - tooltipWidth / 2;
                            if (left + tooltipWidth > vw - 10) left = vw - tooltipWidth - 10;
                            if (left < 10) left = 10;
                            
                            tooltipText.style.top = top + 'px';
                            tooltipText.style.left = left + 'px';
                            tooltipText.style.transform = 'none';
                        });
                        
                        tooltip.addEventListener('mouseleave', function() {
                            tooltipText.style.position = '';
                            tooltipText.style.top = '';
                            tooltipText.style.left = '';
                            tooltipText.style.transform = '';
                        });
                    });
                }, 200);
            });
            </script>""",
            visible=False,
        )

        # Footer

        gr.Markdown("<br>")
        notes_text = f"""#### Notes
            - All models and agents listed have tool use capabilities.
            - All numbers are based on the official report of each agent.
            - Missing{VERIFIED_BADGE} badge does not necessarily mean the agent applied no filtering, it just means that no such mention is found.
            - Having{VERIFIED_BADGE} badge does not necessarily mean the agent applied perfect filtering. E.g., only blocking huggingface URLs won't be sufficient.
            - We exclude scores reported on non-official subsets as the results are not comparable.
            - Please contact us at hle@zoom.us if you find any errors or want to add a model or agent to the leaderboard.
            - Last updated: Jan 8, 2026.
            """
        gr.HTML(markdown_to_html(notes_text))

    return demo


if __name__ == "__main__":
    demo = build_leaderboard()
    demo.launch(share=False, server_name="0.0.0.0", server_port=7860)