"""
Smart AI Healthcare Consultant — v2.0
Upgraded from Nebius/Qwen → NVIDIA NIM (Free Tier)
Model: nvidia/nemotron-3-super-120b-a12b (reasoning model)
Author: abhishekudeniyan | Updated: 2026
"""

import gradio as gr
import os
import json
import re
from openai import OpenAI

# ─────────────────────────────────────────────
# 🔧 CONFIGURATION
# ─────────────────────────────────────────────
# API key comes from the environment; without it no client is created below.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")

# Model tried first — NVIDIA NIM free tier (reasoning-capable).
PRIMARY_MODEL = "nvidia/nemotron-3-super-120b-a12b"

# Model tried second, e.g. when the primary quota is hit (also free on NIM).
FALLBACK_MODEL = "meta/llama-3.3-70b-instruct"

# OpenAI-compatible client pointed at the NVIDIA endpoint.
# Stays None when the key is missing so callers can report that cleanly.
if NVIDIA_API_KEY:
    client = OpenAI(
        api_key=NVIDIA_API_KEY,
        base_url="https://integrate.api.nvidia.com/v1",
    )
else:
    client = None


# ─────────────────────────────────────────────
# 📋 PROMPT TEMPLATE
# ─────────────────────────────────────────────
# User-message template for the triage request.
# It is filled with str.format(), so:
#   - {gender}, {age}, {pre_existing} and {symptoms} are the placeholders;
#   - the doubled braces {{ ... }} around the JSON schema are escapes that
#     come out as single literal braces in the final prompt.
# The field names listed in the schema are the keys the rest of the file
# reads back from the model's JSON answer.
PROMPT_TEMPLATE = """
You are a smart AI healthcare triage consultant. Analyze the patient data below and respond ONLY in valid raw JSON.

Patient Information:
- Gender: {gender}
- Age: {age}
- Pre-existing Conditions: {pre_existing}
- Current Symptoms: "{symptoms}"

Clinical Instructions:
- Apply standard triage logic and symptom analysis.
- Be specific and medically accurate.
- Never recommend emergency-level without clear justification.

Respond ONLY with a raw JSON object (no markdown, no explanation) containing exactly these fields:
{{
  "urgency_level": "<Low | Moderate | High | Emergency>",
  "possible_condition": "<short diagnosis>",
  "icd_hint": "<ICD-10 code guess, e.g. J06.9>",
  "recommended_action": "<clear next steps for patient>",
  "suggested_medication": "<OTC or general guidance, or 'Consult physician'>",
  "red_flags": "<warning signs to watch for, or 'None'>",
  "lifestyle_tip": "<one relevant wellness tip>"
}}
"""

# ─────────────────────────────────────────────
# 🧠 CORE LOGIC
# ─────────────────────────────────────────────
def build_prompt(gender: str, age: str, pre_existing: str, symptoms: str) -> str:
    """
    Render PROMPT_TEMPLATE for one patient.

    Surrounding whitespace is trimmed from ``pre_existing`` and ``symptoms``;
    a blank ``pre_existing`` is replaced by the text "None reported".
    ``gender`` and ``age`` are inserted unchanged.
    """
    conditions = pre_existing.strip()
    if not conditions:
        conditions = "None reported"

    fields = {
        "gender": gender,
        "age": age,
        "pre_existing": conditions,
        "symptoms": symptoms.strip(),
    }
    return PROMPT_TEMPLATE.format(**fields)


def extract_json(text: str) -> str:
    """
    Strip markdown fences and return the first JSON object found in ``text``.

    The text is scanned for the first ``{`` from which a complete JSON value
    can be decoded, and exactly that slice is returned. Anything after the
    object (extra prose, stray braces) is ignored, and braces inside JSON
    strings are handled by the decoder rather than by pattern matching.

    Args:
        text: Raw model output, possibly wrapped in ``` fences or prose.

    Returns:
        The substring holding the first decodable JSON object. If no object
        can be decoded, the widest ``{...}`` span is returned, or the cleaned
        text when it has no braces at all, so that the caller's ``json.loads``
        reports the parse error.
    """
    # Remove ```json / ``` fence markers and any leftover backticks.
    cleaned = re.sub(r"```(?:json)?", "", text).strip("`").strip()

    decoder = json.JSONDecoder()
    for opening in re.finditer(r"\{", cleaned):
        start = opening.start()
        try:
            _, end = decoder.raw_decode(cleaned, start)
        except json.JSONDecodeError:
            continue  # this "{" does not start valid JSON; try the next one
        return cleaned[start:end]

    # Nothing decodable: hand back the best-effort span for error reporting.
    match = re.search(r"\{.*\}", cleaned, re.DOTALL)
    return match.group(0) if match else cleaned


def call_nvidia_stream(prompt: str, model: str) -> str:
    """
    Stream a chat completion from NVIDIA NIM and return the answer text.

    Only ``delta.content`` fragments are collected. ``reasoning_content``
    (the model's thinking tokens) is never read, so it cannot leak into the
    text that is later parsed as JSON.

    Args:
        prompt: The user message to send.
        model: Model identifier passed to the chat-completions endpoint.

    Returns:
        All streamed ``content`` fragments joined in arrival order; an empty
        string if the stream carried no content.

    Raises:
        Whatever the module-level ``client`` raises for a failed request;
        nothing is caught here. The caller must also ensure ``client`` is
        not None.
    """
    # NOTE(review): max_tokens (1024) is smaller than reasoning_budget (8192).
    # If reasoning tokens count against max_tokens the final answer could be
    # cut off — confirm against the NIM documentation for this model.
    # NOTE(review): the same extra_body is sent for every model, including the
    # fallback; verify that non-reasoning models accept these fields.
    completion = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a precise medical triage AI. "
                    "Respond ONLY with a valid raw JSON object. No prose. No markdown."
                )
            },
            {"role": "user", "content": prompt}
        ],
        temperature=0.3,          # Low temp for clinical accuracy
        top_p=0.95,
        max_tokens=1024,
        extra_body={
            "chat_template_kwargs": {
                "enable_thinking": True,  # Activate chain-of-thought reasoning
                "low_effort": True        # Faster / lighter reasoning budget
            },
            "reasoning_budget": 8192      # Allow model to think before answering
        },
        stream=True
    )

    answer_parts = []
    for chunk in completion:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        # Collect answer text whenever it is present. A chunk that carries
        # reasoning_content *and* content must not be skipped, otherwise the
        # start of the answer is lost; reasoning_content itself is ignored.
        content = getattr(delta, "content", None)
        if content:
            answer_parts.append(content)

    return "".join(answer_parts)


def triage_response(gender: str, age: str, pre_existing: str, symptoms: str) -> dict:
    """
    Run symptom triage with the primary model, then the fallback model.

    The fallback model is tried when the current model's answer cannot be
    parsed as JSON, is not a JSON object with the required fields, or when
    the request is rejected for rate-limit/quota reasons. Any other request
    failure is returned immediately as an error.

    Args:
        gender: Patient gender, inserted into the prompt as given.
        age: Patient age, inserted into the prompt as given.
        pre_existing: Known conditions; blank or None means none reported.
        symptoms: Free-text symptom description; must not be blank.

    Returns:
        On success, the model's JSON object plus a ``"_model_used"`` key
        naming the model that answered. Otherwise ``{"error": <message>}``.
    """
    if not client:
        return {"error": "NVIDIA_API_KEY environment variable not set. Please add it in HuggingFace Space Secrets."}

    # Treat a missing (None) field like an empty one so .strip() cannot raise.
    symptoms = symptoms or ""
    pre_existing = pre_existing or ""

    if not symptoms.strip():
        return {"error": "Please describe your symptoms before submitting."}

    prompt = build_prompt(gender, age, pre_existing, symptoms)

    required = (
        "urgency_level", "possible_condition",
        "recommended_action", "suggested_medication",
    )

    for model in (PRIMARY_MODEL, FALLBACK_MODEL):
        raw_output = ""
        try:
            raw_output = call_nvidia_stream(prompt, model)
            result = json.loads(extract_json(raw_output))
        except json.JSONDecodeError as e:
            print(f"[ERROR] JSON parse failed ({model}): {e}\nRaw: {raw_output[:300]}")
            continue
        except Exception as e:
            err_str = str(e)
            print(f"[ERROR] API call failed ({model}): {err_str}")
            # HTTP errors from the SDK expose status_code; a 429 message may
            # say "Too Many Requests" without the words "rate limit", so the
            # status code is checked in addition to the message text.
            lowered = err_str.lower()
            rate_limited = (
                getattr(e, "status_code", None) == 429
                or "rate limit" in lowered
                or "too many requests" in lowered
                or "quota" in lowered
            )
            if rate_limited:
                continue  # Try fallback
            return {"error": f"API Error: {err_str}"}

        # Valid JSON that is not an object (e.g. a bare number or string)
        # is handled like an incomplete answer instead of raising later.
        if isinstance(result, dict) and all(f in result for f in required):
            result["_model_used"] = model  # Track which model answered
            return result

        # Partial response — log and try fallback
        print(f"[WARN] Incomplete response from {model}: {result}")

    return {"error": "Both primary and fallback models failed. Please try again later."}


# ─────────────────────────────────────────────
# 🖥️ OUTPUT FORMATTER
# ─────────────────────────────────────────────
# Urgency label -> (status emoji, hex colour). Only the emoji is used by
# format_triage_output; the colour is kept alongside it for reference.
URGENCY_ICONS = {
    "Low":       ("🟢", "#22c55e"),
    "Moderate":  ("🟡", "#eab308"),
    "High":      ("🟠", "#f97316"),
    "Emergency": ("🔴", "#ef4444"),
}


def _md_cell(value) -> str:
    """Return ``value`` as text that is safe inside one Markdown table cell."""
    if isinstance(value, (list, tuple)):
        text = ", ".join(str(item) for item in value)
    else:
        text = str(value)
    # A newline would end the table row and a bare pipe would start a new
    # column, so whitespace runs are collapsed and pipes are escaped.
    return " ".join(text.split()).replace("|", "\\|")


def format_triage_output(result: dict) -> str:
    """
    Render a triage result as Markdown for the report panel.

    Args:
        result: Either ``{"error": <message>}`` or the model's triage fields,
            optionally with ``"_model_used"``.

    Returns:
        An error block when ``"error"`` is present; otherwise a heading with
        the urgency level, a two-column table of the triage fields, and a
        footer naming the model. Missing fields are shown as ``N/A``
        (``None`` for red flags).
    """
    if "error" in result:
        return f"### ❌ Error\n\n> {result['error']}"

    raw_urgency = result.get("urgency_level", "Unknown")
    # Match case-insensitively so "high" or " HIGH " still gets its icon.
    canonical = str(raw_urgency).strip().title()
    if canonical in URGENCY_ICONS:
        icon, _ = URGENCY_ICONS[canonical]
        urgency = canonical
    else:
        icon = "⚪"
        urgency = _md_cell(raw_urgency)

    # Show only the model name, without its vendor prefix.
    model_badge = result.get("_model_used", "").split("/")[-1]

    def cell(key: str, default: str = "N/A") -> str:
        return _md_cell(result.get(key, default))

    lines = [
        f"## {icon} Urgency: **{urgency}**",
        "",
        "| Field | Details |",
        "|-------|---------|",
        f"| 🩺 Possible Condition | {cell('possible_condition')} |",
        f"| 🏷️ ICD-10 Hint | `{cell('icd_hint')}` |",
        f"| 📋 Recommended Action | {cell('recommended_action')} |",
        f"| 💊 Suggested Medication | {cell('suggested_medication')} |",
        f"| ⚠️ Red Flags | {cell('red_flags', 'None')} |",
        f"| 🌿 Lifestyle Tip | {cell('lifestyle_tip')} |",
        "",
        "---",
        f"*Powered by `{model_badge}` via NVIDIA NIM · For educational use only · Always consult a licensed physician.*"
    ]
    return "\n".join(lines)


def gradio_wrapper(gender, age, pre_existing, symptoms):
    """Gradio callback: run triage on the form values and return the Markdown report."""
    return format_triage_output(
        triage_response(gender, age, pre_existing, symptoms)
    )


# ─────────────────────────────────────────────
# 🎨 GRADIO UI
# ─────────────────────────────────────────────
# Markdown shown under the app title (passed to gr.Interface as `description`).
# The two trailing spaces after "(Nemotron reasoning model)." are a Markdown
# hard line break — keep them if an editor offers to strip trailing whitespace.
DESCRIPTION = """
### 🤖 Smart AI Healthcare Consultant — v2.0
AI-powered symptom triage using **NVIDIA NIM** (Nemotron reasoning model).  
Enter your details and get an instant clinical triage report with urgency level, diagnosis hints, and care recommendations.

> ⚠️ **Not a substitute for professional medical advice.** Always consult a licensed doctor.
"""

# Input widgets, listed in the positional order gradio_wrapper receives them:
# gender, age, pre-existing conditions, symptoms.
_input_widgets = [
    gr.Dropdown(
        label="👤 Gender",
        choices=["Male", "Female", "Other"],
        value="Male",
    ),
    gr.Textbox(
        label="🎂 Age",
        placeholder="e.g., 45",
        max_lines=1,
    ),
    gr.Textbox(
        label="🏥 Pre-existing Conditions",
        placeholder="e.g., Type 2 Diabetes, Hypertension, Asthma...",
        lines=2,
    ),
    gr.Textbox(
        label="🤒 Describe Your Symptoms",
        placeholder="e.g., Severe chest pain radiating to left arm, sweating, shortness of breath...",
        lines=5,
    ),
]

# Clickable sample cases; each row matches the widget order above.
_example_cases = [
    ["Female", "55", "Hypertension, High Cholesterol", "Chest tightness, breathlessness, dizziness for 30 min"],
    ["Male", "18", "None", "High fever 103°F, severe body pain, sore throat, chills"],
    ["Male", "40", "Type 2 Diabetes", "Frequent urination, extreme thirst, blurry vision"],
    ["Female", "28", "None", "Sudden severe headache, stiff neck, sensitivity to light"],
    ["Male", "65", "COPD", "Worsening shortness of breath, productive cough, blue lips"],
]

_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="cyan",
    neutral_hue="slate",
)

# Single-form interface: four inputs in, one Markdown report out.
demo = gr.Interface(
    fn=gradio_wrapper,
    inputs=_input_widgets,
    outputs=gr.Markdown(label="📊 AI Triage Report"),
    title="🏥 Smart AI Healthcare Consultant",
    description=DESCRIPTION,
    theme=_theme,
    examples=_example_cases,
    flagging_mode="never",
    cache_examples=False,
)

if __name__ == "__main__":
    # Keep the MCP server enabled (HuggingFace Spaces feature); no share link.
    demo.launch(share=False, mcp_server=True)