# abhishekudeniyan's picture
# NVIDIA NIM (Free Tier) Model: nvidia/nemotron-3-super-120b-a12b (reasoning model)
# 8bcb9a8 verified
"""
Smart AI Healthcare Consultant — v2.0
Upgraded from Nebius/Qwen → NVIDIA NIM (Free Tier)
Model: nvidia/nemotron-3-super-120b-a12b (reasoning model)
Author: abhishekudeniyan | Updated: 2026
"""
import gradio as gr
import os
import json
import re
from openai import OpenAI
# ─────────────────────────────────────────────
# 🔧 CONFIGURATION
# ─────────────────────────────────────────────
# API key is taken from the environment; it is None when the variable is unset.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")

# Primary model — NVIDIA NIM free tier (reasoning-capable)
PRIMARY_MODEL = "nvidia/nemotron-3-super-120b-a12b"

# Fallback model if primary quota is hit (also free on NIM)
FALLBACK_MODEL = "meta/llama-3.3-70b-instruct"

# Only build a client when a key is available; otherwise `client` stays None
# so callers can detect the missing configuration.
if NVIDIA_API_KEY:
    client = OpenAI(
        base_url="https://integrate.api.nvidia.com/v1",
        api_key=NVIDIA_API_KEY,
    )
else:
    client = None
# ─────────────────────────────────────────────
# 📋 PROMPT TEMPLATE
# ─────────────────────────────────────────────
# Prompt text filled in with str.format(). The single-brace names ({gender},
# {age}, {pre_existing}, {symptoms}) are the substitution fields; the doubled
# braces around the example JSON object are format escapes that come out as
# literal "{" and "}" in the final prompt.
PROMPT_TEMPLATE = """
You are a smart AI healthcare triage consultant. Analyze the patient data below and respond ONLY in valid raw JSON.
Patient Information:
- Gender: {gender}
- Age: {age}
- Pre-existing Conditions: {pre_existing}
- Current Symptoms: "{symptoms}"
Clinical Instructions:
- Apply standard triage logic and symptom analysis.
- Be specific and medically accurate.
- Never recommend emergency-level without clear justification.
Respond ONLY with a raw JSON object (no markdown, no explanation) containing exactly these fields:
{{
"urgency_level": "<Low | Moderate | High | Emergency>",
"possible_condition": "<short diagnosis>",
"icd_hint": "<ICD-10 code guess, e.g. J06.9>",
"recommended_action": "<clear next steps for patient>",
"suggested_medication": "<OTC or general guidance, or 'Consult physician'>",
"red_flags": "<warning signs to watch for, or 'None'>",
"lifestyle_tip": "<one relevant wellness tip>"
}}
"""
# ─────────────────────────────────────────────
# 🧠 CORE LOGIC
# ─────────────────────────────────────────────
def build_prompt(gender: str, age: str, pre_existing: str, symptoms: str) -> str:
    """Fill PROMPT_TEMPLATE with one patient's details.

    Args:
        gender: Patient gender, inserted as given.
        age: Patient age, inserted as given.
        pre_existing: Known conditions; a blank or missing value is rendered
            as "None reported".
        symptoms: Symptom description; surrounding whitespace is removed.

    Returns:
        The formatted prompt text.
    """
    # A caller may hand over None for a field that was left out, and None has
    # no .strip(); treat it the same as an empty string instead of crashing.
    conditions = (pre_existing or "").strip()
    complaint = (symptoms or "").strip()
    return PROMPT_TEMPLATE.format(
        gender=gender,
        age=age,
        pre_existing=conditions or "None reported",
        symptoms=complaint,
    )
def extract_json(text: str) -> str:
    """Return the first complete JSON object embedded in *text*.

    Every "{" is tried in order as the start of a JSON document, and the first
    one that parses is returned exactly as it appears in *text*. Markdown
    fences, leading prose and anything after the object are therefore ignored,
    including trailing text that itself contains braces.

    Args:
        text: Raw model output.

    Returns:
        The substring holding the first parsable JSON object. If nothing
        parses, the fence-stripped text (narrowed to the outermost brace span
        when one exists) is returned so the caller's ``json.loads`` reports
        the failure.
    """
    decoder = json.JSONDecoder()
    for brace in re.finditer(r"\{", text):
        candidate = text[brace.start():]
        try:
            # raw_decode stops at the end of the document and reports where,
            # so text following the object cannot spoil the parse.
            _, end = decoder.raw_decode(candidate)
        except json.JSONDecodeError:
            continue
        return candidate[:end]

    # Nothing parsable: hand back the best-effort span for error reporting.
    stripped = re.sub(r"```(?:json)?", "", text).strip("`").strip()
    greedy = re.search(r"\{.*\}", stripped, re.DOTALL)
    return greedy.group(0) if greedy else stripped
def call_nvidia_stream(prompt: str, model: str) -> str:
    """
    Request a streamed chat completion and return the answer text.

    Chunks whose delta carries a non-empty ``reasoning_content`` are dropped;
    the ``content`` of every other chunk is concatenated in arrival order.

    Args:
        prompt: User message sent after the fixed system message.
        model: Model identifier passed to the API.

    Returns:
        The concatenated answer text ("" when no content chunk arrived).
    """
    system_message = {
        "role": "system",
        "content": (
            "You are a precise medical triage AI. "
            "Respond ONLY with a valid raw JSON object. No prose. No markdown."
        ),
    }
    user_message = {"role": "user", "content": prompt}

    # Extra request fields forwarded verbatim in the request body.
    # NOTE(review): reasoning_budget (8192) is larger than max_tokens (1024)
    # below; confirm against the NIM docs how the two limits interact.
    reasoning_options = {
        "chat_template_kwargs": {
            "enable_thinking": True,  # Activate chain-of-thought reasoning
            "low_effort": True,  # Faster / lighter reasoning budget
        },
        "reasoning_budget": 8192,  # Allow model to think before answering
    }

    stream = client.chat.completions.create(
        model=model,
        messages=[system_message, user_message],
        temperature=0.3,  # Low temp for clinical accuracy
        top_p=0.95,
        max_tokens=1024,
        extra_body=reasoning_options,
        stream=True,
    )

    answer_parts = []
    for event in stream:
        if not event.choices:
            continue
        piece = event.choices[0].delta
        # Reasoning chunks are internal chain-of-thought and must not end up
        # in the text handed to the JSON extractor.
        is_reasoning = bool(getattr(piece, "reasoning_content", None))
        if not is_reasoning and piece.content:
            answer_parts.append(piece.content)
    return "".join(answer_parts)
def triage_response(gender: str, age: str, pre_existing: str, symptoms: str) -> dict:
    """Run the triage prompt against the primary model, then the fallback.

    Each model is tried in turn. A model is skipped when the API call raises,
    when its output is not parsable JSON, or when the parsed value is not an
    object holding every required field.

    Args:
        gender: Patient gender.
        age: Patient age.
        pre_existing: Known conditions (may be blank).
        symptoms: Symptom description; must not be blank.

    Returns:
        The parsed triage object with an added "_model_used" key on success,
        otherwise a dict with a single "error" key describing the failure.
    """
    if not client:
        return {"error": "NVIDIA_API_KEY environment variable not set. Please add it in HuggingFace Space Secrets."}
    # A missing value is handled like an empty one instead of raising on .strip().
    symptoms = symptoms or ""
    if not symptoms.strip():
        return {"error": "Please describe your symptoms before submitting."}

    prompt = build_prompt(gender, age, pre_existing or "", symptoms)
    required = (
        "urgency_level", "possible_condition",
        "recommended_action", "suggested_medication",
    )

    api_error = None
    # Try primary model first, fall back on any error
    for model in (PRIMARY_MODEL, FALLBACK_MODEL):
        api_error = None
        try:
            raw_output = call_nvidia_stream(prompt, model)
        except Exception as e:
            # Service boundary: whatever went wrong with this model (quota,
            # outage, unknown model, timeout), the next one still gets a try.
            api_error = str(e)
            print(f"[ERROR] API call failed ({model}): {api_error}")
            continue

        try:
            result = json.loads(extract_json(raw_output))
        except json.JSONDecodeError as e:
            print(f"[ERROR] JSON parse failed ({model}): {e}\nRaw: {raw_output[:300]}")
            continue

        # Valid JSON is not necessarily an object; guard before key access.
        if isinstance(result, dict) and all(f in result for f in required):
            result["_model_used"] = model  # Track which model answered
            return result
        # Partial response — log and try fallback
        print(f"[WARN] Incomplete response from {model}: {result}")

    # Surface the API message when the last attempt died at the API level.
    if api_error is not None:
        return {"error": f"API Error: {api_error}"}
    return {"error": "Both primary and fallback models failed. Please try again later."}
# ─────────────────────────────────────────────
# 🖥️ OUTPUT FORMATTER
# ─────────────────────────────────────────────
# Maps each urgency level named in the prompt to an (icon, hex colour) pair.
URGENCY_ICONS = {
    "Low": ("🟢", "#22c55e"),
    "Moderate": ("🟡", "#eab308"),
    "High": ("🟠", "#f97316"),
    "Emergency": ("🔴", "#ef4444"),
}


def _md_cell(value) -> str:
    """Render *value* as text that fits inside one Markdown table cell."""
    if isinstance(value, (list, tuple)):
        value = ", ".join(str(item) for item in value)
    # A raw newline would end the table row and a raw pipe would end the cell.
    flat = " ".join(str(value).strip().splitlines())
    return flat.replace("|", "\\|")


def format_triage_output(result: dict) -> str:
    """Turn a triage result dict into the Markdown report shown to the user.

    Args:
        result: Either ``{"error": message}`` or a triage object. Missing
            triage fields are shown as "N/A" ("None" for red flags).

    Returns:
        Markdown text: an error notice, or an urgency heading followed by a
        two-column table and a footer naming the model.
    """
    if "error" in result:
        return f"### ❌ Error\n\n> {result['error']}"

    urgency = str(result.get("urgency_level", "Unknown")).strip()
    # The model may answer "high" or "HIGH"; match the table's Title-case keys.
    icon, _ = URGENCY_ICONS.get(urgency.title(), ("⚪", "#94a3b8"))
    model_badge = str(result.get("_model_used", "")).split("/")[-1] or "unknown model"

    lines = [
        f"## {icon} Urgency: **{urgency}**",
        "",
        "| Field | Details |",
        "|-------|---------|",
        f"| 🩺 Possible Condition | {_md_cell(result.get('possible_condition', 'N/A'))} |",
        f"| 🏷️ ICD-10 Hint | `{_md_cell(result.get('icd_hint', 'N/A'))}` |",
        f"| 📋 Recommended Action | {_md_cell(result.get('recommended_action', 'N/A'))} |",
        f"| 💊 Suggested Medication | {_md_cell(result.get('suggested_medication', 'N/A'))} |",
        f"| ⚠️ Red Flags | {_md_cell(result.get('red_flags', 'None'))} |",
        f"| 🌿 Lifestyle Tip | {_md_cell(result.get('lifestyle_tip', 'N/A'))} |",
        "",
        "---",
        f"*Powered by `{model_badge}` via NVIDIA NIM · For educational use only · Always consult a licensed physician.*",
    ]
    return "\n".join(lines)
def gradio_wrapper(gender, age, pre_existing, symptoms):
    """Gradio callback: run the triage and return its Markdown report."""
    return format_triage_output(
        triage_response(gender, age, pre_existing, symptoms)
    )
# ─────────────────────────────────────────────
# 🎨 GRADIO UI
# ─────────────────────────────────────────────
# Markdown blurb passed to the interface as its description.
DESCRIPTION = """
### 🤖 Smart AI Healthcare Consultant — v2.0
AI-powered symptom triage using **NVIDIA NIM** (Nemotron reasoning model).
Enter your details and get an instant clinical triage report with urgency level, diagnosis hints, and care recommendations.
> ⚠️ **Not a substitute for professional medical advice.** Always consult a licensed doctor.
"""
# The form: four inputs (gender, age, conditions, symptoms) handed to
# gradio_wrapper in that order, and one Markdown output for the report.
# User-facing labels carry real emoji; they had been stored as mis-decoded bytes.
demo = gr.Interface(
    fn=gradio_wrapper,
    inputs=[
        gr.Dropdown(
            choices=["Male", "Female", "Other"],
            label="👤 Gender",
            value="Male",
        ),
        gr.Textbox(
            label="🎂 Age",
            placeholder="e.g., 45",
            max_lines=1,
        ),
        gr.Textbox(
            label="🏥 Pre-existing Conditions",
            placeholder="e.g., Type 2 Diabetes, Hypertension, Asthma...",
            lines=2,
        ),
        gr.Textbox(
            label="🤒 Describe Your Symptoms",
            placeholder="e.g., Severe chest pain radiating to left arm, sweating, shortness of breath...",
            lines=5,
        ),
    ],
    outputs=gr.Markdown(label="📊 AI Triage Report"),
    title="🏥 Smart AI Healthcare Consultant",
    description=DESCRIPTION,
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="cyan",
        neutral_hue="slate",
    ),
    # Each example row follows the input order: gender, age, conditions, symptoms.
    examples=[
        ["Female", "55", "Hypertension, High Cholesterol", "Chest tightness, breathlessness, dizziness for 30 min"],
        ["Male", "18", "None", "High fever 103°F, severe body pain, sore throat, chills"],
        ["Male", "40", "Type 2 Diabetes", "Frequent urination, extreme thirst, blurry vision"],
        ["Female", "28", "None", "Sudden severe headache, stiff neck, sensitivity to light"],
        ["Male", "65", "COPD", "Worsening shortness of breath, productive cough, blue lips"],
    ],
    flagging_mode="never",
    cache_examples=False,
)
# Start the app only when this file is executed directly, not when imported.
if __name__ == "__main__":
    launch_options = {
        "mcp_server": True,  # Keep MCP server enabled (HuggingFace Spaces feature)
        "share": False,
    }
    demo.launch(**launch_options)