"""Gradio chat demo for DualMinded-Qwen3-1.7B (HF Spaces, ZeroGPU).

The model emits a <think>...</think> reasoning trace followed by the final
answer; the UI renders the trace and the response as separate sections.
"""
import re

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- Model Loading ---
MODEL_ID = "reaperdoesntknow/DualMinded-Qwen3-1.7B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)


def parse_dualmind_output(text):
    """Split raw model output into (thinking_trace, final_response).

    Returns ("", stripped text) when no <think> block is present.
    """
    # FIX: the <think>/</think> tag literals had been stripped from the
    # regex and the split below, which made `'' in text` always true and
    # `text.split('')` raise ValueError ("empty separator").
    think_match = re.search(r"<think>(.*?)</think>", text, re.DOTALL)
    thinking = think_match.group(1).strip() if think_match else ""
    if "</think>" in text:
        # Everything after the last closing tag is the user-facing answer.
        response = text.split("</think>")[-1].strip()
    else:
        response = text.strip()
    return thinking, response


@spaces.GPU
def generate(
    message: str,
    history: list,
    system_prompt: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    repetition_penalty: float,
) -> str:
    """Run one chat turn on the GPU and return the formatted reply.

    Args:
        message: The new user turn.
        history: Prior turns as {"role": ..., "content": ...} dicts
            (Gradio messages format).
        system_prompt: System message; a default is used when empty.
        max_tokens / temperature / top_p / repetition_penalty: sampling knobs.

    Returns:
        Markdown string containing the thinking trace (when present) and
        the final response.
    """
    if not system_prompt:
        system_prompt = "You are a helpful assistant. Think carefully before responding."

    messages = [{"role": "system", "content": system_prompt}]
    # Gradio 6 messages format: list of {"role": ..., "content": ...}
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": message})

    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (skip the prompt echo).
    generated = outputs[0][inputs["input_ids"].shape[-1]:]
    raw_output = tokenizer.decode(generated, skip_special_tokens=True)

    thinking, response = parse_dualmind_output(raw_output)
    if thinking:
        formatted = f"🧠 **Explore → Examine**\n\n{thinking}\n\n---\n\n💬 **Response**\n\n{response}"
    else:
        formatted = response
    return formatted


# --- Custom CSS ---
css = """
@import url('https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;700&family=Plus+Jakarta+Sans:wght@400;600;800&display=swap');

.gradio-container {
    font-family: 'Plus Jakarta Sans', sans-serif !important;
    background: #0a0a0f !important;
    max-width: 900px !important;
    margin: auto !important;
}
.main-header {
    text-align: center;
    padding: 2rem 1rem;
    background: linear-gradient(135deg, #0a0a0f 0%, #1a1a2e 50%, #0a0a0f 100%);
    border-bottom: 1px solid #2a2a3e;
    margin-bottom: 1rem;
}
.main-header h1 {
    font-family: 'Plus Jakarta Sans', sans-serif;
    font-weight: 800;
    font-size: 2.2rem;
    background: linear-gradient(135deg, #00d4aa, #00a8e8, #7b68ee);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    margin: 0;
}
.main-header p {
    color: #8888aa;
    font-size: 0.95rem;
    margin-top: 0.5rem;
    font-family: 'JetBrains Mono', monospace;
}
.info-banner {
    background: linear-gradient(135deg, rgba(0,212,170,0.08), rgba(0,168,232,0.08));
    border: 1px solid rgba(0,212,170,0.2);
    border-radius: 12px;
    padding: 1rem 1.5rem;
    margin: 0.5rem 0 1rem 0;
    color: #ccccdd;
    font-size: 0.85rem;
    line-height: 1.6;
}
.info-banner a { color: #00d4aa !important; text-decoration: none; }
footer { display: none !important; }
"""

# --- UI ---
# NOTE(review): the HTML tags inside these gr.HTML() strings were lost when
# the file was mangled; the markup below is reconstructed to match the CSS
# classes defined above (.main-header, .info-banner) — confirm against the
# original Space.
with gr.Blocks() as demo:
    gr.HTML("""
    <div class="main-header">
        <h1>DualMind</h1>
        <p>Explore → Examine → Response</p>
    </div>
    """)
    gr.HTML("""
    <div class="info-banner">
        One model, two voices. DualMind uses a three-phase cognitive loop:
        the model explores the problem space, examines its own reasoning,
        then produces a response. Watch the thinking trace unfold in real time.
        <br><br>
        Built by Convergent Intelligence LLC: Research Division ·
        <a href="https://doi.org/10.57967/hf/8184">Paper (DOI: 10.57967/hf/8184)</a> ·
        <a href="https://doi.org/10.57967/hf/8194">DISC Foundations (DOI: 10.57967/hf/8194)</a>
    </div>
    """)

    chatbot = gr.Chatbot(
        height=500,
        show_label=False,
        container=True,
    )
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Ask DualMind something...",
            show_label=False,
            container=False,
            scale=8,
        )
        send_btn = gr.Button("Send", variant="primary", scale=1)

    with gr.Accordion("Settings", open=False):
        system_prompt = gr.Textbox(
            value="You are a helpful assistant. Think carefully before responding.",
            label="System Prompt",
            lines=2,
        )
        with gr.Row():
            max_tokens = gr.Slider(64, 2048, value=1024, step=64, label="Max Tokens")
            temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
        with gr.Row():
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
            rep_penalty = gr.Slider(1.0, 2.0, value=1.3, step=0.05, label="Repetition Penalty")

    # TODO(review): the collection/trace hrefs were lost in the mangled
    # paste; only the model link is derivable from MODEL_ID. Restore the
    # remaining links from the original Space.
    gr.HTML("""
    <div class="info-banner">
        <a href="https://huggingface.co/reaperdoesntknow/DualMinded-Qwen3-1.7B">DualMinded-Qwen3-1.7B</a>
        · Claude Opus 4.6 reasoning traces · DualMind Collection · DistilQwen Collection
    </div>
    """)

    def user_message(message, history):
        """Append the user's turn to history and clear the input box."""
        history = history + [{"role": "user", "content": message}]
        return "", history

    def bot_response(history, system_prompt, max_tokens, temperature, top_p, rep_penalty):
        """Generate and append the assistant reply for the last user turn."""
        user_msg = history[-1]["content"]
        past = history[:-1]
        response = generate(
            user_msg, past, system_prompt, max_tokens, temperature, top_p, rep_penalty
        )
        history = history + [{"role": "assistant", "content": response}]
        return history

    msg.submit(
        user_message, [msg, chatbot], [msg, chatbot]
    ).then(
        bot_response,
        [chatbot, system_prompt, max_tokens, temperature, top_p, rep_penalty],
        chatbot,
    )
    send_btn.click(
        user_message, [msg, chatbot], [msg, chatbot]
    ).then(
        bot_response,
        [chatbot, system_prompt, max_tokens, temperature, top_p, rep_penalty],
        chatbot,
    )

# NOTE(review): css/theme are passed to launch() here; in most Gradio
# versions these are gr.Blocks() constructor arguments instead — confirm
# the target Gradio version accepts them on launch().
demo.launch(css=css, theme=gr.themes.Base(primary_hue="teal", neutral_hue="slate"))