"""Generate RL_XAUUSD_Colab_System.ipynb.

Writes a Google Colab notebook that trains a PPO trading agent on
extracted MT5 XAUUSDc data (a custom gym environment with 2%-risk
position sizing and SL/TP exits) and exports the trained policy to
ONNX for consumption by an MT5 Expert Advisor.
"""
import json


def _md_cell(source):
    """Return an nbformat-4 markdown cell for the given plain string."""
    return {
        "cell_type": "markdown",
        "metadata": {},
        # nbformat stores source as a list of lines with trailing newlines.
        "source": source.splitlines(keepends=True),
    }


def _code_cell(source):
    """Return an unexecuted nbformat-4 code cell for the given plain string."""
    return {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": source.splitlines(keepends=True),
    }


INTRO_MD = """\
# Phase 2: RL Trading Agent for MT5 (XAUUSDc)
This notebook trains a reinforcement learning model on the extracted MT5 data, simulating live-market constraints and exporting an ONNX model for the Expert Advisor."""

# Pin SB3 below 2.0: SB3 >= 2.0 switched to `gymnasium` and rejects the
# classic `gym` API (single-return reset(), 4-tuple step()) used below.
# `%pip` (not `!pip`) targets the running kernel's environment.
INSTALL_SRC = """\
# SB3 >= 2.0 requires `gymnasium`; pin below 2.0 so the classic gym API
# used by the trading environment keeps working.
%pip install -q "stable-baselines3[extra]<2.0" pandas_ta xgboost onnx onnxruntime plotly gym"""

IMPORTS_SRC = """\
import os
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import onnx
import onnxruntime as ort
import plotly.graph_objects as go
import gym
from gym import spaces
from stable_baselines3 import PPO
from google.colab import files

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")"""

LOAD_SRC = """\
# Load Dataset (Upload XAUUSD_M3_Data.csv to Colab first)
if not os.path.exists('XAUUSD_M3_Data.csv'):
    print("Please upload XAUUSD_M3_Data.csv to the Colab environment.")
else:
    df = pd.read_csv('XAUUSD_M3_Data.csv', index_col='time', parse_dates=True)
    print(f"Loaded {len(df)} rows.")"""

ENV_SRC = """\
# Vectorized Custom Gym Environment for XAUUSDc
class XAUUSDM3Env(gym.Env):
    def __init__(self, df, initial_balance=2000.0, risk_per_trade=0.02, max_lot_size=20.0):
        super(XAUUSDM3Env, self).__init__()
        self.df = df
        self.prices = df['close'].values
        self.spreads = df['spread'].values if 'spread' in df.columns else np.full(len(df), 20.0)

        # Features for observation (dropping strings/dates); cast to float32
        # so observations match the dtype declared on observation_space.
        self.features = df.select_dtypes(include=[np.number]).fillna(0).values.astype(np.float32)

        self.initial_balance = initial_balance
        self.risk_per_trade = risk_per_trade
        self.max_lot_size = max_lot_size

        # Actions: 0=Buy, 1=Sell, 2=Hold, 3=Do Nothing
        self.action_space = spaces.Discrete(4)

        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.features.shape[1],), dtype=np.float32
        )

        self.reset()

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.equity = self.initial_balance
        self.current_position = 0  # 1=Long, -1=Short, 0=Flat
        self.entry_price = 0.0
        self.stop_loss = 0.0
        self.take_profit = 0.0
        self.lot_size = 0.0
        self.history = []
        return self.features[self.current_step]

    def _calculate_lot_size(self, sl_distance):
        # 2% Risk
        risk_amount = self.balance * self.risk_per_trade
        # XAUUSDc lot size standard: $100 per $1 move for 1 lot usually.
        sl_dollar_risk_per_lot = sl_distance * 100.0
        if sl_dollar_risk_per_lot <= 0:
            return 0.01

        lots = risk_amount / sl_dollar_risk_per_lot
        return max(0.01, round(lots, 2))

    def step(self, action):
        done = False
        reward = 0.0

        current_price = self.prices[self.current_step]
        spread = self.spreads[self.current_step] / 100.0  # Standard conversion for points

        # Mark-to-market the open position before acting.
        if self.current_position == 1:
            self.equity = self.balance + (current_price - self.entry_price) * 100.0 * self.lot_size
        elif self.current_position == -1:
            self.equity = self.balance + (self.entry_price - current_price) * 100.0 * self.lot_size

        # Execute at close-price
        if action == 0 and self.current_position == 0:
            # BUY: pay the spread on entry (fill at ask).
            sl_dist = max(current_price * 0.005, spread * 10.0)
            self.stop_loss = current_price - sl_dist
            self.take_profit = current_price + (sl_dist * 2.0)  # > 1R
            self.entry_price = current_price + spread
            self.lot_size = self._calculate_lot_size(sl_dist)
            self.current_position = 1

        elif action == 1 and self.current_position == 0:
            # SELL: entry at bid; spread is padded onto the stop instead.
            sl_dist = max(current_price * 0.005, spread * 10.0)
            self.stop_loss = current_price + sl_dist + spread
            self.take_profit = current_price - (sl_dist * 2.0)
            self.entry_price = current_price
            self.lot_size = self._calculate_lot_size(sl_dist)
            self.current_position = -1

        # Check SL / TP for exit
        if self.current_position == 1:
            if current_price <= self.stop_loss or current_price >= self.take_profit:
                profit = (current_price - self.entry_price) * 100.0 * self.lot_size
                self.balance += profit
                self.equity = self.balance
                self.current_position = 0
                reward = profit
                self.history.append({'type': 'long', 'profit': profit, 'lot': self.lot_size})

        elif self.current_position == -1:
            if current_price >= self.stop_loss or current_price <= self.take_profit:
                profit = (self.entry_price - current_price) * 100.0 * self.lot_size
                self.balance += profit
                self.equity = self.balance
                self.current_position = 0
                reward = profit
                self.history.append({'type': 'short', 'profit': profit, 'lot': self.lot_size})

        self.current_step += 1
        if self.current_step >= len(self.prices) - 1 or self.equity <= 0:
            done = True

        # float32 zeros on termination so the dtype matches live observations.
        next_state = self.features[self.current_step] if not done else np.zeros(self.features.shape[1], dtype=np.float32)
        return next_state, reward, done, {}"""

TRAIN_SRC = """\
# Train Model
if 'df' in locals():
    train_size = int(len(df) * 0.7)
    train_df = df.iloc[:train_size].copy()
    test_df = df.iloc[train_size:].copy()

    env = XAUUSDM3Env(train_df)
    model = PPO("MlpPolicy", env, verbose=1, device=device)

    print("Starting RL Training...")
    model.learn(total_timesteps=50000)
    print("Training Finished.")"""

PLOT_SRC = """\
# Plotting white-themed performance metrics
if 'env' in locals() and len(env.history) > 0:
    profits = [x['profit'] for x in env.history]
    cumulative = np.cumsum(profits)

    fig = go.Figure()
    fig.add_trace(go.Scatter(y=cumulative, mode='lines', name='Cumulative Profit', line=dict(color='blue')))
    fig.update_layout(
        title="RL Agent Performance (Cumulative Profit)",
        xaxis_title="Trades",
        yaxis_title="USD Returns",
        template="plotly_white"
    )
    fig.show()"""

ONNX_SRC = """\
# Export to ONNX for MT5 Expert Advisor
if 'model' in locals():
    class OnnxablePolicy(nn.Module):
        def __init__(self, policy):
            super().__init__()
            self.policy = policy

        def forward(self, observation):
            # deterministic=True: export the greedy action, not a sample.
            return self.policy(observation, deterministic=True)[0]

    onnx_policy = OnnxablePolicy(model.policy)
    dummy_input = torch.randn(1, env.observation_space.shape[0]).to(device)
    onnx_policy.to(device)

    onnx_path = "RL_Agent_XAUUSD.onnx"
    torch.onnx.export(
        onnx_policy,
        dummy_input,
        onnx_path,
        opset_version=11,
        input_names=["input"],
        output_names=["output"]
    )
    print(f"ONNX Model successfully exported to {onnx_path}. Next, download it and deploy to your MT5 EA.")
    try:
        files.download(onnx_path)
    except Exception as exc:  # files.download only works inside Colab
        print(f"Automatic download unavailable ({exc}); fetch {onnx_path} manually.")"""

# Assemble the notebook document (nbformat 4) in narrative order:
# title -> install -> imports -> data load -> env -> train -> plot -> export.
notebook = {
    "cells": [
        _md_cell(INTRO_MD),
        _code_cell(INSTALL_SRC),
        _code_cell(IMPORTS_SRC),
        _code_cell(LOAD_SRC),
        _code_cell(ENV_SRC),
        _code_cell(TRAIN_SRC),
        _code_cell(PLOT_SRC),
        _code_cell(ONNX_SRC),
    ],
    "metadata": {
        "colab": {"provenance": []},
        "kernelspec": {"display_name": "Python 3", "name": "python3"},
        "language_info": {"name": "python"},
    },
    "nbformat": 4,
    "nbformat_minor": 0,
}

with open("RL_XAUUSD_Colab_System.ipynb", "w") as f:
    json.dump(notebook, f, indent=1)
print("Generated RL_XAUUSD_Colab_System.ipynb securely!")