"""Generate RL_XAUUSD_Colab_System.ipynb.

Writes a Google Colab notebook that trains a PPO trading agent on
extracted MT5 XAUUSDc data (a custom gym environment with 2%-risk
position sizing and SL/TP exits) and exports the trained policy to
ONNX for consumption by an MT5 Expert Advisor.
"""
import json


def _md_cell(source):
    """Return an nbformat-4 markdown cell for the given plain string."""
    return {
        "cell_type": "markdown",
        "metadata": {},
        # nbformat stores source as a list of lines with trailing newlines.
        "source": source.splitlines(keepends=True),
    }


def _code_cell(source):
    """Return an unexecuted nbformat-4 code cell for the given plain string."""
    return {
        "cell_type": "code",
        "execution_count": None,
        "metadata": {},
        "outputs": [],
        "source": source.splitlines(keepends=True),
    }


INTRO_MD = """\
# Phase 2: RL Trading Agent for MT5 (XAUUSDc)
This notebook trains a reinforcement learning model on the extracted MT5 data, simulating live-market constraints and exporting an ONNX model for the Expert Advisor."""

# Pin SB3 below 2.0: SB3 >= 2.0 switched to `gymnasium` and rejects the
# classic `gym` API (single-return reset(), 4-tuple step()) used below.
# `%pip` (not `!pip`) targets the running kernel's environment.
INSTALL_SRC = """\
# SB3 >= 2.0 requires `gymnasium`; pin below 2.0 so the classic gym API
# used by the trading environment keeps working.
%pip install -q "stable-baselines3[extra]<2.0" pandas_ta xgboost onnx onnxruntime plotly gym"""

IMPORTS_SRC = """\
import os
import math
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import onnx
import onnxruntime as ort
import plotly.graph_objects as go
import gym
from gym import spaces
from stable_baselines3 import PPO
from google.colab import files

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")"""

LOAD_SRC = """\
# Load Dataset (Upload XAUUSD_M3_Data.csv to Colab first)
if not os.path.exists('XAUUSD_M3_Data.csv'):
    print("Please upload XAUUSD_M3_Data.csv to the Colab environment.")
else:
    df = pd.read_csv('XAUUSD_M3_Data.csv', index_col='time', parse_dates=True)
    print(f"Loaded {len(df)} rows.")"""

ENV_SRC = """\
# Vectorized Custom Gym Environment for XAUUSDc
class XAUUSDM3Env(gym.Env):
    def __init__(self, df, initial_balance=2000.0, risk_per_trade=0.02, max_lot_size=20.0):
        super(XAUUSDM3Env, self).__init__()
        self.df = df
        self.prices = df['close'].values
        self.spreads = df['spread'].values if 'spread' in df.columns else np.full(len(df), 20.0)

        # Features for observation (dropping strings/dates); cast to float32
        # so observations match the dtype declared on observation_space.
        self.features = df.select_dtypes(include=[np.number]).fillna(0).values.astype(np.float32)

        self.initial_balance = initial_balance
        self.risk_per_trade = risk_per_trade
        self.max_lot_size = max_lot_size

        # Actions: 0=Buy, 1=Sell, 2=Hold, 3=Do Nothing
        self.action_space = spaces.Discrete(4)

        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(self.features.shape[1],), dtype=np.float32
        )

        self.reset()

    def reset(self):
        self.current_step = 0
        self.balance = self.initial_balance
        self.equity = self.initial_balance
        self.current_position = 0  # 1=Long, -1=Short, 0=Flat
        self.entry_price = 0.0
        self.stop_loss = 0.0
        self.take_profit = 0.0
        self.lot_size = 0.0
        self.history = []
        return self.features[self.current_step]

    def _calculate_lot_size(self, sl_distance):
        # 2% Risk
        risk_amount = self.balance * self.risk_per_trade
        # XAUUSDc lot size standard: $100 per $1 move for 1 lot usually.
        sl_dollar_risk_per_lot = sl_distance * 100.0
        if sl_dollar_risk_per_lot <= 0:
            return 0.01

        lots = risk_amount / sl_dollar_risk_per_lot
        return max(0.01, round(lots, 2))

    def step(self, action):
        done = False
        reward = 0.0

        current_price = self.prices[self.current_step]
        spread = self.spreads[self.current_step] / 100.0  # Standard conversion for points

        # Mark-to-market the open position before acting.
        if self.current_position == 1:
            self.equity = self.balance + (current_price - self.entry_price) * 100.0 * self.lot_size
        elif self.current_position == -1:
            self.equity = self.balance + (self.entry_price - current_price) * 100.0 * self.lot_size

        # Execute at close-price
        if action == 0 and self.current_position == 0:
            # BUY: pay the spread on entry (fill at ask).
            sl_dist = max(current_price * 0.005, spread * 10.0)
            self.stop_loss = current_price - sl_dist
            self.take_profit = current_price + (sl_dist * 2.0)  # > 1R
            self.entry_price = current_price + spread
            self.lot_size = self._calculate_lot_size(sl_dist)
            self.current_position = 1

        elif action == 1 and self.current_position == 0:
            # SELL: entry at bid; spread is padded onto the stop instead.
            sl_dist = max(current_price * 0.005, spread * 10.0)
            self.stop_loss = current_price + sl_dist + spread
            self.take_profit = current_price - (sl_dist * 2.0)
            self.entry_price = current_price
            self.lot_size = self._calculate_lot_size(sl_dist)
            self.current_position = -1

        # Check SL / TP for exit
        if self.current_position == 1:
            if current_price <= self.stop_loss or current_price >= self.take_profit:
                profit = (current_price - self.entry_price) * 100.0 * self.lot_size
                self.balance += profit
                self.equity = self.balance
                self.current_position = 0
                reward = profit
                self.history.append({'type': 'long', 'profit': profit, 'lot': self.lot_size})

        elif self.current_position == -1:
            if current_price >= self.stop_loss or current_price <= self.take_profit:
                profit = (self.entry_price - current_price) * 100.0 * self.lot_size
                self.balance += profit
                self.equity = self.balance
                self.current_position = 0
                reward = profit
                self.history.append({'type': 'short', 'profit': profit, 'lot': self.lot_size})

        self.current_step += 1
        if self.current_step >= len(self.prices) - 1 or self.equity <= 0:
            done = True

        # float32 zeros on termination so the dtype matches live observations.
        next_state = self.features[self.current_step] if not done else np.zeros(self.features.shape[1], dtype=np.float32)
        return next_state, reward, done, {}"""

TRAIN_SRC = """\
# Train Model
if 'df' in locals():
    train_size = int(len(df) * 0.7)
    train_df = df.iloc[:train_size].copy()
    test_df = df.iloc[train_size:].copy()

    env = XAUUSDM3Env(train_df)
    model = PPO("MlpPolicy", env, verbose=1, device=device)

    print("Starting RL Training...")
    model.learn(total_timesteps=50000)
    print("Training Finished.")"""

PLOT_SRC = """\
# Plotting white-themed performance metrics
if 'env' in locals() and len(env.history) > 0:
    profits = [x['profit'] for x in env.history]
    cumulative = np.cumsum(profits)

    fig = go.Figure()
    fig.add_trace(go.Scatter(y=cumulative, mode='lines', name='Cumulative Profit', line=dict(color='blue')))
    fig.update_layout(
        title="RL Agent Performance (Cumulative Profit)",
        xaxis_title="Trades",
        yaxis_title="USD Returns",
        template="plotly_white"
    )
    fig.show()"""

ONNX_SRC = """\
# Export to ONNX for MT5 Expert Advisor
if 'model' in locals():
    class OnnxablePolicy(nn.Module):
        def __init__(self, policy):
            super().__init__()
            self.policy = policy

        def forward(self, observation):
            # deterministic=True: export the greedy action, not a sample.
            return self.policy(observation, deterministic=True)[0]

    onnx_policy = OnnxablePolicy(model.policy)
    dummy_input = torch.randn(1, env.observation_space.shape[0]).to(device)
    onnx_policy.to(device)

    onnx_path = "RL_Agent_XAUUSD.onnx"
    torch.onnx.export(
        onnx_policy,
        dummy_input,
        onnx_path,
        opset_version=11,
        input_names=["input"],
        output_names=["output"]
    )
    print(f"ONNX Model successfully exported to {onnx_path}. Next, download it and deploy to your MT5 EA.")
    try:
        files.download(onnx_path)
    except Exception as exc:  # files.download only works inside Colab
        print(f"Automatic download unavailable ({exc}); fetch {onnx_path} manually.")"""

# Assemble the notebook document (nbformat 4) in narrative order:
# title -> install -> imports -> data load -> env -> train -> plot -> export.
notebook = {
    "cells": [
        _md_cell(INTRO_MD),
        _code_cell(INSTALL_SRC),
        _code_cell(IMPORTS_SRC),
        _code_cell(LOAD_SRC),
        _code_cell(ENV_SRC),
        _code_cell(TRAIN_SRC),
        _code_cell(PLOT_SRC),
        _code_cell(ONNX_SRC),
    ],
    "metadata": {
        "colab": {"provenance": []},
        "kernelspec": {"display_name": "Python 3", "name": "python3"},
        "language_info": {"name": "python"},
    },
    "nbformat": 4,
    "nbformat_minor": 0,
}

with open("RL_XAUUSD_Colab_System.ipynb", "w") as f:
    json.dump(notebook, f, indent=1)
print("Generated RL_XAUUSD_Colab_System.ipynb securely!")