# simple-RL-ONNX / generate_notebook.py
# algorembrant's picture
# Upload 7 files
# 13c3b17 verified
import json
# In-memory description of the Colab notebook, laid out as an nbformat-4
# document: a list of cells plus notebook-level metadata.  Every cell's
# "source" is a list of lines; each line except (optionally) the last carries
# its own trailing "\n".  The cell code lives inside string literals, so its
# indentation must be spelled out explicitly in those strings -- Python's
# parser only sees it once the notebook is executed.
#
# NOTE(review): the environment cell uses the legacy gym API (reset() returns
# only the observation, step() returns a 4-tuple).  Confirm that the
# stable-baselines3 version installed by the pip cell still accepts it.
notebook = {
    "cells": [
        # --- Cell 0: title / description (markdown) ---
        {
            "cell_type": "markdown",
            "metadata": {},
            "source": [
                "# Phase 2: RL Trading Agent for MT5 (XAUUSDc)\n",
                "This notebook trains a reinforcement learning model on the extracted MT5 data, simulating live-market constraints and exporting an ONNX model for the Expert Advisor."
            ]
        },
        # --- Cell 1: dependency installation (IPython shell escape) ---
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "!pip install -q stable-baselines3[extra] pandas_ta xgboost onnx onnxruntime plotly gym"
            ]
        },
        # --- Cell 2: imports and device selection ---
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "import os\n",
                "import math\n",
                "import numpy as np\n",
                "import pandas as pd\n",
                "import torch\n",
                "import torch.nn as nn\n",
                "import onnx\n",
                "import onnxruntime as ort\n",
                "import plotly.graph_objects as go\n",
                "import gym\n",
                "from gym import spaces\n",
                "from stable_baselines3 import PPO\n",
                "from google.colab import files\n",
                "\n",
                "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
                "print(f\"Using device: {device}\")"
            ]
        },
        # --- Cell 3: dataset loading; `df` is only defined when the CSV exists ---
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Load Dataset (Upload XAUUSD_M3_Data.csv to Colab first)\n",
                "if not os.path.exists('XAUUSD_M3_Data.csv'):\n",
                "    print(\"Please upload XAUUSD_M3_Data.csv to the Colab environment.\")\n",
                "else:\n",
                "    df = pd.read_csv('XAUUSD_M3_Data.csv', index_col='time', parse_dates=True)\n",
                "    print(f\"Loaded {len(df)} rows.\")"
            ]
        },
        # --- Cell 4: trading environment definition ---
        # Changes relative to the previous cell text: indentation restored,
        # observations cast to float32 to match observation_space, and the
        # computed lot size is now capped by max_lot_size.
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Vectorized Custom Gym Environment for XAUUSDc\n",
                "class XAUUSDM3Env(gym.Env):\n",
                "    def __init__(self, df, initial_balance=2000.0, risk_per_trade=0.02, max_lot_size=20.0):\n",
                "        super(XAUUSDM3Env, self).__init__()\n",
                "        self.df = df\n",
                "        self.prices = df['close'].values\n",
                "        self.spreads = df['spread'].values if 'spread' in df.columns else np.full(len(df), 20.0)\n",
                "\n",
                "        # Features for observation (dropping strings/dates), cast to the\n",
                "        # float32 dtype declared by observation_space below\n",
                "        self.features = df.select_dtypes(include=[np.number]).fillna(0).values.astype(np.float32)\n",
                "\n",
                "        self.initial_balance = initial_balance\n",
                "        self.risk_per_trade = risk_per_trade\n",
                "        self.max_lot_size = max_lot_size\n",
                "\n",
                "        # Actions: 0=Buy, 1=Sell, 2=Hold, 3=Do Nothing\n",
                "        self.action_space = spaces.Discrete(4)\n",
                "\n",
                "        self.observation_space = spaces.Box(\n",
                "            low=-np.inf, high=np.inf, shape=(self.features.shape[1],), dtype=np.float32\n",
                "        )\n",
                "\n",
                "        self.reset()\n",
                "\n",
                "    def reset(self):\n",
                "        self.current_step = 0\n",
                "        self.balance = self.initial_balance\n",
                "        self.equity = self.initial_balance\n",
                "        self.current_position = 0  # 1=Long, -1=Short, 0=Flat\n",
                "        self.entry_price = 0.0\n",
                "        self.stop_loss = 0.0\n",
                "        self.take_profit = 0.0\n",
                "        self.lot_size = 0.0\n",
                "        self.history = []\n",
                "        return self.features[self.current_step]\n",
                "\n",
                "    def _calculate_lot_size(self, sl_distance):\n",
                "        # 2% Risk\n",
                "        risk_amount = self.balance * self.risk_per_trade\n",
                "        # XAUUSDc lot size standard: $100 per $1 move for 1 lot usually.\n",
                "        sl_dollar_risk_per_lot = sl_distance * 100.0\n",
                "        if sl_dollar_risk_per_lot <= 0:\n",
                "            return 0.01\n",
                "\n",
                "        lots = risk_amount / sl_dollar_risk_per_lot\n",
                "        # Clamp between 0.01 lots and the configured max_lot_size\n",
                "        return min(self.max_lot_size, max(0.01, round(lots, 2)))\n",
                "\n",
                "    def step(self, action):\n",
                "        done = False\n",
                "        reward = 0.0\n",
                "\n",
                "        current_price = self.prices[self.current_step]\n",
                "        spread = self.spreads[self.current_step] / 100.0  # Standard conversion for points\n",
                "\n",
                "        # Calculate equity running\n",
                "        if self.current_position == 1:\n",
                "            self.equity = self.balance + (current_price - self.entry_price) * 100.0 * self.lot_size\n",
                "        elif self.current_position == -1:\n",
                "            self.equity = self.balance + (self.entry_price - current_price) * 100.0 * self.lot_size\n",
                "\n",
                "        # Execute at close-price\n",
                "        if action == 0 and self.current_position == 0:\n",
                "            # BUY\n",
                "            sl_dist = max(current_price * 0.005, spread * 10.0)\n",
                "            self.stop_loss = current_price - sl_dist\n",
                "            self.take_profit = current_price + (sl_dist * 2.0)  # > 1R\n",
                "            self.entry_price = current_price + spread\n",
                "            self.lot_size = self._calculate_lot_size(sl_dist)\n",
                "            self.current_position = 1\n",
                "\n",
                "        elif action == 1 and self.current_position == 0:\n",
                "            # SELL\n",
                "            sl_dist = max(current_price * 0.005, spread * 10.0)\n",
                "            self.stop_loss = current_price + sl_dist + spread\n",
                "            self.take_profit = current_price - (sl_dist * 2.0)\n",
                "            self.entry_price = current_price\n",
                "            self.lot_size = self._calculate_lot_size(sl_dist)\n",
                "            self.current_position = -1\n",
                "\n",
                "        # Check SL / TP for exit\n",
                "        if self.current_position == 1:\n",
                "            if current_price <= self.stop_loss or current_price >= self.take_profit:\n",
                "                profit = (current_price - self.entry_price) * 100.0 * self.lot_size\n",
                "                self.balance += profit\n",
                "                self.equity = self.balance\n",
                "                self.current_position = 0\n",
                "                reward = profit\n",
                "                self.history.append({'type': 'long', 'profit': profit, 'lot': self.lot_size})\n",
                "\n",
                "        elif self.current_position == -1:\n",
                "            if current_price >= self.stop_loss or current_price <= self.take_profit:\n",
                "                profit = (self.entry_price - current_price) * 100.0 * self.lot_size\n",
                "                self.balance += profit\n",
                "                self.equity = self.balance\n",
                "                self.current_position = 0\n",
                "                reward = profit\n",
                "                self.history.append({'type': 'short', 'profit': profit, 'lot': self.lot_size})\n",
                "\n",
                "        self.current_step += 1\n",
                "        if self.current_step >= len(self.prices) - 1 or self.equity <= 0:\n",
                "            done = True\n",
                "\n",
                "        next_state = self.features[self.current_step] if not done else np.zeros(self.features.shape[1], dtype=np.float32)\n",
                "        return next_state, reward, done, {}\n"
            ]
        },
        # --- Cell 5: 70/30 chronological split and PPO training ---
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Train Model\n",
                "if 'df' in locals():\n",
                "    train_size = int(len(df) * 0.7)\n",
                "    train_df = df.iloc[:train_size].copy()\n",
                "    test_df = df.iloc[train_size:].copy()\n",
                "\n",
                "    env = XAUUSDM3Env(train_df)\n",
                "    model = PPO(\"MlpPolicy\", env, verbose=1, device=device)\n",
                "\n",
                "    print(\"Starting RL Training...\")\n",
                "    model.learn(total_timesteps=50000)\n",
                "    print(\"Training Finished.\")\n"
            ]
        },
        # --- Cell 6: cumulative-profit plot built from env.history ---
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Plotting white-themed performance metrics\n",
                "if 'env' in locals() and len(env.history) > 0:\n",
                "    profits = [x['profit'] for x in env.history]\n",
                "    cumulative = np.cumsum(profits)\n",
                "\n",
                "    fig = go.Figure()\n",
                "    fig.add_trace(go.Scatter(y=cumulative, mode='lines', name='Cumulative Profit', line=dict(color='blue')))\n",
                "    fig.update_layout(\n",
                "        title=\"RL Agent Performance (Cumulative Profit)\",\n",
                "        xaxis_title=\"Trades\",\n",
                "        yaxis_title=\"USD Returns\",\n",
                "        template=\"plotly_white\"\n",
                "    )\n",
                "    fig.show()\n"
            ]
        },
        # --- Cell 7: ONNX export of the trained policy ---
        # The download step is best-effort; a failure is now reported
        # instead of being silently swallowed by a bare `except`.
        {
            "cell_type": "code",
            "execution_count": None,
            "metadata": {},
            "outputs": [],
            "source": [
                "# Export to ONNX for MT5 Expert Advisor\n",
                "if 'model' in locals():\n",
                "    class OnnxablePolicy(nn.Module):\n",
                "        def __init__(self, policy):\n",
                "            super().__init__()\n",
                "            self.policy = policy\n",
                "\n",
                "        def forward(self, observation):\n",
                "            return self.policy(observation, deterministic=True)[0]\n",
                "\n",
                "    onnx_policy = OnnxablePolicy(model.policy)\n",
                "    dummy_input = torch.randn(1, env.observation_space.shape[0]).to(device)\n",
                "    onnx_policy.to(device)\n",
                "\n",
                "    onnx_path = \"RL_Agent_XAUUSD.onnx\"\n",
                "    torch.onnx.export(\n",
                "        onnx_policy,\n",
                "        dummy_input,\n",
                "        onnx_path,\n",
                "        opset_version=11,\n",
                "        input_names=[\"input\"],\n",
                "        output_names=[\"output\"]\n",
                "    )\n",
                "    print(f\"ONNX Model successfully exported to {onnx_path}. Next, download it and deploy to your MT5 EA.\")\n",
                "    try:\n",
                "        files.download(onnx_path)\n",
                "    except Exception as exc:\n",
                "        print(f\"Automatic download failed ({exc}); download {onnx_path} manually from the file browser.\")\n"
            ]
        }
    ],
    # Notebook-level metadata: Colab provenance stub and a Python 3 kernel.
    "metadata": {
        "colab": {"provenance": []},
        "kernelspec": {
            "display_name": "Python 3",
            "name": "python3"
        },
        "language_info": {"name": "python"}
    },
    "nbformat": 4,
    "nbformat_minor": 0
}
# Serialize the notebook dict to disk as JSON with a 1-space indent.
# The encoding is pinned to UTF-8 so the bytes written do not depend on the
# platform's default locale encoding.
with open("RL_XAUUSD_Colab_System.ipynb", "w", encoding="utf-8") as f:
    json.dump(notebook, f, indent=1)
print("Generated RL_XAUUSD_Colab_System.ipynb securely!")