simple-RL-ONNX / generate_notebook.py

Upload 7 files

13c3b17 verified 18 days ago

10.1 kB

	import json

	# In-memory definition of the Colab notebook, laid out as an nbformat-4
	# document: a list of cells plus notebook-level metadata. Every code cell's
	# "source" is a list of lines that Jupyter concatenates verbatim, so the
	# indentation inside these strings is the indentation of the generated code
	# and must be valid Python on its own.
	notebook = {
	    "cells": [
	        # Cell 1 (markdown): title and one-paragraph overview.
	        {
	            "cell_type": "markdown",
	            "metadata": {},
	            "source": [
	                "# Phase 2: RL Trading Agent for MT5 (XAUUSDc)\n",
	                "This notebook trains a reinforcement learning model on the extracted MT5 data, simulating live-market constraints and exporting an ONNX model for the Expert Advisor."
	            ],
	        },
	        # Cell 2: install the third-party packages used by the later cells.
	        {
	            "cell_type": "code",
	            "execution_count": None,
	            "metadata": {},
	            "outputs": [],
	            "source": [
	                "!pip install -q stable-baselines3[extra] pandas_ta xgboost onnx onnxruntime plotly gym"
	            ],
	        },
	        # Cell 3: imports and torch device selection.
	        {
	            "cell_type": "code",
	            "execution_count": None,
	            "metadata": {},
	            "outputs": [],
	            "source": [
	                "import os\n",
	                "import math\n",
	                "import numpy as np\n",
	                "import pandas as pd\n",
	                "import torch\n",
	                "import torch.nn as nn\n",
	                "import onnx\n",
	                "import onnxruntime as ort\n",
	                "import plotly.graph_objects as go\n",
	                "import gym\n",
	                "from gym import spaces\n",
	                "from stable_baselines3 import PPO\n",
	                "from google.colab import files\n",
	                "\n",
	                "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
	                "print(f\"Using device: {device}\")"
	            ],
	        },
	        # Cell 4: load the CSV into `df`; later cells are guarded on `df`
	        # existing, so a missing file only prints a hint instead of failing.
	        {
	            "cell_type": "code",
	            "execution_count": None,
	            "metadata": {},
	            "outputs": [],
	            "source": [
	                "# Load Dataset (Upload XAUUSD_M3_Data.csv to Colab first)\n",
	                "if not os.path.exists('XAUUSD_M3_Data.csv'):\n",
	                "    print(\"Please upload XAUUSD_M3_Data.csv to the Colab environment.\")\n",
	                "else:\n",
	                "    df = pd.read_csv('XAUUSD_M3_Data.csv', index_col='time', parse_dates=True)\n",
	                "    print(f\"Loaded {len(df)} rows.\")"
	            ],
	        },
	        # Cell 5: the trading environment. Besides the restored indentation,
	        # observations are cast to float32 to match the declared observation
	        # space, and the computed lot size is clamped to `max_lot_size`
	        # (previously stored but never applied).
	        {
	            "cell_type": "code",
	            "execution_count": None,
	            "metadata": {},
	            "outputs": [],
	            "source": [
	                "# Vectorized Custom Gym Environment for XAUUSDc\n",
	                "class XAUUSDM3Env(gym.Env):\n",
	                "    def __init__(self, df, initial_balance=2000.0, risk_per_trade=0.02, max_lot_size=20.0):\n",
	                "        super(XAUUSDM3Env, self).__init__()\n",
	                "        self.df = df\n",
	                "        self.prices = df['close'].values\n",
	                "        self.spreads = df['spread'].values if 'spread' in df.columns else np.full(len(df), 20.0)\n",
	                "\n",
	                "        # Features for observation (dropping strings/dates)\n",
	                "        self.features = df.select_dtypes(include=[np.number]).fillna(0).values.astype(np.float32)\n",
	                "\n",
	                "        self.initial_balance = initial_balance\n",
	                "        self.risk_per_trade = risk_per_trade\n",
	                "        self.max_lot_size = max_lot_size\n",
	                "\n",
	                "        # Actions: 0=Buy, 1=Sell, 2=Hold, 3=Do Nothing\n",
	                "        self.action_space = spaces.Discrete(4)\n",
	                "\n",
	                "        self.observation_space = spaces.Box(\n",
	                "            low=-np.inf, high=np.inf, shape=(self.features.shape[1],), dtype=np.float32\n",
	                "        )\n",
	                "\n",
	                "        self.reset()\n",
	                "\n",
	                "    def reset(self):\n",
	                "        self.current_step = 0\n",
	                "        self.balance = self.initial_balance\n",
	                "        self.equity = self.initial_balance\n",
	                "        self.current_position = 0 # 1=Long, -1=Short, 0=Flat\n",
	                "        self.entry_price = 0.0\n",
	                "        self.stop_loss = 0.0\n",
	                "        self.take_profit = 0.0\n",
	                "        self.lot_size = 0.0\n",
	                "        self.history = []\n",
	                "        return self.features[self.current_step]\n",
	                "\n",
	                "    def _calculate_lot_size(self, sl_distance):\n",
	                "        # 2% Risk\n",
	                "        risk_amount = self.balance * self.risk_per_trade\n",
	                "        # XAUUSDc lot size standard: $100 per $1 move for 1 lot usually.\n",
	                "        sl_dollar_risk_per_lot = sl_distance * 100.0\n",
	                "        if sl_dollar_risk_per_lot <= 0:\n",
	                "            return 0.01\n",
	                "\n",
	                "        lots = risk_amount / sl_dollar_risk_per_lot\n",
	                "        return min(self.max_lot_size, max(0.01, round(lots, 2)))\n",
	                "\n",
	                "    def step(self, action):\n",
	                "        done = False\n",
	                "        reward = 0.0\n",
	                "\n",
	                "        current_price = self.prices[self.current_step]\n",
	                "        spread = self.spreads[self.current_step] / 100.0 # Standard conversion for points\n",
	                "\n",
	                "        # Calculate equity running\n",
	                "        if self.current_position == 1:\n",
	                "            self.equity = self.balance + (current_price - self.entry_price) * 100.0 * self.lot_size\n",
	                "        elif self.current_position == -1:\n",
	                "            self.equity = self.balance + (self.entry_price - current_price) * 100.0 * self.lot_size\n",
	                "\n",
	                "        # Execute at close-price\n",
	                "        if action == 0 and self.current_position == 0:\n",
	                "            # BUY\n",
	                "            sl_dist = max(current_price * 0.005, spread * 10.0)\n",
	                "            self.stop_loss = current_price - sl_dist\n",
	                "            self.take_profit = current_price + (sl_dist * 2.0) # > 1R\n",
	                "            self.entry_price = current_price + spread\n",
	                "            self.lot_size = self._calculate_lot_size(sl_dist)\n",
	                "            self.current_position = 1\n",
	                "\n",
	                "        elif action == 1 and self.current_position == 0:\n",
	                "            # SELL\n",
	                "            sl_dist = max(current_price * 0.005, spread * 10.0)\n",
	                "            self.stop_loss = current_price + sl_dist + spread\n",
	                "            self.take_profit = current_price - (sl_dist * 2.0)\n",
	                "            self.entry_price = current_price\n",
	                "            self.lot_size = self._calculate_lot_size(sl_dist)\n",
	                "            self.current_position = -1\n",
	                "\n",
	                "        # Check SL / TP for exit\n",
	                "        if self.current_position == 1:\n",
	                "            if current_price <= self.stop_loss or current_price >= self.take_profit:\n",
	                "                profit = (current_price - self.entry_price) * 100.0 * self.lot_size\n",
	                "                self.balance += profit\n",
	                "                self.equity = self.balance\n",
	                "                self.current_position = 0\n",
	                "                reward = profit\n",
	                "                self.history.append({'type': 'long', 'profit': profit, 'lot': self.lot_size})\n",
	                "\n",
	                "        elif self.current_position == -1:\n",
	                "            if current_price >= self.stop_loss or current_price <= self.take_profit:\n",
	                "                profit = (self.entry_price - current_price) * 100.0 * self.lot_size\n",
	                "                self.balance += profit\n",
	                "                self.equity = self.balance\n",
	                "                self.current_position = 0\n",
	                "                reward = profit\n",
	                "                self.history.append({'type': 'short', 'profit': profit, 'lot': self.lot_size})\n",
	                "\n",
	                "        self.current_step += 1\n",
	                "        if self.current_step >= len(self.prices) - 1 or self.equity <= 0:\n",
	                "            done = True\n",
	                "\n",
	                "        next_state = self.features[self.current_step] if not done else np.zeros(self.features.shape[1], dtype=np.float32)\n",
	                "        return next_state, reward, done, {}\n"
	            ],
	        },
	        # Cell 6: chronological 70/30 split and PPO training on the first part.
	        {
	            "cell_type": "code",
	            "execution_count": None,
	            "metadata": {},
	            "outputs": [],
	            "source": [
	                "# Train Model\n",
	                "if 'df' in locals():\n",
	                "    train_size = int(len(df) * 0.7)\n",
	                "    train_df = df.iloc[:train_size].copy()\n",
	                "    test_df = df.iloc[train_size:].copy()\n",
	                "\n",
	                "    env = XAUUSDM3Env(train_df)\n",
	                "    model = PPO(\"MlpPolicy\", env, verbose=1, device=device)\n",
	                "\n",
	                "    print(\"Starting RL Training...\")\n",
	                "    model.learn(total_timesteps=50000)\n",
	                "    print(\"Training Finished.\")\n"
	            ],
	        },
	        # Cell 7: cumulative-profit chart built from the trades in env.history.
	        {
	            "cell_type": "code",
	            "execution_count": None,
	            "metadata": {},
	            "outputs": [],
	            "source": [
	                "# Plotting white-themed performance metrics\n",
	                "if 'env' in locals() and len(env.history) > 0:\n",
	                "    profits = [x['profit'] for x in env.history]\n",
	                "    cumulative = np.cumsum(profits)\n",
	                "\n",
	                "    fig = go.Figure()\n",
	                "    fig.add_trace(go.Scatter(y=cumulative, mode='lines', name='Cumulative Profit', line=dict(color='blue')))\n",
	                "    fig.update_layout(\n",
	                "        title=\"RL Agent Performance (Cumulative Profit)\",\n",
	                "        xaxis_title=\"Trades\",\n",
	                "        yaxis_title=\"USD Returns\",\n",
	                "        template=\"plotly_white\"\n",
	                "    )\n",
	                "    fig.show()\n"
	            ],
	        },
	        # Cell 8: wrap the trained policy in an nn.Module and export it to
	        # ONNX. The browser download is best-effort; a failure is reported
	        # instead of being swallowed by a bare `except`.
	        {
	            "cell_type": "code",
	            "execution_count": None,
	            "metadata": {},
	            "outputs": [],
	            "source": [
	                "# Export to ONNX for MT5 Expert Advisor\n",
	                "if 'model' in locals():\n",
	                "    class OnnxablePolicy(nn.Module):\n",
	                "        def __init__(self, policy):\n",
	                "            super().__init__()\n",
	                "            self.policy = policy\n",
	                "\n",
	                "        def forward(self, observation):\n",
	                "            return self.policy(observation, deterministic=True)[0]\n",
	                "\n",
	                "    onnx_policy = OnnxablePolicy(model.policy)\n",
	                "    dummy_input = torch.randn(1, env.observation_space.shape[0]).to(device)\n",
	                "    onnx_policy.to(device)\n",
	                "\n",
	                "    onnx_path = \"RL_Agent_XAUUSD.onnx\"\n",
	                "    torch.onnx.export(\n",
	                "        onnx_policy,\n",
	                "        dummy_input,\n",
	                "        onnx_path,\n",
	                "        opset_version=11,\n",
	                "        input_names=[\"input\"],\n",
	                "        output_names=[\"output\"]\n",
	                "    )\n",
	                "    print(f\"ONNX Model successfully exported to {onnx_path}. Next, download it and deploy to your MT5 EA.\")\n",
	                "    try:\n",
	                "        files.download(onnx_path)\n",
	                "    except Exception as exc:\n",
	                "        print(f\"Automatic download failed ({exc}); download {onnx_path} manually from the Colab file browser.\")\n"
	            ],
	        },
	    ],
	    # Notebook-level metadata: Colab provenance stub and a Python 3 kernel.
	    "metadata": {
	        "colab": {"provenance": []},
	        "kernelspec": {
	            "display_name": "Python 3",
	            "name": "python3",
	        },
	        "language_info": {"name": "python"},
	    },
	    "nbformat": 4,
	    "nbformat_minor": 0,
	}

	# Serialize the notebook definition to an .ipynb file in the current working
	# directory. The encoding is given explicitly so the output does not depend
	# on the platform's default text encoding.
	with open("RL_XAUUSD_Colab_System.ipynb", "w", encoding="utf-8") as f:
	    json.dump(notebook, f, indent=1)

	print("Generated RL_XAUUSD_Colab_System.ipynb securely!")