Agent Conversation: google/gemini-3.1-pro-preview
Model: google/gemini-3.1-pro-preview
Total steps: 10
Messages: 34
UserStep 0
Task: quant-graph-stock
Graph-Based Multi-Stock Prediction on CSI300
Objective
Design and implement a graph-based stock prediction model that leverages inter-stock relationships through a stock-concept graph. Your code goes in custom_model.py. Three reference implementations (HIST, GATs, LightGBM) are provided as read-only.
Evaluation
Signal quality: IC, ICIR, Rank IC. Portfolio (TopkDropout, top 50, drop 5): Annualized Return, Max Drawdown, Information Ratio. Automatic via qlib's workflow.
Workflow Configuration
workflow_config.yaml lines 14-26 and 32-45 are editable. This covers the model plus dataset adapter/preprocessor configuration. Instruments, date ranges, train/valid/test splits, and evaluation settings are fixed.
qlib/custom_model.py [EDITABLE — lines 58–156 only]
1: # Custom graph-based stock prediction model for MLS-Bench
2: #
3: # EDITABLE section: CustomModel class with fit() and predict() methods.
4: # FIXED sections: imports and stock-concept graph loading below.
5: import os
6: import numpy as np
7: import pandas as pd
8: import torch
9: import torch.nn as nn
10: import torch.nn.functional as F
11: from qlib.model.base import Model
12: from qlib.data.dataset import DatasetH
13: from qlib.data.dataset.handler import DataHandlerLP
14:
15: DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
16:
17: # =====================================================================
18: # FIXED: Stock-concept graph data loading utilities
19: # =====================================================================
20: # Paths to pre-downloaded graph data
21: STOCK2CONCEPT_PATH = os.path.expanduser("~/.qlib/qlib_data/qlib_csi300_stock2concept.npy")
22: STOCK_INDEX_PATH = os.path.expanduser("~/.qlib/qlib_data/qlib_csi300_stock_index.npy")
23:
24: # Load the stock-concept mapping matrix and stock index
25: # stock2concept_matrix: shape (num_stocks, num_concepts), binary membership
26: # stock_index_dict: dict mapping instrument name -> integer index
27: _stock2concept_matrix = np.load(STOCK2CONCEPT_PATH)
28: _stock_index_dict = np.load(STOCK_INDEX_PATH, allow_pickle=True).item()
29:
30:
31: def get_stock_index(instruments, default_index=733):
32: """Map instrument names to integer indices for stock2concept lookup.
33:
34: Args:
35: instruments: array-like of instrument name strings
36: default_index: fallback index for unknown instruments (733 = padding)
37:
38: Returns:
39: np.ndarray of integer indices
40: """
41: indices = np.array([_stock_index_dict.get(inst, default_index)
42: for inst in instruments])
43: return indices.astype(int)
44:
45:
46: def get_concept_matrix(stock_indices):
47: """Get the concept membership matrix for given stock indices.
Assistant — Step 1: edit
Tool Input
1{
2 "filename": "custom_model.py",
3 "content": "# =====================================================================\n# EDITABLE: CustomModel -- implement your stock prediction model here\n# =====================================================================\nclass DynamicConceptRoutingModel(nn.Module):\n def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1):\n super().__init__()\n self.d_feat = d_feat\n self.hidden_size = hidden_size\n self.rnn = nn.GRU(\n input_size=d_feat,\n hidden_size=hidden_size,\n num_layers=num_layers,\n batch_first=True,\n dropout=dropout,\n )\n self.concept_mlp = nn.Sequential(\n nn.Linear(hidden_size, hidden_size // 2),\n nn.LeakyReLU(),\n nn.Linear(hidden_size // 2, 1)\n )\n self.W_q = nn.Linear(hidden_size, hidden_size)\n self.W_k = nn.Linear(hidden_size, hidden_size)\n self.idio_mlp = nn.Sequential(\n nn.Linear(hidden_size, hidden_size // 2),\n nn.LeakyReLU(),\n nn.Linear(hidden_size // 2, 1)\n )\n \n def forward(self, x, M):\n B = x.shape[0]\n x = x.reshape(B, self.d_feat, -1)\n x = x.permute(0, 2, 1)\n out, _ = self.rnn(x)\n H = out[:, -1, :]\n \n M_sum = M.sum(dim=0, keepdim=True) + 1e-8\n C_states = (M.t() @ H) / M_sum.t()\n \n r_C_hat = self.concept_mlp(C_states).view(-1)\n \n Q = self.W_q(H)\n K = self.W_k(C_states)\n \n scores = Q @ K.t() / (self.hidden_size ** 0.5)\n scores = scores.masked_fill(M == 0, -1e9)\n attention = torch.softmax(scores, dim=-1)\n row_sum = M.sum(dim=1, keepdim=True)\n attention = torch.where(row_sum > 0, attention, torch.zeros_like(attention))\n \n r_sys_hat = (attention * r_C_hat.unsqueeze(0)).sum(dim=1)\n r_idio_hat = self.idio_mlp(H).view(-1)\n \n return r_idio_hat + r_sys_hat, r_C_hat, M_sum, attention\n\ndef correlation_loss(y_hat, y):\n y_mean = y.mean()\n y_hat_mean = y_hat.mean()\n y_centered = y - y_mean\n y_hat_centered = y_hat - y_hat_mean\n cov = (y_centered * y_hat_centered).sum()\n std_y = torch.sqrt((y_centered ** 2).sum() + 1e-8)\n std_y_hat = torch.sqrt((y_hat_centered ** 2).sum() + 1e-8)\n return 
-cov / (std_y * std_y_hat)\n\nclass CustomModel(Model):\n def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1, lr=2e-4, epochs=40, patience=6):\n super().__init__()\n self.fitted = False\n self.kwargs = {\n 'd_feat': d_feat, 'hidden_size': hidden_size,\n 'num_layers': num_layers, 'dropout': dropout,\n 'lr': lr, 'epochs': epochs, 'patience': patience\n }\n self.model = None\n\n def fit(self, dataset: DatasetH):\n df_train = dataset.prepare(\"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n df_valid = dataset.prepare(\"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n\n def get_batches(df):\n daily_count = df.groupby(level=0).size().values\n daily_index = np.roll(np.cumsum(daily_count), 1)\n daily_index[0] = 0\n features, labels = df[\"feature\"].values, df[\"label\"].values.ravel()\n instruments = df.index.get_level_values(\"instrument\").values\n batches = []\n for idx, count in zip(daily_index, daily_count):\n b_slice = slice(idx, idx + count)\n f, l, inst = features[b_slice], labels[b_slice], instruments[b_slice]\n mask = ~(np.isnan(f).any(axis=1) | np.isnan(l))\n if mask.sum() > 0:\n batches.append((f[mask], l[mask], inst[mask]))\n return batches\n\n train_batches = get_batches(df_train)\n valid_batches = get_batches(df_valid)\n \n device = torch.device(DEVICE)\n self.model = DynamicConceptRoutingModel(\n d_feat=self.kwargs['d_feat'], hidden_size=self.kwargs['hidden_size'],\n num_layers=self.kwargs['num_layers'], dropout=self.kwargs['dropout']\n ).to(device)\n \n import torch.optim as optim\n optimizer = optim.Adam(self.model.parameters(), lr=self.kwargs['lr'])\n \n best_valid_loss, patience_counter, best_state = np.inf, 0, None\n \n for epoch in range(self.kwargs['epochs']):\n self.model.train()\n np.random.shuffle(train_batches)\n for f, l, inst in train_batches:\n f_t = torch.tensor(f, dtype=torch.float32).to(device)\n l_t = torch.tensor(l, dtype=torch.float32).to(device)\n M_t = 
torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n \n optimizer.zero_grad()\n y_hat, r_C_hat, M_sum, _ = self.model(f_t, M_t)\n loss_main = F.mse_loss(y_hat, l_t)\n y_target = (M_t.t() @ l_t.unsqueeze(1)).view(-1) / M_sum.view(-1)\n active_mask = M_sum.view(-1) > 1e-7\n loss_concept = F.mse_loss(r_C_hat[active_mask], y_target[active_mask]) if active_mask.sum() > 0 else 0.0\n loss_corr = correlation_loss(y_hat, l_t) if len(l_t) > 1 else 0.0\n \n loss = loss_main + 0.5 * loss_concept + 0.1 * loss_corr\n loss.backward()\n torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)\n optimizer.step()\n \n self.model.eval()\n valid_loss = 0.0\n with torch.no_grad():\n for f, l, inst in valid_batches:\n f_t, l_t = torch.tensor(f, dtype=torch.float32).to(device), torch.tensor(l, dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n y_hat, _, _, _ = self.model(f_t, M_t)\n valid_loss += F.mse_loss(y_hat, l_t).item()\n \n valid_loss /= max(1, len(valid_batches))\n if valid_loss < best_valid_loss:\n best_valid_loss, best_state, patience_counter = valid_loss, {k: v.cpu().clone() for k, v in self.model.state_dict().items()}, 0\n else:\n patience_counter += 1\n if patience_counter >= self.kwargs['patience']:\n break\n \n if best_state: self.model.load_state_dict(best_state)\n self.fitted = True\n\n def predict(self, dataset: DatasetH, segment=\"test\"):\n if not self.fitted: raise ValueError(\"Model is not fitted yet!\")\n df_test = dataset.prepare(segment, col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_I)\n device = torch.device(DEVICE)\n self.model.eval()\n daily_count = df_test.groupby(level=0).size().values\n daily_index = np.roll(np.cumsum(daily_count), 1)\n daily_index[0] = 0\n features, instruments = np.nan_to_num(df_test[\"feature\"].values, nan=0.0), df_test.index.get_level_values(\"instrument\").values\n preds = []\n with 
torch.no_grad():\n for idx, count in zip(daily_index, daily_count):\n b_slice = slice(idx, idx + count)\n f_t = torch.tensor(features[b_slice], dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(instruments[b_slice])), dtype=torch.float32).to(device)\n y_hat, _, _, _ = self.model(f_t, M_t)\n preds.append(y_hat.cpu().numpy())\n return pd.Series(np.concatenate(preds), index=df_test.index, name=\"score\")",
4 "end_line": 156,
5 "start_line": 58,
6 "op": "replace"
7}Tool ResultStep 1
ERROR: Package 'custom_model.py' is not in allowed packages
AssistantStep 2edit
Tool Input
1{
2 "end_line": 156,
3 "content": "# =====================================================================\n# EDITABLE: CustomModel -- implement your stock prediction model here\n# =====================================================================\nclass DynamicConceptRoutingModel(nn.Module):\n def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1):\n super().__init__()\n self.d_feat = d_feat\n self.hidden_size = hidden_size\n self.rnn = nn.GRU(\n input_size=d_feat,\n hidden_size=hidden_size,\n num_layers=num_layers,\n batch_first=True,\n dropout=dropout,\n )\n self.concept_mlp = nn.Sequential(\n nn.Linear(hidden_size, hidden_size // 2),\n nn.LeakyReLU(),\n nn.Linear(hidden_size // 2, 1)\n )\n self.W_q = nn.Linear(hidden_size, hidden_size)\n self.W_k = nn.Linear(hidden_size, hidden_size)\n self.idio_mlp = nn.Sequential(\n nn.Linear(hidden_size, hidden_size // 2),\n nn.LeakyReLU(),\n nn.Linear(hidden_size // 2, 1)\n )\n \n def forward(self, x, M):\n B = x.shape[0]\n x = x.reshape(B, self.d_feat, -1)\n x = x.permute(0, 2, 1)\n out, _ = self.rnn(x)\n H = out[:, -1, :]\n \n M_sum = M.sum(dim=0, keepdim=True) + 1e-8\n C_states = (M.t() @ H) / M_sum.t()\n \n r_C_hat = self.concept_mlp(C_states).view(-1)\n \n Q = self.W_q(H)\n K = self.W_k(C_states)\n \n scores = Q @ K.t() / (self.hidden_size ** 0.5)\n scores = scores.masked_fill(M == 0, -1e9)\n attention = torch.softmax(scores, dim=-1)\n row_sum = M.sum(dim=1, keepdim=True)\n attention = torch.where(row_sum > 0, attention, torch.zeros_like(attention))\n \n r_sys_hat = (attention * r_C_hat.unsqueeze(0)).sum(dim=1)\n r_idio_hat = self.idio_mlp(H).view(-1)\n \n return r_idio_hat + r_sys_hat, r_C_hat, M_sum, attention\n\ndef correlation_loss(y_hat, y):\n y_mean = y.mean()\n y_hat_mean = y_hat.mean()\n y_centered = y - y_mean\n y_hat_centered = y_hat - y_hat_mean\n cov = (y_centered * y_hat_centered).sum()\n std_y = torch.sqrt((y_centered ** 2).sum() + 1e-8)\n std_y_hat = torch.sqrt((y_hat_centered ** 2).sum() + 1e-8)\n return 
-cov / (std_y * std_y_hat)\n\nclass CustomModel(Model):\n def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1, lr=2e-4, epochs=40, patience=6):\n super().__init__()\n self.fitted = False\n self.kwargs = {\n 'd_feat': d_feat, 'hidden_size': hidden_size,\n 'num_layers': num_layers, 'dropout': dropout,\n 'lr': lr, 'epochs': epochs, 'patience': patience\n }\n self.model = None\n\n def fit(self, dataset: DatasetH):\n df_train = dataset.prepare(\"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n df_valid = dataset.prepare(\"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n\n def get_batches(df):\n daily_count = df.groupby(level=0).size().values\n daily_index = np.roll(np.cumsum(daily_count), 1)\n daily_index[0] = 0\n features, labels = df[\"feature\"].values, df[\"label\"].values.ravel()\n instruments = df.index.get_level_values(\"instrument\").values\n batches = []\n for idx, count in zip(daily_index, daily_count):\n b_slice = slice(idx, idx + count)\n f, l, inst = features[b_slice], labels[b_slice], instruments[b_slice]\n mask = ~(np.isnan(f).any(axis=1) | np.isnan(l))\n if mask.sum() > 0:\n batches.append((f[mask], l[mask], inst[mask]))\n return batches\n\n train_batches = get_batches(df_train)\n valid_batches = get_batches(df_valid)\n \n device = torch.device(DEVICE)\n self.model = DynamicConceptRoutingModel(\n d_feat=self.kwargs['d_feat'], hidden_size=self.kwargs['hidden_size'],\n num_layers=self.kwargs['num_layers'], dropout=self.kwargs['dropout']\n ).to(device)\n \n import torch.optim as optim\n optimizer = optim.Adam(self.model.parameters(), lr=self.kwargs['lr'])\n \n best_valid_loss, patience_counter, best_state = np.inf, 0, None\n \n for epoch in range(self.kwargs['epochs']):\n self.model.train()\n np.random.shuffle(train_batches)\n for f, l, inst in train_batches:\n f_t = torch.tensor(f, dtype=torch.float32).to(device)\n l_t = torch.tensor(l, dtype=torch.float32).to(device)\n M_t = 
torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n \n optimizer.zero_grad()\n y_hat, r_C_hat, M_sum, _ = self.model(f_t, M_t)\n loss_main = F.mse_loss(y_hat, l_t)\n y_target = (M_t.t() @ l_t.unsqueeze(1)).view(-1) / M_sum.view(-1)\n active_mask = M_sum.view(-1) > 1e-7\n loss_concept = F.mse_loss(r_C_hat[active_mask], y_target[active_mask]) if active_mask.sum() > 0 else 0.0\n loss_corr = correlation_loss(y_hat, l_t) if len(l_t) > 1 else 0.0\n \n loss = loss_main + 0.5 * loss_concept + 0.1 * loss_corr\n loss.backward()\n torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)\n optimizer.step()\n \n self.model.eval()\n valid_loss = 0.0\n with torch.no_grad():\n for f, l, inst in valid_batches:\n f_t, l_t = torch.tensor(f, dtype=torch.float32).to(device), torch.tensor(l, dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n y_hat, _, _, _ = self.model(f_t, M_t)\n valid_loss += F.mse_loss(y_hat, l_t).item()\n \n valid_loss /= max(1, len(valid_batches))\n if valid_loss < best_valid_loss:\n best_valid_loss, best_state, patience_counter = valid_loss, {k: v.cpu().clone() for k, v in self.model.state_dict().items()}, 0\n else:\n patience_counter += 1\n if patience_counter >= self.kwargs['patience']:\n break\n \n if best_state: self.model.load_state_dict(best_state)\n self.fitted = True\n\n def predict(self, dataset: DatasetH, segment=\"test\"):\n if not self.fitted: raise ValueError(\"Model is not fitted yet!\")\n df_test = dataset.prepare(segment, col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_I)\n device = torch.device(DEVICE)\n self.model.eval()\n daily_count = df_test.groupby(level=0).size().values\n daily_index = np.roll(np.cumsum(daily_count), 1)\n daily_index[0] = 0\n features, instruments = np.nan_to_num(df_test[\"feature\"].values, nan=0.0), df_test.index.get_level_values(\"instrument\").values\n preds = []\n with 
torch.no_grad():\n for idx, count in zip(daily_index, daily_count):\n b_slice = slice(idx, idx + count)\n f_t = torch.tensor(features[b_slice], dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(instruments[b_slice])), dtype=torch.float32).to(device)\n y_hat, _, _, _ = self.model(f_t, M_t)\n preds.append(y_hat.cpu().numpy())\n return pd.Series(np.concatenate(preds), index=df_test.index, name=\"score\")",
4 "start_line": 58,
5 "op": "replace",
6 "filename": "qlib/custom_model.py"
7}Tool ResultStep 2
OK: Replaced lines 58..156 with 167 line(s) in qlib/custom_model.py. Editable range: 58–224.
[Current file: qlib/custom_model.py | editable: 58–224 | total: 224 lines]
55| return _stock2concept_matrix[stock_indices].astype(np.float32)
56|
57|
58 # =====================================================================
59 # EDITABLE: CustomModel -- implement your stock prediction model here
60 # =====================================================================
61 class DynamicConceptRoutingModel(nn.Module):
62 def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1):
63 super().__init__()
64 self.d_feat = d_feat
65 self.hidden_size = hidden_size
66 self.rnn = nn.GRU(
67 input_size=d_feat,
68 hidden_size=hidden_size,
69 num_layers=num_layers,
70 batch_first=True,
71 dropout=dropout,
72
AssistantStep 3test
Tool ResultStep 3
[Test #1] (2 tests remaining; call submit(n=1) to submit this result)
### csi300 (scripts/csi300.sh)
Signal analysis (csi300):
SIGNAL_METRIC IC=0.023930
SIGNAL_METRIC ICIR=0.153082
SIGNAL_METRIC Rank_IC=0.038668
SIGNAL_METRIC Rank_ICIR=0.239620
Portfolio analysis (csi300):
PORTFOLIO_METRIC annualized_return=-0.015828
PORTFOLIO_METRIC max_drawdown=-0.184292
PORTFOLIO_METRIC information_ratio=-0.163553
### csi100 (scripts/csi100.sh)
Signal analysis (csi100):
SIGNAL_METRIC IC=0.037140
SIGNAL_METRIC ICIR=0.185649
SIGNAL_METRIC Rank_IC=0.046646
SIGNAL_METRIC Rank_ICIR=0.240916
Portfolio analysis (csi100):
PORTFOLIO_METRIC annualized_return=-0.040919
PORTFOLIO_METRIC max_drawdown=-0.227149
PORTFOLIO_METRIC information_ratio=-0.656261
[Leaderboard] Results saved: {'ic_csi300': 0.02393, 'icir_csi300': 0.153082, 'rank_ic_csi300': 0.038668, 'rank_icir_csi300': 0.23962, 'annualized_return_csi300': -0.015828, 'max_drawdown_csi300': -0.184292, 'information_ratio_csi300': -0.163553, 'elapsed_csi3
AssistantStep 4edit
Tool Input
1{
2 "end_line": 224,
3 "op": "replace",
4 "start_line": 58,
5 "content": "# =====================================================================\n# EDITABLE: CustomModel -- implement your stock prediction model here\n# =====================================================================\nclass AdvancedGATLayer(nn.Module):\n def __init__(self, hidden_size, num_heads=4, dropout=0.2):\n super().__init__()\n self.num_heads = num_heads\n self.head_dim = hidden_size // num_heads\n self.hidden_size = hidden_size\n \n self.W_q = nn.Linear(hidden_size, hidden_size)\n self.W_k = nn.Linear(hidden_size, hidden_size)\n self.W_v = nn.Linear(hidden_size, hidden_size)\n \n self.out_proj = nn.Linear(hidden_size, hidden_size)\n self.norm = nn.LayerNorm(hidden_size)\n self.dropout = nn.Dropout(dropout)\n \n def forward(self, H, A):\n B = H.shape[0]\n q = self.W_q(H).view(B, self.num_heads, self.head_dim).transpose(0, 1) # [h, B, d]\n k = self.W_k(H).view(B, self.num_heads, self.head_dim).transpose(0, 1)\n v = self.W_v(H).view(B, self.num_heads, self.head_dim).transpose(0, 1)\n \n scores = q @ k.transpose(1, 2) / (self.head_dim ** 0.5) # [h, B, B]\n \n A_tilde = A + torch.eye(B, device=A.device)\n mask = (A_tilde == 0).unsqueeze(0) # [1, B, B]\n scores = scores.masked_fill(mask, -1e9)\n \n attn = F.softmax(scores, dim=-1)\n attn = self.dropout(attn)\n \n out = attn @ v # [h, B, d]\n out = out.transpose(0, 1).contiguous().view(B, self.hidden_size)\n \n out = self.out_proj(out)\n return self.norm(H + F.gelu(out))\n\nclass FinalGraphPredictor(nn.Module):\n def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2):\n super().__init__()\n self.d_feat = d_feat\n self.hidden_size = hidden_size\n \n self.rnn = nn.GRU(\n input_size=d_feat,\n hidden_size=hidden_size,\n num_layers=num_layers,\n batch_first=True,\n dropout=dropout,\n )\n \n self.gat1 = AdvancedGATLayer(hidden_size, num_heads=4, dropout=dropout)\n self.gat2 = AdvancedGATLayer(hidden_size, num_heads=4, dropout=dropout)\n \n self.fc = nn.Sequential(\n nn.Linear(hidden_size, 
hidden_size // 2),\n nn.GELU(),\n nn.Dropout(dropout),\n nn.Linear(hidden_size // 2, 1)\n )\n \n def forward(self, x, M):\n B = x.shape[0]\n x = x.reshape(B, self.d_feat, -1).permute(0, 2, 1)\n out, _ = self.rnn(x)\n H = out[:, -1, :]\n \n A = M @ M.t()\n \n H1 = self.gat1(H, A)\n H2 = self.gat2(H1, A)\n \n return self.fc(H2).squeeze(-1)\n\ndef compute_ic_loss(y_hat, y):\n if len(y) < 2: return torch.tensor(0.0, device=y.device)\n y_mean = y.mean()\n y_hat_mean = y_hat.mean()\n y_c = y - y_mean\n y_hat_c = y_hat - y_hat_mean\n cov = (y_c * y_hat_c).mean()\n var_y = (y_c ** 2).mean()\n var_y_hat = (y_hat_c ** 2).mean()\n return -cov / torch.sqrt(var_y * var_y_hat + 1e-8)\n\nclass CustomModel(Model):\n def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2, lr=2e-4, epochs=40, patience=6):\n super().__init__()\n self.fitted = False\n self.kwargs = {\n 'd_feat': d_feat, 'hidden_size': hidden_size,\n 'num_layers': num_layers, 'dropout': dropout,\n 'lr': lr, 'epochs': epochs, 'patience': patience\n }\n self.model = None\n\n def fit(self, dataset: DatasetH):\n df_train = dataset.prepare(\"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n df_valid = dataset.prepare(\"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n\n def get_batches(df):\n daily_count = df.groupby(level=0).size().values\n daily_index = np.roll(np.cumsum(daily_count), 1)\n daily_index[0] = 0\n features, labels = df[\"feature\"].values, df[\"label\"].values.ravel()\n instruments = df.index.get_level_values(\"instrument\").values\n batches = []\n for idx, count in zip(daily_index, daily_count):\n b_slice = slice(idx, idx + count)\n f, l, inst = features[b_slice], labels[b_slice], instruments[b_slice]\n mask = ~(np.isnan(f).any(axis=1) | np.isnan(l))\n if mask.sum() > 0:\n batches.append((f[mask], l[mask], inst[mask]))\n return batches\n\n train_batches = get_batches(df_train)\n valid_batches = get_batches(df_valid)\n \n device = 
torch.device(DEVICE)\n self.model = FinalGraphPredictor(\n d_feat=self.kwargs['d_feat'], hidden_size=self.kwargs['hidden_size'],\n num_layers=self.kwargs['num_layers'], dropout=self.kwargs['dropout']\n ).to(device)\n \n import torch.optim as optim\n optimizer = optim.Adam(self.model.parameters(), lr=self.kwargs['lr'])\n \n best_valid_loss, patience_counter, best_state = np.inf, 0, None\n \n for epoch in range(self.kwargs['epochs']):\n self.model.train()\n np.random.shuffle(train_batches)\n for f, l, inst in train_batches:\n f_t = torch.tensor(f, dtype=torch.float32).to(device)\n l_t = torch.tensor(l, dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n \n optimizer.zero_grad()\n y_hat = self.model(f_t, M_t)\n loss_mse = F.mse_loss(y_hat, l_t)\n loss_ic = compute_ic_loss(y_hat, l_t)\n loss = loss_mse + 0.1 * loss_ic\n \n loss.backward()\n torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)\n optimizer.step()\n \n self.model.eval()\n valid_loss = 0.0\n with torch.no_grad():\n for f, l, inst in valid_batches:\n f_t, l_t = torch.tensor(f, dtype=torch.float32).to(device), torch.tensor(l, dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n y_hat = self.model(f_t, M_t)\n valid_loss += (F.mse_loss(y_hat, l_t) + 0.1 * compute_ic_loss(y_hat, l_t)).item()\n \n valid_loss /= max(1, len(valid_batches))\n if valid_loss < best_valid_loss:\n best_valid_loss, best_state, patience_counter = valid_loss, {k: v.cpu().clone() for k, v in self.model.state_dict().items()}, 0\n else:\n patience_counter += 1\n if patience_counter >= self.kwargs['patience']:\n break\n \n if best_state: self.model.load_state_dict(best_state)\n self.fitted = True\n\n def predict(self, dataset: DatasetH, segment=\"test\"):\n if not self.fitted: raise ValueError(\"Model is not fitted yet!\")\n df_test = dataset.prepare(segment, 
col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_I)\n device = torch.device(DEVICE)\n self.model.eval()\n daily_count = df_test.groupby(level=0).size().values\n daily_index = np.roll(np.cumsum(daily_count), 1)\n daily_index[0] = 0\n features, instruments = np.nan_to_num(df_test[\"feature\"].values, nan=0.0), df_test.index.get_level_values(\"instrument\").values\n preds = []\n with torch.no_grad():\n for idx, count in zip(daily_index, daily_count):\n b_slice = slice(idx, idx + count)\n f_t = torch.tensor(features[b_slice], dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(instruments[b_slice])), dtype=torch.float32).to(device)\n y_hat = self.model(f_t, M_t)\n preds.append(y_hat.cpu().numpy())\n return pd.Series(np.concatenate(preds), index=df_test.index, name=\"score\")",
6 "filename": "qlib/custom_model.py"
7}Tool ResultStep 4
OK: Replaced lines 58..224 with 187 line(s) in qlib/custom_model.py. Editable range: 58–244.
[Current file: qlib/custom_model.py | editable: 58–244 | total: 244 lines]
55| return _stock2concept_matrix[stock_indices].astype(np.float32)
56|
57|
58 # =====================================================================
59 # EDITABLE: CustomModel -- implement your stock prediction model here
60 # =====================================================================
61 class AdvancedGATLayer(nn.Module):
62 def __init__(self, hidden_size, num_heads=4, dropout=0.2):
63 super().__init__()
64 self.num_heads = num_heads
65 self.head_dim = hidden_size // num_heads
66 self.hidden_size = hidden_size
67
68 self.W_q = nn.Linear(hidden_size, hidden_size)
69 self.W_k = nn.Linear(hidden_size, hidden_size)
70 self.W_v = nn.Linear(hidden_size, hidden_size
AssistantStep 5test
Tool ResultStep 5
[Test #2] (1 test remaining; call submit(n=2) to submit this result)
### csi300 (scripts/csi300.sh)
Signal analysis (csi300):
SIGNAL_METRIC IC=0.039799
SIGNAL_METRIC ICIR=0.303068
SIGNAL_METRIC Rank_IC=0.052033
SIGNAL_METRIC Rank_ICIR=0.401982
Portfolio analysis (csi300):
PORTFOLIO_METRIC annualized_return=0.011250
PORTFOLIO_METRIC max_drawdown=-0.080135
PORTFOLIO_METRIC information_ratio=0.163689
### csi100 (scripts/csi100.sh)
Signal analysis (csi100):
SIGNAL_METRIC IC=0.042470
SIGNAL_METRIC ICIR=0.241989
SIGNAL_METRIC Rank_IC=0.049575
SIGNAL_METRIC Rank_ICIR=0.285902
Portfolio analysis (csi100):
PORTFOLIO_METRIC annualized_return=0.019746
PORTFOLIO_METRIC max_drawdown=-0.130572
PORTFOLIO_METRIC information_ratio=0.356220
[Leaderboard] Results saved: {'ic_csi300': 0.039799, 'icir_csi300': 0.303068, 'rank_ic_csi300': 0.052033, 'rank_icir_csi300': 0.401982, 'annualized_return_csi300': 0.01125, 'max_drawdown_csi300': -0.080135, 'information_ratio_csi300': 0.163689, 'elapsed_csi300': 3
AssistantStep 6edit
Tool Input
1{
2 "content": "# =====================================================================\n# EDITABLE: CustomModel -- implement your stock prediction model here\n# =====================================================================\nclass TFIDFGATLayer(nn.Module):\n def __init__(self, hidden_size, num_heads=4, dropout=0.2):\n super().__init__()\n self.num_heads = num_heads\n self.head_dim = hidden_size // num_heads\n self.hidden_size = hidden_size\n \n self.W_q = nn.Linear(hidden_size, hidden_size)\n self.W_k = nn.Linear(hidden_size, hidden_size)\n self.W_v = nn.Linear(hidden_size, hidden_size)\n \n self.structure_weight = nn.Parameter(torch.ones(num_heads, 1, 1))\n \n self.out_proj = nn.Linear(hidden_size, hidden_size)\n self.norm = nn.LayerNorm(hidden_size)\n self.dropout = nn.Dropout(dropout)\n \n def forward(self, H, A_tfidf):\n B = H.shape[0]\n q = self.W_q(H).view(B, self.num_heads, self.head_dim).transpose(0, 1) # [h, B, d]\n k = self.W_k(H).view(B, self.num_heads, self.head_dim).transpose(0, 1)\n v = self.W_v(H).view(B, self.num_heads, self.head_dim).transpose(0, 1)\n \n scores = q @ k.transpose(1, 2) / (self.head_dim ** 0.5) # [h, B, B]\n \n struct_bias = A_tfidf.unsqueeze(0) # [1, B, B]\n scores = scores + self.structure_weight * struct_bias\n \n A_mask = A_tfidf + torch.eye(B, device=A_tfidf.device)\n mask = (A_mask == 0).unsqueeze(0)\n scores = scores.masked_fill(mask, -1e9)\n \n attn = F.softmax(scores, dim=-1)\n attn = self.dropout(attn)\n \n out = attn @ v # [h, B, d]\n out = out.transpose(0, 1).contiguous().view(B, self.hidden_size)\n \n out = self.out_proj(out)\n return self.norm(H + F.gelu(out))\n\nclass FinalGraphPredictor(nn.Module):\n def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2):\n super().__init__()\n self.d_feat = d_feat\n self.hidden_size = hidden_size\n \n self.rnn = nn.GRU(\n input_size=d_feat,\n hidden_size=hidden_size,\n num_layers=num_layers,\n batch_first=True,\n dropout=dropout,\n )\n \n self.gat1 = 
TFIDFGATLayer(hidden_size, num_heads=4, dropout=dropout)\n self.gat2 = TFIDFGATLayer(hidden_size, num_heads=4, dropout=dropout)\n \n self.fc = nn.Sequential(\n nn.Linear(hidden_size * 2, hidden_size),\n nn.GELU(),\n nn.Dropout(dropout),\n nn.Linear(hidden_size, 1)\n )\n \n def forward(self, x, M):\n B = x.shape[0]\n x = x.reshape(B, self.d_feat, -1).permute(0, 2, 1)\n out, _ = self.rnn(x)\n H = out[:, -1, :]\n \n concept_df = M.sum(dim=0) + 1.0\n idf = torch.log((B + 1.0) / concept_df)\n M_tfidf = M * idf.unsqueeze(0)\n \n A_tfidf = M_tfidf @ M_tfidf.t()\n row_max = A_tfidf.max(dim=1, keepdim=True)[0] + 1e-8\n A_tfidf = A_tfidf / row_max\n \n H1 = self.gat1(H, A_tfidf)\n H2 = self.gat2(H1, A_tfidf)\n \n combined = torch.cat([H, H2], dim=-1)\n return self.fc(combined).squeeze(-1)\n\ndef compute_ic_loss(y_hat, y):\n if len(y) < 2: return torch.tensor(0.0, device=y.device)\n y_mean = y.mean()\n y_hat_mean = y_hat.mean()\n y_c = y - y_mean\n y_hat_c = y_hat - y_hat_mean\n cov = (y_c * y_hat_c).mean()\n var_y = (y_c ** 2).mean()\n var_y_hat = (y_hat_c ** 2).mean()\n return -cov / torch.sqrt(var_y * var_y_hat + 1e-8)\n\nclass CustomModel(Model):\n def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2, lr=2e-4, epochs=40, patience=6):\n super().__init__()\n self.fitted = False\n self.kwargs = {\n 'd_feat': d_feat, 'hidden_size': hidden_size,\n 'num_layers': num_layers, 'dropout': dropout,\n 'lr': lr, 'epochs': epochs, 'patience': patience\n }\n self.model = None\n\n def fit(self, dataset: DatasetH):\n df_train = dataset.prepare(\"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n df_valid = dataset.prepare(\"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n\n def get_batches(df):\n daily_count = df.groupby(level=0).size().values\n daily_index = np.roll(np.cumsum(daily_count), 1)\n daily_index[0] = 0\n features, labels = df[\"feature\"].values, df[\"label\"].values.ravel()\n instruments = 
df.index.get_level_values(\"instrument\").values\n batches = []\n for idx, count in zip(daily_index, daily_count):\n b_slice = slice(idx, idx + count)\n f, l, inst = features[b_slice], labels[b_slice], instruments[b_slice]\n mask = ~(np.isnan(f).any(axis=1) | np.isnan(l))\n if mask.sum() > 0:\n batches.append((f[mask], l[mask], inst[mask]))\n return batches\n\n train_batches = get_batches(df_train)\n valid_batches = get_batches(df_valid)\n \n device = torch.device(DEVICE)\n self.model = FinalGraphPredictor(\n d_feat=self.kwargs['d_feat'], hidden_size=self.kwargs['hidden_size'],\n num_layers=self.kwargs['num_layers'], dropout=self.kwargs['dropout']\n ).to(device)\n \n import torch.optim as optim\n optimizer = optim.Adam(self.model.parameters(), lr=self.kwargs['lr'])\n \n best_valid_loss, patience_counter, best_state = np.inf, 0, None\n \n for epoch in range(self.kwargs['epochs']):\n self.model.train()\n np.random.shuffle(train_batches)\n for f, l, inst in train_batches:\n f_t = torch.tensor(f, dtype=torch.float32).to(device)\n l_t = torch.tensor(l, dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n \n optimizer.zero_grad()\n y_hat = self.model(f_t, M_t)\n loss_mse = F.mse_loss(y_hat, l_t)\n loss_ic = compute_ic_loss(y_hat, l_t)\n loss = loss_mse + 0.1 * loss_ic\n \n loss.backward()\n torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)\n optimizer.step()\n \n self.model.eval()\n valid_loss = 0.0\n with torch.no_grad():\n for f, l, inst in valid_batches:\n f_t, l_t = torch.tensor(f, dtype=torch.float32).to(device), torch.tensor(l, dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n y_hat = self.model(f_t, M_t)\n valid_loss += (F.mse_loss(y_hat, l_t) + 0.1 * compute_ic_loss(y_hat, l_t)).item()\n \n valid_loss /= max(1, len(valid_batches))\n if valid_loss < best_valid_loss:\n best_valid_loss, 
best_state, patience_counter = valid_loss, {k: v.cpu().clone() for k, v in self.model.state_dict().items()}, 0\n else:\n patience_counter += 1\n if patience_counter >= self.kwargs['patience']:\n break\n \n if best_state: self.model.load_state_dict(best_state)\n self.fitted = True\n\n def predict(self, dataset: DatasetH, segment=\"test\"):\n if not self.fitted: raise ValueError(\"Model is not fitted yet!\")\n df_test = dataset.prepare(segment, col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_I)\n device = torch.device(DEVICE)\n self.model.eval()\n daily_count = df_test.groupby(level=0).size().values\n daily_index = np.roll(np.cumsum(daily_count), 1)\n daily_index[0] = 0\n features, instruments = np.nan_to_num(df_test[\"feature\"].values, nan=0.0), df_test.index.get_level_values(\"instrument\").values\n preds = []\n with torch.no_grad():\n for idx, count in zip(daily_index, daily_count):\n b_slice = slice(idx, idx + count)\n f_t = torch.tensor(features[b_slice], dtype=torch.float32).to(device)\n M_t = torch.tensor(get_concept_matrix(get_stock_index(instruments[b_slice])), dtype=torch.float32).to(device)\n y_hat = self.model(f_t, M_t)\n preds.append(y_hat.cpu().numpy())\n return pd.Series(np.concatenate(preds), index=df_test.index, name=\"score\")",
3 "end_line": 244,
4 "filename": "qlib/custom_model.py",
5 "op": "replace",
6 "start_line": 58
7}Tool ResultStep 6
OK: Replaced lines 58..244 with 199 line(s) in qlib/custom_model.py. Editable range: 58–256.
[Current file: qlib/custom_model.py | editable: 58–256 | total: 256 lines]
55| return _stock2concept_matrix[stock_indices].astype(np.float32)
56|
57|
58 # =====================================================================
59 # EDITABLE: CustomModel -- implement your stock prediction model here
60 # =====================================================================
61 class TFIDFGATLayer(nn.Module):
62 def __init__(self, hidden_size, num_heads=4, dropout=0.2):
63 super().__init__()
64 self.num_heads = num_heads
65 self.head_dim = hidden_size // num_heads
66 self.hidden_size = hidden_size
67
68 self.W_q = nn.Linear(hidden_size, hidden_size)
69 self.W_k = nn.Linear(hidden_size, hidden_size)
70 self.W_v = nn.Linear(hidden_size, hidden_size)
AssistantStep 7test
AssistantStep 1edit
Tool Input
1{
2 "start_line": 33,
3 "after_line": 32,
4 "op": "replace",
5 "content": " learn_processors:\n - class: DropnaLabel\n - class: CSRankNorm\n kwargs:\n fields_group: label\n - class: RobustZScoreNorm\n kwargs:\n fields_group: feature\n clip_outlier: true\n - class: Fillna\n kwargs:\n fields_group: feature",
6 "filename": "qlib/workflow_config.yaml",
7 "end_line": 45
8}Tool ResultStep 1
OK: Replaced lines 33..45 with 12 line(s) in qlib/workflow_config.yaml. Editable range: 14–26, 32–44.
[Current file: qlib/workflow_config.yaml | editable: 14–26 | total: 82 lines]
11| - "." # So custom_model.py is importable via module_path
12|
13| task:
14 model:
15 class: CustomModel
16 module_path: custom_model
17 kwargs: {}
18
19 dataset:
20 class: DatasetH
21 module_path: qlib.data.dataset
22 kwargs:
23 handler:
24 class: Alpha360
25 module_path: qlib.contrib.data.handler
26 kwargs:
27| start_time: "2008-01-01"
28| end_time: "2020-08-01"
29| fit_start_time: "2008-01-01"AssistantStep 2undo
Tool Input
1{
2 "n": 1
3}Tool ResultStep 2
Undo complete: Restored: qlib/workflow_config.yaml
AssistantStep 3edit
Tool Input
1{
2 "content": "# =====================================================================\n# EDITABLE: CustomModel — implement your stock prediction model here\n# =====================================================================\nimport copy\n\nclass DualGraphGNN(nn.Module):\n def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2):\n super().__init__()\n self.d_feat = d_feat\n self.rnn = nn.GRU(\n input_size=d_feat,\n hidden_size=hidden_size,\n num_layers=num_layers,\n batch_first=True,\n dropout=dropout if num_layers > 1 else 0.0,\n )\n \n # Static concept graph weights\n self.gnn_w1_static = nn.Linear(hidden_size, hidden_size)\n self.gnn_w2_static = nn.Linear(hidden_size, hidden_size)\n \n # Dynamic feature graph weights \n self.gnn_w1_dynamic = nn.Linear(hidden_size, hidden_size)\n self.gnn_w2_dynamic = nn.Linear(hidden_size, hidden_size)\n \n self.norm1 = nn.LayerNorm(hidden_size)\n self.norm2 = nn.LayerNorm(hidden_size)\n \n self.stock_emb = nn.Embedding(1000, 16) # Fallback idx=733\n \n self.out_proj = nn.Sequential(\n nn.Linear(hidden_size + 16, hidden_size // 2),\n nn.GELU(),\n nn.Dropout(dropout),\n nn.Linear(hidden_size // 2, 1)\n )\n \n def forward(self, x, c_mat, s_idx):\n N = x.shape[0]\n x = x.reshape(N, self.d_feat, -1).permute(0, 2, 1)\n \n out, _ = self.rnn(x)\n h = out[:, -1, :] # [N, H]\n \n # --- Static Concept Graph ---\n A = torch.mm(c_mat, c_mat.t()) # [N, N]\n A = A + torch.eye(N, device=A.device)\n D = A.sum(dim=1, keepdim=True)\n D_inv_sqrt = torch.pow(D, -0.5)\n D_inv_sqrt[torch.isinf(D_inv_sqrt)] = 0.0\n S_static = D_inv_sqrt * A * D_inv_sqrt.t()\n \n # --- Dynamic Feature Graph ---\n h_norm = F.normalize(h, p=2, dim=1)\n sim_mat = torch.mm(h_norm, h_norm.t())\n sim_adj = F.relu(sim_mat)\n D_sim = sim_adj.sum(dim=1, keepdim=True).clamp_min(1e-6)\n S_dynamic = sim_adj / D_sim\n \n # Layer 1\n msg1_s = torch.matmul(S_static, self.gnn_w1_static(h))\n msg1_d = torch.matmul(S_dynamic, self.gnn_w1_dynamic(h))\n h1 = h + 
F.gelu(msg1_s + msg1_d)\n h1 = self.norm1(h1)\n \n # Layer 2\n msg2_s = torch.matmul(S_static, self.gnn_w2_static(h1))\n msg2_d = torch.matmul(S_dynamic, self.gnn_w2_dynamic(h1))\n h2 = h1 + F.gelu(msg2_s + msg2_d)\n h2 = self.norm2(h2)\n \n emb = self.stock_emb(s_idx)\n h_cat = torch.cat([h2, emb], dim=-1)\n pred = self.out_proj(h_cat).squeeze(-1)\n return pred\n\nclass CustomModel(Model):\n def __init__(self):\n super().__init__()\n self.fitted = False\n self.model = DualGraphGNN().to(DEVICE)\n\n def get_daily_batches(self, df):\n batches = []\n for date, group in df.groupby(level=\"datetime\"):\n feats = group[\"feature\"].values\n labs = group[\"label\"].values.ravel()\n insts = group.index.get_level_values(\"instrument\").values\n \n mask = ~np.isnan(labs)\n feats = feats[mask]\n labs = labs[mask]\n insts = insts[mask]\n \n if len(labs) > 1:\n feats = np.nan_to_num(feats, nan=0.0)\n batches.append((feats, labs, insts))\n return batches\n\n def fit(self, dataset: DatasetH):\n df_train = dataset.prepare(\n \"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L\n )\n df_valid = dataset.prepare(\n \"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L\n )\n \n train_batches = self.get_daily_batches(df_train)\n valid_batches = self.get_daily_batches(df_valid)\n \n optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3, weight_decay=1e-5)\n \n n_epochs = 40\n patience = 8\n best_valid_loss = np.inf\n no_improve = 0\n best_state = None\n \n for epoch in range(n_epochs):\n self.model.train()\n np.random.shuffle(train_batches)\n train_loss = 0.0\n \n for b_feat, b_lab, b_inst in train_batches:\n b_feat = torch.tensor(b_feat, dtype=torch.float32).to(DEVICE)\n b_lab = torch.tensor(b_lab, dtype=torch.float32).to(DEVICE)\n stock_idx_np = get_stock_index(b_inst)\n stock_idx = torch.tensor(stock_idx_np, dtype=torch.long).to(DEVICE)\n c_mat = torch.tensor(get_concept_matrix(stock_idx_np), dtype=torch.float32).to(DEVICE)\n \n 
optimizer.zero_grad()\n pred = self.model(b_feat, c_mat, stock_idx)\n loss = F.mse_loss(pred, b_lab)\n \n loss.backward()\n torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)\n optimizer.step()\n train_loss += loss.item()\n \n self.model.eval()\n val_loss = 0.0\n with torch.no_grad():\n for b_feat, b_lab, b_inst in valid_batches:\n b_feat = torch.tensor(b_feat, dtype=torch.float32).to(DEVICE)\n b_lab = torch.tensor(b_lab, dtype=torch.float32).to(DEVICE)\n stock_idx_np = get_stock_index(b_inst)\n stock_idx = torch.tensor(stock_idx_np, dtype=torch.long).to(DEVICE)\n c_mat = torch.tensor(get_concept_matrix(stock_idx_np), dtype=torch.float32).to(DEVICE)\n \n pred = self.model(b_feat, c_mat, stock_idx)\n loss = F.mse_loss(pred, b_lab)\n val_loss += loss.item()\n \n val_loss /= max(1, len(valid_batches))\n print(f\"Epoch {epoch}, Val Loss: {val_loss:.6f}\")\n \n if val_loss < best_valid_loss:\n best_valid_loss = val_loss\n no_improve = 0\n best_state = copy.deepcopy(self.model.state_dict())\n else:\n no_improve += 1\n \n if no_improve >= patience:\n print(f\"Early stopping at epoch {epoch}\")\n break\n \n if best_state is not None:\n self.model.load_state_dict(best_state)\n self.fitted = True\n\n def predict(self, dataset: DatasetH, segment=\"test\"):\n if not self.fitted:\n raise ValueError(\"Model is not fitted yet!\")\n \n df_test = dataset.prepare(\n segment, col_set=[\"feature\"], data_key=DataHandlerLP.DK_I\n )\n if df_test.empty:\n return pd.Series(dtype=np.float32)\n\n self.model.eval()\n preds = []\n idx = []\n \n for date, group in df_test.groupby(level=\"datetime\"):\n feats = group[\"feature\"].values\n feats = np.nan_to_num(feats, nan=0.0)\n insts = group.index.get_level_values(\"instrument\").values\n \n b_feat = torch.tensor(feats, dtype=torch.float32).to(DEVICE)\n stock_idx_np = get_stock_index(insts)\n stock_idx = torch.tensor(stock_idx_np, dtype=torch.long).to(DEVICE)\n c_mat = torch.tensor(get_concept_matrix(stock_idx_np), 
dtype=torch.float32).to(DEVICE)\n \n with torch.no_grad():\n pred = self.model(b_feat, c_mat, stock_idx)\n \n preds.extend(pred.cpu().numpy())\n idx.extend(group.index)\n \n return pd.Series(preds, index=pd.MultiIndex.from_tuples(idx, names=df_test.index.names))",
3 "op": "replace",
4 "filename": "qlib/custom_model.py",
5 "start_line": 58,
6 "after_line": 57,
7 "end_line": 156
8}Tool ResultStep 3
OK: Replaced lines 58..156 with 202 line(s) in qlib/custom_model.py. Editable range: 58–259.
[Current file: qlib/custom_model.py | editable: 58–259 | total: 259 lines]
55| return _stock2concept_matrix[stock_indices].astype(np.float32)
56|
57|
58 # =====================================================================
59 # EDITABLE: CustomModel — implement your stock prediction model here
60 # =====================================================================
61 import copy
62
63 class DualGraphGNN(nn.Module):
64 def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2):
65 super().__init__()
66 self.d_feat = d_feat
67 self.rnn = nn.GRU(
68 input_size=d_feat,
69 hidden_size=hidden_size,
70 num_layers=num_layers,
71 batch_first=True,
72 dropout=dropout if num_layers > 1 else 0.0,
73