Agent Conversation: google/gemini-3.1-pro-preview

Model: google/gemini-3.1-pro-preview
Total steps: 10
Messages: 34
User — Step 0

Task: quant-graph-stock

Graph-Based Multi-Stock Prediction on CSI300

Objective

Design and implement a graph-based stock prediction model that leverages inter-stock relationships through a stock-concept graph. Your code goes in custom_model.py. Three reference implementations (HIST, GATs, LightGBM) are provided as read-only.

Evaluation

Signal quality: IC, ICIR, Rank IC. Portfolio (TopkDropout, top 50, drop 5): Annualized Return, Max Drawdown, Information Ratio. Automatic via qlib's workflow.

Workflow Configuration

workflow_config.yaml lines 14-26 and 32-45 are editable. This covers the model plus dataset adapter/preprocessor configuration. Instruments, date ranges, train/valid/test splits, and evaluation settings are fixed.

qlib/custom_model.py [EDITABLE — lines 58–156 only]

     1: # Custom graph-based stock prediction model for MLS-Bench
     2: #
     3: # EDITABLE section: CustomModel class with fit() and predict() methods.
     4: # FIXED sections: imports and stock-concept graph loading below.
     5: import os
     6: import numpy as np
     7: import pandas as pd
     8: import torch
     9: import torch.nn as nn
    10: import torch.nn.functional as F
    11: from qlib.model.base import Model
    12: from qlib.data.dataset import DatasetH
    13: from qlib.data.dataset.handler import DataHandlerLP
    14: 
    15: DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
    16: 
    17: # =====================================================================
    18: # FIXED: Stock-concept graph data loading utilities
    19: # =====================================================================
    20: # Paths to pre-downloaded graph data
    21: STOCK2CONCEPT_PATH = os.path.expanduser("~/.qlib/qlib_data/qlib_csi300_stock2concept.npy")
    22: STOCK_INDEX_PATH = os.path.expanduser("~/.qlib/qlib_data/qlib_csi300_stock_index.npy")
    23: 
    24: # Load the stock-concept mapping matrix and stock index
    25: # stock2concept_matrix: shape (num_stocks, num_concepts), binary membership
    26: # stock_index_dict: dict mapping instrument name -> integer index
    27: _stock2concept_matrix = np.load(STOCK2CONCEPT_PATH)
    28: _stock_index_dict = np.load(STOCK_INDEX_PATH, allow_pickle=True).item()
    29: 
    30: 
    31: def get_stock_index(instruments, default_index=733):
    32:     """Map instrument names to integer indices for stock2concept lookup.
    33: 
    34:     Args:
    35:         instruments: array-like of instrument name strings
    36:         default_index: fallback index for unknown instruments (733 = padding)
    37: 
    38:     Returns:
    39:         np.ndarray of integer indices
    40:     """
    41:     indices = np.array([_stock_index_dict.get(inst, default_index)
    42:                         for inst in instruments])
    43:     return indices.astype(int)
    44: 
    45: 
    46: def get_concept_matrix(stock_indices):
    47:     """Get the concept membership matrix for given stock indices. [transcript truncated here]
Assistant — Step 1 (edit)
Tool Input
1{
2  "filename": "custom_model.py",
3  "content": "# =====================================================================\n# EDITABLE: CustomModel -- implement your stock prediction model here\n# =====================================================================\nclass DynamicConceptRoutingModel(nn.Module):\n    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1):\n        super().__init__()\n        self.d_feat = d_feat\n        self.hidden_size = hidden_size\n        self.rnn = nn.GRU(\n            input_size=d_feat,\n            hidden_size=hidden_size,\n            num_layers=num_layers,\n            batch_first=True,\n            dropout=dropout,\n        )\n        self.concept_mlp = nn.Sequential(\n            nn.Linear(hidden_size, hidden_size // 2),\n            nn.LeakyReLU(),\n            nn.Linear(hidden_size // 2, 1)\n        )\n        self.W_q = nn.Linear(hidden_size, hidden_size)\n        self.W_k = nn.Linear(hidden_size, hidden_size)\n        self.idio_mlp = nn.Sequential(\n            nn.Linear(hidden_size, hidden_size // 2),\n            nn.LeakyReLU(),\n            nn.Linear(hidden_size // 2, 1)\n        )\n        \n    def forward(self, x, M):\n        B = x.shape[0]\n        x = x.reshape(B, self.d_feat, -1)\n        x = x.permute(0, 2, 1)\n        out, _ = self.rnn(x)\n        H = out[:, -1, :]\n        \n        M_sum = M.sum(dim=0, keepdim=True) + 1e-8\n        C_states = (M.t() @ H) / M_sum.t()\n        \n        r_C_hat = self.concept_mlp(C_states).view(-1)\n        \n        Q = self.W_q(H)\n        K = self.W_k(C_states)\n        \n        scores = Q @ K.t() / (self.hidden_size ** 0.5)\n        scores = scores.masked_fill(M == 0, -1e9)\n        attention = torch.softmax(scores, dim=-1)\n        row_sum = M.sum(dim=1, keepdim=True)\n        attention = torch.where(row_sum > 0, attention, torch.zeros_like(attention))\n        \n        r_sys_hat = (attention * r_C_hat.unsqueeze(0)).sum(dim=1)\n        r_idio_hat = self.idio_mlp(H).view(-1)\n        
\n        return r_idio_hat + r_sys_hat, r_C_hat, M_sum, attention\n\ndef correlation_loss(y_hat, y):\n    y_mean = y.mean()\n    y_hat_mean = y_hat.mean()\n    y_centered = y - y_mean\n    y_hat_centered = y_hat - y_hat_mean\n    cov = (y_centered * y_hat_centered).sum()\n    std_y = torch.sqrt((y_centered ** 2).sum() + 1e-8)\n    std_y_hat = torch.sqrt((y_hat_centered ** 2).sum() + 1e-8)\n    return -cov / (std_y * std_y_hat)\n\nclass CustomModel(Model):\n    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1, lr=2e-4, epochs=40, patience=6):\n        super().__init__()\n        self.fitted = False\n        self.kwargs = {\n            'd_feat': d_feat, 'hidden_size': hidden_size,\n            'num_layers': num_layers, 'dropout': dropout,\n            'lr': lr, 'epochs': epochs, 'patience': patience\n        }\n        self.model = None\n\n    def fit(self, dataset: DatasetH):\n        df_train = dataset.prepare(\"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n        df_valid = dataset.prepare(\"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n\n        def get_batches(df):\n            daily_count = df.groupby(level=0).size().values\n            daily_index = np.roll(np.cumsum(daily_count), 1)\n            daily_index[0] = 0\n            features, labels = df[\"feature\"].values, df[\"label\"].values.ravel()\n            instruments = df.index.get_level_values(\"instrument\").values\n            batches = []\n            for idx, count in zip(daily_index, daily_count):\n                b_slice = slice(idx, idx + count)\n                f, l, inst = features[b_slice], labels[b_slice], instruments[b_slice]\n                mask = ~(np.isnan(f).any(axis=1) | np.isnan(l))\n                if mask.sum() > 0:\n                    batches.append((f[mask], l[mask], inst[mask]))\n            return batches\n\n        train_batches = get_batches(df_train)\n        valid_batches = 
get_batches(df_valid)\n        \n        device = torch.device(DEVICE)\n        self.model = DynamicConceptRoutingModel(\n            d_feat=self.kwargs['d_feat'], hidden_size=self.kwargs['hidden_size'],\n            num_layers=self.kwargs['num_layers'], dropout=self.kwargs['dropout']\n        ).to(device)\n        \n        import torch.optim as optim\n        optimizer = optim.Adam(self.model.parameters(), lr=self.kwargs['lr'])\n        \n        best_valid_loss, patience_counter, best_state = np.inf, 0, None\n        \n        for epoch in range(self.kwargs['epochs']):\n            self.model.train()\n            np.random.shuffle(train_batches)\n            for f, l, inst in train_batches:\n                f_t = torch.tensor(f, dtype=torch.float32).to(device)\n                l_t = torch.tensor(l, dtype=torch.float32).to(device)\n                M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n                \n                optimizer.zero_grad()\n                y_hat, r_C_hat, M_sum, _ = self.model(f_t, M_t)\n                loss_main = F.mse_loss(y_hat, l_t)\n                y_target = (M_t.t() @ l_t.unsqueeze(1)).view(-1) / M_sum.view(-1)\n                active_mask = M_sum.view(-1) > 1e-7\n                loss_concept = F.mse_loss(r_C_hat[active_mask], y_target[active_mask]) if active_mask.sum() > 0 else 0.0\n                loss_corr = correlation_loss(y_hat, l_t) if len(l_t) > 1 else 0.0\n                \n                loss = loss_main + 0.5 * loss_concept + 0.1 * loss_corr\n                loss.backward()\n                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)\n                optimizer.step()\n                \n            self.model.eval()\n            valid_loss = 0.0\n            with torch.no_grad():\n                for f, l, inst in valid_batches:\n                    f_t, l_t = torch.tensor(f, dtype=torch.float32).to(device), torch.tensor(l, 
dtype=torch.float32).to(device)\n                    M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n                    y_hat, _, _, _ = self.model(f_t, M_t)\n                    valid_loss += F.mse_loss(y_hat, l_t).item()\n            \n            valid_loss /= max(1, len(valid_batches))\n            if valid_loss < best_valid_loss:\n                best_valid_loss, best_state, patience_counter = valid_loss, {k: v.cpu().clone() for k, v in self.model.state_dict().items()}, 0\n            else:\n                patience_counter += 1\n            if patience_counter >= self.kwargs['patience']:\n                break\n                \n        if best_state: self.model.load_state_dict(best_state)\n        self.fitted = True\n\n    def predict(self, dataset: DatasetH, segment=\"test\"):\n        if not self.fitted: raise ValueError(\"Model is not fitted yet!\")\n        df_test = dataset.prepare(segment, col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_I)\n        device = torch.device(DEVICE)\n        self.model.eval()\n        daily_count = df_test.groupby(level=0).size().values\n        daily_index = np.roll(np.cumsum(daily_count), 1)\n        daily_index[0] = 0\n        features, instruments = np.nan_to_num(df_test[\"feature\"].values, nan=0.0), df_test.index.get_level_values(\"instrument\").values\n        preds = []\n        with torch.no_grad():\n            for idx, count in zip(daily_index, daily_count):\n                b_slice = slice(idx, idx + count)\n                f_t = torch.tensor(features[b_slice], dtype=torch.float32).to(device)\n                M_t = torch.tensor(get_concept_matrix(get_stock_index(instruments[b_slice])), dtype=torch.float32).to(device)\n                y_hat, _, _, _ = self.model(f_t, M_t)\n                preds.append(y_hat.cpu().numpy())\n        return pd.Series(np.concatenate(preds), index=df_test.index, name=\"score\")",
4  "end_line": 156,
5  "start_line": 58,
6  "op": "replace"
7}
Tool Result — Step 1
ERROR: Package 'custom_model.py' is not in allowed packages
Assistant — Step 2 (edit)
Tool Input
1{
2  "end_line": 156,
3  "content": "# =====================================================================\n# EDITABLE: CustomModel -- implement your stock prediction model here\n# =====================================================================\nclass DynamicConceptRoutingModel(nn.Module):\n    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1):\n        super().__init__()\n        self.d_feat = d_feat\n        self.hidden_size = hidden_size\n        self.rnn = nn.GRU(\n            input_size=d_feat,\n            hidden_size=hidden_size,\n            num_layers=num_layers,\n            batch_first=True,\n            dropout=dropout,\n        )\n        self.concept_mlp = nn.Sequential(\n            nn.Linear(hidden_size, hidden_size // 2),\n            nn.LeakyReLU(),\n            nn.Linear(hidden_size // 2, 1)\n        )\n        self.W_q = nn.Linear(hidden_size, hidden_size)\n        self.W_k = nn.Linear(hidden_size, hidden_size)\n        self.idio_mlp = nn.Sequential(\n            nn.Linear(hidden_size, hidden_size // 2),\n            nn.LeakyReLU(),\n            nn.Linear(hidden_size // 2, 1)\n        )\n        \n    def forward(self, x, M):\n        B = x.shape[0]\n        x = x.reshape(B, self.d_feat, -1)\n        x = x.permute(0, 2, 1)\n        out, _ = self.rnn(x)\n        H = out[:, -1, :]\n        \n        M_sum = M.sum(dim=0, keepdim=True) + 1e-8\n        C_states = (M.t() @ H) / M_sum.t()\n        \n        r_C_hat = self.concept_mlp(C_states).view(-1)\n        \n        Q = self.W_q(H)\n        K = self.W_k(C_states)\n        \n        scores = Q @ K.t() / (self.hidden_size ** 0.5)\n        scores = scores.masked_fill(M == 0, -1e9)\n        attention = torch.softmax(scores, dim=-1)\n        row_sum = M.sum(dim=1, keepdim=True)\n        attention = torch.where(row_sum > 0, attention, torch.zeros_like(attention))\n        \n        r_sys_hat = (attention * r_C_hat.unsqueeze(0)).sum(dim=1)\n        r_idio_hat = self.idio_mlp(H).view(-1)\n        
\n        return r_idio_hat + r_sys_hat, r_C_hat, M_sum, attention\n\ndef correlation_loss(y_hat, y):\n    y_mean = y.mean()\n    y_hat_mean = y_hat.mean()\n    y_centered = y - y_mean\n    y_hat_centered = y_hat - y_hat_mean\n    cov = (y_centered * y_hat_centered).sum()\n    std_y = torch.sqrt((y_centered ** 2).sum() + 1e-8)\n    std_y_hat = torch.sqrt((y_hat_centered ** 2).sum() + 1e-8)\n    return -cov / (std_y * std_y_hat)\n\nclass CustomModel(Model):\n    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1, lr=2e-4, epochs=40, patience=6):\n        super().__init__()\n        self.fitted = False\n        self.kwargs = {\n            'd_feat': d_feat, 'hidden_size': hidden_size,\n            'num_layers': num_layers, 'dropout': dropout,\n            'lr': lr, 'epochs': epochs, 'patience': patience\n        }\n        self.model = None\n\n    def fit(self, dataset: DatasetH):\n        df_train = dataset.prepare(\"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n        df_valid = dataset.prepare(\"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n\n        def get_batches(df):\n            daily_count = df.groupby(level=0).size().values\n            daily_index = np.roll(np.cumsum(daily_count), 1)\n            daily_index[0] = 0\n            features, labels = df[\"feature\"].values, df[\"label\"].values.ravel()\n            instruments = df.index.get_level_values(\"instrument\").values\n            batches = []\n            for idx, count in zip(daily_index, daily_count):\n                b_slice = slice(idx, idx + count)\n                f, l, inst = features[b_slice], labels[b_slice], instruments[b_slice]\n                mask = ~(np.isnan(f).any(axis=1) | np.isnan(l))\n                if mask.sum() > 0:\n                    batches.append((f[mask], l[mask], inst[mask]))\n            return batches\n\n        train_batches = get_batches(df_train)\n        valid_batches = 
get_batches(df_valid)\n        \n        device = torch.device(DEVICE)\n        self.model = DynamicConceptRoutingModel(\n            d_feat=self.kwargs['d_feat'], hidden_size=self.kwargs['hidden_size'],\n            num_layers=self.kwargs['num_layers'], dropout=self.kwargs['dropout']\n        ).to(device)\n        \n        import torch.optim as optim\n        optimizer = optim.Adam(self.model.parameters(), lr=self.kwargs['lr'])\n        \n        best_valid_loss, patience_counter, best_state = np.inf, 0, None\n        \n        for epoch in range(self.kwargs['epochs']):\n            self.model.train()\n            np.random.shuffle(train_batches)\n            for f, l, inst in train_batches:\n                f_t = torch.tensor(f, dtype=torch.float32).to(device)\n                l_t = torch.tensor(l, dtype=torch.float32).to(device)\n                M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n                \n                optimizer.zero_grad()\n                y_hat, r_C_hat, M_sum, _ = self.model(f_t, M_t)\n                loss_main = F.mse_loss(y_hat, l_t)\n                y_target = (M_t.t() @ l_t.unsqueeze(1)).view(-1) / M_sum.view(-1)\n                active_mask = M_sum.view(-1) > 1e-7\n                loss_concept = F.mse_loss(r_C_hat[active_mask], y_target[active_mask]) if active_mask.sum() > 0 else 0.0\n                loss_corr = correlation_loss(y_hat, l_t) if len(l_t) > 1 else 0.0\n                \n                loss = loss_main + 0.5 * loss_concept + 0.1 * loss_corr\n                loss.backward()\n                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)\n                optimizer.step()\n                \n            self.model.eval()\n            valid_loss = 0.0\n            with torch.no_grad():\n                for f, l, inst in valid_batches:\n                    f_t, l_t = torch.tensor(f, dtype=torch.float32).to(device), torch.tensor(l, 
dtype=torch.float32).to(device)\n                    M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n                    y_hat, _, _, _ = self.model(f_t, M_t)\n                    valid_loss += F.mse_loss(y_hat, l_t).item()\n            \n            valid_loss /= max(1, len(valid_batches))\n            if valid_loss < best_valid_loss:\n                best_valid_loss, best_state, patience_counter = valid_loss, {k: v.cpu().clone() for k, v in self.model.state_dict().items()}, 0\n            else:\n                patience_counter += 1\n            if patience_counter >= self.kwargs['patience']:\n                break\n                \n        if best_state: self.model.load_state_dict(best_state)\n        self.fitted = True\n\n    def predict(self, dataset: DatasetH, segment=\"test\"):\n        if not self.fitted: raise ValueError(\"Model is not fitted yet!\")\n        df_test = dataset.prepare(segment, col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_I)\n        device = torch.device(DEVICE)\n        self.model.eval()\n        daily_count = df_test.groupby(level=0).size().values\n        daily_index = np.roll(np.cumsum(daily_count), 1)\n        daily_index[0] = 0\n        features, instruments = np.nan_to_num(df_test[\"feature\"].values, nan=0.0), df_test.index.get_level_values(\"instrument\").values\n        preds = []\n        with torch.no_grad():\n            for idx, count in zip(daily_index, daily_count):\n                b_slice = slice(idx, idx + count)\n                f_t = torch.tensor(features[b_slice], dtype=torch.float32).to(device)\n                M_t = torch.tensor(get_concept_matrix(get_stock_index(instruments[b_slice])), dtype=torch.float32).to(device)\n                y_hat, _, _, _ = self.model(f_t, M_t)\n                preds.append(y_hat.cpu().numpy())\n        return pd.Series(np.concatenate(preds), index=df_test.index, name=\"score\")",
4  "start_line": 58,
5  "op": "replace",
6  "filename": "qlib/custom_model.py"
7}
Tool Result — Step 2
OK: Replaced lines 58..156 with 167 line(s) in qlib/custom_model.py. Editable range: 58–224.

[Current file: qlib/custom_model.py | editable: 58–224 | total: 224 lines]
    55|     return _stock2concept_matrix[stock_indices].astype(np.float32)
    56| 
    57| 
    58  # =====================================================================
    59  # EDITABLE: CustomModel -- implement your stock prediction model here
    60  # =====================================================================
    61  class DynamicConceptRoutingModel(nn.Module):
    62      def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.1):
    63          super().__init__()
    64          self.d_feat = d_feat
    65          self.hidden_size = hidden_size
    66          self.rnn = nn.GRU(
    67              input_size=d_feat,
    68              hidden_size=hidden_size,
    69              num_layers=num_layers,
    70              batch_first=True,
    71              dropout=dropout,
    72
Assistant — Step 3 (test)
Tool Result — Step 3
[Test #1] (2 tests remaining; call submit(n=1) to submit this result)

### csi300 (scripts/csi300.sh)
Signal analysis (csi300):
SIGNAL_METRIC IC=0.023930
SIGNAL_METRIC ICIR=0.153082
SIGNAL_METRIC Rank_IC=0.038668
SIGNAL_METRIC Rank_ICIR=0.239620
Portfolio analysis (csi300):
PORTFOLIO_METRIC annualized_return=-0.015828
PORTFOLIO_METRIC max_drawdown=-0.184292
PORTFOLIO_METRIC information_ratio=-0.163553

### csi100 (scripts/csi100.sh)
Signal analysis (csi100):
SIGNAL_METRIC IC=0.037140
SIGNAL_METRIC ICIR=0.185649
SIGNAL_METRIC Rank_IC=0.046646
SIGNAL_METRIC Rank_ICIR=0.240916
Portfolio analysis (csi100):
PORTFOLIO_METRIC annualized_return=-0.040919
PORTFOLIO_METRIC max_drawdown=-0.227149
PORTFOLIO_METRIC information_ratio=-0.656261

[Leaderboard] Results saved: {'ic_csi300': 0.02393, 'icir_csi300': 0.153082, 'rank_ic_csi300': 0.038668, 'rank_icir_csi300': 0.23962, 'annualized_return_csi300': -0.015828, 'max_drawdown_csi300': -0.184292, 'information_ratio_csi300': -0.163553, 'elapsed_csi3
Assistant — Step 4 (edit)
Tool Input
1{
2  "end_line": 224,
3  "op": "replace",
4  "start_line": 58,
5  "content": "# =====================================================================\n# EDITABLE: CustomModel -- implement your stock prediction model here\n# =====================================================================\nclass AdvancedGATLayer(nn.Module):\n    def __init__(self, hidden_size, num_heads=4, dropout=0.2):\n        super().__init__()\n        self.num_heads = num_heads\n        self.head_dim = hidden_size // num_heads\n        self.hidden_size = hidden_size\n        \n        self.W_q = nn.Linear(hidden_size, hidden_size)\n        self.W_k = nn.Linear(hidden_size, hidden_size)\n        self.W_v = nn.Linear(hidden_size, hidden_size)\n        \n        self.out_proj = nn.Linear(hidden_size, hidden_size)\n        self.norm = nn.LayerNorm(hidden_size)\n        self.dropout = nn.Dropout(dropout)\n        \n    def forward(self, H, A):\n        B = H.shape[0]\n        q = self.W_q(H).view(B, self.num_heads, self.head_dim).transpose(0, 1) # [h, B, d]\n        k = self.W_k(H).view(B, self.num_heads, self.head_dim).transpose(0, 1)\n        v = self.W_v(H).view(B, self.num_heads, self.head_dim).transpose(0, 1)\n        \n        scores = q @ k.transpose(1, 2) / (self.head_dim ** 0.5) # [h, B, B]\n        \n        A_tilde = A + torch.eye(B, device=A.device)\n        mask = (A_tilde == 0).unsqueeze(0) # [1, B, B]\n        scores = scores.masked_fill(mask, -1e9)\n        \n        attn = F.softmax(scores, dim=-1)\n        attn = self.dropout(attn)\n        \n        out = attn @ v # [h, B, d]\n        out = out.transpose(0, 1).contiguous().view(B, self.hidden_size)\n        \n        out = self.out_proj(out)\n        return self.norm(H + F.gelu(out))\n\nclass FinalGraphPredictor(nn.Module):\n    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2):\n        super().__init__()\n        self.d_feat = d_feat\n        self.hidden_size = hidden_size\n        \n        self.rnn = nn.GRU(\n            input_size=d_feat,\n            
hidden_size=hidden_size,\n            num_layers=num_layers,\n            batch_first=True,\n            dropout=dropout,\n        )\n        \n        self.gat1 = AdvancedGATLayer(hidden_size, num_heads=4, dropout=dropout)\n        self.gat2 = AdvancedGATLayer(hidden_size, num_heads=4, dropout=dropout)\n        \n        self.fc = nn.Sequential(\n            nn.Linear(hidden_size, hidden_size // 2),\n            nn.GELU(),\n            nn.Dropout(dropout),\n            nn.Linear(hidden_size // 2, 1)\n        )\n        \n    def forward(self, x, M):\n        B = x.shape[0]\n        x = x.reshape(B, self.d_feat, -1).permute(0, 2, 1)\n        out, _ = self.rnn(x)\n        H = out[:, -1, :]\n        \n        A = M @ M.t()\n        \n        H1 = self.gat1(H, A)\n        H2 = self.gat2(H1, A)\n        \n        return self.fc(H2).squeeze(-1)\n\ndef compute_ic_loss(y_hat, y):\n    if len(y) < 2: return torch.tensor(0.0, device=y.device)\n    y_mean = y.mean()\n    y_hat_mean = y_hat.mean()\n    y_c = y - y_mean\n    y_hat_c = y_hat - y_hat_mean\n    cov = (y_c * y_hat_c).mean()\n    var_y = (y_c ** 2).mean()\n    var_y_hat = (y_hat_c ** 2).mean()\n    return -cov / torch.sqrt(var_y * var_y_hat + 1e-8)\n\nclass CustomModel(Model):\n    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2, lr=2e-4, epochs=40, patience=6):\n        super().__init__()\n        self.fitted = False\n        self.kwargs = {\n            'd_feat': d_feat, 'hidden_size': hidden_size,\n            'num_layers': num_layers, 'dropout': dropout,\n            'lr': lr, 'epochs': epochs, 'patience': patience\n        }\n        self.model = None\n\n    def fit(self, dataset: DatasetH):\n        df_train = dataset.prepare(\"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n        df_valid = dataset.prepare(\"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n\n        def get_batches(df):\n            daily_count = 
df.groupby(level=0).size().values\n            daily_index = np.roll(np.cumsum(daily_count), 1)\n            daily_index[0] = 0\n            features, labels = df[\"feature\"].values, df[\"label\"].values.ravel()\n            instruments = df.index.get_level_values(\"instrument\").values\n            batches = []\n            for idx, count in zip(daily_index, daily_count):\n                b_slice = slice(idx, idx + count)\n                f, l, inst = features[b_slice], labels[b_slice], instruments[b_slice]\n                mask = ~(np.isnan(f).any(axis=1) | np.isnan(l))\n                if mask.sum() > 0:\n                    batches.append((f[mask], l[mask], inst[mask]))\n            return batches\n\n        train_batches = get_batches(df_train)\n        valid_batches = get_batches(df_valid)\n        \n        device = torch.device(DEVICE)\n        self.model = FinalGraphPredictor(\n            d_feat=self.kwargs['d_feat'], hidden_size=self.kwargs['hidden_size'],\n            num_layers=self.kwargs['num_layers'], dropout=self.kwargs['dropout']\n        ).to(device)\n        \n        import torch.optim as optim\n        optimizer = optim.Adam(self.model.parameters(), lr=self.kwargs['lr'])\n        \n        best_valid_loss, patience_counter, best_state = np.inf, 0, None\n        \n        for epoch in range(self.kwargs['epochs']):\n            self.model.train()\n            np.random.shuffle(train_batches)\n            for f, l, inst in train_batches:\n                f_t = torch.tensor(f, dtype=torch.float32).to(device)\n                l_t = torch.tensor(l, dtype=torch.float32).to(device)\n                M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n                \n                optimizer.zero_grad()\n                y_hat = self.model(f_t, M_t)\n                loss_mse = F.mse_loss(y_hat, l_t)\n                loss_ic = compute_ic_loss(y_hat, l_t)\n                loss = loss_mse + 0.1 * loss_ic\n      
          \n                loss.backward()\n                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)\n                optimizer.step()\n                \n            self.model.eval()\n            valid_loss = 0.0\n            with torch.no_grad():\n                for f, l, inst in valid_batches:\n                    f_t, l_t = torch.tensor(f, dtype=torch.float32).to(device), torch.tensor(l, dtype=torch.float32).to(device)\n                    M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n                    y_hat = self.model(f_t, M_t)\n                    valid_loss += (F.mse_loss(y_hat, l_t) + 0.1 * compute_ic_loss(y_hat, l_t)).item()\n            \n            valid_loss /= max(1, len(valid_batches))\n            if valid_loss < best_valid_loss:\n                best_valid_loss, best_state, patience_counter = valid_loss, {k: v.cpu().clone() for k, v in self.model.state_dict().items()}, 0\n            else:\n                patience_counter += 1\n            if patience_counter >= self.kwargs['patience']:\n                break\n                \n        if best_state: self.model.load_state_dict(best_state)\n        self.fitted = True\n\n    def predict(self, dataset: DatasetH, segment=\"test\"):\n        if not self.fitted: raise ValueError(\"Model is not fitted yet!\")\n        df_test = dataset.prepare(segment, col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_I)\n        device = torch.device(DEVICE)\n        self.model.eval()\n        daily_count = df_test.groupby(level=0).size().values\n        daily_index = np.roll(np.cumsum(daily_count), 1)\n        daily_index[0] = 0\n        features, instruments = np.nan_to_num(df_test[\"feature\"].values, nan=0.0), df_test.index.get_level_values(\"instrument\").values\n        preds = []\n        with torch.no_grad():\n            for idx, count in zip(daily_index, daily_count):\n                b_slice = slice(idx, idx + 
count)\n                f_t = torch.tensor(features[b_slice], dtype=torch.float32).to(device)\n                M_t = torch.tensor(get_concept_matrix(get_stock_index(instruments[b_slice])), dtype=torch.float32).to(device)\n                y_hat = self.model(f_t, M_t)\n                preds.append(y_hat.cpu().numpy())\n        return pd.Series(np.concatenate(preds), index=df_test.index, name=\"score\")",
6  "filename": "qlib/custom_model.py"
7}
Tool Result — Step 4
OK: Replaced lines 58..224 with 187 line(s) in qlib/custom_model.py. Editable range: 58–244.

[Current file: qlib/custom_model.py | editable: 58–244 | total: 244 lines]
    55|     return _stock2concept_matrix[stock_indices].astype(np.float32)
    56| 
    57| 
    58  # =====================================================================
    59  # EDITABLE: CustomModel -- implement your stock prediction model here
    60  # =====================================================================
    61  class AdvancedGATLayer(nn.Module):
    62      def __init__(self, hidden_size, num_heads=4, dropout=0.2):
    63          super().__init__()
    64          self.num_heads = num_heads
    65          self.head_dim = hidden_size // num_heads
    66          self.hidden_size = hidden_size
    67          
    68          self.W_q = nn.Linear(hidden_size, hidden_size)
    69          self.W_k = nn.Linear(hidden_size, hidden_size)
    70          self.W_v = nn.Linear(hidden_size, hidden_size
Assistant — Step 5 (test)
Tool Result — Step 5
[Test #2] (1 test remaining; call submit(n=2) to submit this result)

### csi300 (scripts/csi300.sh)
Signal analysis (csi300):
SIGNAL_METRIC IC=0.039799
SIGNAL_METRIC ICIR=0.303068
SIGNAL_METRIC Rank_IC=0.052033
SIGNAL_METRIC Rank_ICIR=0.401982
Portfolio analysis (csi300):
PORTFOLIO_METRIC annualized_return=0.011250
PORTFOLIO_METRIC max_drawdown=-0.080135
PORTFOLIO_METRIC information_ratio=0.163689

### csi100 (scripts/csi100.sh)
Signal analysis (csi100):
SIGNAL_METRIC IC=0.042470
SIGNAL_METRIC ICIR=0.241989
SIGNAL_METRIC Rank_IC=0.049575
SIGNAL_METRIC Rank_ICIR=0.285902
Portfolio analysis (csi100):
PORTFOLIO_METRIC annualized_return=0.019746
PORTFOLIO_METRIC max_drawdown=-0.130572
PORTFOLIO_METRIC information_ratio=0.356220

[Leaderboard] Results saved: {'ic_csi300': 0.039799, 'icir_csi300': 0.303068, 'rank_ic_csi300': 0.052033, 'rank_icir_csi300': 0.401982, 'annualized_return_csi300': 0.01125, 'max_drawdown_csi300': -0.080135, 'information_ratio_csi300': 0.163689, 'elapsed_csi300': 3
Assistant — Step 6 (edit)
Tool Input
1{
2  "content": "# =====================================================================\n# EDITABLE: CustomModel -- implement your stock prediction model here\n# =====================================================================\nclass TFIDFGATLayer(nn.Module):\n    def __init__(self, hidden_size, num_heads=4, dropout=0.2):\n        super().__init__()\n        self.num_heads = num_heads\n        self.head_dim = hidden_size // num_heads\n        self.hidden_size = hidden_size\n        \n        self.W_q = nn.Linear(hidden_size, hidden_size)\n        self.W_k = nn.Linear(hidden_size, hidden_size)\n        self.W_v = nn.Linear(hidden_size, hidden_size)\n        \n        self.structure_weight = nn.Parameter(torch.ones(num_heads, 1, 1))\n        \n        self.out_proj = nn.Linear(hidden_size, hidden_size)\n        self.norm = nn.LayerNorm(hidden_size)\n        self.dropout = nn.Dropout(dropout)\n        \n    def forward(self, H, A_tfidf):\n        B = H.shape[0]\n        q = self.W_q(H).view(B, self.num_heads, self.head_dim).transpose(0, 1) # [h, B, d]\n        k = self.W_k(H).view(B, self.num_heads, self.head_dim).transpose(0, 1)\n        v = self.W_v(H).view(B, self.num_heads, self.head_dim).transpose(0, 1)\n        \n        scores = q @ k.transpose(1, 2) / (self.head_dim ** 0.5) # [h, B, B]\n        \n        struct_bias = A_tfidf.unsqueeze(0) # [1, B, B]\n        scores = scores + self.structure_weight * struct_bias\n        \n        A_mask = A_tfidf + torch.eye(B, device=A_tfidf.device)\n        mask = (A_mask == 0).unsqueeze(0)\n        scores = scores.masked_fill(mask, -1e9)\n        \n        attn = F.softmax(scores, dim=-1)\n        attn = self.dropout(attn)\n        \n        out = attn @ v # [h, B, d]\n        out = out.transpose(0, 1).contiguous().view(B, self.hidden_size)\n        \n        out = self.out_proj(out)\n        return self.norm(H + F.gelu(out))\n\nclass FinalGraphPredictor(nn.Module):\n    def __init__(self, d_feat=6, hidden_size=64, 
num_layers=2, dropout=0.2):\n        super().__init__()\n        self.d_feat = d_feat\n        self.hidden_size = hidden_size\n        \n        self.rnn = nn.GRU(\n            input_size=d_feat,\n            hidden_size=hidden_size,\n            num_layers=num_layers,\n            batch_first=True,\n            dropout=dropout,\n        )\n        \n        self.gat1 = TFIDFGATLayer(hidden_size, num_heads=4, dropout=dropout)\n        self.gat2 = TFIDFGATLayer(hidden_size, num_heads=4, dropout=dropout)\n        \n        self.fc = nn.Sequential(\n            nn.Linear(hidden_size * 2, hidden_size),\n            nn.GELU(),\n            nn.Dropout(dropout),\n            nn.Linear(hidden_size, 1)\n        )\n        \n    def forward(self, x, M):\n        B = x.shape[0]\n        x = x.reshape(B, self.d_feat, -1).permute(0, 2, 1)\n        out, _ = self.rnn(x)\n        H = out[:, -1, :]\n        \n        concept_df = M.sum(dim=0) + 1.0\n        idf = torch.log((B + 1.0) / concept_df)\n        M_tfidf = M * idf.unsqueeze(0)\n        \n        A_tfidf = M_tfidf @ M_tfidf.t()\n        row_max = A_tfidf.max(dim=1, keepdim=True)[0] + 1e-8\n        A_tfidf = A_tfidf / row_max\n        \n        H1 = self.gat1(H, A_tfidf)\n        H2 = self.gat2(H1, A_tfidf)\n        \n        combined = torch.cat([H, H2], dim=-1)\n        return self.fc(combined).squeeze(-1)\n\ndef compute_ic_loss(y_hat, y):\n    if len(y) < 2: return torch.tensor(0.0, device=y.device)\n    y_mean = y.mean()\n    y_hat_mean = y_hat.mean()\n    y_c = y - y_mean\n    y_hat_c = y_hat - y_hat_mean\n    cov = (y_c * y_hat_c).mean()\n    var_y = (y_c ** 2).mean()\n    var_y_hat = (y_hat_c ** 2).mean()\n    return -cov / torch.sqrt(var_y * var_y_hat + 1e-8)\n\nclass CustomModel(Model):\n    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2, lr=2e-4, epochs=40, patience=6):\n        super().__init__()\n        self.fitted = False\n        self.kwargs = {\n            'd_feat': d_feat, 
'hidden_size': hidden_size,\n            'num_layers': num_layers, 'dropout': dropout,\n            'lr': lr, 'epochs': epochs, 'patience': patience\n        }\n        self.model = None\n\n    def fit(self, dataset: DatasetH):\n        df_train = dataset.prepare(\"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n        df_valid = dataset.prepare(\"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L)\n\n        def get_batches(df):\n            daily_count = df.groupby(level=0).size().values\n            daily_index = np.roll(np.cumsum(daily_count), 1)\n            daily_index[0] = 0\n            features, labels = df[\"feature\"].values, df[\"label\"].values.ravel()\n            instruments = df.index.get_level_values(\"instrument\").values\n            batches = []\n            for idx, count in zip(daily_index, daily_count):\n                b_slice = slice(idx, idx + count)\n                f, l, inst = features[b_slice], labels[b_slice], instruments[b_slice]\n                mask = ~(np.isnan(f).any(axis=1) | np.isnan(l))\n                if mask.sum() > 0:\n                    batches.append((f[mask], l[mask], inst[mask]))\n            return batches\n\n        train_batches = get_batches(df_train)\n        valid_batches = get_batches(df_valid)\n        \n        device = torch.device(DEVICE)\n        self.model = FinalGraphPredictor(\n            d_feat=self.kwargs['d_feat'], hidden_size=self.kwargs['hidden_size'],\n            num_layers=self.kwargs['num_layers'], dropout=self.kwargs['dropout']\n        ).to(device)\n        \n        import torch.optim as optim\n        optimizer = optim.Adam(self.model.parameters(), lr=self.kwargs['lr'])\n        \n        best_valid_loss, patience_counter, best_state = np.inf, 0, None\n        \n        for epoch in range(self.kwargs['epochs']):\n            self.model.train()\n            np.random.shuffle(train_batches)\n            for f, l, inst in train_batches:\n          
      f_t = torch.tensor(f, dtype=torch.float32).to(device)\n                l_t = torch.tensor(l, dtype=torch.float32).to(device)\n                M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n                \n                optimizer.zero_grad()\n                y_hat = self.model(f_t, M_t)\n                loss_mse = F.mse_loss(y_hat, l_t)\n                loss_ic = compute_ic_loss(y_hat, l_t)\n                loss = loss_mse + 0.1 * loss_ic\n                \n                loss.backward()\n                torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=5.0)\n                optimizer.step()\n                \n            self.model.eval()\n            valid_loss = 0.0\n            with torch.no_grad():\n                for f, l, inst in valid_batches:\n                    f_t, l_t = torch.tensor(f, dtype=torch.float32).to(device), torch.tensor(l, dtype=torch.float32).to(device)\n                    M_t = torch.tensor(get_concept_matrix(get_stock_index(inst)), dtype=torch.float32).to(device)\n                    y_hat = self.model(f_t, M_t)\n                    valid_loss += (F.mse_loss(y_hat, l_t) + 0.1 * compute_ic_loss(y_hat, l_t)).item()\n            \n            valid_loss /= max(1, len(valid_batches))\n            if valid_loss < best_valid_loss:\n                best_valid_loss, best_state, patience_counter = valid_loss, {k: v.cpu().clone() for k, v in self.model.state_dict().items()}, 0\n            else:\n                patience_counter += 1\n            if patience_counter >= self.kwargs['patience']:\n                break\n                \n        if best_state: self.model.load_state_dict(best_state)\n        self.fitted = True\n\n    def predict(self, dataset: DatasetH, segment=\"test\"):\n        if not self.fitted: raise ValueError(\"Model is not fitted yet!\")\n        df_test = dataset.prepare(segment, col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_I)\n        
device = torch.device(DEVICE)\n        self.model.eval()\n        daily_count = df_test.groupby(level=0).size().values\n        daily_index = np.roll(np.cumsum(daily_count), 1)\n        daily_index[0] = 0\n        features, instruments = np.nan_to_num(df_test[\"feature\"].values, nan=0.0), df_test.index.get_level_values(\"instrument\").values\n        preds = []\n        with torch.no_grad():\n            for idx, count in zip(daily_index, daily_count):\n                b_slice = slice(idx, idx + count)\n                f_t = torch.tensor(features[b_slice], dtype=torch.float32).to(device)\n                M_t = torch.tensor(get_concept_matrix(get_stock_index(instruments[b_slice])), dtype=torch.float32).to(device)\n                y_hat = self.model(f_t, M_t)\n                preds.append(y_hat.cpu().numpy())\n        return pd.Series(np.concatenate(preds), index=df_test.index, name=\"score\")",
3  "end_line": 244,
4  "filename": "qlib/custom_model.py",
5  "op": "replace",
6  "start_line": 58
7}
Tool ResultStep 6
OK: Replaced lines 58..244 with 199 line(s) in qlib/custom_model.py. Editable range: 58–256.

[Current file: qlib/custom_model.py | editable: 58–256 | total: 256 lines]
    55|     return _stock2concept_matrix[stock_indices].astype(np.float32)
    56| 
    57| 
    58  # =====================================================================
    59  # EDITABLE: CustomModel -- implement your stock prediction model here
    60  # =====================================================================
    61  class TFIDFGATLayer(nn.Module):
    62      def __init__(self, hidden_size, num_heads=4, dropout=0.2):
    63          super().__init__()
    64          self.num_heads = num_heads
    65          self.head_dim = hidden_size // num_heads
    66          self.hidden_size = hidden_size
    67          
    68          self.W_q = nn.Linear(hidden_size, hidden_size)
    69          self.W_k = nn.Linear(hidden_size, hidden_size)
    70          self.W_v = nn.Linear(hidden_size, hidden_size)
 
AssistantStep 7test
AssistantStep 1edit
Tool Input
1{
2  "start_line": 33,
3  "after_line": 32,
4  "op": "replace",
5  "content": "          learn_processors:\n            - class: DropnaLabel\n            - class: CSRankNorm\n              kwargs:\n                fields_group: label\n            - class: RobustZScoreNorm\n              kwargs:\n                fields_group: feature\n                clip_outlier: true\n            - class: Fillna\n              kwargs:\n                fields_group: feature",
6  "filename": "qlib/workflow_config.yaml",
7  "end_line": 45
8}
Tool ResultStep 1
OK: Replaced lines 33..45 with 12 line(s) in qlib/workflow_config.yaml. Editable range: 14–26, 32–44.

[Current file: qlib/workflow_config.yaml | editable: 14–26 | total: 82 lines]
    11|     - "."           # So custom_model.py is importable via module_path
    12| 
    13| task:
    14    model:
    15      class: CustomModel
    16      module_path: custom_model
    17      kwargs: {}
    18  
    19    dataset:
    20      class: DatasetH
    21      module_path: qlib.data.dataset
    22      kwargs:
    23        handler:
    24          class: Alpha360
    25          module_path: qlib.contrib.data.handler
    26          kwargs:
    27|           start_time: "2008-01-01"
    28|           end_time: "2020-08-01"
    29|           fit_start_time: "2008-01-01"
AssistantStep 2undo
Tool Input
1{
2  "n": 1
3}
Tool ResultStep 2
Undo complete:
Restored: qlib/workflow_config.yaml
AssistantStep 3edit
Tool Input
1{
2  "content": "# =====================================================================\n# EDITABLE: CustomModel — implement your stock prediction model here\n# =====================================================================\nimport copy\n\nclass DualGraphGNN(nn.Module):\n    def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2):\n        super().__init__()\n        self.d_feat = d_feat\n        self.rnn = nn.GRU(\n            input_size=d_feat,\n            hidden_size=hidden_size,\n            num_layers=num_layers,\n            batch_first=True,\n            dropout=dropout if num_layers > 1 else 0.0,\n        )\n        \n        # Static concept graph weights\n        self.gnn_w1_static = nn.Linear(hidden_size, hidden_size)\n        self.gnn_w2_static = nn.Linear(hidden_size, hidden_size)\n        \n        # Dynamic feature graph weights \n        self.gnn_w1_dynamic = nn.Linear(hidden_size, hidden_size)\n        self.gnn_w2_dynamic = nn.Linear(hidden_size, hidden_size)\n        \n        self.norm1 = nn.LayerNorm(hidden_size)\n        self.norm2 = nn.LayerNorm(hidden_size)\n        \n        self.stock_emb = nn.Embedding(1000, 16) # Fallback idx=733\n        \n        self.out_proj = nn.Sequential(\n            nn.Linear(hidden_size + 16, hidden_size // 2),\n            nn.GELU(),\n            nn.Dropout(dropout),\n            nn.Linear(hidden_size // 2, 1)\n        )\n        \n    def forward(self, x, c_mat, s_idx):\n        N = x.shape[0]\n        x = x.reshape(N, self.d_feat, -1).permute(0, 2, 1)\n        \n        out, _ = self.rnn(x)\n        h = out[:, -1, :] # [N, H]\n        \n        # --- Static Concept Graph ---\n        A = torch.mm(c_mat, c_mat.t()) # [N, N]\n        A = A + torch.eye(N, device=A.device)\n        D = A.sum(dim=1, keepdim=True)\n        D_inv_sqrt = torch.pow(D, -0.5)\n        D_inv_sqrt[torch.isinf(D_inv_sqrt)] = 0.0\n        S_static = D_inv_sqrt * A * D_inv_sqrt.t()\n        \n        # --- Dynamic 
Feature Graph ---\n        h_norm = F.normalize(h, p=2, dim=1)\n        sim_mat = torch.mm(h_norm, h_norm.t())\n        sim_adj = F.relu(sim_mat)\n        D_sim = sim_adj.sum(dim=1, keepdim=True).clamp_min(1e-6)\n        S_dynamic = sim_adj / D_sim\n        \n        # Layer 1\n        msg1_s = torch.matmul(S_static, self.gnn_w1_static(h))\n        msg1_d = torch.matmul(S_dynamic, self.gnn_w1_dynamic(h))\n        h1 = h + F.gelu(msg1_s + msg1_d)\n        h1 = self.norm1(h1)\n        \n        # Layer 2\n        msg2_s = torch.matmul(S_static, self.gnn_w2_static(h1))\n        msg2_d = torch.matmul(S_dynamic, self.gnn_w2_dynamic(h1))\n        h2 = h1 + F.gelu(msg2_s + msg2_d)\n        h2 = self.norm2(h2)\n        \n        emb = self.stock_emb(s_idx)\n        h_cat = torch.cat([h2, emb], dim=-1)\n        pred = self.out_proj(h_cat).squeeze(-1)\n        return pred\n\nclass CustomModel(Model):\n    def __init__(self):\n        super().__init__()\n        self.fitted = False\n        self.model = DualGraphGNN().to(DEVICE)\n\n    def get_daily_batches(self, df):\n        batches = []\n        for date, group in df.groupby(level=\"datetime\"):\n            feats = group[\"feature\"].values\n            labs = group[\"label\"].values.ravel()\n            insts = group.index.get_level_values(\"instrument\").values\n            \n            mask = ~np.isnan(labs)\n            feats = feats[mask]\n            labs = labs[mask]\n            insts = insts[mask]\n            \n            if len(labs) > 1:\n                feats = np.nan_to_num(feats, nan=0.0)\n                batches.append((feats, labs, insts))\n        return batches\n\n    def fit(self, dataset: DatasetH):\n        df_train = dataset.prepare(\n            \"train\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L\n        )\n        df_valid = dataset.prepare(\n            \"valid\", col_set=[\"feature\", \"label\"], data_key=DataHandlerLP.DK_L\n        )\n        \n        train_batches = 
self.get_daily_batches(df_train)\n        valid_batches = self.get_daily_batches(df_valid)\n        \n        optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3, weight_decay=1e-5)\n        \n        n_epochs = 40\n        patience = 8\n        best_valid_loss = np.inf\n        no_improve = 0\n        best_state = None\n        \n        for epoch in range(n_epochs):\n            self.model.train()\n            np.random.shuffle(train_batches)\n            train_loss = 0.0\n            \n            for b_feat, b_lab, b_inst in train_batches:\n                b_feat = torch.tensor(b_feat, dtype=torch.float32).to(DEVICE)\n                b_lab = torch.tensor(b_lab, dtype=torch.float32).to(DEVICE)\n                stock_idx_np = get_stock_index(b_inst)\n                stock_idx = torch.tensor(stock_idx_np, dtype=torch.long).to(DEVICE)\n                c_mat = torch.tensor(get_concept_matrix(stock_idx_np), dtype=torch.float32).to(DEVICE)\n                \n                optimizer.zero_grad()\n                pred = self.model(b_feat, c_mat, stock_idx)\n                loss = F.mse_loss(pred, b_lab)\n                \n                loss.backward()\n                torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)\n                optimizer.step()\n                train_loss += loss.item()\n                \n            self.model.eval()\n            val_loss = 0.0\n            with torch.no_grad():\n                for b_feat, b_lab, b_inst in valid_batches:\n                    b_feat = torch.tensor(b_feat, dtype=torch.float32).to(DEVICE)\n                    b_lab = torch.tensor(b_lab, dtype=torch.float32).to(DEVICE)\n                    stock_idx_np = get_stock_index(b_inst)\n                    stock_idx = torch.tensor(stock_idx_np, dtype=torch.long).to(DEVICE)\n                    c_mat = torch.tensor(get_concept_matrix(stock_idx_np), dtype=torch.float32).to(DEVICE)\n                    \n                    pred = self.model(b_feat, 
c_mat, stock_idx)\n                    loss = F.mse_loss(pred, b_lab)\n                    val_loss += loss.item()\n                    \n            val_loss /= max(1, len(valid_batches))\n            print(f\"Epoch {epoch}, Val Loss: {val_loss:.6f}\")\n            \n            if val_loss < best_valid_loss:\n                best_valid_loss = val_loss\n                no_improve = 0\n                best_state = copy.deepcopy(self.model.state_dict())\n            else:\n                no_improve += 1\n                \n            if no_improve >= patience:\n                print(f\"Early stopping at epoch {epoch}\")\n                break\n                \n        if best_state is not None:\n            self.model.load_state_dict(best_state)\n        self.fitted = True\n\n    def predict(self, dataset: DatasetH, segment=\"test\"):\n        if not self.fitted:\n            raise ValueError(\"Model is not fitted yet!\")\n            \n        df_test = dataset.prepare(\n            segment, col_set=[\"feature\"], data_key=DataHandlerLP.DK_I\n        )\n        if df_test.empty:\n            return pd.Series(dtype=np.float32)\n\n        self.model.eval()\n        preds = []\n        idx = []\n        \n        for date, group in df_test.groupby(level=\"datetime\"):\n            feats = group[\"feature\"].values\n            feats = np.nan_to_num(feats, nan=0.0)\n            insts = group.index.get_level_values(\"instrument\").values\n            \n            b_feat = torch.tensor(feats, dtype=torch.float32).to(DEVICE)\n            stock_idx_np = get_stock_index(insts)\n            stock_idx = torch.tensor(stock_idx_np, dtype=torch.long).to(DEVICE)\n            c_mat = torch.tensor(get_concept_matrix(stock_idx_np), dtype=torch.float32).to(DEVICE)\n            \n            with torch.no_grad():\n                pred = self.model(b_feat, c_mat, stock_idx)\n                \n            preds.extend(pred.cpu().numpy())\n            idx.extend(group.index)\n         
   \n        return pd.Series(preds, index=pd.MultiIndex.from_tuples(idx, names=df_test.index.names))",
3  "op": "replace",
4  "filename": "qlib/custom_model.py",
5  "start_line": 58,
6  "after_line": 57,
7  "end_line": 156
8}
Tool ResultStep 3
OK: Replaced lines 58..156 with 202 line(s) in qlib/custom_model.py. Editable range: 58–259.

[Current file: qlib/custom_model.py | editable: 58–259 | total: 259 lines]
    55|     return _stock2concept_matrix[stock_indices].astype(np.float32)
    56| 
    57| 
    58  # =====================================================================
    59  # EDITABLE: CustomModel — implement your stock prediction model here
    60  # =====================================================================
    61  import copy
    62  
    63  class DualGraphGNN(nn.Module):
    64      def __init__(self, d_feat=6, hidden_size=64, num_layers=2, dropout=0.2):
    65          super().__init__()
    66          self.d_feat = d_feat
    67          self.rnn = nn.GRU(
    68              input_size=d_feat,
    69              hidden_size=hidden_size,
    70              num_layers=num_layers,
    71              batch_first=True,
    72              dropout=dropout if num_layers > 1 else 0.0,
    73