Agent Conversation: deepseek-reasoner
Model: deepseek-reasoner
Total steps: 6
Messages: 13
User — Step 0
Task: ts-imputation
Time Series Imputation: Custom Model Design
Objective
Design and implement a custom deep learning model for time series missing value imputation. Your code goes in the Model class in models/Custom.py. Three reference implementations (DLinear, TimesNet, PatchTST) are provided as read-only.
Evaluation
Trained and evaluated on three multivariate datasets with 25% random masking:
- ETTh1 (7 variables)
- Weather (21 variables)
- ECL (321 variables)
All use seq_len=96. Metrics: MSE and MAE on masked regions only (lower is better).
Time-Series-Library/models/Custom.py [EDITABLE — entire file only]
1: import torch
2: import torch.nn as nn
3:
4:
5: class Model(nn.Module):
6: """
7: Custom model for time series imputation.
8:
9: Forward signature: forward(x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None)
10: - x_enc: [batch, seq_len, enc_in] — input with masked values set to 0
11: - x_mark_enc: [batch, seq_len, time_features] — time feature encoding
12: - x_dec: not used for imputation (None)
13: - x_mark_dec: not used for imputation (None)
14: - mask: [batch, seq_len, enc_in] — binary mask (1=observed, 0=masked)
15:
16: Must return: [batch, seq_len, enc_in] — reconstructed sequence
17: """
18:
19: def __init__(self, configs):
20: super(Model, self).__init__()
21: self.task_name = configs.task_name
22: self.seq_len = configs.seq_len
23: self.pred_len = configs.seq_len # For imputation, pred_len = seq_len
24: self.enc_in = configs.enc_in
25: # TODO: Define your model architecture here
26:
27: def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
28: """
29: Imputation: reconstruct missing values in the input sequence.
30: Input: x_enc [batch, seq_len, enc_in] with zeros at masked positions
31: Mask: [batch, seq_len, enc_in], 1=observed, 0=masked
32: Output: [batch, seq_len, enc_in]
33: """
34: # TODO: Implement your imputation logic
35: return x_enc # Placeholder: return input as-is
36:
37: def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
38: if self.task_name == 'imputation':
39: dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
40: return dec_out
41: return None
Time-Series-Library/models/DLinear.py [READ-ONLY — do not edit]
1: import torch
2: import torch.nn as nn
3: import torch.nn.functional as F
4: from layers.Autoformer_EncDec import series_decomp
5:
6:
7: class Model(nn.Module):
8: """
9: Paper link: https://arxiv.org/pdf/2205.13504.pdf
10: """
11:
12: def __ini
Assistant — Step 1: edit
Tool Input
1{
2 "op": "replace",
3 "filename": "Time-Series-Library/models/Custom.py",
4 "start_line": 1,
5 "end_line": 41,
6 "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom layers.Embed import DataEmbedding\nfrom layers.Transformer_EncDec import Encoder, EncoderLayer\nfrom layers.SelfAttention_Family import FullAttention, AttentionLayer\n\n\nclass Model(nn.Module):\n \"\"\"\n Custom model for time series imputation.\n Forward signature: forward(x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None)\n - x_enc: [batch, seq_len, enc_in] — input with masked values set to 0\n - x_mark_enc: [batch, seq_len, time_features] — time feature encoding\n - x_dec: not used for imputation (None)\n - x_mark_dec: not used for imputation (None)\n - mask: [batch, seq_len, enc_in] — binary mask (1=observed, 0=masked)\n\n Must return: [batch, seq_len, enc_in] — reconstructed sequence\n \"\"\"\n\n def __init__(self, configs):\n super(Model, self).__init__()\n self.task_name = configs.task_name\n self.seq_len = configs.seq_len\n self.pred_len = configs.seq_len # For imputation, pred_len = seq_len\n self.enc_in = configs.enc_in\n self.d_model = configs.d_model\n self.dropout = configs.dropout\n \n # Mask embedding: project mask to same dimension as values\n self.mask_embedding = nn.Linear(self.enc_in, self.d_model)\n \n # Value embedding\n self.value_embedding = nn.Linear(self.enc_in, self.d_model)\n \n # Time feature embedding (using DataEmbedding from layers)\n self.time_embedding = DataEmbedding(self.enc_in, self.d_model, \n configs.embed, configs.freq, \n self.dropout)\n \n # Transformer encoder with mask-aware attention\n self.encoder = Encoder(\n [\n EncoderLayer(\n AttentionLayer(\n FullAttention(False, configs.factor, \n attention_dropout=configs.dropout,\n output_attention=False),\n self.d_model, configs.n_heads),\n self.d_model,\n configs.d_ff,\n dropout=configs.dropout,\n activation=configs.activation\n ) for _ in range(configs.e_layers)\n ],\n norm_layer=nn.LayerNorm(self.d_model)\n )\n \n # Projection back to original dimension\n self.projection = 
nn.Linear(self.d_model, self.enc_in)\n \n # Residual refinement layers (2 extra steps)\n self.refine_layers = nn.ModuleList([\n nn.Linear(self.enc_in, self.enc_in) for _ in range(2)\n ])\n \n # Uncertainty estimation head (predicts log variance)\n self.uncertainty_head = nn.Linear(self.d_model, self.enc_in)\n\n def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):\n \"\"\"\n Imputation: reconstruct missing values in the input sequence.\n Input: x_enc [batch, seq_len, enc_in] with zeros at masked positions\n Mask: [batch, seq_len, enc_in], 1=observed, 0=masked\n Output: [batch, seq_len, enc_in]\n \"\"\"\n batch_size, seq_len, n_vars = x_enc.shape\n \n # Normalization using observed values only (similar to TimesNet)\n means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1)\n means = means.unsqueeze(1).detach()\n x_enc = x_enc - means\n x_enc = x_enc.masked_fill(mask == 0, 0)\n stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) /\n torch.sum(mask == 1, dim=1) + 1e-5)\n stdev = stdev.unsqueeze(1).detach()\n x_enc = x_enc / stdev\n \n # Encode mask information\n mask_embed = self.mask_embedding(mask.float())\n \n # Encode value information\n value_embed = self.value_embedding(x_enc)\n \n # Combine value and mask embeddings (additive)\n x_combined = value_embed + mask_embed\n \n # Add time embedding if time features available\n if x_mark_enc is not None:\n time_embed = self.time_embedding(x_enc, x_mark_enc)\n x_combined = x_combined + time_embed\n \n # Pass through transformer encoder\n enc_out, _ = self.encoder(x_combined)\n \n # Project back to variable space\n dec_out = self.projection(enc_out)\n \n # Uncertainty estimation (for possible future use)\n # uncertainty = self.uncertainty_head(enc_out)\n \n # Residual refinement (2 steps)\n for layer in self.refine_layers:\n residual = layer(dec_out)\n dec_out = dec_out + 0.1 * residual # Small step\n \n # De-normalization\n dec_out = dec_out * stdev\n dec_out = dec_out + means\n \n # Keep observed values 
unchanged (optional)\n # dec_out = dec_out * (1 - mask) + x_enc * mask\n \n return dec_out\n\n def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):\n if self.task_name == 'imputation':\n dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)\n return dec_out\n return None"
7}
Tool Result — Step 1
OK: Replaced lines 1..41 with 131 line(s) in Time-Series-Library/models/Custom.py. Editable range: entire file.
[Current file: Time-Series-Library/models/Custom.py | editable: 1–131 | total: 131 lines]
1 import torch
2 import torch.nn as nn
3 import torch.nn.functional as F
4 from layers.Embed import DataEmbedding
5 from layers.Transformer_EncDec import Encoder, EncoderLayer
6 from layers.SelfAttention_Family import FullAttention, AttentionLayer
7
8
9 class Model(nn.Module):
10 """
11 Custom model for time series imputation.
12 Forward signature: forward(x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None)
13 - x_enc: [batch, seq_len, enc_in] — input with masked values set to 0
14 - x_mark_enc: [batch, seq_len, time_features] — time feature encoding
15 - x_dec: not used for imputation (None)
16 - x_mark_dec: not used for imputation (None)
17 - mask: [batch, seq_l
Assistant — Step 2: test
Tool Result — Step 2
[Test #1] (2 tests remaining; call submit(n=1) to submit this result) ### ETTh1 (scripts/etth1.sh) Training metrics (last epochs): TRAIN_METRICS epoch=6 train_loss=0.0314556 vali_loss=0.0544689 test_loss=0.0413540 TRAIN_METRICS epoch=7 train_loss=0.0309600 vali_loss=0.0542433 test_loss=0.0401134 TRAIN_METRICS epoch=8 train_loss=0.0304932 vali_loss=0.0530166 test_loss=0.0393081 TRAIN_METRICS epoch=9 train_loss=0.0301576 vali_loss=0.0533328 test_loss=0.0390623 TRAIN_METRICS epoch=10 train_loss=0.0301334 vali_loss=0.0534223 test_loss=0.0391408 Test results (ETTh1): MSE: 0.039453, MAE: 0.130379 ### Weather (scripts/weather.sh) Training metrics (last epochs): TRAIN_METRICS epoch=6 train_loss=0.1273184 vali_loss=0.0876791 test_loss=0.0304748 TRAIN_METRICS epoch=7 train_loss=0.1263281 vali_loss=0.0839318 test_loss=0.0298363 TRAIN_METRICS epoch=8 train_loss=0.1327198 vali_loss=0.0731080 test_loss=0.0295672 TRAIN_METRICS epoch=9 train_loss=0.1218260 vali_loss=0.0776822 test_loss=0.0294062 T
Assistant — Step 3: edit
Tool Input
1{
2 "op": "replace",
3 "filename": "Time-Series-Library/models/Custom.py",
4 "start_line": 1,
5 "end_line": 131,
6 "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom layers.Embed import PatchEmbedding\nfrom layers.Transformer_EncDec import Encoder, EncoderLayer\nfrom layers.SelfAttention_Family import FullAttention, AttentionLayer\n\n\nclass MultiScalePatchEmbedding(nn.Module):\n \"\"\"Multi-scale patch embedding with mask awareness.\"\"\"\n def __init__(self, d_model, patch_lens=[8, 16, 32], strides=[4, 8, 16], dropout=0.1):\n super().__init__()\n self.patch_embeddings = nn.ModuleList()\n self.patch_lens = patch_lens\n self.strides = strides\n \n for patch_len, stride in zip(patch_lens, strides):\n padding = stride\n embedding = nn.Sequential(\n nn.ReplicationPad1d((0, padding)),\n nn.Linear(patch_len, d_model, bias=False),\n nn.Dropout(dropout)\n )\n self.patch_embeddings.append(embedding)\n \n # Learnable scale weights\n self.scale_weights = nn.Parameter(torch.ones(len(patch_lens)))\n \n # Position embedding\n self.pos_embed = nn.Parameter(torch.randn(1, d_model))\n \n def forward(self, x, mask):\n \"\"\"x: [batch, n_vars, seq_len], mask: [batch, n_vars, seq_len]\"\"\"\n batch, n_vars, seq_len = x.shape\n all_patches = []\n all_masks = []\n \n for i, (patch_len, stride) in enumerate(zip(self.patch_lens, self.strides)):\n # Unfold to patches\n x_padded = self.patch_embeddings[i][0](x) # padding\n x_unfold = x_padded.unfold(dimension=-1, size=patch_len, step=stride)\n # [batch, n_vars, num_patches, patch_len]\n x_unfold = x_unfold.reshape(batch * n_vars, -1, patch_len)\n \n # Similarly for mask (compute observation density per patch)\n mask_padded = F.pad(mask, (0, stride), mode='replicate')\n mask_unfold = mask_padded.unfold(dimension=-1, size=patch_len, step=stride)\n mask_unfold = mask_unfold.reshape(batch * n_vars, -1, patch_len)\n obs_density = mask_unfold.mean(dim=-1, keepdim=True) # [batch*n_vars, num_patches, 1]\n \n # Embed patches\n patch_embed = self.patch_embeddings[i][1](x_unfold) # linear\n patch_embed = 
self.patch_embeddings[i][2](patch_embed) # dropout\n \n # Weight by observation density\n patch_embed = patch_embed * obs_density.clamp(min=0.1)\n \n # Add position embedding\n patch_embed = patch_embed + self.pos_embed\n \n all_patches.append(patch_embed)\n all_masks.append(obs_density)\n \n # Weighted sum across scales\n weights = F.softmax(self.scale_weights, dim=0)\n combined = sum(w * patches for w, patches in zip(weights, all_patches))\n \n return combined, n_vars\n\n\nclass Model(nn.Module):\n \"\"\"\n Custom model for time series imputation with multi-scale mask-aware patching.\n \"\"\"\n def __init__(self, configs):\n super(Model, self).__init__()\n self.task_name = configs.task_name\n self.seq_len = configs.seq_len\n self.pred_len = configs.seq_len\n self.enc_in = configs.enc_in\n self.d_model = configs.d_model\n self.dropout = configs.dropout\n \n # Multi-scale patch embedding\n self.patch_embed = MultiScalePatchEmbedding(\n d_model=self.d_model,\n patch_lens=[8, 16, 32],\n strides=[4, 8, 16],\n dropout=self.dropout\n )\n \n # Transformer encoder\n self.encoder = Encoder(\n [\n EncoderLayer(\n AttentionLayer(\n FullAttention(False, configs.factor,\n attention_dropout=configs.dropout,\n output_attention=False),\n self.d_model, configs.n_heads),\n self.d_model,\n configs.d_ff,\n dropout=configs.dropout,\n activation=configs.activation\n ) for _ in range(configs.e_layers)\n ],\n norm_layer=nn.LayerNorm(self.d_model)\n )\n \n # Reconstruction head (unpatch)\n self.head_nf = self.d_model * sum(\n (self.seq_len - pl) // st + 2 for pl, st in zip([8, 16, 32], [4, 8, 16])\n ) // 3 # approximate average\n self.head = nn.Linear(self.head_nf, self.seq_len)\n \n # Mask refinement module\n self.refine = nn.Sequential(\n nn.Linear(self.enc_in, self.d_model),\n nn.GELU(),\n nn.Linear(self.d_model, self.enc_in)\n )\n \n def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):\n batch, seq_len, n_vars = x_enc.shape\n \n # Normalization using observed values 
only\n means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1)\n means = means.unsqueeze(1).detach()\n x_enc = x_enc - means\n x_enc = x_enc.masked_fill(mask == 0, 0)\n stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) /\n torch.sum(mask == 1, dim=1) + 1e-5)\n stdev = stdev.unsqueeze(1).detach()\n x_enc = x_enc / stdev\n \n # Prepare input for patching: [batch, n_vars, seq_len]\n x_enc_t = x_enc.permute(0, 2, 1)\n mask_t = mask.permute(0, 2, 1)\n \n # Multi-scale patch embedding\n enc_out, n_vars = self.patch_embed(x_enc_t, mask_t)\n # enc_out: [batch * n_vars, num_patches, d_model]\n \n # Transformer encoder\n enc_out, _ = self.encoder(enc_out)\n \n # Reshape back: [batch, n_vars, num_patches, d_model]\n enc_out = enc_out.reshape(batch, n_vars, -1, self.d_model)\n \n # Flatten patches\n enc_out = enc_out.permute(0, 1, 3, 2) # [batch, n_vars, d_model, num_patches]\n enc_out = enc_out.reshape(batch, n_vars, -1)\n \n # Project to sequence length\n dec_out = self.head(enc_out) # [batch, n_vars, seq_len]\n dec_out = dec_out.permute(0, 2, 1) # [batch, seq_len, n_vars]\n \n # Mask refinement: blend with original observed values\n residual = self.refine(dec_out)\n dec_out = dec_out + 0.1 * residual\n \n # De-normalization\n dec_out = dec_out * stdev\n dec_out = dec_out + means\n \n return dec_out\n \n def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):\n if self.task_name == 'imputation':\n dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)\n return dec_out\n return None"
7}
Tool Result — Step 3
OK: Replaced lines 1..131 with 172 line(s) in Time-Series-Library/models/Custom.py. Editable range: entire file.
[Current file: Time-Series-Library/models/Custom.py | editable: 1–172 | total: 172 lines]
1 import torch
2 import torch.nn as nn
3 import torch.nn.functional as F
4 from layers.Embed import PatchEmbedding
5 from layers.Transformer_EncDec import Encoder, EncoderLayer
6 from layers.SelfAttention_Family import FullAttention, AttentionLayer
7
8
9 class MultiScalePatchEmbedding(nn.Module):
10 """Multi-scale patch embedding with mask awareness."""
11 def __init__(self, d_model, patch_lens=[8, 16, 32], strides=[4, 8, 16], dropout=0.1):
12 super().__init__()
13 self.patch_embeddings = nn.ModuleList()
14 self.patch_lens = patch_lens
15 self.strides = strides
16
17 for patch_len, stride in zip(patch_lens, strides):
18
Assistant — Step 4: test
Tool Result — Step 4
[Test #2] (1 test remaining; call submit(n=2) to submit this result) ### ETTh1 (scripts/etth1.sh) WARNING: Overriding HOME environment variable with APPTAINERENV_HOME is not permitted Using GPU Args in experiment: [1mBasic Config[0m Task Name: imputation Is Training: 1 Model ID: ETTh1_mask_0.25 Model: Custom [1mData Loader[0m Data: ETTh1 Root Path: /data/ETT-small/ Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints/ [1mImputation Task[0m Mask Rate: 0.25 [1mModel Parameters[0m Top k: 5 Num Kernels: 6 Enc In: 7 Dec In: 7 C Out: 7
Assistant — Step 5: edit
Tool Input
1{
2 "op": "replace",
3 "filename": "Time-Series-Library/models/Custom.py",
4 "start_line": 1,
5 "end_line": 172,
6 "content": "import torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom layers.Embed import DataEmbedding\nfrom layers.Transformer_EncDec import Encoder, EncoderLayer\nfrom layers.SelfAttention_Family import FullAttention, AttentionLayer\n\n\nclass MaskConditionedNorm(nn.Module):\n \"\"\"Layer normalization conditioned on mask density.\"\"\"\n def __init__(self, normalized_shape, eps=1e-5):\n super().__init__()\n self.normalized_shape = normalized_shape\n self.eps = eps\n self.weight = nn.Parameter(torch.ones(normalized_shape))\n self.bias = nn.Parameter(torch.zeros(normalized_shape))\n \n def forward(self, x, mask):\n # x: [batch, seq_len, d_model]\n # mask: [batch, seq_len, d_model] or [batch, seq_len, 1]\n if mask.dim() == 3:\n mask = mask.mean(dim=-1, keepdim=True) # average over variables\n \n # Compute mean and std using only observed positions (mask > 0)\n masked_x = x * mask\n count = mask.sum(dim=1, keepdim=True) + self.eps\n mean = masked_x.sum(dim=1, keepdim=True) / count\n var = ((masked_x - mean) ** 2).sum(dim=1, keepdim=True) / count\n std = torch.sqrt(var + self.eps)\n \n x_norm = (x - mean) / std\n # Affine transform conditioned on mask density\n alpha = mask.mean(dim=1, keepdim=True) # [batch, 1, 1]\n weight = self.weight * (1 + alpha) # modulate by mask density\n bias = self.bias * (1 - alpha)\n return x_norm * weight + bias\n\n\nclass VariableInteraction(nn.Module):\n \"\"\"Cross-variable interaction using attention.\"\"\"\n def __init__(self, d_model, n_heads, dropout=0.1):\n super().__init__()\n self.attention = AttentionLayer(\n FullAttention(False, factor=1, attention_dropout=dropout, output_attention=False),\n d_model, n_heads\n )\n self.norm = nn.LayerNorm(d_model)\n \n def forward(self, x, mask):\n # x: [batch, seq_len, n_vars, d_model]\n batch, seq_len, n_vars, d_model = x.shape\n # Reshape to treat variables as sequence dimension\n x_reshaped = x.reshape(batch * seq_len, n_vars, d_model)\n # Apply attention across 
variables\n out, _ = self.attention(x_reshaped, x_reshaped, x_reshaped, attn_mask=None)\n out = self.norm(out)\n out = out.reshape(batch, seq_len, n_vars, d_model)\n return out\n\n\nclass Model(nn.Module):\n \"\"\"\n Custom model for time series imputation with mask-conditioned normalization\n and cross-variable interaction.\n \"\"\"\n def __init__(self, configs):\n super(Model, self).__init__()\n self.task_name = configs.task_name\n self.seq_len = configs.seq_len\n self.pred_len = configs.seq_len\n self.enc_in = configs.enc_in\n self.d_model = configs.d_model\n self.dropout = configs.dropout\n \n # Per-variable embedding\n self.var_embedding = nn.Linear(1, self.d_model)\n \n # Time feature embedding\n self.time_embedding = DataEmbedding(self.enc_in, self.d_model,\n configs.embed, configs.freq,\n self.dropout)\n \n # Mask-conditioned normalization\n self.mask_norm = MaskConditionedNorm(self.d_model)\n \n # Temporal convolution to capture local patterns\n self.temporal_conv = nn.Sequential(\n nn.Conv1d(self.d_model, self.d_model * 2, kernel_size=3, padding=1),\n nn.GELU(),\n nn.Conv1d(self.d_model * 2, self.d_model, kernel_size=3, padding=1),\n nn.GELU(),\n )\n \n # Cross-variable interaction\n self.var_interaction = VariableInteraction(self.d_model, configs.n_heads, self.dropout)\n \n # Temporal transformer encoder\n self.encoder = Encoder(\n [\n EncoderLayer(\n AttentionLayer(\n FullAttention(False, configs.factor,\n attention_dropout=configs.dropout,\n output_attention=False),\n self.d_model, configs.n_heads),\n self.d_model,\n configs.d_ff,\n dropout=configs.dropout,\n activation=configs.activation\n ) for _ in range(configs.e_layers)\n ],\n norm_layer=nn.LayerNorm(self.d_model)\n )\n \n # Reconstruction head\n self.projection = nn.Sequential(\n nn.Linear(self.d_model, self.d_model * 2),\n nn.GELU(),\n nn.Linear(self.d_model * 2, 1)\n )\n \n # Residual refinement\n self.refine = nn.Linear(self.enc_in, self.enc_in)\n \n def imputation(self, x_enc, x_mark_enc, 
x_dec, x_mark_dec, mask):\n batch, seq_len, n_vars = x_enc.shape\n \n # Normalization using observed values only (global)\n means = torch.sum(x_enc, dim=1) / torch.sum(mask == 1, dim=1)\n means = means.unsqueeze(1).detach()\n x_enc = x_enc - means\n x_enc = x_enc.masked_fill(mask == 0, 0)\n stdev = torch.sqrt(torch.sum(x_enc * x_enc, dim=1) /\n torch.sum(mask == 1, dim=1) + 1e-5)\n stdev = stdev.unsqueeze(1).detach()\n x_enc = x_enc / stdev\n \n # Embed each variable separately\n # Reshape to treat each variable-time pair as separate token\n x_reshaped = x_enc.unsqueeze(-1) # [batch, seq_len, n_vars, 1]\n var_embed = self.var_embedding(x_reshaped) # [batch, seq_len, n_vars, d_model]\n \n # Add time embedding (broadcast across variables)\n if x_mark_enc is not None:\n # time_embed expects [batch, seq_len, n_vars] but we have separate per variable?\n # Use DataEmbedding on original x_enc (with zeros) and repeat across variables\n time_embed = self.time_embedding(x_enc, x_mark_enc) # [batch, seq_len, d_model]\n time_embed = time_embed.unsqueeze(2).repeat(1, 1, n_vars, 1) # broadcast\n var_embed = var_embed + time_embed\n \n # Mask-conditioned normalization\n mask_expanded = mask.unsqueeze(-1) # [batch, seq_len, n_vars, 1]\n var_embed = self.mask_norm(var_embed, mask_expanded)\n \n # Temporal convolution per variable\n # Rearrange to [batch, n_vars, d_model, seq_len]\n var_embed_t = var_embed.permute(0, 2, 3, 1) # [batch, n_vars, d_model, seq_len]\n conv_out = self.temporal_conv(var_embed_t) # [batch, n_vars, d_model, seq_len]\n conv_out = conv_out.permute(0, 3, 1, 2) # [batch, seq_len, n_vars, d_model]\n var_embed = var_embed + conv_out # residual\n \n # Cross-variable interaction\n var_embed = self.var_interaction(var_embed, mask_expanded)\n \n # Flatten variables into sequence dimension for temporal transformer\n batch, seq_len, n_vars, d_model = var_embed.shape\n var_embed_flat = var_embed.reshape(batch, seq_len * n_vars, d_model)\n \n # Temporal transformer\n 
enc_out, _ = self.encoder(var_embed_flat)\n \n # Reshape back\n enc_out = enc_out.reshape(batch, seq_len, n_vars, d_model)\n \n # Project back to scalar per variable\n dec_out = self.projection(enc_out).squeeze(-1) # [batch, seq_len, n_vars]\n \n # Residual refinement\n residual = self.refine(dec_out)\n dec_out = dec_out + 0.1 * residual\n \n # De-normalization\n dec_out = dec_out * stdev\n dec_out = dec_out + means\n \n return dec_out\n \n def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):\n if self.task_name == 'imputation':\n dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)\n return dec_out\n return None"
7}
Tool Result — Step 5
OK: Replaced lines 1..172 with 192 line(s) in Time-Series-Library/models/Custom.py. Editable range: entire file.
[Current file: Time-Series-Library/models/Custom.py | editable: 1–192 | total: 192 lines]
1 import torch
2 import torch.nn as nn
3 import torch.nn.functional as F
4 from layers.Embed import DataEmbedding
5 from layers.Transformer_EncDec import Encoder, EncoderLayer
6 from layers.SelfAttention_Family import FullAttention, AttentionLayer
7
8
9 class MaskConditionedNorm(nn.Module):
10 """Layer normalization conditioned on mask density."""
11 def __init__(self, normalized_shape, eps=1e-5):
12 super().__init__()
13 self.normalized_shape = normalized_shape
14 self.eps = eps
15 self.weight = nn.Parameter(torch.ones(normalized_shape))
16 self.bias = nn.Parameter(torch.zeros(normalized_shape))
17
18 def forward(self, x, mas
Assistant — Step 6: test
Tool Result — Step 6
[Test #3] (0 tests remaining) ### ETTh1 (scripts/etth1.sh) WARNING: Overriding HOME environment variable with APPTAINERENV_HOME is not permitted Using GPU Args in experiment: [1mBasic Config[0m Task Name: imputation Is Training: 1 Model ID: ETTh1_mask_0.25 Model: Custom [1mData Loader[0m Data: ETTh1 Root Path: /data/ETT-small/ Data Path: ETTh1.csv Features: M Target: OT Freq: h Checkpoints: ./checkpoints/ [1mImputation Task[0m Mask Rate: 0.25 [1mModel Parameters[0m Top k: 5 Num Kernels: 6 Enc In: 7 Dec In: 7 C Out: 7 d model: 51