Agent Conversation: openai/gpt-5.4-pro
Model: openai/gpt-5.4-pro
Total steps: 6
Messages: 13
User — Step 0
Task: ts-classification
Time Series Classification: Custom Model Design
Objective
Design and implement a custom deep learning model for multivariate time series classification. Your code goes in the Model class in models/Custom.py. Three reference implementations (DLinear, TimesNet, PatchTST) are provided as read-only.
Evaluation
Trained and evaluated on three UEA datasets:
- EthanolConcentration — spectral data classification
- FaceDetection — MEG brain imaging classification
- Handwriting — accelerometer-based character recognition
Training uses RAdam optimizer, CrossEntropyLoss, patience=10. Metric: accuracy (higher is better).
Time-Series-Library/models/Custom.py [EDITABLE — entire file only]
1: import torch
2: import torch.nn as nn
3: import torch.nn.functional as F
4:
5:
6: class Model(nn.Module):
7: """
8: Custom model for time series classification.
9:
10: Forward signature: forward(x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None)
11: - x_enc: [batch, seq_len, enc_in] — input time series
12: - x_mark_enc: [batch, seq_len] — padding mask (1=valid, 0=padding)
13: - x_dec: not used (None)
14: - x_mark_dec: not used (None)
15:
16: Must return: [batch, num_class] — class logits (before softmax)
17:
18: Note: configs.seq_len, configs.enc_in, and configs.num_class are set
19: dynamically from the dataset at runtime.
20: """
21:
22: def __init__(self, configs):
23: super(Model, self).__init__()
24: self.task_name = configs.task_name
25: self.seq_len = configs.seq_len
26: self.enc_in = configs.enc_in
27: self.num_class = configs.num_class
28: # TODO: Define your model architecture here
29:
30: def classification(self, x_enc, x_mark_enc):
31: """
32: Classification: assign a label to the input time series.
33: Input: x_enc [batch, seq_len, enc_in]
34: x_mark_enc: [batch, seq_len] padding mask
35: Output: [batch, num_class] logits
36: """
37: # TODO: Implement your classification logic
38: batch_size = x_enc.shape[0]
39: return torch.zeros(batch_size, self.num_class).to(x_enc.device)
40:
41: def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
42: if self.task_name == 'classification':
43: dec_out = self.classification(x_enc, x_mark_enc)
44: return dec_out
45: return None
Time-Series-Library/models/DLinear.py [READ-ONLY — do not edit]
1: import torch
2: import torch.nn as nn
3: import torch.nn.functional as F
4: from layers.Autoformer_EncDec import series_decomp
5:
6:
7: class Model(nn.Module):
8: """
9: Paper l… [docstring truncated in transcript]
Assistant — Step 1: edit
Tool Input
1{
2 "op": "replace",
3 "filename": "Time-Series-Library/models/Custom.py",
4 "content": "import math\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\n\ndef masked_mean(x, mask):\n return (x * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1.0)\n\n\n\ndef masked_max(x, mask):\n x = x.masked_fill(mask <= 0, -1e4)\n values = x.max(dim=1).values\n values = torch.where(torch.isfinite(values), values, torch.zeros_like(values))\n return values\n\n\n\ndef fill_tail_with_last_valid(x, mask):\n lengths = mask.squeeze(-1).sum(dim=1).long().clamp(min=1, max=x.size(1))\n idx = (lengths - 1).view(-1, 1, 1).expand(-1, 1, x.size(-1))\n last_valid = torch.gather(x, dim=1, index=idx)\n return x * mask + last_valid * (1.0 - mask)\n\n\n\ndef masked_standardize(x, mask, eps=1e-5):\n mean = masked_mean(x, mask)\n centered = x - mean.unsqueeze(1)\n var = (centered.pow(2) * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1.0)\n std = torch.sqrt(var + eps)\n x = centered / std.unsqueeze(1)\n return x, mean, std\n\n\n\nclass MovingAverage(nn.Module):\n def __init__(self, kernel_size):\n super().__init__()\n self.kernel_size = max(1, int(kernel_size))\n\n def forward(self, x):\n if self.kernel_size <= 1:\n return x\n pad = self.kernel_size // 2\n xt = x.transpose(1, 2)\n xt = F.pad(xt, (pad, pad), mode='replicate')\n xt = F.avg_pool1d(xt, kernel_size=self.kernel_size, stride=1)\n return xt.transpose(1, 2)\n\n\n\nclass ChannelGraphMixer(nn.Module):\n def __init__(self):\n super().__init__()\n self.temperature = nn.Parameter(torch.tensor(1.0))\n self.self_bias = nn.Parameter(torch.tensor(0.5))\n\n def forward(self, x, mask):\n denom = mask.sum(dim=1, keepdim=True).clamp_min(1.0)\n xm = x * mask\n corr = torch.matmul(xm.transpose(1, 2), xm) / denom\n eye = torch.eye(corr.size(-1), device=corr.device, dtype=corr.dtype).unsqueeze(0)\n corr = corr / math.sqrt(max(corr.size(-1), 1))\n corr = corr + self.self_bias * eye\n attn = torch.softmax(self.temperature * corr, dim=-1)\n return torch.matmul(x, attn)\n\n\n\nclass 
MultiScaleResidualMixer(nn.Module):\n def __init__(self, d_model, kernels, dropout):\n super().__init__()\n self.branches = nn.ModuleList([\n nn.Conv1d(d_model, d_model, kernel_size=k, padding=k // 2, groups=d_model, bias=False)\n for k in kernels\n ])\n self.pointwise = nn.Conv1d(d_model * len(kernels), d_model, kernel_size=1, bias=False)\n self.dropout = nn.Dropout(dropout)\n\n def forward(self, x):\n xt = x.transpose(1, 2)\n ys = [branch(xt) for branch in self.branches]\n y = self.pointwise(torch.cat(ys, dim=1)).transpose(1, 2)\n return self.dropout(F.gelu(y))\n\n\n\nclass SpectralAdaptiveMixer(nn.Module):\n def __init__(self, d_model, dropout):\n super().__init__()\n self.freq_scale = nn.Parameter(torch.zeros(1, 1, d_model))\n self.out_proj = nn.Linear(d_model, d_model)\n self.dropout = nn.Dropout(dropout)\n\n def forward(self, x):\n xf = torch.fft.rfft(x, dim=1, norm='ortho')\n amp = xf.abs().mean(dim=-1, keepdim=True)\n amp = amp / (amp.mean(dim=1, keepdim=True) + 1e-6)\n gate = torch.tanh(amp - 1.0)\n xf = xf * (1.0 + self.freq_scale * gate)\n y = torch.fft.irfft(xf, n=x.size(1), dim=1, norm='ortho')\n y = self.out_proj(y)\n return self.dropout(F.gelu(y))\n\n\n\nclass DecompMixerBlock(nn.Module):\n def __init__(self, d_model, d_ff, kernels, trend_kernels, dropout):\n super().__init__()\n self.trend_ma_1 = MovingAverage(trend_kernels[0])\n self.trend_ma_2 = MovingAverage(trend_kernels[1])\n self.local_mixer = MultiScaleResidualMixer(d_model, kernels, dropout)\n self.global_mixer = SpectralAdaptiveMixer(d_model, dropout)\n self.branch_gate = nn.Sequential(\n nn.Linear(2 * d_model, d_model),\n nn.GELU(),\n nn.Linear(d_model, d_model),\n )\n self.norm1 = nn.LayerNorm(d_model)\n self.norm2 = nn.LayerNorm(d_model)\n self.ff_gate = nn.Linear(d_model, 2 * d_ff)\n self.ff_out = nn.Linear(d_ff, d_model)\n self.dropout = nn.Dropout(dropout)\n\n def forward(self, x, mask):\n trend = 0.5 * (self.trend_ma_1(x) + self.trend_ma_2(x))\n resid = x - trend\n\n local_feat = 
self.local_mixer(resid)\n global_feat = self.global_mixer(trend)\n\n summary = torch.cat([\n masked_mean(trend, mask),\n masked_mean(resid.abs(), mask)\n ], dim=-1)\n gate = torch.sigmoid(self.branch_gate(summary)).unsqueeze(1)\n mixed = gate * global_feat + (1.0 - gate) * local_feat\n\n x = self.norm1(x + self.dropout(mixed))\n ff_u, ff_v = self.ff_gate(x).chunk(2, dim=-1)\n ff = self.ff_out(F.gelu(ff_u) * torch.sigmoid(ff_v))\n x = self.norm2(x + self.dropout(ff))\n return x\n\n\n\nclass AdaptivePoolHead(nn.Module):\n def __init__(self, d_model, stats_dim, num_class, dropout):\n super().__init__()\n hidden = max(d_model // 2, 16)\n self.score = nn.Linear(d_model, 1)\n self.pool_mix = nn.Sequential(\n nn.Linear(2 * d_model, hidden),\n nn.GELU(),\n nn.Linear(hidden, 3),\n )\n self.stats_proj = nn.Sequential(\n nn.LayerNorm(stats_dim),\n nn.Linear(stats_dim, d_model),\n nn.GELU(),\n )\n self.head = nn.Sequential(\n nn.LayerNorm(2 * d_model),\n nn.Dropout(dropout),\n nn.Linear(2 * d_model, num_class),\n )\n\n def forward(self, x, mask, stats):\n score = self.score(x).squeeze(-1)\n score = score.masked_fill(mask.squeeze(-1) <= 0, -1e4)\n attn = torch.softmax(score, dim=-1).unsqueeze(-1)\n\n mean_pool = masked_mean(x, mask)\n max_pool = masked_max(x, mask)\n attn_pool = (x * attn).sum(dim=1)\n\n mix = torch.softmax(self.pool_mix(torch.cat([mean_pool, attn_pool], dim=-1)), dim=-1)\n pooled = (\n mix[:, 0:1] * mean_pool\n + mix[:, 1:2] * max_pool\n + mix[:, 2:3] * attn_pool\n )\n\n stats_feat = self.stats_proj(stats)\n return self.head(torch.cat([pooled, stats_feat], dim=-1))\n\n\n\nclass Model(nn.Module):\n \"\"\"\n Custom model for time series classification.\n\n Forward signature: forward(x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None)\n - x_enc: [batch, seq_len, enc_in] — input time series\n - x_mark_enc: [batch, seq_len] — padding mask (1=valid, 0=padding)\n - x_dec: not used (None)\n - x_mark_dec: not used (None)\n\n Must return: [batch, num_class] — class logits 
(before softmax)\n\n Note: configs.seq_len, configs.enc_in, and configs.num_class are set\n dynamically from the dataset at runtime.\n \"\"\"\n\n def __init__(self, configs):\n super(Model, self).__init__()\n self.task_name = configs.task_name\n self.seq_len = configs.seq_len\n self.enc_in = configs.enc_in\n self.num_class = configs.num_class\n\n self.d_model = getattr(configs, 'd_model', 64)\n self.d_ff = getattr(configs, 'd_ff', self.d_model * 2)\n self.e_layers = getattr(configs, 'e_layers', 2)\n self.dropout = getattr(configs, 'dropout', 0.1)\n\n large_kernel = max(7, min(self.seq_len // 8 * 2 + 1, 31))\n if large_kernel % 2 == 0:\n large_kernel += 1\n kernels = [3, 5, large_kernel]\n\n trend_k1 = max(3, min(self.seq_len // 8 * 2 + 1, max(3, self.seq_len - (1 - self.seq_len % 2))))\n trend_k2 = max(5, min(self.seq_len // 4 * 2 + 1, max(5, self.seq_len - (1 - self.seq_len % 2))))\n if trend_k1 % 2 == 0:\n trend_k1 += 1\n if trend_k2 % 2 == 0:\n trend_k2 += 1\n trend_k1 = min(trend_k1, self.seq_len if self.seq_len % 2 == 1 else self.seq_len - 1)\n trend_k2 = min(trend_k2, self.seq_len if self.seq_len % 2 == 1 else self.seq_len - 1)\n trend_k1 = max(trend_k1, 1)\n trend_k2 = max(trend_k2, trend_k1)\n\n self.channel_graph = ChannelGraphMixer()\n self.input_proj = nn.Linear(self.enc_in * 3, self.d_model)\n self.pos_proj = nn.Linear(3, self.d_model)\n self.input_norm = nn.LayerNorm(self.d_model)\n self.input_dropout = nn.Dropout(self.dropout)\n\n self.blocks = nn.ModuleList([\n DecompMixerBlock(\n d_model=self.d_model,\n d_ff=self.d_ff,\n kernels=kernels,\n trend_kernels=(trend_k1, trend_k2),\n dropout=self.dropout,\n )\n for _ in range(self.e_layers)\n ])\n\n stats_dim = self.enc_in * 3\n self.head = AdaptivePoolHead(self.d_model, stats_dim, self.num_class, self.dropout)\n\n self._reset_parameters()\n\n def _reset_parameters(self):\n for module in self.modules():\n if isinstance(module, nn.Linear):\n nn.init.xavier_uniform_(module.weight)\n if module.bias is not 
None:\n nn.init.zeros_(module.bias)\n elif isinstance(module, nn.Conv1d):\n nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')\n if module.bias is not None:\n nn.init.zeros_(module.bias)\n\n def _build_mask(self, x_enc, x_mark_enc):\n if x_mark_enc is None:\n return torch.ones(x_enc.size(0), x_enc.size(1), 1, device=x_enc.device, dtype=x_enc.dtype)\n mask = x_mark_enc\n if mask.dim() == 2:\n mask = mask.unsqueeze(-1)\n return mask.to(dtype=x_enc.dtype)\n\n def _positional_features(self, length, device, dtype):\n pos = torch.linspace(0.0, 1.0, steps=length, device=device, dtype=dtype)\n feats = torch.stack([\n pos,\n torch.sin(2.0 * math.pi * pos),\n torch.cos(2.0 * math.pi * pos),\n ], dim=-1)\n return self.pos_proj(feats).unsqueeze(0)\n\n def classification(self, x_enc, x_mark_en\n... (truncated)",
5 "after_line": 0,
6 "start_line": 1,
7 "end_line": 45
8}
Tool Result — Step 1
OK: Replaced lines 1..45 with 316 line(s) in Time-Series-Library/models/Custom.py. Editable range: entire file.
[Current file: Time-Series-Library/models/Custom.py | editable: 1–316 | total: 316 lines]
1 import math
2 import torch
3 import torch.nn as nn
4 import torch.nn.functional as F
5
6
7
8 def masked_mean(x, mask):
9 return (x * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1.0)
10
11
12
13 def masked_max(x, mask):
14 x = x.masked_fill(mask <= 0, -1e4)
15 values = x.max(dim=1).values
16 values = torch.where(torch.isfinite(values), values, torch.zeros_like(values))
17 return values
18
19
20
21 def fill_tail_with_last_valid(x, mask):
22 lengths = mask.squeeze(-1).sum(dim=1).long().clamp(min=1, max=x.size(1))
23 idx = (lengths - 1).view(-1, 1, 1).expand(-1, 1, x.size(-1))
24 last_valid = torch.gather(x, dim=1, index
Assistant — Step 2: test
Tool Result — Step 2
[Test #1] (2 tests remaining; call submit(n=1) to submit this result) ## Seed 42 ### EthanolConcentration (scripts/ethanol.sh) Training metrics (last epochs): TRAIN_METRICS epoch=9 train_loss=1.452 vali_loss=1.446 vali_acc=0.236 test_acc=0.236 TRAIN_METRICS epoch=10 train_loss=1.438 vali_loss=1.451 vali_acc=0.243 test_acc=0.243 TRAIN_METRICS epoch=11 train_loss=1.429 vali_loss=1.415 vali_acc=0.247 test_acc=0.247 TRAIN_METRICS epoch=12 train_loss=1.450 vali_loss=1.451 vali_acc=0.274 test_acc=0.274 TRAIN_METRICS epoch=13 train_loss=1.458 vali_loss=1.470 vali_acc=0.255 test_acc=0.255 Test results (EthanolConcentration): Accuracy: 0.27757 ### FaceDetection (scripts/facedetection.sh) Training metrics (last epochs): TRAIN_METRICS epoch=35 train_loss=0.071 vali_loss=1.908 vali_acc=0.613 test_acc=0.613 TRAIN_METRICS epoch=36 train_loss=0.063 vali_loss=1.802 vali_acc=0.624 test_acc=0.624 TRAIN_METRICS epoch=37 train_loss=0.089 vali_loss=1.775 vali_acc=0.642 test_acc=0.642 TRAIN_METRICS ep
Assistant — Step 3: edit
Tool Input
1{
2 "op": "replace",
3 "filename": "Time-Series-Library/models/Custom.py",
4 "content": "import math\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\n\n\ndef masked_mean(x, mask):\n return (x * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1.0)\n\n\n\ndef masked_max(x, mask):\n x = x.masked_fill(mask <= 0, -1e4)\n values = x.max(dim=1).values\n values = torch.where(torch.isfinite(values), values, torch.zeros_like(values))\n return values\n\n\n\ndef fill_tail_with_last_valid(x, mask):\n lengths = mask.squeeze(-1).sum(dim=1).long().clamp(min=1, max=x.size(1))\n idx = (lengths - 1).view(-1, 1, 1).expand(-1, 1, x.size(-1))\n last_valid = torch.gather(x, dim=1, index=idx)\n return x * mask + last_valid * (1.0 - mask)\n\n\n\ndef masked_standardize(x, mask, eps=1e-5):\n mean = masked_mean(x, mask)\n centered = x - mean.unsqueeze(1)\n var = (centered.pow(2) * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1.0)\n std = torch.sqrt(var + eps)\n x = centered / std.unsqueeze(1)\n return x, mean, std\n\n\n\ndef spectral_band_summary(x, num_bands):\n xf = torch.fft.rfft(x, dim=1, norm='ortho').abs().permute(0, 2, 1)\n bands = F.adaptive_avg_pool1d(xf, num_bands)\n return bands.reshape(bands.size(0), -1)\n\n\n\nclass MovingAverage(nn.Module):\n def __init__(self, kernel_size):\n super().__init__()\n self.kernel_size = max(1, int(kernel_size))\n\n def forward(self, x):\n if self.kernel_size <= 1:\n return x\n pad = self.kernel_size // 2\n xt = x.transpose(1, 2)\n xt = F.pad(xt, (pad, pad), mode='replicate')\n xt = F.avg_pool1d(xt, kernel_size=self.kernel_size, stride=1)\n return xt.transpose(1, 2)\n\n\n\nclass ChannelGraphMixer(nn.Module):\n def __init__(self):\n super().__init__()\n self.log_temperature = nn.Parameter(torch.tensor(0.0))\n self.self_bias = nn.Parameter(torch.tensor(0.5))\n\n def forward(self, x, mask):\n denom = mask.sum(dim=1, keepdim=True).clamp_min(1.0)\n xm = x * mask\n corr = torch.matmul(xm.transpose(1, 2), xm) / denom\n eye = torch.eye(corr.size(-1), device=corr.device, dtype=corr.dtype).unsqueeze(0)\n corr = 
corr / math.sqrt(max(corr.size(-1), 1))\n corr = corr + self.self_bias * eye\n temperature = torch.exp(self.log_temperature).clamp(min=0.1, max=10.0)\n attn = torch.softmax(temperature * corr, dim=-1)\n return torch.matmul(x, attn)\n\n\n\nclass MultiScaleResidualMixer(nn.Module):\n def __init__(self, d_model, kernels, dropout):\n super().__init__()\n self.branches = nn.ModuleList([\n nn.Conv1d(d_model, d_model, kernel_size=k, padding=k // 2, groups=d_model, bias=False)\n for k in kernels\n ])\n self.pointwise = nn.Conv1d(d_model * len(kernels), d_model, kernel_size=1, bias=False)\n self.dropout = nn.Dropout(dropout)\n\n def forward(self, x):\n xt = x.transpose(1, 2)\n ys = [branch(xt) for branch in self.branches]\n y = self.pointwise(torch.cat(ys, dim=1)).transpose(1, 2)\n return self.dropout(F.gelu(y))\n\n\n\nclass SpectralAdaptiveMixer(nn.Module):\n def __init__(self, d_model, dropout):\n super().__init__()\n self.freq_scale = nn.Parameter(torch.zeros(1, 1, d_model))\n self.out_proj = nn.Linear(d_model, d_model)\n self.dropout = nn.Dropout(dropout)\n\n def forward(self, x):\n xf = torch.fft.rfft(x, dim=1, norm='ortho')\n amp = xf.abs().mean(dim=-1, keepdim=True)\n amp = amp / (amp.mean(dim=1, keepdim=True) + 1e-6)\n gate = torch.tanh(amp - 1.0)\n xf = xf * (1.0 + self.freq_scale * gate)\n y = torch.fft.irfft(xf, n=x.size(1), dim=1, norm='ortho')\n y = self.out_proj(y)\n return self.dropout(F.gelu(y))\n\n\n\nclass DecompMixerBlock(nn.Module):\n def __init__(self, d_model, d_ff, kernels, trend_kernels, dropout):\n super().__init__()\n self.trend_ma_1 = MovingAverage(trend_kernels[0])\n self.trend_ma_2 = MovingAverage(trend_kernels[1])\n self.local_mixer = MultiScaleResidualMixer(d_model, kernels, dropout)\n self.global_mixer = SpectralAdaptiveMixer(d_model, dropout)\n self.branch_gate = nn.Sequential(\n nn.Linear(2 * d_model, d_model),\n nn.GELU(),\n nn.Linear(d_model, d_model),\n )\n self.norm1 = nn.LayerNorm(d_model)\n self.norm2 = nn.LayerNorm(d_model)\n 
self.ff_gate = nn.Linear(d_model, 2 * d_ff)\n self.ff_out = nn.Linear(d_ff, d_model)\n self.dropout = nn.Dropout(dropout)\n\n def forward(self, x, mask):\n trend = 0.5 * (self.trend_ma_1(x) + self.trend_ma_2(x))\n resid = x - trend\n\n local_feat = self.local_mixer(resid)\n global_feat = self.global_mixer(trend)\n\n summary = torch.cat([\n masked_mean(trend, mask),\n masked_mean(resid.abs(), mask)\n ], dim=-1)\n gate = torch.sigmoid(self.branch_gate(summary)).unsqueeze(1)\n mixed = gate * global_feat + (1.0 - gate) * local_feat\n\n x = self.norm1(x + self.dropout(mixed))\n ff_u, ff_v = self.ff_gate(x).chunk(2, dim=-1)\n ff = self.ff_out(F.gelu(ff_u) * torch.sigmoid(ff_v))\n x = self.norm2(x + self.dropout(ff))\n return x\n\n\n\nclass AdaptivePoolHead(nn.Module):\n def __init__(self, d_model, stats_dim, num_class, dropout):\n super().__init__()\n hidden = max(d_model // 2, 16)\n self.score = nn.Linear(d_model, 1)\n self.pool_mix = nn.Sequential(\n nn.Linear(2 * d_model, hidden),\n nn.GELU(),\n nn.Linear(hidden, 3),\n )\n self.stats_proj = nn.Sequential(\n nn.LayerNorm(stats_dim),\n nn.Linear(stats_dim, d_model),\n nn.GELU(),\n )\n self.head = nn.Sequential(\n nn.LayerNorm(2 * d_model),\n nn.Dropout(dropout),\n nn.Linear(2 * d_model, num_class),\n )\n\n def forward(self, x, mask, stats):\n score = self.score(x).squeeze(-1)\n score = score.masked_fill(mask.squeeze(-1) <= 0, -1e4)\n attn = torch.softmax(score, dim=-1).unsqueeze(-1)\n\n mean_pool = masked_mean(x, mask)\n max_pool = masked_max(x, mask)\n attn_pool = (x * attn).sum(dim=1)\n\n mix = torch.softmax(self.pool_mix(torch.cat([mean_pool, attn_pool], dim=-1)), dim=-1)\n pooled = (\n mix[:, 0:1] * mean_pool\n + mix[:, 1:2] * max_pool\n + mix[:, 2:3] * attn_pool\n )\n\n stats_feat = self.stats_proj(stats)\n return self.head(torch.cat([pooled, stats_feat], dim=-1))\n\n\n\nclass Model(nn.Module):\n \"\"\"\n Custom model for time series classification.\n\n Forward signature: forward(x_enc, x_mark_enc, x_dec, 
x_mark_dec, mask=None)\n - x_enc: [batch, seq_len, enc_in] — input time series\n - x_mark_enc: [batch, seq_len] — padding mask (1=valid, 0=padding)\n - x_dec: not used (None)\n - x_mark_dec: not used (None)\n\n Must return: [batch, num_class] — class logits (before softmax)\n\n Note: configs.seq_len, configs.enc_in, and configs.num_class are set\n dynamically from the dataset at runtime.\n \"\"\"\n\n def __init__(self, configs):\n super(Model, self).__init__()\n self.task_name = configs.task_name\n self.seq_len = configs.seq_len\n self.enc_in = configs.enc_in\n self.num_class = configs.num_class\n\n self.d_model = getattr(configs, 'd_model', 64)\n self.d_ff = getattr(configs, 'd_ff', self.d_model * 2)\n self.e_layers = getattr(configs, 'e_layers', 2)\n self.dropout = getattr(configs, 'dropout', 0.1)\n self.num_bands = 8\n\n large_kernel = max(7, min(self.seq_len // 8 * 2 + 1, 31))\n if large_kernel % 2 == 0:\n large_kernel += 1\n kernels = [3, 5, large_kernel]\n\n max_odd_len = self.seq_len if self.seq_len % 2 == 1 else self.seq_len - 1\n trend_k1 = max(3, min(self.seq_len // 8 * 2 + 1, max(3, max_odd_len)))\n trend_k2 = max(5, min(self.seq_len // 4 * 2 + 1, max(5, max_odd_len)))\n if trend_k1 % 2 == 0:\n trend_k1 += 1\n if trend_k2 % 2 == 0:\n trend_k2 += 1\n trend_k1 = min(trend_k1, max(max_odd_len, 1))\n trend_k2 = min(trend_k2, max(max_odd_len, 1))\n trend_k1 = max(trend_k1, 1)\n trend_k2 = max(trend_k2, trend_k1)\n\n self.channel_graph = ChannelGraphMixer()\n self.input_proj = nn.Linear(self.enc_in * 3, self.d_model)\n self.pos_proj = nn.Linear(3, self.d_model)\n self.input_norm = nn.LayerNorm(self.d_model)\n self.input_dropout = nn.Dropout(self.dropout)\n\n self.blocks = nn.ModuleList([\n DecompMixerBlock(\n d_model=self.d_model,\n d_ff=self.d_ff,\n kernels=kernels,\n trend_kernels=(trend_k1, trend_k2),\n dropout=self.dropout,\n )\n for _ in range(self.e_layers)\n ])\n\n stats_dim = self.enc_in * (3 + self.num_bands)\n self.head = 
AdaptivePoolHead(self.d_model, stats_dim, self.num_class, self.dropout)\n\n self._reset_parameters()\n\n def _reset_parameters(self):\n for module in self.modules():\n if isinstance(module, nn.Linear):\n nn.init.xavier_uniform_(module.weight)\n if module.bias is not None:\n nn.init.zeros_(module.bias)\n elif isinstance(module, nn.Conv1d):\n nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')\n if module.bias is not None:\n nn.init.zeros_(module.bias)\n\n def _build_mask(self, x_enc, x_mark_enc):\n if x_mark_enc is None:\n return torch.ones(x_enc.size(0), x_enc.size(1), 1, device=x_enc.device, dtype=x_enc.dtype)\n mask = x_mark_enc\n if mask.dim() == 2:\n mask = mask.unsqueeze(-1)\n return mask.to(dtype=x_enc.dtype)\n\n def _positional_features(self, length, device, dtype):\n pos = torch.linspace(0.0, 1.0, steps=len\n... (truncated)",
5 "after_line": 0,
6 "start_line": 1,
7 "end_line": 316
8}
Tool Result — Step 3
OK: Replaced lines 1..316 with 328 line(s) in Time-Series-Library/models/Custom.py. Editable range: entire file.
[Current file: Time-Series-Library/models/Custom.py | editable: 1–328 | total: 328 lines]
1 import math
2 import torch
3 import torch.nn as nn
4 import torch.nn.functional as F
5
6
7
8 def masked_mean(x, mask):
9 return (x * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1.0)
10
11
12
13 def masked_max(x, mask):
14 x = x.masked_fill(mask <= 0, -1e4)
15 values = x.max(dim=1).values
16 values = torch.where(torch.isfinite(values), values, torch.zeros_like(values))
17 return values
18
19
20
21 def fill_tail_with_last_valid(x, mask):
22 lengths = mask.squeeze(-1).sum(dim=1).long().clamp(min=1, max=x.size(1))
23 idx = (lengths - 1).view(-1, 1, 1).expand(-1, 1, x.size(-1))
24 last_valid = torch.gather(x, dim=1, inde
Assistant — Step 4: test
Tool Result — Step 4
[Test #2] (1 test remaining; call submit(n=2) to submit this result) ### EthanolConcentration (scripts/ethanol.sh) Training metrics (last epochs): TRAIN_METRICS epoch=30 train_loss=1.313 vali_loss=1.673 vali_acc=0.243 test_acc=0.243 TRAIN_METRICS epoch=31 train_loss=1.300 vali_loss=1.552 vali_acc=0.274 test_acc=0.274 TRAIN_METRICS epoch=32 train_loss=1.287 vali_loss=1.480 vali_acc=0.266 test_acc=0.266 TRAIN_METRICS epoch=33 train_loss=1.315 vali_loss=1.574 vali_acc=0.281 test_acc=0.281 TRAIN_METRICS epoch=34 train_loss=1.235 vali_loss=1.538 vali_acc=0.236 test_acc=0.236 Test results (EthanolConcentration): Accuracy: 0.29658 ### FaceDetection (scripts/facedetection.sh) Training metrics (last epochs): TRAIN_METRICS epoch=20 train_loss=0.084 vali_loss=1.703 vali_acc=0.614 test_acc=0.614 TRAIN_METRICS epoch=21 train_loss=0.062 vali_loss=1.449 vali_acc=0.614 test_acc=0.614 TRAIN_METRICS epoch=22 train_loss=0.075 vali_loss=1.849 vali_acc=0.606 test_acc=0.606 TRAIN_METRICS epoch=23 train_
Assistant — Step 5: edit
Tool Input
1{
2 "op": "replace",
3 "filename": "Time-Series-Library/models/Custom.py",
4 "content": " mask = self._build_mask(x_enc, x_mark_enc)\n x_filled = fill_tail_with_last_valid(x_enc, mask)\n x_norm, mean, std = masked_standardize(x_filled, mask)\n\n graph_mixed = self.channel_graph(x_norm, mask)\n graph_resid = x_norm - graph_mixed\n dx = torch.diff(x_norm, dim=1, prepend=x_norm[:, :1, :])\n\n graph_energy = masked_mean(graph_resid.abs(), mask)\n diff_energy = masked_mean(dx.abs(), mask)\n aux_gate = graph_energy / (graph_energy + diff_energy + 1e-6)\n aux = aux_gate.unsqueeze(1) * graph_resid + (1.0 - aux_gate).unsqueeze(1) * dx\n x_aug = torch.cat([x_norm, graph_mixed, aux], dim=-1)\n\n h = self.input_proj(x_aug)\n h = h + self._positional_features(h.size(1), h.device, h.dtype)\n h = self.input_norm(self.input_dropout(F.gelu(h)))\n\n for block in self.blocks:\n h = block(h, mask)\n\n centered_abs = masked_mean((x_filled - mean.unsqueeze(1)).abs(), mask)\n aux_bands = spectral_band_summary(aux, self.num_bands)\n stats = torch.cat([mean, torch.log(std + 1e-5), centered_abs, aux_bands], dim=-1)\n logits = self.head(h, mask, stats)\n return logits",
5 "after_line": 0,
6 "start_line": 303,
7 "end_line": 321
8}
Tool Result — Step 5
OK: Replaced lines 303..321 with 26 line(s) in Time-Series-Library/models/Custom.py. Editable range: entire file.
[Current file: Time-Series-Library/models/Custom.py | editable: 1–335 | total: 335 lines]
1 import math
2 import torch
3 import torch.nn as nn
4 import torch.nn.functional as F
5
6
7
8 def masked_mean(x, mask):
9 return (x * mask).sum(dim=1) / mask.sum(dim=1).clamp_min(1.0)
10
11
12
13 def masked_max(x, mask):
14 x = x.masked_fill(mask <= 0, -1e4)
15 values = x.max(dim=1).values
16 values = torch.where(torch.isfinite(values), values, torch.zeros_like(values))
17 return values
18
19
20
21 def fill_tail_with_last_valid(x, mask):
22 lengths = mask.squeeze(-1).sum(dim=1).long().clamp(min=1, max=x.size(1))
23 idx = (lengths - 1).view(-1, 1, 1).expand(-1, 1, x.size(-1))
24 last_valid = torch.gather(x, dim=1, ind
Assistant — Step 6: test
Tool Result — Step 6
[Test #3] (0 tests remaining) ## Seed 42 ### EthanolConcentration (scripts/ethanol.sh) Training metrics (last epochs): TRAIN_METRICS epoch=35 train_loss=1.301 vali_loss=1.483 vali_acc=0.274 test_acc=0.274 TRAIN_METRICS epoch=36 train_loss=1.293 vali_loss=1.491 vali_acc=0.259 test_acc=0.259 TRAIN_METRICS epoch=37 train_loss=1.289 vali_loss=1.510 vali_acc=0.285 test_acc=0.285 TRAIN_METRICS epoch=38 train_loss=1.243 vali_loss=1.517 vali_acc=0.285 test_acc=0.285 TRAIN_METRICS epoch=39 train_loss=1.248 vali_loss=1.563 vali_acc=0.255 test_acc=0.255 Test results (EthanolConcentration): Accuracy: 0.29278 ### FaceDetection (scripts/facedetection.sh) Training metrics (last epochs): TRAIN_METRICS epoch=32 train_loss=0.032 vali_loss=2.002 vali_acc=0.631 test_acc=0.631 TRAIN_METRICS epoch=33 train_loss=0.059 vali_loss=1.866 vali_acc=0.623 test_acc=0.623 TRAIN_METRICS epoch=34 train_loss=0.059 vali_loss=1.827 vali_acc=0.632 test_acc=0.632 TRAIN_METRICS epoch=35 train_loss=0.051 vali_loss=1.591