From 8cbd13abcbb9d33247526da5e896d06b3c189431 Mon Sep 17 00:00:00 2001 From: rubiel1 Date: Sun, 23 Nov 2025 06:11:31 -0500 Subject: [PATCH 1/8] We follow the steps on the tutorial to add a dataset --- configs/dataset/graph/WS1000-gamma.yaml | 43 ++++ test/pipeline/test_pipeline.py | 7 +- .../data/datasets/ws1000_gamma_dataset.py | 222 ++++++++++++++++++ .../graph/ws1000_gamma_dataset_loader.py | 48 ++++ 4 files changed, 317 insertions(+), 3 deletions(-) create mode 100644 configs/dataset/graph/WS1000-gamma.yaml create mode 100644 topobench/data/datasets/ws1000_gamma_dataset.py create mode 100644 topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py diff --git a/configs/dataset/graph/WS1000-gamma.yaml b/configs/dataset/graph/WS1000-gamma.yaml new file mode 100644 index 000000000..dae3723a4 --- /dev/null +++ b/configs/dataset/graph/WS1000-gamma.yaml @@ -0,0 +1,43 @@ +# Dataset loader config +loader: + _target_: topobench.data.loaders.graph.ws1000_gamma_dataset_loader.WS1000GammaDatasetLoader + parameters: + data_domain: graph # primary domain + data_type: synthetic # you can rename this if you like + data_name: WS1000-gamma # must match your dataset's 'name' argument + # WS1000_gamma generation parameters (can be overridden from CLI) + num_nodes: 1000 + feature_dim: 1000 + mean_degree: 4 + beta: 0.5 + gamma: 0.0 + noise_scale: 1.0 + seed: 0 + # where to store processed data + data_dir: ${paths.data_dir}/${dataset.loader.parameters.data_domain}/${dataset.loader.parameters.data_type} + +# Dataset parameters +parameters: + num_features: 1000 # = feature_dim + num_classes: ${dataset.parameters.num_nodes} # upper bound on possible distances + num_nodes: 1000 + task: classification # we treat distance as a class label + loss_type: cross_entropy + monitor_metric: accuracy + task_level: node # node-level prediction + +# Splits +split_params: + learning_setting: transductive + data_split_dir: ${paths.data_dir}/data_splits/${dataset.loader.parameters.data_name} + 
data_seed: 0 + split_type: random # or 'k-fold' + k: 10 # used only if split_type='k-fold' + train_prop: 0.5 # used only if split_type='random' + standardize: False # standardize node features + +# Dataloader parameters +dataloader_params: + batch_size: 1 # fixed for transductive single-graph setting + num_workers: 0 + pin_memory: False diff --git a/test/pipeline/test_pipeline.py b/test/pipeline/test_pipeline.py index 785987159..181df82ca 100644 --- a/test/pipeline/test_pipeline.py +++ b/test/pipeline/test_pipeline.py @@ -4,8 +4,9 @@ from test._utils.simplified_pipeline import run -DATASET = "graph/MUTAG" # ADD YOUR DATASET HERE -MODELS = ["graph/gcn", "cell/topotune", "simplicial/topotune"] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE +DATASET = "graph/WS1000-gamma" + # ADD YOUR DATASET HERE +MODELS = ["graph/gcn"] # ADD ONE OR SEVERAL MODELS OF YOUR CHOICE HERE class TestPipeline: @@ -32,4 +33,4 @@ def test_pipeline(self): ], return_hydra_config=True ) - run(cfg) \ No newline at end of file + run(cfg) diff --git a/topobench/data/datasets/ws1000_gamma_dataset.py b/topobench/data/datasets/ws1000_gamma_dataset.py new file mode 100644 index 000000000..1f3648cfb --- /dev/null +++ b/topobench/data/datasets/ws1000_gamma_dataset.py @@ -0,0 +1,222 @@ +""" +Location: +topobench/data/datasets/ws1000_gamma_dataset.py + +Implemented a dataset from +@misc{katsman2024revisitingnecessitygraphlearning, + title={Revisiting the Necessity of Graph Learning and Common Graph Benchmarks}, + author={Isay Katsman and Ethan Lou and Anna Gilbert}, + year={2024}, + eprint={2412.06173}, + archivePrefix={arXiv}, + primaryClass={cs.LG}, + url={https://arxiv.org/abs/2412.06173}, +} +Note that we do not evaluate on edge prediction, instead we evaluate node lenght classification. +Hopefully, edge prediction will be added. 
+""" + +import os +import os.path as osp +from typing import List + +import torch +from torch_geometric.data import InMemoryDataset, Data +from collections import deque +import random + + +class WS1000GammaDataset(InMemoryDataset): + r"""Synthetic Watts–Strogatz dataset WS1000_gamma. + + - Graph: Watts–Strogatz (N nodes, mean degree K, rewiring prob beta) + constructed exactly as in Watts & Strogatz (1998): + 1) regular ring lattice + 2) rewire each edge (i, i+j) with prob beta, keeping i fixed + - Features: R^d, generated via BFS "parental dependence" + x_root ~ N(0, I_d) + x_child = gamma * x_parent + noise_scale * z, z ~ N(0, I_d) + + """ + + def __init__( + self, + root: str, + name: str = "WS1000-gamma", + parameters=None, + transform=None, + pre_transform=None, + ) -> None: + self.name = name + self.parameters = parameters + + # Defaults, can be overridden from Hydra DictConfig + self.num_nodes = 1000 + self.feature_dim = 1000 + self.mean_degree = 4 # K in WS model + self.beta = 0.5 # rewiring probability + self.gamma = 0.0 # parental coefficient + self.noise_scale = 1.0 + self.seed = 0 + + if parameters is not None: + if "num_nodes" in parameters: + self.num_nodes = int(parameters.num_nodes) + if "feature_dim" in parameters: + self.feature_dim = int(parameters.feature_dim) + if "mean_degree" in parameters: + self.mean_degree = int(parameters.mean_degree) + if "beta" in parameters: + self.beta = float(parameters.beta) + if "gamma" in parameters: + self.gamma = float(parameters.gamma) + if "noise_scale" in parameters: + self.noise_scale = float(parameters.noise_scale) + if "seed" in parameters: + self.seed = int(parameters.seed) + + super().__init__(root=root, transform=transform, pre_transform=pre_transform) + + # Load processed data (super() will call process() the first time) + self.data, self.slices = torch.load(self.processed_paths[0]) + + # --------------------------------------------------------------------- + # Required PyG properties + # 
--------------------------------------------------------------------- + @property + def raw_file_names(self) -> List[str]: + # Dummy file to satisfy InMemoryDataset's bookkeeping. + return ["synthetic.done"] + + @property + def processed_file_names(self) -> List[str]: + return ["data_v1.pt"] + + # --------------------------------------------------------------------- + # Download: here we don't download anything. + # --------------------------------------------------------------------- + def download(self) -> None: + raw_path = osp.join(self.raw_dir, self.raw_file_names[0]) + os.makedirs(self.raw_dir, exist_ok=True) + with open(raw_path, "w") as f: + f.write("synthetic ws1000_gamma marker\n") + + # --------------------------------------------------------------------- + # Process: generate WS graph + WS1000_gamma features and save. + # --------------------------------------------------------------------- + def process(self) -> None: + data = self._generate_ws1000_gamma() + data_list = [data] + data, slices = self.collate(data_list) + os.makedirs(self.processed_dir, exist_ok=True) + torch.save((data, slices), self.processed_paths[0]) + + # --------------------------------------------------------------------- + # Helper: Watts–Strogatz graph + gamma-based features + # --------------------------------------------------------------------- + def _generate_ws1000_gamma(self) -> Data: + N = self.num_nodes + K = self.mean_degree + beta = self.beta + d = self.feature_dim + gamma = self.gamma + noise_scale = self.noise_scale + seed = self.seed + + assert K % 2 == 0, "mean_degree K must be even for Watts–Strogatz ring construction." 
+ + # --- Seed everything deterministically + random.seed(seed) + torch.manual_seed(seed) + + # --- 1) Build regular ring lattice + # neighbors: undirected adjacency; edges: undirected edge set + neighbors = {i: set() for i in range(N)} + edges = set() + + half_k = K // 2 + + ring_edges_oriented = [] + for j in range(1, half_k + 1): # distance layer outer + for i in range(N): # then each vertex + v = (i + j) % N + ring_edges_oriented.append((i, v)) + u_min, u_max = (i, v) if i < v else (v, i) + if (u_min, u_max) not in edges: + edges.add((u_min, u_max)) + neighbors[i].add(v) + neighbors[v].add(i) + # --- 2) Rewire edges in Watts–Strogatz style (exactly as in the paper) + # For each original ring edge (i, i+j) in clockwise sense, with probability beta, + # rewire the endpoint i+j to a new node w chosen uniformly at random + for (i, v) in ring_edges_oriented: + if random.random() < beta: + # Candidates: all nodes except i and current neighbours of i + possible_nodes = [w for w in range(N) + if w != i and w not in neighbors[i]] + if not possible_nodes: + # No valid candidate; skip rewiring for this edge + continue + + w = random.choice(possible_nodes) + + # Remove old edge (i, v) if it still exists + if v in neighbors[i]: + neighbors[i].remove(v) + neighbors[v].remove(i) + edges.discard((i, v) if i < v else (v, i)) + + # Add new edge (i, w) + neighbors[i].add(w) + neighbors[w].add(i) + edges.add((i, w) if i < w else (w, i)) + + + # --- 3) Convert to undirected edge_index with both directions + edge_list = [] + for (u, v) in edges: + edge_list.append((u, v)) + edge_list.append((v, u)) + edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous() + + # --- 4) Generate features with BFS parental dependence + # Use neighbors directly as adjacency (adj = neighbors) + x = torch.empty((N, d), dtype=torch.float) + + root = 0 + queue = deque([root]) + + # root feature + x[root] = torch.randn(d) + dist = torch.full((N,), -1, dtype=torch.long) + dist[root] = 0 + + 
while queue: + u = queue.popleft() + for v in neighbors[u]: + if dist[v] == -1: + dist[v] = dist[u] + 1 + queue.append(v) + noise = torch.randn(d) + x[v] = gamma * x[u] + noise_scale * noise + + # For unvisited (disconnected) nodes: + for i in range(N): + if dist[i] == -1: + x[i] = torch.randn(d) + + + data = Data( + x=x, + edge_index=edge_index, + y=dist, + ) + # Metadata + data.num_nodes = N + data.gamma = gamma + data.beta = beta + data.mean_degree = K + data.feature_dim = d + data.seed = seed + + return data diff --git a/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py b/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py new file mode 100644 index 000000000..f25749ec0 --- /dev/null +++ b/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py @@ -0,0 +1,48 @@ +# location: topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py + +from pathlib import Path +from omegaconf import DictConfig + +from topobench.data.datasets import WS1000GammaDataset +from topobench.data.loaders.base import AbstractLoader + + +class WS1000GammaDatasetLoader(AbstractLoader): + """Loader for the WS1000_gamma synthetic dataset. 
+ + Parameters + ---------- + parameters : DictConfig + Configuration parameters containing at least: + - data_dir + - data_name + - num_nodes + - feature_dim + - mean_degree + - beta + - gamma + - noise_scale + - seed + """ + + def __init__(self, parameters: DictConfig) -> None: + super().__init__(parameters) + + def load_dataset(self) -> WS1000GammaDataset: + """Main method called by TopoBench: instantiate dataset and set data_dir.""" + dataset = self._initialize_dataset() + self.data_dir = self._redefine_data_dir(dataset) + + return dataset + + def _initialize_dataset(self) -> WS1000GammaDataset: + """Helper to instantiate the WS1000GammaDataset.""" + return WS1000GammaDataset( + root=str(self.root_data_dir), + name=self.parameters.data_name, + parameters=self.parameters, + ) + + def _redefine_data_dir(self, dataset: WS1000GammaDataset) -> Path: + """Return the processed root folder as dataset directory.""" + return Path(dataset.processed_dir) From 998a1350536b26161ecc35d8562f65d9dff4054b Mon Sep 17 00:00:00 2001 From: Eric Rubiel Date: Sun, 23 Nov 2025 17:15:48 +0500 Subject: [PATCH 2/8] Added Documentation for WS1000-Gamma dataset Updated details on dataset structure, and configuration parameters. 
--- .../data/datasets/ws1000_gamma_dataset.py | 78 ++++++++++++------- 1 file changed, 51 insertions(+), 27 deletions(-) diff --git a/topobench/data/datasets/ws1000_gamma_dataset.py b/topobench/data/datasets/ws1000_gamma_dataset.py index 1f3648cfb..4f4127d8a 100644 --- a/topobench/data/datasets/ws1000_gamma_dataset.py +++ b/topobench/data/datasets/ws1000_gamma_dataset.py @@ -1,21 +1,3 @@ -""" -Location: -topobench/data/datasets/ws1000_gamma_dataset.py - -Implemented a dataset from -@misc{katsman2024revisitingnecessitygraphlearning, - title={Revisiting the Necessity of Graph Learning and Common Graph Benchmarks}, - author={Isay Katsman and Ethan Lou and Anna Gilbert}, - year={2024}, - eprint={2412.06173}, - archivePrefix={arXiv}, - primaryClass={cs.LG}, - url={https://arxiv.org/abs/2412.06173}, -} -Note that we do not evaluate on edge prediction, instead we evaluate node lenght classification. -Hopefully, edge prediction will be added. -""" - import os import os.path as osp from typing import List @@ -27,18 +9,60 @@ class WS1000GammaDataset(InMemoryDataset): - r"""Synthetic Watts–Strogatz dataset WS1000_gamma. + """ + WS1000-Gamma Synthetic Dataset + ============================== + + This module implements the WS1000-Gamma dataset introduced in: + + Katsman, I., Lou, E., & Gilbert, A. (2024). + *Revisiting the Necessity of Graph Learning and Common Graph Benchmarks*. + arXiv:2412.06173 + https://arxiv.org/abs/2412.06173 + + The dataset is a synthetic Watts–Strogatz small-world graph with + BFS-dependent Gaussian node features. It is designed as a principled + benchmark that requires graph structure to perform EDGE-level tasks (see Note c). + + Notes + ----- + a.- This implementation follows the Watts & Strogatz (1998) construction: + 1. Create a regular ring lattice with mean degree ``K``. + 2. Rewire each oriented ring edge ``(i, i+j)`` with probability ``beta``. 
+ + b.- Node features are generated via **BFS parental dependence**: + ``x_child = gamma * x_parent + noise_scale * z``, where ``z ~ N(0, I_d)``. + + c.- The current implementation evaluates NODE-level distance classification + (predict BFS distance to the root). + EDGE prediction is NOT yet implemented. + + Dataset Structure + ----------------- + The output is a single :class:`torch_geometric.data.Data` object with: + + - ``x`` : ``[num_nodes, feature_dim]`` float tensor + - ``edge_index`` : ``[2, 2 * num_edges]`` long tensor (undirected) + - ``y`` : ``[num_nodes]`` long tensor of BFS distances from the root node + - metadata fields: ``gamma``, ``beta``, ``mean_degree``, ``feature_dim``, ``seed`` + + Configuration Parameters + ------------------------ + The dataset accepts the following Hydra parameters: + + - ``num_nodes`` : int + - ``feature_dim`` : int + - ``mean_degree`` : int (must be even) + - ``beta`` : float + - ``gamma`` : float + - ``noise_scale`` : float + - ``seed`` : int - - Graph: Watts–Strogatz (N nodes, mean degree K, rewiring prob beta) - constructed exactly as in Watts & Strogatz (1998): - 1) regular ring lattice - 2) rewire each edge (i, i+j) with prob beta, keeping i fixed - - Features: R^d, generated via BFS "parental dependence" - x_root ~ N(0, I_d) - x_child = gamma * x_parent + noise_scale * z, z ~ N(0, I_d) + These are typically defined in: - """ + ``configs/dataset/graph/WS1000-gamma.yaml`` + """ def __init__( self, root: str, From 7968060cf4712d2d49e4960872a23375778c997f Mon Sep 17 00:00:00 2001 From: Eric Rubiel Date: Sun, 23 Nov 2025 17:22:52 +0500 Subject: [PATCH 3/8] Update docstrings in WS1000GammaDatasetLoader Updated docstrings for clarity and consistency. 
--- .../graph/ws1000_gamma_dataset_loader.py | 69 ++++++++++++++----- 1 file changed, 53 insertions(+), 16 deletions(-) diff --git a/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py b/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py index f25749ec0..58dfc8e41 100644 --- a/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py +++ b/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py @@ -8,35 +8,56 @@ class WS1000GammaDatasetLoader(AbstractLoader): - """Loader for the WS1000_gamma synthetic dataset. + """ + Loader for the WS1000-Gamma synthetic dataset. Parameters ---------- - parameters : DictConfig - Configuration parameters containing at least: - - data_dir - - data_name - - num_nodes - - feature_dim - - mean_degree - - beta - - gamma - - noise_scale - - seed - """ + parameters : omegaconf.DictConfig + The configuration block located at + ``dataset.loader.parameters`` in the Hydra config. It must + contain at least the following fields: + - ``data_domain`` : str + - ``data_type`` : str + - ``data_name`` : str + - ``data_dir`` : str + - ``num_nodes`` : int + - ``feature_dim`` : int + - ``mean_degree`` : int + - ``beta`` : float + - ``gamma`` : float + - ``noise_scale`` : float + - ``seed`` : int + """ def __init__(self, parameters: DictConfig) -> None: super().__init__(parameters) def load_dataset(self) -> WS1000GammaDataset: - """Main method called by TopoBench: instantiate dataset and set data_dir.""" + """ + Load the WS1000-Gamma dataset. + Returns + ------- + WS1000GammaDataset + The instantiated dataset containing one synthetic graph with + BFS-derived node features. + """ + + dataset = self._initialize_dataset() self.data_dir = self._redefine_data_dir(dataset) return dataset def _initialize_dataset(self) -> WS1000GammaDataset: - """Helper to instantiate the WS1000GammaDataset.""" + """ + Instantiate the underlying :class:`WS1000GammaDataset`. 
+        Returns
+        -------
+        WS1000GammaDataset
+            A dataset instance that will trigger processing if the
+            processed data file is missing.
+        """
         return WS1000GammaDataset(
             root=str(self.root_data_dir),
             name=self.parameters.data_name,
@@ -44,5 +65,21 @@ def _initialize_dataset(self) -> WS1000GammaDataset:
         )
 
     def _redefine_data_dir(self, dataset: WS1000GammaDataset) -> Path:
-        """Return the processed root folder as dataset directory."""
+        """
+        Resolve the dataset directory to the processed root.
+
+        TopoBench components expect ``loader.data_dir`` to point to the
+        directory containing processed files. This method extracts the
+        correct processed directory from the dataset object.
+
+        Parameters
+        ----------
+        dataset : WS1000GammaDataset
+            The dataset whose processed directory is being queried.
+
+        Returns
+        -------
+        pathlib.Path
+            Path to the processed dataset directory.
+        """
         return Path(dataset.processed_dir)

From 127a736658fb961f666f18aeb77453b21f25c5dc Mon Sep 17 00:00:00 2001
From: Eric Rubiel
Date: Mon, 24 Nov 2025 18:38:18 +0500
Subject: [PATCH 4/8] Indentation error in line 66

We replaced a tab with 4 spaces in line 66

---
 topobench/data/datasets/ws1000_gamma_dataset.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/topobench/data/datasets/ws1000_gamma_dataset.py b/topobench/data/datasets/ws1000_gamma_dataset.py
index 4f4127d8a..67a94e537 100644
--- a/topobench/data/datasets/ws1000_gamma_dataset.py
+++ b/topobench/data/datasets/ws1000_gamma_dataset.py
@@ -9,7 +9,7 @@
 
 
 class WS1000GammaDataset(InMemoryDataset):
-    """
+    """
     WS1000-Gamma Synthetic Dataset
     ==============================
 
@@ -63,6 +63,7 @@ class WS1000GammaDataset(InMemoryDataset):
     ``configs/dataset/graph/WS1000-gamma.yaml``
 
     """
+
     def __init__(
         self,
         root: str,

From a0eb13d9f3069766119445ae78242dd3bc2f21ef Mon Sep 17 00:00:00 2001
From: rubiel1
Date: Mon, 24 Nov 2025 11:40:35 -0500
Subject: [PATCH 5/8] More tabs replaced by 4 spaces

---
 .../data/datasets/ws1000_gamma_dataset.py | 80 
+++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/topobench/data/datasets/ws1000_gamma_dataset.py b/topobench/data/datasets/ws1000_gamma_dataset.py index 67a94e537..6902719dd 100644 --- a/topobench/data/datasets/ws1000_gamma_dataset.py +++ b/topobench/data/datasets/ws1000_gamma_dataset.py @@ -10,59 +10,59 @@ class WS1000GammaDataset(InMemoryDataset): """ - WS1000-Gamma Synthetic Dataset - ============================== + WS1000-Gamma Synthetic Dataset + ============================== - This module implements the WS1000-Gamma dataset introduced in: + This module implements the WS1000-Gamma dataset introduced in: - Katsman, I., Lou, E., & Gilbert, A. (2024). - *Revisiting the Necessity of Graph Learning and Common Graph Benchmarks*. - arXiv:2412.06173 - https://arxiv.org/abs/2412.06173 + Katsman, I., Lou, E., & Gilbert, A. (2024). + *Revisiting the Necessity of Graph Learning and Common Graph Benchmarks*. + arXiv:2412.06173 + https://arxiv.org/abs/2412.06173 - The dataset is a synthetic Watts–Strogatz small-world graph with - BFS-dependent Gaussian node features. It is designed as a principled - benchmark that requires graph structure to perform EDGE-level tasks (see Note c). + The dataset is a synthetic Watts–Strogatz small-world graph with + BFS-dependent Gaussian node features. It is designed as a principled + benchmark that requires graph structure to perform EDGE-level tasks (see Note c). - Notes - ----- - a.- This implementation follows the Watts & Strogatz (1998) construction: - 1. Create a regular ring lattice with mean degree ``K``. - 2. Rewire each oriented ring edge ``(i, i+j)`` with probability ``beta``. + Notes + ----- + a.- This implementation follows the Watts & Strogatz (1998) construction: + 1. Create a regular ring lattice with mean degree ``K``. + 2. Rewire each oriented ring edge ``(i, i+j)`` with probability ``beta``. 
- b.- Node features are generated via **BFS parental dependence**: - ``x_child = gamma * x_parent + noise_scale * z``, where ``z ~ N(0, I_d)``. + b.- Node features are generated via **BFS parental dependence**: + ``x_child = gamma * x_parent + noise_scale * z``, where ``z ~ N(0, I_d)``. - c.- The current implementation evaluates NODE-level distance classification - (predict BFS distance to the root). - EDGE prediction is NOT yet implemented. + c.- The current implementation evaluates NODE-level distance classification + (predict BFS distance to the root). + EDGE prediction is NOT yet implemented. - Dataset Structure - ----------------- - The output is a single :class:`torch_geometric.data.Data` object with: + Dataset Structure + ----------------- + The output is a single :class:`torch_geometric.data.Data` object with: - - ``x`` : ``[num_nodes, feature_dim]`` float tensor - - ``edge_index`` : ``[2, 2 * num_edges]`` long tensor (undirected) - - ``y`` : ``[num_nodes]`` long tensor of BFS distances from the root node - - metadata fields: ``gamma``, ``beta``, ``mean_degree``, ``feature_dim``, ``seed`` + - ``x`` : ``[num_nodes, feature_dim]`` float tensor + - ``edge_index`` : ``[2, 2 * num_edges]`` long tensor (undirected) + - ``y`` : ``[num_nodes]`` long tensor of BFS distances from the root node + - metadata fields: ``gamma``, ``beta``, ``mean_degree``, ``feature_dim``, ``seed`` - Configuration Parameters - ------------------------ - The dataset accepts the following Hydra parameters: + Configuration Parameters + ------------------------ + The dataset accepts the following Hydra parameters: - - ``num_nodes`` : int - - ``feature_dim`` : int - - ``mean_degree`` : int (must be even) - - ``beta`` : float - - ``gamma`` : float - - ``noise_scale`` : float - - ``seed`` : int + - ``num_nodes`` : int + - ``feature_dim`` : int + - ``mean_degree`` : int (must be even) + - ``beta`` : float + - ``gamma`` : float + - ``noise_scale`` : float + - ``seed`` : int - These are typically 
defined in: + These are typically defined in: - ``configs/dataset/graph/WS1000-gamma.yaml`` + ``configs/dataset/graph/WS1000-gamma.yaml`` - """ + """ def __init__( self, From cbe62fe7b34e4c8acd85c41a56b9ebdb7bda8980 Mon Sep 17 00:00:00 2001 From: Eric Rubiel Date: Mon, 24 Nov 2025 22:18:33 +0500 Subject: [PATCH 6/8] Ruff suggested edits --- topobench/data/datasets/ws1000_gamma_dataset.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/topobench/data/datasets/ws1000_gamma_dataset.py b/topobench/data/datasets/ws1000_gamma_dataset.py index 6902719dd..87fd229c4 100644 --- a/topobench/data/datasets/ws1000_gamma_dataset.py +++ b/topobench/data/datasets/ws1000_gamma_dataset.py @@ -1,11 +1,10 @@ import os import os.path as osp -from typing import List +import random +from collections import deque import torch -from torch_geometric.data import InMemoryDataset, Data -from collections import deque -import random +from torch_geometric.data import Data, InMemoryDataset class WS1000GammaDataset(InMemoryDataset): @@ -109,12 +108,12 @@ def __init__( # Required PyG properties # --------------------------------------------------------------------- @property - def raw_file_names(self) -> List[str]: + def raw_file_names(self) -> list[str]: # Dummy file to satisfy InMemoryDataset's bookkeeping. 
return ["synthetic.done"] @property - def processed_file_names(self) -> List[str]: + def processed_file_names(self) -> list[str]: return ["data_v1.pt"] # --------------------------------------------------------------------- From cb73ac6a434a5227e0ccb60a8d8c1248fcd50a87 Mon Sep 17 00:00:00 2001 From: Eric Rubiel Date: Mon, 24 Nov 2025 22:21:13 +0500 Subject: [PATCH 7/8] Ruff requested changes --- .../graph/ws1000_gamma_dataset_loader.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py b/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py index 58dfc8e41..ca5973522 100644 --- a/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py +++ b/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py @@ -1,6 +1,6 @@ # location: topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py - from pathlib import Path + from omegaconf import DictConfig from topobench.data.datasets import WS1000GammaDataset @@ -19,16 +19,16 @@ class WS1000GammaDatasetLoader(AbstractLoader): contain at least the following fields: - ``data_domain`` : str - - ``data_type`` : str - - ``data_name`` : str - - ``data_dir`` : str - - ``num_nodes`` : int - - ``feature_dim`` : int - - ``mean_degree`` : int - - ``beta`` : float - - ``gamma`` : float - - ``noise_scale`` : float - - ``seed`` : int + - ``data_type`` : str + - ``data_name`` : str + - ``data_dir`` : str + - ``num_nodes`` : int + - ``feature_dim`` : int + - ``mean_degree`` : int + - ``beta`` : float + - ``gamma`` : float + - ``noise_scale`` : float + - ``seed`` : int """ def __init__(self, parameters: DictConfig) -> None: super().__init__(parameters) From 62a0d57b363b79de50b9a6eae2be08f6ad9fc9ec Mon Sep 17 00:00:00 2001 From: rubiel1 Date: Mon, 24 Nov 2025 12:30:58 -0500 Subject: [PATCH 8/8] I found the actual ruff errors and fixed them --- .../data/datasets/ws1000_gamma_dataset.py | 22 +++++++++---------- 
.../graph/ws1000_gamma_dataset_loader.py | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/topobench/data/datasets/ws1000_gamma_dataset.py b/topobench/data/datasets/ws1000_gamma_dataset.py index 87fd229c4..7b0ff9f67 100644 --- a/topobench/data/datasets/ws1000_gamma_dataset.py +++ b/topobench/data/datasets/ws1000_gamma_dataset.py @@ -32,30 +32,30 @@ class WS1000GammaDataset(InMemoryDataset): b.- Node features are generated via **BFS parental dependence**: ``x_child = gamma * x_parent + noise_scale * z``, where ``z ~ N(0, I_d)``. - c.- The current implementation evaluates NODE-level distance classification - (predict BFS distance to the root). + c.- The current implementation evaluates NODE-level distance classification + (predict BFS distance to the root). EDGE prediction is NOT yet implemented. Dataset Structure ----------------- The output is a single :class:`torch_geometric.data.Data` object with: - - ``x`` : ``[num_nodes, feature_dim]`` float tensor + - ``x`` : ``[num_nodes, feature_dim]`` float tensor - ``edge_index`` : ``[2, 2 * num_edges]`` long tensor (undirected) - - ``y`` : ``[num_nodes]`` long tensor of BFS distances from the root node + - ``y`` : ``[num_nodes]`` long tensor of BFS distances from the root node - metadata fields: ``gamma``, ``beta``, ``mean_degree``, ``feature_dim``, ``seed`` Configuration Parameters ------------------------ The dataset accepts the following Hydra parameters: - - ``num_nodes`` : int - - ``feature_dim`` : int - - ``mean_degree`` : int (must be even) - - ``beta`` : float - - ``gamma`` : float - - ``noise_scale`` : float - - ``seed`` : int + - ``num_nodes`` : int + - ``feature_dim`` : int + - ``mean_degree`` : int (must be even) + - ``beta`` : float + - ``gamma`` : float + - ``noise_scale`` : float + - ``seed`` : int These are typically defined in: diff --git a/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py b/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py index 
ca5973522..45c36e8da 100644 --- a/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py +++ b/topobench/data/loaders/graph/ws1000_gamma_dataset_loader.py @@ -18,7 +18,7 @@ class WS1000GammaDatasetLoader(AbstractLoader): ``dataset.loader.parameters`` in the Hydra config. It must contain at least the following fields: - - ``data_domain`` : str + - ``data_domain`` : str - ``data_type`` : str - ``data_name`` : str - ``data_dir`` : str