.dockerignore (2 changes: 1 addition & 1 deletion)
@@ -56,7 +56,7 @@ fossa*.zip
 
 # ==================== Miscellaneous folders
 proto/
-examples/MAG240M/downloads/
+gigl/examples/MAG240M/downloads/
 graphlearn_torch/
 graphlearn_torch.egg-info/
 do_not_open_source
Makefile (6 changes: 3 additions & 3 deletions)
@@ -21,7 +21,7 @@ DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG?=${DOCKER_IMAGE_MAIN_CUDA_NAME}:${DATE}
 DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG?=${DOCKER_IMAGE_MAIN_CPU_NAME}:${DATE}
 DOCKER_IMAGE_DEV_WORKBENCH_NAME_WITH_TAG?=${DOCKER_IMAGE_DEV_WORKBENCH_NAME}:${DATE}
 
-PYTHON_DIRS:=.github/scripts examples gigl tests snapchat scripts testing
+PYTHON_DIRS:=.github/scripts gigl tests snapchat scripts testing
 PY_TEST_FILES?="*_test.py"
 # You can override GIGL_TEST_DEFAULT_RESOURCE_CONFIG by setting it in your environment i.e.
 # adding `export GIGL_TEST_DEFAULT_RESOURCE_CONFIG=your_resource_config` to your shell config (~/.bashrc, ~/.zshrc, etc.)
@@ -270,8 +270,8 @@ _skip_build_deps:
 # make \
 job_name="{alias}_run_dev_mag240m_kfp_pipeline" \
 start_at="config_populator" \
-task_config_uri="examples/MAG240M/task_config.yaml" \
-resource_config_uri="examples/MAG240M/resource_config.yaml" \
+task_config_uri="gigl/examples/MAG240M/task_config.yaml" \
+resource_config_uri="gigl/examples/MAG240M/resource_config.yaml" \
 run_dev_gnn_kubeflow_pipeline
 # If you have precompiled to some specified poth using `make compile_gigl_kubeflow_pipeline`
 # You can use it here instead of re-compiling by setting `compiled_pipeline_path`
conf.py (1 change: 0 additions & 1 deletion)
@@ -68,7 +68,6 @@
 "snapchat/**",
 "snapchat/**",
 "index.rst",
-"examples/**",
 ]
 
 autodoc_default_options = {
containers/Dockerfile.src (1 change: 0 additions & 1 deletion)
@@ -17,6 +17,5 @@ COPY deployment deployment
 COPY gigl gigl
 COPY snapchat snapchat
 COPY tests tests
-COPY examples examples
 
 RUN uv pip install -e .
docs/user_guide/config_guides/task_config_guide.md (14 changes: 7 additions & 7 deletions)
@@ -26,7 +26,7 @@ We will use the MAG240M task config to walk you through what a config may look l
 <details>
 <summary><bold>Full task config for reference:</bold></summary>
 
-```{literalinclude} ../../../examples/MAG240M/task_config.yaml
+```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml
 :language: yaml
 ```
 
@@ -40,7 +40,7 @@ one edge type: `(paper_or_author, references, paper_or_author)`
 Note: In this example we have converted the hetrogeneous MAG240M dataset to a homogeneous one with just one edge and one
 node; which we will be doing self supervised learning on.
 
-```{literalinclude} ../../../examples/MAG240M/task_config.yaml
+```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml
 :language: yaml
 :start-after: GraphMetadata
 :end-before: ========
@@ -52,7 +52,7 @@ Now we specify what type of learning task we want to do. In this case we want to
 Prediction to do self supervised learning on the edge: `(paper_or_author, references, paper_or_author)`. Thus, we are
 using the `NodeAnchorBasedLinkPredictionTaskMetadata` task.
 
-```{literalinclude} ../../../examples/MAG240M/task_config.yaml
+```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml
 :language: yaml
 :start-after: TaskMetadata
 :end-before: ========
@@ -67,7 +67,7 @@ An example of `NodeBasedTaskMetadata` can be found in `gigl/src/mocking/configs/
 Shared config are parameters that are common and may be used across multiple components i.e. Trainer, Inferencer,
 SubgraphSampler, etc.
 
-```{literalinclude} ../../../examples/MAG240M/task_config.yaml
+```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml
 :language: yaml
 :start-after: SharedConfig
 :end-before: ========
@@ -87,7 +87,7 @@ Once we have the data preprocessed, we will be tabularizing the data with the us
 Subsequently, we will be creating test/train/val splits based on the %'s specified, using
 [Split Generator](../overview/components/split_generator.md)
 
-```{literalinclude} ../../../examples/MAG240M/task_config.yaml
+```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml
 :language: yaml
 :start-after: DatasetConfig
 :end-before: ========
@@ -102,7 +102,7 @@ defined @ {py:class}`gigl.src.training.v1.lib.base_trainer.BaseTrainer`.
 Some common sense pre-configured trainer implementations can be found in
 {py:class}`gigl.src.common.modeling_task_specs`. Although, you are recommended to implement your own.
 
-```{literalinclude} ../../../examples/MAG240M/task_config.yaml
+```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml
 :language: yaml
 :start-after: TrainerConfig
 :end-before: ========
@@ -114,7 +114,7 @@ Similar to Trainer, the class specified by `inferencerClsPath` will be initializ
 `inferencerArgs` will be directly passed in `**kwargs` to your inferencer class. The only requirement is the inferencer
 class implement the protocol defined @ {py:class}`gigl.src.inference.v1.lib.base_inferencer.BaseInferencer`
 
-```{literalinclude} ../../../examples/MAG240M/task_config.yaml
+```{literalinclude} ../../../gigl/examples/MAG240M/task_config.yaml
 :language: yaml
 :start-after: InferencerConfig
 :end-before: ========
docs/user_guide/examples/index.md (6 changes: 3 additions & 3 deletions)
@@ -6,7 +6,7 @@ maxdepth: 2
 caption: Examples
 ---
 
-../../../examples/toy_visual_example/toy_example_walkthrough.ipynb
-../../../examples/link_prediction/README.md
-../../../examples/MAG240M/README.md
+../../../gigl/examples/toy_visual_example/toy_example_walkthrough.ipynb
+../../../gigl/examples/link_prediction/README.md
+../../../gigl/examples/MAG240M/README.md
 ```
docs/user_guide/getting_started/quick_start.md (2 changes: 1 addition & 1 deletion)
@@ -103,4 +103,4 @@ data, and general customization:
 [components page](../overview/architecture.md)
 
 - **Examples**: For easy references and make your next steps easier, various example walkthroughs are available on the
-examples page. See [here](../examples/index.md)
+examples page. See [here](../gigl/examples/index.md)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed, with changes:
@@ -6,7 +6,7 @@
 "source": [
 "# (Optional) Fetch MAG240M Data into your own project\n",
 "\n",
-"Latest version of this notebook can be found on [github](https://github.com/Snapchat/GiGL/blob/main/examples/MAG240M/fetch_data.ipynb)\n"
+"Latest version of this notebook can be found on [github](https://github.com/Snapchat/GiGL/blob/main/gigl/examples/MAG240M/fetch_data.ipynb)\n"
 ]
 },
 {
File renamed, with changes:
@@ -53,8 +53,8 @@
 "\n",
 "# Firstly, let's give your job a name and ensure that the resource and task configs exist and can be loaded\n",
 "JOB_NAME = \"test_mag240m\"\n",
-"TEMPLATE_TASK_CONFIG_URI = LocalUri(\"examples/MAG240M/task_config.yaml\")\n",
-"RESOURCE_CONFIG_URI = LocalUri(\"examples/MAG240M/resource_config.yaml\")\n",
+"TEMPLATE_TASK_CONFIG_URI = LocalUri(\"gigl/examples/MAG240M/task_config.yaml\")\n",
+"RESOURCE_CONFIG_URI = LocalUri(\"gigl/examples/MAG240M/resource_config.yaml\")\n",
 "\n",
 "TEMPLATE_TASK_CONFIG: GbmlConfigPbWrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=TEMPLATE_TASK_CONFIG_URI)\n",
 "RESOURCE_CONFIG: GiglResourceConfigWrapper = get_resource_config(resource_config_uri=RESOURCE_CONFIG_URI)\n",
File renamed, with changes:
@@ -4,18 +4,18 @@
 
 import tensorflow as tf
 import tensorflow_transform as tft
-from examples.MAG240M.common import NUM_PAPER_FEATURES, TOTAL_NUM_PAPERS
-from examples.MAG240M.queries import (
+from google.cloud.bigquery.job import WriteDisposition
+
+from gigl.common.logger import Logger
+from gigl.env.pipelines_config import get_resource_config
+from gigl.examples.MAG240M.common import NUM_PAPER_FEATURES, TOTAL_NUM_PAPERS
+from gigl.examples.MAG240M.queries import (
     query_template_cast_to_homogeneous_edge_table,
     query_template_cast_to_intermediary_homogeneous_node_table,
     query_template_computed_node_degree_table,
     query_template_generate_homogeneous_node_table,
     query_template_reindex_author_writes_paper_table,
 )
-from google.cloud.bigquery.job import WriteDisposition
-
-from gigl.common.logger import Logger
-from gigl.env.pipelines_config import get_resource_config
 from gigl.src.common.types import AppliedTaskIdentifier
 from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType, Relation
 from gigl.src.common.types.pb_wrappers.gigl_resource_config import (
File renamed, with changes:
@@ -1,11 +1,11 @@
 from __future__ import annotations
 
-from examples.MAG240M.common import NUM_PAPER_FEATURES
-from examples.MAG240M.queries import query_template_compute_average_features
 from google.cloud.bigquery.job import WriteDisposition
 
 from gigl.common.logger import Logger
 from gigl.env.pipelines_config import get_resource_config
+from gigl.examples.MAG240M.common import NUM_PAPER_FEATURES
+from gigl.examples.MAG240M.queries import query_template_compute_average_features
 from gigl.src.common.types import AppliedTaskIdentifier
 from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType, Relation
 from gigl.src.common.types.pb_wrappers.gigl_resource_config import (
File renamed, with changes:
@@ -1,4 +1,4 @@
-from examples.MAG240M.common import NUM_PAPER_FEATURES
+from gigl.examples.MAG240M.common import NUM_PAPER_FEATURES
 
 query_template_reindex_author_writes_paper_table = """
 -- Firstly, we reindex the author to the same node space as papers
File renamed, with changes:
@@ -59,7 +59,7 @@ trainerConfig:
 out_dim: "256"
 val_every_n_batch: "1000" # Decrease this number to do more frequent validation
 learning_rate: "0.005"
-command: python -m examples.link_prediction.heterogeneous_training
+command: python -m gigl.examples.link_prediction.heterogeneous_training
 # ========
 # InferencerConfig:
 # specifies the inference configuration. This includes the command and the arguments to pass to it
@@ -71,7 +71,7 @@ inferencerConfig:
 hid_dim: "256"
 out_dim: "256"
 inferenceBatchSize: 256 # Reduce batch size if Cuda OOM
-command: python -m examples.link_prediction.heterogeneous_inference
+command: python -m gigl.examples.link_prediction.heterogeneous_inference
 # ========
 # FeatureFlags:
 # any additional flags which we should specify for the training + inference job. We currently use this to
File renamed without changes.
File renamed, with changes:
@@ -48,7 +48,7 @@ trainerConfig:
 ("paper", "to", "author"): [15, 15],
 ("author", "to", "paper"): [20, 20]
 }
-command: python -m examples.link_prediction.heterogeneous_training
+command: python -m gigl.examples.link_prediction.heterogeneous_training
 inferencerConfig:
 inferencerArgs:
 # Example argument to inferencer
@@ -64,7 +64,7 @@
 ("author", "to", "paper"): [20, 20]
 }
 inferenceBatchSize: 512
-command: python -m examples.link_prediction.heterogeneous_inference
+command: python -m gigl.examples.link_prediction.heterogeneous_inference
 sharedConfig:
 shouldSkipAutomaticTempAssetCleanup: false
 shouldSkipInference: false
File renamed, with changes:
@@ -17,14 +17,14 @@ trainerConfig:
 # Example argument to trainer
 log_every_n_batch: "50" # Frequency in which we log batch information
 num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case
-command: python -m examples.link_prediction.homogeneous_training
+command: python -m gigl.examples.link_prediction.homogeneous_training
 inferencerConfig:
 inferencerArgs:
 # Example argument to inferencer
 log_every_n_batch: "50" # Frequency in which we log batch information
 num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case
 inferenceBatchSize: 512
-command: python -m examples.link_prediction.homogeneous_inference
+command: python -m gigl.examples.link_prediction.homogeneous_inference
 sharedConfig:
 shouldSkipAutomaticTempAssetCleanup: false
 shouldSkipInference: false
File renamed, with changes:
@@ -79,11 +79,11 @@
 "\n",
 "# Firstly, let's give your job a name and ensure that the resource and task configs exist and can be loaded\n",
 "JOB_NAME = f\"{getpass.getuser()}_gigl_cora_{curr_datetime}\"\n",
-"TEMPLATE_TASK_CONFIG_URI = LocalUri(\"examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml\")\n",
+"TEMPLATE_TASK_CONFIG_URI = LocalUri(\"gigl/examples/link_prediction/configs/e2e_hom_cora_sup_task_config.yaml\")\n",
 "\n",
 "# Respect the environment variable for resource config URI\n",
 "# if not, set it to some default value.\n",
-"RESOURCE_CONFIG_URI = LocalUri(os.environ.get(\"GIGL_TEST_DEFAULT_RESOURCE_CONFIG\", \"examples/link_prediction/configs/example_resource_config.yaml\"))\n",
+"RESOURCE_CONFIG_URI = LocalUri(os.environ.get(\"GIGL_TEST_DEFAULT_RESOURCE_CONFIG\", \"gigl/examples/link_prediction/configs/example_resource_config.yaml\"))\n",
 "print(f\"Using resource config URI: {RESOURCE_CONFIG_URI}\")\n",
 "\n",
 "TEMPLATE_TASK_CONFIG: GbmlConfigPbWrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=TEMPLATE_TASK_CONFIG_URI)\n",
@@ -363,7 +363,7 @@
 "import torch\n",
 "from torch_geometric.data import Data\n",
 "\n",
-"from examples.link_prediction.models import init_example_gigl_homogeneous_model\n",
+"from gigl.examples.link_prediction.models import init_example_gigl_homogeneous_model\n",
 "from gigl.common import UriFactory\n",
 "from gigl.src.common.utils.model import load_state_dict_from_uri\n",
 "\n",
File renamed, with changes:
@@ -79,10 +79,10 @@
 "\n",
 "# Firstly, let's give your job a name and ensure that the resource and task configs exist and can be loaded\n",
 "JOB_NAME = f\"{getpass.getuser()}_gigl_dblp_{curr_datetime}\"\n",
-"TEMPLATE_TASK_CONFIG_URI = LocalUri(\"examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml\")\n",
+"TEMPLATE_TASK_CONFIG_URI = LocalUri(\"gigl/examples/link_prediction/configs/e2e_het_dblp_sup_task_config.yaml\")\n",
 "# Respect the environment variable for resource config URI\n",
 "# if not, set it to some default value.\n",
-"RESOURCE_CONFIG_URI = LocalUri(os.environ.get(\"GIGL_TEST_DEFAULT_RESOURCE_CONFIG\", \"examples/link_prediction/configs/example_resource_config.yaml\"))\n",
+"RESOURCE_CONFIG_URI = LocalUri(os.environ.get(\"GIGL_TEST_DEFAULT_RESOURCE_CONFIG\", \"gigl/examples/link_prediction/configs/example_resource_config.yaml\"))\n",
 "print(f\"Using resource config URI: {RESOURCE_CONFIG_URI}\")\n",
 "\n",
 "TEMPLATE_TASK_CONFIG: GbmlConfigPbWrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=TEMPLATE_TASK_CONFIG_URI)\n",
@@ -362,7 +362,7 @@
 "import torch\n",
 "from torch_geometric.data import HeteroData\n",
 "\n",
-"from examples.link_prediction.models import init_example_gigl_heterogeneous_model\n",
+"from gigl.examples.link_prediction.models import init_example_gigl_heterogeneous_model\n",
 "from gigl.common import UriFactory\n",
 "from gigl.src.common.utils.model import load_state_dict_from_uri\n",
 "from gigl.src.common.types.graph_data import EdgeType, NodeType\n",
File renamed, with changes:
@@ -21,15 +21,15 @@ trainerConfig:
 # Example argument to trainer
 log_every_n_batch: "50" # Frequency in which we log batch information
 num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case
-command: python -m examples.link_prediction.homogeneous_training
+command: python -m gigl.examples.link_prediction.homogeneous_training
 # TODO(kmonte): Move to user-defined server code
 inferencerConfig:
 inferencerArgs:
 # Example argument to inferencer
 log_every_n_batch: "50" # Frequency in which we log batch information
 num_neighbors: "[10, 10]" # Fanout per hop, specified as a string representation of a list for the homogeneous use case
 inferenceBatchSize: 512
-command: python -m examples.link_prediction.graph_store.homogeneous_inference
+command: python -m gigl.examples.link_prediction.graph_store.homogeneous_inference
 sharedConfig:
 shouldSkipInference: false
 # Model Evaluation is currently only supported for tabularized SGS GiGL pipelines. This will soon be added for in-mem SGS GiGL pipelines.
File renamed, with changes:
@@ -14,7 +14,7 @@
 - Better memory utilization (graph data stays on storage nodes)
 - Cost optimization by using appropriate hardware for each role
 
-In contrast, the standard inference mode (see `examples/link_prediction/homogeneous_inference.py`)
+In contrast, the standard inference mode (see `gigl/examples/link_prediction/homogeneous_inference.py`)
 uses a homogeneous cluster where each machine handles both graph storage and computation.
 
 Key Implementation Differences:
@@ -67,7 +67,7 @@
 # Example argument to inferencer
 log_every_n_batch: "50"
 inferenceBatchSize: 512
-command: python -m examples.link_prediction.graph_store.homogeneous_inference
+command: python -m gigl.examples.link_prediction.graph_store.homogeneous_inference
 featureFlags:
 should_run_glt_backend: 'True'
 
@@ -85,7 +85,6 @@
 
 import torch
 import torch.multiprocessing as mp
-from examples.link_prediction.models import init_example_gigl_homogeneous_model
 
 import gigl.distributed
 import gigl.distributed.utils
@@ -97,6 +96,7 @@
 from gigl.distributed.graph_store.remote_dist_dataset import RemoteDistDataset
 from gigl.distributed.utils import get_graph_store_info
 from gigl.env.distributed import GraphStoreInfo
+from gigl.examples.link_prediction.models import init_example_gigl_homogeneous_model
 from gigl.nn import LinkPredictionGNN
 from gigl.src.common.types import AppliedTaskIdentifier
 from gigl.src.common.types.graph_data import NodeType
File renamed, with changes:
@@ -12,7 +12,7 @@
 # Example argument to inferencer
 log_every_n_batch: "50"
 inferenceBatchSize: 512
-command: python -m examples.link_prediction.heterogeneous_inference
+command: python -m gigl.examples.link_prediction.heterogeneous_inference
 featureFlags:
 should_run_glt_backend: 'True'
 
@@ -27,7 +27,6 @@
 import torch
 import torch.distributed
 import torch.multiprocessing as mp
-from examples.link_prediction.models import init_example_gigl_heterogeneous_model
 
 import gigl.distributed
 import gigl.distributed.utils
@@ -36,6 +35,7 @@
 from gigl.common.logger import Logger
 from gigl.common.utils.gcs import GcsUtils
 from gigl.distributed import DistDataset, build_dataset_from_task_config_uri
+from gigl.examples.link_prediction.models import init_example_gigl_heterogeneous_model
 from gigl.nn import LinkPredictionGNN
 from gigl.src.common.types import AppliedTaskIdentifier
 from gigl.src.common.types.graph_data import EdgeType, NodeType
@@ -100,7 +100,7 @@ def _inference_process(
 # to each edge type in the graph, or as string of format dict[(tuple[str, str, str])), list[int]] which will specify fanouts per edge type.
 # In the case of the latter, the keys should be specified with format (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE).
 # For the default example, we make a decision to keep the fanouts for all edge types the same, specifying the `fanout` with a `list[int]`.
-# To see an example of a 'fanout' with different behaviors per edge type, refer to `examples/link_prediction.configs/e2e_het_dblp_sup_task_config.yaml`.
+# To see an example of a 'fanout' with different behaviors per edge type, refer to `gigl/examples/link_prediction.configs/e2e_het_dblp_sup_task_config.yaml`.
 
 fanout = inferencer_args.get("num_neighbors", "[10, 10]")
 num_neighbors = parse_fanout(fanout)