10 changes: 9 additions & 1 deletion Makefile
@@ -242,6 +242,14 @@ run_hom_cora_sup_gs_e2e_test:
--test_spec_uri="testing/e2e_tests/e2e_tests.yaml" \
--test_names="hom_cora_sup_gs_test"

run_het_dblp_sup_gs_e2e_test: compiled_pipeline_path:=${GIGL_E2E_TEST_COMPILED_PIPELINE_PATH}
run_het_dblp_sup_gs_e2e_test: compile_gigl_kubeflow_pipeline
run_het_dblp_sup_gs_e2e_test:
uv run python testing/e2e_tests/e2e_test.py \
--compiled_pipeline_path=$(compiled_pipeline_path) \
--test_spec_uri="testing/e2e_tests/e2e_tests.yaml" \
--test_names="het_dblp_sup_gs_test"

run_all_e2e_tests: compiled_pipeline_path:=${GIGL_E2E_TEST_COMPILED_PIPELINE_PATH}
run_all_e2e_tests: compile_gigl_kubeflow_pipeline
run_all_e2e_tests:
@@ -280,7 +288,7 @@ _skip_build_deps:
# job_name=... \ , and other params
# compiled_pipeline_path="/tmp/gigl/my_pipeline.yaml" \
# run_dev_gnn_kubeflow_pipeline
run_dev_gnn_kubeflow_pipeline: $(if $(compiled_pipeline_path), _skip_build_deps, compile_jars push_new_docker_images)
run_dev_gnn_kubeflow_pipeline: $(if $(compiled_pipeline_path), _skip_build_deps, push_new_docker_images)
uv run python -m gigl.orchestration.kubeflow.runner \
$(if $(compiled_pipeline_path),,--container_image_cuda=${DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG}) \
$(if $(compiled_pipeline_path),,--container_image_cpu=${DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG}) \
@@ -0,0 +1,76 @@
# This config is used to run heterogeneous DBLP self-supervised training and inference using the in-memory GiGL SGS. It can be run with `make run_het_dblp_sup_gs_e2e_test`.
graphMetadata:
# We have 3 node types in the DBLP dataset: author, paper, and term. We also have 3
# edge types: author -> paper, paper -> author, and term -> paper.
edgeTypes:
- dstNodeType: paper
relation: to
srcNodeType: author
- dstNodeType: author
relation: to
srcNodeType: paper
- dstNodeType: paper
relation: to
srcNodeType: term
nodeTypes:
- author
- paper
- term
taskMetadata:
nodeAnchorBasedLinkPredictionTaskMetadata:
# We aim to predict paper -> author links in the graph.
supervisionEdgeTypes:
- dstNodeType: author
relation: to
srcNodeType: paper
datasetConfig:
dataPreprocessorConfig:
dataPreprocessorConfigClsPath: gigl.src.mocking.mocking_assets.passthrough_preprocessor_config_for_mocked_assets.PassthroughPreprocessorConfigForMockedAssets
dataPreprocessorArgs:
# This argument is specific for the `PassthroughPreprocessorConfigForMockedAssets` preprocessor to indicate which dataset we should be using
mocked_dataset_name: 'dblp_node_anchor_edge_features_lp'
# TODO(kmonte): Add GS trainer
trainerConfig:
trainerArgs:
# Example argument to trainer
log_every_n_batch: "50"
# The DBLP dataset does not have explicitly labeled edges, so we provide this field to indicate what
# percentage of edges should be selected as self-supervised labeled edges. Setting this to 0.05 randomly marks 5% of edges as "labels".
# Note that the current GiGL implementation does not remove these selected edges from the global set of edges, which may
# have a slight negative impact on training with self-supervised learning. This will be improved in the future.
# (A conceptual sketch of this label selection follows the config.)
ssl_positive_label_percentage: "0.05"
# Example of a dictionary fanout with a different fanout-per-hop for each edge type. Currently, we assume that all anchor node types
# use the same fanout. If you want different anchor node types to have different fanouts, we encourage adding additional arguments here to parse
# fanouts for each anchor node type.
# Note that edge types must be provided as a tuple[str, str, str] in the format (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE), as demonstrated below
# (a parsing sketch also follows the config).
num_neighbors: >-
{
("term", "to", "paper"): [10, 10],
("paper", "to", "author"): [15, 15],
("author", "to", "paper"): [20, 20]
}
command: python -m examples.link_prediction.heterogeneous_training
# TODO(kmonte): Move to user-defined server code
inferencerConfig:
inferencerArgs:
# Example argument to inferencer
log_every_n_batch: "50"
# Example of a dictionary fanout with a different fanout-per-hop for each edge type. Currently, we assume that all anchor node types
# use the same fanout. If you want different anchor node types to have different fanouts, we encourage adding additional arguments here to parse
# fanouts for each anchor node type.
# Note that edge types must be provided as a tuple[str, str, str] in the format (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE), as demonstrated below.
num_neighbors: >-
{
("term", "to", "paper"): [10, 10],
("paper", "to", "author"): [15, 15],
("author", "to", "paper"): [20, 20]
}
inferenceBatchSize: 512
command: python -m examples.link_prediction.graph_store.heterogeneous_inference
sharedConfig:
shouldSkipAutomaticTempAssetCleanup: false
shouldSkipInference: false
# Model evaluation is currently only supported for tabularized SGS GiGL pipelines. This will soon be added for in-memory SGS GiGL pipelines.
shouldSkipModelEvaluation: true
featureFlags:
should_run_glt_backend: 'True'
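
A minimal conceptual sketch of what `ssl_positive_label_percentage` does, assuming a PyTorch-style `edge_index` tensor; this is not GiGL's actual implementation, and `sample_ssl_positive_labels` is a hypothetical helper used only for illustration. It mirrors the caveat in the config comment: sampled edges are marked as positive labels but are not removed from the message-passing graph.

```python
import torch

def sample_ssl_positive_labels(edge_index: torch.Tensor, percentage: float) -> torch.Tensor:
    """Randomly pick a fraction of edges to act as self-supervised positive labels.

    edge_index: [2, num_edges] tensor of (src, dst) node ids for one edge type.
    percentage: fraction of edges to mark as labels, e.g. 0.05 for 5%.
    Returns the column indices of the selected edges.
    """
    num_edges = edge_index.size(1)
    num_labels = max(1, int(num_edges * percentage))
    perm = torch.randperm(num_edges)
    # The selected edges are only *marked* as labels; they stay in the graph,
    # matching the current GiGL behavior described in the config comment.
    return perm[:num_labels]

# Example: mark 5% of 1,000 paper -> author edges as positive labels.
edge_index = torch.randint(0, 100, (2, 1000))
label_idx = sample_ssl_positive_labels(edge_index, 0.05)
positive_edges = edge_index[:, label_idx]
```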
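
The `num_neighbors` blocks above are Python-literal dictionaries keyed by (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE) tuples. Below is a minimal sketch of how such a string could be parsed into per-edge-type fanouts; it is illustrative only and not GiGL's actual argument parsing, and `parse_num_neighbors` is a hypothetical helper.

```python
import ast
from typing import Dict, List, Tuple

EdgeType = Tuple[str, str, str]  # (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE)

def parse_num_neighbors(raw: str) -> Dict[EdgeType, List[int]]:
    """Parse a Python-literal dict string mapping edge-type tuples to per-hop fanouts."""
    parsed = ast.literal_eval(raw)
    if not isinstance(parsed, dict):
        raise ValueError("num_neighbors must be a dict of edge types to fanout lists")
    fanouts: Dict[EdgeType, List[int]] = {}
    for edge_type, fanout in parsed.items():
        if not (isinstance(edge_type, tuple) and len(edge_type) == 3):
            raise ValueError(f"{edge_type!r} is not a (src, relation, dst) tuple")
        fanouts[edge_type] = [int(hop) for hop in fanout]
    return fanouts

# Example input matching the config above: two-hop fanouts per edge type.
raw = '{("term", "to", "paper"): [10, 10], ("paper", "to", "author"): [15, 15], ("author", "to", "paper"): [20, 20]}'
print(parse_num_neighbors(raw))
# {('term', 'to', 'paper'): [10, 10], ('paper', 'to', 'author'): [15, 15], ('author', 'to', 'paper'): [20, 20]}
```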