10 changes: 9 additions & 1 deletion Makefile
@@ -242,6 +242,14 @@ run_hom_cora_sup_gs_e2e_test:
--test_spec_uri="testing/e2e_tests/e2e_tests.yaml" \
--test_names="hom_cora_sup_gs_test"

run_het_dblp_sup_gs_e2e_test: compiled_pipeline_path:=${GIGL_E2E_TEST_COMPILED_PIPELINE_PATH}
run_het_dblp_sup_gs_e2e_test: compile_gigl_kubeflow_pipeline
run_het_dblp_sup_gs_e2e_test:
uv run python testing/e2e_tests/e2e_test.py \
--compiled_pipeline_path=$(compiled_pipeline_path) \
--test_spec_uri="testing/e2e_tests/e2e_tests.yaml" \
--test_names="het_dblp_sup_gs_test"

run_all_e2e_tests: compiled_pipeline_path:=${GIGL_E2E_TEST_COMPILED_PIPELINE_PATH}
run_all_e2e_tests: compile_gigl_kubeflow_pipeline
run_all_e2e_tests:
@@ -280,7 +288,7 @@ _skip_build_deps:
# job_name=... \ , and other params
# compiled_pipeline_path="/tmp/gigl/my_pipeline.yaml" \
# run_dev_gnn_kubeflow_pipeline
run_dev_gnn_kubeflow_pipeline: $(if $(compiled_pipeline_path), _skip_build_deps, compile_jars push_new_docker_images)
run_dev_gnn_kubeflow_pipeline: $(if $(compiled_pipeline_path), _skip_build_deps, push_new_docker_images)
uv run python -m gigl.orchestration.kubeflow.runner \
$(if $(compiled_pipeline_path),,--container_image_cuda=${DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG}) \
$(if $(compiled_pipeline_path),,--container_image_cpu=${DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG}) \
@@ -0,0 +1,76 @@
# This config is used to run heterogeneous DBLP self-supervised training and inference using the in-memory GiGL SGS. It can be run with `make run_het_dblp_sup_gs_e2e_test`.
graphMetadata:
# We have 3 node types in the DBLP dataset: author, paper, and term. We also have 3
# edge types: author -> paper, paper -> author, and term -> paper.
edgeTypes:
- dstNodeType: paper
relation: to
srcNodeType: author
- dstNodeType: author
relation: to
srcNodeType: paper
- dstNodeType: paper
relation: to
srcNodeType: term
nodeTypes:
- author
- paper
- term
taskMetadata:
nodeAnchorBasedLinkPredictionTaskMetadata:
# We aim to predict paper -> author links in the graph.
supervisionEdgeTypes:
- dstNodeType: author
relation: to
srcNodeType: paper
datasetConfig:
dataPreprocessorConfig:
dataPreprocessorConfigClsPath: gigl.src.mocking.mocking_assets.passthrough_preprocessor_config_for_mocked_assets.PassthroughPreprocessorConfigForMockedAssets
dataPreprocessorArgs:
# This argument is specific for the `PassthroughPreprocessorConfigForMockedAssets` preprocessor to indicate which dataset we should be using
mocked_dataset_name: 'dblp_node_anchor_edge_features_lp'
# TODO(kmonte): Add GS trainer
trainerConfig:
trainerArgs:
# Example argument to trainer
log_every_n_batch: "50"
# The DBLP dataset does not have explicitly labeled edges, so we provide this field to indicate what
# percentage of edges should be selected as self-supervised labeled edges. Setting this to 0.05 randomly marks 5% of edges as "labels".
# Note that the current GiGL implementation does not remove these selected edges from the global set of edges, which may
# have a slight negative impact on training with self-supervised learning. This will be improved in the future.
# (A conceptual sketch of this label selection follows the config.)
ssl_positive_label_percentage: "0.05"
# Example of a dictionary fanout with a different fanout-per-hop for each edge type. Currently, we assume that all anchor node types
# use the same fanout. If you want different anchor node types to have different fanouts, we encourage adding additional arguments here to parse
# fanouts for each anchor node type.
# Note that edge types must be provided as a tuple[str, str, str] in the format (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE), as demonstrated below
# (a parsing sketch also follows the config).
num_neighbors: >-
{
("term", "to", "paper"): [10, 10],
("paper", "to", "author"): [15, 15],
("author", "to", "paper"): [20, 20]
}
command: python -m examples.link_prediction.heterogeneous_training
# TODO(kmonte): Move to user-defined server code
inferencerConfig:
inferencerArgs:
# Example argument to inferencer
log_every_n_batch: "50"
# Example of a dictionary fanout with a different fanout-per-hop for each edge type. Currently, we assume that all anchor node types
# use the same fanout. If you want different anchor node types to have different fanouts, we encourage adding additional arguments here to parse
# fanouts for each anchor node type.
# Note that edge types must be provided as a tuple[str, str, str] in the format (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE), as demonstrated below.
num_neighbors: >-
{
("term", "to", "paper"): [10, 10],
("paper", "to", "author"): [15, 15],
("author", "to", "paper"): [20, 20]
}
inferenceBatchSize: 512
command: python -m examples.link_prediction.graph_store.heterogeneous_inference
sharedConfig:
shouldSkipAutomaticTempAssetCleanup: false
shouldSkipInference: false
# Model evaluation is currently only supported for tabularized SGS GiGL pipelines. This will soon be added for in-memory SGS GiGL pipelines.
shouldSkipModelEvaluation: true
featureFlags:
should_run_glt_backend: 'True'
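
A minimal conceptual sketch of what `ssl_positive_label_percentage` does, assuming a PyTorch-style `edge_index` tensor; this is not GiGL's actual implementation, and `sample_ssl_positive_labels` is a hypothetical helper used only for illustration. It mirrors the caveat in the config comment: sampled edges are marked as positive labels but are not removed from the message-passing graph.

```python
import torch

def sample_ssl_positive_labels(edge_index: torch.Tensor, percentage: float) -> torch.Tensor:
    """Randomly pick a fraction of edges to act as self-supervised positive labels.

    edge_index: [2, num_edges] tensor of (src, dst) node ids for one edge type.
    percentage: fraction of edges to mark as labels, e.g. 0.05 for 5%.
    Returns the column indices of the selected edges.
    """
    num_edges = edge_index.size(1)
    num_labels = max(1, int(num_edges * percentage))
    perm = torch.randperm(num_edges)
    # The selected edges are only *marked* as labels; they stay in the graph,
    # matching the current GiGL behavior described in the config comment.
    return perm[:num_labels]

# Example: mark 5% of 1,000 paper -> author edges as positive labels.
edge_index = torch.randint(0, 100, (2, 1000))
label_idx = sample_ssl_positive_labels(edge_index, 0.05)
positive_edges = edge_index[:, label_idx]
```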
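
The `num_neighbors` blocks above are Python-literal dictionaries keyed by (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE) tuples. Below is a minimal sketch of how such a string could be parsed into per-edge-type fanouts; it is illustrative only and not GiGL's actual argument parsing, and `parse_num_neighbors` is a hypothetical helper.

```python
import ast
from typing import Dict, List, Tuple

EdgeType = Tuple[str, str, str]  # (SRC_NODE_TYPE, RELATION, DST_NODE_TYPE)

def parse_num_neighbors(raw: str) -> Dict[EdgeType, List[int]]:
    """Parse a Python-literal dict string mapping edge-type tuples to per-hop fanouts."""
    parsed = ast.literal_eval(raw)
    if not isinstance(parsed, dict):
        raise ValueError("num_neighbors must be a dict of edge types to fanout lists")
    fanouts: Dict[EdgeType, List[int]] = {}
    for edge_type, fanout in parsed.items():
        if not (isinstance(edge_type, tuple) and len(edge_type) == 3):
            raise ValueError(f"{edge_type!r} is not a (src, relation, dst) tuple")
        fanouts[edge_type] = [int(hop) for hop in fanout]
    return fanouts

# Example input matching the config above: two-hop fanouts per edge type.
raw = '{("term", "to", "paper"): [10, 10], ("paper", "to", "author"): [15, 15], ("author", "to", "paper"): [20, 20]}'
print(parse_num_neighbors(raw))
# {('term', 'to', 'paper'): [10, 10], ('paper', 'to', 'author'): [15, 15], ('author', 'to', 'paper'): [20, 20]}
```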