From 89d4ff08e2ce0945b6d2b215c0bb9ca8facd2fed Mon Sep 17 00:00:00 2001
From: Carl Hvarfner
Date: Fri, 19 Dec 2025 03:25:48 -0800
Subject: [PATCH] optimization_trace to return noiseless function_values
 instead of noisy

Summary: Change the optimization trace to return the underlying noiseless
values instead of the noisy observed values.

Differential Revision: D89407340

Privacy Context Container: L1307644
---
 ax/benchmark/benchmark.py                   | 21 +++++++++++----------
 ax/benchmark/benchmark_runner.py            |  1 -
 ax/benchmark/benchmark_trial_metadata.py    | 14 ++++++++------
 ax/benchmark/tests/test_benchmark_metric.py |  2 ++
 ax/benchmark/tests/test_benchmark_runner.py |  1 +
 ax/service/utils/best_point.py              |  5 ++++-
 6 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py
index 959d2f07390..2436bdc381a 100644
--- a/ax/benchmark/benchmark.py
+++ b/ax/benchmark/benchmark.py
@@ -806,7 +806,7 @@ def get_opt_trace_by_steps(experiment: Experiment) -> npt.NDArray:
             "Cumulative epochs not supported for problems with outcome constraints."
         )
 
-    objective_name = optimization_config.objective.metric.name
+    objective_name: str = optimization_config.objective.metric.name
     data = assert_is_instance(experiment.lookup_data(), MapData)
     map_df = data.map_df
 
@@ -815,39 +815,40 @@ def get_opt_trace_by_steps(experiment: Experiment) -> npt.NDArray:
     # to know which actually ran
     def _get_df(trial: Trial) -> pd.DataFrame:
         """
-        Get the (virtual) time each epoch finished at.
+        Get the (virtual) time each epoch finished at, along with the ground
+        truth values (Y_true).
         """
         metadata = trial.run_metadata["benchmark_metadata"]
         backend_simulator = none_throws(metadata.backend_simulator)
-        # Data for the first metric, which is the only metric
-        df = next(iter(metadata.dfs.values()))
+        # Get the DataFrame for the objective metric
+        df = metadata.dfs[objective_name].copy()
         start_time = backend_simulator.get_sim_trial_by_index(
             trial.index
         ).sim_start_time
         df["time"] = df["virtual runtime"] + start_time
         return df
 
-    with_timestamps = pd.concat(
+    with_timestamps_and_y_true = pd.concat(
         (
             _get_df(trial=assert_is_instance(trial, Trial))
             for trial in experiment.trials.values()
         ),
         axis=0,
         ignore_index=True,
-    )[["trial_index", MAP_KEY, "time"]]
+    )[["trial_index", MAP_KEY, "time", "Y_true"]]
 
     df = (
         map_df.loc[
             map_df["metric_name"] == objective_name,
-            ["trial_index", "arm_name", "mean", MAP_KEY],
+            ["trial_index", "arm_name", MAP_KEY],
         ]
-        .merge(with_timestamps, how="left")
+        .merge(with_timestamps_and_y_true, how="left")
         .sort_values("time", ignore_index=True)
     )
     return (
-        df["mean"].cummin()
+        df["Y_true"].cummin()
         if optimization_config.objective.minimize
-        else df["mean"].cummax()
+        else df["Y_true"].cummax()
     ).to_numpy()
 
 
diff --git a/ax/benchmark/benchmark_runner.py b/ax/benchmark/benchmark_runner.py
index 765d413452f..9a0c4152309 100644
--- a/ax/benchmark/benchmark_runner.py
+++ b/ax/benchmark/benchmark_runner.py
@@ -303,7 +303,6 @@ def run(self, trial: BaseTrial) -> dict[str, BenchmarkTrialMetadata]:
             df=df, noise_stds=self.get_noise_stds(), arm_weights=arm_weights
         )
         df["trial_index"] = trial.index
-        df.drop(columns=["Y_true"], inplace=True)
         df["metric_signature"] = df["metric_name"]
 
         if self.simulated_backend_runner is not None:
diff --git a/ax/benchmark/benchmark_trial_metadata.py b/ax/benchmark/benchmark_trial_metadata.py
index 52b3db216d7..db5b1898d0a 100644
--- a/ax/benchmark/benchmark_trial_metadata.py
+++ b/ax/benchmark/benchmark_trial_metadata.py
@@ -20,12 +20,14 @@ class BenchmarkTrialMetadata:
 
     Args:
         df: A dict mapping each metric name to a Pandas DataFrame with columns
-            ["metric_name", "arm_name", "mean", "sem", and "step"]. The "sem" is
-            always present in this df even if noise levels are unobserved;
-            ``BenchmarkMetric`` and ``BenchmarkMapMetric`` hide that data if it
-            should not be observed, and ``BenchmarkMapMetric``s drop data from
-            time periods that that are not observed based on the (simulated)
-            trial progression.
+            ["metric_name", "arm_name", "mean", "sem", "Y_true", and "step"]. The
+            "sem" is always present in this df even if noise levels are
+            unobserved; ``BenchmarkMetric`` and ``BenchmarkMapMetric`` hide that
+            data if it should not be observed, and ``BenchmarkMapMetric``s drop
+            data from time periods that are not observed based on the
+            (simulated) trial progression. The "Y_true" column contains the
+            ground-truth (noiseless) values, which are used for computing the
+            optimization trace.
         backend_simulator: Optionally, the backend simulator that is tracking
             the trial's status.
     """
diff --git a/ax/benchmark/tests/test_benchmark_metric.py b/ax/benchmark/tests/test_benchmark_metric.py
index 4d4bedf8ceb..e6521c99d9a 100644
--- a/ax/benchmark/tests/test_benchmark_metric.py
+++ b/ax/benchmark/tests/test_benchmark_metric.py
@@ -47,6 +47,7 @@ def _get_one_step_df(
             "metric_name": metric_name,
             "mean": [1.0, 2.5] if batch else [1.0],
             "sem": sem,
+            "Y_true": [0.9, 2.4] if batch else [0.9],
             "trial_index": 0,
             "step": step,
             "virtual runtime": step,
@@ -59,6 +60,7 @@ def _get_one_step_df(
             "metric_name": metric_name,
             "mean": [0.5, 1.5] if batch else [0.5],
             "sem": sem,
+            "Y_true": [0.4, 1.4] if batch else [0.4],
             "trial_index": 0,
             "step": step,
             "virtual runtime": step,
diff --git a/ax/benchmark/tests/test_benchmark_runner.py b/ax/benchmark/tests/test_benchmark_runner.py
index add8ae20e39..81d3247b09f 100644
--- a/ax/benchmark/tests/test_benchmark_runner.py
+++ b/ax/benchmark/tests/test_benchmark_runner.py
@@ -367,6 +367,7 @@ def test_heterogeneous_noise(self) -> None:
                     "metric_signature",
                     "mean",
                     "sem",
+                    "Y_true",
                     "trial_index",
                     "step",
                     "virtual runtime",
diff --git a/ax/service/utils/best_point.py b/ax/service/utils/best_point.py
index 11c2394fc45..2733089e19b 100644
--- a/ax/service/utils/best_point.py
+++ b/ax/service/utils/best_point.py
@@ -844,9 +844,12 @@ def _prepare_data_for_trace(
 
     # Transform to a DataFrame with columns ["trial_index", "arm_name"] +
     # relevant metric names, and values being means.
+    # Use "Y_true" (ground truth) if available (benchmarking context);
+    # otherwise fall back to "mean" (production context).
+    value_col = "Y_true" if "Y_true" in df.columns else "mean"
     df_wide = (
         df[df["metric_name"].isin(metrics)]
-        .set_index(["trial_index", "arm_name", "metric_name"])["mean"]
+        .set_index(["trial_index", "arm_name", "metric_name"])[value_col]
         .unstack(level="metric_name")
     )
     missing_metrics = [
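
Reviewer note (illustrative, not part of the patch): below is a minimal
sketch of the new trace semantics on a hypothetical toy DataFrame. The
column names match the patch; the values, arm names, and the `minimize`
flag are made up for illustration.

    import pandas as pd

    # Toy long-format results; "mean" is the noisy observation and
    # "Y_true" the noiseless ground truth (values are hypothetical).
    df = pd.DataFrame(
        {
            "trial_index": [0, 1, 2],
            "arm_name": ["0_0", "1_0", "2_0"],
            "metric_name": ["objective"] * 3,
            "mean": [1.1, 0.4, 0.7],
            "Y_true": [1.0, 0.5, 0.6],
        }
    )

    # Mirrors the fallback added in best_point.py: prefer the noiseless
    # column when it exists (benchmarking), else the observed means.
    value_col = "Y_true" if "Y_true" in df.columns else "mean"

    # Mirrors the trace computation in benchmark.py: running best-so-far
    # value, cummin when minimizing, cummax when maximizing.
    minimize = True
    trace = (
        df[value_col].cummin() if minimize else df[value_col].cummax()
    ).to_numpy()
    print(trace)  # [1.  0.5 0.5]: the noisy 0.4 at trial 1 no longer leaks in

Under the old behavior the trace would have been the cummin of "mean"
([1.1, 0.4, 0.4]), so a lucky noise draw could make an optimizer look
better than it is; tracking "Y_true" removes that bias from benchmark
results while leaving production (no "Y_true" column) unchanged.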