21 changes: 11 additions & 10 deletions ax/benchmark/benchmark.py
@@ -806,7 +806,7 @@ def get_opt_trace_by_steps(experiment: Experiment) -> npt.NDArray:
             "Cumulative epochs not supported for problems with outcome constraints."
         )

-    objective_name = optimization_config.objective.metric.name
+    objective_name: str = optimization_config.objective.metric.name
     data = assert_is_instance(experiment.lookup_data(), MapData)
     map_df = data.map_df

@@ -815,39 +815,40 @@ def get_opt_trace_by_steps(experiment: Experiment) -> npt.NDArray:
     # to know which actually ran
     def _get_df(trial: Trial) -> pd.DataFrame:
         """
-        Get the (virtual) time each epoch finished at.
+        Get the (virtual) time each epoch finished at, along with the ground
+        truth values (Y_true).
         """
         metadata = trial.run_metadata["benchmark_metadata"]
         backend_simulator = none_throws(metadata.backend_simulator)
-        # Data for the first metric, which is the only metric
-        df = next(iter(metadata.dfs.values()))
+        # Get the DataFrame for the objective metric
+        df = metadata.dfs[objective_name].copy()
         start_time = backend_simulator.get_sim_trial_by_index(
             trial.index
         ).sim_start_time
         df["time"] = df["virtual runtime"] + start_time
         return df

-    with_timestamps = pd.concat(
+    with_timestamps_and_y_true = pd.concat(
         (
             _get_df(trial=assert_is_instance(trial, Trial))
             for trial in experiment.trials.values()
         ),
         axis=0,
         ignore_index=True,
-    )[["trial_index", MAP_KEY, "time"]]
+    )[["trial_index", MAP_KEY, "time", "Y_true"]]

     df = (
         map_df.loc[
             map_df["metric_name"] == objective_name,
-            ["trial_index", "arm_name", "mean", MAP_KEY],
+            ["trial_index", "arm_name", MAP_KEY],
         ]
-        .merge(with_timestamps, how="left")
+        .merge(with_timestamps_and_y_true, how="left")
         .sort_values("time", ignore_index=True)
     )
     return (
-        df["mean"].cummin()
+        df["Y_true"].cummin()
         if optimization_config.objective.minimize
-        else df["mean"].cummax()
+        else df["Y_true"].cummax()
     ).to_numpy()


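For intuition, here is a minimal standalone sketch (made-up trial indices, times, and values; not Ax API calls) of the trace computation this hunk switches to ground truth: per-epoch Y_true values are ordered by simulated completion time, and a running best is taken.

import pandas as pd

# Hypothetical per-epoch records: the ground-truth objective value of each
# epoch and the simulated ("virtual") time at which it finished.
records = pd.DataFrame(
    {
        "trial_index": [0, 0, 1, 1],
        "time": [1.0, 2.0, 1.5, 2.5],
        "Y_true": [0.8, 0.6, 0.7, 0.4],
    }
)

minimize = True
ordered = records.sort_values("time", ignore_index=True)
# Running best of the noiseless values, in completion-time order.
trace = (
    ordered["Y_true"].cummin() if minimize else ordered["Y_true"].cummax()
).to_numpy()
print(trace)  # [0.8 0.7 0.6 0.4]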
1 change: 0 additions & 1 deletion ax/benchmark/benchmark_runner.py
@@ -303,7 +303,6 @@ def run(self, trial: BaseTrial) -> dict[str, BenchmarkTrialMetadata]:
             df=df, noise_stds=self.get_noise_stds(), arm_weights=arm_weights
         )
         df["trial_index"] = trial.index
-        df.drop(columns=["Y_true"], inplace=True)
         df["metric_signature"] = df["metric_name"]

         if self.simulated_backend_runner is not None:
14 changes: 8 additions & 6 deletions ax/benchmark/benchmark_trial_metadata.py
@@ -20,12 +20,14 @@ class BenchmarkTrialMetadata:

     Args:
         df: A dict mapping each metric name to a Pandas DataFrame with columns
-            ["metric_name", "arm_name", "mean", "sem", and "step"]. The "sem" is
-            always present in this df even if noise levels are unobserved;
-            ``BenchmarkMetric`` and ``BenchmarkMapMetric`` hide that data if it
-            should not be observed, and ``BenchmarkMapMetric``s drop data from
-            time periods that that are not observed based on the (simulated)
-            trial progression.
+            ["metric_name", "arm_name", "mean", "sem", "Y_true", and "step"].
+            The "sem" is always present in this df even if noise levels are
+            unobserved; ``BenchmarkMetric`` and ``BenchmarkMapMetric`` hide that
+            data if it should not be observed, and ``BenchmarkMapMetric``s drop
+            data from time periods that are not observed based on the
+            (simulated) trial progression. The "Y_true" column contains the
+            ground-truth (noiseless) values, which are used for computing the
+            optimization trace.
         backend_simulator: Optionally, the backend simulator that is tracking
             the trial's status.
     """
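As a rough illustration of the schema documented above (the metric name "branin" and all numbers are made up, not taken from this PR), the `df` argument is a dict of per-metric DataFrames shaped roughly like this:

import pandas as pd

# Toy instance of the documented per-metric schema (values are illustrative).
objective_df = pd.DataFrame(
    {
        "metric_name": ["branin", "branin"],
        "arm_name": ["0_0", "0_0"],
        "mean": [1.2, 0.9],      # possibly noisy observed values
        "sem": [0.1, 0.1],       # always present; metrics may hide it
        "Y_true": [1.15, 0.85],  # noiseless ground truth, used for the trace
        "step": [0, 1],          # map-metric progression step
    }
)
dfs = {"branin": objective_df}  # the dict passed as the `df` argument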
2 changes: 2 additions & 0 deletions ax/benchmark/tests/test_benchmark_metric.py
@@ -47,6 +47,7 @@ def _get_one_step_df(
             "metric_name": metric_name,
             "mean": [1.0, 2.5] if batch else [1.0],
             "sem": sem,
+            "Y_true": [0.9, 2.4] if batch else [0.9],
             "trial_index": 0,
             "step": step,
             "virtual runtime": step,
@@ -59,6 +60,7 @@
             "metric_name": metric_name,
             "mean": [0.5, 1.5] if batch else [0.5],
             "sem": sem,
+            "Y_true": [0.4, 1.4] if batch else [0.4],
             "trial_index": 0,
             "step": step,
             "virtual runtime": step,
1 change: 1 addition & 0 deletions ax/benchmark/tests/test_benchmark_runner.py
@@ -367,6 +367,7 @@ def test_heterogeneous_noise(self) -> None:
                 "metric_signature",
                 "mean",
                 "sem",
+                "Y_true",
                 "trial_index",
                 "step",
                 "virtual runtime",
5 changes: 4 additions & 1 deletion ax/service/utils/best_point.py
@@ -844,9 +844,12 @@ def _prepare_data_for_trace(

     # Transform to a DataFrame with columns ["trial_index", "arm_name"] +
     # relevant metric names, and values being means.
+    # Use Y_true (ground truth) if available (benchmarking context),
+    # otherwise fall back to mean (production context)
+    value_col = "Y_true" if "Y_true" in df.columns else "mean"
     df_wide = (
         df[df["metric_name"].isin(metrics)]
-        .set_index(["trial_index", "arm_name", "metric_name"])["mean"]
+        .set_index(["trial_index", "arm_name", "metric_name"])[value_col]
         .unstack(level="metric_name")
     )
     missing_metrics = [
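A minimal sketch, using made-up metric names and values, of the fallback introduced in this hunk: the wide table is built from "Y_true" when that column is present (benchmarking) and from "mean" otherwise (production).

import pandas as pd

# Toy long-format data of the shape _prepare_data_for_trace works with.
df = pd.DataFrame(
    {
        "trial_index": [0, 0, 1, 1],
        "arm_name": ["0_0", "0_0", "1_0", "1_0"],
        "metric_name": ["obj", "constraint", "obj", "constraint"],
        "mean": [1.1, 0.2, 0.7, 0.1],
        "Y_true": [1.0, 0.25, 0.75, 0.05],
    }
)
metrics = ["obj", "constraint"]

# Prefer the noiseless column when the benchmarking runner provides it.
value_col = "Y_true" if "Y_true" in df.columns else "mean"
df_wide = (
    df[df["metric_name"].isin(metrics)]
    .set_index(["trial_index", "arm_name", "metric_name"])[value_col]
    .unstack(level="metric_name")
)
print(df_wide)  # one column per metric, one row per (trial_index, arm_name)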