From 411de7c397fcbb7033cc57b5dce03ee59a25403a Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Mon, 29 Dec 2025 11:43:55 +0900 Subject: [PATCH] refactor(pypi): print a better error message for duplicate repos Before this PR the error message would not be super helpful and may potentially make it hard to debug and report errors. This PR does the following: * Add a better error message which also adds comparison of the args with which we create the whl library. * Add a test that ensures that the error message is legible and works. * Add the necessary plumbing to logger to allow for testing error messages. A proper fix requires more work, so just adding better logging and error messages may be useful here. Work towards #3479 --- python/private/pypi/BUILD.bazel | 1 + python/private/pypi/hub_builder.bzl | 70 ++++++++++++++++++-- python/private/repo_utils.bzl | 22 ++++-- tests/pypi/hub_builder/hub_builder_tests.bzl | 59 +++++++++++++++++ 4 files changed, 141 insertions(+), 11 deletions(-) diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index aa96cf86a5..1b4c031106 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -173,6 +173,7 @@ bzl_library( ":whl_repo_name_bzl", "//python/private:full_version_bzl", "//python/private:normalize_name_bzl", + "//python/private:text_util_bzl", "//python/private:version_bzl", "//python/private:version_label_bzl", ], diff --git a/python/private/pypi/hub_builder.bzl b/python/private/pypi/hub_builder.bzl index 700f22e2c0..f54d02d8b0 100644 --- a/python/private/pypi/hub_builder.bzl +++ b/python/private/pypi/hub_builder.bzl @@ -3,6 +3,7 @@ load("//python/private:full_version.bzl", "full_version") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:repo_utils.bzl", "repo_utils") +load("//python/private:text_util.bzl", "render") load("//python/private:version.bzl", "version") load("//python/private:version_label.bzl", "version_label") load(":attrs.bzl", "use_isolated") @@ -86,6 +87,16 @@ def hub_builder( ### PUBLIC methods def _build(self): + ret = struct( + whl_map = {}, + group_map = {}, + extra_aliases = {}, + exposed_packages = [], + whl_libraries = {}, + ) + if self._logger.failed(): + return ret + whl_map = {} for key, settings in self._whl_map.items(): for setting, repo in settings.items(): @@ -196,6 +207,44 @@ def _add_extra_aliases(self, extra_hub_aliases): {alias: True for alias in aliases}, ) +def _diff_dict(first, second): + """A simple utility to shallow compare dictionaries. + + Args: + first: The first dictionary to compare. + second: The second dictionary to compare. + + Returns: + A dictionary containing the differences, with keys "common", "different", + "extra", and "missing", or None if the dictionaries are identical. + """ + missing = {} + extra = { + key: value + for key, value in second.items() + if key not in first + } + common = {} + different = {} + + for key, value in first.items(): + if key not in second: + missing[key] = value + elif value == second[key]: + common[key] = value + else: + different[key] = (value, second[key]) + + if missing or extra or different: + return { + "common": common, + "different": different, + "extra": extra, + "missing": missing, + } + else: + return None + def _add_whl_library(self, *, python_version, whl, repo, enable_pipstar): if repo == None: # NOTE @aignas 2025-07-07: we guard against an edge-case where there @@ -207,13 +256,26 @@ def _add_whl_library(self, *, python_version, whl, repo, enable_pipstar): # TODO @aignas 2025-06-29: we should not need the version in the repo_name if # we are using pipstar and we are downloading the wheel using the downloader + # + # However, for that we should first have a different way to reference closures with + # extras. For example, if some package depends on `foo[extra]` and another depends on + # `foo`, we should have 2 py_library targets. repo_name = "{}_{}_{}".format(self.name, version_label(python_version), repo.repo_name) if repo_name in self._whl_libraries: - fail("attempting to create a duplicate library {} for {}".format( - repo_name, - whl.name, - )) + diff = _diff_dict(self._whl_libraries[repo_name], repo.args) + if diff: + self._logger.fail(lambda: ( + "Attempting to create a duplicate library {repo_name} for {whl_name} with different arguments. Already existing declaration has:\n".format( + repo_name = repo_name, + whl_name = whl.name, + ) + "\n".join([ + " {}: {}".format(key, render.indent(render.dict(value)).lstrip()) + for key, value in diff.items() + if value + ]) + )) + return self._whl_libraries[repo_name] = repo.args if not enable_pipstar and "experimental_target_platforms" in repo.args: diff --git a/python/private/repo_utils.bzl b/python/private/repo_utils.bzl index 1abff36a04..28ba07d376 100644 --- a/python/private/repo_utils.bzl +++ b/python/private/repo_utils.bzl @@ -31,7 +31,7 @@ def _is_repo_debug_enabled(mrctx): """ return _getenv(mrctx, REPO_DEBUG_ENV_VAR) == "1" -def _logger(mrctx = None, name = None, verbosity_level = None): +def _logger(mrctx = None, name = None, verbosity_level = None, printer = None): """Creates a logger instance for printing messages. Args: @@ -39,7 +39,9 @@ def _logger(mrctx = None, name = None, verbosity_level = None): `_rule_name` is present, it will be included in log messages. name: name for the logger. Optional for repository_ctx usage. verbosity_level: {type}`int | None` verbosity level. If not set, - taken from `mrctx` + taken from `mrctx`. + printer: a function to use for printing. Defaults to `print` or `fail` depending + on the logging method. Returns: A struct with attributes logging: trace, debug, info, warn, fail. @@ -70,10 +72,15 @@ def _logger(mrctx = None, name = None, verbosity_level = None): elif not name: fail("The name has to be specified when using the logger with `module_ctx`") + failures = [] + def _log(enabled_on_verbosity, level, message_cb_or_str, printer = print): if verbosity < enabled_on_verbosity: return + if level == "FAIL": + failures.append(None) + if type(message_cb_or_str) == "string": message = message_cb_or_str else: @@ -86,11 +93,12 @@ def _logger(mrctx = None, name = None, verbosity_level = None): ), message) # buildifier: disable=print return struct( - trace = lambda message_cb: _log(3, "TRACE", message_cb), - debug = lambda message_cb: _log(2, "DEBUG", message_cb), - info = lambda message_cb: _log(1, "INFO", message_cb), - warn = lambda message_cb: _log(0, "WARNING", message_cb), - fail = lambda message_cb: _log(-1, "FAIL", message_cb, fail), + trace = lambda message_cb: _log(3, "TRACE", message_cb, printer or print), + debug = lambda message_cb: _log(2, "DEBUG", message_cb, printer or print), + info = lambda message_cb: _log(1, "INFO", message_cb, printer or print), + warn = lambda message_cb: _log(0, "WARNING", message_cb, printer or print), + fail = lambda message_cb: _log(-1, "FAIL", message_cb, printer or fail), + failed = lambda: len(failures) != 0, ) def _execute_internal( diff --git a/tests/pypi/hub_builder/hub_builder_tests.bzl b/tests/pypi/hub_builder/hub_builder_tests.bzl index 42c65ae8f7..03cefd13c5 100644 --- a/tests/pypi/hub_builder/hub_builder_tests.bzl +++ b/tests/pypi/hub_builder/hub_builder_tests.bzl @@ -48,6 +48,7 @@ def hub_builder( whl_overrides = {}, evaluate_markers_fn = None, simpleapi_download_fn = None, + log_printer = None, available_interpreters = {}): builder = _hub_builder( name = "pypi", @@ -96,6 +97,7 @@ def hub_builder( ), ), "unit-test", + printer = log_printer, ), ) self = struct( @@ -1245,6 +1247,63 @@ optimum[onnxruntime-gpu]==1.17.1 ; sys_platform == 'linux' _tests.append(_test_pipstar_platforms_limit) +def _test_err_duplicate_repos(env): + logs = {} + log_printer = lambda key, message: logs.setdefault(key.strip(), []).append(message) + builder = hub_builder( + env, + available_interpreters = { + "python_3_15_1_host": "unit_test_interpreter_target_1", + "python_3_15_2_host": "unit_test_interpreter_target_2", + }, + log_printer = log_printer, + ) + builder.pip_parse( + _mock_mctx( + os_name = "osx", + arch_name = "aarch64", + ), + _parse( + hub_name = "pypi", + python_version = "3.15.1", + requirements_lock = "requirements.txt", + ), + ) + builder.pip_parse( + _mock_mctx( + os_name = "osx", + arch_name = "aarch64", + ), + _parse( + hub_name = "pypi", + python_version = "3.15.2", + requirements_lock = "requirements.txt", + ), + ) + pypi = builder.build() + + pypi.exposed_packages().contains_exactly([]) + pypi.group_map().contains_exactly({}) + pypi.whl_map().contains_exactly({}) + pypi.whl_libraries().contains_exactly({}) + pypi.extra_aliases().contains_exactly({}) + env.expect.that_dict(logs).keys().contains_exactly(["rules_python:unit-test FAIL:"]) + env.expect.that_collection(logs["rules_python:unit-test FAIL:"]).contains_exactly([ + """\ +Attempting to create a duplicate library pypi_315_simple for simple with different arguments. Already existing declaration has: + common: { + "dep_template": "@pypi//{name}:{target}", + "config_load": "@pypi//:config.bzl", + "requirement": "simple==0.0.1 --hash=sha256:deadbeef --hash=sha256:deadbaaf", + } + different: { + "python_interpreter_target": ("unit_test_interpreter_target_1", "unit_test_interpreter_target_2"), + }\ +""", + ]).in_order() + +_tests.append(_test_err_duplicate_repos) + def hub_builder_test_suite(name): """Create the test suite.