From 9c928f46b02777b9e301b7d0bf95cbe9d181b216 Mon Sep 17 00:00:00 2001 From: Maxwell Elliott Date: Fri, 19 Jun 2026 09:25:32 -0400 Subject: [PATCH] Canonicalize rule hash inputs (attributes + rule inputs) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hash a rule's attributes in name-sorted order and dedupe + sort its (configured) rule inputs before mixing them into the digest, so a target's hash is invariant to the order Bazel happens to emit them in. A rule's attributes and rule inputs are conceptually sets, but `BazelRule.digest()` / `ruleInputList()` hashed them in Bazel's emission order. That order is not guaranteed stable, so an otherwise-unchanged target could hash differently between two graphs. This is most acute on the configuration-aware (#359) cquery path: `configuredRuleInputList` can surface the same dep label across multiple configurations, and cquery does not promise a stable order for those edges. Backported from bazel-contrib/target-determinator commit d4b6125 ("Canonicalize target hash inputs"), which adds the same `sortedAttributesForHashing` + `canonicalizeRuleInputs` canonicalization. Note: this is a one-time change to absolute hash values. The standard workflow generates hashes for both the "before" and "after" revisions with the same binary, so diff results are unaffected — and unchanged targets now hash identically across runs that previously differed only by input/attribute ordering. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../kotlin/com/bazel_diff/bazel/BazelRule.kt | 56 +++++---- .../com/bazel_diff/bazel/BazelRuleTest.kt | 111 ++++++++++++++++++ 2 files changed, 144 insertions(+), 23 deletions(-) diff --git a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt index 998f932..04a1712 100644 --- a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt +++ b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt @@ -44,7 +44,11 @@ class BazelRule(private val rule: Build.Rule) { safePutBytes(rule.ruleClassBytes.toByteArray()) safePutBytes(rule.nameBytes.toByteArray()) safePutBytes(rule.skylarkEnvironmentHashCodeBytes.toByteArray()) - for (attribute in rule.attributeList) { + // Hash attributes in a canonical (name-sorted) order so a rule's digest is invariant to the + // order Bazel happens to emit them in. Attribute names are unique within a rule (the proto + // is generated from the rule's attribute map), so a name sort fully canonicalizes the set. + // Mirrors target-determinator's `sortedAttributesForHashing` (commit d4b6125). + for (attribute in rule.attributeList.sortedBy { it.name }) { if (!DEFAULT_IGNORED_ATTRS.contains(attribute.name) && !ignoredAttrs.contains(attribute.name)) safePutBytes(attribute.toByteArray()) @@ -53,28 +57,34 @@ class BazelRule(private val rule: Build.Rule) { } fun ruleInputList(useCquery: Boolean, fineGrainedHashExternalRepos: Set): List { - return if (useCquery) { - rule.configuredRuleInputList.map { encodeConfiguredRuleInput(it) } + - rule.ruleInputList - .map { ruleInput: String -> - transformRuleInput(fineGrainedHashExternalRepos, ruleInput) - } - // Only keep the non-fine-grained ones because the others are already covered by - // configuredRuleInputList - .filter { it.startsWith("//external:") } - .distinct() - } else { - // Include raw rule inputs plus transformed //external:* inputs so that targets depending - // on external repos pick up hash changes from //external:* synthetic targets (e.g. from - // bzlmod mod show_repo or WORKSPACE //external:all-targets). - rule.ruleInputList + - rule.ruleInputList - .map { ruleInput: String -> - transformRuleInput(fineGrainedHashExternalRepos, ruleInput) - } - .filter { it.startsWith("//external:") } - .distinct() - } + // Transformed //external:* synthetic inputs so that targets depending on external repos pick + // up hash changes from //external:* synthetic targets (e.g. from bzlmod mod show_repo or + // WORKSPACE //external:all-targets). + val externalSyntheticInputs = + rule.ruleInputList + .map { ruleInput: String -> + transformRuleInput(fineGrainedHashExternalRepos, ruleInput) + } + .filter { it.startsWith("//external:") } + + val inputs = + if (useCquery) { + // configuredRuleInputList already covers the non-fine-grained deps (with per-edge + // configuration folded in), so only the //external:* synthetic inputs are added. + rule.configuredRuleInputList.map { encodeConfiguredRuleInput(it) } + + externalSyntheticInputs + } else { + rule.ruleInputList + externalSyntheticInputs + } + + // Canonicalize: dedupe and sort so a target's digest is invariant to the order Bazel happens + // to emit (configured) rule inputs in. This matters most under --useCquery, where the same dep + // label can surface across multiple configurations and cquery does not guarantee a stable + // emission order -- without canonicalization an unchanged target could hash differently + // between two otherwise-identical graphs. RuleHasher mixes these bytes into the digest in list + // order, so a deterministic order is what keeps the hash stable. Mirrors + // target-determinator's `canonicalizeRuleInputs` (commit d4b6125). + return inputs.distinct().sorted() } val name: String = rule.name diff --git a/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt b/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt index c6610a6..21040bf 100644 --- a/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt +++ b/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt @@ -144,6 +144,117 @@ class BazelRuleTest { assertThat(decodeConfiguredRuleInputLabel("//external:foo")).isEqualTo("//external:foo") } + // Canonicalization (mirrors target-determinator commit d4b6125): a rule's digest must be + // invariant to the order Bazel happens to emit attributes in. Attribute names are unique within + // a rule, so the same set of attributes in a different order is the same rule and must hash the + // same -- otherwise a Bazel upgrade (or any reordering) would spuriously invalidate every target. + @Test + fun testDigestInvariantToAttributeOrder() { + val attrA = + Attribute.newBuilder() + .setType(Attribute.Discriminator.STRING) + .setName("a_attr") + .setStringValue("1") + .build() + val attrZ = + Attribute.newBuilder() + .setType(Attribute.Discriminator.STRING) + .setName("z_attr") + .setStringValue("2") + .build() + + val forward = + Rule.newBuilder() + .setRuleClass("java_library") + .setName("lib") + .addAttribute(attrA) + .addAttribute(attrZ) + .build() + val reversed = + Rule.newBuilder() + .setRuleClass("java_library") + .setName("lib") + .addAttribute(attrZ) + .addAttribute(attrA) + .build() + + assertThat(BazelRule(forward).digest(emptySet())) + .isEqualTo(BazelRule(reversed).digest(emptySet())) + } + + // Sorting attributes must not erase sensitivity to an actual attribute-value change. + @Test + fun testDigestStillDetectsAttributeValueChange() { + fun ruleWith(value: String) = + Rule.newBuilder() + .setRuleClass("java_library") + .setName("lib") + .addAttribute( + Attribute.newBuilder() + .setType(Attribute.Discriminator.STRING) + .setName("a_attr") + .setStringValue(value) + .build()) + .build() + + assertThat(BazelRule(ruleWith("before")).digest(emptySet())) + .isNotEqualTo(BazelRule(ruleWith("after")).digest(emptySet())) + } + + // Rule inputs are a set, not a sequence: the non-cquery input list must come back deduped and in + // a deterministic (sorted) order regardless of Bazel's emission order. + @Test + fun testNonCqueryRuleInputListDedupesAndSorts() { + val rule = + Rule.newBuilder() + .setRuleClass("genrule") + .setName("//:gen") + .addRuleInput("//:b") + .addRuleInput("//:a") + .addRuleInput("//:b") + .build() + + val inputs = + BazelRule(rule).ruleInputList(useCquery = false, fineGrainedHashExternalRepos = emptySet()) + + assertThat(inputs).isEqualTo(listOf("//:a", "//:b")) + } + + // The configuration-aware (#359) path is the one most exposed to non-deterministic emission + // order: cquery can surface the same dep label across multiple configurations in any order. Two + // graphs that differ only by that order describe the same target and must produce an identical + // ruleInputList(), or RuleHasher would report a spurious change. + @Test + fun testCqueryRuleInputListInvariantToConfiguredInputOrder() { + fun genruleWith(inputs: List): Rule { + val builder = Rule.newBuilder().setRuleClass("genrule").setName("//:gen") + inputs.forEach { builder.addConfiguredRuleInput(it) } + return builder.build() + } + + val inputA = + Build.ConfiguredRuleInput.newBuilder() + .setLabel("//:a") + .setConfigurationChecksum("cfg-A") + .build() + val inputB = + Build.ConfiguredRuleInput.newBuilder() + .setLabel("//:b") + .setConfigurationChecksum("cfg-B") + .build() + + val forward = + BazelRule(genruleWith(listOf(inputA, inputB))) + .ruleInputList(useCquery = true, fineGrainedHashExternalRepos = emptySet()) + // Reversed order, plus a duplicate of inputA to exercise dedupe. + val reversed = + BazelRule(genruleWith(listOf(inputB, inputA, inputA))) + .ruleInputList(useCquery = true, fineGrainedHashExternalRepos = emptySet()) + + assertThat(forward).isEqualTo(listOf("//:a|cfg-A", "//:b|cfg-B")) + assertThat(forward).isEqualTo(reversed) + } + private fun configuredGenrule(depLabel: String, configurationChecksum: String): Rule { return Rule.newBuilder() .setRuleClass("genrule")