diff --git a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt index 998f932..04a1712 100644 --- a/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt +++ b/cli/src/main/kotlin/com/bazel_diff/bazel/BazelRule.kt @@ -44,7 +44,11 @@ class BazelRule(private val rule: Build.Rule) { safePutBytes(rule.ruleClassBytes.toByteArray()) safePutBytes(rule.nameBytes.toByteArray()) safePutBytes(rule.skylarkEnvironmentHashCodeBytes.toByteArray()) - for (attribute in rule.attributeList) { + // Hash attributes in a canonical (name-sorted) order so a rule's digest is invariant to the + // order Bazel happens to emit them in. Attribute names are unique within a rule (the proto + // is generated from the rule's attribute map), so a name sort fully canonicalizes the set. + // Mirrors target-determinator's `sortedAttributesForHashing` (commit d4b6125). + for (attribute in rule.attributeList.sortedBy { it.name }) { if (!DEFAULT_IGNORED_ATTRS.contains(attribute.name) && !ignoredAttrs.contains(attribute.name)) safePutBytes(attribute.toByteArray()) @@ -53,28 +57,34 @@ class BazelRule(private val rule: Build.Rule) { } fun ruleInputList(useCquery: Boolean, fineGrainedHashExternalRepos: Set): List { - return if (useCquery) { - rule.configuredRuleInputList.map { encodeConfiguredRuleInput(it) } + - rule.ruleInputList - .map { ruleInput: String -> - transformRuleInput(fineGrainedHashExternalRepos, ruleInput) - } - // Only keep the non-fine-grained ones because the others are already covered by - // configuredRuleInputList - .filter { it.startsWith("//external:") } - .distinct() - } else { - // Include raw rule inputs plus transformed //external:* inputs so that targets depending - // on external repos pick up hash changes from //external:* synthetic targets (e.g. from - // bzlmod mod show_repo or WORKSPACE //external:all-targets). - rule.ruleInputList + - rule.ruleInputList - .map { ruleInput: String -> - transformRuleInput(fineGrainedHashExternalRepos, ruleInput) - } - .filter { it.startsWith("//external:") } - .distinct() - } + // Transformed //external:* synthetic inputs so that targets depending on external repos pick + // up hash changes from //external:* synthetic targets (e.g. from bzlmod mod show_repo or + // WORKSPACE //external:all-targets). + val externalSyntheticInputs = + rule.ruleInputList + .map { ruleInput: String -> + transformRuleInput(fineGrainedHashExternalRepos, ruleInput) + } + .filter { it.startsWith("//external:") } + + val inputs = + if (useCquery) { + // configuredRuleInputList already covers the non-fine-grained deps (with per-edge + // configuration folded in), so only the //external:* synthetic inputs are added. + rule.configuredRuleInputList.map { encodeConfiguredRuleInput(it) } + + externalSyntheticInputs + } else { + rule.ruleInputList + externalSyntheticInputs + } + + // Canonicalize: dedupe and sort so a target's digest is invariant to the order Bazel happens + // to emit (configured) rule inputs in. This matters most under --useCquery, where the same dep + // label can surface across multiple configurations and cquery does not guarantee a stable + // emission order -- without canonicalization an unchanged target could hash differently + // between two otherwise-identical graphs. RuleHasher mixes these bytes into the digest in list + // order, so a deterministic order is what keeps the hash stable. Mirrors + // target-determinator's `canonicalizeRuleInputs` (commit d4b6125). + return inputs.distinct().sorted() } val name: String = rule.name diff --git a/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt b/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt index c6610a6..21040bf 100644 --- a/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt +++ b/cli/src/test/kotlin/com/bazel_diff/bazel/BazelRuleTest.kt @@ -144,6 +144,117 @@ class BazelRuleTest { assertThat(decodeConfiguredRuleInputLabel("//external:foo")).isEqualTo("//external:foo") } + // Canonicalization (mirrors target-determinator commit d4b6125): a rule's digest must be + // invariant to the order Bazel happens to emit attributes in. Attribute names are unique within + // a rule, so the same set of attributes in a different order is the same rule and must hash the + // same -- otherwise a Bazel upgrade (or any reordering) would spuriously invalidate every target. + @Test + fun testDigestInvariantToAttributeOrder() { + val attrA = + Attribute.newBuilder() + .setType(Attribute.Discriminator.STRING) + .setName("a_attr") + .setStringValue("1") + .build() + val attrZ = + Attribute.newBuilder() + .setType(Attribute.Discriminator.STRING) + .setName("z_attr") + .setStringValue("2") + .build() + + val forward = + Rule.newBuilder() + .setRuleClass("java_library") + .setName("lib") + .addAttribute(attrA) + .addAttribute(attrZ) + .build() + val reversed = + Rule.newBuilder() + .setRuleClass("java_library") + .setName("lib") + .addAttribute(attrZ) + .addAttribute(attrA) + .build() + + assertThat(BazelRule(forward).digest(emptySet())) + .isEqualTo(BazelRule(reversed).digest(emptySet())) + } + + // Sorting attributes must not erase sensitivity to an actual attribute-value change. + @Test + fun testDigestStillDetectsAttributeValueChange() { + fun ruleWith(value: String) = + Rule.newBuilder() + .setRuleClass("java_library") + .setName("lib") + .addAttribute( + Attribute.newBuilder() + .setType(Attribute.Discriminator.STRING) + .setName("a_attr") + .setStringValue(value) + .build()) + .build() + + assertThat(BazelRule(ruleWith("before")).digest(emptySet())) + .isNotEqualTo(BazelRule(ruleWith("after")).digest(emptySet())) + } + + // Rule inputs are a set, not a sequence: the non-cquery input list must come back deduped and in + // a deterministic (sorted) order regardless of Bazel's emission order. + @Test + fun testNonCqueryRuleInputListDedupesAndSorts() { + val rule = + Rule.newBuilder() + .setRuleClass("genrule") + .setName("//:gen") + .addRuleInput("//:b") + .addRuleInput("//:a") + .addRuleInput("//:b") + .build() + + val inputs = + BazelRule(rule).ruleInputList(useCquery = false, fineGrainedHashExternalRepos = emptySet()) + + assertThat(inputs).isEqualTo(listOf("//:a", "//:b")) + } + + // The configuration-aware (#359) path is the one most exposed to non-deterministic emission + // order: cquery can surface the same dep label across multiple configurations in any order. Two + // graphs that differ only by that order describe the same target and must produce an identical + // ruleInputList(), or RuleHasher would report a spurious change. + @Test + fun testCqueryRuleInputListInvariantToConfiguredInputOrder() { + fun genruleWith(inputs: List): Rule { + val builder = Rule.newBuilder().setRuleClass("genrule").setName("//:gen") + inputs.forEach { builder.addConfiguredRuleInput(it) } + return builder.build() + } + + val inputA = + Build.ConfiguredRuleInput.newBuilder() + .setLabel("//:a") + .setConfigurationChecksum("cfg-A") + .build() + val inputB = + Build.ConfiguredRuleInput.newBuilder() + .setLabel("//:b") + .setConfigurationChecksum("cfg-B") + .build() + + val forward = + BazelRule(genruleWith(listOf(inputA, inputB))) + .ruleInputList(useCquery = true, fineGrainedHashExternalRepos = emptySet()) + // Reversed order, plus a duplicate of inputA to exercise dedupe. + val reversed = + BazelRule(genruleWith(listOf(inputB, inputA, inputA))) + .ruleInputList(useCquery = true, fineGrainedHashExternalRepos = emptySet()) + + assertThat(forward).isEqualTo(listOf("//:a|cfg-A", "//:b|cfg-B")) + assertThat(forward).isEqualTo(reversed) + } + private fun configuredGenrule(depLabel: String, configurationChecksum: String): Rule { return Rule.newBuilder() .setRuleClass("genrule")