From 3f9ad2046272fc24eb834f4d60ad73c45934c278 Mon Sep 17 00:00:00 2001 From: Dima Molodenskiy Date: Mon, 15 Jun 2026 18:49:43 +0200 Subject: [PATCH] Explain empty interfaces.csv instead of writing it silently When no interface row is produced for a run, process() wrote a zero-byte interfaces.csv (no header) and still logged "wrote .../interfaces.csv" with no error, making a genuine "no interface" result indistinguishable from a parse/load failure (issue #17). The common case for heterodimers is that AlphaFold placed the chains with no inter-chain contact within --contact_thresh (default 8 A, Cb-Cb), so no interface is detected and rows is empty. Track lightweight per-run diagnostics (models processed, interfaces found, interfaces dropped by --pae_filter) and emit a WARNING explaining why the CSV is empty and which knob to relax. File-write behavior is unchanged (still an empty file) for backward compatibility; only the logging improves. Add a regression test that runs the AF3 fixture with a sub-Angstrom contact_thresh and asserts the empty CSV plus the explanatory warning. Co-Authored-By: Claude Opus 4.8 --- src/alphajudge/runner.py | 30 ++++++++++++++++++++++++++++++ test/test_parsers_and_runner.py | 19 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/src/alphajudge/runner.py b/src/alphajudge/runner.py index 9b74441..7b84587 100644 --- a/src/alphajudge/runner.py +++ b/src/alphajudge/runner.py @@ -70,10 +70,16 @@ def process( job = d.resolve().name rows: list[dict] = [] + # Diagnostics so an empty CSV can be explained rather than written silently. 
+ models_processed = 0 + total_interfaces = 0 + dropped_by_pae = 0 for m in models: try: structure, confidence = run.load_model(m) comp = Complex(structure, confidence, contact_thresh, pae_filter, ipsae_pae_cutoff) + models_processed += 1 + total_interfaces += len(comp.interfaces) global_score = ( comp.mpDockQ @@ -85,6 +91,7 @@ def process( if iface.num_intf_residues == 0: continue if iface.average_interface_pae > pae_filter: + dropped_by_pae += 1 continue pd2, _ = iface.pDockQ2() label = ( @@ -161,6 +168,29 @@ def process( out = d / per_run_csv_name out.parent.mkdir(parents=True, exist_ok=True) + + if not rows: + # Log *why* the CSV is empty so the zero-byte file is no longer written silently + # (see https://github.com/KosinskiLab/AlphaJudge/issues/17). The common case + # for heterodimers is that AlphaFold placed the chains without any inter-chain + # contact within --contact_thresh, so no interface is detected. + if models_processed == 0: + reason = "no model could be loaded/processed" + elif total_interfaces == 0: + reason = ( + f"no inter-chain contacts within contact_thresh={contact_thresh} A " + f"(chains have no detectable interface); try a larger --contact_thresh " + f"or check that the model is actually a complex" + ) + elif dropped_by_pae: + reason = ( + f"all {dropped_by_pae} detected interface(s) were filtered out by " + f"pae_filter={pae_filter}; try a larger --pae_filter" + ) + else: + reason = "all detected interfaces had zero interface residues" + logger.warning(f"no interface rows for {job}: {reason}; writing empty {out}") + with out.open("w", newline="") as f: if rows: w = csv.DictWriter(f, fieldnames=list(rows[0].keys())) diff --git a/test/test_parsers_and_runner.py b/test/test_parsers_and_runner.py index b4e15be..9c1adea 100644 --- a/test/test_parsers_and_runner.py +++ b/test/test_parsers_and_runner.py @@ -519,6 +519,25 @@ def test_af3_runner_outputs_have_expected_scores(tmp_path: Path, af3_dir_src: Pa assert nearly_equal(got_iptm_ptm, 
float(exp_iptm_ptm)), f"AF3 iptm_ptm mismatch for {m}" +def test_af3_empty_csv_is_explained_when_no_contacts( + tmp_path: Path, af3_dir_src: Path, caplog: pytest.LogCaptureFixture +): + """Regression for issue #17: when no inter-chain contact is within contact_thresh, + the CSV is empty (no header) but the log must say *why* instead of being silent.""" + af3_dir = copy_run_dir(af3_dir_src, tmp_path) + + caplog.set_level(logging.WARNING, logger="alphajudge.runner") + # A sub-Angstrom contact threshold guarantees no inter-chain contacts -> no interfaces. + process(str(af3_dir), 0.01, 100.0, "best", 10.0) + + out = af3_dir / "interfaces.csv" + assert out.exists(), "an (empty) interfaces.csv should still be written" + assert out.stat().st_size == 0, "no contacts -> empty CSV (no header)" + + assert "no interface rows" in caplog.text + assert "contact_thresh" in caplog.text + + def test_af3_parser_accepts_official_prefixed_layout(tmp_path: Path, af3_dir_src: Path): af3_dir = make_official_af3_layout(af3_dir_src, tmp_path, job_name="hello_fold")