Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/datajoint/autopopulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ def _populate_direct(
"""
from tqdm import tqdm

keys = (self._jobs_to_do(restrictions) - self).keys()
keys = (self._jobs_to_do(restrictions) - self.proj()).keys()

logger.debug("Found %d keys to populate" % len(keys))

Expand Down Expand Up @@ -701,7 +701,7 @@ def progress(self, *restrictions: Any, display: bool = False) -> tuple[int, int]
if not common_attrs:
# No common attributes - fall back to two-query method
total = len(todo)
remaining = len(todo - self)
remaining = len(todo - self.proj())
else:
# Build a single query that computes both total and remaining
# Using LEFT JOIN with COUNT(DISTINCT) to handle 1:many relationships
Expand Down
2 changes: 1 addition & 1 deletion src/datajoint/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,7 @@ def refresh(

# Keys that need jobs: in key_source, not in target, not in jobs
# Disable semantic_check for Job table (self) because its attributes may not have matching lineage
new_keys = (key_source - self._target).restrict(Not(self), semantic_check=False).proj()
new_keys = (key_source - self._target.proj()).restrict(Not(self), semantic_check=False).proj()
new_key_list = new_keys.keys()

if new_key_list:
Expand Down
124 changes: 124 additions & 0 deletions tests/integration/test_autopopulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,130 @@ def test_allow_insert(clean_autopopulate, subject, experiment):
experiment.insert1(key)


def test_populate_antijoin_with_secondary_attrs(clean_autopopulate, subject, experiment):
    """Check that pending-key computation via antijoin stays correct.

    A partial populate followed by ``key_source - target`` must report
    exactly the keys that have not yet been processed, and ``progress()``
    must agree with that count.  Note that ``Experiment.make()`` inserts
    ``fake_experiments_per_subject`` rows per key, so row counts are
    scaled accordingly.
    """
    # Preconditions: populated root table, empty target table.
    assert subject, "root tables are empty"
    assert not experiment, "table already filled?"

    total_keys = len(experiment.key_source)
    assert total_keys > 0

    # Process only the first two keys from key_source.
    experiment.populate(max_calls=2)
    expected_rows = 2 * experiment.fake_experiments_per_subject
    assert len(experiment) == expected_rows

    # The antijoin must exclude exactly the two populated keys.
    pending = experiment.key_source - experiment
    assert len(pending) == total_keys - 2, f"Antijoin returned {len(pending)} pending keys, expected {total_keys - 2}."

    # progress() must report the same counts as the antijoin.
    remaining, total = experiment.progress()
    assert total == total_keys
    assert remaining == total_keys - 2

    # After a full populate, nothing may remain pending.
    experiment.populate()
    pending_after = experiment.key_source - experiment
    assert len(pending_after) == 0, f"Antijoin returned {len(pending_after)} pending keys after full populate, expected 0."


def test_populate_antijoin_overlapping_attrs(prefix, connection_test):
    """Regression test: antijoin with overlapping secondary attribute names.

    This reproduces the bug where `key_source - self` returns ALL keys instead
    of just unpopulated ones. The condition is:

    1. key_source returns secondary attributes (e.g., num_samples, quality)
    2. The target table has secondary attributes with the SAME NAMES
    3. The VALUES differ between source and target after populate

    Without .proj() on the target, SQL matches on ALL common column names
    (including secondary attrs), so different values mean no match, and all
    keys appear "pending" even after populate.

    Real-world example: LightningPoseOutput (key_source) has num_frames,
    quality, processing_datetime as secondary attrs. InitialContainer (target)
    also has those same-named columns with different values.
    """
    # Dedicated throwaway schema so the test cannot interfere with others;
    # it is dropped in the finally block below.
    test_schema = dj.Schema(f"{prefix}_antijoin_overlap", connection=connection_test)

    @test_schema
    class Sensor(dj.Lookup):
        # Source table: one primary key plus two secondary attributes whose
        # names deliberately collide with the computed table's columns.
        definition = """
        sensor_id : int32
        ---
        num_samples : int32
        quality : decimal(4,2)
        """
        contents = [
            (1, 100, 0.95),
            (2, 200, 0.87),
            (3, 150, 0.92),
            (4, 175, 0.89),
        ]

    @test_schema
    class ProcessedSensor(dj.Computed):
        definition = """
        -> Sensor
        ---
        num_samples : int32 # same name as Sensor's secondary attr
        quality : decimal(4,2) # same name as Sensor's secondary attr
        result : decimal(8,2)
        """

        @property
        def key_source(self):
            # Unprojected key_source: carries the secondary attributes along
            # with sensor_id, which is what sets up the name collision.
            return Sensor()  # returns sensor_id + num_samples + quality

        def make(self, key):
            # Fetch source data (key only contains PK after projection)
            source = (Sensor() & key).fetch1()
            # Values intentionally differ from source — this is what triggers
            # the bug: the antijoin tries to match on num_samples and quality
            # too, and since values differ, no match is found.
            self.insert1(
                dict(
                    sensor_id=key["sensor_id"],
                    num_samples=source["num_samples"] * 2,
                    quality=float(source["quality"]) + 0.05,
                    result=float(source["num_samples"]) * float(source["quality"]),
                )
            )

    try:
        # Partially populate (2 out of 4)
        ProcessedSensor().populate(max_calls=2)
        assert len(ProcessedSensor()) == 2

        total_keys = len(ProcessedSensor().key_source)
        assert total_keys == 4

        # The critical test: populate() must correctly identify remaining keys.
        # Before the fix, populate() used `key_source - self` which matched on
        # num_samples and quality too, returning all 4 keys as "pending".
        ProcessedSensor().populate()
        assert len(ProcessedSensor()) == 4, (
            f"After full populate, expected 4 entries but got {len(ProcessedSensor())}. "
            f"populate() likely re-processed already-completed keys."
        )

        # Verify progress reports 0 remaining
        remaining, total = ProcessedSensor().progress()
        assert remaining == 0, f"Expected 0 remaining, got {remaining}"
        assert total == 4

        # Verify antijoin with .proj() is correct
        pending = ProcessedSensor().key_source - ProcessedSensor().proj()
        assert len(pending) == 0
    finally:
        # Always drop the schema, even when an assertion above fails.
        test_schema.drop(prompt=False)


def test_load_dependencies(prefix, connection_test):
schema = dj.Schema(f"{prefix}_load_dependencies_populate", connection=connection_test)

Expand Down
Loading