From 71967051bbb5894a1b26f6023838bea14263940a Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 15:58:56 -0600 Subject: [PATCH 01/39] docs: Add thread-safe mode specification Simple spec for blocking global state access in multi-tenant environments. Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 67 +++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 docs/design/thread-safe-mode.md diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md new file mode 100644 index 000000000..1b6c15cb8 --- /dev/null +++ b/docs/design/thread-safe-mode.md @@ -0,0 +1,67 @@ +# Thread-Safe Mode Specification + +## Problem + +DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. Multi-tenant applications (web servers, async workers) need isolated connections per request/task. + +## Solution + +Add `thread_safe` mode that blocks global state access and requires explicit connection configuration. + +## API + +### Enable Thread-Safe Mode + +Set via environment variable or config file (read-only after initialization): + +```bash +export DJ_THREAD_SAFE=true +``` + +```json +// datajoint.json +{"thread_safe": true} +``` + +### Create Connections + +```python +conn = dj.Connection.from_config( + host="localhost", + user="user", + password="password" +) +schema = dj.Schema("my_schema", connection=conn) +``` + +## Behavior + +| Operation | `thread_safe=False` | `thread_safe=True` | +|-----------|--------------------|--------------------| +| `dj.config.X` | Works | Raises `ThreadSafetyError` | +| `dj.conn()` | Works | Raises `ThreadSafetyError` | +| `dj.Schema("name")` | Works | Raises `ThreadSafetyError` | +| `Connection.from_config()` | Works | Works | +| `Schema(..., connection=conn)` | Works | Works | + +## Implementation + +1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias +2. Make `thread_safe` read-only after `Config` initialization +3. Add guards to `Config.__getattr__`, `Config.__setattr__`, `Config.__getitem__`, `Config.__setitem__` +4. Add guard to `dj.conn()` +5. Add guard to `Schema.__init__` when `connection=None` +6. Add `Connection.from_config()` class method +7. Add `ThreadSafetyError` exception + +## Exceptions + +```python +class ThreadSafetyError(DataJointError): + """Raised when accessing global state in thread-safe mode.""" +``` + +Error messages: +- Config access: `"Global config is inaccessible in thread-safe mode. Use Connection.from_config() with explicit configuration."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection.from_config() with explicit configuration."` +- Schema without connection: `"Schema requires explicit connection in thread-safe mode. Use Schema(..., connection=conn)."` From 477d36585d86e1674478d7f3d74b9f488c270a44 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:30:24 -0600 Subject: [PATCH 02/39] chore: Remove unused settings Remove dead code: - filepath_checksum_size_limit (never used) - enable_python_native_blobs (never used) - cache (only query_cache is used) - init_function/init_command (database init command) Co-Authored-By: Claude Opus 4.5 --- src/datajoint/adapters/mysql.py | 3 --- src/datajoint/connection.py | 12 +----------- src/datajoint/settings.py | 11 ++--------- tests/integration/test_jobs.py | 5 ++--- tests/unit/test_settings.py | 24 ++++++++++++------------ 5 files changed, 17 insertions(+), 38 deletions(-) diff --git a/src/datajoint/adapters/mysql.py b/src/datajoint/adapters/mysql.py index 88339335f..21aab2908 100644 --- a/src/datajoint/adapters/mysql.py +++ b/src/datajoint/adapters/mysql.py @@ -75,7 +75,6 @@ def connect( Password for authentication. **kwargs : Any Additional MySQL-specific parameters: - - init_command: SQL initialization command - ssl: TLS/SSL configuration dict (deprecated, use use_tls) - use_tls: bool or dict - DataJoint's SSL parameter (preferred) - charset: Character set (default from kwargs) @@ -85,7 +84,6 @@ def connect( pymysql.Connection MySQL connection object. """ - init_command = kwargs.get("init_command") # Handle both ssl (old) and use_tls (new) parameter names ssl_config = kwargs.get("use_tls", kwargs.get("ssl")) # Convert boolean True to dict for PyMySQL (PyMySQL expects dict or SSLContext) @@ -99,7 +97,6 @@ def connect( "port": port, "user": user, "passwd": password, - "init_command": init_command, "sql_mode": "NO_ZERO_DATE,NO_ZERO_IN_DATE,ERROR_FOR_DIVISION_BY_ZERO," "STRICT_ALL_TABLES,NO_ENGINE_SUBSTITUTION,ONLY_FULL_GROUP_BY", "charset": charset, diff --git a/src/datajoint/connection.py b/src/datajoint/connection.py index 21b48e638..488a26e7d 100644 --- a/src/datajoint/connection.py +++ b/src/datajoint/connection.py @@ -55,7 +55,6 @@ def conn( user: str | None = None, password: str | None = None, *, - init_fun: Callable | None = None, reset: bool = False, use_tls: bool | dict | None = None, ) -> Connection: @@ -73,8 +72,6 @@ def conn( Database username. Required if not set in config. password : str, optional Database password. Required if not set in config. - init_fun : callable, optional - Initialization function called after connection. reset : bool, optional If True, reset existing connection. Default False. use_tls : bool or dict, optional @@ -103,9 +100,8 @@ def conn( raise errors.DataJointError( "Database password not configured. Set datajoint.config['database.password'] or pass password= argument." ) - init_fun = init_fun if init_fun is not None else config["connection.init_function"] use_tls = use_tls if use_tls is not None else config["database.use_tls"] - conn.connection = Connection(host, user, password, None, init_fun, use_tls) + conn.connection = Connection(host, user, password, None, use_tls) return conn.connection @@ -150,8 +146,6 @@ class Connection: Database password. port : int, optional Port number. Overridden if specified in host. - init_fun : str, optional - SQL initialization command. use_tls : bool or dict, optional TLS encryption option. @@ -169,7 +163,6 @@ def __init__( user: str, password: str, port: int | None = None, - init_fun: str | None = None, use_tls: bool | dict | None = None, ) -> None: if ":" in host: @@ -190,7 +183,6 @@ def __init__( # use_tls=True: enable SSL with default settings self.conn_info["ssl"] = True self.conn_info["ssl_input"] = use_tls - self.init_fun = init_fun self._conn = None self._query_cache = None self._is_closed = True # Mark as closed until connect() succeeds @@ -227,7 +219,6 @@ def connect(self) -> None: port=self.conn_info["port"], user=self.conn_info["user"], password=self.conn_info["passwd"], - init_command=self.init_fun, charset=config["connection.charset"], use_tls=self.conn_info.get("ssl"), ) @@ -244,7 +235,6 @@ def connect(self) -> None: port=self.conn_info["port"], user=self.conn_info["user"], password=self.conn_info["passwd"], - init_command=self.init_fun, charset=config["connection.charset"], use_tls=False, # Explicitly disable SSL for fallback ) diff --git a/src/datajoint/settings.py b/src/datajoint/settings.py index e373ca38f..7019d8345 100644 --- a/src/datajoint/settings.py +++ b/src/datajoint/settings.py @@ -224,7 +224,6 @@ class ConnectionSettings(BaseSettings): model_config = SettingsConfigDict(extra="forbid", validate_assignment=True) - init_function: str | None = None charset: str = "" # pymysql uses '' as default @@ -341,11 +340,8 @@ class Config(BaseSettings): # Top-level settings loglevel: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = Field(default="INFO", validation_alias="DJ_LOG_LEVEL") safemode: bool = True - enable_python_native_blobs: bool = True - filepath_checksum_size_limit: int | None = None - # Cache paths - cache: Path | None = None + # Cache path for query results query_cache: Path | None = None # Download path for attachments and filepaths @@ -362,7 +358,7 @@ def set_logger_level(cls, v: str) -> str: logger.setLevel(v) return v - @field_validator("cache", "query_cache", mode="before") + @field_validator("query_cache", mode="before") @classmethod def convert_path(cls, v: Any) -> Path | None: """Convert string paths to Path objects.""" @@ -819,7 +815,6 @@ def save_template( "use_tls": None, }, "connection": { - "init_function": None, "charset": "", }, "display": { @@ -844,8 +839,6 @@ def save_template( }, "loglevel": "INFO", "safemode": True, - "enable_python_native_blobs": True, - "cache": None, "query_cache": None, "download_path": ".", } diff --git a/tests/integration/test_jobs.py b/tests/integration/test_jobs.py index 20fa3233d..5a9203dca 100644 --- a/tests/integration/test_jobs.py +++ b/tests/integration/test_jobs.py @@ -108,10 +108,9 @@ def test_sigterm(clean_jobs, schema_any): def test_suppress_dj_errors(clean_jobs, schema_any): - """Test that DataJoint errors are suppressible without native py blobs.""" + """Test that DataJoint errors are suppressible.""" error_class = schema.ErrorClass() - with dj.config.override(enable_python_native_blobs=False): - error_class.populate(reserve_jobs=True, suppress_errors=True) + error_class.populate(reserve_jobs=True, suppress_errors=True) assert len(schema.DjExceptionName()) == len(error_class.jobs.errors) > 0 diff --git a/tests/unit/test_settings.py b/tests/unit/test_settings.py index af5718503..475d96df9 100644 --- a/tests/unit/test_settings.py +++ b/tests/unit/test_settings.py @@ -504,23 +504,23 @@ def test_display_limit(self): class TestCachePaths: """Test cache path settings.""" - def test_cache_path_string(self): - """Test setting cache path as string.""" - original = dj.config.cache + def test_query_cache_path_string(self): + """Test setting query_cache path as string.""" + original = dj.config.query_cache try: - dj.config.cache = "/tmp/cache" - assert dj.config.cache == Path("/tmp/cache") + dj.config.query_cache = "/tmp/cache" + assert dj.config.query_cache == Path("/tmp/cache") finally: - dj.config.cache = original + dj.config.query_cache = original - def test_cache_path_none(self): - """Test cache path can be None.""" - original = dj.config.cache + def test_query_cache_path_none(self): + """Test query_cache path can be None.""" + original = dj.config.query_cache try: - dj.config.cache = None - assert dj.config.cache is None + dj.config.query_cache = None + assert dj.config.query_cache is None finally: - dj.config.cache = original + dj.config.query_cache = original class TestSaveTemplate: From 6a5a309a7e79d97e34424bb51bf0784d6c67d741 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:32:21 -0600 Subject: [PATCH 03/39] docs: Clarify that all settings are connection-scoped - All settings can be passed to Connection.from_config() - Only thread_safe is read-only after initialization Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 1b6c15cb8..29aa89310 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -25,11 +25,16 @@ export DJ_THREAD_SAFE=true ### Create Connections +All settings can be passed to `Connection.from_config()`: + ```python conn = dj.Connection.from_config( host="localhost", user="user", - password="password" + password="password", + safemode=False, + display_limit=25, + # ... any other settings ) schema = dj.Schema("my_schema", connection=conn) ``` @@ -44,6 +49,12 @@ schema = dj.Schema("my_schema", connection=conn) | `Connection.from_config()` | Works | Works | | `Schema(..., connection=conn)` | Works | Works | +## Read-Only Settings + +Only `thread_safe` is read-only after initialization. It can only be set via: +- Environment variable `DJ_THREAD_SAFE` +- Config file `datajoint.json` + ## Implementation 1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias From c0598f4ecda57f7683472927dbcfbc74cf19e89b Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:40:09 -0600 Subject: [PATCH 04/39] docs: Specify Connection.from_config() behavior - Parameters and defaults - Connection-scoped settings via conn.config - Never accesses global dj.config Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 29aa89310..50c55ce79 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -23,9 +23,9 @@ export DJ_THREAD_SAFE=true {"thread_safe": true} ``` -### Create Connections +### Connection.from_config() -All settings can be passed to `Connection.from_config()`: +Creates a connection with explicit configuration. Works in both `thread_safe=True` and `thread_safe=False` modes. ```python conn = dj.Connection.from_config( @@ -34,11 +34,29 @@ conn = dj.Connection.from_config( password="password", safemode=False, display_limit=25, - # ... any other settings ) schema = dj.Schema("my_schema", connection=conn) ``` +**Parameters:** +- `host` (required): Database hostname +- `user` (required): Database username +- `password` (required): Database password +- `port`: Database port (default: 3306) +- Any other setting from `dj.config` (e.g., `safemode`, `display_limit`, `stores`) + +**Defaults:** Settings not explicitly provided use hardcoded defaults (same as `dj.config` defaults). Global `dj.config` is never accessed. + +**Connection-scoped settings:** Stored on `conn.config` and accessed as `conn.config.safemode`, `conn.config.display_limit`, etc. + +```python +conn = dj.Connection.from_config(host="localhost", user="u", password="p") +conn.config.safemode # True (default) +conn.config.display_limit # 12 (default) + +conn.config.safemode = False # Modify for this connection only +``` + ## Behavior | Operation | `thread_safe=False` | `thread_safe=True` | @@ -62,7 +80,10 @@ Only `thread_safe` is read-only after initialization. It can only be set via: 3. Add guards to `Config.__getattr__`, `Config.__setattr__`, `Config.__getitem__`, `Config.__setitem__` 4. Add guard to `dj.conn()` 5. Add guard to `Schema.__init__` when `connection=None` -6. Add `Connection.from_config()` class method +6. Add `Connection.from_config()` class method that: + - Accepts all connection params and settings as kwargs + - Uses hardcoded defaults (never accesses global config) + - Creates `conn.config` object to store connection-scoped settings 7. Add `ThreadSafetyError` exception ## Exceptions From 697ec6d0302899d0d7c6c5e4e4cbb789bbf99598 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:51:07 -0600 Subject: [PATCH 05/39] docs: conn.config uses same Config class, connection settings read-only - Connection.from_config() creates a Config instance for conn.config - Database connection settings (host, port, user, password, use_tls, backend) become read-only after connection is established - Other settings remain mutable per-connection Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 50c55ce79..d5bc0ecf9 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -45,16 +45,21 @@ schema = dj.Schema("my_schema", connection=conn) - `port`: Database port (default: 3306) - Any other setting from `dj.config` (e.g., `safemode`, `display_limit`, `stores`) -**Defaults:** Settings not explicitly provided use hardcoded defaults (same as `dj.config` defaults). Global `dj.config` is never accessed. +**Config creation:** Uses the same `Config` class as global `dj.config`. Each connection gets its own `Config` instance via `conn.config`. -**Connection-scoped settings:** Stored on `conn.config` and accessed as `conn.config.safemode`, `conn.config.display_limit`, etc. +**Read-only after connection:** Database connection settings become read-only after connection is established: +- `host`, `port`, `user`, `password`, `use_tls`, `backend` + +**Mutable settings:** All other settings remain mutable per-connection: +- `safemode`, `display_limit`, `stores`, etc. ```python conn = dj.Connection.from_config(host="localhost", user="u", password="p") conn.config.safemode # True (default) conn.config.display_limit # 12 (default) -conn.config.safemode = False # Modify for this connection only +conn.config.safemode = False # OK: modify for this connection +conn.config.host = "other" # Error: read-only after connection ``` ## Behavior @@ -69,9 +74,8 @@ conn.config.safemode = False # Modify for this connection only ## Read-Only Settings -Only `thread_safe` is read-only after initialization. It can only be set via: -- Environment variable `DJ_THREAD_SAFE` -- Config file `datajoint.json` +- `thread_safe`: Read-only after global config initialization (set via env var or config file only) +- `host`, `port`, `user`, `password`, `use_tls`, `backend`: Read-only on `conn.config` after connection is established ## Implementation @@ -82,8 +86,8 @@ Only `thread_safe` is read-only after initialization. It can only be set via: 5. Add guard to `Schema.__init__` when `connection=None` 6. Add `Connection.from_config()` class method that: - Accepts all connection params and settings as kwargs - - Uses hardcoded defaults (never accesses global config) - - Creates `conn.config` object to store connection-scoped settings + - Creates a new `Config` instance for `conn.config` + - Marks connection settings as read-only after connection 7. Add `ThreadSafetyError` exception ## Exceptions From 467367e637a59e1d81a34e57a0b99f56ce78c1ac Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:55:02 -0600 Subject: [PATCH 06/39] docs: Global config read-only (not blocked) in thread-safe mode - thread_safe=True: global dj.config becomes read-only - conn.config copies from global config, always mutable - Simpler: global config still readable for defaults Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 47 ++++++++++++++------------------- 1 file changed, 20 insertions(+), 27 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index d5bc0ecf9..f670e6cf2 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,7 +6,7 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Add `thread_safe` mode that blocks global state access and requires explicit connection configuration. +Add `thread_safe` mode that makes global config read-only and requires explicit connections with mutable connection-scoped settings. ## API @@ -25,7 +25,7 @@ export DJ_THREAD_SAFE=true ### Connection.from_config() -Creates a connection with explicit configuration. Works in both `thread_safe=True` and `thread_safe=False` modes. +Creates a connection with explicit configuration. Works in both modes. ```python conn = dj.Connection.from_config( @@ -43,61 +43,54 @@ schema = dj.Schema("my_schema", connection=conn) - `user` (required): Database username - `password` (required): Database password - `port`: Database port (default: 3306) -- Any other setting from `dj.config` (e.g., `safemode`, `display_limit`, `stores`) +- Any other setting (e.g., `safemode`, `display_limit`, `stores`) -**Config creation:** Uses the same `Config` class as global `dj.config`. Each connection gets its own `Config` instance via `conn.config`. - -**Read-only after connection:** Database connection settings become read-only after connection is established: -- `host`, `port`, `user`, `password`, `use_tls`, `backend` - -**Mutable settings:** All other settings remain mutable per-connection: -- `safemode`, `display_limit`, `stores`, etc. +**Config creation:** Copies global `dj.config`, then applies kwargs. Creates `conn.config` which is always mutable. ```python conn = dj.Connection.from_config(host="localhost", user="u", password="p") -conn.config.safemode # True (default) -conn.config.display_limit # 12 (default) - -conn.config.safemode = False # OK: modify for this connection -conn.config.host = "other" # Error: read-only after connection +conn.config.safemode = False # Always OK: conn.config is mutable +conn.config.display_limit = 25 # Always OK ``` ## Behavior | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config.X` | Works | Raises `ThreadSafetyError` | +| `dj.config` read | Works | Works (read-only) | +| `dj.config` write | Works | Raises `ThreadSafetyError` | | `dj.conn()` | Works | Raises `ThreadSafetyError` | | `dj.Schema("name")` | Works | Raises `ThreadSafetyError` | | `Connection.from_config()` | Works | Works | +| `conn.config` read/write | Works | Works | | `Schema(..., connection=conn)` | Works | Works | ## Read-Only Settings -- `thread_safe`: Read-only after global config initialization (set via env var or config file only) -- `host`, `port`, `user`, `password`, `use_tls`, `backend`: Read-only on `conn.config` after connection is established +- `thread_safe`: Always read-only after initialization (set via env var or config file only) +- All of `dj.config`: Read-only when `thread_safe=True` ## Implementation 1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias -2. Make `thread_safe` read-only after `Config` initialization -3. Add guards to `Config.__getattr__`, `Config.__setattr__`, `Config.__getitem__`, `Config.__setitem__` +2. Make `thread_safe` always read-only after initialization +3. When `thread_safe=True`, make all `dj.config` writes raise `ThreadSafetyError` 4. Add guard to `dj.conn()` 5. Add guard to `Schema.__init__` when `connection=None` 6. Add `Connection.from_config()` class method that: - - Accepts all connection params and settings as kwargs - - Creates a new `Config` instance for `conn.config` - - Marks connection settings as read-only after connection + - Copies global `dj.config` + - Applies kwargs overrides + - Creates mutable `conn.config` 7. Add `ThreadSafetyError` exception ## Exceptions ```python class ThreadSafetyError(DataJointError): - """Raised when accessing global state in thread-safe mode.""" + """Raised when modifying global state in thread-safe mode.""" ``` Error messages: -- Config access: `"Global config is inaccessible in thread-safe mode. Use Connection.from_config() with explicit configuration."` -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection.from_config() with explicit configuration."` -- Schema without connection: `"Schema requires explicit connection in thread-safe mode. Use Schema(..., connection=conn)."` +- Config write: `"Global config is read-only in thread-safe mode. Use conn.config for connection-scoped settings."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection.from_config()."` +- Schema without connection: `"Schema requires explicit connection in thread-safe mode."` From 7c57b26cc2181942ef1b545ae203ed9f9fee0377 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 16:58:03 -0600 Subject: [PATCH 07/39] docs: Remove from_config(), just expose conn.config Simpler API: - Use existing Connection() constructor - conn.config copies from global dj.config - conn.config is always mutable for per-connection settings Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 48 +++++++++++++++------------------ 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index f670e6cf2..0ee4b6b88 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,7 +6,7 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Add `thread_safe` mode that makes global config read-only and requires explicit connections with mutable connection-scoped settings. +Add `thread_safe` mode that makes global config read-only and provides connection-scoped mutable settings via `conn.config`. ## API @@ -23,64 +23,60 @@ export DJ_THREAD_SAFE=true {"thread_safe": true} ``` -### Connection.from_config() - -Creates a connection with explicit configuration. Works in both modes. +### Create Connections ```python -conn = dj.Connection.from_config( +conn = dj.Connection( host="localhost", user="user", password="password", - safemode=False, - display_limit=25, ) + +# Modify settings per-connection +conn.config.safemode = False +conn.config.display_limit = 25 + schema = dj.Schema("my_schema", connection=conn) ``` -**Parameters:** -- `host` (required): Database hostname -- `user` (required): Database username -- `password` (required): Database password -- `port`: Database port (default: 3306) -- Any other setting (e.g., `safemode`, `display_limit`, `stores`) +### conn.config -**Config creation:** Copies global `dj.config`, then applies kwargs. Creates `conn.config` which is always mutable. +Every connection has a `config` attribute that: +- Copies from global `dj.config` at connection time +- Is always mutable (even in thread-safe mode) +- Provides connection-scoped settings ```python -conn = dj.Connection.from_config(host="localhost", user="u", password="p") -conn.config.safemode = False # Always OK: conn.config is mutable -conn.config.display_limit = 25 # Always OK +conn.config.safemode # Read setting +conn.config.safemode = False # Write setting (always allowed) +conn.config.stores = {...} # Configure stores for this connection ``` ## Behavior | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config` read | Works | Works (read-only) | +| `dj.config` read | Works | Works | | `dj.config` write | Works | Raises `ThreadSafetyError` | | `dj.conn()` | Works | Raises `ThreadSafetyError` | | `dj.Schema("name")` | Works | Raises `ThreadSafetyError` | -| `Connection.from_config()` | Works | Works | +| `dj.Connection(...)` | Works | Works | | `conn.config` read/write | Works | Works | | `Schema(..., connection=conn)` | Works | Works | ## Read-Only Settings -- `thread_safe`: Always read-only after initialization (set via env var or config file only) +- `thread_safe`: Always read-only (set via env var or config file only) - All of `dj.config`: Read-only when `thread_safe=True` ## Implementation 1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias 2. Make `thread_safe` always read-only after initialization -3. When `thread_safe=True`, make all `dj.config` writes raise `ThreadSafetyError` +3. When `thread_safe=True`, make `dj.config` writes raise `ThreadSafetyError` 4. Add guard to `dj.conn()` 5. Add guard to `Schema.__init__` when `connection=None` -6. Add `Connection.from_config()` class method that: - - Copies global `dj.config` - - Applies kwargs overrides - - Creates mutable `conn.config` +6. Add `conn.config` to `Connection` that copies from global `dj.config` 7. Add `ThreadSafetyError` exception ## Exceptions @@ -92,5 +88,5 @@ class ThreadSafetyError(DataJointError): Error messages: - Config write: `"Global config is read-only in thread-safe mode. Use conn.config for connection-scoped settings."` -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection.from_config()."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection() with explicit parameters."` - Schema without connection: `"Schema requires explicit connection in thread-safe mode."` From 8a51db4b9fa177c0acc7df333786407f11ee6814 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:04:49 -0600 Subject: [PATCH 08/39] docs: Specify internal refactoring to use conn.config All runtime operations must use self.connection.config instead of global config: - table.py: safemode for delete/drop - schemas.py: safemode, create_tables - preview.py: display settings - diagram.py: diagram_direction - jobs.py: all jobs settings - autopopulate.py: jobs settings - declare.py: add_job_metadata - connection.py: reconnect, query_cache - hash_registry.py, codecs: stores, download_path Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 66 ++++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 0ee4b6b88..99c343d1b 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -34,7 +34,7 @@ conn = dj.Connection( # Modify settings per-connection conn.config.safemode = False -conn.config.display_limit = 25 +conn.config.display.limit = 25 schema = dj.Schema("my_schema", connection=conn) ``` @@ -71,22 +71,68 @@ conn.config.stores = {...} # Configure stores for this connection ## Implementation -1. Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias -2. Make `thread_safe` always read-only after initialization -3. When `thread_safe=True`, make `dj.config` writes raise `ThreadSafetyError` -4. Add guard to `dj.conn()` -5. Add guard to `Schema.__init__` when `connection=None` -6. Add `conn.config` to `Connection` that copies from global `dj.config` -7. Add `ThreadSafetyError` exception +### 1. Add thread_safe setting +- Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias +- Make `thread_safe` always read-only after initialization +- When `thread_safe=True`, make `dj.config` writes raise `ThreadSafetyError` -## Exceptions +### 2. Add guards for global state +- `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` +- `Schema.__init__`: Raise `ThreadSafetyError` when `connection=None` and `thread_safe=True` + +### 3. Add conn.config +- `Connection.__init__`: Create `self.config` as copy of global `dj.config` +- `conn.config` is always mutable + +### 4. Refactor internal code to use conn.config + +All runtime operations must use `self.connection.config` instead of global `config`: + +**table.py:** +- `Table.delete()`: Use `self.connection.config.safemode` +- `Table.drop()`: Use `self.connection.config.safemode` + +**schemas.py:** +- `Schema.drop()`: Use `self.connection.config.safemode` +- `Schema.__init__`: Use `self.connection.config.database.create_tables` + +**preview.py:** +- Use `connection.config.display.limit` +- Use `connection.config.display.width` +- Use `connection.config.display.show_tuple_count` +- Note: Preview functions need connection passed in or accessed via table + +**diagram.py:** +- Use `schema.connection.config.display.diagram_direction` + +**jobs.py:** +- Use `self.connection.config.jobs.*` for all jobs settings +- `version_method`, `default_priority`, `stale_timeout`, `keep_completed` + +**autopopulate.py:** +- Use `self.connection.config.jobs.allow_new_pk_fields_in_computed_tables` +- Use `self.connection.config.jobs.auto_refresh` + +**declare.py:** +- Use `connection.config.jobs.add_job_metadata` + +**connection.py:** +- Use `self.config.database.reconnect` for reconnect behavior +- Use `self.config.query_cache` for query caching + +**hash_registry.py, staged_insert.py, builtin_codecs/\*:** +- Use `connection.config.get_store_spec()` for store configuration +- Use `connection.config.download_path` for downloads + +### 5. Add ThreadSafetyError exception ```python class ThreadSafetyError(DataJointError): """Raised when modifying global state in thread-safe mode.""" ``` -Error messages: +## Error Messages + - Config write: `"Global config is read-only in thread-safe mode. Use conn.config for connection-scoped settings."` - `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection() with explicit parameters."` - Schema without connection: `"Schema requires explicit connection in thread-safe mode."` From 726007da556be5844f22ac4ef4ba9ceb310645e2 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:10:15 -0600 Subject: [PATCH 09/39] docs: Add connection flow from Schema to Tables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Explains how connections propagate: - Connection → Schema → Table classes → Table instances - Schema falls back to conn() if no connection provided - Tables inherit connection from schema via _connection class attribute - In thread_safe mode, Schema("name") fails, Schema("name", connection=conn) works Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 54 ++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 99c343d1b..7154fd9e4 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -52,6 +52,57 @@ conn.config.safemode = False # Write setting (always allowed) conn.config.stores = {...} # Configure stores for this connection ``` +## Connection Flow: Schema → Tables + +### How connections propagate + +``` +Connection + ↓ +Schema (stores connection) + ↓ +Table classes (inherit connection from schema) + ↓ +Table instances (access connection via class) +``` + +### Schema behavior + +```python +# If connection provided, use it +schema = dj.Schema("name", connection=conn) # schema.connection = conn + +# If no connection, fall back to global conn() +schema = dj.Schema("name") # schema.connection = dj.conn() +``` + +### Table behavior + +Tables automatically inherit their connection from their schema: + +```python +@schema +class Mouse(dj.Manual): + definition = "..." + +# Mouse._connection is set by @schema decorator +# Mouse().connection returns Mouse._connection (from schema) +``` + +### In thread_safe=True mode + +```python +# This fails - conn() raises ThreadSafetyError +schema = dj.Schema("name") + +# This works - explicit connection +conn = dj.Connection(host="localhost", user="u", password="p") +schema = dj.Schema("name", connection=conn) + +# Tables work automatically via schema's connection +Mouse().insert(...) # Uses schema.connection.config for settings +``` + ## Behavior | Operation | `thread_safe=False` | `thread_safe=True` | @@ -59,10 +110,11 @@ conn.config.stores = {...} # Configure stores for this connection | `dj.config` read | Works | Works | | `dj.config` write | Works | Raises `ThreadSafetyError` | | `dj.conn()` | Works | Raises `ThreadSafetyError` | -| `dj.Schema("name")` | Works | Raises `ThreadSafetyError` | +| `dj.Schema("name")` | Works (uses `conn()`) | Raises `ThreadSafetyError` | | `dj.Connection(...)` | Works | Works | | `conn.config` read/write | Works | Works | | `Schema(..., connection=conn)` | Works | Works | +| Table operations | Use `conn.config` | Use `conn.config` | ## Read-Only Settings From b929627b94786ca9534d33a40e84e4ad9e6d832e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:11:29 -0600 Subject: [PATCH 10/39] docs: Global connection assigns dj.config for uniform structure - dj.conn().config IS dj.config (same object) - dj.Connection(...).config is COPY of dj.config (independent) - All internal code uses self.connection.config uniformly Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 44 ++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 7154fd9e4..41cd952cb 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -41,15 +41,23 @@ schema = dj.Schema("my_schema", connection=conn) ### conn.config -Every connection has a `config` attribute that: -- Copies from global `dj.config` at connection time -- Is always mutable (even in thread-safe mode) -- Provides connection-scoped settings +Every connection has a `config` attribute for uniform access: + +| Connection source | `conn.config` | +|-------------------|---------------| +| `dj.conn()` | **Is** `dj.config` (same object) | +| `dj.Connection(...)` | **Copy** of `dj.config` (independent) | + +This ensures all internal code can use `self.connection.config` uniformly. ```python -conn.config.safemode # Read setting -conn.config.safemode = False # Write setting (always allowed) -conn.config.stores = {...} # Configure stores for this connection +# Global connection - config is dj.config +conn = dj.conn() +conn.config.safemode = False # Modifies dj.config + +# Explicit connection - config is independent copy +conn = dj.Connection(host="localhost", user="u", password="p") +conn.config.safemode = False # Only affects this connection ``` ## Connection Flow: Schema → Tables @@ -57,7 +65,7 @@ conn.config.stores = {...} # Configure stores for this connection ### How connections propagate ``` -Connection +Connection (has .config) ↓ Schema (stores connection) ↓ @@ -89,6 +97,17 @@ class Mouse(dj.Manual): # Mouse().connection returns Mouse._connection (from schema) ``` +### Uniform config access + +All internal code uses `self.connection.config`: + +```python +# Works the same whether connection is from dj.conn() or dj.Connection() +self.connection.config.safemode +self.connection.config.display.limit +self.connection.config.stores +``` + ### In thread_safe=True mode ```python @@ -110,9 +129,10 @@ Mouse().insert(...) # Uses schema.connection.config for settings | `dj.config` read | Works | Works | | `dj.config` write | Works | Raises `ThreadSafetyError` | | `dj.conn()` | Works | Raises `ThreadSafetyError` | +| `dj.conn().config` | Is `dj.config` | N/A | | `dj.Schema("name")` | Works (uses `conn()`) | Raises `ThreadSafetyError` | | `dj.Connection(...)` | Works | Works | -| `conn.config` read/write | Works | Works | +| `conn.config` | Copy of `dj.config` | Copy of `dj.config` | | `Schema(..., connection=conn)` | Works | Works | | Table operations | Use `conn.config` | Use `conn.config` | @@ -132,9 +152,9 @@ Mouse().insert(...) # Uses schema.connection.config for settings - `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` - `Schema.__init__`: Raise `ThreadSafetyError` when `connection=None` and `thread_safe=True` -### 3. Add conn.config -- `Connection.__init__`: Create `self.config` as copy of global `dj.config` -- `conn.config` is always mutable +### 3. Add conn.config to all connections +- `dj.conn()`: Set `conn.config = dj.config` (same object for backward compatibility) +- `dj.Connection(...)`: Set `self.config = copy(dj.config)` (independent copy) ### 4. Refactor internal code to use conn.config From ba637d51928761fbda2cb0e1a204b653d95e2946 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:16:22 -0600 Subject: [PATCH 11/39] docs: Clarify Connection always copies, dj.conn() overrides - Connection.__init__ always creates self.config = copy(dj.config) - dj.conn() overrides after creation: conn.config = dj.config Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 41cd952cb..cc32d7680 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -153,8 +153,8 @@ Mouse().insert(...) # Uses schema.connection.config for settings - `Schema.__init__`: Raise `ThreadSafetyError` when `connection=None` and `thread_safe=True` ### 3. Add conn.config to all connections -- `dj.conn()`: Set `conn.config = dj.config` (same object for backward compatibility) -- `dj.Connection(...)`: Set `self.config = copy(dj.config)` (independent copy) +- `Connection.__init__`: Always creates `self.config = copy(dj.config)` (independent copy) +- `dj.conn()`: After connection creation, overrides `conn.config = dj.config` (same object for backward compatibility) ### 4. Refactor internal code to use conn.config From 05b70fbe4545f50f7fd0a5c74d4459f17e3cbeac Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:24:36 -0600 Subject: [PATCH 12/39] docs: Address mixed connections and override() behavior - Mixed scenarios: dj.config affects global connection schemas only - Explicit connection schemas have independent config - dj.config.override() affects only schemas using dj.conn() - conn.config.override() affects only that connection's schemas - In thread_safe=True: dj.config.override() raises ThreadSafetyError Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 66 +++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index cc32d7680..56151d820 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -108,18 +108,78 @@ self.connection.config.display.limit self.connection.config.stores ``` +### In thread_safe=False mode (default) + +Schemas without explicit connection use global connection, controlled by `dj.config`: + +```python +schema = dj.Schema("name") # Uses dj.conn() +# schema.connection.config IS dj.config (same object) +# All tables controlled by dj.config uniformly + +dj.config.safemode = False # Affects all tables in schema +Mouse().delete() # Uses dj.config.safemode +``` + +### Mixed connections (thread_safe=False) + +When some schemas use global connection and others use explicit connections: + +```python +# Schema using global connection +schema1 = dj.Schema("lab") # schema1.connection.config IS dj.config + +# Schema using explicit connection +conn = dj.Connection(host="localhost", user="u", password="p") +schema2 = dj.Schema("analysis", connection=conn) # schema2.connection.config is independent + +# dj.config affects only schema1 +dj.config.safemode = False # Affects schema1 tables +Mouse().delete() # safemode=False (from dj.config) + +# conn.config affects only schema2 +conn.config.safemode = True # Affects schema2 tables +Analysis().delete() # safemode=True (from conn.config) + +# They are independent +dj.config.safemode # False +conn.config.safemode # True +``` + +### override() behavior + +```python +# Global config override - affects schemas using dj.conn() +with dj.config.override(safemode=False): + Mouse().delete() # safemode=False (schema1, global connection) + Analysis().delete() # safemode=True (schema2, unchanged - has own config) + +# Connection-scoped override - affects only that connection +with conn.config.override(safemode=False): + Mouse().delete() # safemode=True (schema1, unchanged - uses dj.config) + Analysis().delete() # safemode=False (schema2, overridden) +``` + ### In thread_safe=True mode ```python # This fails - conn() raises ThreadSafetyError schema = dj.Schema("name") -# This works - explicit connection +# This works - explicit connection with independent config conn = dj.Connection(host="localhost", user="u", password="p") schema = dj.Schema("name", connection=conn) -# Tables work automatically via schema's connection -Mouse().insert(...) # Uses schema.connection.config for settings +# Tables use connection-scoped config +conn.config.safemode = False # Only affects this connection +Mouse().delete() # Uses conn.config.safemode + +# dj.config.override() raises ThreadSafetyError (modifies global state) +with dj.config.override(safemode=False): # ThreadSafetyError + +# conn.config.override() works (connection-scoped) +with conn.config.override(safemode=False): # OK + Mouse().delete() ``` ## Behavior From bb7adfdd5e29bdac69171e3411f6ec0497206c00 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:35:06 -0600 Subject: [PATCH 13/39] docs: Rewrite spec with context-based approach New approach using dj.new() for isolated contexts: - Each context has one config and one connection - ctx.Schema() auto-uses context's connection - ctx.Manual, ctx.Lookup, etc. for table base classes - dj module acts as singleton context (legacy API) - thread_safe=True blocks singleton, allows dj.new() Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 350 ++++++++++++++------------------ 1 file changed, 150 insertions(+), 200 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 56151d820..2429369d0 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,265 +6,215 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Add `thread_safe` mode that makes global config read-only and provides connection-scoped mutable settings via `conn.config`. +Introduce **context** objects that encapsulate config and connection. The `dj` module itself is the singleton (legacy) context. New isolated contexts are created with `dj.new()`. ## API -### Enable Thread-Safe Mode +### Legacy API (singleton context) -Set via environment variable or config file (read-only after initialization): - -```bash -export DJ_THREAD_SAFE=true -``` - -```json -// datajoint.json -{"thread_safe": true} -``` - -### Create Connections +The `dj` module acts as the default singleton context: ```python -conn = dj.Connection( - host="localhost", - user="user", - password="password", -) - -# Modify settings per-connection -conn.config.safemode = False -conn.config.display.limit = 25 - -schema = dj.Schema("my_schema", connection=conn) -``` - -### conn.config +import datajoint as dj -Every connection has a `config` attribute for uniform access: - -| Connection source | `conn.config` | -|-------------------|---------------| -| `dj.conn()` | **Is** `dj.config` (same object) | -| `dj.Connection(...)` | **Copy** of `dj.config` (independent) | - -This ensures all internal code can use `self.connection.config` uniformly. - -```python -# Global connection - config is dj.config -conn = dj.conn() -conn.config.safemode = False # Modifies dj.config +dj.config.safemode = False +dj.conn(host="localhost", user="u", password="p") +schema = dj.Schema("my_schema") # Uses dj's connection -# Explicit connection - config is independent copy -conn = dj.Connection(host="localhost", user="u", password="p") -conn.config.safemode = False # Only affects this connection -``` - -## Connection Flow: Schema → Tables - -### How connections propagate - -``` -Connection (has .config) - ↓ -Schema (stores connection) - ↓ -Table classes (inherit connection from schema) - ↓ -Table instances (access connection via class) -``` - -### Schema behavior - -```python -# If connection provided, use it -schema = dj.Schema("name", connection=conn) # schema.connection = conn - -# If no connection, fall back to global conn() -schema = dj.Schema("name") # schema.connection = dj.conn() -``` - -### Table behavior - -Tables automatically inherit their connection from their schema: - -```python @schema class Mouse(dj.Manual): definition = "..." - -# Mouse._connection is set by @schema decorator -# Mouse().connection returns Mouse._connection (from schema) ``` -### Uniform config access +### New API (isolated context) -All internal code uses `self.connection.config`: +Create isolated contexts with `dj.new()`: ```python -# Works the same whether connection is from dj.conn() or dj.Connection() -self.connection.config.safemode -self.connection.config.display.limit -self.connection.config.stores -``` +import datajoint as dj -### In thread_safe=False mode (default) +ctx = dj.new() # New context with its own config copy +ctx.config.safemode = False +ctx.connect(host="localhost", user="u", password="p") +schema = ctx.Schema("my_schema") # Uses ctx's connection -Schemas without explicit connection use global connection, controlled by `dj.config`: - -```python -schema = dj.Schema("name") # Uses dj.conn() -# schema.connection.config IS dj.config (same object) -# All tables controlled by dj.config uniformly - -dj.config.safemode = False # Affects all tables in schema -Mouse().delete() # Uses dj.config.safemode +@schema +class Mouse(ctx.Manual): + definition = "..." ``` -### Mixed connections (thread_safe=False) +### Context structure -When some schemas use global connection and others use explicit connections: +Each context has: +- **One config** - copy of settings at creation time +- **One connection** - established via `ctx.connect()` +- **Schema factory** - `ctx.Schema()` auto-uses context's connection +- **Table base classes** - `ctx.Manual`, `ctx.Lookup`, `ctx.Imported`, `ctx.Computed`, `ctx.Part` ```python -# Schema using global connection -schema1 = dj.Schema("lab") # schema1.connection.config IS dj.config - -# Schema using explicit connection -conn = dj.Connection(host="localhost", user="u", password="p") -schema2 = dj.Schema("analysis", connection=conn) # schema2.connection.config is independent - -# dj.config affects only schema1 -dj.config.safemode = False # Affects schema1 tables -Mouse().delete() # safemode=False (from dj.config) - -# conn.config affects only schema2 -conn.config.safemode = True # Affects schema2 tables -Analysis().delete() # safemode=True (from conn.config) - -# They are independent -dj.config.safemode # False -conn.config.safemode # True +ctx = dj.new() +ctx.config # Config instance (copy of dj.config at creation) +ctx.connect(...) # Establish connection +ctx.Schema(...) # Create schema using ctx's connection +ctx.Manual # Base class for manual tables +ctx.Lookup # Base class for lookup tables +ctx.Imported # Base class for imported tables +ctx.Computed # Base class for computed tables +ctx.Part # Base class for part tables ``` -### override() behavior +### Thread-safe mode -```python -# Global config override - affects schemas using dj.conn() -with dj.config.override(safemode=False): - Mouse().delete() # safemode=False (schema1, global connection) - Analysis().delete() # safemode=True (schema2, unchanged - has own config) - -# Connection-scoped override - affects only that connection -with conn.config.override(safemode=False): - Mouse().delete() # safemode=True (schema1, unchanged - uses dj.config) - Analysis().delete() # safemode=False (schema2, overridden) +```bash +export DJ_THREAD_SAFE=true ``` -### In thread_safe=True mode +When `thread_safe=True`: +- `dj.conn()` raises `ThreadSafetyError` +- `dj.Schema()` raises `ThreadSafetyError` +- `dj.config` is read-only +- `dj.new()` works - isolated contexts are always allowed ```python -# This fails - conn() raises ThreadSafetyError -schema = dj.Schema("name") - -# This works - explicit connection with independent config -conn = dj.Connection(host="localhost", user="u", password="p") -schema = dj.Schema("name", connection=conn) +# thread_safe=True -# Tables use connection-scoped config -conn.config.safemode = False # Only affects this connection -Mouse().delete() # Uses conn.config.safemode +dj.Schema("name") # ThreadSafetyError +dj.conn() # ThreadSafetyError +dj.config.safemode = False # ThreadSafetyError -# dj.config.override() raises ThreadSafetyError (modifies global state) -with dj.config.override(safemode=False): # ThreadSafetyError - -# conn.config.override() works (connection-scoped) -with conn.config.override(safemode=False): # OK - Mouse().delete() +ctx = dj.new() # OK - isolated context +ctx.config.safemode = False # OK - context's own config +ctx.connect(...) # OK +ctx.Schema("name") # OK ``` -## Behavior +## Behavior Summary | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| | `dj.config` read | Works | Works | -| `dj.config` write | Works | Raises `ThreadSafetyError` | -| `dj.conn()` | Works | Raises `ThreadSafetyError` | -| `dj.conn().config` | Is `dj.config` | N/A | -| `dj.Schema("name")` | Works (uses `conn()`) | Raises `ThreadSafetyError` | -| `dj.Connection(...)` | Works | Works | -| `conn.config` | Copy of `dj.config` | Copy of `dj.config` | -| `Schema(..., connection=conn)` | Works | Works | -| Table operations | Use `conn.config` | Use `conn.config` | +| `dj.config` write | Works | `ThreadSafetyError` | +| `dj.conn()` | Works | `ThreadSafetyError` | +| `dj.Schema()` | Works | `ThreadSafetyError` | +| `dj.new()` | Works | Works | +| `ctx.config` read/write | Works | Works | +| `ctx.connect()` | Works | Works | +| `ctx.Schema()` | Works | Works | -## Read-Only Settings +## Context Lifecycle -- `thread_safe`: Always read-only (set via env var or config file only) -- All of `dj.config`: Read-only when `thread_safe=True` +```python +# Create context +ctx = dj.new() -## Implementation +# Configure +ctx.config.database.host = "localhost" +ctx.config.safemode = False +ctx.config.stores = {...} -### 1. Add thread_safe setting -- Add `thread_safe: bool = False` field to `Config` with `DJ_THREAD_SAFE` env alias -- Make `thread_safe` always read-only after initialization -- When `thread_safe=True`, make `dj.config` writes raise `ThreadSafetyError` +# Connect +ctx.connect( + host="localhost", # Or use ctx.config.database.host + user="user", + password="password", +) -### 2. Add guards for global state -- `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` -- `Schema.__init__`: Raise `ThreadSafetyError` when `connection=None` and `thread_safe=True` +# Use +schema = ctx.Schema("my_schema") -### 3. Add conn.config to all connections -- `Connection.__init__`: Always creates `self.config = copy(dj.config)` (independent copy) -- `dj.conn()`: After connection creation, overrides `conn.config = dj.config` (same object for backward compatibility) +@schema +class Mouse(ctx.Manual): + definition = """ + mouse_id: int + """ -### 4. Refactor internal code to use conn.config +Mouse().insert1({"mouse_id": 1}) -All runtime operations must use `self.connection.config` instead of global `config`: +# Cleanup (optional - closes connection) +ctx.close() +``` -**table.py:** -- `Table.delete()`: Use `self.connection.config.safemode` -- `Table.drop()`: Use `self.connection.config.safemode` +## Legacy Compatibility -**schemas.py:** -- `Schema.drop()`: Use `self.connection.config.safemode` -- `Schema.__init__`: Use `self.connection.config.database.create_tables` +The singleton `dj` context works exactly as before: -**preview.py:** -- Use `connection.config.display.limit` -- Use `connection.config.display.width` -- Use `connection.config.display.show_tuple_count` -- Note: Preview functions need connection passed in or accessed via table +```python +# These are equivalent: +dj.conn() # Singleton connection +dj.config # Singleton config +dj.Schema("name") # Uses singleton connection -**diagram.py:** -- Use `schema.connection.config.display.diagram_direction` +# Internally, dj module delegates to singleton context +``` -**jobs.py:** -- Use `self.connection.config.jobs.*` for all jobs settings -- `version_method`, `default_priority`, `stale_timeout`, `keep_completed` +## Implementation -**autopopulate.py:** -- Use `self.connection.config.jobs.allow_new_pk_fields_in_computed_tables` -- Use `self.connection.config.jobs.auto_refresh` +### 1. Create Context class -**declare.py:** -- Use `connection.config.jobs.add_job_metadata` +```python +class Context: + def __init__(self, config: Config): + self.config = config + self._connection = None + + def connect(self, host, user, password, ...): + self._connection = Connection(...) + self._connection.config = self.config + + def conn(self): + return self._connection + + def Schema(self, name, ...): + return Schema(name, connection=self._connection, ...) + + # Table base classes that reference this context + @property + def Manual(self): ... + @property + def Lookup(self): ... + # etc. +``` -**connection.py:** -- Use `self.config.database.reconnect` for reconnect behavior -- Use `self.config.query_cache` for query caching +### 2. Add dj.new() -**hash_registry.py, staged_insert.py, builtin_codecs/\*:** -- Use `connection.config.get_store_spec()` for store configuration -- Use `connection.config.download_path` for downloads +```python +def new() -> Context: + """Create a new isolated context with its own config and connection.""" + config_copy = copy(config) # Copy current global config + return Context(config_copy) +``` -### 5. Add ThreadSafetyError exception +### 3. Make dj module act as singleton context ```python -class ThreadSafetyError(DataJointError): - """Raised when modifying global state in thread-safe mode.""" +# In datajoint/__init__.py +_singleton_context = Context(config) + +def conn(...): + if config.thread_safe: + raise ThreadSafetyError(...) + return _singleton_context.conn(...) + +def Schema(...): + if config.thread_safe: + raise ThreadSafetyError(...) + return _singleton_context.Schema(...) ``` +### 4. Add thread_safe guards + +- `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` +- `dj.Schema()`: Raise `ThreadSafetyError` when `thread_safe=True` +- `dj.config` writes: Raise `ThreadSafetyError` when `thread_safe=True` + +### 5. Refactor internal code + +All internal code uses `self.connection.config` instead of global `config`: +- Tables access config via `self.connection.config` +- Connection has reference to its context's config + ## Error Messages -- Config write: `"Global config is read-only in thread-safe mode. Use conn.config for connection-scoped settings."` -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use Connection() with explicit parameters."` -- Schema without connection: `"Schema requires explicit connection in thread-safe mode."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use ctx = dj.new() to create an isolated context."` +- `dj.Schema()`: `"dj.Schema() is disabled in thread-safe mode. Use ctx = dj.new() to create an isolated context."` +- `dj.config` write: `"Global config is read-only in thread-safe mode. Use ctx = dj.new() for isolated config."` From f92af1c8b75264d63f29f4ef96cf9f01581bb03a Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:40:28 -0600 Subject: [PATCH 14/39] docs: Simplify context - only config, connection, Schema - ctx exposes only: config, connection, Schema() - Connection created at context construction via dj.new() - Tables still use dj.Manual, dj.Lookup as base classes - thread_safe=True: dj.config only allows thread_safe access Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 189 +++++++++++++------------------- 1 file changed, 76 insertions(+), 113 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 2429369d0..35cbac0ef 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,20 +6,18 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Introduce **context** objects that encapsulate config and connection. The `dj` module itself is the singleton (legacy) context. New isolated contexts are created with `dj.new()`. +Introduce **context** objects that encapsulate config and connection. The `dj` module provides the singleton (legacy) context. New isolated contexts are created with `dj.new()`. ## API ### Legacy API (singleton context) -The `dj` module acts as the default singleton context: - ```python import datajoint as dj dj.config.safemode = False dj.conn(host="localhost", user="u", password="p") -schema = dj.Schema("my_schema") # Uses dj's connection +schema = dj.Schema("my_schema") @schema class Mouse(dj.Manual): @@ -28,39 +26,34 @@ class Mouse(dj.Manual): ### New API (isolated context) -Create isolated contexts with `dj.new()`: - ```python import datajoint as dj -ctx = dj.new() # New context with its own config copy +ctx = dj.new( + host="localhost", + user="user", + password="password", +) ctx.config.safemode = False -ctx.connect(host="localhost", user="u", password="p") -schema = ctx.Schema("my_schema") # Uses ctx's connection +schema = ctx.Schema("my_schema") @schema -class Mouse(ctx.Manual): +class Mouse(dj.Manual): definition = "..." ``` ### Context structure -Each context has: -- **One config** - copy of settings at creation time -- **One connection** - established via `ctx.connect()` -- **Schema factory** - `ctx.Schema()` auto-uses context's connection -- **Table base classes** - `ctx.Manual`, `ctx.Lookup`, `ctx.Imported`, `ctx.Computed`, `ctx.Part` +Each context exposes only: +- `ctx.config` - Config instance (copy of `dj.config` at creation) +- `ctx.connection` - Connection (created at context construction) +- `ctx.Schema()` - Schema factory using context's connection ```python -ctx = dj.new() -ctx.config # Config instance (copy of dj.config at creation) -ctx.connect(...) # Establish connection -ctx.Schema(...) # Create schema using ctx's connection -ctx.Manual # Base class for manual tables -ctx.Lookup # Base class for lookup tables -ctx.Imported # Base class for imported tables -ctx.Computed # Base class for computed tables -ctx.Part # Base class for part tables +ctx = dj.new(host="localhost", user="u", password="p") +ctx.config # Config instance +ctx.connection # Connection instance +ctx.Schema("name") # Creates schema using ctx.connection ``` ### Thread-safe mode @@ -72,79 +65,71 @@ export DJ_THREAD_SAFE=true When `thread_safe=True`: - `dj.conn()` raises `ThreadSafetyError` - `dj.Schema()` raises `ThreadSafetyError` -- `dj.config` is read-only +- `dj.config` only allows access to `thread_safe` (all other access raises `ThreadSafetyError`) - `dj.new()` works - isolated contexts are always allowed ```python # thread_safe=True -dj.Schema("name") # ThreadSafetyError -dj.conn() # ThreadSafetyError +dj.config.thread_safe # OK - allowed +dj.config.safemode # ThreadSafetyError dj.config.safemode = False # ThreadSafetyError +dj.conn() # ThreadSafetyError +dj.Schema("name") # ThreadSafetyError -ctx = dj.new() # OK - isolated context -ctx.config.safemode = False # OK - context's own config -ctx.connect(...) # OK -ctx.Schema("name") # OK +ctx = dj.new(host="h", user="u", password="p") # OK +ctx.config.safemode = False # OK +ctx.Schema("name") # OK ``` ## Behavior Summary | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config` read | Works | Works | -| `dj.config` write | Works | `ThreadSafetyError` | +| `dj.config.thread_safe` | Works | Works | +| `dj.config.*` (other) | Works | `ThreadSafetyError` | | `dj.conn()` | Works | `ThreadSafetyError` | | `dj.Schema()` | Works | `ThreadSafetyError` | | `dj.new()` | Works | Works | -| `ctx.config` read/write | Works | Works | -| `ctx.connect()` | Works | Works | +| `ctx.config.*` | Works | Works | +| `ctx.connection` | Works | Works | | `ctx.Schema()` | Works | Works | -## Context Lifecycle +## Usage Example ```python -# Create context -ctx = dj.new() - -# Configure -ctx.config.database.host = "localhost" -ctx.config.safemode = False -ctx.config.stores = {...} +import datajoint as dj -# Connect -ctx.connect( - host="localhost", # Or use ctx.config.database.host +# Create isolated context +ctx = dj.new( + host="localhost", user="user", password="password", ) -# Use +# Configure +ctx.config.safemode = False +ctx.config.stores = {"raw": {"protocol": "file", "location": "/data"}} + +# Create schema schema = ctx.Schema("my_schema") @schema -class Mouse(ctx.Manual): +class Mouse(dj.Manual): definition = """ mouse_id: int """ -Mouse().insert1({"mouse_id": 1}) - -# Cleanup (optional - closes connection) -ctx.close() -``` - -## Legacy Compatibility - -The singleton `dj` context works exactly as before: - -```python -# These are equivalent: -dj.conn() # Singleton connection -dj.config # Singleton config -dj.Schema("name") # Uses singleton connection +@schema +class Session(dj.Manual): + definition = """ + -> Mouse + session_date: date + """ -# Internally, dj module delegates to singleton context +# Use tables +Mouse().insert1({"mouse_id": 1}) +Mouse().delete() # Uses ctx.config.safemode ``` ## Implementation @@ -153,68 +138,46 @@ dj.Schema("name") # Uses singleton connection ```python class Context: - def __init__(self, config: Config): - self.config = config - self._connection = None - - def connect(self, host, user, password, ...): - self._connection = Connection(...) - self._connection.config = self.config - - def conn(self): - return self._connection - - def Schema(self, name, ...): - return Schema(name, connection=self._connection, ...) - - # Table base classes that reference this context - @property - def Manual(self): ... - @property - def Lookup(self): ... - # etc. + def __init__(self, host, user, password, port=3306, ...): + self.config = copy(dj.config) # Independent config copy + self.connection = Connection(host, user, password, port, ...) + self.connection._config = self.config # Link config to connection + + def Schema(self, name, **kwargs): + return Schema(name, connection=self.connection, **kwargs) ``` ### 2. Add dj.new() ```python -def new() -> Context: +def new(host, user, password, **kwargs) -> Context: """Create a new isolated context with its own config and connection.""" - config_copy = copy(config) # Copy current global config - return Context(config_copy) + return Context(host, user, password, **kwargs) ``` -### 3. Make dj module act as singleton context +### 3. Add thread_safe guards + +In `dj.config`: +- Allow read/write of `thread_safe` always +- When `thread_safe=True`, block all other attribute access ```python -# In datajoint/__init__.py -_singleton_context = Context(config) - -def conn(...): - if config.thread_safe: - raise ThreadSafetyError(...) - return _singleton_context.conn(...) - -def Schema(...): - if config.thread_safe: - raise ThreadSafetyError(...) - return _singleton_context.Schema(...) +def __getattr__(self, name): + if name == "thread_safe": + return self._thread_safe + if self._thread_safe: + raise ThreadSafetyError("Global config is inaccessible in thread-safe mode.") + # ... normal access ``` -### 4. Add thread_safe guards - -- `dj.conn()`: Raise `ThreadSafetyError` when `thread_safe=True` -- `dj.Schema()`: Raise `ThreadSafetyError` when `thread_safe=True` -- `dj.config` writes: Raise `ThreadSafetyError` when `thread_safe=True` - -### 5. Refactor internal code +### 4. Refactor internal code -All internal code uses `self.connection.config` instead of global `config`: -- Tables access config via `self.connection.config` -- Connection has reference to its context's config +All internal code uses `self.connection._config` instead of global `config`: +- Tables access config via `self.connection._config` +- This works uniformly for both singleton and isolated contexts ## Error Messages -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use ctx = dj.new() to create an isolated context."` -- `dj.Schema()`: `"dj.Schema() is disabled in thread-safe mode. Use ctx = dj.new() to create an isolated context."` -- `dj.config` write: `"Global config is read-only in thread-safe mode. Use ctx = dj.new() for isolated config."` +- `dj.config.*`: `"Global config is inaccessible in thread-safe mode. Use ctx = dj.new(...) for isolated config."` +- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use ctx = dj.new(...) to create an isolated context."` +- `dj.Schema()`: `"dj.Schema() is disabled in thread-safe mode. Use ctx = dj.new(...) to create an isolated context."` From f83248606fc5a1cf585f39865957f52ace229551 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:52:36 -0600 Subject: [PATCH 15/39] docs: Singleton as lazily-loaded instance, fresh config per instance - dj.config, dj.conn(), dj.Schema() delegate to singleton instance - Singleton lazily loaded on first access - thread_safe checked at module import, blocks singleton access - inst.config created fresh (not copied from dj.config) - dj.instance() always works, creates isolated instance Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 166 ++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 71 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 35cbac0ef..ed89f5e54 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,11 +6,11 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Introduce **context** objects that encapsulate config and connection. The `dj` module provides the singleton (legacy) context. New isolated contexts are created with `dj.new()`. +Introduce **instance** objects that encapsulate config and connection. The `dj` module provides access to a lazily-loaded singleton instance. New isolated instances are created with `dj.instance()`. ## API -### Legacy API (singleton context) +### Legacy API (singleton instance) ```python import datajoint as dj @@ -24,36 +24,38 @@ class Mouse(dj.Manual): definition = "..." ``` -### New API (isolated context) +Internally, `dj.config`, `dj.conn()`, and `dj.Schema()` delegate to a lazily-loaded singleton instance. + +### New API (isolated instance) ```python import datajoint as dj -ctx = dj.new( +inst = dj.instance( host="localhost", user="user", password="password", ) -ctx.config.safemode = False -schema = ctx.Schema("my_schema") +inst.config.safemode = False +schema = inst.Schema("my_schema") @schema class Mouse(dj.Manual): definition = "..." ``` -### Context structure +### Instance structure -Each context exposes only: -- `ctx.config` - Config instance (copy of `dj.config` at creation) -- `ctx.connection` - Connection (created at context construction) -- `ctx.Schema()` - Schema factory using context's connection +Each instance has: +- `inst.config` - Config (created fresh at instance creation) +- `inst.connection` - Connection (created at instance creation) +- `inst.Schema()` - Schema factory using instance's connection ```python -ctx = dj.new(host="localhost", user="u", password="p") -ctx.config # Config instance -ctx.connection # Connection instance -ctx.Schema("name") # Creates schema using ctx.connection +inst = dj.instance(host="localhost", user="u", password="p") +inst.config # Config instance +inst.connection # Connection instance +inst.Schema("name") # Creates schema using inst.connection ``` ### Thread-safe mode @@ -62,57 +64,67 @@ ctx.Schema("name") # Creates schema using ctx.connection export DJ_THREAD_SAFE=true ``` -When `thread_safe=True`: +`thread_safe` is read from environment/config file at module import time. + +When `thread_safe=True`, accessing the singleton raises `ThreadSafetyError`: +- `dj.config` raises `ThreadSafetyError` - `dj.conn()` raises `ThreadSafetyError` - `dj.Schema()` raises `ThreadSafetyError` -- `dj.config` only allows access to `thread_safe` (all other access raises `ThreadSafetyError`) -- `dj.new()` works - isolated contexts are always allowed +- `dj.instance()` works - isolated instances are always allowed ```python # thread_safe=True -dj.config.thread_safe # OK - allowed -dj.config.safemode # ThreadSafetyError -dj.config.safemode = False # ThreadSafetyError -dj.conn() # ThreadSafetyError -dj.Schema("name") # ThreadSafetyError +dj.config # ThreadSafetyError +dj.conn() # ThreadSafetyError +dj.Schema("name") # ThreadSafetyError -ctx = dj.new(host="h", user="u", password="p") # OK -ctx.config.safemode = False # OK -ctx.Schema("name") # OK +inst = dj.instance(host="h", user="u", password="p") # OK +inst.config.safemode = False # OK +inst.Schema("name") # OK ``` ## Behavior Summary | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config.thread_safe` | Works | Works | -| `dj.config.*` (other) | Works | `ThreadSafetyError` | -| `dj.conn()` | Works | `ThreadSafetyError` | -| `dj.Schema()` | Works | `ThreadSafetyError` | -| `dj.new()` | Works | Works | -| `ctx.config.*` | Works | Works | -| `ctx.connection` | Works | Works | -| `ctx.Schema()` | Works | Works | +| `dj.config` | Singleton config | `ThreadSafetyError` | +| `dj.conn()` | Singleton connection | `ThreadSafetyError` | +| `dj.Schema()` | Uses singleton | `ThreadSafetyError` | +| `dj.instance()` | Works | Works | +| `inst.config` | Works | Works | +| `inst.connection` | Works | Works | +| `inst.Schema()` | Works | Works | + +## Singleton Lazy Loading + +The singleton instance is created lazily on first access to `dj.config`, `dj.conn()`, or `dj.Schema()`: + +```python +# First access triggers singleton creation +dj.config.safemode # Creates singleton, returns singleton.config.safemode +dj.conn() # Returns singleton.connection (connects if needed) +dj.Schema("name") # Returns singleton.Schema("name") +``` ## Usage Example ```python import datajoint as dj -# Create isolated context -ctx = dj.new( +# Create isolated instance +inst = dj.instance( host="localhost", user="user", password="password", ) # Configure -ctx.config.safemode = False -ctx.config.stores = {"raw": {"protocol": "file", "location": "/data"}} +inst.config.safemode = False +inst.config.stores = {"raw": {"protocol": "file", "location": "/data"}} # Create schema -schema = ctx.Schema("my_schema") +schema = inst.Schema("my_schema") @schema class Mouse(dj.Manual): @@ -120,64 +132,76 @@ class Mouse(dj.Manual): mouse_id: int """ -@schema -class Session(dj.Manual): - definition = """ - -> Mouse - session_date: date - """ - # Use tables Mouse().insert1({"mouse_id": 1}) -Mouse().delete() # Uses ctx.config.safemode +Mouse().delete() # Uses inst.config.safemode ``` ## Implementation -### 1. Create Context class +### 1. Create Instance class ```python -class Context: - def __init__(self, host, user, password, port=3306, ...): - self.config = copy(dj.config) # Independent config copy +class Instance: + def __init__(self, host, user, password, port=3306, **kwargs): + self.config = Config() # Fresh config with defaults + # Apply any config overrides from kwargs self.connection = Connection(host, user, password, port, ...) - self.connection._config = self.config # Link config to connection + self.connection._config = self.config def Schema(self, name, **kwargs): return Schema(name, connection=self.connection, **kwargs) ``` -### 2. Add dj.new() +### 2. Add dj.instance() ```python -def new(host, user, password, **kwargs) -> Context: - """Create a new isolated context with its own config and connection.""" - return Context(host, user, password, **kwargs) +def instance(host, user, password, **kwargs) -> Instance: + """Create a new isolated instance with its own config and connection.""" + return Instance(host, user, password, **kwargs) ``` -### 3. Add thread_safe guards - -In `dj.config`: -- Allow read/write of `thread_safe` always -- When `thread_safe=True`, block all other attribute access +### 3. Singleton with lazy loading ```python -def __getattr__(self, name): - if name == "thread_safe": - return self._thread_safe - if self._thread_safe: - raise ThreadSafetyError("Global config is inaccessible in thread-safe mode.") - # ... normal access +# Module level +_thread_safe = _load_thread_safe_from_env_or_config() +_singleton = None + +def _get_singleton(): + if _thread_safe: + raise ThreadSafetyError( + "Global DataJoint state is disabled in thread-safe mode. " + "Use dj.instance() to create an isolated instance." + ) + global _singleton + if _singleton is None: + _singleton = Instance( + host=_load_from_config("database.host"), + user=_load_from_config("database.user"), + password=_load_from_config("database.password"), + ... + ) + return _singleton + +# Public API +@property +def config(): + return _get_singleton().config + +def conn(): + return _get_singleton().connection + +def Schema(name, **kwargs): + return _get_singleton().Schema(name, **kwargs) ``` ### 4. Refactor internal code All internal code uses `self.connection._config` instead of global `config`: - Tables access config via `self.connection._config` -- This works uniformly for both singleton and isolated contexts +- This works uniformly for both singleton and isolated instances ## Error Messages -- `dj.config.*`: `"Global config is inaccessible in thread-safe mode. Use ctx = dj.new(...) for isolated config."` -- `dj.conn()`: `"dj.conn() is disabled in thread-safe mode. Use ctx = dj.new(...) to create an isolated context."` -- `dj.Schema()`: `"dj.Schema() is disabled in thread-safe mode. Use ctx = dj.new(...) to create an isolated context."` +- Singleton access: `"Global DataJoint state is disabled in thread-safe mode. Use dj.instance() to create an isolated instance."` From 6fe7497fa20751c99c37b7e31e5465d64b4116cb Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 17:58:20 -0600 Subject: [PATCH 16/39] docs: Add inst.FreeTable(), clarify base classes vs instance methods - dj.Manual, dj.Lookup etc. used with @schema decorator (schema links connection) - inst.Schema(), inst.FreeTable() need connection directly - FreeTable added to Instance class Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index ed89f5e54..b7922a837 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -50,12 +50,29 @@ Each instance has: - `inst.config` - Config (created fresh at instance creation) - `inst.connection` - Connection (created at instance creation) - `inst.Schema()` - Schema factory using instance's connection +- `inst.FreeTable()` - FreeTable factory using instance's connection ```python inst = dj.instance(host="localhost", user="u", password="p") inst.config # Config instance inst.connection # Connection instance inst.Schema("name") # Creates schema using inst.connection +inst.FreeTable("db.tbl") # Access table using inst.connection +``` + +### Table base classes vs instance methods + +**Base classes** (`dj.Manual`, `dj.Lookup`, etc.) - Used with `@schema` decorator: +```python +@schema +class Mouse(dj.Manual): # dj.Manual - schema links to connection + definition = "..." +``` + +**Instance methods** (`inst.Schema()`, `inst.FreeTable()`) - Need connection directly: +```python +schema = inst.Schema("my_schema") # Uses inst.connection +table = inst.FreeTable("db.table") # Uses inst.connection ``` ### Thread-safe mode @@ -151,6 +168,9 @@ class Instance: def Schema(self, name, **kwargs): return Schema(name, connection=self.connection, **kwargs) + + def FreeTable(self, full_table_name): + return FreeTable(self.connection, full_table_name) ``` ### 2. Add dj.instance() From 32b52353a1ccbc23f12690aefec77d590414b715 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 18:06:42 -0600 Subject: [PATCH 17/39] docs: Consolidate to single singleton instance, dj.Instance() - dj.Instance() (uppercase) for consistency with dj.Schema() - Single _singleton_instance created lazily - dj.config -> _singleton.config (via proxy) - dj.conn() -> _singleton.connection - dj.Schema() -> _singleton.Schema() - dj.FreeTable() -> _singleton.FreeTable() - All trigger same singleton creation Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 92 +++++++++++++++++++-------------- 1 file changed, 52 insertions(+), 40 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index b7922a837..dac95981a 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,7 +6,7 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Introduce **instance** objects that encapsulate config and connection. The `dj` module provides access to a lazily-loaded singleton instance. New isolated instances are created with `dj.instance()`. +Introduce **Instance** objects that encapsulate config and connection. The `dj` module provides access to a lazily-loaded singleton instance. New isolated instances are created with `dj.Instance()`. ## API @@ -16,7 +16,7 @@ Introduce **instance** objects that encapsulate config and connection. The `dj` import datajoint as dj dj.config.safemode = False -dj.conn(host="localhost", user="u", password="p") +dj.conn() # Triggers singleton creation, returns connection schema = dj.Schema("my_schema") @schema @@ -24,14 +24,19 @@ class Mouse(dj.Manual): definition = "..." ``` -Internally, `dj.config`, `dj.conn()`, and `dj.Schema()` delegate to a lazily-loaded singleton instance. +Internally, `dj.config`, `dj.conn()`, and `dj.Schema()` are aliases to the singleton instance: +- `dj.config` → `dj._singleton_instance.config` +- `dj.conn()` → `dj._singleton_instance.connection` +- `dj.Schema()` → `dj._singleton_instance.Schema()` + +The singleton is created lazily on first access to any of these. ### New API (isolated instance) ```python import datajoint as dj -inst = dj.instance( +inst = dj.Instance( host="localhost", user="user", password="password", @@ -53,7 +58,7 @@ Each instance has: - `inst.FreeTable()` - FreeTable factory using instance's connection ```python -inst = dj.instance(host="localhost", user="u", password="p") +inst = dj.Instance(host="localhost", user="u", password="p") inst.config # Config instance inst.connection # Connection instance inst.Schema("name") # Creates schema using inst.connection @@ -87,7 +92,7 @@ When `thread_safe=True`, accessing the singleton raises `ThreadSafetyError`: - `dj.config` raises `ThreadSafetyError` - `dj.conn()` raises `ThreadSafetyError` - `dj.Schema()` raises `ThreadSafetyError` -- `dj.instance()` works - isolated instances are always allowed +- `dj.Instance()` works - isolated instances are always allowed ```python # thread_safe=True @@ -96,7 +101,7 @@ dj.config # ThreadSafetyError dj.conn() # ThreadSafetyError dj.Schema("name") # ThreadSafetyError -inst = dj.instance(host="h", user="u", password="p") # OK +inst = dj.Instance(host="h", user="u", password="p") # OK inst.config.safemode = False # OK inst.Schema("name") # OK ``` @@ -105,32 +110,33 @@ inst.Schema("name") # OK | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config` | Singleton config | `ThreadSafetyError` | -| `dj.conn()` | Singleton connection | `ThreadSafetyError` | -| `dj.Schema()` | Uses singleton | `ThreadSafetyError` | -| `dj.instance()` | Works | Works | +| `dj.config` | `_singleton.config` | `ThreadSafetyError` | +| `dj.conn()` | `_singleton.connection` | `ThreadSafetyError` | +| `dj.Schema()` | `_singleton.Schema()` | `ThreadSafetyError` | +| `dj.Instance()` | Works | Works | | `inst.config` | Works | Works | | `inst.connection` | Works | Works | | `inst.Schema()` | Works | Works | ## Singleton Lazy Loading -The singleton instance is created lazily on first access to `dj.config`, `dj.conn()`, or `dj.Schema()`: +The singleton instance is created lazily on first access: ```python -# First access triggers singleton creation -dj.config.safemode # Creates singleton, returns singleton.config.safemode -dj.conn() # Returns singleton.connection (connects if needed) -dj.Schema("name") # Returns singleton.Schema("name") +dj.config # Creates singleton, returns _singleton.config +dj.conn() # Creates singleton, returns _singleton.connection +dj.Schema("name") # Creates singleton, returns _singleton.Schema("name") ``` +All three trigger creation of the same singleton instance. + ## Usage Example ```python import datajoint as dj # Create isolated instance -inst = dj.instance( +inst = dj.Instance( host="localhost", user="user", password="password", @@ -173,47 +179,53 @@ class Instance: return FreeTable(self.connection, full_table_name) ``` -### 2. Add dj.instance() - -```python -def instance(host, user, password, **kwargs) -> Instance: - """Create a new isolated instance with its own config and connection.""" - return Instance(host, user, password, **kwargs) -``` - -### 3. Singleton with lazy loading +### 2. Singleton with lazy loading ```python # Module level _thread_safe = _load_thread_safe_from_env_or_config() -_singleton = None +_singleton_instance = None def _get_singleton(): if _thread_safe: raise ThreadSafetyError( "Global DataJoint state is disabled in thread-safe mode. " - "Use dj.instance() to create an isolated instance." + "Use dj.Instance() to create an isolated instance." ) - global _singleton - if _singleton is None: - _singleton = Instance( - host=_load_from_config("database.host"), - user=_load_from_config("database.user"), - password=_load_from_config("database.password"), + global _singleton_instance + if _singleton_instance is None: + _singleton_instance = Instance( + host=_load_from_env_or_config("database.host"), + user=_load_from_env_or_config("database.user"), + password=_load_from_env_or_config("database.password"), ... ) - return _singleton + return _singleton_instance +``` + +### 3. Legacy API as aliases -# Public API -@property -def config(): - return _get_singleton().config +```python +# dj.config -> singleton.config +class _ConfigProxy: + def __getattr__(self, name): + return getattr(_get_singleton().config, name) + def __setattr__(self, name, value): + setattr(_get_singleton().config, name, value) +config = _ConfigProxy() + +# dj.conn() -> singleton.connection def conn(): return _get_singleton().connection +# dj.Schema() -> singleton.Schema() def Schema(name, **kwargs): return _get_singleton().Schema(name, **kwargs) + +# dj.FreeTable() -> singleton.FreeTable() +def FreeTable(full_table_name): + return _get_singleton().FreeTable(full_table_name) ``` ### 4. Refactor internal code @@ -224,4 +236,4 @@ All internal code uses `self.connection._config` instead of global `config`: ## Error Messages -- Singleton access: `"Global DataJoint state is disabled in thread-safe mode. Use dj.instance() to create an isolated instance."` +- Singleton access: `"Global DataJoint state is disabled in thread-safe mode. Use dj.Instance() to create an isolated instance."` From b251e862e0f1a9f944c4d7da6046fc6bfb13fe8f Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 19:04:39 -0600 Subject: [PATCH 18/39] feat: Implement thread-safe mode with Instance class - Add Instance class that encapsulates config + connection - Add ThreadSafetyError exception for global state access - Add _ConfigProxy to delegate dj.config to global config - Add _get_singleton_connection for lazy connection creation - Update dj.conn(), dj.Schema(), dj.FreeTable() to use singleton - Connection now stores _config reference for instance isolation - Add DJ_THREAD_SAFE environment variable support - Add comprehensive tests for thread-safe mode When DJ_THREAD_SAFE=true: - dj.config raises ThreadSafetyError - dj.conn() raises ThreadSafetyError - dj.Schema() raises ThreadSafetyError (without explicit connection) - dj.FreeTable() raises ThreadSafetyError (without explicit connection) - dj.Instance() always works for isolated contexts Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 119 +++++++------ src/datajoint/__init__.py | 153 +++++++++++++++- src/datajoint/connection.py | 19 +- src/datajoint/errors.py | 4 + src/datajoint/instance.py | 301 ++++++++++++++++++++++++++++++++ tests/unit/test_thread_safe.py | 173 ++++++++++++++++++ 6 files changed, 707 insertions(+), 62 deletions(-) create mode 100644 src/datajoint/instance.py create mode 100644 tests/unit/test_thread_safe.py diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index dac95981a..ac6d94e5e 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -6,17 +6,22 @@ DataJoint uses global state (`dj.config`, `dj.conn()`) that is not thread-safe. ## Solution -Introduce **Instance** objects that encapsulate config and connection. The `dj` module provides access to a lazily-loaded singleton instance. New isolated instances are created with `dj.Instance()`. +Introduce **Instance** objects that encapsulate config and connection. The `dj` module provides a global config that can be modified before connecting, and a lazily-loaded singleton connection. New isolated instances are created with `dj.Instance()`. ## API -### Legacy API (singleton instance) +### Legacy API (global config + singleton connection) ```python import datajoint as dj +# Configure credentials (no connection yet) +dj.config.database.user = "user" +dj.config.database.password = "password" dj.config.safemode = False -dj.conn() # Triggers singleton creation, returns connection + +# First call to conn() or Schema() creates the singleton connection +dj.conn() # Creates connection using dj.config credentials schema = dj.Schema("my_schema") @schema @@ -24,12 +29,11 @@ class Mouse(dj.Manual): definition = "..." ``` -Internally, `dj.config`, `dj.conn()`, and `dj.Schema()` are aliases to the singleton instance: -- `dj.config` → `dj._singleton_instance.config` -- `dj.conn()` → `dj._singleton_instance.connection` -- `dj.Schema()` → `dj._singleton_instance.Schema()` - -The singleton is created lazily on first access to any of these. +Internally: +- `dj.config` → delegates to `_global_config` (with thread-safety check) +- `dj.conn()` → returns `_singleton_connection` (created lazily) +- `dj.Schema()` → uses `_singleton_connection` +- `dj.FreeTable()` → uses `_singleton_connection` ### New API (isolated instance) @@ -86,12 +90,13 @@ table = inst.FreeTable("db.table") # Uses inst.connection export DJ_THREAD_SAFE=true ``` -`thread_safe` is read from environment/config file at module import time. +`thread_safe` is checked dynamically on each access to global state. -When `thread_safe=True`, accessing the singleton raises `ThreadSafetyError`: +When `thread_safe=True`, accessing global state raises `ThreadSafetyError`: - `dj.config` raises `ThreadSafetyError` - `dj.conn()` raises `ThreadSafetyError` -- `dj.Schema()` raises `ThreadSafetyError` +- `dj.Schema()` raises `ThreadSafetyError` (without explicit connection) +- `dj.FreeTable()` raises `ThreadSafetyError` (without explicit connection) - `dj.Instance()` works - isolated instances are always allowed ```python @@ -110,26 +115,26 @@ inst.Schema("name") # OK | Operation | `thread_safe=False` | `thread_safe=True` | |-----------|--------------------|--------------------| -| `dj.config` | `_singleton.config` | `ThreadSafetyError` | -| `dj.conn()` | `_singleton.connection` | `ThreadSafetyError` | -| `dj.Schema()` | `_singleton.Schema()` | `ThreadSafetyError` | +| `dj.config` | `_global_config` | `ThreadSafetyError` | +| `dj.conn()` | `_singleton_connection` | `ThreadSafetyError` | +| `dj.Schema()` | Uses singleton | `ThreadSafetyError` | +| `dj.FreeTable()` | Uses singleton | `ThreadSafetyError` | | `dj.Instance()` | Works | Works | | `inst.config` | Works | Works | | `inst.connection` | Works | Works | | `inst.Schema()` | Works | Works | -## Singleton Lazy Loading +## Lazy Loading -The singleton instance is created lazily on first access: +The global config is created at module import time. The singleton connection is created lazily on first access: ```python -dj.config # Creates singleton, returns _singleton.config -dj.conn() # Creates singleton, returns _singleton.connection -dj.Schema("name") # Creates singleton, returns _singleton.Schema("name") +dj.config.database.user = "user" # Modifies global config (no connection yet) +dj.config.database.password = "pw" +dj.conn() # Creates singleton connection using global config +dj.Schema("name") # Uses existing singleton connection ``` -All three trigger creation of the same singleton instance. - ## Usage Example ```python @@ -167,7 +172,7 @@ Mouse().delete() # Uses inst.config.safemode ```python class Instance: def __init__(self, host, user, password, port=3306, **kwargs): - self.config = Config() # Fresh config with defaults + self.config = _create_config() # Fresh config with defaults # Apply any config overrides from kwargs self.connection = Connection(host, user, password, port, ...) self.connection._config = self.config @@ -179,58 +184,74 @@ class Instance: return FreeTable(self.connection, full_table_name) ``` -### 2. Singleton with lazy loading +### 2. Global config and singleton connection ```python # Module level -_thread_safe = _load_thread_safe_from_env_or_config() -_singleton_instance = None +_global_config = _create_config() # Created at import time +_singleton_connection = None # Created lazily -def _get_singleton(): - if _thread_safe: +def _check_thread_safe(): + if _load_thread_safe(): raise ThreadSafetyError( "Global DataJoint state is disabled in thread-safe mode. " "Use dj.Instance() to create an isolated instance." ) - global _singleton_instance - if _singleton_instance is None: - _singleton_instance = Instance( - host=_load_from_env_or_config("database.host"), - user=_load_from_env_or_config("database.user"), - password=_load_from_env_or_config("database.password"), + +def _get_singleton_connection(): + _check_thread_safe() + global _singleton_connection + if _singleton_connection is None: + _singleton_connection = Connection( + host=_global_config.database.host, + user=_global_config.database.user, + password=_global_config.database.password, ... ) - return _singleton_instance + _singleton_connection._config = _global_config + return _singleton_connection ``` -### 3. Legacy API as aliases +### 3. Legacy API with thread-safety checks ```python -# dj.config -> singleton.config +# dj.config -> global config with thread-safety check class _ConfigProxy: def __getattr__(self, name): - return getattr(_get_singleton().config, name) + _check_thread_safe() + return getattr(_global_config, name) def __setattr__(self, name, value): - setattr(_get_singleton().config, name, value) + _check_thread_safe() + setattr(_global_config, name, value) config = _ConfigProxy() -# dj.conn() -> singleton.connection +# dj.conn() -> singleton connection def conn(): - return _get_singleton().connection - -# dj.Schema() -> singleton.Schema() -def Schema(name, **kwargs): - return _get_singleton().Schema(name, **kwargs) - -# dj.FreeTable() -> singleton.FreeTable() -def FreeTable(full_table_name): - return _get_singleton().FreeTable(full_table_name) + return _get_singleton_connection() + +# dj.Schema() -> uses singleton connection +def Schema(name, connection=None, **kwargs): + if connection is None: + _check_thread_safe() + connection = _get_singleton_connection() + return _Schema(name, connection=connection, **kwargs) + +# dj.FreeTable() -> uses singleton connection +def FreeTable(conn_or_name, full_table_name=None): + if full_table_name is None: + # Called as FreeTable("db.table") + _check_thread_safe() + return _FreeTable(_get_singleton_connection(), conn_or_name) + else: + # Called as FreeTable(conn, "db.table") + return _FreeTable(conn_or_name, full_table_name) ``` ### 4. Refactor internal code All internal code uses `self.connection._config` instead of global `config`: +- Connection stores reference to its config as `self._config` - Tables access config via `self.connection._config` - This works uniformly for both singleton and isolated instances diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 7f809487d..04a2deb5f 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -23,6 +23,7 @@ "config", "conn", "Connection", + "Instance", "Schema", "VirtualModule", "virtual_schema", @@ -52,6 +53,7 @@ "errors", "migrate", "DataJointError", + "ThreadSafetyError", "logger", "cli", "ValidationResult", @@ -72,17 +74,158 @@ NpyRef, ) from .blob import MatCell, MatStruct -from .connection import Connection, conn -from .errors import DataJointError +from .connection import Connection +from .errors import DataJointError, ThreadSafetyError from .expression import AndList, Not, Top, U +from .instance import Instance, _ConfigProxy, _get_singleton_connection, _global_config, _check_thread_safe from .logging import logger from .objectref import ObjectRef -from .schemas import Schema, VirtualModule, list_schemas, virtual_schema -from .settings import config -from .table import FreeTable, Table, ValidationResult +from .schemas import Schema as _Schema, VirtualModule, list_schemas, virtual_schema +from .table import FreeTable as _FreeTable, Table, ValidationResult from .user_tables import Computed, Imported, Lookup, Manual, Part from .version import __version__ +# ============================================================================= +# Singleton-aware API +# ============================================================================= +# config is a proxy that delegates to the singleton instance's config +config = _ConfigProxy() + + +def conn( + host: str | None = None, + user: str | None = None, + password: str | None = None, + *, + reset: bool = False, + use_tls: bool | dict | None = None, +) -> Connection: + """ + Return a persistent connection object. + + When called without arguments, returns the singleton connection. + When connection parameters are provided, creates a new Connection. + + Parameters + ---------- + host : str, optional + Database hostname. + user : str, optional + Database username. + password : str, optional + Database password. + reset : bool, optional + If True, reset existing connection. Default False. + use_tls : bool or dict, optional + TLS encryption option. + + Returns + ------- + Connection + Database connection. + + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled and using singleton. + """ + # If any connection params provided, use legacy behavior + if host is not None or user is not None or password is not None or reset: + from .connection import conn as _legacy_conn + + return _legacy_conn(host, user, password, reset=reset, use_tls=use_tls) + + # Otherwise use singleton connection + return _get_singleton_connection() + + +def Schema( + schema_name: str | None = None, + context: dict | None = None, + *, + connection: Connection | None = None, + create_schema: bool = True, + create_tables: bool | None = None, + add_objects: dict | None = None, +) -> _Schema: + """ + Create a Schema for binding table classes to a database schema. + + When connection is not provided, uses the singleton connection. + + Parameters + ---------- + schema_name : str, optional + Database schema name. + context : dict, optional + Namespace for foreign key lookup. + connection : Connection, optional + Database connection. Defaults to singleton connection. + create_schema : bool, optional + If False, raise error if schema doesn't exist. Default True. + create_tables : bool, optional + If False, raise error when accessing missing tables. + add_objects : dict, optional + Additional objects for declaration context. + + Returns + ------- + Schema + A Schema bound to the specified connection. + + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled and using singleton. + """ + if connection is None: + # Use singleton connection - will raise ThreadSafetyError if thread_safe=True + _check_thread_safe() + connection = _get_singleton_connection() + + return _Schema( + schema_name, + context=context, + connection=connection, + create_schema=create_schema, + create_tables=create_tables, + add_objects=add_objects, + ) + + +def FreeTable(conn_or_name, full_table_name: str | None = None) -> _FreeTable: + """ + Create a FreeTable for accessing a table without a dedicated class. + + Can be called in two ways: + - ``FreeTable("schema.table")`` - uses singleton connection + - ``FreeTable(connection, "schema.table")`` - uses provided connection + + Parameters + ---------- + conn_or_name : Connection or str + Either a Connection object, or the full table name if using singleton. + full_table_name : str, optional + Full table name when first argument is a connection. + + Returns + ------- + FreeTable + A FreeTable instance for the specified table. + + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled and using singleton. + """ + if full_table_name is None: + # Called as FreeTable("db.table") - use singleton connection + _check_thread_safe() + return _FreeTable(_get_singleton_connection(), conn_or_name) + else: + # Called as FreeTable(conn, "db.table") - use provided connection + return _FreeTable(conn_or_name, full_table_name) + # ============================================================================= # Lazy imports — heavy dependencies loaded on first access # ============================================================================= diff --git a/src/datajoint/connection.py b/src/datajoint/connection.py index 488a26e7d..934a6694a 100644 --- a/src/datajoint/connection.py +++ b/src/datajoint/connection.py @@ -187,8 +187,11 @@ def __init__( self._query_cache = None self._is_closed = True # Mark as closed until connect() succeeds + # Config reference - defaults to global config, but Instance sets its own + self._config = config + # Select adapter based on configured backend - backend = config["database.backend"] + backend = self._config["database.backend"] self.adapter = get_adapter(backend) self.connect() @@ -219,7 +222,7 @@ def connect(self) -> None: port=self.conn_info["port"], user=self.conn_info["user"], password=self.conn_info["passwd"], - charset=config["connection.charset"], + charset=self._config["connection.charset"], use_tls=self.conn_info.get("ssl"), ) except Exception as ssl_error: @@ -235,7 +238,7 @@ def connect(self) -> None: port=self.conn_info["port"], user=self.conn_info["user"], password=self.conn_info["passwd"], - charset=config["connection.charset"], + charset=self._config["connection.charset"], use_tls=False, # Explicitly disable SSL for fallback ) else: @@ -261,8 +264,8 @@ def set_query_cache(self, query_cache: str | None = None) -> None: def purge_query_cache(self) -> None: """Delete all cached query results.""" - if isinstance(config.get(cache_key), str) and pathlib.Path(config[cache_key]).is_dir(): - for path in pathlib.Path(config[cache_key]).iterdir(): + if isinstance(self._config.get(cache_key), str) and pathlib.Path(self._config[cache_key]).is_dir(): + for path in pathlib.Path(self._config[cache_key]).iterdir(): if not path.is_dir(): path.unlink() @@ -403,11 +406,11 @@ def query( if use_query_cache and not re.match(r"\s*(SELECT|SHOW)", query): raise errors.DataJointError("Only SELECT queries are allowed when query caching is on.") if use_query_cache: - if not config[cache_key]: + if not self._config[cache_key]: raise errors.DataJointError(f"Provide filepath dj.config['{cache_key}'] when using query caching.") # Cache key is backend-specific (no identifier normalization needed) hash_ = hashlib.md5((str(self._query_cache)).encode() + pack(args) + query.encode()).hexdigest() - cache_path = pathlib.Path(config[cache_key]) / str(hash_) + cache_path = pathlib.Path(self._config[cache_key]) / str(hash_) try: buffer = cache_path.read_bytes() except FileNotFoundError: @@ -416,7 +419,7 @@ def query( return EmulatedCursor(unpack(buffer)) if reconnect is None: - reconnect = config["database.reconnect"] + reconnect = self._config["database.reconnect"] logger.debug("Executing SQL:" + query[:query_log_max_length]) cursor = self.adapter.get_cursor(self._conn, as_dict=as_dict) try: diff --git a/src/datajoint/errors.py b/src/datajoint/errors.py index 7e10f021d..bba032b23 100644 --- a/src/datajoint/errors.py +++ b/src/datajoint/errors.py @@ -72,3 +72,7 @@ class MissingExternalFile(DataJointError): class BucketInaccessible(DataJointError): """S3 bucket is inaccessible.""" + + +class ThreadSafetyError(DataJointError): + """Global DataJoint state is disabled in thread-safe mode.""" diff --git a/src/datajoint/instance.py b/src/datajoint/instance.py new file mode 100644 index 000000000..309fef668 --- /dev/null +++ b/src/datajoint/instance.py @@ -0,0 +1,301 @@ +""" +DataJoint Instance for thread-safe operation. + +An Instance encapsulates a config and connection pair, providing isolated +database contexts for multi-tenant applications. +""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, Any + +from .connection import Connection +from .errors import ThreadSafetyError +from .settings import Config, _create_config + +if TYPE_CHECKING: + from .schemas import Schema as SchemaClass + from .table import FreeTable as FreeTableClass + + +def _load_thread_safe() -> bool: + """ + Load thread_safe setting from environment or config file. + + Returns + ------- + bool + True if thread-safe mode is enabled. + """ + # Check environment variable first + env_val = os.environ.get("DJ_THREAD_SAFE", "").lower() + if env_val in ("true", "1", "yes"): + return True + if env_val in ("false", "0", "no"): + return False + + # Default: thread-safe mode is off + return False + + +class Instance: + """ + Encapsulates a DataJoint configuration and connection. + + Each Instance has its own Config and Connection, providing isolation + for multi-tenant applications. Use ``dj.Instance()`` to create isolated + instances, or access the singleton via ``dj.config``, ``dj.conn()``, etc. + + Parameters + ---------- + host : str + Database hostname. + user : str + Database username. + password : str + Database password. + port : int, optional + Database port. Default from config or 3306. + use_tls : bool or dict, optional + TLS configuration. + **kwargs : Any + Additional config overrides applied to this instance's config. + + Attributes + ---------- + config : Config + Configuration for this instance. + connection : Connection + Database connection for this instance. + + Examples + -------- + >>> inst = dj.Instance(host="localhost", user="root", password="secret") + >>> inst.config.safemode = False + >>> schema = inst.Schema("my_schema") + """ + + def __init__( + self, + host: str, + user: str, + password: str, + port: int | None = None, + use_tls: bool | dict | None = None, + **kwargs: Any, + ) -> None: + # Create fresh config with defaults loaded from env/file + self.config = _create_config() + + # Apply any config overrides from kwargs + for key, value in kwargs.items(): + if hasattr(self.config, key): + setattr(self.config, key, value) + elif "__" in key: + # Handle nested keys like database__reconnect + parts = key.split("__") + obj = self.config + for part in parts[:-1]: + obj = getattr(obj, part) + setattr(obj, parts[-1], value) + + # Determine port + if port is None: + port = self.config.database.port + + # Create connection + self.connection = Connection(host, user, password, port, use_tls) + + # Attach config to connection so tables can access it + self.connection._config = self.config + + def Schema( + self, + schema_name: str, + *, + context: dict[str, Any] | None = None, + create_schema: bool = True, + create_tables: bool | None = None, + add_objects: dict[str, Any] | None = None, + ) -> "SchemaClass": + """ + Create a Schema bound to this instance's connection. + + Parameters + ---------- + schema_name : str + Database schema name. + context : dict, optional + Namespace for foreign key lookup. + create_schema : bool, optional + If False, raise error if schema doesn't exist. Default True. + create_tables : bool, optional + If False, raise error when accessing missing tables. + add_objects : dict, optional + Additional objects for declaration context. + + Returns + ------- + Schema + A Schema using this instance's connection. + """ + from .schemas import Schema + + return Schema( + schema_name, + context=context, + connection=self.connection, + create_schema=create_schema, + create_tables=create_tables, + add_objects=add_objects, + ) + + def FreeTable(self, full_table_name: str) -> "FreeTableClass": + """ + Create a FreeTable bound to this instance's connection. + + Parameters + ---------- + full_table_name : str + Full table name as ``'schema.table'`` or ```schema`.`table```. + + Returns + ------- + FreeTable + A FreeTable using this instance's connection. + """ + from .table import FreeTable + + return FreeTable(self.connection, full_table_name) + + def __repr__(self) -> str: + return f"Instance({self.connection!r})" + + +# ============================================================================= +# Singleton management +# ============================================================================= +# The global config is created at module load time and can be modified +# The singleton connection is created lazily when conn() or Schema() is called + +_global_config: Config = _create_config() +_singleton_connection: Connection | None = None + + +def _check_thread_safe() -> None: + """ + Check if thread-safe mode is enabled and raise if so. + + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled. + """ + if _load_thread_safe(): + raise ThreadSafetyError( + "Global DataJoint state is disabled in thread-safe mode. " + "Use dj.Instance() to create an isolated instance." + ) + + +def _get_singleton_connection() -> Connection: + """ + Get or create the singleton Connection. + + Uses credentials from the global config. + + Raises + ------ + ThreadSafetyError + If thread_safe mode is enabled. + DataJointError + If credentials are not configured. + """ + global _singleton_connection + + _check_thread_safe() + + if _singleton_connection is None: + from .errors import DataJointError + + host = _global_config.database.host + user = _global_config.database.user + password = _global_config.database.password + if password is not None: + password = password.get_secret_value() + port = _global_config.database.port + use_tls = _global_config.database.use_tls + + if user is None: + raise DataJointError( + "Database user not configured. Set dj.config['database.user'] or DJ_USER environment variable." + ) + if password is None: + raise DataJointError( + "Database password not configured. Set dj.config['database.password'] or DJ_PASS environment variable." + ) + + _singleton_connection = Connection(host, user, password, port, use_tls) + # Attach global config to connection + _singleton_connection._config = _global_config + + return _singleton_connection + + +class _ConfigProxy: + """ + Proxy that delegates to the global config, with thread-safety checks. + + In thread-safe mode, all access raises ThreadSafetyError. + """ + + def __getattr__(self, name: str) -> Any: + _check_thread_safe() + return getattr(_global_config, name) + + def __setattr__(self, name: str, value: Any) -> None: + _check_thread_safe() + setattr(_global_config, name, value) + + def __getitem__(self, key: str) -> Any: + _check_thread_safe() + return _global_config[key] + + def __setitem__(self, key: str, value: Any) -> None: + _check_thread_safe() + _global_config[key] = value + + def __delitem__(self, key: str) -> None: + _check_thread_safe() + del _global_config[key] + + def get(self, key: str, default: Any = None) -> Any: + _check_thread_safe() + return _global_config.get(key, default) + + def override(self, **kwargs: Any): + _check_thread_safe() + return _global_config.override(**kwargs) + + def load(self, filename: str) -> None: + _check_thread_safe() + return _global_config.load(filename) + + def get_store_spec(self, store: str | None = None, *, use_filepath_default: bool = False) -> dict[str, Any]: + _check_thread_safe() + return _global_config.get_store_spec(store, use_filepath_default=use_filepath_default) + + @staticmethod + def save_template( + path: str = "datajoint.json", + minimal: bool = True, + create_secrets_dir: bool = True, + ): + # save_template is a static method, no thread-safety check needed + return Config.save_template(path, minimal, create_secrets_dir) + + def __repr__(self) -> str: + if _load_thread_safe(): + return "ConfigProxy (thread-safe mode - use dj.Instance())" + return repr(_global_config) diff --git a/tests/unit/test_thread_safe.py b/tests/unit/test_thread_safe.py new file mode 100644 index 000000000..427c9a3ec --- /dev/null +++ b/tests/unit/test_thread_safe.py @@ -0,0 +1,173 @@ +"""Tests for thread-safe mode functionality.""" + +import os + +import pytest + + +class TestThreadSafeMode: + """Test thread-safe mode behavior.""" + + def test_thread_safe_env_var_true(self, monkeypatch): + """DJ_THREAD_SAFE=true enables thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + # Re-import to pick up the new env var + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is True + + def test_thread_safe_env_var_false(self, monkeypatch): + """DJ_THREAD_SAFE=false disables thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "false") + + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is False + + def test_thread_safe_env_var_1(self, monkeypatch): + """DJ_THREAD_SAFE=1 enables thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "1") + + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is True + + def test_thread_safe_env_var_yes(self, monkeypatch): + """DJ_THREAD_SAFE=yes enables thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "yes") + + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is True + + def test_thread_safe_default_false(self, monkeypatch): + """Thread-safe mode defaults to False.""" + monkeypatch.delenv("DJ_THREAD_SAFE", raising=False) + + from datajoint.instance import _load_thread_safe + + assert _load_thread_safe() is False + + +class TestConfigProxyThreadSafe: + """Test ConfigProxy behavior in thread-safe mode.""" + + def test_config_access_raises_in_thread_safe_mode(self, monkeypatch): + """Accessing config raises ThreadSafetyError in thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + _ = dj.config.database + + def test_config_access_works_in_normal_mode(self, monkeypatch): + """Accessing config works in normal mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "false") + + import datajoint as dj + + # Should not raise + host = dj.config.database.host + assert isinstance(host, str) + + def test_config_set_raises_in_thread_safe_mode(self, monkeypatch): + """Setting config raises ThreadSafetyError in thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + dj.config.safemode = False + + def test_save_template_works_in_thread_safe_mode(self, monkeypatch, tmp_path): + """save_template is a static method and works in thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + + # Should not raise - save_template is static + config_file = tmp_path / "datajoint.json" + dj.config.save_template(str(config_file), create_secrets_dir=False) + assert config_file.exists() + + +class TestConnThreadSafe: + """Test conn() behavior in thread-safe mode.""" + + def test_conn_raises_in_thread_safe_mode(self, monkeypatch): + """conn() raises ThreadSafetyError in thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + dj.conn() + + +class TestSchemaThreadSafe: + """Test Schema behavior in thread-safe mode.""" + + def test_schema_raises_in_thread_safe_mode(self, monkeypatch): + """Schema() raises ThreadSafetyError in thread-safe mode without connection.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + dj.Schema("test_schema") + + +class TestFreeTableThreadSafe: + """Test FreeTable behavior in thread-safe mode.""" + + def test_freetable_raises_in_thread_safe_mode(self, monkeypatch): + """FreeTable() raises ThreadSafetyError in thread-safe mode without connection.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + import datajoint as dj + from datajoint.errors import ThreadSafetyError + + with pytest.raises(ThreadSafetyError): + dj.FreeTable("test.table") + + +class TestInstance: + """Test Instance class.""" + + def test_instance_import(self): + """Instance class is importable.""" + from datajoint import Instance + + assert Instance is not None + + def test_instance_always_allowed_in_thread_safe_mode(self, monkeypatch): + """Instance() is allowed even in thread-safe mode.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "true") + + from datajoint import Instance + + # Instance class should be accessible + # (actual creation requires valid credentials) + assert callable(Instance) + + +class TestThreadSafetyError: + """Test ThreadSafetyError exception.""" + + def test_error_is_datajoint_error(self): + """ThreadSafetyError is a subclass of DataJointError.""" + from datajoint.errors import DataJointError, ThreadSafetyError + + assert issubclass(ThreadSafetyError, DataJointError) + + def test_error_in_exports(self): + """ThreadSafetyError is exported from datajoint.""" + import datajoint as dj + + assert hasattr(dj, "ThreadSafetyError") From 042dbf20c28daf1ebd2d1e61e0ac67b2b71612ef Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 13 Feb 2026 19:20:43 -0600 Subject: [PATCH 19/39] fix: Make conn() with credentials update singleton connection - conn(host, user, password) now updates the singleton connection instead of creating a separate connection - Remove irrelevant safemode=False from spec examples - thread_safe is set via DJ_THREAD_SAFE env var or config file Co-Authored-By: Claude Opus 4.5 --- docs/design/thread-safe-mode.md | 14 +++++----- src/datajoint/__init__.py | 46 +++++++++++++++++++++++++-------- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index ac6d94e5e..794df6194 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -18,7 +18,6 @@ import datajoint as dj # Configure credentials (no connection yet) dj.config.database.user = "user" dj.config.database.password = "password" -dj.config.safemode = False # First call to conn() or Schema() creates the singleton connection dj.conn() # Creates connection using dj.config credentials @@ -29,6 +28,11 @@ class Mouse(dj.Manual): definition = "..." ``` +Alternatively, pass credentials directly to `conn()`: +```python +dj.conn(host="localhost", user="user", password="password") +``` + Internally: - `dj.config` → delegates to `_global_config` (with thread-safety check) - `dj.conn()` → returns `_singleton_connection` (created lazily) @@ -45,7 +49,6 @@ inst = dj.Instance( user="user", password="password", ) -inst.config.safemode = False schema = inst.Schema("my_schema") @schema @@ -107,7 +110,6 @@ dj.conn() # ThreadSafetyError dj.Schema("name") # ThreadSafetyError inst = dj.Instance(host="h", user="u", password="p") # OK -inst.config.safemode = False # OK inst.Schema("name") # OK ``` @@ -147,10 +149,6 @@ inst = dj.Instance( password="password", ) -# Configure -inst.config.safemode = False -inst.config.stores = {"raw": {"protocol": "file", "location": "/data"}} - # Create schema schema = inst.Schema("my_schema") @@ -162,7 +160,7 @@ class Mouse(dj.Manual): # Use tables Mouse().insert1({"mouse_id": 1}) -Mouse().delete() # Uses inst.config.safemode +Mouse().fetch() ``` ## Implementation diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 04a2deb5f..9359c0eb7 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -103,17 +103,18 @@ def conn( """ Return a persistent connection object. - When called without arguments, returns the singleton connection. - When connection parameters are provided, creates a new Connection. + When called without arguments, returns the singleton connection using + credentials from dj.config. When connection parameters are provided, + updates the singleton connection with the new credentials. Parameters ---------- host : str, optional - Database hostname. + Database hostname. If provided, updates singleton. user : str, optional - Database username. + Database username. If provided, updates singleton. password : str, optional - Database password. + Database password. If provided, updates singleton. reset : bool, optional If True, reset existing connection. Default False. use_tls : bool or dict, optional @@ -127,15 +128,38 @@ def conn( Raises ------ ThreadSafetyError - If thread_safe mode is enabled and using singleton. + If thread_safe mode is enabled. """ - # If any connection params provided, use legacy behavior - if host is not None or user is not None or password is not None or reset: - from .connection import conn as _legacy_conn + from .instance import _singleton_connection, _check_thread_safe, _global_config + import datajoint.instance as instance_module - return _legacy_conn(host, user, password, reset=reset, use_tls=use_tls) + _check_thread_safe() + + # If credentials provided or reset requested, (re)create the singleton + if host is not None or user is not None or password is not None or reset: + # Use provided values or fall back to config + host = host if host is not None else _global_config.database.host + user = user if user is not None else _global_config.database.user + password = password if password is not None else _global_config.database.password + if password is not None and hasattr(password, 'get_secret_value'): + password = password.get_secret_value() + port = _global_config.database.port + use_tls = use_tls if use_tls is not None else _global_config.database.use_tls + + if user is None: + from .errors import DataJointError + raise DataJointError( + "Database user not configured. Set dj.config['database.user'] or pass user= argument." + ) + if password is None: + from .errors import DataJointError + raise DataJointError( + "Database password not configured. Set dj.config['database.password'] or pass password= argument." + ) + + instance_module._singleton_connection = Connection(host, user, password, port, use_tls) + instance_module._singleton_connection._config = _global_config - # Otherwise use singleton connection return _get_singleton_connection() From 5758adfeb17a5639630ac6ec2183ead9a66cf4be Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 12:55:01 -0600 Subject: [PATCH 20/39] fix: Remove unused import, fix mock_cache fixture for 2.0 settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unused `from typing import Callable` in connection.py (lint failure) - Update mock_cache fixture: `cache` → `download_path` (KeyError in test_attach) Co-Authored-By: Claude Opus 4.6 --- src/datajoint/connection.py | 1 - tests/conftest.py | 8 ++++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/datajoint/connection.py b/src/datajoint/connection.py index 934a6694a..827a7a9bd 100644 --- a/src/datajoint/connection.py +++ b/src/datajoint/connection.py @@ -11,7 +11,6 @@ import re import warnings from contextlib import contextmanager -from typing import Callable from . import errors from .adapters import get_adapter diff --git a/tests/conftest.py b/tests/conftest.py index 4d6adf09c..8efaab745 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -536,13 +536,13 @@ def mock_stores(stores_config): @pytest.fixture def mock_cache(tmpdir_factory): - og_cache = dj.config.get("cache") - dj.config["cache"] = tmpdir_factory.mktemp("cache") + og_cache = dj.config.get("download_path") + dj.config["download_path"] = str(tmpdir_factory.mktemp("cache")) yield if og_cache is None: - del dj.config["cache"] + del dj.config["download_path"] else: - dj.config["cache"] = og_cache + dj.config["download_path"] = og_cache @pytest.fixture(scope="session") From 9d9d6757cc880010ebae89f4be3a99d9c4b0c664 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 13:01:48 -0600 Subject: [PATCH 21/39] fix: Resolve lint and test failures in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unused `_singleton_connection` import in __init__.py (F401) - Remove unused `os` import in test_thread_safe.py (F401) - Remove unused `Callable` import in connection.py (F401) - Fix mock_cache fixture: `cache` → `download_path` for 2.0 settings Co-Authored-By: Claude Opus 4.6 --- src/datajoint/__init__.py | 14 +++++++------- src/datajoint/instance.py | 8 +++----- tests/unit/test_thread_safe.py | 2 -- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 9359c0eb7..d7db3e32d 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -130,8 +130,8 @@ def conn( ThreadSafetyError If thread_safe mode is enabled. """ - from .instance import _singleton_connection, _check_thread_safe, _global_config import datajoint.instance as instance_module + from pydantic import SecretStr _check_thread_safe() @@ -140,19 +140,18 @@ def conn( # Use provided values or fall back to config host = host if host is not None else _global_config.database.host user = user if user is not None else _global_config.database.user - password = password if password is not None else _global_config.database.password - if password is not None and hasattr(password, 'get_secret_value'): - password = password.get_secret_value() + raw_password = password if password is not None else _global_config.database.password + password = raw_password.get_secret_value() if isinstance(raw_password, SecretStr) else raw_password port = _global_config.database.port use_tls = use_tls if use_tls is not None else _global_config.database.use_tls if user is None: from .errors import DataJointError - raise DataJointError( - "Database user not configured. Set dj.config['database.user'] or pass user= argument." - ) + + raise DataJointError("Database user not configured. Set dj.config['database.user'] or pass user= argument.") if password is None: from .errors import DataJointError + raise DataJointError( "Database password not configured. Set dj.config['database.password'] or pass password= argument." ) @@ -250,6 +249,7 @@ def FreeTable(conn_or_name, full_table_name: str | None = None) -> _FreeTable: # Called as FreeTable(conn, "db.table") - use provided connection return _FreeTable(conn_or_name, full_table_name) + # ============================================================================= # Lazy imports — heavy dependencies loaded on first access # ============================================================================= diff --git a/src/datajoint/instance.py b/src/datajoint/instance.py index 309fef668..bd057aa57 100644 --- a/src/datajoint/instance.py +++ b/src/datajoint/instance.py @@ -194,8 +194,7 @@ def _check_thread_safe() -> None: """ if _load_thread_safe(): raise ThreadSafetyError( - "Global DataJoint state is disabled in thread-safe mode. " - "Use dj.Instance() to create an isolated instance." + "Global DataJoint state is disabled in thread-safe mode. " "Use dj.Instance() to create an isolated instance." ) @@ -221,9 +220,8 @@ def _get_singleton_connection() -> Connection: host = _global_config.database.host user = _global_config.database.user - password = _global_config.database.password - if password is not None: - password = password.get_secret_value() + raw_password = _global_config.database.password + password = raw_password.get_secret_value() if raw_password is not None else None port = _global_config.database.port use_tls = _global_config.database.use_tls diff --git a/tests/unit/test_thread_safe.py b/tests/unit/test_thread_safe.py index 427c9a3ec..bec45e434 100644 --- a/tests/unit/test_thread_safe.py +++ b/tests/unit/test_thread_safe.py @@ -1,7 +1,5 @@ """Tests for thread-safe mode functionality.""" -import os - import pytest From 0011cd65e8dffbf860553f9213d6b97bf7f047a1 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 13:25:50 -0600 Subject: [PATCH 22/39] docs: Document thread-safety rationale for codec registry The global codec registry is effectively immutable after import: registration runs under Python's import lock, and the only runtime mutation (_load_entry_points) is idempotent under the GIL. Per-instance isolation is unnecessary since codecs are part of the type system, not connection-scoped state. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/codecs.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/datajoint/codecs.py b/src/datajoint/codecs.py index 5c192d46e..f4741a5e4 100644 --- a/src/datajoint/codecs.py +++ b/src/datajoint/codecs.py @@ -43,7 +43,15 @@ class MyTable(dj.Manual): logger = logging.getLogger(__name__.split(".")[0]) -# Global codec registry - maps name to Codec instance +# Global codec registry - maps name to Codec instance. +# +# Thread safety: This registry is effectively immutable after import. +# Registration happens in __init_subclass__ during class definition, which is +# serialized by Python's import lock. The only runtime mutation is +# _load_entry_points(), which is idempotent and guarded by a bool flag; +# under CPython's GIL, concurrent calls may do redundant work but cannot +# corrupt the dict. Codecs are part of the type system (tied to code, not to +# any particular connection or tenant), so per-instance isolation is unnecessary. _codec_registry: dict[str, Codec] = {} _entry_points_loaded: bool = False From 845efc05c81ab072bee0155e98f0f389393e859f Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 13:28:28 -0600 Subject: [PATCH 23/39] docs: Add global state audit to thread-safe mode spec Catalog all 8 module-level mutable globals with thread-safety classification: guarded (config, connection), safe by design (codec registry), or low risk (logging, blob flags, import caches). Co-Authored-By: Claude Opus 4.6 --- docs/design/thread-safe-mode.md | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 794df6194..0068f8d5e 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -253,6 +253,40 @@ All internal code uses `self.connection._config` instead of global `config`: - Tables access config via `self.connection._config` - This works uniformly for both singleton and isolated instances +## Global State Audit + +All module-level mutable state was reviewed for thread-safety implications. + +### Guarded (blocked in thread-safe mode) + +| State | Location | Mechanism | +|-------|----------|-----------| +| `config` singleton | `settings.py:979` | `_ConfigProxy` raises `ThreadSafetyError`; use `inst.config` instead | +| `conn()` singleton | `connection.py:108` | `_check_thread_safe()` guard; use `inst.connection` instead | + +These are the two globals that carry connection-scoped state (credentials, database settings) and are the primary source of cross-tenant interference. + +### Safe by design (no guard needed) + +| State | Location | Rationale | +|-------|----------|-----------| +| `_codec_registry` | `codecs.py:47` | Effectively immutable after import. Registration runs in `__init_subclass__` under Python's import lock. Runtime mutation (`_load_entry_points`) is idempotent under the GIL. Codecs are part of the type system, not connection-scoped. | +| `_entry_points_loaded` | `codecs.py:48` | Bool flag for idempotent lazy loading; worst case under concurrent access is redundant work, not corruption. | + +### Low risk (no guard needed) + +| State | Location | Rationale | +|-------|----------|-----------| +| Logging side effects | `logging.py:8,17,40-45,56` | Standard Python logging configuration. Monkey-patches `Logger` and replaces `sys.excepthook` at import time. Not DataJoint-specific mutable state. | +| `use_32bit_dims` | `blob.py:65` | Runtime flag affecting deserialization. Rarely changed; not connection-scoped. | +| `compression` dict | `blob.py:61` | Decompressor function registry. Populated at import time, effectively read-only thereafter. | +| `_lazy_modules` | `__init__.py:92` | Import caching via `globals()` mutation. Protected by Python's import lock. | +| `ADAPTERS` dict | `adapters/__init__.py:16` | Backend registry. Populated at import time, read-only in practice. | + +### Design principle + +Only state that is **connection-scoped** (credentials, database settings, connection objects) needs thread-safe guards. State that is **code-scoped** (type registries, import caches, logging configuration) is shared across all threads by design and does not vary between tenants. + ## Error Messages - Singleton access: `"Global DataJoint state is disabled in thread-safe mode. Use dj.Instance() to create an isolated instance."` From 04a406d6d0eaea670b41d7312ca59cbbd3698e1e Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 14:04:35 -0600 Subject: [PATCH 24/39] refactor: replace global config reads with connection-scoped config All internal code now reads configuration from self.connection._config instead of the global config singleton. This ensures thread-safe mode works correctly: each Instance's connection carries its own config, and tables/schemas/jobs access it through the connection. Changes across 9 files: - schemas.py: safemode, create_tables default - table.py: safemode in delete/drop, config passed to declare() - expression.py: loglevel in __repr__ - preview.py: display.* settings via query_expression.connection._config - autopopulate.py: jobs.allow_new_pk_fields, jobs.auto_refresh - jobs.py: jobs.default_priority, stale_timeout, keep_completed - declare.py: jobs.add_job_metadata (config param threaded through) - diagram.py: display.diagram_direction (connection stored on instance) - staged_insert.py: get_store_spec() Removed unused `from .settings import config` imports from 7 modules. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/autopopulate.py | 10 +++------- src/datajoint/declare.py | 9 +++++++-- src/datajoint/diagram.py | 7 ++++--- src/datajoint/expression.py | 3 +-- src/datajoint/jobs.py | 28 ++++++++++++++-------------- src/datajoint/preview.py | 4 ++-- src/datajoint/schemas.py | 10 ++++++---- src/datajoint/staged_insert.py | 5 ++--- src/datajoint/table.py | 7 +++---- 9 files changed, 42 insertions(+), 41 deletions(-) diff --git a/src/datajoint/autopopulate.py b/src/datajoint/autopopulate.py index 7660e43ec..ae8be3b82 100644 --- a/src/datajoint/autopopulate.py +++ b/src/datajoint/autopopulate.py @@ -146,10 +146,8 @@ def _declare_check(self, primary_key: list[str], fk_attribute_map: dict[str, tup If native (non-FK) PK attributes are found, unless bypassed via ``dj.config.jobs.allow_new_pk_fields_in_computed_tables = True``. """ - from .settings import config - # Check if validation is bypassed - if config.jobs.allow_new_pk_fields_in_computed_tables: + if self.connection._config.jobs.allow_new_pk_fields_in_computed_tables: return # Check for native (non-FK) primary key attributes @@ -477,8 +475,6 @@ def _populate_distributed( """ from tqdm import tqdm - from .settings import config - # Define a signal handler for SIGTERM def handler(signum, frame): logger.info("Populate terminated by SIGTERM") @@ -489,7 +485,7 @@ def handler(signum, frame): try: # Refresh job queue if configured if refresh is None: - refresh = config.jobs.auto_refresh + refresh = self.connection._config.jobs.auto_refresh if refresh: # Use delay=-1 to ensure jobs are immediately schedulable # (avoids race condition with scheduled_time <= CURRENT_TIMESTAMP(3) check) @@ -659,7 +655,7 @@ def _populate1( key, start_time=datetime.datetime.fromtimestamp(start_time), duration=duration, - version=_get_job_version(), + version=_get_job_version(self.connection._config), ) if jobs is not None: diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py index 375daa07e..fe50e8a66 100644 --- a/src/datajoint/declare.py +++ b/src/datajoint/declare.py @@ -15,7 +15,6 @@ from .codecs import lookup_codec from .condition import translate_attribute from .errors import DataJointError -from .settings import config # Core DataJoint types - scientist-friendly names that are fully supported # These are recorded in field comments using :type: syntax for reconstruction @@ -401,7 +400,7 @@ def prepare_declare( def declare( - full_table_name: str, definition: str, context: dict, adapter + full_table_name: str, definition: str, context: dict, adapter, *, config=None ) -> tuple[str, list[str], list[str], dict[str, tuple[str, str]], list[str], list[str]]: r""" Parse a definition and generate SQL CREATE TABLE statement. @@ -416,6 +415,8 @@ def declare( Namespace for resolving foreign key references. adapter : DatabaseAdapter Database adapter for backend-specific SQL generation. + config : Config, optional + Configuration object. If None, falls back to global config. Returns ------- @@ -464,6 +465,10 @@ def declare( ) = prepare_declare(definition, context, adapter) # Add hidden job metadata for Computed/Imported tables (not parts) + if config is None: + from .settings import config as _config + + config = _config if config.jobs.add_job_metadata: # Check if this is a Computed (__) or Imported (_) table, but not a Part (contains __ in middle) is_computed = table_name.startswith("__") and "__" not in table_name[2:] diff --git a/src/datajoint/diagram.py b/src/datajoint/diagram.py index 7034d122b..75e00c21c 100644 --- a/src/datajoint/diagram.py +++ b/src/datajoint/diagram.py @@ -16,7 +16,6 @@ from .dependencies import topo_sort from .errors import DataJointError -from .settings import config from .table import Table, lookup_class_name from .user_tables import Computed, Imported, Lookup, Manual, Part, _AliasNode, _get_tier @@ -105,6 +104,7 @@ def __init__(self, source, context=None) -> None: self.nodes_to_show = set(source.nodes_to_show) self._expanded_nodes = set(source._expanded_nodes) self.context = source.context + self._connection = source._connection super().__init__(source) return @@ -126,6 +126,7 @@ def __init__(self, source, context=None) -> None: raise DataJointError("Could not find database connection in %s" % repr(source[0])) # initialize graph from dependencies + self._connection = connection connection.dependencies.load() super().__init__(connection.dependencies) @@ -584,7 +585,7 @@ def make_dot(self): Tables are grouped by schema, with the Python module name shown as the group label when available. """ - direction = config.display.diagram_direction + direction = self._connection._config.display.diagram_direction graph = self._make_graph() # Apply collapse logic if needed @@ -857,7 +858,7 @@ def make_mermaid(self) -> str: Session --> Neuron """ graph = self._make_graph() - direction = config.display.diagram_direction + direction = self._connection._config.display.diagram_direction # Apply collapse logic if needed graph, collapsed_counts = self._apply_collapse(graph) diff --git a/src/datajoint/expression.py b/src/datajoint/expression.py index 883853cd3..9b36cf6d0 100644 --- a/src/datajoint/expression.py +++ b/src/datajoint/expression.py @@ -20,7 +20,6 @@ from .errors import DataJointError from .codecs import decode_attribute from .preview import preview, repr_html -from .settings import config logger = logging.getLogger(__name__.split(".")[0]) @@ -1247,7 +1246,7 @@ def __repr__(self): str String representation of the QueryExpression. """ - return super().__repr__() if config["loglevel"].lower() == "debug" else self.preview() + return super().__repr__() if self.connection._config["loglevel"].lower() == "debug" else self.preview() def preview(self, limit=None, width=None): """ diff --git a/src/datajoint/jobs.py b/src/datajoint/jobs.py index e5499eb8e..cf0981836 100644 --- a/src/datajoint/jobs.py +++ b/src/datajoint/jobs.py @@ -24,16 +24,22 @@ logger = logging.getLogger(__name__.split(".")[0]) -def _get_job_version() -> str: +def _get_job_version(config=None) -> str: """ Get version string based on config settings. + Parameters + ---------- + config : Config, optional + Configuration object. If None, falls back to global config. + Returns ------- str Version string, or empty string if version tracking disabled. """ - from .settings import config + if config is None: + from .settings import config method = config.jobs.version_method if method is None or method == "none": @@ -349,17 +355,15 @@ def refresh( 3. Remove stale jobs: jobs older than stale_timeout whose keys not in key_source 4. Remove orphaned jobs: reserved jobs older than orphan_timeout (if specified) """ - from .settings import config - # Ensure jobs table exists if not self.is_declared: self.declare() # Get defaults from config if priority is None: - priority = config.jobs.default_priority + priority = self.connection._config.jobs.default_priority if stale_timeout is None: - stale_timeout = config.jobs.stale_timeout + stale_timeout = self.connection._config.jobs.stale_timeout result = {"added": 0, "removed": 0, "orphaned": 0, "re_pended": 0} @@ -392,7 +396,7 @@ def refresh( pass # Job already exists # 2. Re-pend success jobs if keep_completed=True - if config.jobs.keep_completed: + if self.connection._config.jobs.keep_completed: # Success jobs whose keys are in key_source but not in target # Disable semantic_check for Job table operations (job table PK has different lineage than target) success_to_repend = self.completed.restrict(key_source, semantic_check=False).restrict( @@ -463,7 +467,7 @@ def reserve(self, key: dict) -> bool: "pid": os.getpid(), "connection_id": self.connection.connection_id, "user": self.connection.get_user(), - "version": _get_job_version(), + "version": _get_job_version(self.connection._config), } try: @@ -490,9 +494,7 @@ def complete(self, key: dict, duration: float | None = None) -> None: - If True: updates status to ``'success'`` with completion time and duration - If False: deletes the job entry """ - from .settings import config - - if config.jobs.keep_completed: + if self.connection._config.jobs.keep_completed: # Use server time for completed_time server_now = self.connection.query("SELECT CURRENT_TIMESTAMP").fetchone()[0] pk = self._get_pk(key) @@ -550,13 +552,11 @@ def ignore(self, key: dict) -> None: key : dict Primary key dict of the job. """ - from .settings import config - pk = self._get_pk(key) if pk in self: self.update1({**pk, "status": "ignore"}) else: - priority = config.jobs.default_priority + priority = self.connection._config.jobs.default_priority self.insert1({**pk, "status": "ignore", "priority": priority}) def progress(self) -> dict: diff --git a/src/datajoint/preview.py b/src/datajoint/preview.py index 92d09d874..0b80ad15f 100644 --- a/src/datajoint/preview.py +++ b/src/datajoint/preview.py @@ -2,8 +2,6 @@ import json -from .settings import config - def _format_object_display(json_data): """Format object metadata for display in query results.""" @@ -44,6 +42,7 @@ def _get_blob_placeholder(heading, field_name, html_escape=False): def preview(query_expression, limit, width): heading = query_expression.heading rel = query_expression.proj(*heading.non_blobs) + config = query_expression.connection._config # Object fields use codecs - not specially handled in simplified model object_fields = [] if limit is None: @@ -105,6 +104,7 @@ def get_display_value(tup, f, idx): def repr_html(query_expression): heading = query_expression.heading rel = query_expression.proj(*heading.non_blobs) + config = query_expression.connection._config # Object fields use codecs - not specially handled in simplified model object_fields = [] tuples = rel.to_arrays(limit=config["display.limit"] + 1) diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index 2955fd67d..04ff057c3 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -23,7 +23,6 @@ from .connection import Connection from .heading import Heading from .jobs import Job -from .settings import config from .table import FreeTable, lookup_class_name from .user_tables import Computed, Imported, Lookup, Manual, Part, _get_tier from .utils import to_camel_case, user_choice @@ -120,7 +119,7 @@ def __init__( self.database = None self.context = context self.create_schema = create_schema - self.create_tables = create_tables if create_tables is not None else config.database.create_tables + self.create_tables = create_tables # None means "use connection config default" self.add_objects = add_objects self.declare_list = [] if schema_name: @@ -293,7 +292,10 @@ def _decorate_table(self, table_class: type, context: dict[str, Any], assert_dec # instantiate the class, declare the table if not already instance = table_class() is_declared = instance.is_declared - if not is_declared and not assert_declared and self.create_tables: + create_tables = ( + self.create_tables if self.create_tables is not None else self.connection._config.database.create_tables + ) + if not is_declared and not assert_declared and create_tables: instance.declare(context) self.connection.dependencies.clear() is_declared = is_declared or instance.is_declared @@ -409,7 +411,7 @@ def drop(self, prompt: bool | None = None) -> None: AccessError If insufficient permissions to drop the schema. """ - prompt = config["safemode"] if prompt is None else prompt + prompt = self.connection._config["safemode"] if prompt is None else prompt if not self.exists: logger.info("Schema named `{database}` does not exist. Doing nothing.".format(database=self.database)) diff --git a/src/datajoint/staged_insert.py b/src/datajoint/staged_insert.py index 6ac3819e4..1f6ee7afb 100644 --- a/src/datajoint/staged_insert.py +++ b/src/datajoint/staged_insert.py @@ -14,7 +14,6 @@ import fsspec from .errors import DataJointError -from .settings import config from .storage import StorageBackend, build_object_path @@ -69,7 +68,7 @@ def _ensure_backend(self): """Ensure storage backend is initialized.""" if self._backend is None: try: - spec = config.get_store_spec() # Uses stores.default + spec = self._table.connection._config.get_store_spec() # Uses stores.default self._backend = StorageBackend(spec) except DataJointError: raise DataJointError( @@ -110,7 +109,7 @@ def _get_storage_path(self, field: str, ext: str = "") -> str: ) # Get storage spec (uses stores.default) - spec = config.get_store_spec() + spec = self._table.connection._config.get_store_spec() partition_pattern = spec.get("partition_pattern") token_length = spec.get("token_length", 8) diff --git a/src/datajoint/table.py b/src/datajoint/table.py index 59279489e..a6bc7d2c9 100644 --- a/src/datajoint/table.py +++ b/src/datajoint/table.py @@ -23,7 +23,6 @@ ) from .expression import QueryExpression from .heading import Heading -from .settings import config from .staged_insert import staged_insert1 as _staged_insert1 from .utils import get_master, is_camel_case, user_choice @@ -153,7 +152,7 @@ def declare(self, context=None): "Class names must be in CamelCase, starting with a capital letter." ) sql, _external_stores, primary_key, fk_attribute_map, pre_ddl, post_ddl = declare( - self.full_table_name, self.definition, context, self.connection.adapter + self.full_table_name, self.definition, context, self.connection.adapter, config=self.connection._config ) # Call declaration hook for validation (subclasses like AutoPopulate can override) @@ -1119,7 +1118,7 @@ def strip_quotes(s): raise DataJointError("Exceeded maximum number of delete attempts.") return delete_count - prompt = config["safemode"] if prompt is None else prompt + prompt = self.connection._config["safemode"] if prompt is None else prompt # Start transaction if transaction: @@ -1227,7 +1226,7 @@ def drop(self, prompt: bool | None = None): raise DataJointError( "A table with an applied restriction cannot be dropped. Call drop() on the unrestricted Table." ) - prompt = config["safemode"] if prompt is None else prompt + prompt = self.connection._config["safemode"] if prompt is None else prompt self.connection.dependencies.load() do_drop = True From 092d79fa200576be5be35e5d6e86b847d439d8bf Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 14:35:44 -0600 Subject: [PATCH 25/39] fix: unify global config singleton and fix conn() persistence - instance._global_config now reuses settings.config instead of creating a duplicate Config object. This ensures dj.config["safemode"] = False actually affects self.connection._config["safemode"] reads. - schemas.py now uses _get_singleton_connection() from instance.py instead of the old conn() from connection.py, eliminating the duplicate singleton connection holder. - dj.conn() now only creates a new connection when the singleton doesn't exist or reset=True (not on every call with credentials). - test_uppercase_schema: use prompt=False for drop() calls. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/__init__.py | 8 ++++++-- src/datajoint/instance.py | 5 +++-- src/datajoint/schemas.py | 6 +++--- tests/integration/test_schema.py | 4 ++-- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index d7db3e32d..68eac160f 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -135,8 +135,12 @@ def conn( _check_thread_safe() - # If credentials provided or reset requested, (re)create the singleton - if host is not None or user is not None or password is not None or reset: + # If reset requested, always recreate + # If credentials provided and no singleton exists, create one + # If credentials provided and singleton exists, return existing singleton + if reset or ( + instance_module._singleton_connection is None and (host is not None or user is not None or password is not None) + ): # Use provided values or fall back to config host = host if host is not None else _global_config.database.host user = user if user is not None else _global_config.database.user diff --git a/src/datajoint/instance.py b/src/datajoint/instance.py index bd057aa57..c60e267e1 100644 --- a/src/datajoint/instance.py +++ b/src/datajoint/instance.py @@ -12,7 +12,7 @@ from .connection import Connection from .errors import ThreadSafetyError -from .settings import Config, _create_config +from .settings import Config, _create_config, config as _settings_config if TYPE_CHECKING: from .schemas import Schema as SchemaClass @@ -179,7 +179,8 @@ def __repr__(self) -> str: # The global config is created at module load time and can be modified # The singleton connection is created lazily when conn() or Schema() is called -_global_config: Config = _create_config() +# Reuse the config created in settings.py — there must be exactly one global config +_global_config: Config = _settings_config _singleton_connection: Connection | None = None diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index 04ff057c3..694250c7d 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -16,8 +16,8 @@ import warnings from typing import TYPE_CHECKING, Any -from .connection import conn from .errors import AccessError, DataJointError +from .instance import _get_singleton_connection if TYPE_CHECKING: from .connection import Connection @@ -173,7 +173,7 @@ def activate( if connection is not None: self.connection = connection if self.connection is None: - self.connection = conn() + self.connection = _get_singleton_connection() self.database = schema_name if create_schema is not None: self.create_schema = create_schema @@ -860,7 +860,7 @@ def list_schemas(connection: Connection | None = None) -> list[str]: """ return [ r[0] - for r in (connection or conn()).query( + for r in (connection or _get_singleton_connection()).query( 'SELECT schema_name FROM information_schema.schemata WHERE schema_name <> "information_schema"' ) ] diff --git a/tests/integration/test_schema.py b/tests/integration/test_schema.py index ef621765d..cf053df62 100644 --- a/tests/integration/test_schema.py +++ b/tests/integration/test_schema.py @@ -265,5 +265,5 @@ class Recording(dj.Manual): id: smallint """ - schema2.drop() - schema1.drop() + schema2.drop(prompt=False) + schema1.drop(prompt=False) From 2429a8ab0502ec67771021e9c2d0b5b26c2cbcfb Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 14:38:03 -0600 Subject: [PATCH 26/39] docs: document connection-scoped config architecture in thread-safe mode spec Adds Architecture section covering the object graph, config flow for both singleton and Instance paths, and a table of all connection-scoped config reads across 9 modules. Co-Authored-By: Claude Opus 4.6 --- docs/design/thread-safe-mode.md | 107 ++++++++++++++++++++++++++++---- 1 file changed, 94 insertions(+), 13 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 0068f8d5e..297cb619b 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -163,6 +163,89 @@ Mouse().insert1({"mouse_id": 1}) Mouse().fetch() ``` +## Architecture + +### Object graph + +There is exactly **one** global `Config` object created at import time in `settings.py`. Both the legacy API and the `Instance` API hang off `Connection` objects, each of which carries a `_config` reference. + +``` +settings.py + config = _create_config() ← THE single global Config + +instance.py + _global_config = settings.config ← same object (not a copy) + _singleton_connection = None ← lazily created Connection + +__init__.py + dj.config = _ConfigProxy() ← proxy → _global_config (with thread-safety check) + dj.conn() ← returns _singleton_connection + dj.Schema() ← uses _singleton_connection + dj.FreeTable() ← uses _singleton_connection + +Connection (singleton) + _config → _global_config ← same Config that dj.config writes to + +Connection (Instance) + _config → fresh Config ← isolated per-instance +``` + +### Config flow: singleton path + +``` +dj.config["safemode"] = False + ↓ _ConfigProxy.__setitem__ +_global_config["safemode"] = False (same object as settings.config) + ↓ +Connection._config["safemode"] (points to _global_config) + ↓ +schema.drop() reads self.connection._config["safemode"] → False ✓ +``` + +### Config flow: Instance path + +``` +inst = dj.Instance(host=..., user=..., password=...) + ↓ +inst.config = _create_config() (fresh Config, independent) +inst.connection._config = inst.config + ↓ +inst.config["safemode"] = False + ↓ +schema.drop() reads self.connection._config["safemode"] → False ✓ +``` + +### Key invariant + +**All runtime config reads go through `self.connection._config`**, never through the global `config` directly. This ensures both the singleton and Instance paths read the correct config. + +### Connection-scoped config reads + +Every module that previously imported `from .settings import config` now reads config from the connection: + +| Module | What was read | How it's read now | +|--------|--------------|-------------------| +| `schemas.py` | `config["safemode"]`, `config.database.create_tables` | `self.connection._config[...]` | +| `table.py` | `config["safemode"]` in `delete()`, `drop()` | `self.connection._config["safemode"]` | +| `expression.py` | `config["loglevel"]` in `__repr__()` | `self.connection._config["loglevel"]` | +| `preview.py` | `config["display.*"]` (8 reads) | `query_expression.connection._config[...]` | +| `autopopulate.py` | `config.jobs.allow_new_pk_fields`, `auto_refresh` | `self.connection._config.jobs.*` | +| `jobs.py` | `config.jobs.default_priority`, `stale_timeout`, `keep_completed` | `self.connection._config.jobs.*` | +| `declare.py` | `config.jobs.add_job_metadata` | `config` param (threaded from `table.py`) | +| `diagram.py` | `config.display.diagram_direction` | `self._connection._config.display.*` | +| `staged_insert.py` | `config.get_store_spec()` | `self._table.connection._config.get_store_spec()` | + +### Functions that receive config as a parameter + +Some module-level functions cannot access `self.connection`. Config is threaded through: + +| Function | Caller | How config arrives | +|----------|--------|--------------------| +| `declare()` in `declare.py` | `Table.declare()` in `table.py` | `config=self.connection._config` kwarg | +| `_get_job_version()` in `jobs.py` | `AutoPopulate._make_tuples()`, `Job.reserve()` | `config=self.connection._config` positional arg | + +Both functions accept `config=None` and fall back to the global `settings.config` for backward compatibility. + ## Implementation ### 1. Create Instance class @@ -185,8 +268,11 @@ class Instance: ### 2. Global config and singleton connection ```python -# Module level -_global_config = _create_config() # Created at import time +# settings.py - THE single global config +config = _create_config() # Created at import time + +# instance.py - reuses the same config object +_global_config = settings.config # Same reference, not a copy _singleton_connection = None # Created lazily def _check_thread_safe(): @@ -224,8 +310,12 @@ class _ConfigProxy: config = _ConfigProxy() -# dj.conn() -> singleton connection -def conn(): +# dj.conn() -> singleton connection (persistent across calls) +def conn(host=None, user=None, password=None, *, reset=False): + _check_thread_safe() + if reset or (_singleton_connection is None and credentials_provided): + _singleton_connection = Connection(...) + _singleton_connection._config = _global_config return _get_singleton_connection() # dj.Schema() -> uses singleton connection @@ -238,21 +328,12 @@ def Schema(name, connection=None, **kwargs): # dj.FreeTable() -> uses singleton connection def FreeTable(conn_or_name, full_table_name=None): if full_table_name is None: - # Called as FreeTable("db.table") _check_thread_safe() return _FreeTable(_get_singleton_connection(), conn_or_name) else: - # Called as FreeTable(conn, "db.table") return _FreeTable(conn_or_name, full_table_name) ``` -### 4. Refactor internal code - -All internal code uses `self.connection._config` instead of global `config`: -- Connection stores reference to its config as `self._config` -- Tables access config via `self.connection._config` -- This works uniformly for both singleton and isolated instances - ## Global State Audit All module-level mutable state was reviewed for thread-safety implications. From b88915cba753dea7a0fe6af8c350be036ce84144 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 15:33:08 -0600 Subject: [PATCH 27/39] refactor: thread config through codec and hash_registry chain Eliminate the last global config reads in the runtime path by threading connection-scoped config through the codec encode/decode and hash_registry layers. Encode path: table.py adds _config to the context dict, codecs extract it from key and pass to hash_registry/storage helpers. Decode path: expression.py passes connection to decode_attribute(), which builds a decode key with _config for codec.decode() calls. GC path: scan()/collect() extract config from schemas[0].connection and pass to list_stored_hashes/delete_path/delete_schema_path. All functions accept config=None with lazy fallback to settings.config for backward compatibility. Co-Authored-By: Claude Opus 4.6 --- docs/design/thread-safe-mode.md | 18 +++++++++-- src/datajoint/builtin_codecs/attach.py | 5 +-- src/datajoint/builtin_codecs/filepath.py | 10 ++++-- src/datajoint/builtin_codecs/hash.py | 6 ++-- src/datajoint/builtin_codecs/npy.py | 8 +++-- src/datajoint/builtin_codecs/object.py | 8 +++-- src/datajoint/builtin_codecs/schema.py | 13 ++++++-- src/datajoint/codecs.py | 11 +++++-- src/datajoint/expression.py | 19 ++++++++--- src/datajoint/gc.py | 36 ++++++++++++++------- src/datajoint/hash_registry.py | 35 +++++++++++++------- src/datajoint/table.py | 4 +-- tests/integration/test_gc.py | 8 +++-- tests/integration/test_semantic_matching.py | 4 ++- 14 files changed, 132 insertions(+), 53 deletions(-) diff --git a/docs/design/thread-safe-mode.md b/docs/design/thread-safe-mode.md index 297cb619b..5d7472667 100644 --- a/docs/design/thread-safe-mode.md +++ b/docs/design/thread-safe-mode.md @@ -234,6 +234,14 @@ Every module that previously imported `from .settings import config` now reads c | `declare.py` | `config.jobs.add_job_metadata` | `config` param (threaded from `table.py`) | | `diagram.py` | `config.display.diagram_direction` | `self._connection._config.display.*` | | `staged_insert.py` | `config.get_store_spec()` | `self._table.connection._config.get_store_spec()` | +| `hash_registry.py` | `config.get_store_spec()` in 5 functions | `config` kwarg (falls back to `settings.config`) | +| `builtin_codecs/hash.py` | `config` via hash_registry | `_config` from key dict → `config` kwarg to hash_registry | +| `builtin_codecs/attach.py` | `config.get("download_path")` | `_config` from key dict (falls back to `settings.config`) | +| `builtin_codecs/filepath.py` | `config.get_store_spec()` | `_config` from key dict (falls back to `settings.config`) | +| `builtin_codecs/schema.py` | `config.get_store_spec()` in helpers | `config` kwarg to `_build_path()`, `_get_backend()` | +| `builtin_codecs/npy.py` | `config` via schema helpers | `_config` from key dict → `config` kwarg to helpers | +| `builtin_codecs/object.py` | `config` via schema helpers | `_config` from key dict → `config` kwarg to helpers | +| `gc.py` | `config` via hash_registry | `schemas[0].connection._config` → `config` kwarg | ### Functions that receive config as a parameter @@ -243,8 +251,14 @@ Some module-level functions cannot access `self.connection`. Config is threaded |----------|--------|--------------------| | `declare()` in `declare.py` | `Table.declare()` in `table.py` | `config=self.connection._config` kwarg | | `_get_job_version()` in `jobs.py` | `AutoPopulate._make_tuples()`, `Job.reserve()` | `config=self.connection._config` positional arg | - -Both functions accept `config=None` and fall back to the global `settings.config` for backward compatibility. +| `get_store_backend()` in `hash_registry.py` | codecs, gc.py | `config` kwarg from key dict or schema connection | +| `get_store_subfolding()` in `hash_registry.py` | `put_hash()` | `config` kwarg chained from caller | +| `put_hash()` in `hash_registry.py` | `HashCodec.encode()` | `config` kwarg from `_config` in key dict | +| `get_hash()` in `hash_registry.py` | `HashCodec.decode()` | `config` kwarg from `_config` in key dict | +| `delete_path()` in `hash_registry.py` | `gc.collect()` | `config` kwarg from `schemas[0].connection._config` | +| `decode_attribute()` in `codecs.py` | `expression.py` fetch methods | `connection` kwarg → extracts `connection._config` | + +All functions accept `config=None` and fall back to the global `settings.config` for backward compatibility. ## Implementation diff --git a/src/datajoint/builtin_codecs/attach.py b/src/datajoint/builtin_codecs/attach.py index f9a454b1a..aa10f2424 100644 --- a/src/datajoint/builtin_codecs/attach.py +++ b/src/datajoint/builtin_codecs/attach.py @@ -98,14 +98,15 @@ def decode(self, stored: bytes, *, key: dict | None = None) -> str: """ from pathlib import Path - from ..settings import config - # Split on first null byte null_pos = stored.index(b"\x00") filename = stored[:null_pos].decode("utf-8") contents = stored[null_pos + 1 :] # Write to download path + config = (key or {}).get("_config") + if config is None: + from ..settings import config download_path = Path(config.get("download_path", ".")) download_path.mkdir(parents=True, exist_ok=True) local_path = download_path / filename diff --git a/src/datajoint/builtin_codecs/filepath.py b/src/datajoint/builtin_codecs/filepath.py index 9c05b2385..a0400499b 100644 --- a/src/datajoint/builtin_codecs/filepath.py +++ b/src/datajoint/builtin_codecs/filepath.py @@ -98,9 +98,12 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None """ from datetime import datetime, timezone - from .. import config from ..hash_registry import get_store_backend + config = (key or {}).get("_config") + if config is None: + from ..settings import config + path = str(value) # Get store spec to check prefix configuration @@ -137,7 +140,7 @@ def encode(self, value: Any, *, key: dict | None = None, store_name: str | None raise ValueError(f" must use prefix '{filepath_prefix}' (filepath_prefix). Got path: {path}") # Verify file exists - backend = get_store_backend(store_name) + backend = get_store_backend(store_name, config=config) if not backend.exists(path): raise FileNotFoundError(f"File not found in store '{store_name or 'default'}': {path}") @@ -174,8 +177,9 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any: from ..objectref import ObjectRef from ..hash_registry import get_store_backend + config = (key or {}).get("_config") store_name = stored.get("store") - backend = get_store_backend(store_name) + backend = get_store_backend(store_name, config=config) return ObjectRef.from_json(stored, backend=backend) def validate(self, value: Any) -> None: diff --git a/src/datajoint/builtin_codecs/hash.py b/src/datajoint/builtin_codecs/hash.py index 676c1916f..bb3a3852f 100644 --- a/src/datajoint/builtin_codecs/hash.py +++ b/src/datajoint/builtin_codecs/hash.py @@ -76,7 +76,8 @@ def encode(self, value: bytes, *, key: dict | None = None, store_name: str | Non from ..hash_registry import put_hash schema_name = (key or {}).get("_schema", "unknown") - return put_hash(value, schema_name=schema_name, store_name=store_name) + config = (key or {}).get("_config") + return put_hash(value, schema_name=schema_name, store_name=store_name, config=config) def decode(self, stored: dict, *, key: dict | None = None) -> bytes: """ @@ -96,7 +97,8 @@ def decode(self, stored: dict, *, key: dict | None = None) -> bytes: """ from ..hash_registry import get_hash - return get_hash(stored) + config = (key or {}).get("_config") + return get_hash(stored, config=config) def validate(self, value: Any) -> None: """Validate that value is bytes.""" diff --git a/src/datajoint/builtin_codecs/npy.py b/src/datajoint/builtin_codecs/npy.py index 51c5731ee..54853437b 100644 --- a/src/datajoint/builtin_codecs/npy.py +++ b/src/datajoint/builtin_codecs/npy.py @@ -336,9 +336,10 @@ def encode( # Extract context using inherited helper schema, table, field, primary_key = self._extract_context(key) + config = (key or {}).get("_config") # Build schema-addressed storage path - path, _ = self._build_path(schema, table, field, primary_key, ext=".npy", store_name=store_name) + path, _ = self._build_path(schema, table, field, primary_key, ext=".npy", store_name=store_name, config=config) # Serialize to .npy format buffer = io.BytesIO() @@ -346,7 +347,7 @@ def encode( npy_bytes = buffer.getvalue() # Upload to storage using inherited helper - backend = self._get_backend(store_name) + backend = self._get_backend(store_name, config=config) backend.put_buffer(npy_bytes, path) # Return metadata (includes numpy-specific shape/dtype) @@ -373,5 +374,6 @@ def decode(self, stored: dict, *, key: dict | None = None) -> NpyRef: NpyRef Lazy array reference with metadata access and numpy integration. """ - backend = self._get_backend(stored.get("store")) + config = (key or {}).get("_config") + backend = self._get_backend(stored.get("store"), config=config) return NpyRef(stored, backend) diff --git a/src/datajoint/builtin_codecs/object.py b/src/datajoint/builtin_codecs/object.py index 268651aea..1c0d8c673 100644 --- a/src/datajoint/builtin_codecs/object.py +++ b/src/datajoint/builtin_codecs/object.py @@ -104,6 +104,7 @@ def encode( # Extract context using inherited helper schema, table, field, primary_key = self._extract_context(key) + config = (key or {}).get("_config") # Check for pre-computed metadata (from staged insert) if isinstance(value, dict) and "path" in value: @@ -145,10 +146,10 @@ def encode( raise TypeError(f" expects bytes or path, got {type(value).__name__}") # Build storage path using inherited helper - path, token = self._build_path(schema, table, field, primary_key, ext=ext, store_name=store_name) + path, token = self._build_path(schema, table, field, primary_key, ext=ext, store_name=store_name, config=config) # Get storage backend using inherited helper - backend = self._get_backend(store_name) + backend = self._get_backend(store_name, config=config) # Upload content if is_dir: @@ -192,7 +193,8 @@ def decode(self, stored: dict, *, key: dict | None = None) -> Any: """ from ..objectref import ObjectRef - backend = self._get_backend(stored.get("store")) + config = (key or {}).get("_config") + backend = self._get_backend(stored.get("store"), config=config) return ObjectRef.from_json(stored, backend=backend) def validate(self, value: Any) -> None: diff --git a/src/datajoint/builtin_codecs/schema.py b/src/datajoint/builtin_codecs/schema.py index 18bd62d00..c8cc0759d 100644 --- a/src/datajoint/builtin_codecs/schema.py +++ b/src/datajoint/builtin_codecs/schema.py @@ -108,6 +108,7 @@ def _build_path( primary_key: dict, ext: str | None = None, store_name: str | None = None, + config=None, ) -> tuple[str, str]: """ Build schema-addressed storage path. @@ -131,6 +132,8 @@ def _build_path( File extension (e.g., ".npy", ".zarr"). store_name : str, optional Store name for retrieving partition configuration. + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- @@ -139,7 +142,9 @@ def _build_path( is a unique identifier. """ from ..storage import build_object_path - from .. import config + + if config is None: + from ..settings import config # Get store configuration for partition_pattern and token_length spec = config.get_store_spec(store_name) @@ -156,7 +161,7 @@ def _build_path( token_length=token_length, ) - def _get_backend(self, store_name: str | None = None): + def _get_backend(self, store_name: str | None = None, config=None): """ Get storage backend by name. @@ -164,6 +169,8 @@ def _get_backend(self, store_name: str | None = None): ---------- store_name : str, optional Store name. If None, returns default store. + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- @@ -172,4 +179,4 @@ def _get_backend(self, store_name: str | None = None): """ from ..hash_registry import get_store_backend - return get_store_backend(store_name) + return get_store_backend(store_name, config=config) diff --git a/src/datajoint/codecs.py b/src/datajoint/codecs.py index f4741a5e4..d7fbaf42d 100644 --- a/src/datajoint/codecs.py +++ b/src/datajoint/codecs.py @@ -515,7 +515,7 @@ def lookup_codec(codec_spec: str) -> tuple[Codec, str | None]: # ============================================================================= -def decode_attribute(attr, data, squeeze: bool = False): +def decode_attribute(attr, data, squeeze: bool = False, connection=None): """ Decode raw database value using attribute's codec or native type handling. @@ -528,6 +528,8 @@ def decode_attribute(attr, data, squeeze: bool = False): attr: Attribute from the table's heading. data: Raw value fetched from the database. squeeze: If True, remove singleton dimensions from numpy arrays. + connection: Connection instance for config access. If provided, + ``connection._config`` is passed to codecs via the key dict. Returns: Decoded Python value. @@ -560,9 +562,14 @@ def decode_attribute(attr, data, squeeze: bool = False): elif final_dtype.lower() == "binary(16)": data = uuid_module.UUID(bytes=data) + # Build decode key with config if connection is available + decode_key = None + if connection is not None: + decode_key = {"_config": connection._config} + # Apply decoders in reverse order: innermost first, then outermost for codec in reversed(type_chain): - data = codec.decode(data, key=None) + data = codec.decode(data, key=decode_key) # Squeeze arrays if requested if squeeze and isinstance(data, np.ndarray): diff --git a/src/datajoint/expression.py b/src/datajoint/expression.py index 9b36cf6d0..bc6c7cee7 100644 --- a/src/datajoint/expression.py +++ b/src/datajoint/expression.py @@ -715,7 +715,7 @@ def fetch( import warnings warnings.warn( - "fetch() is deprecated in DataJoint 2.0. " "Use to_dicts(), to_pandas(), to_arrays(), or keys() instead.", + "fetch() is deprecated in DataJoint 2.0. Use to_dicts(), to_pandas(), to_arrays(), or keys() instead.", DeprecationWarning, stacklevel=2, ) @@ -817,7 +817,10 @@ def fetch1(self, *attrs, squeeze=False): row = cursor.fetchone() if not row or cursor.fetchone(): raise DataJointError("fetch1 requires exactly one tuple in the input set.") - return {name: decode_attribute(heading[name], row[name], squeeze=squeeze) for name in heading.names} + return { + name: decode_attribute(heading[name], row[name], squeeze=squeeze, connection=self.connection) + for name in heading.names + } else: # Handle "KEY" specially - it means primary key columns def is_key(attr): @@ -892,7 +895,10 @@ def to_dicts(self, order_by=None, limit=None, offset=None, squeeze=False): expr = self._apply_top(order_by, limit, offset) cursor = expr.cursor(as_dict=True) heading = expr.heading - return [{name: decode_attribute(heading[name], row[name], squeeze) for name in heading.names} for row in cursor] + return [ + {name: decode_attribute(heading[name], row[name], squeeze, connection=expr.connection) for name in heading.names} + for row in cursor + ] def to_pandas(self, order_by=None, limit=None, offset=None, squeeze=False): """ @@ -1063,7 +1069,7 @@ def to_arrays(self, *attrs, include_key=False, order_by=None, limit=None, offset return result_arrays[0] if len(attrs) == 1 else tuple(result_arrays) else: # Fetch all columns as structured array - get = partial(decode_attribute, squeeze=squeeze) + get = partial(decode_attribute, squeeze=squeeze, connection=expr.connection) cursor = expr.cursor(as_dict=False) rows = list(cursor.fetchall()) @@ -1217,7 +1223,10 @@ def __iter__(self): cursor = self.cursor(as_dict=True) heading = self.heading for row in cursor: - yield {name: decode_attribute(heading[name], row[name], squeeze=False) for name in heading.names} + yield { + name: decode_attribute(heading[name], row[name], squeeze=False, connection=self.connection) + for name in heading.names + } def cursor(self, as_dict=False): """ diff --git a/src/datajoint/gc.py b/src/datajoint/gc.py index 71a4e8d08..4483bb395 100644 --- a/src/datajoint/gc.py +++ b/src/datajoint/gc.py @@ -308,7 +308,7 @@ def scan_schema_references( return referenced -def list_stored_hashes(store_name: str | None = None) -> dict[str, int]: +def list_stored_hashes(store_name: str | None = None, config=None) -> dict[str, int]: """ List all hash-addressed items in storage. @@ -320,6 +320,8 @@ def list_stored_hashes(store_name: str | None = None) -> dict[str, int]: ---------- store_name : str, optional Store to scan (None = default store). + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- @@ -328,7 +330,7 @@ def list_stored_hashes(store_name: str | None = None) -> dict[str, int]: """ import re - backend = get_store_backend(store_name) + backend = get_store_backend(store_name, config=config) stored: dict[str, int] = {} # Hash-addressed storage: _hash/{schema}/{subfolders...}/{hash} @@ -369,7 +371,7 @@ def list_stored_hashes(store_name: str | None = None) -> dict[str, int]: return stored -def list_schema_paths(store_name: str | None = None) -> dict[str, int]: +def list_schema_paths(store_name: str | None = None, config=None) -> dict[str, int]: """ List all schema-addressed items in storage. @@ -380,13 +382,15 @@ def list_schema_paths(store_name: str | None = None) -> dict[str, int]: ---------- store_name : str, optional Store to scan (None = default store). + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- dict[str, int] Dict mapping storage path to size in bytes. """ - backend = get_store_backend(store_name) + backend = get_store_backend(store_name, config=config) stored: dict[str, int] = {} try: @@ -427,7 +431,7 @@ def list_schema_paths(store_name: str | None = None) -> dict[str, int]: return stored -def delete_schema_path(path: str, store_name: str | None = None) -> bool: +def delete_schema_path(path: str, store_name: str | None = None, config=None) -> bool: """ Delete a schema-addressed directory from storage. @@ -437,13 +441,15 @@ def delete_schema_path(path: str, store_name: str | None = None) -> bool: Storage path (relative to store root). store_name : str, optional Store name (None = default store). + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- bool True if deleted, False if not found. """ - backend = get_store_backend(store_name) + backend = get_store_backend(store_name, config=config) try: full_path = backend._full_path(path) @@ -497,15 +503,18 @@ def scan( if not schemas: raise DataJointError("At least one schema must be provided") + # Extract config from the first schema's connection + _config = schemas[0].connection._config if schemas else None + # --- Hash-addressed storage --- hash_referenced = scan_hash_references(*schemas, store_name=store_name, verbose=verbose) - hash_stored = list_stored_hashes(store_name) + hash_stored = list_stored_hashes(store_name, config=_config) orphaned_hashes = set(hash_stored.keys()) - hash_referenced hash_orphaned_bytes = sum(hash_stored.get(h, 0) for h in orphaned_hashes) # --- Schema-addressed storage --- schema_paths_referenced = scan_schema_references(*schemas, store_name=store_name, verbose=verbose) - schema_paths_stored = list_schema_paths(store_name) + schema_paths_stored = list_schema_paths(store_name, config=_config) orphaned_paths = set(schema_paths_stored.keys()) - schema_paths_referenced schema_paths_orphaned_bytes = sum(schema_paths_stored.get(p, 0) for p in orphaned_paths) @@ -570,6 +579,9 @@ def collect( # First scan to find orphaned items stats = scan(*schemas, store_name=store_name, verbose=verbose) + # Extract config from the first schema's connection + _config = schemas[0].connection._config if schemas else None + hash_deleted = 0 schema_paths_deleted = 0 bytes_freed = 0 @@ -578,12 +590,12 @@ def collect( if not dry_run: # Delete orphaned hashes if stats["hash_orphaned"] > 0: - hash_stored = list_stored_hashes(store_name) + hash_stored = list_stored_hashes(store_name, config=_config) for path in stats["orphaned_hashes"]: try: size = hash_stored.get(path, 0) - if delete_path(path, store_name): + if delete_path(path, store_name, config=_config): hash_deleted += 1 bytes_freed += size if verbose: @@ -594,12 +606,12 @@ def collect( # Delete orphaned schema paths if stats["schema_paths_orphaned"] > 0: - schema_paths_stored = list_schema_paths(store_name) + schema_paths_stored = list_schema_paths(store_name, config=_config) for path in stats["orphaned_paths"]: try: size = schema_paths_stored.get(path, 0) - if delete_schema_path(path, store_name): + if delete_schema_path(path, store_name, config=_config): schema_paths_deleted += 1 bytes_freed += size if verbose: diff --git a/src/datajoint/hash_registry.py b/src/datajoint/hash_registry.py index a285e5df1..331c836cd 100644 --- a/src/datajoint/hash_registry.py +++ b/src/datajoint/hash_registry.py @@ -38,7 +38,6 @@ from typing import Any from .errors import DataJointError -from .settings import config from .storage import StorageBackend logger = logging.getLogger(__name__.split(".")[0]) @@ -131,7 +130,7 @@ def build_hash_path( return f"_hash/{schema_name}/{content_hash}" -def get_store_backend(store_name: str | None = None) -> StorageBackend: +def get_store_backend(store_name: str | None = None, config=None) -> StorageBackend: """ Get a StorageBackend for hash-addressed storage. @@ -139,18 +138,22 @@ def get_store_backend(store_name: str | None = None) -> StorageBackend: ---------- store_name : str, optional Name of the store to use. If None, uses stores.default. + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- StorageBackend StorageBackend instance. """ + if config is None: + from .settings import config # get_store_spec handles None by using stores.default spec = config.get_store_spec(store_name) return StorageBackend(spec) -def get_store_subfolding(store_name: str | None = None) -> tuple[int, ...] | None: +def get_store_subfolding(store_name: str | None = None, config=None) -> tuple[int, ...] | None: """ Get the subfolding configuration for a store. @@ -158,12 +161,16 @@ def get_store_subfolding(store_name: str | None = None) -> tuple[int, ...] | Non ---------- store_name : str, optional Name of the store. If None, uses stores.default. + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- tuple[int, ...] | None Subfolding pattern (e.g., (2, 2)) or None for flat storage. """ + if config is None: + from .settings import config spec = config.get_store_spec(store_name) subfolding = spec.get("subfolding") if subfolding is not None: @@ -175,6 +182,7 @@ def put_hash( data: bytes, schema_name: str, store_name: str | None = None, + config=None, ) -> dict[str, Any]: """ Store content using hash-addressed storage. @@ -193,6 +201,8 @@ def put_hash( Database/schema name for path isolation. store_name : str, optional Name of the store. If None, uses default store. + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- @@ -200,10 +210,10 @@ def put_hash( Metadata dict with keys: hash, path, schema, store, size. """ content_hash = compute_hash(data) - subfolding = get_store_subfolding(store_name) + subfolding = get_store_subfolding(store_name, config=config) path = build_hash_path(content_hash, schema_name, subfolding) - backend = get_store_backend(store_name) + backend = get_store_backend(store_name, config=config) # Check if content already exists (deduplication within schema) if not backend.exists(path): @@ -221,7 +231,7 @@ def put_hash( } -def get_hash(metadata: dict[str, Any]) -> bytes: +def get_hash(metadata: dict[str, Any], config=None) -> bytes: """ Retrieve content using stored metadata. @@ -232,6 +242,8 @@ def get_hash(metadata: dict[str, Any]) -> bytes: ---------- metadata : dict Metadata dict with keys: path, hash, store (optional). + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- @@ -249,15 +261,13 @@ def get_hash(metadata: dict[str, Any]) -> bytes: expected_hash = metadata["hash"] store_name = metadata.get("store") - backend = get_store_backend(store_name) + backend = get_store_backend(store_name, config=config) data = backend.get_buffer(path) # Verify hash for integrity actual_hash = compute_hash(data) if actual_hash != expected_hash: - raise DataJointError( - f"Hash mismatch: expected {expected_hash}, got {actual_hash}. " f"Data at {path} may be corrupted." - ) + raise DataJointError(f"Hash mismatch: expected {expected_hash}, got {actual_hash}. Data at {path} may be corrupted.") return data @@ -265,6 +275,7 @@ def get_hash(metadata: dict[str, Any]) -> bytes: def delete_path( path: str, store_name: str | None = None, + config=None, ) -> bool: """ Delete content at the specified path from storage. @@ -278,6 +289,8 @@ def delete_path( Storage path (as stored in metadata). store_name : str, optional Name of the store. If None, uses default store. + config : Config, optional + Config instance. If None, falls back to global settings.config. Returns ------- @@ -288,7 +301,7 @@ def delete_path( -------- This permanently deletes content. Ensure no references exist first. """ - backend = get_store_backend(store_name) + backend = get_store_backend(store_name, config=config) if backend.exists(path): backend.remove(path) diff --git a/src/datajoint/table.py b/src/datajoint/table.py index a6bc7d2c9..89d852471 100644 --- a/src/datajoint/table.py +++ b/src/datajoint/table.py @@ -140,8 +140,7 @@ def declare(self, context=None): class_name = self.class_name if "_" in class_name: warnings.warn( - f"Table class name `{class_name}` contains underscores. " - "CamelCase names without underscores are recommended.", + f"Table class name `{class_name}` contains underscores. CamelCase names without underscores are recommended.", UserWarning, stacklevel=2, ) @@ -1413,6 +1412,7 @@ def __make_placeholder(self, name, value, ignore_extra_fields=False, row=None): "_schema": self.database, "_table": self.table_name, "_field": name, + "_config": self.connection._config, } # Add primary key values from row if available if row is not None: diff --git a/tests/integration/test_gc.py b/tests/integration/test_gc.py index 7eca79f37..7b62c0515 100644 --- a/tests/integration/test_gc.py +++ b/tests/integration/test_gc.py @@ -251,7 +251,9 @@ def test_deletes_orphaned_hashes(self, mock_scan, mock_list_stored, mock_delete) assert stats["hash_deleted"] == 1 assert stats["bytes_freed"] == 100 assert stats["dry_run"] is False - mock_delete.assert_called_once_with("_hash/schema/orphan_path", "test_store") + mock_delete.assert_called_once_with( + "_hash/schema/orphan_path", "test_store", config=mock_schema.connection._config + ) @patch("datajoint.gc.delete_schema_path") @patch("datajoint.gc.list_schema_paths") @@ -278,7 +280,9 @@ def test_deletes_orphaned_schemas(self, mock_scan, mock_list_schemas, mock_delet assert stats["schema_paths_deleted"] == 1 assert stats["bytes_freed"] == 500 assert stats["dry_run"] is False - mock_delete.assert_called_once_with("schema/table/pk/field", "test_store") + mock_delete.assert_called_once_with( + "schema/table/pk/field", "test_store", config=mock_schema.connection._config + ) class TestFormatStats: diff --git a/tests/integration/test_semantic_matching.py b/tests/integration/test_semantic_matching.py index d8dff27fa..485525624 100644 --- a/tests/integration/test_semantic_matching.py +++ b/tests/integration/test_semantic_matching.py @@ -325,7 +325,9 @@ class TestRebuildLineageUtility: def test_rebuild_lineage_method_exists(self): """The rebuild_lineage method should exist on Schema.""" - assert hasattr(dj.Schema, "rebuild_lineage") + from datajoint.schemas import Schema as _Schema + + assert hasattr(_Schema, "rebuild_lineage") def test_rebuild_lineage_populates_table(self, schema_semantic): """schema.rebuild_lineage() should populate the ~lineage table.""" From 9b885cd6f40510cfe8c751cdafee4ad5e3518954 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 15:58:30 -0600 Subject: [PATCH 28/39] refactor: make dj.Schema a proper class subclassing _Schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rename schemas.Schema → schemas._Schema (internal) and define dj.Schema as a class that inherits from _Schema with a thread-safety check in __init__. This eliminates the confusing function-vs-class duality where dj.Schema was a function wrapper and schemas.Schema was the class. Now dj.Schema is a real class: isinstance, hasattr, and subclass checks all work naturally. The test for rebuild_lineage can use dj.Schema directly instead of importing from datajoint.schemas. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/__init__.py | 70 +++++++++++---------- src/datajoint/gc.py | 2 +- src/datajoint/instance.py | 6 +- src/datajoint/migrate.py | 6 +- src/datajoint/schemas.py | 4 +- tests/integration/test_semantic_matching.py | 4 +- 6 files changed, 48 insertions(+), 44 deletions(-) diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 68eac160f..3b0915de8 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -80,7 +80,7 @@ from .instance import Instance, _ConfigProxy, _get_singleton_connection, _global_config, _check_thread_safe from .logging import logger from .objectref import ObjectRef -from .schemas import Schema as _Schema, VirtualModule, list_schemas, virtual_schema +from .schemas import _Schema, VirtualModule, list_schemas, virtual_schema from .table import FreeTable as _FreeTable, Table, ValidationResult from .user_tables import Computed, Imported, Lookup, Manual, Part from .version import __version__ @@ -166,26 +166,20 @@ def conn( return _get_singleton_connection() -def Schema( - schema_name: str | None = None, - context: dict | None = None, - *, - connection: Connection | None = None, - create_schema: bool = True, - create_tables: bool | None = None, - add_objects: dict | None = None, -) -> _Schema: +class Schema(_Schema): """ - Create a Schema for binding table classes to a database schema. + Decorator that binds table classes to a database schema. When connection is not provided, uses the singleton connection. + In thread-safe mode (``DJ_THREAD_SAFE=true``), a connection must be + provided explicitly or use ``dj.Instance().Schema()`` instead. Parameters ---------- schema_name : str, optional - Database schema name. + Database schema name. If omitted, call ``activate()`` later. context : dict, optional - Namespace for foreign key lookup. + Namespace for foreign key lookup. None uses caller's context. connection : Connection, optional Database connection. Defaults to singleton connection. create_schema : bool, optional @@ -195,29 +189,41 @@ def Schema( add_objects : dict, optional Additional objects for declaration context. - Returns - ------- - Schema - A Schema bound to the specified connection. - Raises ------ ThreadSafetyError - If thread_safe mode is enabled and using singleton. + If thread_safe mode is enabled and no connection is provided. + + Examples + -------- + >>> schema = dj.Schema('my_schema') + >>> @schema + ... class Session(dj.Manual): + ... definition = ''' + ... session_id : int + ... ''' """ - if connection is None: - # Use singleton connection - will raise ThreadSafetyError if thread_safe=True - _check_thread_safe() - connection = _get_singleton_connection() - - return _Schema( - schema_name, - context=context, - connection=connection, - create_schema=create_schema, - create_tables=create_tables, - add_objects=add_objects, - ) + + def __init__( + self, + schema_name: str | None = None, + context: dict | None = None, + *, + connection: Connection | None = None, + create_schema: bool = True, + create_tables: bool | None = None, + add_objects: dict | None = None, + ) -> None: + if connection is None: + _check_thread_safe() + super().__init__( + schema_name, + context=context, + connection=connection, + create_schema=create_schema, + create_tables=create_tables, + add_objects=add_objects, + ) def FreeTable(conn_or_name, full_table_name: str | None = None) -> _FreeTable: diff --git a/src/datajoint/gc.py b/src/datajoint/gc.py index 4483bb395..7f083416b 100644 --- a/src/datajoint/gc.py +++ b/src/datajoint/gc.py @@ -44,7 +44,7 @@ from .errors import DataJointError if TYPE_CHECKING: - from .schemas import Schema + from .schemas import _Schema as Schema logger = logging.getLogger(__name__.split(".")[0]) diff --git a/src/datajoint/instance.py b/src/datajoint/instance.py index c60e267e1..e90a2b574 100644 --- a/src/datajoint/instance.py +++ b/src/datajoint/instance.py @@ -15,7 +15,7 @@ from .settings import Config, _create_config, config as _settings_config if TYPE_CHECKING: - from .schemas import Schema as SchemaClass + from .schemas import _Schema as SchemaClass from .table import FreeTable as FreeTableClass @@ -140,9 +140,9 @@ def Schema( Schema A Schema using this instance's connection. """ - from .schemas import Schema + from .schemas import _Schema - return Schema( + return _Schema( schema_name, context=context, connection=self.connection, diff --git a/src/datajoint/migrate.py b/src/datajoint/migrate.py index d48afae62..57862dafa 100644 --- a/src/datajoint/migrate.py +++ b/src/datajoint/migrate.py @@ -39,7 +39,7 @@ ) if TYPE_CHECKING: - from .schemas import Schema + from .schemas import _Schema as Schema logger = logging.getLogger(__name__.split(".")[0]) @@ -653,7 +653,7 @@ def add_job_metadata_columns(target, dry_run: bool = True) -> dict: - Future populate() calls will fill in metadata for new rows - This does NOT retroactively populate metadata for existing rows """ - from .schemas import Schema + from .schemas import _Schema from .table import Table result = { @@ -664,7 +664,7 @@ def add_job_metadata_columns(target, dry_run: bool = True) -> dict: } # Determine tables to process - if isinstance(target, Schema): + if isinstance(target, _Schema): schema = target # Get all user tables in the schema tables_query = """ diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index 694250c7d..bb36d5379 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -53,7 +53,7 @@ def ordered_dir(class_: type) -> list[str]: return attr_list -class Schema: +class _Schema: """ Decorator that binds table classes to a database schema. @@ -832,7 +832,7 @@ def __init__( Additional objects to add to the module namespace. """ super(VirtualModule, self).__init__(name=module_name) - _schema = Schema( + _schema = _Schema( schema_name, create_schema=create_schema, create_tables=create_tables, diff --git a/tests/integration/test_semantic_matching.py b/tests/integration/test_semantic_matching.py index 485525624..d8dff27fa 100644 --- a/tests/integration/test_semantic_matching.py +++ b/tests/integration/test_semantic_matching.py @@ -325,9 +325,7 @@ class TestRebuildLineageUtility: def test_rebuild_lineage_method_exists(self): """The rebuild_lineage method should exist on Schema.""" - from datajoint.schemas import Schema as _Schema - - assert hasattr(_Schema, "rebuild_lineage") + assert hasattr(dj.Schema, "rebuild_lineage") def test_rebuild_lineage_populates_table(self, schema_semantic): """schema.rebuild_lineage() should populate the ~lineage table.""" From f0ef848781624e401690588096dc792236e45dc1 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 16:00:14 -0600 Subject: [PATCH 29/39] fix: lint formatting in test_gc.py Co-Authored-By: Claude Opus 4.6 --- tests/integration/test_gc.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/tests/integration/test_gc.py b/tests/integration/test_gc.py index 7b62c0515..47ca0a96d 100644 --- a/tests/integration/test_gc.py +++ b/tests/integration/test_gc.py @@ -251,9 +251,7 @@ def test_deletes_orphaned_hashes(self, mock_scan, mock_list_stored, mock_delete) assert stats["hash_deleted"] == 1 assert stats["bytes_freed"] == 100 assert stats["dry_run"] is False - mock_delete.assert_called_once_with( - "_hash/schema/orphan_path", "test_store", config=mock_schema.connection._config - ) + mock_delete.assert_called_once_with("_hash/schema/orphan_path", "test_store", config=mock_schema.connection._config) @patch("datajoint.gc.delete_schema_path") @patch("datajoint.gc.list_schema_paths") @@ -280,9 +278,7 @@ def test_deletes_orphaned_schemas(self, mock_scan, mock_list_schemas, mock_delet assert stats["schema_paths_deleted"] == 1 assert stats["bytes_freed"] == 500 assert stats["dry_run"] is False - mock_delete.assert_called_once_with( - "schema/table/pk/field", "test_store", config=mock_schema.connection._config - ) + mock_delete.assert_called_once_with("schema/table/pk/field", "test_store", config=mock_schema.connection._config) class TestFormatStats: From 33ef072f8c6d04898d35d205c920f3de4a7256b8 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Wed, 18 Feb 2026 23:11:24 -0600 Subject: [PATCH 30/39] fix: pass config and backend explicitly to Connection constructor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Connection.__init__ now accepts `backend` and `config_override` kwargs instead of reading from the module-level global config. This ensures Instance creates connections using its own config, not the global one. Also removes set_thread_safe() — thread-safe mode is an infrastructure decision set via DJ_THREAD_SAFE env var, not a runtime toggle. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/__init__.py | 3 +-- src/datajoint/connection.py | 20 ++++++++++++++------ src/datajoint/instance.py | 29 +++++++++++++++-------------- 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/datajoint/__init__.py b/src/datajoint/__init__.py index 3b0915de8..7704ec1bc 100644 --- a/src/datajoint/__init__.py +++ b/src/datajoint/__init__.py @@ -160,8 +160,7 @@ def conn( "Database password not configured. Set dj.config['database.password'] or pass password= argument." ) - instance_module._singleton_connection = Connection(host, user, password, port, use_tls) - instance_module._singleton_connection._config = _global_config + instance_module._singleton_connection = Connection(host, user, password, port, use_tls, config_override=_global_config) return _get_singleton_connection() diff --git a/src/datajoint/connection.py b/src/datajoint/connection.py index 827a7a9bd..e9eab0921 100644 --- a/src/datajoint/connection.py +++ b/src/datajoint/connection.py @@ -11,12 +11,16 @@ import re import warnings from contextlib import contextmanager +from typing import TYPE_CHECKING from . import errors from .adapters import get_adapter from .blob import pack, unpack from .dependencies import Dependencies from .settings import config + +if TYPE_CHECKING: + from .settings import Config from .version import __version__ logger = logging.getLogger(__name__.split(".")[0]) @@ -163,13 +167,19 @@ def __init__( password: str, port: int | None = None, use_tls: bool | dict | None = None, + *, + backend: str | None = None, + config_override: "Config | None" = None, ) -> None: + # Config reference — use override if provided, else global config + self._config = config_override if config_override is not None else config + if ":" in host: # the port in the hostname overrides the port argument host, port = host.split(":") port = int(port) elif port is None: - port = config["database.port"] + port = self._config["database.port"] self.conn_info = dict(host=host, port=port, user=user, passwd=password) if use_tls is not False: # use_tls can be: None (auto-detect), True (enable), False (disable), or dict (custom config) @@ -186,11 +196,9 @@ def __init__( self._query_cache = None self._is_closed = True # Mark as closed until connect() succeeds - # Config reference - defaults to global config, but Instance sets its own - self._config = config - - # Select adapter based on configured backend - backend = self._config["database.backend"] + # Select adapter: explicit backend > config backend + if backend is None: + backend = self._config["database.backend"] self.adapter = get_adapter(backend) self.connect() diff --git a/src/datajoint/instance.py b/src/datajoint/instance.py index e90a2b574..f0cbe96da 100644 --- a/src/datajoint/instance.py +++ b/src/datajoint/instance.py @@ -21,21 +21,19 @@ def _load_thread_safe() -> bool: """ - Load thread_safe setting from environment or config file. + Check if thread-safe mode is enabled. + + Thread-safe mode is controlled by the ``DJ_THREAD_SAFE`` environment + variable, which must be set before the process starts. Returns ------- bool True if thread-safe mode is enabled. """ - # Check environment variable first env_val = os.environ.get("DJ_THREAD_SAFE", "").lower() if env_val in ("true", "1", "yes"): return True - if env_val in ("false", "0", "no"): - return False - - # Default: thread-safe mode is off return False @@ -104,11 +102,16 @@ def __init__( if port is None: port = self.config.database.port - # Create connection - self.connection = Connection(host, user, password, port, use_tls) - - # Attach config to connection so tables can access it - self.connection._config = self.config + # Create connection with this instance's config and backend + self.connection = Connection( + host, + user, + password, + port, + use_tls, + backend=self.config.database.backend, + config_override=self.config, + ) def Schema( self, @@ -235,9 +238,7 @@ def _get_singleton_connection() -> Connection: "Database password not configured. Set dj.config['database.password'] or DJ_PASS environment variable." ) - _singleton_connection = Connection(host, user, password, port, use_tls) - # Attach global config to connection - _singleton_connection._config = _global_config + _singleton_connection = Connection(host, user, password, port, use_tls, config_override=_global_config) return _singleton_connection From da43ed37b5638b3a5c7282c505fcfcc19bac0c11 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 19 Feb 2026 09:26:02 -0600 Subject: [PATCH 31/39] refactor: route schema queries through adapter methods - list_schemas() uses adapter.list_schemas_sql() - make_classes() uses adapter.list_tables_sql() instead of SHOW TABLES - Schema.exists uses new adapter.schema_exists_sql() method - Added schema_exists_sql() to base, MySQL, and PostgreSQL adapters Co-Authored-By: Claude Opus 4.6 --- src/datajoint/adapters/base.py | 17 +++++++++++++++++ src/datajoint/adapters/mysql.py | 4 ++++ src/datajoint/adapters/postgres.py | 4 ++++ src/datajoint/schemas.py | 18 ++++-------------- 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/datajoint/adapters/base.py b/src/datajoint/adapters/base.py index 35b32ed5f..0fa5aa24f 100644 --- a/src/datajoint/adapters/base.py +++ b/src/datajoint/adapters/base.py @@ -615,6 +615,23 @@ def list_schemas_sql(self) -> str: """ ... + @abstractmethod + def schema_exists_sql(self, schema_name: str) -> str: + """ + Generate query to check if a schema exists. + + Parameters + ---------- + schema_name : str + Name of schema to check. + + Returns + ------- + str + SQL query that returns a row if the schema exists. + """ + ... + @abstractmethod def list_tables_sql(self, schema_name: str, pattern: str | None = None) -> str: """ diff --git a/src/datajoint/adapters/mysql.py b/src/datajoint/adapters/mysql.py index 21aab2908..f856fcd0b 100644 --- a/src/datajoint/adapters/mysql.py +++ b/src/datajoint/adapters/mysql.py @@ -611,6 +611,10 @@ def list_schemas_sql(self) -> str: """Query to list all databases in MySQL.""" return "SELECT schema_name FROM information_schema.schemata" + def schema_exists_sql(self, schema_name: str) -> str: + """Query to check if a database exists in MySQL.""" + return f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = {self.quote_string(schema_name)}" + def list_tables_sql(self, schema_name: str, pattern: str | None = None) -> str: """Query to list tables in a database.""" sql = f"SHOW TABLES IN {self.quote_identifier(schema_name)}" diff --git a/src/datajoint/adapters/postgres.py b/src/datajoint/adapters/postgres.py index 12fecae6a..5298d647d 100644 --- a/src/datajoint/adapters/postgres.py +++ b/src/datajoint/adapters/postgres.py @@ -721,6 +721,10 @@ def list_schemas_sql(self) -> str: "WHERE schema_name NOT IN ('pg_catalog', 'information_schema')" ) + def schema_exists_sql(self, schema_name: str) -> str: + """Query to check if a schema exists in PostgreSQL.""" + return f"SELECT schema_name FROM information_schema.schemata WHERE schema_name = {self.quote_string(schema_name)}" + def list_tables_sql(self, schema_name: str, pattern: str | None = None) -> str: """Query to list tables in a schema.""" sql = ( diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index de5e15424..1f35e2a04 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -345,7 +345,7 @@ def make_classes(self, into: dict[str, Any] | None = None) -> None: del frame tables = [ row[0] - for row in self.connection.query("SHOW TABLES in `%s`" % self.database) + for row in self.connection.query(self.connection.adapter.list_tables_sql(self.database)) if lookup_class_name("`{db}`.`{tab}`".format(db=self.database, tab=row[0]), into, 0) is None ] master_classes = (Lookup, Manual, Imported, Computed) @@ -423,13 +423,7 @@ def exists(self) -> bool: """ if self.database is None: raise DataJointError("Schema must be activated first.") - return bool( - self.connection.query( - "SELECT schema_name FROM information_schema.schemata WHERE schema_name = '{database}'".format( - database=self.database - ) - ).rowcount - ) + return bool(self.connection.query(self.connection.adapter.schema_exists_sql(self.database)).rowcount) @property def lineage_table_exists(self) -> bool: @@ -838,12 +832,8 @@ def list_schemas(connection: Connection | None = None) -> list[str]: list[str] Names of all accessible schemas. """ - return [ - r[0] - for r in (connection or _get_singleton_connection()).query( - 'SELECT schema_name FROM information_schema.schemata WHERE schema_name <> "information_schema"' - ) - ] + conn = connection or _get_singleton_connection() + return [r[0] for r in conn.query(conn.adapter.list_schemas_sql())] def virtual_schema( From bf7c442a24b9c9ad8e2eb0f404811ff656422ab2 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 19 Feb 2026 11:53:18 -0600 Subject: [PATCH 32/39] feat: add backend parameter to Instance and cross-connection validation Instance now accepts backend="mysql"|"postgresql" to explicitly set the database backend, with automatic port default derivation (3306 vs 5432). Join, restriction, and union operators now validate that both operands use the same connection, raising DataJointError with a clear message when expressions from different Instances are combined. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/condition.py | 7 ++ src/datajoint/expression.py | 7 +- src/datajoint/instance.py | 14 +++- tests/unit/test_thread_safe.py | 117 +++++++++++++++++++++++++++++++++ 4 files changed, 141 insertions(+), 4 deletions(-) diff --git a/src/datajoint/condition.py b/src/datajoint/condition.py index 0335d6adb..55f095246 100644 --- a/src/datajoint/condition.py +++ b/src/datajoint/condition.py @@ -244,6 +244,13 @@ def assert_join_compatibility( if isinstance(expr1, U) or isinstance(expr2, U): return + # Check that both expressions use the same connection + if expr1.connection is not expr2.connection: + raise DataJointError( + "Cannot operate on expressions from different connections. " + "Ensure both operands use the same dj.Instance or global connection." + ) + if semantic_check: # Check if lineage tracking is available for both expressions if not expr1.heading.lineage_available or not expr2.heading.lineage_available: diff --git a/src/datajoint/expression.py b/src/datajoint/expression.py index bc6c7cee7..1b5f5ac9e 100644 --- a/src/datajoint/expression.py +++ b/src/datajoint/expression.py @@ -1414,8 +1414,11 @@ def create(cls, arg1, arg2): arg2 = arg2() # instantiate if a class if not isinstance(arg2, QueryExpression): raise DataJointError("A QueryExpression can only be unioned with another QueryExpression") - if arg1.connection != arg2.connection: - raise DataJointError("Cannot operate on QueryExpressions originating from different connections.") + if arg1.connection is not arg2.connection: + raise DataJointError( + "Cannot operate on expressions from different connections. " + "Ensure both operands use the same dj.Instance or global connection." + ) if set(arg1.primary_key) != set(arg2.primary_key): raise DataJointError("The operands of a union must share the same primary key.") if set(arg1.heading.secondary_attributes) & set(arg2.heading.secondary_attributes): diff --git a/src/datajoint/instance.py b/src/datajoint/instance.py index f0cbe96da..455336a7c 100644 --- a/src/datajoint/instance.py +++ b/src/datajoint/instance.py @@ -8,7 +8,7 @@ from __future__ import annotations import os -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, Literal from .connection import Connection from .errors import ThreadSafetyError @@ -54,9 +54,11 @@ class Instance: password : str Database password. port : int, optional - Database port. Default from config or 3306. + Database port. Defaults to 3306 for MySQL, 5432 for PostgreSQL. use_tls : bool or dict, optional TLS configuration. + backend : str, optional + Database backend: ``"mysql"`` or ``"postgresql"``. Default from config. **kwargs : Any Additional config overrides applied to this instance's config. @@ -81,11 +83,19 @@ def __init__( password: str, port: int | None = None, use_tls: bool | dict | None = None, + backend: Literal["mysql", "postgresql"] | None = None, **kwargs: Any, ) -> None: # Create fresh config with defaults loaded from env/file self.config = _create_config() + # Apply backend override before other kwargs (port default depends on it) + if backend is not None: + self.config.database.backend = backend + # Re-derive port default since _create_config resolved it before backend was set + if port is None and "database__port" not in kwargs: + self.config.database.port = 5432 if backend == "postgresql" else 3306 + # Apply any config overrides from kwargs for key, value in kwargs.items(): if hasattr(self.config, key): diff --git a/tests/unit/test_thread_safe.py b/tests/unit/test_thread_safe.py index bec45e434..d6621af38 100644 --- a/tests/unit/test_thread_safe.py +++ b/tests/unit/test_thread_safe.py @@ -155,6 +155,123 @@ def test_instance_always_allowed_in_thread_safe_mode(self, monkeypatch): assert callable(Instance) +class TestInstanceBackend: + """Test Instance backend parameter.""" + + def test_instance_backend_sets_config(self, monkeypatch): + """Instance(backend=...) sets config.database.backend.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "false") + from datajoint.instance import Instance + from unittest.mock import patch + + with patch("datajoint.instance.Connection"): + inst = Instance( + host="localhost", user="root", password="secret", + backend="postgresql", + ) + assert inst.config.database.backend == "postgresql" + + def test_instance_backend_default_from_config(self, monkeypatch): + """Instance without backend uses config default.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "false") + from datajoint.instance import Instance + from unittest.mock import patch + + with patch("datajoint.instance.Connection"): + inst = Instance( + host="localhost", user="root", password="secret", + ) + assert inst.config.database.backend == "mysql" + + def test_instance_backend_affects_port_default(self, monkeypatch): + """Instance(backend='postgresql') uses port 5432 by default.""" + monkeypatch.setenv("DJ_THREAD_SAFE", "false") + from datajoint.instance import Instance + from unittest.mock import patch, call + + with patch("datajoint.instance.Connection") as MockConn: + Instance( + host="localhost", user="root", password="secret", + backend="postgresql", + ) + # Connection should be called with port 5432 (PostgreSQL default) + args, kwargs = MockConn.call_args + assert args[3] == 5432 # port is the 4th positional arg + + +class TestCrossConnectionValidation: + """Test that cross-connection operations are rejected.""" + + def test_join_different_connections_raises(self): + """Join of expressions from different connections raises DataJointError.""" + from datajoint.expression import QueryExpression + from datajoint.errors import DataJointError + from unittest.mock import MagicMock + + expr1 = QueryExpression() + expr1._connection = MagicMock() + expr1._heading = MagicMock() + expr1._heading.names = [] + + expr2 = QueryExpression() + expr2._connection = MagicMock() # different connection object + expr2._heading = MagicMock() + expr2._heading.names = [] + + with pytest.raises(DataJointError, match="different connections"): + expr1 * expr2 + + def test_join_same_connection_allowed(self): + """Join of expressions from the same connection does not raise.""" + from datajoint.condition import assert_join_compatibility + from datajoint.expression import QueryExpression + from unittest.mock import MagicMock + + shared_conn = MagicMock() + + expr1 = QueryExpression() + expr1._connection = shared_conn + expr1._heading = MagicMock() + expr1._heading.names = [] + expr1._heading.lineage_available = False + + expr2 = QueryExpression() + expr2._connection = shared_conn + expr2._heading = MagicMock() + expr2._heading.names = [] + expr2._heading.lineage_available = False + + # Should not raise + assert_join_compatibility(expr1, expr2) + + def test_restriction_different_connections_raises(self): + """Restriction by expression from different connection raises DataJointError.""" + from datajoint.expression import QueryExpression + from datajoint.errors import DataJointError + from unittest.mock import MagicMock + + expr1 = QueryExpression() + expr1._connection = MagicMock() + expr1._heading = MagicMock() + expr1._heading.names = ["a"] + expr1._heading.__getitem__ = MagicMock() + expr1._heading.new_attributes = set() + expr1._support = ["`db`.`t1`"] + expr1._restriction = [] + expr1._restriction_attributes = set() + expr1._joins = [] + expr1._top = None + expr1._original_heading = expr1._heading + + expr2 = QueryExpression() + expr2._connection = MagicMock() # different connection + expr2._heading = MagicMock() + expr2._heading.names = ["a"] + + with pytest.raises(DataJointError, match="different connections"): + expr1 & expr2 + + class TestThreadSafetyError: """Test ThreadSafetyError exception.""" From f123f2677d8cd4327bb6ff7a5ac42dee902c5b95 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 19 Feb 2026 11:55:30 -0600 Subject: [PATCH 33/39] docs: extend migrate.py deprecation timeline to 2.3 Co-Authored-By: Claude Opus 4.6 --- src/datajoint/migrate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/datajoint/migrate.py b/src/datajoint/migrate.py index 57862dafa..2ff0dfcb8 100644 --- a/src/datajoint/migrate.py +++ b/src/datajoint/migrate.py @@ -7,7 +7,7 @@ .. note:: This module is provided temporarily to assist with migration from pre-2.0. - It will be deprecated in DataJoint 2.1 and removed in 2.2. + It will be deprecated in DataJoint 2.1 and removed in 2.3. Complete your migrations while on DataJoint 2.0. Note on Terminology @@ -32,7 +32,7 @@ # Show deprecation warning starting in 2.1 if Version(__version__) >= Version("2.1"): warnings.warn( - "datajoint.migrate is deprecated and will be removed in DataJoint 2.2. " + "datajoint.migrate is deprecated and will be removed in DataJoint 2.3. " "Complete your schema migrations before upgrading.", DeprecationWarning, stacklevel=2, From 4796d395811db681eb1481b01aeb913ec6644bcf Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 19 Feb 2026 12:21:22 -0600 Subject: [PATCH 34/39] fix: lint formatting in test_thread_safe.py Co-Authored-By: Claude Opus 4.6 --- tests/unit/test_thread_safe.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tests/unit/test_thread_safe.py b/tests/unit/test_thread_safe.py index d6621af38..aba1b686b 100644 --- a/tests/unit/test_thread_safe.py +++ b/tests/unit/test_thread_safe.py @@ -166,7 +166,9 @@ def test_instance_backend_sets_config(self, monkeypatch): with patch("datajoint.instance.Connection"): inst = Instance( - host="localhost", user="root", password="secret", + host="localhost", + user="root", + password="secret", backend="postgresql", ) assert inst.config.database.backend == "postgresql" @@ -179,7 +181,9 @@ def test_instance_backend_default_from_config(self, monkeypatch): with patch("datajoint.instance.Connection"): inst = Instance( - host="localhost", user="root", password="secret", + host="localhost", + user="root", + password="secret", ) assert inst.config.database.backend == "mysql" @@ -187,11 +191,13 @@ def test_instance_backend_affects_port_default(self, monkeypatch): """Instance(backend='postgresql') uses port 5432 by default.""" monkeypatch.setenv("DJ_THREAD_SAFE", "false") from datajoint.instance import Instance - from unittest.mock import patch, call + from unittest.mock import patch with patch("datajoint.instance.Connection") as MockConn: Instance( - host="localhost", user="root", password="secret", + host="localhost", + user="root", + password="secret", backend="postgresql", ) # Connection should be called with port 5432 (PostgreSQL default) From dca80526fde8f8243a3e9ad23c67677123784062 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 19 Feb 2026 13:56:07 -0600 Subject: [PATCH 35/39] refactor: move dependency graph queries into adapter methods Extract the backend-specific primary key and foreign key SQL from dependencies.py into load_primary_keys_sql() and load_foreign_keys_sql() adapter methods, eliminating the if/else backend fork. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/adapters/base.py | 49 +++++++++++++++ src/datajoint/adapters/mysql.py | 26 ++++++++ src/datajoint/adapters/postgres.py | 38 ++++++++++++ src/datajoint/dependencies.py | 97 ++++-------------------------- 4 files changed, 126 insertions(+), 84 deletions(-) diff --git a/src/datajoint/adapters/base.py b/src/datajoint/adapters/base.py index 0fa5aa24f..4ebeac900 100644 --- a/src/datajoint/adapters/base.py +++ b/src/datajoint/adapters/base.py @@ -727,6 +727,55 @@ def get_foreign_keys_sql(self, schema_name: str, table_name: str) -> str: """ ... + @abstractmethod + def load_primary_keys_sql(self, schemas_list: str, like_pattern: str) -> str: + """ + Generate query to load primary key columns for all tables across schemas. + + Used by the dependency graph to build the schema graph. + + Parameters + ---------- + schemas_list : str + Comma-separated, quoted schema names for an IN clause. + like_pattern : str + SQL LIKE pattern to exclude (e.g., "'~%%'" for internal tables). + + Returns + ------- + str + SQL query returning rows with columns: + - tab: fully qualified table name (quoted) + - column_name: primary key column name + """ + ... + + @abstractmethod + def load_foreign_keys_sql(self, schemas_list: str, like_pattern: str) -> str: + """ + Generate query to load foreign key relationships across schemas. + + Used by the dependency graph to build the schema graph. + + Parameters + ---------- + schemas_list : str + Comma-separated, quoted schema names for an IN clause. + like_pattern : str + SQL LIKE pattern to exclude (e.g., "'~%%'" for internal tables). + + Returns + ------- + str + SQL query returning rows (as dicts) with columns: + - constraint_name: FK constraint name + - referencing_table: fully qualified child table name (quoted) + - referenced_table: fully qualified parent table name (quoted) + - column_name: FK column in child table + - referenced_column_name: referenced column in parent table + """ + ... + @abstractmethod def get_constraint_info_sql(self, constraint_name: str, schema_name: str, table_name: str) -> str: """ diff --git a/src/datajoint/adapters/mysql.py b/src/datajoint/adapters/mysql.py index f856fcd0b..cc8fcb842 100644 --- a/src/datajoint/adapters/mysql.py +++ b/src/datajoint/adapters/mysql.py @@ -656,6 +656,32 @@ def get_foreign_keys_sql(self, schema_name: str, table_name: str) -> str: f"ORDER BY constraint_name, ordinal_position" ) + def load_primary_keys_sql(self, schemas_list: str, like_pattern: str) -> str: + """Query to load all primary key columns across schemas.""" + tab_expr = "concat('`', table_schema, '`.`', table_name, '`')" + return ( + f"SELECT {tab_expr} as tab, column_name " + f"FROM information_schema.key_column_usage " + f"WHERE table_name NOT LIKE {like_pattern} " + f"AND table_schema in ({schemas_list}) " + f"AND constraint_name='PRIMARY'" + ) + + def load_foreign_keys_sql(self, schemas_list: str, like_pattern: str) -> str: + """Query to load all foreign key relationships across schemas.""" + tab_expr = "concat('`', table_schema, '`.`', table_name, '`')" + ref_tab_expr = "concat('`', referenced_table_schema, '`.`', referenced_table_name, '`')" + return ( + f"SELECT constraint_name, " + f"{tab_expr} as referencing_table, " + f"{ref_tab_expr} as referenced_table, " + f"column_name, referenced_column_name " + f"FROM information_schema.key_column_usage " + f"WHERE referenced_table_name NOT LIKE {like_pattern} " + f"AND (referenced_table_schema in ({schemas_list}) " + f"OR referenced_table_schema is not NULL AND table_schema in ({schemas_list}))" + ) + def get_constraint_info_sql(self, constraint_name: str, schema_name: str, table_name: str) -> str: """Query to get FK constraint details from information_schema.""" return ( diff --git a/src/datajoint/adapters/postgres.py b/src/datajoint/adapters/postgres.py index 5298d647d..d41ba8550 100644 --- a/src/datajoint/adapters/postgres.py +++ b/src/datajoint/adapters/postgres.py @@ -799,6 +799,44 @@ def get_foreign_keys_sql(self, schema_name: str, table_name: str) -> str: f"ORDER BY kcu.constraint_name, kcu.ordinal_position" ) + def load_primary_keys_sql(self, schemas_list: str, like_pattern: str) -> str: + """Query to load all primary key columns across schemas.""" + tab_expr = "'\"' || kcu.table_schema || '\".\"' || kcu.table_name || '\"'" + return ( + f"SELECT {tab_expr} as tab, kcu.column_name " + f"FROM information_schema.key_column_usage kcu " + f"JOIN information_schema.table_constraints tc " + f"ON kcu.constraint_name = tc.constraint_name " + f"AND kcu.table_schema = tc.table_schema " + f"WHERE kcu.table_name NOT LIKE {like_pattern} " + f"AND kcu.table_schema in ({schemas_list}) " + f"AND tc.constraint_type = 'PRIMARY KEY'" + ) + + def load_foreign_keys_sql(self, schemas_list: str, like_pattern: str) -> str: + """Query to load all foreign key relationships across schemas.""" + return ( + f"SELECT " + f"c.conname as constraint_name, " + f"'\"' || ns1.nspname || '\".\"' || cl1.relname || '\"' as referencing_table, " + f"'\"' || ns2.nspname || '\".\"' || cl2.relname || '\"' as referenced_table, " + f"a1.attname as column_name, " + f"a2.attname as referenced_column_name " + f"FROM pg_constraint c " + f"JOIN pg_class cl1 ON c.conrelid = cl1.oid " + f"JOIN pg_namespace ns1 ON cl1.relnamespace = ns1.oid " + f"JOIN pg_class cl2 ON c.confrelid = cl2.oid " + f"JOIN pg_namespace ns2 ON cl2.relnamespace = ns2.oid " + f"CROSS JOIN LATERAL unnest(c.conkey, c.confkey) WITH ORDINALITY AS cols(conkey, confkey, ord) " + f"JOIN pg_attribute a1 ON a1.attrelid = cl1.oid AND a1.attnum = cols.conkey " + f"JOIN pg_attribute a2 ON a2.attrelid = cl2.oid AND a2.attnum = cols.confkey " + f"WHERE c.contype = 'f' " + f"AND cl1.relname NOT LIKE {like_pattern} " + f"AND (ns2.nspname in ({schemas_list}) " + f"OR ns1.nspname in ({schemas_list})) " + f"ORDER BY c.conname, cols.ord" + ) + def get_constraint_info_sql(self, constraint_name: str, schema_name: str, table_name: str) -> str: """ Query to get FK constraint details from information_schema. diff --git a/src/datajoint/dependencies.py b/src/datajoint/dependencies.py index 83162a112..99556345e 100644 --- a/src/datajoint/dependencies.py +++ b/src/datajoint/dependencies.py @@ -164,92 +164,21 @@ def load(self, force: bool = True) -> None: # Build schema list for IN clause schemas_list = ", ".join(adapter.quote_string(s) for s in self._conn.schemas) - # Backend-specific queries for primary keys and foreign keys - # Note: Both PyMySQL and psycopg2 use %s placeholders, so escape % as %% + # Load primary keys and foreign keys via adapter methods + # Note: Both PyMySQL and psycopg use %s placeholders, so escape % as %% like_pattern = "'~%%'" - if adapter.backend == "mysql": - # MySQL: use concat() and MySQL-specific information_schema columns - tab_expr = "concat('`', table_schema, '`.`', table_name, '`')" - - # load primary key info (MySQL uses constraint_name='PRIMARY') - keys = self._conn.query( - f""" - SELECT {tab_expr} as tab, column_name - FROM information_schema.key_column_usage - WHERE table_name NOT LIKE {like_pattern} - AND table_schema in ({schemas_list}) - AND constraint_name='PRIMARY' - """ - ) - pks = defaultdict(set) - for key in keys: - pks[key[0]].add(key[1]) - - # load foreign keys (MySQL has referenced_* columns) - ref_tab_expr = "concat('`', referenced_table_schema, '`.`', referenced_table_name, '`')" - fk_keys = self._conn.query( - f""" - SELECT constraint_name, - {tab_expr} as referencing_table, - {ref_tab_expr} as referenced_table, - column_name, referenced_column_name - FROM information_schema.key_column_usage - WHERE referenced_table_name NOT LIKE {like_pattern} - AND (referenced_table_schema in ({schemas_list}) - OR referenced_table_schema is not NULL AND table_schema in ({schemas_list})) - """, - as_dict=True, - ) - else: - # PostgreSQL: use || concatenation and different query structure - tab_expr = "'\"' || kcu.table_schema || '\".\"' || kcu.table_name || '\"'" - - # load primary key info (PostgreSQL uses constraint_type='PRIMARY KEY') - keys = self._conn.query( - f""" - SELECT {tab_expr} as tab, kcu.column_name - FROM information_schema.key_column_usage kcu - JOIN information_schema.table_constraints tc - ON kcu.constraint_name = tc.constraint_name - AND kcu.table_schema = tc.table_schema - WHERE kcu.table_name NOT LIKE {like_pattern} - AND kcu.table_schema in ({schemas_list}) - AND tc.constraint_type = 'PRIMARY KEY' - """ - ) - pks = defaultdict(set) - for key in keys: - pks[key[0]].add(key[1]) - - # load foreign keys using pg_constraint system catalogs - # The information_schema approach creates a Cartesian product for composite FKs - # because constraint_column_usage doesn't have ordinal_position. - # Using pg_constraint with unnest(conkey, confkey) WITH ORDINALITY gives correct mapping. - fk_keys = self._conn.query( - f""" - SELECT - c.conname as constraint_name, - '"' || ns1.nspname || '"."' || cl1.relname || '"' as referencing_table, - '"' || ns2.nspname || '"."' || cl2.relname || '"' as referenced_table, - a1.attname as column_name, - a2.attname as referenced_column_name - FROM pg_constraint c - JOIN pg_class cl1 ON c.conrelid = cl1.oid - JOIN pg_namespace ns1 ON cl1.relnamespace = ns1.oid - JOIN pg_class cl2 ON c.confrelid = cl2.oid - JOIN pg_namespace ns2 ON cl2.relnamespace = ns2.oid - CROSS JOIN LATERAL unnest(c.conkey, c.confkey) WITH ORDINALITY AS cols(conkey, confkey, ord) - JOIN pg_attribute a1 ON a1.attrelid = cl1.oid AND a1.attnum = cols.conkey - JOIN pg_attribute a2 ON a2.attrelid = cl2.oid AND a2.attnum = cols.confkey - WHERE c.contype = 'f' - AND cl1.relname NOT LIKE {like_pattern} - AND (ns2.nspname in ({schemas_list}) - OR ns1.nspname in ({schemas_list})) - ORDER BY c.conname, cols.ord - """, - as_dict=True, - ) + # load primary key info + keys = self._conn.query(adapter.load_primary_keys_sql(schemas_list, like_pattern)) + pks = defaultdict(set) + for key in keys: + pks[key[0]].add(key[1]) + + # load foreign keys + fk_keys = self._conn.query( + adapter.load_foreign_keys_sql(schemas_list, like_pattern), + as_dict=True, + ) # add nodes to the graph for n, pk in pks.items(): From 453ab6ed44b92230202071675cae7d4e3db0896d Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 19 Feb 2026 14:44:02 -0600 Subject: [PATCH 36/39] fix: update docs URL from datajoint.com/docs to docs.datajoint.com Co-Authored-By: Claude Opus 4.6 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b2856e407..e9a6ff280 100644 --- a/README.md +++ b/README.md @@ -80,7 +80,7 @@ conda install -c conda-forge datajoint - [How-To Guides](https://docs.datajoint.com/how-to/) — Task-oriented guides - [API Reference](https://docs.datajoint.com/api/) — Complete API documentation - [Migration Guide](https://docs.datajoint.com/how-to/migrate-to-v20/) — Upgrade from legacy versions -- **[DataJoint Elements](https://datajoint.com/docs/elements/)** — Example pipelines for neuroscience +- **[DataJoint Elements](https://docs.datajoint.com/elements/)** — Example pipelines for neuroscience - **[GitHub Discussions](https://github.com/datajoint/datajoint-python/discussions)** — Community support ## Contributing From 34f744e1d50b186e08c01148db82686998bcf966 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 19 Feb 2026 14:48:59 -0600 Subject: [PATCH 37/39] docs: add DataJoint 2.0 citation (arXiv:2602.16585) Co-Authored-By: Claude Opus 4.6 --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e9a6ff280..33b18b248 100644 --- a/README.md +++ b/README.md @@ -42,8 +42,8 @@ DataJoint is a framework for scientific data pipelines based on the **Relational Citation - - DOI + + DOI Coverage From 54887815b96fccb2767832917a4afdae7240a890 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 19 Feb 2026 15:25:41 -0600 Subject: [PATCH 38/39] refactor: add split_full_table_name adapter method Replace ad-hoc backtick/quote stripping with adapter.split_full_table_name() so that full table name parsing uses the correct quoting convention for each backend (backticks for MySQL, double quotes for PostgreSQL). Co-Authored-By: Claude Opus 4.6 --- src/datajoint/adapters/base.py | 21 +++++++++++++++++++++ src/datajoint/adapters/mysql.py | 5 +++++ src/datajoint/adapters/postgres.py | 5 +++++ src/datajoint/declare.py | 9 +++------ src/datajoint/schemas.py | 7 +++++-- src/datajoint/table.py | 7 +------ 6 files changed, 40 insertions(+), 14 deletions(-) diff --git a/src/datajoint/adapters/base.py b/src/datajoint/adapters/base.py index 4ebeac900..011f306ab 100644 --- a/src/datajoint/adapters/base.py +++ b/src/datajoint/adapters/base.py @@ -169,6 +169,27 @@ def quote_identifier(self, name: str) -> str: """ ... + @abstractmethod + def split_full_table_name(self, full_table_name: str) -> tuple[str, str]: + """ + Split a fully-qualified table name into schema and table components. + + Inverse of quoting: strips backend-specific identifier quotes + and splits into (schema, table). + + Parameters + ---------- + full_table_name : str + Quoted full table name (e.g., ```\\`schema\\`.\\`table\\` ``` or + ``"schema"."table"``). + + Returns + ------- + tuple[str, str] + (schema_name, table_name) with quotes stripped. + """ + ... + @abstractmethod def quote_string(self, value: str) -> str: """ diff --git a/src/datajoint/adapters/mysql.py b/src/datajoint/adapters/mysql.py index cc8fcb842..3c28a85e6 100644 --- a/src/datajoint/adapters/mysql.py +++ b/src/datajoint/adapters/mysql.py @@ -200,6 +200,11 @@ def quote_identifier(self, name: str) -> str: """ return f"`{name}`" + def split_full_table_name(self, full_table_name: str) -> tuple[str, str]: + """Split ```\\`schema\\`.\\`table\\` ``` into ``('schema', 'table')``.""" + schema, table = full_table_name.replace("`", "").split(".") + return schema, table + def quote_string(self, value: str) -> str: """ Quote string literal for MySQL with escaping. diff --git a/src/datajoint/adapters/postgres.py b/src/datajoint/adapters/postgres.py index d41ba8550..2caebef75 100644 --- a/src/datajoint/adapters/postgres.py +++ b/src/datajoint/adapters/postgres.py @@ -249,6 +249,11 @@ def quote_identifier(self, name: str) -> str: """ return f'"{name}"' + def split_full_table_name(self, full_table_name: str) -> tuple[str, str]: + """Split ``"schema"."table"`` into ``('schema', 'table')``.""" + schema, table = full_table_name.replace('"', "").split(".") + return schema, table + def quote_string(self, value: str) -> str: """ Quote string literal for PostgreSQL with escaping. diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py index fe50e8a66..6af24ae55 100644 --- a/src/datajoint/declare.py +++ b/src/datajoint/declare.py @@ -294,12 +294,9 @@ def compile_foreign_key( # ref.support[0] may have cached quoting from a different backend # Extract database and table name and rebuild with current adapter parent_full_name = ref.support[0] - # Try to parse as database.table (with or without quotes) - parts = parent_full_name.replace('"', "").replace("`", "").split(".") - if len(parts) == 2: - ref_table_name = f"{adapter.quote_identifier(parts[0])}.{adapter.quote_identifier(parts[1])}" - else: - ref_table_name = adapter.quote_identifier(parts[0]) + # Parse as database.table using the adapter's quoting convention + parts = adapter.split_full_table_name(parent_full_name) + ref_table_name = f"{adapter.quote_identifier(parts[0])}.{adapter.quote_identifier(parts[1])}" foreign_key_sql.append( f"FOREIGN KEY ({fk_cols}) REFERENCES {ref_table_name} ({pk_cols}) ON UPDATE CASCADE ON DELETE RESTRICT" diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index 1f35e2a04..88555b858 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -549,7 +549,7 @@ def save(self, python_filename: str | None = None) -> str: def make_class_definition(table): tier = _get_tier(table).__name__ - class_name = table.split(".")[1].strip("`") + class_name = self.connection.adapter.split_full_table_name(table)[1] indent = "" if tier == "Part": class_name = class_name.split("__")[-1] @@ -608,7 +608,10 @@ def list_tables(self) -> list[str]: self.connection.dependencies.load() return [ t - for d, t in (table_name.replace("`", "").split(".") for table_name in self.connection.dependencies.topo_sort()) + for d, t in ( + self.connection.adapter.split_full_table_name(table_name) + for table_name in self.connection.dependencies.topo_sort() + ) if d == self.database ] diff --git a/src/datajoint/table.py b/src/datajoint/table.py index fa40f660c..256fab6e9 100644 --- a/src/datajoint/table.py +++ b/src/datajoint/table.py @@ -233,12 +233,7 @@ def _populate_lineage(self, primary_key, fk_attribute_map): # FK attributes: copy lineage from parent (whether in PK or not) for attr, (parent_table, parent_attr) in fk_attribute_map.items(): # Parse parent table name: `schema`.`table` or "schema"."table" -> (schema, table) - parent_clean = parent_table.replace("`", "").replace('"', "") - if "." in parent_clean: - parent_db, parent_tbl = parent_clean.split(".", 1) - else: - parent_db = self.database - parent_tbl = parent_clean + parent_db, parent_tbl = self.connection.adapter.split_full_table_name(parent_table) # Get parent's lineage for this attribute parent_lineage = get_lineage(self.connection, parent_db, parent_tbl, parent_attr) From d079d5854bf57dcc6c7811d1b6ebada40ece8701 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 19 Feb 2026 15:36:14 -0600 Subject: [PATCH 39/39] refactor: remove unsupported schema.code()/save() methods These methods were marked as "not officially supported", had no tests, no callers, and contained MySQL-specific string manipulation incompatible with PostgreSQL. Co-Authored-By: Claude Opus 4.6 --- src/datajoint/schemas.py | 79 ---------------------------------------- 1 file changed, 79 deletions(-) diff --git a/src/datajoint/schemas.py b/src/datajoint/schemas.py index 88555b858..8747cdbf2 100644 --- a/src/datajoint/schemas.py +++ b/src/datajoint/schemas.py @@ -7,9 +7,7 @@ from __future__ import annotations -import collections import inspect -import itertools import logging import re import types @@ -516,83 +514,6 @@ def jobs(self) -> list[Job]: return jobs_list - @property - def code(self): - self._assert_exists() - return self.save() - - def save(self, python_filename: str | None = None) -> str: - """ - Generate Python code that recreates this schema. - - Parameters - ---------- - python_filename : str, optional - If provided, write the code to this file. - - Returns - ------- - str - Python module source code defining this schema. - - Notes - ----- - This method is in preparation for a future release and is not - officially supported. - """ - self.connection.dependencies.load() - self._assert_exists() - module_count = itertools.count() - # add virtual modules for referenced modules with names vmod0, vmod1, ... - module_lookup = collections.defaultdict(lambda: "vmod" + str(next(module_count))) - db = self.database - - def make_class_definition(table): - tier = _get_tier(table).__name__ - class_name = self.connection.adapter.split_full_table_name(table)[1] - indent = "" - if tier == "Part": - class_name = class_name.split("__")[-1] - indent += " " - class_name = to_camel_case(class_name) - - def replace(s): - d, tabs = s.group(1), s.group(2) - return ("" if d == db else (module_lookup[d] + ".")) + ".".join( - to_camel_case(tab) for tab in tabs.lstrip("__").split("__") - ) - - return ("" if tier == "Part" else "\n@schema\n") + ( - '{indent}class {class_name}(dj.{tier}):\n{indent} definition = """\n{indent} {defi}"""' - ).format( - class_name=class_name, - indent=indent, - tier=tier, - defi=re.sub( - r"`([^`]+)`.`([^`]+)`", - replace, - FreeTable(self.connection, table).describe(), - ).replace("\n", "\n " + indent), - ) - - tables = self.connection.dependencies.topo_sort() - body = "\n\n".join(make_class_definition(table) for table in tables) - python_code = "\n\n".join( - ( - '"""This module was auto-generated by datajoint from an existing schema"""', - "import datajoint as dj\n\nschema = dj.Schema('{db}')".format(db=db), - "\n".join( - "{module} = dj.VirtualModule('{module}', '{schema_name}')".format(module=v, schema_name=k) - for k, v in module_lookup.items() - ), - body, - ) - ) - if python_filename is None: - return python_code - with open(python_filename, "wt") as f: - f.write(python_code) - def list_tables(self) -> list[str]: """ Return all user tables in the schema.