From 756c0be0b5f8fa36cbda0ae29c8c908f7cdc6f3d Mon Sep 17 00:00:00 2001 From: Oliver Date: Wed, 18 Mar 2026 00:01:17 +1100 Subject: [PATCH] [db] Backend setting improvements (#11500) * Refactor database engine options - Move to setting/db_backend.py - Cleanup settings.py * Fix documentation for postgres settings * docs updates * Add transaction_mode options for sqlite * Update CHANGELOG with breaking changes * Remove hard-coded database config * Raise error on invalid backend * Fix typos * Fix broken redis link * Limit to single worker thread for sqlite * Update docs * Add verbosity switch to dev.test task * Add test timeout - kill hanging tests after 120s * Set WAL mode for sqlite * Use IMMEDIATE mode for background worker thread * Use config to set WAL rather than custom hook * Tweak pyproject settings * Tweak code * Increase timeouts * Reset requirements to master --- CHANGELOG.md | 1 + docs/docs/start/config.md | 13 +- docs/docs/start/installer.md | 3 + docs/docs/start/processes.md | 19 ++- pyproject.toml | 12 +- src/backend/InvenTree/.gitignore | 1 + src/backend/InvenTree/InvenTree/api.py | 2 +- src/backend/InvenTree/InvenTree/config.py | 2 +- src/backend/InvenTree/InvenTree/exceptions.py | 2 +- .../InvenTree/InvenTree/setting/db_backend.py | 152 ++++++++++++++++++ src/backend/InvenTree/InvenTree/settings.py | 110 +------------ src/backend/InvenTree/build/test_api.py | 5 +- tasks.py | 22 +-- 13 files changed, 220 insertions(+), 124 deletions(-) create mode 100644 src/backend/InvenTree/InvenTree/setting/db_backend.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c200417ff..9d930e4a09 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Breaking Changes - [#11303](https://github.com/inventree/InvenTree/pull/11303) removes the `default_supplier` field from the `Part` model. 
Instead, the `SupplierPart` model now has a `primary` field which is used to indicate which supplier is the default for a given part. Any external client applications which made use of the old `default_supplier` field will need to be updated. +- [#11500](https://github.com/inventree/InvenTree/pull/11500) fixes a spelling mistake in the database configuration values, which may affect some users running the PostgreSQL database backend. The `tcp_keepalives_internal` option has been renamed to `tcp_keepalives_interval` to reflect the correct PostgreSQL configuration option name. If you are using PostgreSQL, and have set a custom value for the `tcp_keepalives_internal` option, you will need to update this to `tcp_keepalives_interval` in your configuration (either via environment variable or config file). ### Added diff --git a/docs/docs/start/config.md b/docs/docs/start/config.md index 13f8976c5a..7c9775f5f2 100644 --- a/docs/docs/start/config.md +++ b/docs/docs/start/config.md @@ -275,7 +275,7 @@ If running with a PostgreSQL database backend, the following additional options | INVENTREE_DB_TIMEOUT | database.timeout | Database connection timeout (s) | 2 | | INVENTREE_DB_TCP_KEEPALIVES | database.tcp_keepalives | TCP keepalive | 1 | | INVENTREE_DB_TCP_KEEPALIVES_IDLE | database.tcp_keepalives_idle | Idle TCP keepalive | 1 | -| INVENTREE_DB_TCP_KEEPALIVES_INTERNAL | database.tcp_keepalives_internal | Internal TCP keepalive | 1| +| INVENTREE_DB_TCP_KEEPALIVES_INTERVAL | database.tcp_keepalives_interval | TCP keepalive interval | 1| | INVENTREE_DB_TCP_KEEPALIVES_COUNT | database.tcp_keepalives_count | TCP keepalive count | 5 | | INVENTREE_DB_ISOLATION_SERIALIZABLE | database.serializable | Database isolation level configured to "serializable" | False | @@ -287,6 +287,17 @@ If running with a MySQL database backend, the following additional options are a | --- | --- | --- | --- | | INVENTREE_DB_ISOLATION_SERIALIZABLE | database.serializable | Database isolation level 
configured to "serializable" | False | +### SQLite Settings + +!!! warning "SQLite Performance" + SQLite is not recommended for production use, and should only be used for testing or development purposes. If you are using SQLite in production, you may want to adjust the following settings to improve performance. + +If running with a SQLite database backend, the following additional options are available: + +| Environment Variable | Configuration File | Description | Default | +| --- | --- | --- | --- | +| INVENTREE_DB_TIMEOUT | database.timeout | Database connection timeout (s) | 10 | + ## Caching InvenTree can be configured to use [redis](https://redis.io) as a global cache backend. diff --git a/docs/docs/start/installer.md b/docs/docs/start/installer.md index 562b2bd936..997a0fefc8 100644 --- a/docs/docs/start/installer.md +++ b/docs/docs/start/installer.md @@ -118,6 +118,9 @@ Extra python packages can be installed by setting the environment variable `SETU The used database backend can be configured with environment variables (before the first setup) or in the config file after the installation. Check the [configuration section](./config.md#database-options) for more information. +!!! warning "SQLite Performance" + SQLite is not recommended for production use, as it is not designed for high concurrency. + ## Moving Data To change the data storage location, link the new location to `/opt/inventree/data`. A rough outline of steps to achieve this could be: diff --git a/docs/docs/start/processes.md b/docs/docs/start/processes.md index 06bea14454..643668aee3 100644 --- a/docs/docs/start/processes.md +++ b/docs/docs/start/processes.md @@ -16,7 +16,22 @@ InvenTree supports a [number of database backends]({% include "django.html" %}/r Refer to the [database configuration guide](./config.md#database-options) for more information on selecting and configuring the database backend. 
-In running InvenTree via [docker compose](./docker_install.md), the database process is managed by the `inventree-db` service which provides a [Postgres docker container](https://hub.docker.com/_/postgres). +If running InvenTree via [docker compose](./docker_install.md), the database process is managed by the `inventree-db` service which provides a [Postgres docker container](https://hub.docker.com/_/postgres). + +!!! tip "Postgres Recommended" + We recommend using Postgres as the database backend for InvenTree, as it is a robust and scalable database which is well-suited to production use. + +#### SQLite Limitations + +!!! warning "SQLite Performance" + SQLite is not recommended for production use, as it is not designed for high concurrency. + +While SQLite is supported, it is strongly *not* recommended for a production installation, especially where there may be multiple users accessing the system concurrently. SQLite is designed for low-concurrency applications, and can experience performance issues when multiple users are accessing the database at the same time. + +In addition to concurrency issues, there are other structural limitations which exist in SQLite that can prevent operations on large querysets. + +If you are using SQLite, you should be aware of these limitations. It is important to ensure that the database file is stored on a fast storage medium (such as an SSD), and that the database options are configured correctly to minimize locking issues. Refer to the [database configuration guide](./config.md#database-options) for more information on configuring SQLite options. + ### Web Server @@ -112,6 +127,8 @@ If the background worker process is not running, InvenTree will not be able to p If the [cache server](#cache-server) is not running, the background worker will be limited to running a single threaded worker. 
This is because the background worker uses the cache server to manage task locking, and without a global cache server to communicate between processes, concurrency issues can occur. +Additionally, if you are running SQLite as the database backend, the background worker will be limited to a single thread, due to database locking issues which can occur with SQLite when multiple threads are accessing the database concurrently. + ### Cache Server The InvenTree cache server is used to store temporary data which is shared between the InvenTree web server and the background worker processes. The cache server is also used to store task information, and to manage task locking between the background worker processes. diff --git a/pyproject.toml b/pyproject.toml index b8fe778766..3d3fa6a7eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,12 +108,13 @@ root = ["src/backend/InvenTree"] unresolved-reference="ignore" # 21 # see https://github.com/astral-sh/ty/issues/220 unresolved-attribute="ignore" # 505 # need Plugin Mixin typing call-non-callable="ignore" # 8 ## -invalid-return-type="ignore" # 22 ## invalid-argument-type="ignore" # 49 -possibly-unbound-attribute="ignore" # 25 # https://github.com/astral-sh/ty/issues/164 -unknown-argument="ignore" # 3 # need to wait for betterdjango field stubs -invalid-assignment="ignore" # 17 # need to wait for betterdjango field stubs -no-matching-overload="ignore" # 3 # need to wait for betterdjango field stubs +invalid-assignment="ignore" # 17 # need to wait for better django field stubs +invalid-method-override="ignore" +invalid-return-type="ignore" # 22 ## +possibly-missing-attribute="ignore" # 25 # https://github.com/astral-sh/ty/issues/164 +unknown-argument="ignore" # 3 # need to wait for better django field stubs +no-matching-overload="ignore" # 3 # need to wait for better django field stubs [tool.coverage.run] source = ["src/backend/InvenTree", "InvenTree"] @@ -139,3 +140,4 @@ django_find_project = false pythonpath = 
["src/backend/InvenTree"] DJANGO_SETTINGS_MODULE = "InvenTree.settings" python_files = ["test*.py",] +timeout = "120" diff --git a/src/backend/InvenTree/.gitignore b/src/backend/InvenTree/.gitignore index ec668443c9..a91adca39b 100644 --- a/src/backend/InvenTree/.gitignore +++ b/src/backend/InvenTree/.gitignore @@ -1,5 +1,6 @@ # Files generated during unit testing _testfolder/ +_tests_report*.txt # Playwright files for CI InvenTree/static/img/playwright*.png diff --git a/src/backend/InvenTree/InvenTree/api.py b/src/backend/InvenTree/InvenTree/api.py index 249091c9b0..fc2b7711d9 100644 --- a/src/backend/InvenTree/InvenTree/api.py +++ b/src/backend/InvenTree/InvenTree/api.py @@ -507,7 +507,7 @@ class BulkCreateMixin: if unique_create_fields := getattr(self, 'unique_create_fields', None): existing = collections.defaultdict(list) for idx, item in enumerate(data): - key = tuple(item[v] for v in unique_create_fields) + key = tuple(item[v] for v in list(unique_create_fields)) existing[key].append(idx) unique_errors = [[] for _ in range(len(data))] diff --git a/src/backend/InvenTree/InvenTree/config.py b/src/backend/InvenTree/InvenTree/config.py index 7392543441..d71d80e567 100644 --- a/src/backend/InvenTree/InvenTree/config.py +++ b/src/backend/InvenTree/InvenTree/config.py @@ -191,7 +191,7 @@ def load_config_data(set_cache: bool = False) -> map | None: if CONFIG_DATA is not None and not set_cache: return CONFIG_DATA - import yaml + import yaml.parser cfg_file = get_config_file() diff --git a/src/backend/InvenTree/InvenTree/exceptions.py b/src/backend/InvenTree/InvenTree/exceptions.py index dbb623eb5b..161860339d 100644 --- a/src/backend/InvenTree/InvenTree/exceptions.py +++ b/src/backend/InvenTree/InvenTree/exceptions.py @@ -66,7 +66,7 @@ def log_error( data = error_data else: try: - formatted_exception = traceback.format_exception(kind, info, data) # type: ignore[no-matching-overload] + formatted_exception = traceback.format_exception(kind, info, data) data = 
'\n'.join(formatted_exception) except AttributeError: data = 'No traceback information available' diff --git a/src/backend/InvenTree/InvenTree/setting/db_backend.py b/src/backend/InvenTree/InvenTree/setting/db_backend.py new file mode 100644 index 0000000000..cffdb93673 --- /dev/null +++ b/src/backend/InvenTree/InvenTree/setting/db_backend.py @@ -0,0 +1,152 @@ +"""Configuration settings specific to a particular database backend.""" + +import structlog + +from InvenTree.config import get_boolean_setting, get_setting + +logger = structlog.get_logger('inventree') + + +def set_db_options(engine: str, db_options: dict): + """Update database options based on the specified database backend. + + Arguments: + engine: The database engine (e.g. 'sqlite3', 'postgresql', etc.) + db_options: The database options dictionary to update + """ + logger.debug('Setting database options: %s', engine) + + if 'postgres' in engine: + set_postgres_options(db_options) + elif 'mysql' in engine: + set_mysql_options(db_options) + elif 'sqlite' in engine: + set_sqlite_options(db_options) + else: + raise ValueError(f'Unknown database engine: {engine}') + + +def set_postgres_options(db_options: dict): + """Set database options specific to postgres backend.""" + from django.db.backends.postgresql.psycopg_any import ( # type: ignore[unresolved-import] + IsolationLevel, + ) + + # Connection timeout + if 'connect_timeout' not in db_options: + # The DB server is in the same data center, it should not take very + # long to connect to the database server + # # seconds, 2 is minimum allowed by libpq + db_options['connect_timeout'] = int( + get_setting('INVENTREE_DB_TIMEOUT', 'database.timeout', 2) + ) + + # Setup TCP keepalive + # DB server is in the same DC, it should not become unresponsive for + # very long. With the defaults below we wait 5 seconds for the network + # issue to resolve itself. If that doesn't happen, whatever happened + # is probably fatal and no amount of waiting is going to fix it. 
+ # # 0 - TCP Keepalives disabled; 1 - enabled + if 'keepalives' not in db_options: + db_options['keepalives'] = int( + get_setting('INVENTREE_DB_TCP_KEEPALIVES', 'database.tcp_keepalives', 1) + ) + + # Seconds after connection is idle to send keep alive + if 'keepalives_idle' not in db_options: + db_options['keepalives_idle'] = int( + get_setting( + 'INVENTREE_DB_TCP_KEEPALIVES_IDLE', 'database.tcp_keepalives_idle', 1 + ) + ) + + # Seconds after missing ACK to send another keep alive + if 'keepalives_interval' not in db_options: + db_options['keepalives_interval'] = int( + get_setting( + 'INVENTREE_DB_TCP_KEEPALIVES_INTERVAL', + 'database.tcp_keepalives_interval', + '1', + ) + ) + + # Number of missing ACKs before we close the connection + if 'keepalives_count' not in db_options: + db_options['keepalives_count'] = int( + get_setting( + 'INVENTREE_DB_TCP_KEEPALIVES_COUNT', + 'database.tcp_keepalives_count', + '5', + ) + ) + + # # Milliseconds for how long pending data should remain unacked + # by the remote server + # TODO: Supported starting in PSQL 11 + # "tcp_user_timeout": int(os.getenv("PGTCP_USER_TIMEOUT", "1000"), + + # Postgres's default isolation level is Read Committed which is + # normally fine, but most developers think the database server is + # actually going to do Serializable type checks on the queries to + # protect against simultaneous changes. 
+ # https://www.postgresql.org/docs/devel/transaction-iso.html + # https://docs.djangoproject.com/en/3.2/ref/databases/#isolation-level + if 'isolation_level' not in db_options: + serializable = get_boolean_setting( + 'INVENTREE_DB_ISOLATION_SERIALIZABLE', 'database.serializable', False + ) + db_options['isolation_level'] = ( + IsolationLevel.SERIALIZABLE + if serializable + else IsolationLevel.READ_COMMITTED + ) + + +def set_mysql_options(db_options: dict): + """Set database options specific to mysql backend.""" + # TODO TCP time outs and keepalives + + # MariaDB's default isolation level is Repeatable Read which is + # normally fine, but most developers think the database server is + # actually going to do Serializable type checks on the queries to + # protect against simultaneous changes. + # https://mariadb.com/kb/en/mariadb-transactions-and-isolation-levels-for-sql-server-users/#changing-the-isolation-level + # https://docs.djangoproject.com/en/3.2/ref/databases/#mysql-isolation-level + if 'isolation_level' not in db_options: + serializable = get_boolean_setting( + 'INVENTREE_DB_ISOLATION_SERIALIZABLE', 'database.serializable', False + ) + db_options['isolation_level'] = ( + 'serializable' if serializable else 'read committed' + ) + + +def set_sqlite_options(db_options: dict): + """Set database options specific to sqlite backend. 
+ + References: + - https://docs.djangoproject.com/en/5.0/ref/databases/#sqlite-notes + - https://docs.djangoproject.com/en/6.0/ref/databases/#database-is-locked-errors + """ + import InvenTree.ready + + # Specify minimum timeout behavior for SQLite connections + if 'timeout' not in db_options: + db_options['timeout'] = int( + get_setting('INVENTREE_DB_TIMEOUT', 'database.timeout', 10) + ) + + # Specify the transaction mode for the database + # For the backend worker thread, IMMEDIATE mode is used, + # as it has been determined to provide better protection against database locks in the worker thread + db_options['transaction_mode'] = ( + 'IMMEDIATE' if InvenTree.ready.isInWorkerThread() else 'DEFERRED' + ) + + # SQLite's default isolation level is Serializable due to SQLite's + # single writer implementation. Presumably as a result of this, it is + # not possible to implement any lower isolation levels in SQLite. + # https://www.sqlite.org/isolation.html + + # Specify that we want to use Write-Ahead Logging (WAL) mode for SQLite databases, as this allows for better concurrency and performance + db_options['init_command'] = 'PRAGMA journal_mode=WAL;' diff --git a/src/backend/InvenTree/InvenTree/settings.py b/src/backend/InvenTree/InvenTree/settings.py index c5a86a9857..b8fa0189dc 100644 --- a/src/backend/InvenTree/InvenTree/settings.py +++ b/src/backend/InvenTree/InvenTree/settings.py @@ -33,7 +33,7 @@ from InvenTree.version import checkMinPythonVersion, inventreeCommitHash from users.oauth2_scopes import oauth2_scopes from . 
import config -from .setting import locales, markdown, spectacular, storages +from .setting import db_backend, locales, markdown, spectacular, storages try: import django_stubs_ext @@ -720,108 +720,8 @@ db_options = db_config.get('OPTIONS', db_config.get('options')) if db_options is None: db_options = {} -# Specific options for postgres backend -if 'postgres' in DB_ENGINE: # pragma: no cover - from django.db.backends.postgresql.psycopg_any import ( # type: ignore[unresolved-import] - IsolationLevel, - ) - - # Connection timeout - if 'connect_timeout' not in db_options: - # The DB server is in the same data center, it should not take very - # long to connect to the database server - # # seconds, 2 is minimum allowed by libpq - db_options['connect_timeout'] = int( - get_setting('INVENTREE_DB_TIMEOUT', 'database.timeout', 2) - ) - - # Setup TCP keepalive - # DB server is in the same DC, it should not become unresponsive for - # very long. With the defaults below we wait 5 seconds for the network - # issue to resolve itself. It it that doesn't happen whatever happened - # is probably fatal and no amount of waiting is going to fix it. 
- # # 0 - TCP Keepalives disabled; 1 - enabled - if 'keepalives' not in db_options: - db_options['keepalives'] = int( - get_setting('INVENTREE_DB_TCP_KEEPALIVES', 'database.tcp_keepalives', 1) - ) - - # Seconds after connection is idle to send keep alive - if 'keepalives_idle' not in db_options: - db_options['keepalives_idle'] = int( - get_setting( - 'INVENTREE_DB_TCP_KEEPALIVES_IDLE', 'database.tcp_keepalives_idle', 1 - ) - ) - - # Seconds after missing ACK to send another keep alive - if 'keepalives_interval' not in db_options: - db_options['keepalives_interval'] = int( - get_setting( - 'INVENTREE_DB_TCP_KEEPALIVES_INTERVAL', - 'database.tcp_keepalives_internal', - '1', - ) - ) - - # Number of missing ACKs before we close the connection - if 'keepalives_count' not in db_options: - db_options['keepalives_count'] = int( - get_setting( - 'INVENTREE_DB_TCP_KEEPALIVES_COUNT', - 'database.tcp_keepalives_count', - '5', - ) - ) - - # # Milliseconds for how long pending data should remain unacked - # by the remote server - # TODO: Supported starting in PSQL 11 - # "tcp_user_timeout": int(os.getenv("PGTCP_USER_TIMEOUT", "1000"), - - # Postgres's default isolation level is Read Committed which is - # normally fine, but most developers think the database server is - # actually going to do Serializable type checks on the queries to - # protect against simultaneous changes. 
- # https://www.postgresql.org/docs/devel/transaction-iso.html - # https://docs.djangoproject.com/en/3.2/ref/databases/#isolation-level - if 'isolation_level' not in db_options: - serializable = get_boolean_setting( - 'INVENTREE_DB_ISOLATION_SERIALIZABLE', 'database.serializable', False - ) - db_options['isolation_level'] = ( - IsolationLevel.SERIALIZABLE - if serializable - else IsolationLevel.READ_COMMITTED - ) - -# Specific options for MySql / MariaDB backend -elif 'mysql' in DB_ENGINE: # pragma: no cover - # TODO TCP time outs and keepalives - - # MariaDB's default isolation level is Repeatable Read which is - # normally fine, but most developers think the database server is - # actually going to Serializable type checks on the queries to - # protect against siumltaneous changes. - # https://mariadb.com/kb/en/mariadb-transactions-and-isolation-levels-for-sql-server-users/#changing-the-isolation-level - # https://docs.djangoproject.com/en/3.2/ref/databases/#mysql-isolation-level - if 'isolation_level' not in db_options: - serializable = get_boolean_setting( - 'INVENTREE_DB_ISOLATION_SERIALIZABLE', 'database.serializable', False - ) - db_options['isolation_level'] = ( - 'serializable' if serializable else 'read committed' - ) - -# Specific options for sqlite backend -elif 'sqlite' in DB_ENGINE: - # TODO: Verify timeouts are not an issue because no network is involved for SQLite - - # SQLite's default isolation level is Serializable due to SQLite's - # single writer implementation. Presumably as a result of this, it is - # not possible to implement any lower isolation levels in SQLite. 
- # https://www.sqlite.org/isolation.html - pass +# Set database-specific options +db_backend.set_db_options(DB_ENGINE, db_options) # Provide OPTIONS dict back to the database configuration dict db_config['OPTIONS'] = db_options @@ -943,6 +843,10 @@ BACKGROUND_WORKER_COUNT = ( else 1 ) +# If running with SQLite, limit background worker threads to 1 to prevent database locking issues +if 'sqlite' in DB_ENGINE: + BACKGROUND_WORKER_COUNT = 1 + # django-q background worker configuration Q_CLUSTER = { 'name': 'InvenTree', diff --git a/src/backend/InvenTree/build/test_api.py b/src/backend/InvenTree/build/test_api.py index da9682fe00..26fdaf201e 100644 --- a/src/backend/InvenTree/build/test_api.py +++ b/src/backend/InvenTree/build/test_api.py @@ -1564,12 +1564,13 @@ class BuildLineTests(BuildAPITest): # Filter by 'available' status # Note: The max_query_time is bumped up here, as postgresql backend has some strange issues (only during testing) - response = self.get(url, data={'available': True}, max_query_time=15) + # TODO: This needs to be addressed in the future, as 25 seconds is an unacceptably long time for a query to take in testing + response = self.get(url, data={'available': True}, max_query_time=25) n_t = len(response.data) self.assertGreater(n_t, 0) # Note: The max_query_time is bumped up here, as postgresql backend has some strange issues (only during testing) - response = self.get(url, data={'available': False}, max_query_time=15) + response = self.get(url, data={'available': False}, max_query_time=25) n_f = len(response.data) self.assertGreater(n_f, 0) diff --git a/tasks.py b/tasks.py index a53296dfb2..758c8b8273 100644 --- a/tasks.py +++ b/tasks.py @@ -1350,19 +1350,21 @@ def test_translations(c): 'translations': 'Compile translations before running tests', 'keepdb': 'Keep the test database after running tests (default = False)', 'pytest': 'Use pytest to run tests', + 'verbosity': 'Verbosity level for test output (default = 1)', } ) def test( c, - 
check=False, - disable_pty=False, - runtest='', - migrations=False, - report=False, - coverage=False, - translations=False, - keepdb=False, - pytest=False, + check: bool = False, + disable_pty: bool = False, + runtest: str = '', + migrations: bool = False, + report: bool = False, + coverage: bool = False, + translations: bool = False, + keepdb: bool = False, + pytest: bool = False, + verbosity: int = 1, ): """Run unit-tests for InvenTree codebase. @@ -1410,6 +1412,8 @@ def test( cmd += ' --exclude-tag performance_test' + cmd += f' --verbosity {verbosity}' + if coverage: # Run tests within coverage environment, and generate report run(c, f'coverage run {manage_py_path()} {cmd}')