diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..ccdf79d0 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,26 @@ +# Coverage.py configuration for the unit test suite. +# Measures only first-party code: third-party venvs, the native standalone +# builds, screenshots and the test tree itself are excluded so the percentage +# reflects the application, not its dependencies. +[run] +branch = True +source = . +omit = + test/* + .venv/* + .venv-windows/* + native-build/* + scripts/* + screenshot/* + dist/* + query/* + */__pycache__/* + +[report] +show_missing = True +skip_covered = False +precision = 1 +exclude_also = + if __name__ == .__main__.: + raise NotImplementedError + if TYPE_CHECKING: diff --git a/.github/workflows/build-arm-intel.yml b/.github/workflows/build-arm-intel.yml index 87c69770..2ef9a67f 100644 --- a/.github/workflows/build-arm-intel.yml +++ b/.github/workflows/build-arm-intel.yml @@ -45,6 +45,7 @@ jobs: (github.event.pull_request.draft == false && github.event.pull_request.head.repo.full_name == github.repository) runs-on: ubuntu-latest + permissions: {} outputs: image: ${{ steps.repo_name.outputs.image }} tags: ${{ steps.tags.outputs.tags }} diff --git a/.github/workflows/lint-line-endings.yml b/.github/workflows/lint-line-endings.yml index 8c8ff61b..797a9e01 100644 --- a/.github/workflows/lint-line-endings.yml +++ b/.github/workflows/lint-line-endings.yml @@ -5,6 +5,9 @@ on: pull_request: branches: ["**"] +permissions: + contents: read + jobs: check-crlf: runs-on: ubuntu-latest diff --git a/.github/workflows/lint-mypy.yml b/.github/workflows/lint-mypy.yml new file mode 100644 index 00000000..0623eccc --- /dev/null +++ b/.github/workflows/lint-mypy.yml @@ -0,0 +1,34 @@ +name: Type Check (mypy) + +# Static type-checking scoped (via mypy.ini) to small, self-contained core +# modules so the gate passes today and can be widened as modules gain hints. +# Independent of the test/build workflows. + +on: + pull_request: + branches: [ main ] + push: + branches: [ main ] + +permissions: + contents: read + +jobs: + mypy: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install mypy + run: | + python -m pip install --upgrade pip + pip install mypy + + - name: Run mypy (scoped to core modules via mypy.ini) + run: mypy --config-file mypy.ini diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 09449310..88faaa9a 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -144,3 +144,125 @@ jobs: requirements/cpu.txt requirements/gpu.txt ignore-vulns: ${{ steps.cve_allowlist.outputs.ids }} + + # Image / supply-chain layer: Trivy scans the container DEFINITION (Dockerfile + # + deployment manifests) for misconfigurations and the working tree for leaked + # secrets -- coverage pip-audit (Python CVEs) and Bandit (Python SAST) do not + # provide. The config scan (Dockerfile/manifest misconfig) is report-only + # (exit-code 0); the filesystem scan GATES (exit-code 1) so HIGH/CRITICAL + # dependency CVEs or leaked secrets turn this check red, matching pip-audit's + # fail-on-CVE behaviour. A red check does NOT block merge unless this workflow + # is added to branch protection's required checks. Findings are uploaded as + # SARIF to code scanning so the detailed table lives in the private Security + # tab (write/maintain/admin only) instead of the public Actions run log. A + # full built-image OS scan can later be hooked into the image-publish pipeline. + trivy: + runs-on: ubuntu-latest + # security-events:write lets the SARIF upload publish to the Security tab. + permissions: + contents: read + security-events: write + actions: read + + steps: + - uses: actions/checkout@v4 + + - name: Install Trivy + run: | + curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh \ + | sh -s -- -b /usr/local/bin + + # Each scan emits native JSON once: it feeds both a numeric recap on the run + # summary (0 shown when clean) and, via `trivy convert`, the SARIF uploaded + # to the Security tab -- one scan, no second pass over the vuln DB. + + # Accepted / not-applicable findings are listed in .trivyignore (the Trivy + # counterpart of the pip-audit allowlist above). Config scan is report-only. + - name: Trivy config scan (Dockerfiles + deployment manifests) + if: always() + run: | + trivy config --severity HIGH,CRITICAL --exit-code 0 \ + --ignorefile .trivyignore \ + --format json --output trivy-config.json . + + - name: Convert Trivy config report to SARIF + if: always() + run: trivy convert --format sarif --output trivy-config.sarif trivy-config.json + + - name: Upload Trivy config SARIF to code scanning + if: always() + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: trivy-config.sarif + category: trivy-config + + # Filesystem scan GATES on HIGH/CRITICAL. --file-patterns adds the shipped + # requirements/*.txt to the pip vuln scan (Trivy's pip scanner otherwise only + # reads files named exactly requirements.txt); the -noavx2 legacy stacks and + # the dev-only test/requirements.txt are skipped. The detailed finding table + # reaches the Security tab via the converted SARIF (kept out of the public run + # log); only the counts are public. The step still exits non-zero on findings + # so the check goes red. + - name: Trivy filesystem scan (dependency CVEs + leaked secrets, fails on HIGH/CRITICAL) + run: | + trivy fs --scanners vuln,secret --severity HIGH,CRITICAL --exit-code 1 \ + --ignorefile .trivyignore \ + --file-patterns "pip:requirements/.*\.txt" \ + --skip-files requirements/common-noavx2.txt,requirements/cpu-noavx2.txt,test/requirements.txt \ + --format json --output trivy-fs.json \ + --skip-dirs .venv,.venv-windows,native-build,test/songs,model,dist . + + - name: Convert Trivy filesystem report to SARIF + if: always() + run: trivy convert --format sarif --output trivy-fs.sarif trivy-fs.json + + - name: Upload Trivy filesystem SARIF to code scanning + if: always() + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: trivy-fs.sarif + category: trivy-fs + + # Counts on the run summary (0 when clean), mirroring the Bandit summary. + - name: Trivy job summary + if: always() + run: | + python3 - <<'PY' + import json, os + + def load(path): + try: + with open(path) as fh: + return json.load(fh) + except (FileNotFoundError, json.JSONDecodeError): + return {} + + def tally(report, key): + total = high = crit = 0 + for res in (report.get("Results") or []): + for item in (res.get(key) or []): + total += 1 + sev = (item.get("Severity") or "").upper() + if sev == "HIGH": + high += 1 + elif sev == "CRITICAL": + crit += 1 + return total, high, crit + + cfg = load("trivy-config.json") + fs = load("trivy-fs.json") + rows = [ + ("Config misconfig (Dockerfiles / manifests)",) + tally(cfg, "Misconfigurations"), + ("Dependency CVEs (filesystem)",) + tally(fs, "Vulnerabilities"), + ("Leaked secrets (filesystem)",) + tally(fs, "Secrets"), + ] + total = sum(r[1] for r in rows) + lines = ["## Trivy (HIGH / CRITICAL)", "", + "Total findings: %d" % total, "", + "| Scan | Findings | HIGH | CRITICAL |", "|---|---|---|---|"] + for name, t, h, c in rows: + lines.append("| %s | %d | %d | %d |" % (name, t, h, c)) + lines += ["", "Detail for any non-zero row: Security tab -> Code scanning -> trivy-config / trivy-fs."] + with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as fh: + fh.write("\n".join(lines) + "\n") + PY diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index da9e274a..94c08ac7 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -26,6 +26,31 @@ jobs: python -m pip install --upgrade pip pip install -r test/requirements.txt - - name: Run unit tests + - name: Run unit tests with coverage run: | - pytest test/unit/ -v --tb=short + pytest test/unit/ -v --tb=short \ + --cov --cov-report=term-missing --cov-report=xml + + - name: Coverage summary + if: always() + run: | + python - <<'PY' + import xml.etree.ElementTree as ET, os + try: + rate = float(ET.parse("coverage.xml").getroot().get("line-rate", 0)) * 100 + except Exception: + rate = 0.0 + summary = os.environ.get("GITHUB_STEP_SUMMARY") + line = f"## Unit test coverage: {rate:.1f}% of lines\n" + if summary: + open(summary, "a").write(line) + print(line) + PY + + - name: Upload coverage report + if: always() + uses: actions/upload-artifact@v4 + with: + name: coverage-report + path: coverage.xml + retention-days: 7 diff --git a/.trivyignore b/.trivyignore new file mode 100644 index 00000000..a7e9cfea --- /dev/null +++ b/.trivyignore @@ -0,0 +1,33 @@ +# Trivy ignore list -- accepted / not-applicable HIGH/CRITICAL findings. +# This is the Trivy counterpart of the pip-audit allowlist in +# .github/workflows/security.yml. Format: put the human reason on a '#' line, +# then the bare CVE/GHSA id on the next line. Add an id here ONLY when the +# finding does not impact AudioMuse (unreachable path) or has no fixed release +# yet; PREFER bumping the dependency when a fix exists. + +# transformers X-CLIP checkpoint-conversion deserialization RCE -- unreachable: +# AudioMuse never converts X-CLIP checkpoints nor loads untrusted checkpoints. +# Mirrors PYSEC-2025-217 in the pip-audit allowlist; no fix in the 4.x line. +CVE-2025-14929 +GHSA-8jfx-5878-hv4v +# transformers Trainer torch.load RCE -- unreachable: AudioMuse does not use the +# transformers Trainer class. Mirrors CVE-2026-1839 in the pip-audit allowlist; +# fixed only in 5.0.0rc3 (a pre-release we do not pin to). +CVE-2026-1839 +GHSA-69w3-r845-3855 + +# The onnx model-loading CVEs are intentionally NOT excluded here because they +# are already fixed by onnx==1.21.0 in requirements/common.txt (the version Trivy +# now scans). They are recorded below, commented out, only to document why they +# would be non-applicable should a future onnx pin ever regress (AudioMuse loads +# ONLY its own app-bundled models via onnx.load(); it never loads attacker- +# supplied models or calls save_external_data on untrusted data): +# +# # onnx path traversal via external-data load -- only trusted bundled models +# CVE-2026-27489 +# # onnx untrusted-model-repo warning suppression -- models are vendored, not fetched at runtime +# CVE-2026-28500 +# # onnx malicious-model DoS / info disclosure -- only trusted bundled models +# CVE-2026-34445 +# # onnx TOCTOU in save_external_data -- never called on untrusted data +# GHSA-q56x-g2fj-4rj6 diff --git a/config.py b/config.py index c6084bec..33099759 100644 --- a/config.py +++ b/config.py @@ -110,7 +110,7 @@ def _compute_headers(): } # --- General Constants (Read from Environment Variables where applicable) --- -APP_VERSION = "v2.3.1" +APP_VERSION = "v2.3.2" MAX_DISTANCE = float(os.environ.get("MAX_DISTANCE", "0.5")) MAX_SONGS_PER_CLUSTER = int(os.environ.get("MAX_SONGS_PER_CLUSTER", "0")) MAX_SONGS_PER_ARTIST = int(os.getenv("MAX_SONGS_PER_ARTIST", "3")) # Max songs per artist in similarity results and clustering diff --git a/database.py b/database.py index f3b63ff1..4f8cb608 100644 --- a/database.py +++ b/database.py @@ -1028,7 +1028,7 @@ def init_db(): """, (query, 1.0, rank)) logger.info(f"Inserted {len(default_queries)} default DCLAP search queries") - + db.commit() # Release the advisory lock acquired at the top of init_db(). finally: diff --git a/lyrics/lyrics_transcriber.py b/lyrics/lyrics_transcriber.py index b71983e9..0536abee 100644 --- a/lyrics/lyrics_transcriber.py +++ b/lyrics/lyrics_transcriber.py @@ -339,6 +339,9 @@ def _fetch_from_configured_api( import socket as _socket import urllib.parse as _up _parsed_tpl = _up.urlparse(url_template) + if _parsed_tpl.scheme not in ('http', 'https'): + logger.warning('Lyrics API slot %s blocked: non-http(s) scheme %r', slot, _parsed_tpl.scheme) + return None _host = _parsed_tpl.hostname or '' _host_l = _host.strip().lower() if _host_l in ('localhost', '') or _host_l.endswith('.localhost') or _host_l.endswith('.local'): diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 00000000..94ca0eea --- /dev/null +++ b/mypy.ini @@ -0,0 +1,26 @@ +# Static type-checking, scoped to small, self-contained core modules so the +# check is a real gate that passes today. Widen `files` as more modules gain +# type hints. The bulk of the codebase is untyped, hence the lenient defaults +# (no disallow_untyped_defs) -- this catches concrete type errors, not missing +# annotations. +[mypy] +python_version = 3.11 +ignore_missing_imports = True +# Errors are reported only for the modules listed in `files`; imported-but-not +# scoped modules are followed for type info but their own errors are suppressed. +follow_imports = silent +warn_redundant_casts = True +no_implicit_optional = True +warn_unused_ignores = False +show_error_codes = True +files = + config.py, + sanitization.py, + ssrf_guard.py, + tz_helper.py, + proxy_prefix.py, + error/ + +# Tests are not type-checked. +[mypy-test.*] +ignore_errors = True diff --git a/requirements/common.txt b/requirements/common.txt index 77e08153..0c056a1a 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -16,7 +16,7 @@ ftfy==6.3.1 flasgger==0.9.7.1 sqlglot==30.6.0 google-genai==1.57.0 -mistralai>=1.11.1,<2.0.0 +mistralai==1.12.4 umap-learn==0.5.12 av==13.1.0 psutil==7.2.2 diff --git a/requirements/windows.txt b/requirements/windows.txt index fd5b1dbb..57c074cb 100644 --- a/requirements/windows.txt +++ b/requirements/windows.txt @@ -13,8 +13,9 @@ pgserver==0.1.4 waitress # Windows-specific: pywin32 for process management and named mutex. -# mcp 1.27.0 requires >=310 on Windows; pin to the minimum supported. -pywin32>=310 +# mcp 1.27.0 requires >=310 on Windows; pin to the minimum supported. 310 ships +# both win_amd64 (Intel/AMD) and win_arm64 wheels. +pywin32==310 # Windows tray (notification-area) menu app -- the counterpart to the macOS # menu-bar agent. pystray needs Pillow to load the .ico. diff --git a/static/menu.css b/static/menu.css index cee3f133..64f2b647 100644 --- a/static/menu.css +++ b/static/menu.css @@ -112,6 +112,15 @@ html.sidebar-open .sidebar { background-color: #2563EB; } +/* Keyboard focus ring inside the dark sidebar: white reads better than blue, + and a negative offset keeps it from being clipped by the sidebar's overflow. */ +.sidebar-nav li a:focus-visible, +.sidebar-nav li button:focus-visible, +.submenu li a:focus-visible { + outline: 2px solid #ffffff; + outline-offset: -2px; +} + /* Dark mode toggle button styling */ #dark-mode-toggle, #logout-btn { diff --git a/static/menu.js b/static/menu.js index 1ad6992e..df06fede 100644 --- a/static/menu.js +++ b/static/menu.js @@ -4,28 +4,56 @@ document.addEventListener('DOMContentLoaded', function() { const mainContent = document.querySelector('.main-content'); // The menu is now positioned off-screen by default via CSS. - // This script just handles the open/close classes. + // This script handles the open/close classes plus focus/ARIA state so the + // off-screen sidebar isn't tabbable and screen readers know its state. + + const focusFirstInSidebar = () => { + const first = sidebar.querySelector('a, button'); + if (first) first.focus(); + }; + + // Keep the toggle's aria-expanded in sync and make the off-screen sidebar + // unreachable (inert removes it from tab order + a11y tree; aria-hidden is a + // fallback for engines without inert support). + const setSidebarA11y = (isOpen) => { + if (menuToggle) menuToggle.setAttribute('aria-expanded', isOpen ? 'true' : 'false'); + if (isOpen) { + sidebar.removeAttribute('inert'); + sidebar.removeAttribute('aria-hidden'); + } else { + sidebar.setAttribute('inert', ''); + sidebar.setAttribute('aria-hidden', 'true'); + } + }; // Function to open the menu - const openMenu = () => { + const openMenu = ({ focus = true } = {}) => { sidebar.classList.add('open'); mainContent.classList.add('sidebar-open'); document.documentElement.classList.add('sidebar-open'); localStorage.setItem('menuOpen', 'true'); + setSidebarA11y(true); + if (focus) focusFirstInSidebar(); }; // Function to close the menu - const closeMenu = () => { + const closeMenu = ({ returnFocus = false } = {}) => { sidebar.classList.remove('open'); mainContent.classList.remove('sidebar-open'); document.documentElement.classList.remove('sidebar-open'); localStorage.setItem('menuOpen', 'false'); + setSidebarA11y(false); + if (returnFocus && menuToggle) menuToggle.focus(); }; - // Sync classes if menu was opened by FOUC prevention script + // Sync classes if menu was opened by FOUC prevention script (don't steal + // focus on load); otherwise mark the closed sidebar inert. if (document.documentElement.classList.contains('sidebar-open')) { sidebar.classList.add('open'); mainContent.classList.add('sidebar-open'); + setSidebarA11y(true); + } else { + setSidebarA11y(false); } // Event listener for the menu toggle button @@ -48,12 +76,21 @@ document.addEventListener('DOMContentLoaded', function() { } }); + // Escape closes the menu and returns focus to the toggle + document.addEventListener('keydown', (e) => { + if (e.key === 'Escape' && sidebar.classList.contains('open')) { + closeMenu({ returnFocus: true }); + } + }); + // --- Submenu accordion toggle --- - document.querySelectorAll('.has-submenu > .submenu-toggle').forEach(link => { - link.addEventListener('click', (e) => { + document.querySelectorAll('.has-submenu > .submenu-toggle').forEach(toggle => { + toggle.addEventListener('click', (e) => { e.preventDefault(); e.stopPropagation(); - link.closest('.has-submenu').classList.toggle('open'); + const parent = toggle.closest('.has-submenu'); + const isOpen = parent.classList.toggle('open'); + toggle.setAttribute('aria-expanded', isOpen ? 'true' : 'false'); }); }); diff --git a/static/script.js b/static/script.js index 3dc250d0..fcdc8d53 100644 --- a/static/script.js +++ b/static/script.js @@ -330,8 +330,10 @@ function displayTaskStatus(task) { statusTaskType.textContent = task.task_type_from_db || task.task_type || 'N/A'; const stateUpper = (task.state || task.status || 'IDLE').toUpperCase(); statusStatus.textContent = stateUpper; - statusProgress.textContent = task.progress || 0; - progressBar.style.width = `${task.progress || 0}%`; + const progressValue = task.progress || 0; + statusProgress.textContent = progressValue; + progressBar.style.width = `${progressValue}%`; + progressBar.setAttribute('aria-valuenow', progressValue); statusStatus.className = 'status-text'; // Reset classes let statusClass = 'status-pending'; diff --git a/static/style.css b/static/style.css index 1b8f50bb..248f1df4 100644 --- a/static/style.css +++ b/static/style.css @@ -97,6 +97,18 @@ body { transition: background-color 0.3s, color 0.3s; } +/* --- Accessibility: keyboard focus ring --- + Buttons, links and nav items had no focus style at all. Show a ring only for + keyboard users (not mouse clicks). Form fields keep their own box-shadow ring. */ +:focus-visible { + outline: 2px solid var(--color-primary); + outline-offset: 2px; + border-radius: 0.25rem; +} +:focus:not(:focus-visible) { + outline: none; +} + /* --- Main Container --- */ .container { width: 100%; @@ -290,16 +302,19 @@ textarea { gap: 0.5rem; color: #ffffff; font-weight: 700; - padding: 0.6rem 1rem; + padding: 0.75rem 1.5rem; border-radius: 0.5rem; border: none; cursor: pointer; - transition: background-color 0.18s ease, transform 0.06s ease; + transition: background-color 0.2s ease, transform 0.06s ease; font-size: 1rem; } .btn:active { transform: translateY(1px); } .btn:disabled { opacity: 0.6; cursor: not-allowed; } +/* Full-width block button (form submit buttons, task buttons) */ +.btn-block { width: 100%; } + /* Primary blue button (matches existing primary selectors) */ .btn-primary { background-color: var(--color-primary); } .btn-primary:hover { background-color: var(--color-primary-hover); } @@ -308,6 +323,10 @@ textarea { .btn-danger { background-color: var(--color-danger); } .btn-danger:hover { background-color: var(--color-danger-hover); } +/* Success/green button */ +.btn-success { background-color: var(--color-success); } +.btn-success:hover { background-color: var(--color-success-hover); } + /* Ghost / outline button for secondary actions */ .btn-ghost { background: transparent; @@ -373,74 +392,26 @@ textarea { margin-top: 1rem; /* Add space above buttons */ } -#start-analysis-btn, -#start-clustering-btn, -#fetch-playlists-btn, -#start-sync-btn, -#playlistForm button, -#similarity-form button, -#waveform-form button, -#artist-similarity-form button, -#find-artists-btn, -#create-artist-playlist-btn { - background-color: var(--color-primary); -} - -#start-analysis-btn:hover, -#start-clustering-btn:hover, -#fetch-playlists-btn:hover, -#start-sync-btn:hover, -#playlistForm button:hover, -#similarity-form button:hover, -#waveform-form button:hover, -#artist-similarity-form button:hover, -#find-artists-btn:hover, -#create-artist-playlist-btn:hover { - background-color: var(--color-primary-hover); -} - -#createPlaylistForm button, -#playlist-form button[style*="background-color: #16A34A"] { - background-color: var(--color-success) !important; /* Green */ -} -#createPlaylistForm button:hover, -#playlist-form button[style*="background-color: #16A34A"]:hover { - background-color: var(--color-success-hover) !important; -} - +/* Button colors are now supplied by the shared .btn-primary / .btn-danger / + .btn-success utilities (see above). The old per-ID color lists and the + inline-style attribute selector were removed once every action button was + migrated to the .btn system. The sizing block above remains as a benign + layout helper. */ -#cancel-task-btn, -#cancel-sync-btn { - background-color: var(--color-danger); -} -#cancel-task-btn:hover, -#cancel-sync-btn:hover { - background-color: var(--color-danger-hover); -} - -button:disabled, -#cancel-task-btn:disabled, -#cancel-sync-btn:disabled { - background-color: #93C5FD !important; /* Light blue - maybe needs var too? */ +/* Legacy (non-.btn) buttons keep the old greyed-blue disabled treatment. + .btn buttons are governed by .btn:disabled above, so a disabled .btn-danger + stays red and .btn-success stays green (dimmed), not blue. */ +button:not(.btn):disabled { + background-color: #93C5FD !important; /* Light blue */ cursor: not-allowed !important; - opacity: 0.7 !important; /* Changed from 1 to 0.7 for better visual indication */ + opacity: 0.7 !important; } -body.dark-mode button:disabled, -body.dark-mode #cancel-task-btn:disabled, -body.dark-mode #cancel-sync-btn:disabled { - background-color: #1E3A8A !important; /* Darker blue for disabled in dark mode */ +body.dark-mode button:not(.btn):disabled, +html.dark-mode button:not(.btn):disabled { + background-color: #1E3A8A !important; /* Darker blue in dark mode */ color: #9CA3AF !important; } -#cancel-task-btn:disabled, -#cancel-sync-btn:disabled { - background-color: #FCA5A5 !important; /* Light red */ -} -body.dark-mode #cancel-task-btn:disabled, -body.dark-mode #cancel-sync-btn:disabled { - background-color: #7F1D1D !important; /* Dark red */ -} - /* --- Task Status --- */ #task-status-display { diff --git a/tasks/paged_ivf.py b/tasks/paged_ivf.py index adfe1038..c22cdb62 100644 --- a/tasks/paged_ivf.py +++ b/tasks/paged_ivf.py @@ -988,7 +988,7 @@ def _iter_db_cells(self, cur, cell_ids): (self._index_name, list(cell_ids)), ) for cell_id, blob in cur.fetchall(): - ids, vecs = unpack_cell(bytes(blob), self._dim) + ids, vecs = unpack_cell(blob, self._dim) yield int(cell_id), ids, vecs def _read_cells(self, cell_ids: List[int], cache: _CellLruCache) -> Dict[int, Tuple[np.ndarray, np.ndarray]]: @@ -1005,11 +1005,10 @@ def _read_cells(self, cell_ids: List[int], cache: _CellLruCache) -> Dict[int, Tu offsets = self._cell_offsets if mm is not None: _note_mmap_activity(self) - ordered = sorted( - {int(c) for c in cell_ids}, - key=lambda cid: offsets.get(cid, (1 << 62, 0))[0], - ) - for cid in ordered: + # inputs are unique; read in file order, box to int only at the dict key + ordered = sorted(cell_ids, key=lambda c: offsets.get(int(c), (1 << 62, 0))[0]) + for c in ordered: + cid = int(c) cell = self._cell_from_mmap(mm, offsets, cid) if cell is not None: out[cid] = cell @@ -1106,7 +1105,7 @@ def query(self, vector, k: int): q = np.asarray(vector, dtype=np.float32).reshape(-1) order = self._rank_cells(q) cache = self._cache() - probe = [int(c) for c in order[:max(1, self._nprobe)]] + probe = order[:max(1, self._nprobe)] cells = self._read_cells(probe, cache) cand_ids: List[np.ndarray] = [] cand_vecs: List[np.ndarray] = [] diff --git a/templates/alchemy.html b/templates/alchemy.html index df979511..0fd9c3e5 100644 --- a/templates/alchemy.html +++ b/templates/alchemy.html @@ -156,7 +156,7 @@
I want to restore the database from the backup. This action is not reversible
- + diff --git a/templates/chat.html b/templates/chat.html index d51085ca..8f9f0c6b 100644 --- a/templates/chat.html +++ b/templates/chat.html @@ -266,7 +266,7 @@