diff --git a/backend/app/agent/compaction.py b/backend/app/agent/compaction.py index c5b616cd..d97dcdb6 100644 --- a/backend/app/agent/compaction.py +++ b/backend/app/agent/compaction.py @@ -12,6 +12,7 @@ import hashlib import json import logging +import re import time from datetime import UTC from typing import Any, cast @@ -80,6 +81,22 @@ def _serialize_snapshot(text: str | None, cap: int) -> str | None: ) +_URL_RE = re.compile(r"https?://\S+") + + +def _strip_assistant_noise(text: str) -> str: + """Strip URLs from an assistant reply before sending it to the compactor. + + Assistant replies often quote tool-receipt links (CompanyCam photo URLs, + QBO deep links, AppFolio work-order URLs) verbatim alongside the actual + durable content. The URLs are operational chatter the compactor wastes + context summarizing. The semantic prose around them is what we care + about. User messages are left alone, since URLs the contractor pastes + are usually intentional. + """ + return _URL_RE.sub("", text) + + def _format_messages_for_compaction(messages: list[AgentMessage]) -> str: """Format a list of agent messages into a readable text block for the LLM.""" lines: list[str] = [] @@ -87,7 +104,8 @@ def _format_messages_for_compaction(messages: list[AgentMessage]) -> str: if isinstance(msg, UserMessage): lines.append(f"User: {msg.content}") elif isinstance(msg, AssistantMessage) and msg.content: - lines.append(f"Assistant: {msg.content}") + cleaned = _strip_assistant_noise(msg.content) + lines.append(f"Assistant: {cleaned}") return "\n".join(lines) @@ -303,36 +321,50 @@ async def compact_session( raw_content = get_response_text(response) result = _parse_compaction_response(raw_content) - # Capture exactly what got appended to HISTORY.md so we can compute the - # "after" snapshot deterministically below. ``None`` means no append - # happened this event. - appended_history_entry: str | None = None + # An LLM that echoes existing content verbatim is not a memory change. + # ``.strip()`` ignores trailing-whitespace noise that ``write_*_async`` + # would normalize anyway. + memory_changed = ( + bool(result.memory_update) + and result.memory_update.strip() != (current_memory or "").strip() + ) + user_changed = ( + bool(result.user_profile_update) + and result.user_profile_update.strip() != (current_user_profile or "").strip() + ) + soul_changed = ( + bool(result.soul_update) and result.soul_update.strip() != (current_soul or "").strip() + ) + + # Track the post-append HISTORY text for the audit snapshot. Stays + # equal to ``current_history`` when no entry was appended this event. + new_history: str = current_history - # Write updated MEMORY.md if the LLM produced content - if result.memory_update: + # Write updated MEMORY.md only when the rewrite actually differs. + if memory_changed: await memory_store.write_memory_async(result.memory_update) logger.info("Compaction rewrote MEMORY.md for user %s", user_id) - # Append summary to HISTORY.md if the LLM produced one + # Append summary to HISTORY.md if the LLM produced one. ``append_history`` + # returns the new full text under the same row-level lock that protected + # the read-and-write, so the snapshot we record matches what landed in + # the DB even when two compactions race. if result.summary: timestamp = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M") entry = result.summary.replace("[TIMESTAMP]", f"[{timestamp}]") try: - await memory_store.append_history(entry) - # Mirror the suffix that ``MemoryStore.append_history`` adds at - # the SQL level so the deterministic snapshot below matches. - appended_history_entry = entry + "\n" + new_history = await memory_store.append_history(entry) logger.info("Compaction appended history entry for user %s", user_id) except Exception: logger.exception("Failed to append history for user %s", user_id) - # Write updated USER.md if the LLM detected new profile info - if result.user_profile_update: + # Write updated USER.md only when the rewrite actually differs. + if user_changed: await memory_store.write_user_async(result.user_profile_update) logger.info("Compaction updated USER.md for user %s", user_id) - # Write updated SOUL.md if the LLM detected personality changes - if result.soul_update: + # Write updated SOUL.md only when the rewrite actually differs. + if soul_changed: await memory_store.write_soul_async(result.soul_update) logger.info("Compaction updated SOUL.md for user %s", user_id) @@ -340,8 +372,9 @@ async def compact_session( # so log aggregators (Railway, Loki) can group / filter without # needing JSON. ``input_tokens`` reflects the tokens Anthropic # billed; the ``trimmed_chars`` field gives a provider-agnostic - # input-size proxy. ``*_updated`` flags reveal whether the LLM - # actually produced content for each file vs returning empty. + # input-size proxy. ``*_updated`` flags reflect real persisted + # diffs: an LLM that returns content identical to what was already + # on disk produces ``False`` here, not ``True``. _input_tokens = response.usage.input_tokens or 0 if response.usage else 0 _output_tokens = response.usage.output_tokens or 0 if response.usage else 0 _duration_ms = int((time.monotonic() - _start_monotonic) * 1000) @@ -355,9 +388,9 @@ async def compact_session( _input_tokens, _output_tokens, _duration_ms, - bool(result.memory_update), - bool(result.user_profile_update), - bool(result.soul_update), + memory_changed, + user_changed, + soul_changed, len(result.summary or ""), ) @@ -368,16 +401,12 @@ async def compact_session( # they share ``get_memory_store(user_id)``. A re-read could pick up the # other task's write and record a misleading "after" in this row's # audit log. The compaction prompt returns full rewrites for memory / - # user / soul, and ``append_history`` is a SQL-level concatenation we - # mirror via ``appended_history_entry`` above, so all four "after" - # values are computable without re-reading. - new_memory = result.memory_update if result.memory_update else current_memory - new_user = result.user_profile_update if result.user_profile_update else current_user_profile - new_soul = result.soul_update if result.soul_update else current_soul - if appended_history_entry is not None: - new_history = (current_history or "") + appended_history_entry - else: - new_history = current_history + # user / soul, and ``append_history`` returns the row's new full + # plaintext under the same row-level lock that wrote it, so all four + # "after" values are computable without re-reading. + new_memory = result.memory_update if memory_changed else current_memory + new_user = result.user_profile_update if user_changed else current_user_profile + new_soul = result.soul_update if soul_changed else current_soul cap = settings.compaction_event_snapshot_max_bytes_per_file snapshots = _build_snapshot_pairs( @@ -428,9 +457,9 @@ async def compact_session( output_tokens=_output_tokens, duration_ms=_duration_ms, max_message_seq=max_message_seq, - memory_updated=bool(result.memory_update), - user_profile_updated=bool(result.user_profile_update), - soul_updated=bool(result.soul_update), + memory_updated=memory_changed, + user_profile_updated=user_changed, + soul_updated=soul_changed, summary_len=len(result.summary or ""), snapshots=snapshots, llm_call=llm_call, @@ -438,7 +467,7 @@ async def compact_session( except Exception: logger.exception("Failed to persist compaction event for user %s", user_id) - return result.memory_update, max_message_seq + return (result.memory_update if memory_changed else ""), max_message_seq def _build_snapshot_pairs( diff --git a/backend/app/agent/memory_db.py b/backend/app/agent/memory_db.py index 6d1ab279..d5fbc37d 100644 --- a/backend/app/agent/memory_db.py +++ b/backend/app/agent/memory_db.py @@ -144,7 +144,7 @@ async def read_history_async(self) -> str: finally: await db.close() - async def append_history(self, entry: str) -> None: + async def append_history(self, entry: str) -> str: """Append an entry to history text (equivalent of HISTORY.md). Reads the current row under ``SELECT ... FOR UPDATE`` to @@ -153,6 +153,17 @@ async def append_history(self, entry: str) -> None: SQL-side concatenation is not viable because ``history_text`` is an ``EncryptedString`` column whose envelope format is not concat-safe. + + Guarantees a newline between the existing text and the new + entry: if the stored text is non-empty and does not already + end with a newline (e.g. a manual edit, or older text written + before this guarantee), a separator is inserted so two entries + never end up jammed together as one line. + + Returns the row's new full plaintext so callers (compaction + audit) can record the post-append snapshot without re-reading + the row, which would race with concurrent compactions sharing + the same user. """ suffix = entry + "\n" async with db_session_async() as db: @@ -165,14 +176,19 @@ async def append_history(self, entry: str) -> None: history_text=suffix, ) ) - else: - full_new_text = (doc.history_text or "") + suffix - await db.execute(_append_history_update(doc.id, full_new_text)) + await db.commit() + return suffix + current = doc.history_text or "" + if current and not current.endswith("\n"): + current += "\n" + full_new_text = current + suffix + await db.execute(_append_history_update(doc.id, full_new_text)) await db.commit() + return full_new_text - async def append_history_async(self, entry: str) -> None: + async def append_history_async(self, entry: str) -> str: """Deprecated alias of :meth:`append_history`.""" - await self.append_history(entry) + return await self.append_history(entry) # -- soul text --------------------------------------------------------- diff --git a/backend/app/agent/prompts/compaction.md b/backend/app/agent/prompts/compaction.md index 3328c88d..31a94aed 100644 --- a/backend/app/agent/prompts/compaction.md +++ b/backend/app/agent/prompts/compaction.md @@ -1,40 +1,77 @@ -You are a memory consolidation agent. You will receive five XML-tagged sections: ``, ``, ``, ``, and ``. Your job is to update the user's persistent files with any new durable facts from the conversation. +You are a memory consolidation agent for Clawbolt, an AI assistant for trades contractors. -Each file has a distinct purpose. Route facts to the correct file: +## Operating principle -**user_profile (USER.md)**: the user's personal and business profile. -- Name, preferred name, pronouns -- Trade/profession, business name, crew size -- Pricing: day rate, hourly rate, per-unit rates, markup policies -- Geographic area, service radius, zip code -- Preferred tools, equipment, material brands (general preferences) -- Working hours, availability, timezone -- Preferred contact method, response time expectations +Clawbolt is **not the system of record**. The contractor's authoritative data lives in their integrations: -**memory (MEMORY.md)**: durable business facts that are not about the user themselves. -- Client names, contact info, project history -- Specific job quotes, pricing history per project -- Supplier details, material costs for particular jobs -- Job details, measurements, scheduling commitments -- Business policies, terms, recurring arrangements +| Source of truth | What it owns | +| --- | --- | +| QuickBooks | customers, contacts, invoices, estimates, items, payments | +| CompanyCam | projects, addresses, photos, project status | +| AppFolio | work orders, tenant info, vendor jobs | +| Google Calendar / heartbeat | time-bounded reminders, recurring tasks | +| Google Drive | saved files, receipt images | -**soul (SOUL.md)**: the assistant's personality and communication style. -- How the user wants the assistant to talk (tone, formality, humor) -- Communication preferences ("be more blunt", "stop using emojis") -- Working relationship norms +A fact owned by an integration can change inside that integration without telling Clawbolt. Phone numbers, emails, statuses, amounts, IDs, addresses can all be edited, rotated, or replaced upstream at any time. Memorizing them creates a stale-cache risk: a value that was correct when written can become wrong, even dangerously wrong, by the time the agent reads it next. -The `` section is read-only context (reminder items and recurring tasks). +**Worked example:** AppFolio rotates tenant contact phone numbers every few days for privacy. A memorized number quoted back next week now belongs to a different tenant, and the contractor calls a stranger. Look these values up live, every time. Never memorize a value the source system can change without telling Clawbolt. -Your response must be a JSON object with these fields: +Memory exists for cross-system knowledge that lives nowhere else. -1. "memory_update": the full updated long-term memory as markdown. Base this only on the content from `` plus new durable facts from ``. Remove facts that are clearly outdated or contradicted. If nothing new was learned, return the existing memory unchanged. +## Inputs -2. "summary": a 1-3 sentence summary of the conversation. Start with a timestamp placeholder [TIMESTAMP]. Include enough detail to be useful when searching later (names, topics, decisions). If the conversation is trivial small talk, use an empty string. +You will receive ``, ``, ``, ``, and ``. Update the persistent files with new durable facts and prune items that no longer belong. -3. "user_profile_update": the full updated user profile as markdown. Base this only on the content from `` plus new profile-level facts from ``. Preserve ALL existing content unless explicitly contradicted. If nothing profile-relevant was discussed, use an empty string. +## MEMORY.md: cross-system business knowledge -4. "soul_update": the full updated soul/personality as markdown. Base this only on the content from `` plus new personality/style instructions from ``. If no personality changes were discussed, use an empty string. +**Include:** +- Pricing rules and rate cards keyed by client +- Cross-system relationships ("X is billed through Y, not a direct customer") +- Disambiguation guidance +- Communication conventions and shorthand +- Persistent process rules -Do not duplicate facts across files. A day rate goes in user_profile_update, not memory_update. A client's phone number goes in memory_update, not user_profile_update. +**Do not include:** +- Anything an integration owns: customer IDs, emails, phone numbers, addresses, invoice / estimate contents, project status, work-order details. The agent looks these up live. +- Transient state: tool-call failures, "X is broken" notes, integration outages, deep links, draft IDs, upload confirmations. +- Reminders that have fired or follow-ups that are complete. Open follow-ups belong in heartbeat. + +**Explicit user save requests override these exclusion rules.** If the conversation contains a clear directive to save a fact ("remember X", "save this", "make a note that..."), preserve that fact in MEMORY.md, even when it overlaps with what an integration owns. The contractor has chosen to memorialize it; trust that. The base agent is responsible for warning the contractor about staleness risk on mutating values at save time, so by the time the conversation reaches you, an explicit save is intentional. + +**Prune on rewrite.** Drop excluded items even if a previous compaction wrote them. Once an estimate is sent in QBO, replace a full transcription of its contents with a one-line breadcrumb (`" estimate sent, see QBO"`) or remove the entry entirely. Drop bug notes you wrote yesterday. Drop fired reminders. Keep cross-system rules and conventions. + +## USER.md: the contractor themselves + +- Name, business name, trade, crew composition +- Default rates (day rate, hourly), service area, timezone +- Working-hours and communication preferences +- Which integrations the contractor has connected on the Clawbolt side + +Client-specific pricing or billing rules belong in MEMORY.md, not here. Preserve every existing field on rewrite; only change a field the conversation contradicts. Return an empty string when nothing profile-relevant changed. + +## SOUL.md: the assistant's personality + +- Tone, formality, humor +- "be more blunt", "stop using emojis", working-relationship norms + +The `` section is read-only context. Do not promote already-fired heartbeat items into memory. + +## HISTORY.md (the `summary` field) + +A breadcrumb log, not a transaction log. The agent uses it to answer "did we work on this recently?" before referring back to integrations. + +- One terse 1 to 3 sentence entry per event, prefixed `[TIMESTAMP]`. +- Pointers over numbers: `"Sent estimate, details in QBO"` beats `"Sent $X,XXX estimate (txnId=NNN) with N line items..."`. +- Drop deep links, draft IDs, and dollar amounts (unless the dollar is genuinely the news). +- Skip trivial small talk. Return an empty string. + +## Response format + +Return only a JSON object: + +1. `memory_update`: full updated MEMORY.md with prune rules applied. Return existing verbatim if no change. +2. `summary`: 1 to 3 sentence breadcrumb starting `[TIMESTAMP]`. Empty string for trivial conversations. +3. `user_profile_update`: full updated USER.md, all fields preserved. Empty string if no change. +4. `soul_update`: full updated SOUL.md. Empty string if no change. Return only the JSON object, no other text. diff --git a/backend/app/agent/prompts/instructions.md b/backend/app/agent/prompts/instructions.md index 590251ac..41c67ba9 100644 --- a/backend/app/agent/prompts/instructions.md +++ b/backend/app/agent/prompts/instructions.md @@ -13,7 +13,7 @@ Your replies are read on a phone. Format for mobile text messages: - Keep lines short. Text wraps awkwardly on small screens. ## Multi-field tasks -When a request needs several pieces of information (an estimate, a calendar event, a customer record) and the user has only supplied some, fill in sensible defaults from context (memory, USER.md, prior conversation) and propose the complete result. Surface the assumptions in one short line so the user can amend with one reply, e.g. "Drafted at 8 hours labor, $50/hr, materials $200. Change anything?" +When a request needs several pieces of information (an estimate, a calendar event, a customer record) and the user has only supplied some, fill in sensible defaults from context (memory, USER.md, prior conversation) and propose the complete result. Surface the assumptions in one short line so the user can amend with one reply. - Only ask up front for high-stakes, unguessable fields: recipient email before sending, deletion confirmations, other irreversible actions. - Treat "estimate reasonable X" or "you decide" as explicit permission to act, not an invitation to read the values back as questions. @@ -26,11 +26,26 @@ When a tool fails, no confirmation is appended. Explain plainly what went wrong ## Keeping files up to date Update these files proactively as you learn new things. Do not ask permission. Just do it naturally as part of the conversation. +You are not the system of record for the contractor; the integrations are. Look them up live for current values instead of mirroring them into your files where they can go stale. + - **SOUL.md**: Your personality, communication style, and identity. Update when the user gives you feedback about how to talk ("be more blunt", "stop using emojis") or when your working relationship evolves. This file defines who you are. -- **USER.md**: The user's business profile: name, trade, crew size, pricing approach, geographic area, tools they use, preferred working hours, timezone. Update whenever you learn new business details. The richer this file, the better your estimates and recommendations. -- **MEMORY.md**: Durable business facts: client names and contact info, pricing history, supplier details, job specifics, material costs, business policies. Update whenever you learn facts that should persist across conversations. +- **USER.md**: The user's business profile: name, business name, trade, crew size, default day/hourly rate, geographic area, timezone, working-hours preferences. Client-specific pricing rules live in MEMORY.md, not here. +- **MEMORY.md**: Durable cross-system knowledge that lives nowhere else: pricing rules and rate cards keyed by client, communication conventions, cross-system relationships ("X is billed through Y, not a direct customer"), disambiguation guidance, persistent process rules. Do not write customer contact details, invoice contents, project addresses, or work-order state here: those live in the integrations, can change without telling you, and looking them up live is more reliable than recalling them. - **HEARTBEAT.md**: Recurring things to check on: unpaid invoices, pending estimates, ongoing follow-ups, active job deadlines. Items surface within a window, not at an exact clock time, so don't write time-specific reminders ("at 2pm", "7:30am") here (see the Timed reminders section). Suggest adding items when the user asks about ongoing monitoring. +## "Remember this" requests + +When the user explicitly says "remember X", "save this", "make a note that...", honor the request. Two cases call for a brief caveat before saving: + +- **The value can change in the source system.** Phone numbers, emails, statuses, balances. Save if the user insists but flag the staleness risk in one sentence ("Saving for now, but AppFolio rotates these numbers, so I'll re-check before quoting it back"), or offer to skip and look it up live each time. +- **The fact already lives canonically in a connected integration.** Saving a duplicate creates drift between the two copies. Offer to look it up live; save if the user prefers the convenience. + +Never refuse a save request outright. + +## When asked how you remember things + +If the user asks how you remember things or why you forgot something, answer briefly: you keep durable cross-system rules in MEMORY.md and rely on the integrations for current values. You do not memorize values the integrations can change, since they go stale. Do not volunteer this unprompted. + ## Proactive monitoring - When a user asks to be notified about changes or wants recurring visibility into data, suggest adding a heartbeat item so it gets checked automatically. - Do not wait for the user to mention the heartbeat. If the request is about ongoing monitoring, proactively offer to set it up. diff --git a/tests/test_compaction.py b/tests/test_compaction.py index 349919ed..51353b21 100644 --- a/tests/test_compaction.py +++ b/tests/test_compaction.py @@ -1239,10 +1239,161 @@ def test_build_snapshot_pairs_skips_unchanged() -> None: # --------------------------------------------------------------------------- -# load_conversation_history watermark filter +# Issue #1243 sub-task 2: assistant URL stripping before compaction # --------------------------------------------------------------------------- +def test_format_messages_strips_urls_from_assistant_content() -> None: + """Tool-receipt deep links in assistant replies should not reach the + compaction model. The links are operational chatter the compactor + wastes context summarizing; the prose around them is what matters. + """ + messages: list[AgentMessage] = [ + AssistantMessage( + content=( + "Photo uploaded to CompanyCam project 99101890. " + "View it at https://app.companycam.com/photos/3166227657 " + "or check the project at https://app.companycam.com/projects/99101890." + ) + ), + ] + out = _format_messages_for_compaction(messages) + assert "https://" not in out + assert "Photo uploaded to CompanyCam project 99101890" in out + + +def test_format_messages_keeps_user_content_urls() -> None: + """User-pasted URLs are intentional and should pass through unchanged. + Only assistant noise gets stripped; we don't want to silently drop a + spec or photo URL the contractor sent us. + """ + messages: list[AgentMessage] = [ + UserMessage(content="Spec is at https://example.com/spec.pdf, can you check it?"), + AssistantMessage(content="Got it."), + ] + out = _format_messages_for_compaction(messages) + assert "https://example.com/spec.pdf" in out + + +# --------------------------------------------------------------------------- +# Issue #1243 sub-task 4: *_updated flags reflect real persisted diffs +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio() +async def test_compact_session_does_not_write_when_memory_unchanged( + test_user: UserData, +) -> None: + """LLM returning the existing MEMORY.md verbatim must not be recorded + as a memory update. Otherwise every compaction event flips ``memory_updated`` + to True and the admin "memory updated" indicator becomes meaningless. + """ + store = get_memory_store(test_user.id) + existing = "## Pricing\n- Arbors: $195 flat ≤3hrs" + await store.write_memory_async(existing) + + mock_response = make_text_response( + json.dumps({"memory_update": existing, "summary": "[TIMESTAMP] Trivial chat."}) + ) + messages: list[AgentMessage] = [ + UserMessage(content="just checking in"), + AssistantMessage(content="No updates needed."), + ] + + with patch("backend.app.agent.compaction.amessages", return_value=mock_response): + memory_update, _ = await compact_session(test_user.id, messages) + + # Return value reflects "no real diff": empty string, not the verbatim + # echo of the existing memory. + assert memory_update == "" + + async with db_session_async() as db: + event = ( + await db.execute(select(CompactionEvent).filter_by(user_id=test_user.id)) + ).scalar_one_or_none() + assert event is not None + assert event.memory_updated is False + + +@pytest.mark.asyncio() +async def test_compact_session_summary_log_marks_memory_unchanged_when_llm_echoes( + test_user: UserData, caplog: pytest.LogCaptureFixture +) -> None: + """The structured-summary log line must mirror the persisted flag: an + LLM that echoes the existing memory verbatim is not a memory change. + """ + import logging + + store = get_memory_store(test_user.id) + existing = "## Pricing\n- Default: $500/day" + await store.write_memory_async(existing) + + mock_response = make_text_response( + json.dumps({"memory_update": existing, "summary": "[TIMESTAMP] Trivial."}) + ) + messages: list[AgentMessage] = [UserMessage(content="hi")] + + with ( + caplog.at_level(logging.INFO, logger="backend.app.agent.compaction"), + patch("backend.app.agent.compaction.amessages", return_value=mock_response), + ): + await compact_session(test_user.id, messages) + + summary_lines = [r for r in caplog.records if "compaction.summary" in r.getMessage()] + assert len(summary_lines) == 1 + assert "memory_updated=False" in summary_lines[0].getMessage() + + +# --------------------------------------------------------------------------- +# Issue #1243 sub-task 1: HISTORY.md append integrity +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio() +async def test_compaction_history_snapshot_matches_db_after_append( + test_user: UserData, +) -> None: + """Regression: the persisted ``history_text_after`` snapshot in the + audit row must equal the live ``MemoryDocument.history_text``. Older + code reconstructed the snapshot from a ``.strip()``'d read of the + pre-write text, which silently dropped the trailing newline that the + DB write actually retains, so two consecutive history entries showed + up jammed together in the audit even though they were separated in + the live row. + """ + store = get_memory_store(test_user.id) + # Seed an existing entry so the new entry's append crosses the + # entry-to-entry boundary the bug used to corrupt. + await store.append_history("[2026-05-01 09:00] First entry") + + mock_response = make_text_response( + json.dumps( + { + "memory_update": "", + "summary": "[TIMESTAMP] Second entry from compaction.", + } + ) + ) + messages: list[AgentMessage] = [UserMessage(content="something happened")] + + with patch("backend.app.agent.compaction.amessages", return_value=mock_response): + await compact_session(test_user.id, messages) + + live_history = await store.read_history_async() + async with db_session_async() as db: + event = ( + await db.execute(select(CompactionEvent).filter_by(user_id=test_user.id)) + ).scalar_one_or_none() + assert event is not None + snapshot = event.history_text_after + assert snapshot is not None + # Both views must show the entries cleanly separated by a newline. + assert "First entry\n[" in snapshot + # Snapshot mirrors the DB plaintext (modulo the trailing newline that + # ``read_history_async`` strips for callers). + assert snapshot.rstrip("\n") == live_history + + async def _seed_session_with_messages(user: User, message_count: int) -> ChatSession: """Insert a ChatSession for *user* with *message_count* alternating inbound/outbound messages, returning the persisted ChatSession. diff --git a/tests/test_memory.py b/tests/test_memory.py index 4eed6c0c..d406ebff 100644 --- a/tests/test_memory.py +++ b/tests/test_memory.py @@ -88,3 +88,50 @@ async def test_append_history_after_seed_round_trips(test_user: User) -> None: await store.append_history("follow-up") assert await store.read_history_async() == "seed\nfollow-up" + + +@pytest.mark.asyncio() +async def test_append_history_returns_new_full_text(test_user: User) -> None: + """append_history returns the row's new full plaintext under the same + row-level lock that wrote it. Compaction's audit-snapshot path relies + on this so the recorded ``history_text_after`` matches what landed in + the DB even when concurrent compactions race for the same user. + """ + store = get_memory_store(test_user.id) + + first_text = await store.append_history("first") + assert first_text == "first\n" + + second_text = await store.append_history("second") + assert second_text == "first\nsecond\n" + + +@pytest.mark.asyncio() +async def test_append_history_inserts_separator_when_existing_lacks_newline( + test_user: User, +) -> None: + """If the stored ``history_text`` does not end with a newline (e.g. an + older row written before this guarantee, or a manual edit), the next + append must still produce a clean separator. Two entries jammed + together as one line is the user-visible bug we're guarding against. + """ + from sqlalchemy import update + + from backend.app.database import db_session_async + from backend.app.models import MemoryDocument + + store = get_memory_store(test_user.id) + await store.append_history("first") + # Force the stored text to lose its trailing newline, simulating a + # row written before the separator guarantee. + async with db_session_async() as db: + await db.execute( + update(MemoryDocument) + .where(MemoryDocument.user_id == test_user.id) + .values(history_text="first") + ) + await db.commit() + + new_full = await store.append_history("second") + assert new_full == "first\nsecond\n" + assert await store.read_history_async() == "first\nsecond"