Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 64 additions & 35 deletions backend/app/agent/compaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import hashlib
import json
import logging
import re
import time
from datetime import UTC
from typing import Any, cast
Expand Down Expand Up @@ -80,14 +81,31 @@ def _serialize_snapshot(text: str | None, cap: int) -> str | None:
)


_URL_RE = re.compile(r"https?://\S+")


def _strip_assistant_noise(text: str) -> str:
"""Strip URLs from an assistant reply before sending it to the compactor.

Assistant replies often quote tool-receipt links (CompanyCam photo URLs,
QBO deep links, AppFolio work-order URLs) verbatim alongside the actual
durable content. The URLs are operational chatter the compactor wastes
context summarizing. The semantic prose around them is what we care
about. User messages are left alone, since URLs the contractor pastes
are usually intentional.
"""
return _URL_RE.sub("", text)


def _format_messages_for_compaction(messages: list[AgentMessage]) -> str:
    """Format a list of agent messages into a readable text block for the LLM.

    User messages are emitted verbatim as ``User: ...`` lines. Assistant
    messages with non-empty content are emitted exactly once as
    ``Assistant: ...`` lines, after passing through
    ``_strip_assistant_noise``. Any other message type, and assistant
    messages with empty content, are skipped.

    Args:
        messages: The conversation messages to render, in order.

    Returns:
        The rendered lines joined with newlines (empty string when nothing
        was rendered).
    """
    lines: list[str] = []
    for msg in messages:
        if isinstance(msg, UserMessage):
            lines.append(f"User: {msg.content}")
        elif isinstance(msg, AssistantMessage) and msg.content:
            # Append only the cleaned text. Also appending the raw content
            # would duplicate the reply and defeat the noise stripping.
            cleaned = _strip_assistant_noise(msg.content)
            lines.append(f"Assistant: {cleaned}")
    return "\n".join(lines)


Expand Down Expand Up @@ -303,45 +321,60 @@ async def compact_session(
raw_content = get_response_text(response)
result = _parse_compaction_response(raw_content)

# Capture exactly what got appended to HISTORY.md so we can compute the
# "after" snapshot deterministically below. ``None`` means no append
# happened this event.
appended_history_entry: str | None = None
# An LLM that echoes existing content verbatim is not a memory change.
# ``.strip()`` ignores trailing-whitespace noise that ``write_*_async``
# would normalize anyway.
memory_changed = (
bool(result.memory_update)
and result.memory_update.strip() != (current_memory or "").strip()
)
user_changed = (
bool(result.user_profile_update)
and result.user_profile_update.strip() != (current_user_profile or "").strip()
)
soul_changed = (
bool(result.soul_update) and result.soul_update.strip() != (current_soul or "").strip()
)

# Track the post-append HISTORY text for the audit snapshot. Stays
# equal to ``current_history`` when no entry was appended this event.
new_history: str = current_history

# Write updated MEMORY.md if the LLM produced content
if result.memory_update:
# Write updated MEMORY.md only when the rewrite actually differs.
if memory_changed:
await memory_store.write_memory_async(result.memory_update)
logger.info("Compaction rewrote MEMORY.md for user %s", user_id)

# Append summary to HISTORY.md if the LLM produced one
# Append summary to HISTORY.md if the LLM produced one. ``append_history``
# returns the new full text under the same row-level lock that protected
# the read-and-write, so the snapshot we record matches what landed in
# the DB even when two compactions race.
if result.summary:
timestamp = datetime.datetime.now(datetime.UTC).strftime("%Y-%m-%d %H:%M")
entry = result.summary.replace("[TIMESTAMP]", f"[{timestamp}]")
try:
await memory_store.append_history(entry)
# Mirror the suffix that ``MemoryStore.append_history`` adds at
# the SQL level so the deterministic snapshot below matches.
appended_history_entry = entry + "\n"
new_history = await memory_store.append_history(entry)
logger.info("Compaction appended history entry for user %s", user_id)
except Exception:
logger.exception("Failed to append history for user %s", user_id)

# Write updated USER.md if the LLM detected new profile info
if result.user_profile_update:
# Write updated USER.md only when the rewrite actually differs.
if user_changed:
await memory_store.write_user_async(result.user_profile_update)
logger.info("Compaction updated USER.md for user %s", user_id)

# Write updated SOUL.md if the LLM detected personality changes
if result.soul_update:
# Write updated SOUL.md only when the rewrite actually differs.
if soul_changed:
await memory_store.write_soul_async(result.soul_update)
logger.info("Compaction updated SOUL.md for user %s", user_id)

# Single structured summary line. Fields are space-separated key=value
# so log aggregators (Railway, Loki) can group / filter without
# needing JSON. ``input_tokens`` reflects the tokens Anthropic
# billed; the ``trimmed_chars`` field gives a provider-agnostic
# input-size proxy. ``*_updated`` flags reveal whether the LLM
# actually produced content for each file vs returning empty.
# input-size proxy. ``*_updated`` flags reflect real persisted
# diffs: an LLM that returns content identical to what was already
# on disk produces ``False`` here, not ``True``.
_input_tokens = response.usage.input_tokens or 0 if response.usage else 0
_output_tokens = response.usage.output_tokens or 0 if response.usage else 0
_duration_ms = int((time.monotonic() - _start_monotonic) * 1000)
Expand All @@ -355,9 +388,9 @@ async def compact_session(
_input_tokens,
_output_tokens,
_duration_ms,
bool(result.memory_update),
bool(result.user_profile_update),
bool(result.soul_update),
memory_changed,
user_changed,
soul_changed,
len(result.summary or ""),
)

Expand All @@ -368,16 +401,12 @@ async def compact_session(
# they share ``get_memory_store(user_id)``. A re-read could pick up the
# other task's write and record a misleading "after" in this row's
# audit log. The compaction prompt returns full rewrites for memory /
# user / soul, and ``append_history`` is a SQL-level concatenation we
# mirror via ``appended_history_entry`` above, so all four "after"
# values are computable without re-reading.
new_memory = result.memory_update if result.memory_update else current_memory
new_user = result.user_profile_update if result.user_profile_update else current_user_profile
new_soul = result.soul_update if result.soul_update else current_soul
if appended_history_entry is not None:
new_history = (current_history or "") + appended_history_entry
else:
new_history = current_history
# user / soul, and ``append_history`` returns the row's new full
# plaintext under the same row-level lock that wrote it, so all four
# "after" values are computable without re-reading.
new_memory = result.memory_update if memory_changed else current_memory
new_user = result.user_profile_update if user_changed else current_user_profile
new_soul = result.soul_update if soul_changed else current_soul

cap = settings.compaction_event_snapshot_max_bytes_per_file
snapshots = _build_snapshot_pairs(
Expand Down Expand Up @@ -428,17 +457,17 @@ async def compact_session(
output_tokens=_output_tokens,
duration_ms=_duration_ms,
max_message_seq=max_message_seq,
memory_updated=bool(result.memory_update),
user_profile_updated=bool(result.user_profile_update),
soul_updated=bool(result.soul_update),
memory_updated=memory_changed,
user_profile_updated=user_changed,
soul_updated=soul_changed,
summary_len=len(result.summary or ""),
snapshots=snapshots,
llm_call=llm_call,
)
except Exception:
logger.exception("Failed to persist compaction event for user %s", user_id)

return result.memory_update, max_message_seq
return (result.memory_update if memory_changed else ""), max_message_seq


def _build_snapshot_pairs(
Expand Down
28 changes: 22 additions & 6 deletions backend/app/agent/memory_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ async def read_history_async(self) -> str:
finally:
await db.close()

async def append_history(self, entry: str) -> None:
async def append_history(self, entry: str) -> str:
"""Append an entry to history text (equivalent of HISTORY.md).

Reads the current row under ``SELECT ... FOR UPDATE`` to
Expand All @@ -153,6 +153,17 @@ async def append_history(self, entry: str) -> None:
SQL-side concatenation is not viable because ``history_text``
is an ``EncryptedString`` column whose envelope format is not
concat-safe.

Guarantees a newline between the existing text and the new
entry: if the stored text is non-empty and does not already
end with a newline (e.g. a manual edit, or older text written
before this guarantee), a separator is inserted so two entries
never end up jammed together as one line.

Returns the row's new full plaintext so callers (compaction
audit) can record the post-append snapshot without re-reading
the row, which would race with concurrent compactions sharing
the same user.
"""
suffix = entry + "\n"
async with db_session_async() as db:
Expand All @@ -165,14 +176,19 @@ async def append_history(self, entry: str) -> None:
history_text=suffix,
)
)
else:
full_new_text = (doc.history_text or "") + suffix
await db.execute(_append_history_update(doc.id, full_new_text))
await db.commit()
return suffix
current = doc.history_text or ""
if current and not current.endswith("\n"):
current += "\n"
full_new_text = current + suffix
await db.execute(_append_history_update(doc.id, full_new_text))
await db.commit()
return full_new_text

async def append_history_async(self, entry: str) -> str:
    """Deprecated alias of :meth:`append_history`.

    Forwards ``entry`` unchanged and returns whatever ``append_history``
    returns, so callers of either name observe the same result.
    """
    return await self.append_history(entry)

# -- soul text ---------------------------------------------------------

Expand Down
91 changes: 64 additions & 27 deletions backend/app/agent/prompts/compaction.md
Original file line number Diff line number Diff line change
@@ -1,40 +1,77 @@
You are a memory consolidation agent. You will receive five XML-tagged sections: `<current_memory>`, `<user_profile>`, `<soul>`, `<heartbeat>`, and `<conversation>`. Your job is to update the user's persistent files with any new durable facts from the conversation.
You are a memory consolidation agent for Clawbolt, an AI assistant for trades contractors.

Each file has a distinct purpose. Route facts to the correct file:
## Operating principle

**user_profile (USER.md)**: the user's personal and business profile.
- Name, preferred name, pronouns
- Trade/profession, business name, crew size
- Pricing: day rate, hourly rate, per-unit rates, markup policies
- Geographic area, service radius, zip code
- Preferred tools, equipment, material brands (general preferences)
- Working hours, availability, timezone
- Preferred contact method, response time expectations
Clawbolt is **not the system of record**. The contractor's authoritative data lives in their integrations:

**memory (MEMORY.md)**: durable business facts that are not about the user themselves.
- Client names, contact info, project history
- Specific job quotes, pricing history per project
- Supplier details, material costs for particular jobs
- Job details, measurements, scheduling commitments
- Business policies, terms, recurring arrangements
| Source of truth | What it owns |
| --- | --- |
| QuickBooks | customers, contacts, invoices, estimates, items, payments |
| CompanyCam | projects, addresses, photos, project status |
| AppFolio | work orders, tenant info, vendor jobs |
| Google Calendar / heartbeat | time-bounded reminders, recurring tasks |
| Google Drive | saved files, receipt images |

**soul (SOUL.md)**: the assistant's personality and communication style.
- How the user wants the assistant to talk (tone, formality, humor)
- Communication preferences ("be more blunt", "stop using emojis")
- Working relationship norms
A fact owned by an integration can change inside that integration without telling Clawbolt. Phone numbers, emails, statuses, amounts, IDs, and addresses can all be edited, rotated, or replaced upstream at any time. Memorizing them creates a stale-cache risk: a value that was correct when written can become wrong, even dangerously wrong, by the time the agent reads it next.

The `<heartbeat>` section is read-only context (reminder items and recurring tasks).
**Worked example:** AppFolio rotates tenant contact phone numbers every few days for privacy. A memorized number quoted back next week now belongs to a different tenant, and the contractor calls a stranger. Look these values up live, every time. Never memorize a value the source system can change without telling Clawbolt.

Your response must be a JSON object with these fields:
Memory exists for cross-system knowledge that lives nowhere else.

1. "memory_update": the full updated long-term memory as markdown. Base this only on the content from `<current_memory>` plus new durable facts from `<conversation>`. Remove facts that are clearly outdated or contradicted. If nothing new was learned, return the existing memory unchanged.
## Inputs

2. "summary": a 1-3 sentence summary of the conversation. Start with a timestamp placeholder [TIMESTAMP]. Include enough detail to be useful when searching later (names, topics, decisions). If the conversation is trivial small talk, use an empty string.
You will receive `<current_memory>`, `<user_profile>`, `<soul>`, `<heartbeat>`, and `<conversation>`. Update the persistent files with new durable facts and prune items that no longer belong.

3. "user_profile_update": the full updated user profile as markdown. Base this only on the content from `<user_profile>` plus new profile-level facts from `<conversation>`. Preserve ALL existing content unless explicitly contradicted. If nothing profile-relevant was discussed, use an empty string.
## MEMORY.md: cross-system business knowledge

4. "soul_update": the full updated soul/personality as markdown. Base this only on the content from `<soul>` plus new personality/style instructions from `<conversation>`. If no personality changes were discussed, use an empty string.
**Include:**
- Pricing rules and rate cards keyed by client
- Cross-system relationships ("X is billed through Y, not a direct customer")
- Disambiguation guidance
- Communication conventions and shorthand
- Persistent process rules

Do not duplicate facts across files. A day rate goes in user_profile_update, not memory_update. A client's phone number goes in memory_update, not user_profile_update.
**Do not include:**
- Anything an integration owns: customer IDs, emails, phone numbers, addresses, invoice / estimate contents, project status, work-order details. The agent looks these up live.
- Transient state: tool-call failures, "X is broken" notes, integration outages, deep links, draft IDs, upload confirmations.
- Reminders that have fired or follow-ups that are complete. Open follow-ups belong in heartbeat.

**Explicit user save requests override these exclusion rules.** If the conversation contains a clear directive to save a fact ("remember X", "save this", "make a note that..."), preserve that fact in MEMORY.md, even when it overlaps with what an integration owns. The contractor has chosen to memorialize it; trust that. The base agent is responsible for warning the contractor about staleness risk on mutating values at save time, so by the time the conversation reaches you, an explicit save is intentional.

**Prune on rewrite.** Drop excluded items even if a previous compaction wrote them. Once an estimate is sent in QBO, replace a full transcription of its contents with a one-line breadcrumb (`"<Client> estimate sent, see QBO"`) or remove the entry entirely. Drop bug notes you wrote yesterday. Drop fired reminders. Keep cross-system rules and conventions.

## USER.md: the contractor themselves

- Name, business name, trade, crew composition
- Default rates (day rate, hourly), service area, timezone
- Working-hours and communication preferences
- Which integrations the contractor has connected on the Clawbolt side

Client-specific pricing or billing rules belong in MEMORY.md, not here. Preserve every existing field on rewrite; only change a field the conversation contradicts. Return an empty string when nothing profile-relevant changed.

## SOUL.md: the assistant's personality

- Tone, formality, humor
- "be more blunt", "stop using emojis", working-relationship norms

The `<heartbeat>` section is read-only context. Do not promote already-fired heartbeat items into memory.

## HISTORY.md (the `summary` field)

A breadcrumb log, not a transaction log. The agent uses it to answer "did we work on this recently?" before referring back to integrations.

- One terse 1 to 3 sentence entry per event, prefixed `[TIMESTAMP]`.
- Pointers over numbers: `"Sent <Client> estimate, details in QBO"` beats `"Sent $X,XXX estimate (txnId=NNN) with N line items..."`.
- Drop deep links, draft IDs, and dollar amounts (unless the dollar amount is itself the news).
- Skip trivial small talk: when the conversation contains nothing else, return an empty string instead of an entry.

## Response format

Return only a JSON object:

1. `memory_update`: full updated MEMORY.md with prune rules applied. Return the existing memory verbatim if nothing changed.
2. `summary`: 1 to 3 sentence breadcrumb starting `[TIMESTAMP]`. Empty string for trivial conversations.
3. `user_profile_update`: full updated USER.md, all fields preserved. Empty string if no change.
4. `soul_update`: full updated SOUL.md. Empty string if no change.

Return only the JSON object, no other text.
Loading
Loading