Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
f5b545c
feat: implement Phase 4 - PYQ Solver with SSE streaming and Chat UI
shubhamxdd May 17, 2026
d6f1b4e
fix: update model in stream_chat method and add pgadmin volume in doc…
shubhamxdd May 17, 2026
47aff4c
feat: add file viewing button to resources and solver pages
shubhamxdd May 17, 2026
158adf2
fix: resolve syntax error and broken JSX in Solver.tsx
shubhamxdd May 17, 2026
5e42a11
feat: implement 'Stop Processing' feature for resources
shubhamxdd May 17, 2026
f98cd21
feat: improve worker reliability, add confirmation alerts, and file r…
shubhamxdd May 17, 2026
59fd21b
fix: enforce 12-page limit at upload and improve worker abort logic
shubhamxdd May 17, 2026
9caf21e
feat: implement multi-session chat with history and persistent UI
shubhamxdd May 17, 2026
718dcc8
feat: implement collapsible sidebar and chat session renaming
shubhamxdd May 17, 2026
3e75e83
fix: provide explicit name for foreign key in alembic migration
shubhamxdd May 17, 2026
6333577
fix: raise RuntimeError for non-200 responses in LLM client
shubhamxdd May 17, 2026
f5849ed
fix: remove delete-orphan cascade from chat session questions
shubhamxdd May 17, 2026
534c2e0
fix: only mark resource as failed if current status is processing
shubhamxdd May 17, 2026
3f01d52
fix: verify chat session ownership in ask_question endpoint
shubhamxdd May 17, 2026
c1d206e
fix: persist delivery_mode from request in ask_question endpoint
shubhamxdd May 17, 2026
b105df8
fix: address remaining CodeRabbit audit findings for data integrity a…
shubhamxdd May 17, 2026
dd75fe4
feat: parameterize frontend API URL via environment variables
shubhamxdd May 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ SPACES_REGION=nyc3
SPACES_PUBLIC_URL=https://your_bucket_name.nyc3.cdn.digitaloceanspaces.com
MAX_OCR_PAGES=12
MAX_FILE_SIZE_MB=20

# Frontend
VITE_API_URL=http://127.0.0.1:8001/api

# Postgres
POSTGRES_USER=user
POSTGRES_PASSWORD=password
POSTGRES_DB=pyqdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""add chat sessions and processing progress

Revision ID: d3dcf6b6fe3f
Revises: 9f2c76932a93
Create Date: 2026-05-17 16:07:40.708352

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision: str = 'd3dcf6b6fe3f'
down_revision: Union[str, Sequence[str], None] = '9f2c76932a93'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Apply this revision.

    Creates the ``chat_sessions`` table, adds a nullable ``session_id``
    column (with a named foreign key) to ``questions``, and adds a nullable
    ``processing_progress`` integer column to ``resources``.
    """
    chat_session_columns = (
        sa.Column('id', sa.UUID(), nullable=False),
        sa.Column('user_id', sa.UUID(), nullable=False),
        sa.Column('title', sa.String(), nullable=True),
        sa.Column('created_at', sa.DateTime(), nullable=True),
        sa.Column('updated_at', sa.DateTime(), nullable=True),
    )
    chat_session_constraints = (
        sa.ForeignKeyConstraint(['user_id'], ['users.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    )
    op.create_table('chat_sessions', *chat_session_columns, *chat_session_constraints)

    # chat_sessions has to exist before questions can reference it.
    session_id_column = sa.Column('session_id', sa.UUID(), nullable=True)
    op.add_column('questions', session_id_column)
    op.create_foreign_key(
        'fk_questions_session_id_chat_sessions',
        'questions',
        'chat_sessions',
        ['session_id'],
        ['id'],
        ondelete='CASCADE',
    )

    progress_column = sa.Column('processing_progress', sa.Integer(), nullable=True)
    op.add_column('resources', progress_column)


def downgrade() -> None:
    """Revert this revision.

    Drops ``resources.processing_progress``, then the foreign key and the
    ``session_id`` column on ``questions``, and finally the
    ``chat_sessions`` table.
    """
    session_fk_name = 'fk_questions_session_id_chat_sessions'

    op.drop_column('resources', 'processing_progress')

    # The constraint goes first, then the column it covers, then the
    # table that the constraint pointed at.
    op.drop_constraint(session_fk_name, 'questions', type_='foreignkey')
    op.drop_column('questions', 'session_id')
    op.drop_table('chat_sessions')
48 changes: 48 additions & 0 deletions backend/app/llm/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import json
import httpx
from typing import AsyncGenerator
from ..config import settings

class OpenRouterClient:
    """Async client that streams chat completions from OpenRouter."""

    def __init__(self):
        self.api_key = settings.OPENROUTER_API_KEY
        self.base_url = "https://openrouter.ai/api/v1/chat/completions"

    @staticmethod
    def _chunk_from_payload(data_str: str) -> str:
        """Extract the text delta from one SSE ``data:`` payload.

        Args:
            data_str: The payload that follows the ``data: `` prefix
                (never the ``[DONE]`` sentinel, which the caller handles).

        Returns:
            The delta content, or ``""`` when the payload is not valid JSON,
            is not a JSON object, or carries no text (for example an empty
            ``choices`` list or a ``null`` content field).

        Raises:
            RuntimeError: If the payload contains a non-empty ``error``
                field, so a failure reported mid-stream is not mistaken
                for a normally completed answer.
        """
        try:
            data = json.loads(data_str)
        except json.JSONDecodeError:
            # Malformed frame: skip it instead of aborting the whole stream.
            return ""

        if not isinstance(data, dict):
            return ""

        if data.get("error"):
            raise RuntimeError(f"OpenRouter stream error: {data['error']}")

        try:
            chunk = data["choices"][0]["delta"].get("content", "")
        except (KeyError, IndexError, TypeError, AttributeError):
            # Frame without a usable choices[0].delta: nothing to emit.
            return ""
        return chunk or ""

    async def stream_chat(self, messages: list, model: str = "openrouter/owl-alpha") -> AsyncGenerator[str, None]:
        """Stream the completion for ``messages`` as text chunks.

        Args:
            messages: Chat messages sent as the request's ``messages`` field.
            model: Model identifier sent as the request's ``model`` field.

        Yields:
            Each non-empty text delta, in the order received.

        Raises:
            RuntimeError: If the HTTP status is not 200, or if a streamed
                payload carries an ``error`` field.
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": settings.FRONTEND_URL,
            "X-Title": "PYQ Solver",
        }

        payload = {
            "model": model,
            "messages": messages,
            "stream": True
        }

        data_prefix = "data: "

        async with httpx.AsyncClient(timeout=120.0) as client:
            async with client.stream("POST", self.base_url, headers=headers, json=payload) as response:
                if response.status_code != 200:
                    error_text = await response.aread()
                    raise RuntimeError(f"OpenRouter Error: {response.status_code} - {error_text.decode()}")

                async for line in response.aiter_lines():
                    # Blank lines and non-data lines carry no payload.
                    if not line.startswith(data_prefix):
                        continue

                    data_str = line[len(data_prefix):]
                    if data_str == "[DONE]":
                        break

                    chunk = self._chunk_from_payload(data_str)
                    if chunk:
                        yield chunk
Comment on lines +40 to +46
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

🧩 Analysis chain

🏁 Script executed:

git ls-files | grep -E "(client\.py|solver\.py)" | head -20

Repository: shubhamxdd/pyq-appl

Length of output: 119


🏁 Script executed:

wc -l backend/app/llm/client.py

Repository: shubhamxdd/pyq-appl

Length of output: 92


🏁 Script executed:

cat -n backend/app/llm/client.py | sed -n '30,60p'

Repository: shubhamxdd/pyq-appl

Length of output: 928


🏁 Script executed:

cat -n backend/app/llm/client.py | head -80

Repository: shubhamxdd/pyq-appl

Length of output: 2237


🏁 Script executed:

rg "class.*Client" backend/app/llm/client.py -A 5

Repository: shubhamxdd/pyq-appl

Length of output: 350


🏁 Script executed:

rg "stream_chat" backend/ -B 3 -A 15

Repository: shubhamxdd/pyq-appl

Length of output: 2865


🏁 Script executed:

rg "stream_chat" backend/ -B 2 -A 10

Repository: shubhamxdd/pyq-appl

Length of output: 1942


🏁 Script executed:

fd "test" backend/ -type f | head -20

Repository: shubhamxdd/pyq-appl

Length of output: 234


🏁 Script executed:

find backend -name "*test*" -type f 2>/dev/null

Repository: shubhamxdd/pyq-appl

Length of output: 45


🏁 Script executed:

cat -n backend/app/routers/solver.py | sed -n '60,120p'

Repository: shubhamxdd/pyq-appl

Length of output: 2465


🏁 Script executed:

rg "def event_generator" backend/app/routers/solver.py -A 40

Repository: shubhamxdd/pyq-appl

Length of output: 1304


Narrow exception handling to distinguish protocol errors from schema mismatches.

The bare except Exception: continue silently drops malformed frames, allowing truncated responses to be marked status="done" upstream. If a frame fails to parse, it's lost silently rather than surfacing the error. Distinguish JSON parse failures (which should be skipped) from unexpected schema structure (which should fail the answer):

-                        try:
-                            data = json.loads(data_str)
-                            chunk = data['choices'][0]['delta'].get('content', "")
-                            if chunk:
-                                yield chunk
-                        except Exception:
-                            continue
+                        try:
+                            data = json.loads(data_str)
+                        except json.JSONDecodeError:
+                            continue
+
+                        if "error" in data:
+                            raise RuntimeError(f"OpenRouter stream error: {data['error']}")
+
+                        try:
+                            chunk = data["choices"][0]["delta"].get("content", "")
+                        except (KeyError, IndexError, TypeError) as exc:
+                            raise RuntimeError(f"Unexpected stream chunk: {data}") from exc
+
+                        if chunk:
+                            yield chunk
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
try:
data = json.loads(data_str)
chunk = data['choices'][0]['delta'].get('content', "")
if chunk:
yield chunk
except Exception:
continue
try:
data = json.loads(data_str)
except json.JSONDecodeError:
continue
if "error" in data:
raise RuntimeError(f"OpenRouter stream error: {data['error']}")
try:
chunk = data["choices"][0]["delta"].get("content", "")
except (KeyError, IndexError, TypeError) as exc:
raise RuntimeError(f"Unexpected stream chunk: {data}") from exc
if chunk:
yield chunk
🧰 Tools
🪛 Ruff (0.15.12)

[error] 45-46: try-except-continue detected, consider logging the exception

(S112)


[warning] 45-45: Do not catch blind exception: Exception

(BLE001)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/llm/client.py` around lines 40 - 46, The current try/except
around json.loads(data_str) and extracting chunk silently swallows all
exceptions; change it to only ignore JSON parse errors (catch
json.JSONDecodeError and continue) but let schema-related problems bubble up (do
not catch KeyError, TypeError, IndexError) so malformed frames that violate
expected structure (accessing data['choices'][0]['delta']['content']) fail the
answer flow; keep the existing behavior of yielding non-empty chunk values and
consider logging the schema error before re-raising to aid debugging.


# Module-level client instance, constructed at import time; its __init__
# reads settings.OPENROUTER_API_KEY.
open_router_client = OpenRouterClient()
22 changes: 22 additions & 0 deletions backend/app/llm/prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# System prompt for the solver: restricts answers to the supplied context,
# asks for markdown formatting and for source filenames to be cited.
SOLVER_SYSTEM = """
You are an expert academic tutor helping a student understand a question from their study materials.

You are given relevant excerpts from the student's own documents as context.
Answer the question using ONLY the provided context.

Rules:
- Be clear, structured, and student-friendly.
- Use markdown for formatting: headings (###), bold, and bullet points.
- If the context does not contain enough information to answer the question, say so honestly. Do not make up information.
- Provide a concise answer first, followed by a more detailed explanation if helpful.
- Cite the source filename when referencing specific points.
- DO NOT add information that is not present in the provided context.
"""

# User-message template with {context} and {question} placeholders.
# NOTE(review): presumably filled with str.format(context=..., question=...)
# — confirm at the call site.
SOLVER_USER_TEMPLATE = """
Context from student materials:
{context}

Question:
{question}
"""
3 changes: 2 additions & 1 deletion backend/app/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .routers import auth, resources
from .routers import auth, resources, solver
from .config import settings

app = FastAPI(title="PYQ Solver API")
Expand All @@ -17,6 +17,7 @@

app.include_router(auth.router, prefix="/api")
app.include_router(resources.router, prefix="/api")
app.include_router(solver.router, prefix="/api")

@app.get("/")
async def root():
Expand Down
2 changes: 2 additions & 0 deletions backend/app/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .base import Base
from .user import User
from .chat_session import ChatSession
from .resource import Resource
from .question import Question, question_resources
from .answer import Answer
Expand All @@ -10,6 +11,7 @@
__all__ = [
"Base",
"User",
"ChatSession",
"Resource",
"Question",
"question_resources",
Expand Down
17 changes: 17 additions & 0 deletions backend/app/models/chat_session.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from sqlalchemy import Column, String, DateTime, UUID, ForeignKey
from sqlalchemy.orm import relationship
from datetime import datetime
import uuid
from .base import Base

class ChatSession(Base):
    """ORM model for a row in ``chat_sessions``: one user's titled chat."""

    __tablename__ = "chat_sessions"

    # Primary key; a fresh UUID4 is generated by default on insert.
    id = Column(
        UUID(as_uuid=True),
        primary_key=True,
        default=uuid.uuid4,
    )
    # Owning user; the foreign key is declared with ON DELETE CASCADE.
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("users.id", ondelete="CASCADE"),
        nullable=False,
    )
    title = Column(String, default="New Chat")
    created_at = Column(DateTime, default=datetime.utcnow)
    # Also refreshed on every UPDATE through ``onupdate``.
    updated_at = Column(
        DateTime,
        default=datetime.utcnow,
        onupdate=datetime.utcnow,
    )

    user = relationship("User", back_populates="chat_sessions")
    questions = relationship(
        "Question",
        back_populates="session",
        cascade="all",
    )
2 changes: 2 additions & 0 deletions backend/app/models/question.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ class Question(Base):

id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
user_id = Column(UUID(as_uuid=True), ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
session_id = Column(UUID(as_uuid=True), ForeignKey("chat_sessions.id", ondelete="CASCADE"), nullable=True)
content = Column(Text, nullable=False)
delivery_mode = Column(String, nullable=False) # stream | background
created_at = Column(DateTime, default=datetime.utcnow)

user = relationship("User", back_populates="questions")
session = relationship("ChatSession", back_populates="questions")
resources = relationship("Resource", secondary=question_resources)
answer = relationship("Answer", back_populates="question", uselist=False, cascade="all, delete-orphan")
3 changes: 2 additions & 1 deletion backend/app/models/resource.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from sqlalchemy import Column, String, DateTime, UUID, ForeignKey, Text
from sqlalchemy import Column, String, DateTime, UUID, ForeignKey, Text, Integer
from sqlalchemy.orm import relationship
from datetime import datetime
import uuid
Expand All @@ -13,6 +13,7 @@ class Resource(Base):
file_url = Column(String, nullable=False)
type = Column(String, nullable=False) # notes | syllabus | past_paper | other
status = Column(String, default="pending") # pending | processing | ready | failed
processing_progress = Column(Integer, default=0)
extracted_text = Column(Text, nullable=True)
created_at = Column(DateTime, default=datetime.utcnow)

Expand Down
1 change: 1 addition & 0 deletions backend/app/models/user.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ class User(Base):
questions = relationship("Question", back_populates="user", cascade="all, delete-orphan")
papers = relationship("Paper", back_populates="user", cascade="all, delete-orphan")
jobs = relationship("Job", back_populates="user", cascade="all, delete-orphan")
chat_sessions = relationship("ChatSession", back_populates="user", cascade="all, delete-orphan")
66 changes: 65 additions & 1 deletion backend/app/routers/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
from sqlalchemy import select
from typing import List
import uuid
import pypdfium2 as pdfium
from ..database import get_db
from ..models.user import User
from ..models.resource import Resource
from ..schemas.resource import ResourceOut
from ..schemas.resource import ResourceOut, ResourceUpdate
from ..routers.auth import get_current_user
from ..services.storage import storage_service
from arq import create_pool
Expand Down Expand Up @@ -46,6 +47,24 @@ async def upload_resource(
)
content.extend(chunk)

# Page count check for PDFs (User Request)
if file.content_type == "application/pdf":
try:
pdf = pdfium.PdfDocument(bytes(content))
num_pages = len(pdf)
if num_pages > settings.MAX_OCR_PAGES:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"File has {num_pages} pages. Maximum {settings.MAX_OCR_PAGES} pages allowed for processing."
)
except HTTPException:
raise
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid or corrupted PDF file: {str(e)}"
)

# Generate unique filename for storage
ext = file.filename.split('.')[-1]
object_name = f"user_{current_user.id}/{uuid.uuid4()}.{ext}"
Expand Down Expand Up @@ -127,6 +146,28 @@ async def delete_resource(

return {"message": "Resource deleted successfully"}

# PATCH /{resource_id}: rename a resource owned by the current user.
# Responds 404 when no resource with that id belongs to the caller.
@router.patch("/{resource_id}", response_model=ResourceOut)
async def update_resource(
    resource_id: uuid.UUID,
    data: ResourceUpdate,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    # Filtering on user_id as well as id keeps other users' rows invisible.
    owned_resource_query = select(Resource).where(
        Resource.id == resource_id,
        Resource.user_id == current_user.id,
    )
    resource = (await db.execute(owned_resource_query)).scalar_one_or_none()

    if not resource:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Resource not found")

    # A missing or empty filename leaves the stored name untouched.
    new_filename = data.filename
    if new_filename:
        resource.filename = new_filename

    await db.commit()
    await db.refresh(resource)
    return resource

@router.post("/{resource_id}/retry", response_model=ResourceOut)
async def retry_extraction(
resource_id: uuid.UUID,
Expand Down Expand Up @@ -160,3 +201,26 @@ async def retry_extraction(
)

return resource

# POST /{resource_id}/stop: flag an in-flight resource as failed.
# Responds 404 when no resource with that id belongs to the caller.
@router.post("/{resource_id}/stop", response_model=ResourceOut)
async def stop_processing(
    resource_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    owned_resource_query = select(Resource).where(
        Resource.id == resource_id,
        Resource.user_id == current_user.id,
    )
    resource = (await db.execute(owned_resource_query)).scalar_one_or_none()

    if not resource:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Resource not found")

    # Only a resource that is still processing gets flipped to failed;
    # any other status (for example 'ready') is returned unchanged.
    still_processing = resource.status == "processing"
    if still_processing:
        resource.status = "failed"
        await db.commit()
        await db.refresh(resource)

    return resource
Loading