Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
3c34532
feat: Implement Sample Paper Generator with format detection and ques…
shubhamxdd May 18, 2026
ea8adb4
feat: Enhance paper creation with resource validation and error handling
shubhamxdd May 18, 2026
334514d
feat: Add selected_resource_ids to chat_sessions and update related f…
shubhamxdd May 18, 2026
811d17c
feat: Improve session selection handling and update navigation logic
shubhamxdd May 18, 2026
8fd6005
feat: Enhance paper format configuration UI with input fields and dyn…
shubhamxdd May 18, 2026
8c0f79b
feat: Implement PDF generation for papers with download functionality
shubhamxdd May 18, 2026
3dad1ef
feat: Add question_pdf_url to paper_outputs and update PDF generation…
shubhamxdd May 18, 2026
dd3fee4
feat: Update generator route to accept optional paperId and sync stat…
shubhamxdd May 18, 2026
6cf4aeb
feat: Enhance job tracking for paper and resource processing with job…
shubhamxdd May 18, 2026
271a024
feat: Implement quota enforcement for free users on paper and resourc…
shubhamxdd May 18, 2026
dc4b94a
feat: Implement monthly quota reset for free users and enhance scroll…
shubhamxdd May 18, 2026
7ed7b85
Finalize Paper Generator: implement persistent context, URL-based rou…
shubhamxdd May 18, 2026
fde1188
error fix
shubhamxdd May 18, 2026
c1eecaf
feat: Update paper creation button logic to disable based on question…
shubhamxdd May 18, 2026
81da731
feat: Add user query and implement monthly paper limit check for free…
shubhamxdd May 18, 2026
c8139f8
refactor: Remove unused imports from Generator component
shubhamxdd May 18, 2026
61f54fe
feat: Enhance paper listing and dashboard with resource and question …
shubhamxdd May 18, 2026
bce477e
feat: Implement PDF export functionality for study guides and questio…
shubhamxdd May 18, 2026
cfc15d2
docs: Update architecture and standards for improved PDF generation a…
shubhamxdd May 18, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""add selected_resource_ids to chat_sessions

Revision ID: 983ad2983d5c
Revises: d3dcf6b6fe3f
Create Date: 2026-05-18 20:55:14.773697

"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# Revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the migration
# it revises (the same two IDs appear in the module docstring above).
# No branch labels or cross-revision dependencies are declared.
revision: str = '983ad2983d5c'
down_revision: Union[str, Sequence[str], None] = 'd3dcf6b6fe3f'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Apply this revision: add ``chat_sessions.selected_resource_ids``.

    The new column holds a JSON value and is nullable.
    """
    # Build the column definition first, then attach it to the table.
    selected_resource_ids = sa.Column('selected_resource_ids', sa.JSON(), nullable=True)
    op.add_column('chat_sessions', selected_resource_ids)


def downgrade() -> None:
    """Revert this revision: drop ``chat_sessions.selected_resource_ids``."""
    table_name, column_name = 'chat_sessions', 'selected_resource_ids'
    op.drop_column(table_name, column_name)
1 change: 1 addition & 0 deletions backend/app/llm/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def __init__(self):
self.base_url = "https://openrouter.ai/api/v1/chat/completions"

async def stream_chat(self, messages: list, model: str = "openrouter/owl-alpha") -> AsyncGenerator[str, None]:
# async def stream_chat(self, messages: list, model: str = "nvidia/nemotron-3-super-120b-a12b:free") -> AsyncGenerator[str, None]:
headers = {
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
Expand Down
39 changes: 39 additions & 0 deletions backend/app/llm/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,42 @@
Question:
{question}
"""

# Instruction text asking the model to report a past paper's question format
# as a bare JSON object: question counts and per-question marks for the
# mcq/short/long types, plus total marks and duration.
# NOTE: the text contains literal `{` / `}` characters, so it is intended to be
# embedded as-is; calling str.format() on it would fail unless they are escaped.
DETECT_FORMAT_PROMPT = """
Analyse this past year paper and extract the question format.
Return ONLY a JSON object with this exact structure, no explanation:

{
"mcq": <count>,
"short": <count>,
"long": <count>,
"mcq_marks": <marks each>,
"short_marks": <marks each>,
"long_marks": <marks each>,
"total_marks": <total>,
"duration_minutes": <duration or null>
}
"""

# Prompt template for generating a sample exam paper as a JSON array of
# question objects (type, marks, topic, question_text, answer, and either
# options for MCQs or an explanation for short/long questions).
# The `{format_config}` and `{context_chunks}` placeholders are presumably
# filled in by the caller before sending — the call site is not visible here,
# TODO confirm how they are substituted.
GENERATE_PAPER_PROMPT = """
You are generating a sample exam paper for a student.

Format config: {format_config}
Subject context from student's material:
{context_chunks}

Generate exactly the number of questions specified in the format config.
Return ONLY a JSON array of question objects. Each object must have:
- type (mcq | short | long)
- marks (integer)
- topic (string)
- question_text (string)
- For MCQ: also include options (array of 4 strings) and answer (correct option text)
- For short/long: also include answer (model answer string) and explanation (string)

Rules:
- Distribute questions across different topics evenly.
- No repeated questions.
- Difficulty should match a real exam for this level.
- JSON array only. No preamble, no explanation, no markdown fences.
"""
3 changes: 2 additions & 1 deletion backend/app/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .routers import auth, resources, solver
from .routers import auth, resources, solver, papers
from .config import settings

app = FastAPI(title="PYQ Solver API")
Expand All @@ -18,6 +18,7 @@
app.include_router(auth.router, prefix="/api")
app.include_router(resources.router, prefix="/api")
app.include_router(solver.router, prefix="/api")
app.include_router(papers.router, prefix="/api")

@app.get("/")
async def root():
Expand Down
3 changes: 2 additions & 1 deletion backend/app/models/chat_session.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from sqlalchemy import Column, String, DateTime, UUID, ForeignKey
from sqlalchemy import Column, String, DateTime, UUID, ForeignKey, JSON
from sqlalchemy.orm import relationship
from datetime import datetime
import uuid
Expand All @@ -10,6 +10,7 @@ class ChatSession(Base):
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
user_id = Column(UUID(as_uuid=True), ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
title = Column(String, default="New Chat")
selected_resource_ids = Column(JSON, default=list)
created_at = Column(DateTime, default=datetime.utcnow)
updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

Expand Down
213 changes: 213 additions & 0 deletions backend/app/routers/papers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from typing import List
import uuid
import json
import logging

from ..database import get_db
from ..models.user import User
from ..models.resource import Resource
from ..models.paper import Paper, paper_resources
from ..models.paper_output import PaperOutput
from ..schemas.paper import (
PaperCreate, PaperOut, PaperUpdate,
PaperOutputOut, PaperOutputToggle,
FormatDetectionRequest
)
from .auth import get_current_user
from ..llm.client import open_router_client
from ..llm.prompts import DETECT_FORMAT_PROMPT
from ..config import settings
from arq import create_pool
from arq.connections import RedisSettings

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/papers", tags=["papers"])

@router.post("/detect-format")
async def detect_format(
    data: FormatDetectionRequest,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user)
):
    """Ask the LLM to infer the question format of one of the caller's resources.

    Args:
        data: Request body; ``data.resource_id`` selects the resource to analyse.
        db: Async database session.
        current_user: Authenticated user; only resources owned by this user
            are looked up.

    Returns:
        The JSON object produced by the model, parsed into a dict.

    Raises:
        HTTPException: 404 when the resource does not exist for this user,
            400 when it has no extracted text yet, and 500 when the model
            call fails or its reply is not a JSON object.
    """
    # Fetch the resource, scoped to the current user.
    result = await db.execute(
        select(Resource).where(Resource.id == data.resource_id, Resource.user_id == current_user.id)
    )
    resource = result.scalar_one_or_none()

    if not resource:
        raise HTTPException(status_code=404, detail="Resource not found")

    if not resource.extracted_text:
        raise HTTPException(status_code=400, detail="Resource has no extracted text. Please wait for processing.")

    # Only the first 10000 characters of the extracted text are sent.
    messages = [
        {"role": "system", "content": "You are a document analyzer."},
        {"role": "user", "content": f"{DETECT_FORMAT_PROMPT}\n\nPaper Content:\n{resource.extracted_text[:10000]}"}
    ]

    try:
        # Collect the streamed chunks and join once at the end.
        chunks = []
        async for chunk in open_router_client.stream_chat(messages):
            chunks.append(chunk)
        clean_json = "".join(chunks).strip()

        # The model may wrap its answer in a markdown fence, with or without
        # a "json" language tag; strip either form before parsing.
        for opening_fence in ("```json", "```"):
            if clean_json.startswith(opening_fence):
                clean_json = clean_json[len(opening_fence):]
                break
        if clean_json.endswith("```"):
            clean_json = clean_json[:-3]

        format_config = json.loads(clean_json)
        if not isinstance(format_config, dict):
            raise ValueError("LLM response is not a JSON object")
    except Exception as e:
        # Keep provider/parser details in the server log; the client only
        # receives a generic message.
        logger.exception("Format detection failed for resource %s", data.resource_id)
        raise HTTPException(status_code=500, detail="Failed to detect format") from e

    return format_config
Comment on lines +72 to +73
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Avoid leaking internal exception text in API error responses.

At Line 68, returning str(e) can expose provider/internal details. Return a generic message to clients and log the detailed error server-side.

🧰 Tools
🪛 Ruff (0.15.12)

[warning] 67-67: Do not catch blind exception: Exception

(BLE001)


[warning] 68-68: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)


[warning] 68-68: Use explicit conversion flag

Replace with conversion flag

(RUF010)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/routers/papers.py` around lines 67 - 68, Replace the current
except block that does raise HTTPException(status_code=500, detail=f"Failed to
detect format: {str(e)}") with logic that logs the full exception server-side
(e.g., logger.exception(...) or use the app logger) and raises an HTTPException
with a generic client-facing message like "Failed to detect file format" or
"Internal server error" so internal exception text is not returned; update the
except handling in the papers.py handler where the exception variable e is
caught to call logger.exception(e) and then raise HTTPException(status_code=500,
detail="Failed to detect file format").


@router.post("", response_model=PaperOut)
async def create_paper(
data: PaperCreate,
db: AsyncSession = Depends(get_db),
current_user: User = Depends(get_current_user)
):
if not data.resources:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="At least one resource must be selected."
)

# 2. Create Paper record
new_paper = Paper(
user_id=current_user.id,
title=data.title,
format_config=data.format_config or {},
delivery_mode=data.delivery_mode,
status="pending"
)
db.add(new_paper)
await db.flush()

# 3. Link Resources
for res_link in data.resources:
# Verify resource exists and belongs to user
res_result = await db.execute(
select(Resource).where(Resource.id == res_link.resource_id, Resource.user_id == current_user.id)
)
if not res_result.scalar_one_or_none():
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Resource {res_link.resource_id} not found or unauthorized."
)

Comment thread
coderabbitai[bot] marked this conversation as resolved.
# Insert into association table
await db.execute(
paper_resources.insert().values(
paper_id=new_paper.id,
resource_id=res_link.resource_id,
resource_role=res_link.role
)
)
Comment on lines +117 to +136
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Reject resources that aren't ready yet.

This loop only checks ownership. A direct API caller can attach processing or failed resources, and the worker then burns a queue slot before dying with "No context found in selected resources." Require status == "ready" and non-empty extracted text here.

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/routers/papers.py` around lines 117 - 136, The loop in
routers/papers.py currently only checks ownership of each res_link; update the
existence query to also require the resource is ready and has non-empty
extracted text before inserting into paper_resources. Specifically, change the
select(...) used for res_result to include Resource.status == "ready" and
Resource.extracted_text is not null/empty (or the equivalent check your Resource
model uses), then if scalar_one_or_none() returns None raise HTTPException with
a message like "Resource {id} not ready or has no extracted text." Use the same
symbols (data.resources, res_link.resource_id, current_user.id, Resource,
res_result, paper_resources.insert(), new_paper.id) to locate and modify the
code.


await db.commit()
await db.refresh(new_paper)

# 4. Enqueue background task
redis = await create_pool(RedisSettings.from_dsn(settings.REDIS_URL))
try:
job = await redis.enqueue_job("generate_paper_task", str(new_paper.id))
if job is None:
raise RuntimeError("Failed to enqueue generate_paper_task")
except Exception as e:
logger.error(f"Redis enqueue error: {e}")
new_paper.status = "failed"
await db.commit()
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail="Paper generation queued failed, please retry."
)
finally:
await redis.close()
Comment on lines +141 to +167
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

Commit the Job row before passing its ID to generate_paper_task.

Line 154 hands ARQ an uncommitted new_job.id. If the worker starts before the transaction commits, generate_paper_task can observe no row and crash when it later dereferences job. The exception path also commits the flushed queued job, leaving a phantom job for work that never ran.

🧰 Tools
🪛 Ruff (0.15.13)

[warning] 158-158: Do not catch blind exception: Exception

(BLE001)


[warning] 162-165: Within an except clause, raise exceptions with raise ... from err or raise ... from None to distinguish them from errors in exception handling

(B904)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/routers/papers.py` around lines 141 - 167, The Job row is
committed only after enqueue, so the worker may see a missing Job id; move the
DB commit for the newly created Job (the new_job created and currently flushed
via await db.flush()) to occur before calling redis.enqueue_job for
"generate_paper_task" so the worker always observes the row; additionally,
update the exception path around redis.enqueue_job to remove or mark and commit
the new_job consistently (e.g., delete the phantom new_job or set its status to
"failed" and commit) to avoid leaving a queued Job that never ran; reference
new_job, await db.flush(), await db.commit(),
redis.enqueue_job("generate_paper_task") and generate_paper_task when making the
change.


Comment thread
coderabbitai[bot] marked this conversation as resolved.
return new_paper

@router.get("", response_model=List[PaperOut])
async def list_papers(
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return all papers owned by the current user, newest first."""
    newest_first = (
        select(Paper)
        .where(Paper.user_id == current_user.id)
        .order_by(Paper.created_at.desc())
    )
    rows = await db.execute(newest_first)
    return rows.scalars().all()

@router.get("/{paper_id}", response_model=PaperOut)
async def get_paper(
    paper_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return one paper by ID, provided it belongs to the current user.

    Raises:
        HTTPException: 404 when no matching paper exists for this user.
    """
    owned_paper_query = select(Paper).where(
        Paper.id == paper_id,
        Paper.user_id == current_user.id,
    )
    paper = (await db.execute(owned_paper_query)).scalar_one_or_none()
    if paper:
        return paper
    raise HTTPException(status_code=404, detail="Paper not found")

@router.get("/{paper_id}/output", response_model=PaperOutputOut)
async def get_paper_output(
    paper_id: uuid.UUID,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Return the generated output for a paper owned by the current user.

    Raises:
        HTTPException: 404 when the paper is not found for this user, or
            when it has no output row yet.
    """
    # Ownership check first: the output lookup itself is keyed only by paper.
    ownership_query = select(Paper).where(
        Paper.id == paper_id,
        Paper.user_id == current_user.id,
    )
    owned_paper = (await db.execute(ownership_query)).scalar_one_or_none()
    if not owned_paper:
        raise HTTPException(status_code=404, detail="Paper not found")

    output_query = select(PaperOutput).where(PaperOutput.paper_id == paper_id)
    output = (await db.execute(output_query)).scalar_one_or_none()
    if output:
        return output
    raise HTTPException(status_code=404, detail="Paper output not yet generated")

@router.patch("/{paper_id}/output", response_model=PaperOutputOut)
async def toggle_output_settings(
    paper_id: uuid.UUID,
    data: PaperOutputToggle,
    db: AsyncSession = Depends(get_db),
    current_user: User = Depends(get_current_user),
):
    """Update the answer/explanation flags on a paper's output.

    Only flags that are not ``None`` in the request body are written; the
    others keep their stored value.

    Raises:
        HTTPException: 404 when the paper is not found for this user, or
            when it has no output row.
    """
    # Ownership check first: the output lookup itself is keyed only by paper.
    ownership_query = select(Paper).where(
        Paper.id == paper_id,
        Paper.user_id == current_user.id,
    )
    owned_paper = (await db.execute(ownership_query)).scalar_one_or_none()
    if not owned_paper:
        raise HTTPException(status_code=404, detail="Paper not found")

    output_query = select(PaperOutput).where(PaperOutput.paper_id == paper_id)
    output = (await db.execute(output_query)).scalar_one_or_none()
    if not output:
        raise HTTPException(status_code=404, detail="Paper output not found")

    # Copy across only the flags the caller actually supplied.
    for flag_name in ("include_answers", "include_explanations"):
        requested = getattr(data, flag_name)
        if requested is not None:
            setattr(output, flag_name, requested)

    await db.commit()
    await db.refresh(output)
    return output
17 changes: 15 additions & 2 deletions backend/app/routers/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ async def update_session(

if data.title:
session.title = data.title
if data.selected_resource_ids is not None:
# Convert UUIDs to strings for JSON storage
session.selected_resource_ids = [str(rid) for rid in data.selected_resource_ids]

await db.commit()
await db.refresh(session)
Expand Down Expand Up @@ -165,12 +168,22 @@ async def ask_question(
sess_result = await db.execute(
select(ChatSession).where(ChatSession.id == data.session_id, ChatSession.user_id == current_user.id)
)
if not sess_result.scalar_one_or_none():
session = sess_result.scalar_one_or_none()
if not session:
raise HTTPException(status_code=404, detail="Chat session not found")
session_id = data.session_id

# Update persistent resource selection if changed
new_resource_ids = [str(rid) for rid in data.resource_ids]
if session.selected_resource_ids != new_resource_ids:
session.selected_resource_ids = new_resource_ids
else:
# Auto-create session if none provided
new_sess = ChatSession(user_id=current_user.id, title=data.content[:30] + "...")
new_sess = ChatSession(
user_id=current_user.id,
title=data.content[:30] + "...",
selected_resource_ids=[str(rid) for rid in data.resource_ids]
Comment on lines +176 to +185
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor | ⚡ Quick win

Persist only the validated resource IDs.

data.resource_ids can still contain deleted or unauthorized IDs that were filtered out by the query above. Saving the raw request back into session.selected_resource_ids keeps stale IDs in the session and desyncs the solver UI from the actual context used for the answer.

Proposed fix
-        new_resource_ids = [str(rid) for rid in data.resource_ids]
+        new_resource_ids = [str(res.id) for res in resources]
         if session.selected_resource_ids != new_resource_ids:
             session.selected_resource_ids = new_resource_ids
@@
         new_sess = ChatSession(
             user_id=current_user.id, 
             title=data.content[:30] + "...",
-            selected_resource_ids=[str(rid) for rid in data.resource_ids]
+            selected_resource_ids=[str(res.id) for res in resources]
         )
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@backend/app/routers/solver.py` around lines 176 - 185, Persist the validated
resource IDs instead of the raw request list: replace uses of data.resource_ids
when assigning session.selected_resource_ids and when constructing new
ChatSession (new_sess) so they store the filtered/validated IDs that were
actually used in the query (e.g., the variable holding the authorized/exists IDs
from the earlier filter), converted to strings; update the assignment in the
existing-session branch (session.selected_resource_ids = ...) and the
ChatSession constructor (selected_resource_ids=...) to use that
validated_id_list rather than data.resource_ids to avoid saving deleted or
unauthorized IDs.

)
db.add(new_sess)
await db.flush()
session_id = new_sess.id
Expand Down
2 changes: 2 additions & 0 deletions backend/app/schemas/chat_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ class ChatSessionCreate(ChatSessionBase):

# Partial-update payload for a chat session: every field is optional and
# defaults to None.
# NOTE(review): presumably the body type of update_session in
# routers/solver.py — confirm against that handler's signature.
class ChatSessionUpdate(BaseModel):
    # New session title, if one is being set.
    title: Optional[str] = None
    # Resource UUIDs selected for the session, if the selection is being set.
    selected_resource_ids: Optional[List[UUID]] = None

class ChatSessionOut(ChatSessionBase):
id: UUID
user_id: UUID
selected_resource_ids: Optional[List[UUID]] = []
created_at: datetime
updated_at: datetime

Expand Down
Loading