shubhamxdd · shubhamxdd · May 17, 2026 · May 17, 2026 · May 17, 2026 · May 17, 2026
diff --git a/.env.example b/.env.example
@@ -5,11 +5,14 @@ OPENROUTER_API_KEY=your_openrouter_key
 BETTER_AUTH_SECRET=your_auth_secret
 GOOGLE_CLIENT_ID=your_google_id
 GOOGLE_CLIENT_SECRET=your_google_secret
-R2_ACCOUNT_ID=...
-R2_ACCESS_KEY_ID=...
-R2_SECRET_ACCESS_KEY=...
-R2_BUCKET_NAME=...
-R2_PUBLIC_URL=...
+SPACES_ENDPOINT=https://nyc3.digitaloceanspaces.com
+SPACES_KEY=your_access_key
+SPACES_SECRET=your_secret_key
+SPACES_BUCKET=your_bucket_name
+SPACES_REGION=nyc3
+SPACES_PUBLIC_URL=https://your_bucket_name.nyc3.cdn.digitaloceanspaces.com
+MAX_OCR_PAGES=12
+MAX_FILE_SIZE_MB=20
 POSTGRES_USER=user
 POSTGRES_PASSWORD=password
 POSTGRES_DB=pyqdb
diff --git a/backend/app/config.py b/backend/app/config.py
@@ -9,11 +9,19 @@ class Settings(BaseSettings):
     GOOGLE_CLIENT_ID: str
     GOOGLE_CLIENT_SECRET: str
 
-    R2_ACCOUNT_ID: str
-    R2_ACCESS_KEY_ID: str
-    R2_SECRET_ACCESS_KEY: str
-    R2_BUCKET_NAME: str
-    R2_PUBLIC_URL: str
+    # DigitalOcean Spaces
+    SPACES_ENDPOINT: str
+    SPACES_KEY: str
+    SPACES_SECRET: str
+    SPACES_BUCKET: str
+    SPACES_REGION: str = "nyc3"
+    SPACES_PUBLIC_URL: str
+
+    # OCR Settings
+    MAX_OCR_PAGES: int = 12
+
+    # Upload Settings
+    MAX_FILE_SIZE_MB: int = 20
 
     # JWT Settings
     JWT_SECRET_KEY: str

diff --git a/backend/app/main.py b/backend/app/main.py
@@ -1,6 +1,6 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from .routers import auth
+from .routers import auth, resources
 from .config import settings
 
 app = FastAPI(title="PYQ Solver API")
@@ -16,6 +16,7 @@
 )
 
 app.include_router(auth.router, prefix="/api")
+app.include_router(resources.router, prefix="/api")
 
 @app.get("/")
 async def root():

diff --git a/backend/app/routers/resources.py b/backend/app/routers/resources.py
@@ -0,0 +1,213 @@
+import logging
+import uuid
+from typing import List
+from urllib.parse import urlparse
+
+from arq import create_pool
+from arq.connections import RedisSettings
+from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, status
+from fastapi.concurrency import run_in_threadpool
+from sqlalchemy import select
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from ..config import settings
+from ..database import get_db
+from ..models.resource import Resource
+from ..models.user import User
+from ..routers.auth import get_current_user
+from ..schemas.resource import ResourceOut
+from ..services.storage import storage_service
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/resources", tags=["resources"])
+
+# Accepted upload MIME types, mapped to the extension given to the stored object.
+_EXTENSION_BY_CONTENT_TYPE = {
+    "application/pdf": "pdf",
+    "text/plain": "txt",
+}
+_READ_CHUNK_BYTES = 1024 * 1024  # uploads are read 1MB at a time
+
+
+async def _read_upload(file: UploadFile) -> bytearray:
+    """Read the upload in chunks, raising HTTP 413 once it exceeds MAX_FILE_SIZE_MB."""
+    max_bytes = settings.MAX_FILE_SIZE_MB * 1024 * 1024
+    content = bytearray()
+    while True:
+        chunk = await file.read(_READ_CHUNK_BYTES)
+        if not chunk:
+            return content
+        # Check before buffering so an oversized body is never held in full.
+        if len(content) + len(chunk) > max_bytes:
+            raise HTTPException(
+                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
+                detail=f"File too large. Maximum size allowed is {settings.MAX_FILE_SIZE_MB}MB"
+            )
+        content.extend(chunk)
+
+
+async def _enqueue_extraction(resource_id: uuid.UUID) -> None:
+    """Queue the 'extraction_task' job for the given resource id.
+
+    A pool is opened per call; ``async with`` closes it again so requests do
+    not leave Redis connections behind.
+    """
+    async with await create_pool(RedisSettings.from_dsn(settings.REDIS_URL)) as redis:
+        await redis.enqueue_job('extraction_task', str(resource_id))
+
+
+async def _get_owned_resource(db: AsyncSession, resource_id: uuid.UUID, user: User) -> Resource:
+    """Return the resource with this id that belongs to user, or raise HTTP 404."""
+    result = await db.execute(
+        select(Resource).where(Resource.id == resource_id, Resource.user_id == user.id)
+    )
+    resource = result.scalar_one_or_none()
+    if not resource:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Resource not found")
+    return resource
+
+
+def _object_key_from_url(file_url: str) -> str:
+    """Recover the storage object key from a stored public file URL."""
+    prefix = f"{settings.SPACES_PUBLIC_URL}/"
+    if file_url.startswith(prefix):
+        return file_url[len(prefix):]
+    # The URL was built under a different public base; fall back to its path.
+    return urlparse(file_url).path.lstrip("/")
+
+
+@router.post("/", response_model=ResourceOut)
+async def upload_resource(
+    type: str = Form(...),
+    file: UploadFile = File(...),
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_current_user)
+):
+    """Store an uploaded PDF/text file and queue it for background extraction.
+
+    Raises HTTP 400 for unsupported content types, 413 for oversized files and
+    500 when storage, the database or the job queue fails.
+    """
+    # The extension comes from the validated content type, never from the
+    # client-supplied filename (which may be missing, dot-less or contain "/").
+    ext = _EXTENSION_BY_CONTENT_TYPE.get(file.content_type)
+    if ext is None:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Only PDF and Text files are supported"
+        )
+
+    content = await _read_upload(file)
+    object_name = f"user_{current_user.id}/{uuid.uuid4()}.{ext}"
+
+    # boto3 is synchronous; run it in the threadpool to keep the event loop free.
+    file_url = await run_in_threadpool(
+        storage_service.upload_file, content, object_name, file.content_type
+    )
+    if not file_url:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to upload file to storage"
+        )
+
+    new_resource = Resource(
+        user_id=current_user.id,
+        filename=file.filename or f"upload.{ext}",
+        file_url=file_url,
+        type=type,
+        status="processing"
+    )
+
+    try:
+        db.add(new_resource)
+        await db.flush()  # assigns the ID without committing
+        # NOTE(review): the job is queued before the commit, so a fast worker
+        # could look the row up before it is visible — confirm the task copes.
+        await _enqueue_extraction(new_resource.id)
+        await db.commit()
+    except Exception as exc:
+        await db.rollback()
+        logger.exception("Failed to register upload %s", object_name)
+        # Best-effort cleanup so a failed request leaves no orphaned object.
+        try:
+            await run_in_threadpool(storage_service.delete_file, object_name)
+        except Exception:
+            logger.exception("Failed to remove orphaned object %s", object_name)
+        # The exception text is logged, not returned, to avoid leaking internals.
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to queue background task"
+        ) from exc
+
+    await db.refresh(new_resource)
+    return new_resource
+
+
+@router.get("/", response_model=List[ResourceOut])
+async def list_resources(
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_current_user)
+):
+    """Return the current user's resources, newest first."""
+    result = await db.execute(
+        select(Resource).where(Resource.user_id == current_user.id).order_by(Resource.created_at.desc())
+    )
+    return result.scalars().all()
+
+
+@router.delete("/{resource_id}")
+async def delete_resource(
+    resource_id: uuid.UUID,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_current_user)
+):
+    """Delete a resource's stored file and then its database record.
+
+    Raises HTTP 404 if no such resource belongs to the caller and 500 if the
+    file could not be removed (the record is kept in that case).
+    """
+    resource = await _get_owned_resource(db, resource_id, current_user)
+
+    object_name = _object_key_from_url(resource.file_url)
+    success = await run_in_threadpool(storage_service.delete_file, object_name)
+    if not success:
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to delete file from cloud storage. Database record preserved."
+        )
+
+    # Delete from DB only after storage is confirmed deleted
+    await db.delete(resource)
+    await db.commit()
+
+    return {"message": "Resource deleted successfully"}
+
+
+@router.post("/{resource_id}/retry", response_model=ResourceOut)
+async def retry_extraction(
+    resource_id: uuid.UUID,
+    db: AsyncSession = Depends(get_db),
+    current_user: User = Depends(get_current_user)
+):
+    """Mark a resource as processing again and re-queue its extraction job.
+
+    Raises HTTP 404 if no such resource belongs to the caller and 500 if the
+    job could not be queued (the status change is rolled back).
+    """
+    resource = await _get_owned_resource(db, resource_id, current_user)
+    resource.status = "processing"
+
+    try:
+        await _enqueue_extraction(resource.id)
+        await db.commit()
+    except Exception as exc:
+        await db.rollback()
+        logger.exception("Failed to re-queue extraction for resource %s", resource_id)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Failed to queue background task"
+        ) from exc
+
+    await db.refresh(resource)
+    return resource
diff --git a/backend/app/schemas/resource.py b/backend/app/schemas/resource.py
@@ -0,0 +1,20 @@
+from pydantic import BaseModel, ConfigDict
+from uuid import UUID
+from datetime import datetime
+from typing import Optional, List
+
+class ResourceBase(BaseModel):  # fields inherited by ResourceOut
+    filename: str  # name of the uploaded file
+    type: str # notes | syllabus | past_paper | other
+
+class ResourceOut(ResourceBase):  # resource representation with identifiers, URL and status
+    id: UUID
+    user_id: UUID  # id of the user the resource belongs to
+    file_url: str  # URL of the stored file
+    status: str # pending | processing | ready | failed
+    created_at: datetime
+
+    model_config = ConfigDict(from_attributes=True)  # lets the model be built from object attributes, not only dicts
+
+class ResourceList(BaseModel):  # envelope holding a list of ResourceOut items
+    resources: List[ResourceOut]  # the wrapped resources
diff --git a/backend/app/services/storage.py b/backend/app/services/storage.py
@@ -0,0 +1,63 @@
+import logging
+from typing import Optional, Union
+
+import boto3
+from botocore.exceptions import BotoCoreError, ClientError
+
+from ..config import settings
+
+logger = logging.getLogger(__name__)
+
+
+class StorageService:
+    """Thin wrapper around an S3-compatible client for the configured Spaces bucket."""
+
+    def __init__(self):
+        self.session = boto3.session.Session()
+        self.client = self.session.client(
+            's3',
+            region_name=settings.SPACES_REGION,
+            endpoint_url=settings.SPACES_ENDPOINT,
+            aws_access_key_id=settings.SPACES_KEY,
+            aws_secret_access_key=settings.SPACES_SECRET
+        )
+
+    def upload_file(
+        self,
+        file_content: Union[bytes, bytearray],
+        object_name: str,
+        content_type: str = 'application/pdf',
+    ) -> Optional[str]:
+        """Upload the content as a public-read object.
+
+        Returns the object's public URL, or None if the upload failed.
+        """
+        try:
+            self.client.put_object(
+                Bucket=settings.SPACES_BUCKET,
+                Key=object_name,
+                Body=file_content,
+                ACL='public-read',
+                ContentType=content_type
+            )
+        except (BotoCoreError, ClientError) as e:
+            # BotoCoreError covers client-side failures (endpoint unreachable,
+            # timeouts, missing credentials) that are not ClientError subclasses.
+            logger.error("Error uploading file to DigitalOcean Spaces: %s", e)
+            return None
+        return f"{settings.SPACES_PUBLIC_URL}/{object_name}"
+
+    def delete_file(self, object_name: str) -> bool:
+        """Delete the object with this key; return True on success, False on failure."""
+        try:
+            self.client.delete_object(
+                Bucket=settings.SPACES_BUCKET,
+                Key=object_name
+            )
+        except (BotoCoreError, ClientError) as e:
+            logger.error("Error deleting file from DigitalOcean Spaces: %s", e)
+            return False
+        return True
+
+
+storage_service = StorageService()
diff --git a/backend/app/workers/arq_worker.py b/backend/app/workers/arq_worker.py
@@ -1,6 +1,7 @@
 import asyncio
 from arq.connections import RedisSettings
 from app.config import settings
+from .tasks import extraction_task
 
 async def ping(ctx):
     return "pong"
@@ -12,7 +13,7 @@ async def shutdown(ctx):
     pass
 
 class WorkerSettings:
-    functions = [ping]
+    functions = [ping, extraction_task]
     on_startup = startup
     on_shutdown = shutdown
     redis_settings = RedisSettings.from_dsn(settings.REDIS_URL)