Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions apps/base/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import re
import uuid
import zipfile
from contextlib import contextmanager
from email.utils import formataddr, parseaddr

Expand Down Expand Up @@ -499,6 +500,23 @@ def is_model_field_changed(model_obj, field_name):
return False


def safe_extract_zip_file(zip_ref, destination):
"""
Extract zip archive members while preventing path traversal (zip slip).
"""
destination_path = os.path.abspath(destination)
for member in zip_ref.namelist():
member_path = os.path.abspath(os.path.join(destination_path, member))
if not (
member_path == destination_path
or member_path.startswith(destination_path + os.sep)
):
raise zipfile.BadZipFile(
"Zip archive contains unsafe file paths."
)
zip_ref.extractall(destination_path)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | 🏗️ Heavy lift

This still leaves a traversal path via YAML-referenced files.

safe_extract_zip_file() only validates archive member paths. In the challenge import flows, the extracted YAML is still used to build paths for files like description, evaluation_details, terms_and_conditions, submission_guidelines, evaluation_script, and test_annotation_file with plain join(..., yaml_value) calls. A config value containing ../ can still escape the extracted tree and turn this into an arbitrary file read even though extraction itself is now bounded. Please add a shared safe_join_under_root()-style helper and use it everywhere a YAML value becomes a filesystem path.

🔐 Suggested direction
+def safe_join_under_root(root, relative_path):
+    candidate = os.path.abspath(os.path.join(root, relative_path))
+    if not (
+        candidate == root
+        or candidate.startswith(root + os.sep)
+    ):
+        raise ValueError("Path escapes extraction root.")
+    return candidate

Use that helper before every isfile() / open() on YAML-derived paths.

As per coding guidelines, apps/**: Django backend. Pay attention to: Safe handling of user-submitted files and Docker image references.

📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
def safe_extract_zip_file(zip_ref, destination):
"""
Extract zip archive members while preventing path traversal (zip slip).
"""
destination_path = os.path.abspath(destination)
for member in zip_ref.namelist():
member_path = os.path.abspath(os.path.join(destination_path, member))
if not (
member_path == destination_path
or member_path.startswith(destination_path + os.sep)
):
raise zipfile.BadZipFile(
"Zip archive contains unsafe file paths."
)
zip_ref.extractall(destination_path)
def safe_extract_zip_file(zip_ref, destination):
"""
Extract zip archive members while preventing path traversal (zip slip).
"""
destination_path = os.path.abspath(destination)
for member in zip_ref.namelist():
member_path = os.path.abspath(os.path.join(destination_path, member))
if not (
member_path == destination_path
or member_path.startswith(destination_path + os.sep)
):
raise zipfile.BadZipFile(
"Zip archive contains unsafe file paths."
)
zip_ref.extractall(destination_path)
def safe_join_under_root(root, relative_path):
candidate = os.path.abspath(os.path.join(root, relative_path))
if not (
candidate == root
or candidate.startswith(root + os.sep)
):
raise ValueError("Path escapes extraction root.")
return candidate
🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@apps/base/utils.py` around lines 503 - 517, The ZIP extraction is protected
but YAML-sourced paths can still traverse out of the extracted tree; add a
helper (e.g., safe_join_under_root(root, *parts)) that resolves and verifies the
final path is inside root (similar check to safe_extract_zip_file), then replace
every place that builds filesystem paths from YAML values (references to
description, evaluation_details, terms_and_conditions, submission_guidelines,
evaluation_script, test_annotation_file) to call
safe_join_under_root(extracted_root, yaml_value) before any
os.path.isfile/open/stat calls; ensure the helper raises on unsafe targets and
is used everywhere YAML-derived values are passed to isfile() or open().



def is_user_a_staff(user):
"""
Function to check if a user is staff or not
Expand Down
4 changes: 3 additions & 1 deletion apps/challenges/challenge_config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,10 @@ def write_file(output_path, mode, file_content):


def extract_zip_file(file_path, mode, output_path):
from base.utils import safe_extract_zip_file

zip_ref = zipfile.ZipFile(file_path, mode)
zip_ref.extractall(output_path)
safe_extract_zip_file(zip_ref, output_path)
logger.info("Zip file extracted to {}".format(output_path))
zip_ref.close()
return zip_ref
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Expand Down
5 changes: 4 additions & 1 deletion apps/challenges/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
get_user_by_email,
is_user_a_staff,
paginated_queryset,
safe_extract_zip_file,
send_slack_notification,
)
from challenges.challenge_config_utils import (
Expand Down Expand Up @@ -1412,7 +1413,9 @@ def create_challenge_using_zip_file(request, challenge_host_team_pk):
# Extract zip file
try:
zip_ref = zipfile.ZipFile(CHALLENGE_ZIP_DOWNLOAD_LOCATION, "r")
zip_ref.extractall(join(BASE_LOCATION, unique_folder_name))
safe_extract_zip_file(
zip_ref, join(BASE_LOCATION, unique_folder_name)
)
zip_ref.close()
except zipfile.BadZipfile:
message = (
Expand Down
4 changes: 3 additions & 1 deletion scripts/workers/remote_submission_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,10 @@ def download_and_extract_zip_file(url, download_location, extract_location):
with open(download_location, "wb") as f:
f.write(response.content)
# extract zip file
from base.utils import safe_extract_zip_file

zip_ref = zipfile.ZipFile(download_location, "r")
zip_ref.extractall(extract_location)
safe_extract_zip_file(zip_ref, extract_location)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
zip_ref.close()
# delete zip file
try:
Expand Down
4 changes: 3 additions & 1 deletion scripts/workers/submission_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,8 +177,10 @@ def extract_zip_file(download_location, extract_location):
* `download_location`: Location of zip file
* `extract_location`: Location of directory for extracted file
"""
from base.utils import safe_extract_zip_file

zip_ref = zipfile.ZipFile(download_location, "r")
zip_ref.extractall(extract_location)
safe_extract_zip_file(zip_ref, extract_location)
zip_ref.close()


Expand Down
Loading