Skip to content
Draft
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions readthedocs/builds/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from readthedocs.core.permissions import AdminPermission
from readthedocs.core.utils import cancel_build
from readthedocs.doc_builder.exceptions import BuildAppError
from readthedocs.filetreediff import get_diff_for_build
from readthedocs.projects.models import Project
from readthedocs.projects.views.base import ProjectSpamMixin

Expand Down Expand Up @@ -116,6 +117,7 @@ def get_context_data(self, **kwargs):

build = self.get_object()
context["notifications"] = build.notifications.all()
context["files_changed_diff"] = get_diff_for_build(build)
if not build.notifications.filter(message_id=BuildAppError.GENERIC_WITH_BUILD_ID).exists():
# Do not suggest to open an issue if the error is not generic
return context
Expand Down
86 changes: 86 additions & 0 deletions readthedocs/filetreediff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

MANIFEST_FILE_NAME = "manifest.json"
BASE_MANIFEST_SNAPSHOT_FILE_NAME = "base_manifest_snapshot.json"
PREVIOUS_MANIFEST_SNAPSHOT_FILE_NAME = "previous_manifest_snapshot.json"


def get_diff(current_version: Version, base_version: Version) -> FileTreeDiff | None:
Expand All @@ -47,6 +48,10 @@ def get_diff(current_version: Version, base_version: Version) -> FileTreeDiff |
Set operations are used to calculate the added, deleted, and modified files.
To get the modified files, we compare the main content hash of each common file.
If there are no changes between the versions, all lists will be empty.

When ``current_version`` and ``base_version`` are the same normal version,
the comparison is done against the version's own previous build, using the
manifest snapshot taken before the latest build overwrote the manifest.
"""
current_version_manifest = get_manifest(current_version)
if not current_version_manifest:
Expand All @@ -72,6 +77,11 @@ def get_diff(current_version: Version, base_version: Version) -> FileTreeDiff |
base_version_manifest = None
if current_version.is_external:
base_version_manifest = _get_base_manifest_snapshot(current_version)
elif current_version.pk == base_version.pk:
# Comparing a normal version against itself: use the manifest snapshot
# taken before the latest build, so the diff reflects what the latest
# build changed relative to the version's previous build.
base_version_manifest = _get_previous_manifest_snapshot(current_version)

if not base_version_manifest:
base_version_manifest = get_manifest(base_version)
Expand Down Expand Up @@ -114,6 +124,37 @@ def get_diff(current_version: Version, base_version: Version) -> FileTreeDiff |
)


def get_diff_for_build(build: Build) -> FileTreeDiff | None:
"""
Get the file tree diff for a build, picking the right base to compare against.

Pull request builds are compared against the project's base version (the
latest version by default). Normal version builds are compared against the
version's own previous build.

Returns ``None`` if the build didn't finish successfully, has no version,
or has no diff available.
"""
if not build.success or not build.finished:
return None

version = build.version
if not version:
return None

project = build.project
if version.is_external:
base_version = project.addons.options_base_version or project.get_latest_version()
else:
# Normal versions are compared against their own previous build.
base_version = version

if not base_version:
return None

return get_diff(current_version=version, base_version=base_version)


def get_manifest(version: Version) -> FileTreeDiffManifest | None:
"""
Get the file manifest for a version.
Expand Down Expand Up @@ -202,3 +243,48 @@ def snapshot_base_manifest(external_version: Version, base_version: Version):
# rebase/synchronize webhook events so the snapshot refreshes when the
# PR is rebased against a newer base.
# See https://github.com/readthedocs/readthedocs.org/issues/12232


def _get_previous_manifest_snapshot(version: Version) -> FileTreeDiffManifest | None:
"""Get the manifest snapshot from before the version's latest build, or None."""
snapshot_path = version.get_storage_path(
media_type=MEDIA_TYPE_DIFF,
filename=PREVIOUS_MANIFEST_SNAPSHOT_FILE_NAME,
)
try:
with build_media_storage.open(snapshot_path) as f:
data = json.load(f)
except FileNotFoundError:
return None

return FileTreeDiffManifest.from_dict(data)


def snapshot_previous_manifest(version: Version):
"""
Snapshot a version's current manifest before it is overwritten.

This is called before writing the manifest for a new build of a normal
version, so the file tree diff can compare the new build against the
version's previous build.

Unlike :func:`snapshot_base_manifest`, this overwrites any existing
snapshot, so it always reflects the build right before the latest one.
"""
current_manifest = get_manifest(version)
if not current_manifest:
return

snapshot_path = version.get_storage_path(
media_type=MEDIA_TYPE_DIFF,
filename=PREVIOUS_MANIFEST_SNAPSHOT_FILE_NAME,
)
with build_media_storage.open(snapshot_path, "w") as f:
json.dump(current_manifest.as_dict(), f)

log.info(
"Previous manifest snapshot created.",
project_slug=version.project.slug,
version_slug=version.slug,
build_id=current_manifest.build.id,
)
158 changes: 156 additions & 2 deletions readthedocs/filetreediff/tests/test_filetreediff.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,19 @@
from django.test import TestCase
from django_dynamic_fixture import get

from readthedocs.builds.constants import BUILD_STATE_FINISHED, EXTERNAL, LATEST
from readthedocs.builds.constants import (
BUILD_STATE_FINISHED,
BUILD_STATE_TRIGGERED,
EXTERNAL,
LATEST,
)
from readthedocs.builds.models import Build, Version
from readthedocs.filetreediff import get_diff, snapshot_base_manifest
from readthedocs.filetreediff import (
get_diff,
get_diff_for_build,
snapshot_base_manifest,
snapshot_previous_manifest,
)
from readthedocs.projects.models import Project
from readthedocs.rtd_tests.storage import BuildMediaFileSystemStorageTest

Expand Down Expand Up @@ -240,3 +250,147 @@ def test_snapshot_is_write_once(self, storage_open, storage_exists):
"""snapshot_base_manifest is a no-op if a snapshot already exists."""
snapshot_base_manifest(self.pr_version, self.base_version)
storage_open.assert_not_called()

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_get_diff_for_build_uses_base_snapshot(self, storage_open):
"""For PR builds, get_diff_for_build compares against the base version."""
pr_files = {"index.html": "pr-hash", "new-page.html": "new-hash"}
snapshot_files = {"index.html": "original-hash"}

# get_diff reads: 1) PR manifest, 2) base snapshot
storage_open.side_effect = [
_mock_manifest(self.pr_build.id, pr_files)(),
_mock_manifest(self.base_build.id, snapshot_files)(),
]
diff = get_diff_for_build(self.pr_build)
assert [f.path for f in diff.added] == ["new-page.html"]
assert [f.path for f in diff.modified] == ["index.html"]
assert diff.deleted == []


@mock.patch(
"readthedocs.filetreediff.build_media_storage",
new=BuildMediaFileSystemStorageTest(),
)
class TestsPreviousManifestSnapshot(TestCase):
"""Tests for comparing a normal version against its own previous build."""

def setUp(self):
self.project = get(Project)
self.version = self.project.versions.get(slug=LATEST)
self.previous_build = get(
Build,
project=self.project,
version=self.version,
state=BUILD_STATE_FINISHED,
success=True,
)
self.latest_build = get(
Build,
project=self.project,
version=self.version,
state=BUILD_STATE_FINISHED,
success=True,
)

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_diff_uses_previous_manifest_snapshot(self, storage_open):
"""A normal version is compared against its previous build's manifest."""
current_files = {"index.html": "hash1", "new.html": "hash-new"}
previous_files = {"index.html": "hash0"}

# get_diff reads: 1) current manifest, 2) previous manifest snapshot
storage_open.side_effect = [
_mock_manifest(self.latest_build.id, current_files)(),
_mock_manifest(self.previous_build.id, previous_files)(),
]
diff = get_diff(self.version, self.version)
assert [f.path for f in diff.added] == ["new.html"]
assert [f.path for f in diff.modified] == ["index.html"]
assert diff.deleted == []
assert not diff.outdated

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_diff_falls_back_to_live_manifest_when_no_snapshot(self, storage_open):
"""Without a snapshot, the diff against the same version has no changes."""
files = {"index.html": "hash1"}

# get_diff reads: 1) current manifest, 2) snapshot miss, 3) live manifest
storage_open.side_effect = [
_mock_manifest(self.latest_build.id, files)(),
FileNotFoundError,
_mock_manifest(self.latest_build.id, files)(),
]
diff = get_diff(self.version, self.version)
assert diff.files == []

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_snapshot_previous_manifest_copies_current_manifest(self, storage_open):
"""snapshot_previous_manifest writes a copy of the current manifest."""
current_files = {"index.html": "hash1", "guide.html": "hash2"}
writes = []

@contextmanager
def write_capture(*args, **kwargs):
write_mock = mock.MagicMock()
write_mock.write.side_effect = writes.append
yield write_mock

storage_open.side_effect = [
_mock_manifest(self.latest_build.id, current_files)(),
write_capture(),
]
snapshot_previous_manifest(self.version)

assert storage_open.call_count == 2
written = json.loads("".join(writes))
assert written["build"]["id"] == self.latest_build.id
assert set(written["files"].keys()) == {"index.html", "guide.html"}

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_snapshot_previous_manifest_noop_without_manifest(self, storage_open):
"""snapshot_previous_manifest does nothing if there is no manifest yet."""
storage_open.side_effect = FileNotFoundError
snapshot_previous_manifest(self.version)
storage_open.assert_called_once()

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_get_diff_for_build_uses_previous_manifest_snapshot(self, storage_open):
"""For normal version builds, get_diff_for_build compares previous builds."""
current_files = {"index.html": "hash1", "new.html": "hash-new"}
previous_files = {"index.html": "hash1"}

storage_open.side_effect = [
_mock_manifest(self.latest_build.id, current_files)(),
_mock_manifest(self.previous_build.id, previous_files)(),
]
diff = get_diff_for_build(self.latest_build)
assert [f.path for f in diff.added] == ["new.html"]
assert diff.modified == []
assert diff.deleted == []

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_get_diff_for_build_skipped_for_failed_build(self, storage_open):
"""A build that didn't succeed has no diff."""
failed_build = get(
Build,
project=self.project,
version=self.version,
state=BUILD_STATE_FINISHED,
success=False,
)
assert get_diff_for_build(failed_build) is None
storage_open.assert_not_called()

@mock.patch.object(BuildMediaFileSystemStorageTest, "open")
def test_get_diff_for_build_skipped_for_unfinished_build(self, storage_open):
"""A build that hasn't finished has no diff."""
running_build = get(
Build,
project=self.project,
version=self.version,
state=BUILD_STATE_TRIGGERED,
success=True,
)
assert get_diff_for_build(running_build) is None
storage_open.assert_not_called()
8 changes: 8 additions & 0 deletions readthedocs/projects/tasks/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from readthedocs.builds.models import Version
from readthedocs.builds.tasks import post_build_overview
from readthedocs.filetreediff import snapshot_base_manifest
from readthedocs.filetreediff import snapshot_previous_manifest
from readthedocs.filetreediff import write_manifest
from readthedocs.filetreediff.dataclasses import FileTreeDiffManifest
from readthedocs.filetreediff.dataclasses import FileTreeDiffManifestFile
Expand Down Expand Up @@ -148,6 +149,13 @@ def collect(self, sync_id: int):
for path, content_hash in self._hashes.items()
],
)

# For normal versions, snapshot the existing manifest before it gets
# overwritten, so the file tree diff can compare this build against the
# version's previous build.
if not self.version.is_external:
snapshot_previous_manifest(self.version)

write_manifest(self.version, manifest)

# For PR previews, snapshot the base version's manifest on the first
Expand Down