readthedocs · ericholscher · Apr 30, 2026 · Apr 30, 2026
@@ -1,5 +1,9 @@
 """Django administration interface for `projects.models`."""
 
+import re
+from urllib.parse import urlparse
+
+from django import forms
 from django.conf import settings
 from django.contrib import admin
 from django.contrib import messages
@@ -10,6 +14,8 @@
 from django.db.models import Sum
 from django.db.models import Value
 from django.forms import BaseInlineFormSet
+from django.template.response import TemplateResponse
+from django.urls import path
 from django.utils.html import format_html
 from django.utils.translation import gettext_lazy as _
 
@@ -211,6 +217,99 @@ def queryset(self, request, queryset):
         return queryset
 
 
+class SpamRuleChecksFromURLsForm(forms.Form):
+    """
+    Admin form with a single textarea of URLs, one per line.
+
+    The form only collects the raw text; turning each line into a project
+    slug is done by the admin view that instantiates this form.
+    """
+
+    urls = forms.CharField(
+        label="URLs",
+        widget=forms.Textarea(attrs=dict(rows=20, cols=100)),
+        # ``&lt;`` / ``&gt;`` are HTML entities for the literal ``<slug>``
+        # placeholders shown to the admin.
+        help_text=(
+            "One URL per line. "
+            "Both documentation URLs (https://&lt;slug&gt;.readthedocs.io/...) "
+            "and dashboard URLs "
+            "(https://app.readthedocs.org/projects/&lt;slug&gt;/...) "
+            "are accepted. "
+            "Messy inputs from automated reports are tolerated: "
+            "defanged URLs (hxxps://, [.] / (.)), "
+            "surrounding brackets or quotes, missing scheme, "
+            "and trailing punctuation are normalized automatically."
+        ),
+    )
+
+
+# Surrounding characters often added by mail clients, markdown, defanging
+# tools, or word-wrapping that should be stripped before parsing the URL.
+_URL_STRIP_CHARS = " \t\r\n\"'<>()[]{}.,;!?`"
+
+# Whole-value markdown link ``[label](target)``; group 1 is the target.
+_MARKDOWN_LINK_RE = re.compile(r"^\[[^\]]*\]\((.+)\)$")
+
+# Defanged scheme at the very start of the value: ``hxxp`` / ``hxxps``.
+_DEFANGED_SCHEME_RE = re.compile(r"^hxxp(s?)\b", flags=re.IGNORECASE)
+
+# Defanged separators used in security reports and the text they stand for.
+_DEFANGED_TOKENS = (
+    ("[://]", "://"),
+    ("[.]", "."),
+    ("(.)", "."),
+    ("{.}", "."),
+    ("[dot]", "."),
+    ("(dot)", "."),
+    ("[:]", ":"),
+    ("[/]", "/"),
+)
+
+
+def _normalize_url(value):
+    """
+    Best-effort normalization of a possibly-defanged or messy URL.
+
+    Handles forms commonly seen in abuse reports and emails: ``hxxps://``,
+    ``[.]``/``(.)``/``[dot]`` separators, ``[://]``, surrounding angle/square
+    brackets or quotes, markdown ``[text](url)`` links and trailing
+    punctuation.
+
+    A missing scheme is *not* added here; the returned text may still be
+    scheme-less (e.g. ``pip.readthedocs.io``).
+
+    Returns the cleaned string, or ``""`` when ``value`` is empty or ``None``.
+    """
+    if not value:
+        return ""
+
+    value = value.strip()
+
+    # Markdown link: [label](http...)
+    md_link = _MARKDOWN_LINK_RE.match(value)
+    if md_link:
+        value = md_link.group(1)
+
+    # Replace defanged separators *before* stripping wrapper characters: the
+    # tokens are themselves made of brackets, so stripping first would tear
+    # apart a token sitting at either end of the value (e.g. a trailing
+    # ``[/]``) and leave half of it behind.
+    for token, replacement in _DEFANGED_TOKENS:
+        value = value.replace(token, replacement)
+
+    value = value.strip(_URL_STRIP_CHARS)
+
+    # The pattern is anchored at the start, so it must run after the leading
+    # wrapper characters are gone.
+    return _DEFANGED_SCHEME_RE.sub(r"http\1", value)
+
+
+def _extract_project_slug_from_url(url):
+    """
+    Extract a project slug from a Read the Docs URL.
+
+    Supports docs subdomain URLs like ``https://<slug>.readthedocs.io/...`` and
+    dashboard URLs like ``https://readthedocs.org/projects/<slug>/...``. Tries
+    to be tolerant of messy inputs (defanged URLs, missing scheme, surrounding
+    brackets/quotes) so admins can paste output from automated reporting tools
+    directly.
+
+    The dashboard form is recognised purely by its path (first segment
+    ``projects``); the docs form is recognised by a hostname ending in
+    ``.<settings.PUBLIC_DOMAIN>``.
+
+    Returns the slug as a string, or ``None`` when no slug can be extracted.
+    """
+    if url is None:
+        return None
+
+    cleaned = _normalize_url(url)
+    if not cleaned:
+        return None
+
+    # urlparse needs a scheme to populate ``hostname``. Only a scheme at the
+    # very start counts: a plain ``"://" in cleaned`` test is fooled by a
+    # scheme-less URL that embeds another URL in its path or query
+    # (``pip.readthedocs.io/?next=https://...``), which would then be parsed
+    # without a hostname.
+    if not re.match(r"[a-zA-Z][a-zA-Z0-9+.-]*://", cleaned):
+        cleaned = "https://" + cleaned.lstrip("/")
+
+    try:
+        parsed = urlparse(cleaned)
+    except ValueError:
+        return None
+
+    hostname = (parsed.hostname or "").lower()
+
+    # Dashboard URLs: <something>/projects/<slug>/...
+    # NOTE(review): the hostname is not checked here, so any host with a
+    # ``/projects/<name>/`` path matches -- confirm that is intended.
+    path_parts = [p for p in parsed.path.split("/") if p]
+    if len(path_parts) >= 2 and path_parts[0] == "projects":
+        return path_parts[1] or None
+
+    # Docs subdomain URLs: <slug>.<PUBLIC_DOMAIN>
+    public_domain = (settings.PUBLIC_DOMAIN or "").lower()
+    if hostname and public_domain and hostname.endswith("." + public_domain):
+        subdomain = hostname[: -(len(public_domain) + 1)]
+        # Only the leftmost label is the project slug.
+        return subdomain.split(".")[0] or None
+
+    return None
+
+
 @admin.register(Project)
 class ProjectAdmin(ExtraSimpleHistoryAdmin):
     """Project model admin view."""
@@ -262,6 +361,104 @@ class ProjectAdmin(ExtraSimpleHistoryAdmin):
         "import_tags_from_vcs",
     ]
 
+    def get_urls(self):
+        """Return the admin URLs with the spam-rule-checks-from-URLs route first."""
+        spam_checks_route = path(
+            "spam-rule-checks-from-urls/",
+            self.admin_site.admin_view(self.spam_rule_checks_from_urls_view),
+            name="projects_project_spam_rule_checks_from_urls",
+        )
+        # The custom route goes ahead of the URLs provided by the parent class.
+        return [spam_checks_route, *super().get_urls()]
+
+    def spam_rule_checks_from_urls_view(self, request):
+        """
+        Admin page that queues spam rule checks for projects given as URLs.
+
+        On a valid POST, every non-blank line of the ``urls`` field is turned
+        into a project slug, the slugs are looked up in ``Project``, and the
+        ``spam_rules_check`` task is queued for the ones that exist (only when
+        the ``readthedocsext.spamfighting`` app is installed). Outcomes are
+        reported through the messages framework and through a ``results``
+        dict in the template context. Otherwise the form is rendered with
+        ``results`` set to ``None``.
+        """
+        form = SpamRuleChecksFromURLsForm(request.POST or None)
+        results = None
+
+        if request.method == "POST" and form.is_valid():
+            stripped_lines = (
+                line.strip() for line in form.cleaned_data["urls"].splitlines()
+            )
+            raw_urls = [line for line in stripped_lines if line]
+
+            # The first URL seen for a slug is the one recorded; URLs that
+            # yield no slug are kept aside so they can be reported back.
+            slug_to_url = {}
+            unparseable = []
+            for raw_url in raw_urls:
+                slug = _extract_project_slug_from_url(raw_url)
+                if not slug:
+                    unparseable.append(raw_url)
+                    continue
+                slug_to_url.setdefault(slug, raw_url)
+
+            existing = Project.objects.filter(slug__in=slug_to_url.keys())
+            found_projects = list(existing.values_list("slug", flat=True))
+            missing_slugs = sorted(set(slug_to_url).difference(found_projects))
+
+            # (level, text) pairs, sent to the messages framework in this
+            # order once they have all been collected.
+            notices = []
+
+            if found_projects:
+                if "readthedocsext.spamfighting" in settings.INSTALLED_APPS:
+                    from readthedocsext.spamfighting.tasks import (  # noqa
+                        spam_rules_check,
+                    )
+
+                    spam_rules_check.delay(project_slugs=list(found_projects))
+                    notices.append(
+                        (
+                            messages.INFO,
+                            "Spam check task triggered for {} project(s).".format(
+                                len(found_projects)
+                            ),
+                        )
+                    )
+                else:
+                    notices.append(
+                        (
+                            messages.ERROR,
+                            "Spam fighting Django application not installed",
+                        )
+                    )
+
+            if missing_slugs:
+                notices.append(
+                    (
+                        messages.WARNING,
+                        "No project found for slug(s): {}".format(
+                            ", ".join(missing_slugs)
+                        ),
+                    )
+                )
+            if unparseable:
+                notices.append(
+                    (
+                        messages.WARNING,
+                        "Could not extract a project slug from URL(s): {}".format(
+                            ", ".join(unparseable)
+                        ),
+                    )
+                )
+            if not raw_urls:
+                notices.append((messages.ERROR, "No URLs provided"))
+
+            for level, text in notices:
+                messages.add_message(request, level, text)
+
+            results = {
+                "matched_slugs": sorted(found_projects),
+                "missing_slugs": missing_slugs,
+                "unparseable_urls": unparseable,
+            }
+
+        context = dict(self.admin_site.each_context(request))
+        context.update(
+            {
+                "title": "Run spam rule checks from URLs",
+                "opts": self.model._meta,
+                "form": form,
+                "results": results,
+            }
+        )
+        return TemplateResponse(
+            request,
+            "admin/projects/project/spam_rule_checks_from_urls.html",
+            context,
+        )
+
     def get_queryset(self, request):
         qs = super().get_queryset(request)
 

@@ -7,6 +7,7 @@
 from django.test import TestCase
 
 from readthedocs.core.models import UserProfile
+from readthedocs.projects.admin import _extract_project_slug_from_url
 from readthedocs.projects.models import Project
 
 
@@ -57,6 +58,79 @@
         self.assertFalse(self.project.users.filter(profile__banned=True).exists())
         self.assertEqual(self.project.users.filter(profile__banned=False).count(), 2)
 
+    def test_extract_project_slug_from_dashboard_url(self):
+        """A dashboard URL yields the path segment that follows ``projects``."""
+        dashboard_url = "https://readthedocs.org/projects/pip/builds/12345/"
+        slug = _extract_project_slug_from_url(dashboard_url)
+        assert slug == "pip"
+
+    def test_extract_project_slug_from_subdomain_url(self):
+        """A docs URL yields the leftmost hostname label as the slug."""
+        docs_url = "https://pip.readthedocs.io/en/latest/"
+        slug = _extract_project_slug_from_url(docs_url)
+        assert slug == "pip"
+
+    def test_extract_project_slug_from_unknown_url_returns_none(self):
+        """A URL that is neither a docs nor a dashboard URL gives ``None``."""
+        unrelated_url = "https://example.com/foo/bar"
+        slug = _extract_project_slug_from_url(unrelated_url)
+        assert slug is None
+
+    def test_extract_project_slug_from_messy_urls(self):
+        """Defanged, wrapped or scheme-less inputs all resolve to ``pip``."""
+        messy_inputs = [
+            # Defanged with hxxps and [.]
+            ("hxxps://pip[.]readthedocs[.]io/en/latest/", "pip"),
+            # Defanged with (.)
+            ("hxxp://pip(.)readthedocs(.)io/", "pip"),
+            # Wrapped in angle brackets (mail clients)
+            ("<https://pip.readthedocs.io/>", "pip"),
+            # Trailing punctuation
+            ("https://pip.readthedocs.io/.", "pip"),
+            ("https://pip.readthedocs.io/,", "pip"),
+            # Surrounding quotes
+            ('"https://pip.readthedocs.io/"', "pip"),
+            # Markdown link form
+            ("[pip docs](https://pip.readthedocs.io/en/latest/)", "pip"),
+            # No scheme, just hostname
+            ("pip.readthedocs.io", "pip"),
+            # No scheme, dashboard path
+            ("readthedocs.org/projects/pip/", "pip"),
+            # Surrounding whitespace
+            ("   https://pip.readthedocs.io/   ", "pip"),
+        ]
+        for raw, expected in messy_inputs:
+            slug = _extract_project_slug_from_url(raw)
+            assert slug == expected, f"failed for {raw!r}"
+
+    def test_extract_project_slug_handles_none_and_empty(self):
+        """``None``, the empty string and whitespace-only text give ``None``."""
+        for blank in (None, "", "   "):
+            assert _extract_project_slug_from_url(blank) is None
+
+    def test_spam_rule_checks_from_urls_view_get(self):
+        """A GET on the custom admin view responds with HTTP 200."""
+        endpoint = urls.reverse("admin:projects_project_spam_rule_checks_from_urls")
+        resp = self.client.get(endpoint)
+        assert resp.status_code == 200
+
+    def test_spam_rule_checks_from_urls_view_post(self):
+        """
+        POST three URLs and check the response page mentions each of them.
+
+        The page must contain the existing project's slug, the unknown slug
+        and the host of the unrelated URL.
+        """
+        pasted_lines = [
+            f"https://{self.project.slug}.readthedocs.io/en/latest/",
+            "https://no-such-project-slug.readthedocs.io/",
+            "https://example.com/not/a/project",
+        ]
+        endpoint = urls.reverse("admin:projects_project_spam_rule_checks_from_urls")
+        resp = self.client.post(endpoint, {"urls": "\n".join(pasted_lines)})
+        assert resp.status_code == 200
+
+        page = resp.content.decode()
+        assert self.project.slug in page
+        assert "no-such-project-slug" in page
+        assert "example.com" in page
+
     @mock.patch("readthedocs.projects.admin.clean_project_resources")
     def test_project_delete(self, clean_project_resources):
         """Test project and artifacts are removed."""

@@ -0,0 +1,11 @@
+{% extends "admin/change_list.html" %}
+{% load i18n %}
+
+{% block object-tools-items %}
+  <li>
+    <a href="{% url 'admin:projects_project_spam_rule_checks_from_urls' %}">
+      {% translate 'Run spam rule checks from URLs' %}
+    </a>
+  </li>
+  {{ block.super }}
+{% endblock %}
@@ -0,0 +1,56 @@
+{% extends "admin/base_site.html" %}
+{% load i18n admin_urls %}
+
+{% block breadcrumbs %}
+<div class="breadcrumbs">
+  <a href="{% url 'admin:index' %}">{% translate 'Home' %}</a>
+  &rsaquo; <a href="{% url 'admin:app_list' app_label=opts.app_label %}">{{ opts.app_config.verbose_name }}</a>
+  &rsaquo; <a href="{% url 'admin:projects_project_changelist' %}">{{ opts.verbose_name_plural|capfirst }}</a>
+  &rsaquo; {{ title }}
+</div>
+{% endblock %}
+
+{% block content %}
+<h1>{{ title }}</h1>
+
+<p>
+  Paste URLs of projects (one per line) to queue spam rule checks for them.
+  This is the same operation as the
+  <em>Run spam rule checks</em> admin action, but it accepts URLs and resolves
+  them to project slugs for you.
+</p>
+
+{% if results %}
+  <h2>Results</h2>
+  <ul>
+    {% if results.matched_slugs %}
+      <li>Matched project slugs ({{ results.matched_slugs|length }}):
+        <code>{{ results.matched_slugs|join:", " }}</code></li>
+    {% endif %}
+    {% if results.missing_slugs %}
+      <li>Slugs with no matching project ({{ results.missing_slugs|length }}):
+        <code>{{ results.missing_slugs|join:", " }}</code></li>
+    {% endif %}
+    {% if results.unparseable_urls %}
+      <li>URLs that could not be parsed ({{ results.unparseable_urls|length }}):
+        <code>{{ results.unparseable_urls|join:", " }}</code></li>
+    {% endif %}
+  </ul>
+{% endif %}
+
+<form method="post">{% csrf_token %}
+  <fieldset class="module aligned">
+    {% for field in form %}
+      <div class="form-row">
+        {{ field.label_tag }}
+        {{ field }}
+        {% if field.help_text %}<div class="help">{{ field.help_text|safe }}</div>{% endif %}
+        {{ field.errors }}
+      </div>
+    {% endfor %}
+  </fieldset>
+  <div class="submit-row">
+    <input type="submit" value="{% translate 'Run spam rule checks' %}" class="default">
+  </div>
+</form>
+{% endblock %}