diff --git a/ynr/apps/moderation_queue/forms.py b/ynr/apps/moderation_queue/forms.py index 4ca9e31b3..7f75d1fe8 100644 --- a/ynr/apps/moderation_queue/forms.py +++ b/ynr/apps/moderation_queue/forms.py @@ -11,9 +11,6 @@ from django.core.exceptions import ValidationError from django.template.loader import render_to_string from django.urls import reverse -from moderation_queue.helpers import ( - convert_image_to_png, -) from people.forms.forms import StrippedCharField from PIL import Image as PILImage from utils.mail import send_mail @@ -56,18 +53,13 @@ def clean(self): def save(self, commit): """ - Before saving, resize and rotate the image as needed - and convert the image to a PNG. This is done while the - image is still an InMemoryUpload object. + On save, start the process of normalizing the image """ - original_image = self.instance.image - png_image = convert_image_to_png(original_image) - filename = self.instance.image.name - extension = filename.split(".")[-1] - filename = filename.replace(extension, "png") - self.instance.image.save(filename, png_image, save=commit) - return super().save(commit=commit) + saved: QueuedImage = super().save(commit=commit) + if commit: + saved.start_image_processing() + return saved class UploadPersonPhotoURLForm(forms.Form): diff --git a/ynr/apps/moderation_queue/helpers.py b/ynr/apps/moderation_queue/helpers.py index 469fbdbeb..0a2fbc3a6 100644 --- a/ynr/apps/moderation_queue/helpers.py +++ b/ynr/apps/moderation_queue/helpers.py @@ -1,95 +1,128 @@ from io import BytesIO from tempfile import NamedTemporaryFile +from typing import Optional, Tuple import requests -from candidates.models.db import ActionType, LoggedAction -from candidates.views.version_data import get_client_ip -from django.http import HttpResponseRedirect -from django.shortcuts import render -from django.urls import reverse from PIL import Image as PillowImage +from PIL import ImageOps -from .models import QueuedImage - - -def upload_photo_response(request, person, image_form, url_form): - return render( - request, - "moderation_queue/photo-upload-new.html", - { - "image_form": image_form, - "url_form": url_form, - "queued_images": QueuedImage.objects.filter( - person=person, decision="undecided" - ).order_by("created"), - "person": person, - }, - ) +# 15MB — Rekognition's S3 object size limit +MAX_IMAGE_BYTES = 15 * 1024 * 1024 -def image_form_valid_response(request, person, image_form): - # Make sure that we save the user that made the upload - queued_image = image_form.save(commit=False) - queued_image.user = request.user - queued_image.save() - # Record that action: - LoggedAction.objects.create( - user=request.user, - action_type=ActionType.PHOTO_UPLOAD, - ip_address=get_client_ip(request), - popit_person_new_version="", - person=person, - source=image_form.cleaned_data["justification_for_use"], - ) - return HttpResponseRedirect( - reverse("photo-upload-success", kwargs={"person_id": person.id}) - ) +def strip_alpha(photo): + if photo.mode in ("RGBA", "LA") or ( + photo.mode == "P" and "transparency" in photo.info + ): + background = PillowImage.new("RGB", photo.size, (255, 255, 255)) + alpha_img = photo.convert("RGBA") + background.paste(alpha_img, mask=alpha_img.getchannel("A")) + alpha_img.close() + return background + return photo.convert("RGB") -# 15MB — Rekognition's S3 object size limit -MAX_IMAGE_BYTES = 15 * 1024 * 1024 +def save_png_to_bytes(photo): + buf = BytesIO() + photo.save(buf, "PNG", optimize=True) + size = buf.tell() + buf.seek(0) + return buf, size -def convert_image_to_png(photo): - # Some uploaded images are CYMK, which gives you an error when - # you try to write them as PNG, so convert to RGBA (this is - # RGBA rather than RGB so that any alpha channel (transparency) - # is preserved). +def strip_exifdata(photo): + photo.info.pop("exif", None) + photo.info.pop("icc_profile", None) + photo.info.pop("xmp", None) + photo.info.pop("XML:com.adobe.xmp", None) + return photo + + +def check_png_size(photo) -> Tuple[Optional[BytesIO], int]: + """ + Encode `photo` as PNG. + + If the size is below MAX_IMAGE_BYTES then return the BytesIO buffer, + otherwise delete the in-memory object. + + """ + buf = BytesIO() + photo.save(buf, "PNG") + size = buf.tell() + + if size <= MAX_IMAGE_BYTES: + buf.seek(0) + return buf, size + + buf.close() + return None, size + + +def convert_image_to_png(photo): # If the photo is not already a PillowImage object # coming from the form, then we need to # open it as a PillowImage object before # converting it to RGBA. if not isinstance(photo, PillowImage.Image): - photo = PillowImage.open(photo).convert("RGBA") - else: - photo = photo.convert("RGBA") - converted = photo.copy().convert("RGB") - w, h = converted.size + photo = PillowImage.open(photo) + + photo = ImageOps.exif_transpose(photo) + photo = strip_alpha(photo) + photo = strip_exifdata(photo) + + w, h = photo.size # Render at full size first; return immediately if already within the limit. - bytes_obj = BytesIO() - converted.save(bytes_obj, "PNG") - if bytes_obj.tell() <= MAX_IMAGE_BYTES: - return bytes_obj + # Try full-size first. + png_image, size = check_png_size(photo) + if png_image: + return png_image # Binary search over scale factors (0–1) to find the largest image that # still encodes to <= MAX_IMAGE_BYTES. lo, hi = 0.0, 1.0 - best = bytes_obj # fallback; always replaced within a couple of iterations - for _ in range(20): + best = None + + for _ in range(12): mid = (lo + hi) / 2 - resized = converted.resize( - (max(1, int(w * mid)), max(1, int(h * mid))), PillowImage.LANCZOS + + resized = photo.resize( + (max(1, int(w * mid)), max(1, int(h * mid))), + PillowImage.Resampling.LANCZOS, ) - buf = BytesIO() - resized.save(buf, "PNG") - if buf.tell() <= MAX_IMAGE_BYTES: - lo = mid # this scale fits — search higher - best = buf + + try: + png_image, size = check_png_size(resized) + finally: + resized.close() + + if png_image is not None: + lo = mid # it fits, try larger + + if best is not None: + best.close() + + best = png_image else: - hi = mid # too large — search lower - return best + hi = mid # too large, try smaller + + if best is not None: + best.seek(0) + return best + + # Worst case: the above has failed to find anything so we just + # resize the image to _something_. This is likely too lossy, but + # it's a failsafe to get some sort of image. + small = photo.thumbnail( + (800, 800), + PillowImage.Resampling.LANCZOS, + ) + try: + buf, _ = check_png_size(small) + return buf + finally: + small.close() class ImageDownloadException(Exception): diff --git a/ynr/apps/moderation_queue/management/commands/moderation_queue_process_queued_images.py b/ynr/apps/moderation_queue/management/commands/moderation_queue_process_queued_images.py deleted file mode 100644 index 4d7b55e4b..000000000 --- a/ynr/apps/moderation_queue/management/commands/moderation_queue_process_queued_images.py +++ /dev/null @@ -1,106 +0,0 @@ -import json - -import boto3 -import sorl -from django.core.management.base import BaseCommand -from moderation_queue.helpers import convert_image_to_png -from moderation_queue.models import QueuedImage -from PIL import Image, ImageOps - -try: - from storages.backends.s3 import S3Storage -except ImportError: - S3Storage = None - -# These magic values are because the AWS API crops faces quite tightly by -# default, meaning we literally just get the face. These values are about -# right or, they are more right than the default crop. -MIN_SCALING_FACTOR = 0.7 -MAX_SCALING_FACTOR = 1.3 - - -class Command(BaseCommand): - def handle(self, **options): - rekognition = boto3.client("rekognition", region_name="eu-west-1") - attributes = ["ALL"] - - qs = QueuedImage.objects.filter(decision="undecided").exclude( - face_detection_tried=True - ) - - for qi in qs: - try: - pil_img = Image.open(qi.image.file) - pil_img = ImageOps.exif_transpose(pil_img) - except Exception as e: - msg = "Skipping QueuedImage{id}: {error}" - self.stdout.write(msg.format(id=qi.id, error=e)) - continue - - png_buffer = convert_image_to_png(pil_img) - qi.image.save(qi.image.name, png_buffer) - sorl.thumbnail.delete(qi.image.name, delete_file=False) - - try: - storage = qi.image.storage - if S3Storage and isinstance(storage, S3Storage): - rekognition_image = { - "S3Object": { - "Bucket": storage.bucket_name, - "Name": storage._normalize_name(qi.image.name), - } - } - else: - rekognition_image = {"Bytes": png_buffer.getvalue()} - detected = rekognition.detect_faces( - Image=rekognition_image, Attributes=attributes - ) - self.set_x_y_from_response(qi, detected, options["verbosity"]) - except Exception as e: - msg = "Skipping QueuedImage{id}: {error}" - self.stderr.write(msg.format(id=qi.id, error=e)) - - qi.face_detection_tried = True - qi.rotation_tried = True - qi.save() - - def get_bound(self, bound, im_size, scaling_factor): - """ - In some situations the bound can be <0, and this breaks the DB - constraint. Use this methd to return at least 0 - - """ - bound = bound * im_size * scaling_factor - return max(0, bound) - - def set_x_y_from_response(self, qi, detected, verbosity=0): - if detected and detected["FaceDetails"]: - im_width = qi.image.width - im_height = qi.image.height - bounding_box = detected["FaceDetails"][0]["BoundingBox"] - qi.crop_min_x = self.get_bound( - bound=bounding_box["Left"], - im_size=im_width, - scaling_factor=MIN_SCALING_FACTOR, - ) - qi.crop_min_y = self.get_bound( - bound=bounding_box["Top"], - im_size=im_height, - scaling_factor=MIN_SCALING_FACTOR, - ) - qi.crop_max_x = self.get_bound( - bound=bounding_box["Width"], - im_size=im_width, - scaling_factor=MAX_SCALING_FACTOR, - ) - qi.crop_max_y = self.get_bound( - bound=bounding_box["Height"], - im_size=im_height, - scaling_factor=MAX_SCALING_FACTOR, - ) - qi.detection_metadata = json.dumps(detected, indent=4) - - if int(verbosity) > 1: - self.stdout.write("Set bounds of {}".format(qi)) - else: - self.stdout.write("Couldn't find a face in {}".format(qi)) diff --git a/ynr/apps/moderation_queue/models.py b/ynr/apps/moderation_queue/models.py index 76d71aa48..26d749383 100644 --- a/ynr/apps/moderation_queue/models.py +++ b/ynr/apps/moderation_queue/models.py @@ -1,13 +1,20 @@ import ast +import json import uuid from datetime import date from os.path import join, splitext from tempfile import NamedTemporaryFile +import boto3 +import sorl.thumbnail from django.contrib.auth.models import User from django.db import models from django.urls import reverse +from django_q.tasks import async_chain from PIL import Image as PillowImage +from PIL import ImageOps + +from .helpers import convert_image_to_png PHOTO_REVIEWERS_GROUP_NAME = "Photo Reviewers" VERY_TRUSTED_USER_GROUP_NAME = "Very Trusted User" @@ -126,6 +133,83 @@ def uploaded_by(self): return self.user.username return "a robot 🤖" + def start_image_processing(self): + async_chain( + [ + ( + "moderation_queue.tasks.normalise_queued_image", + (self.id,), + ), + ( + "moderation_queue.tasks.detect_faces_for_queued_image", + (self.id,), + ), + ] + ) + + def normalise_image(self): + pil_img = PillowImage.open(self.image.file) + pil_img = ImageOps.exif_transpose(pil_img) + png_buffer = convert_image_to_png(pil_img) + filename = self.image.name + extension = filename.split(".")[-1] + filename = filename.replace(extension, "png") + self.image.save(filename, png_buffer, save=True) + sorl.thumbnail.delete(self.image.name, delete_file=False) + + def _face_crop_bound(self, bound, im_size, scaling_factor): + return max(0, bound * im_size * scaling_factor) + + def _apply_face_detection(self, detected): + if not (detected and detected.get("FaceDetails")): + return + # AWS crops faces tightly by default. these scaling factors give a + # slightly wider crop that includes more context around the face. + MIN_SCALING_FACTOR = 0.7 + MAX_SCALING_FACTOR = 1.3 + bb = detected["FaceDetails"][0]["BoundingBox"] + self.crop_min_x = self._face_crop_bound( + bb["Left"], self.image.width, MIN_SCALING_FACTOR + ) + self.crop_min_y = self._face_crop_bound( + bb["Top"], self.image.height, MIN_SCALING_FACTOR + ) + self.crop_max_x = self._face_crop_bound( + bb["Width"], self.image.width, MAX_SCALING_FACTOR + ) + self.crop_max_y = self._face_crop_bound( + bb["Height"], self.image.height, MAX_SCALING_FACTOR + ) + self.detection_metadata = json.dumps(detected, indent=4) + + def detect_faces(self): + try: + from storages.backends.s3 import S3Storage + except ImportError: + S3Storage = None + + try: + rekognition = boto3.client("rekognition", region_name="eu-west-1") + storage = self.image.storage + if S3Storage and isinstance(storage, S3Storage): + rekognition_image = { + "S3Object": { + "Bucket": storage.bucket_name, + "Name": storage._normalize_name(self.image.name), + } + } + else: + with self.image.open("rb") as f: + rekognition_image = {"Bytes": f.read()} + detected = rekognition.detect_faces( + Image=rekognition_image, Attributes=["ALL"] + ) + self._apply_face_detection(detected) + finally: + self.face_detection_tried = True + self.rotation_tried = True + self.save() + def crop_image(self): """ Returns a temporary file containing the cropped image diff --git a/ynr/apps/moderation_queue/tasks.py b/ynr/apps/moderation_queue/tasks.py new file mode 100644 index 000000000..e52bdb9c2 --- /dev/null +++ b/ynr/apps/moderation_queue/tasks.py @@ -0,0 +1,11 @@ +from moderation_queue.models import QueuedImage + + +def normalise_queued_image(queued_image_id): + qi = QueuedImage.objects.get(pk=queued_image_id) + qi.normalise_image() + + +def detect_faces_for_queued_image(queued_image_id): + qi = QueuedImage.objects.get(pk=queued_image_id) + qi.detect_faces() diff --git a/ynr/apps/moderation_queue/tests/test_upload_photo.py b/ynr/apps/moderation_queue/tests/test_upload_photo.py index e60303660..03c7481ef 100644 --- a/ynr/apps/moderation_queue/tests/test_upload_photo.py +++ b/ynr/apps/moderation_queue/tests/test_upload_photo.py @@ -44,6 +44,10 @@ def setUpClass(cls): desired_storage_path = join("queued-images", "pilot.jpg") with open(cls.example_image_filename, "rb") as f: cls.storage_filename = storage.save(desired_storage_path, f) + with open(cls.rotated_image_filename, "rb") as f: + cls.rotated_storage_filename = storage.save( + join("queued-images", "rotated.jpg"), f + ) mkdir_p(TEST_MEDIA_ROOT) @classmethod @@ -140,7 +144,8 @@ def test_no_queued_images_form_visibility(self): "already has images in the queue waiting for review.You can review them here", ) - def test_photo_upload_through_image_field(self): + @patch("moderation_queue.models.QueuedImage.detect_faces") + def test_photo_upload_through_image_field(self, _mock_detect_faces): queued_images = QueuedImage.objects.all() initial_count = queued_images.count() upload_form_url = reverse("photo-upload", kwargs={"person_id": "2009"}) @@ -181,7 +186,8 @@ def test_shows_photo_policy_text_in_photo_upload_page(self): response = self.app.get(upload_form_url, user=self.test_upload_user) self.assertContains(response, "Photo policy") - def test_resize_image(self, *all_mock_requests): + @patch("moderation_queue.models.QueuedImage.detect_faces") + def test_resize_image(self, _mock_detect_faces, *all_mock_requests): # Test that the image is less than or equal to 5MB after # upload and before saving to the database. image_size = os.path.getsize(self.xl_image_filename) @@ -198,7 +204,81 @@ def test_resize_image(self, *all_mock_requests): image_size = queued_image.image.size self.assertLessEqual(image_size, 5000000) + def test_normalise_image_converts_to_png(self): + qi = QueuedImage.objects.create( + image=self.rotated_storage_filename, + person_id=2009, + why_allowed="public-domain", + ) + qi.normalise_image() + self.assertEqual(PillowImage.open(qi.image).format, "PNG") + + @patch("boto3.client") + def test_detect_faces_sets_face_detection_tried(self, mock_boto3): + mock_boto3.return_value.detect_faces.return_value = {"FaceDetails": []} + qi = QueuedImage.objects.create( + image=self.rotated_storage_filename, + person_id=2009, + why_allowed="public-domain", + ) + qi.detect_faces() + qi.refresh_from_db() + self.assertTrue(qi.face_detection_tried) + + @patch("boto3.client") + def test_detect_faces_sets_crop_bounds_when_face_found(self, mock_boto3): + mock_boto3.return_value.detect_faces.return_value = { + "FaceDetails": [ + { + "BoundingBox": { + "Left": 0.1, + "Top": 0.2, + "Width": 0.5, + "Height": 0.6, + } + } + ] + } + qi = QueuedImage.objects.create( + image=self.rotated_storage_filename, + person_id=2009, + why_allowed="public-domain", + ) + qi.detect_faces() + qi.refresh_from_db() + self.assertEqual(qi.crop_min_x, 21) + self.assertEqual(qi.crop_min_y, 42) + self.assertEqual(qi.crop_max_x, 195) + self.assertEqual(qi.crop_max_y, 234) + self.assertIn("FaceDetails", qi.detection_metadata) + + @patch("boto3.client") + def test_detect_faces_no_crop_bounds_when_no_face_found(self, mock_boto3): + mock_boto3.return_value.detect_faces.return_value = {"FaceDetails": []} + qi = QueuedImage.objects.create( + image=self.rotated_storage_filename, + person_id=2009, + why_allowed="public-domain", + ) + qi.detect_faces() + qi.refresh_from_db() + self.assertIsNone(qi.crop_min_x) + + @patch("boto3.client") + def test_detect_faces_sets_flag_on_exception(self, mock_boto3): + mock_boto3.side_effect = Exception("AWS unavailable") + qi = QueuedImage.objects.create( + image=self.rotated_storage_filename, + person_id=2009, + why_allowed="public-domain", + ) + with self.assertRaises(Exception): + qi.detect_faces() + qi.refresh_from_db() + self.assertTrue(qi.face_detection_tried) + +@patch("moderation_queue.models.QueuedImage.detect_faces") @patch("moderation_queue.forms.requests") @patch("moderation_queue.helpers.requests") @override_settings(MEDIA_ROOT=TEST_MEDIA_ROOT) @@ -237,10 +317,14 @@ def get_and_head_methods(self, *all_mock_requests): for attr in ("get", "head") ] - def successful_get_image(self, *all_mock_requests, **kwargs): + def successful_get_image( + self, *all_mock_requests, image_filename=None, **kwargs + ): content_type = kwargs.get("content_type", "image/jpeg") headers = {"content-type": content_type} - with open(self.example_image_filename, "rb") as image: + if image_filename is None: + image_filename = self.example_image_filename + with open(image_filename, "rb") as image: image_data = image.read() for mock_method in self.get_and_head_methods(*all_mock_requests): setattr( diff --git a/ynr/apps/moderation_queue/views.py b/ynr/apps/moderation_queue/views.py index 09cd79276..67d25cab7 100644 --- a/ynr/apps/moderation_queue/views.py +++ b/ynr/apps/moderation_queue/views.py @@ -20,7 +20,7 @@ HttpResponseRedirect, JsonResponse, ) -from django.shortcuts import get_object_or_404 +from django.shortcuts import get_object_or_404, render from django.template.response import TemplateResponse from django.urls import reverse from django.utils.html import urlize @@ -36,8 +36,6 @@ from moderation_queue.helpers import ( ImageDownloadException, download_image_from_url, - image_form_valid_response, - upload_photo_response, ) from people.models import TRUSTED_TO_EDIT_NAME, EditLimitationStatuses, Person from popolo.models import Membership, OtherName @@ -118,6 +116,8 @@ def upload_photo_url(request, person_id): person=person, source=url_form.cleaned_data["justification_for_use_url"], ) + + queued_image.start_image_processing() return HttpResponseRedirect( reverse("photo-upload-success", kwargs={"person_id": person.id}) ) @@ -588,3 +588,38 @@ def post(self, request, *args, **kwargs): if request.headers.get("x-requested-with") == "XMLHttpRequest": return JsonResponse({"removed": True}) return HttpResponseRedirect(ballot.get_absolute_url()) + + +def upload_photo_response(request, person, image_form, url_form): + return render( + request, + "moderation_queue/photo-upload-new.html", + { + "image_form": image_form, + "url_form": url_form, + "queued_images": QueuedImage.objects.filter( + person=person, decision="undecided" + ).order_by("created"), + "person": person, + }, + ) + + +def image_form_valid_response(request, person, image_form): + # Make sure that we save the user that made the upload + queued_image = image_form.save(commit=False) + queued_image.user = request.user + queued_image.save() + # Record that action: + LoggedAction.objects.create( + user=request.user, + action_type=ActionType.PHOTO_UPLOAD, + ip_address=get_client_ip(request), + popit_person_new_version="", + person=person, + source=image_form.cleaned_data["justification_for_use"], + ) + queued_image.start_image_processing() + return HttpResponseRedirect( + reverse("photo-upload-success", kwargs={"person_id": person.id}) + ) diff --git a/ynr/apps/ynr_refactoring/tasks.py b/ynr/apps/ynr_refactoring/tasks.py index 2eaa896ef..4bbd5342a 100644 --- a/ynr/apps/ynr_refactoring/tasks.py +++ b/ynr/apps/ynr_refactoring/tasks.py @@ -21,15 +21,6 @@ def uk_create_elections_from_every_election_recently_updated(): ) -@register_task( - name="Process images in moderation queue", - schedule_type=Schedule.CRON, - cron="1-59/5 * * * *", -) -def moderation_queue_process_queued_images(): - call_command("moderation_queue_process_queued_images") - - @register_task( name="Parse raw data from SOPNs", schedule_type=Schedule.CRON,