From 2500095ac535dd28542408e58136933f855014d8 Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Thu, 27 Nov 2025 14:55:47 +0500 Subject: [PATCH 01/10] feat: Adding benchmark data. This is first PR. --- .../commands/load_benchmark_data.py | 415 ++++++++++++++++++ 1 file changed, 415 insertions(+) create mode 100644 bakerydemo/base/management/commands/load_benchmark_data.py diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py new file mode 100644 index 000000000..24de725fe --- /dev/null +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -0,0 +1,415 @@ +import random +from datetime import date, time +from pathlib import Path + +from django.conf import settings +from django.core.management.base import BaseCommand +from django.db import transaction +from django.utils import lorem_ipsum, timezone +from django.utils.text import slugify +from taggit.models import Tag +from wagtail.images.models import Image +from wagtail.models import Site +from wagtail.rich_text import RichText +from willow.image import Image as WillowImage + +from bakerydemo.base.models import HomePage, Person +from bakerydemo.blog.models import BlogIndexPage, BlogPage, BlogPersonRelationship +from bakerydemo.breads.models import BreadIngredient, BreadPage, BreadsIndexPage, BreadType, Country +from bakerydemo.locations.models import LocationOperatingHours, LocationPage, LocationsIndexPage + +FIXTURE_MEDIA_DIR = Path(settings.PROJECT_DIR) / "base/fixtures/media/original_images" + +# Benchmark configuration constants +STREAMFIELD_BLOCKS = 100 +INLINE_PANEL_ITEMS = 100 +RICH_TEXT_PARAGRAPHS = 100 +REVISIONS_PER_PAGE = 5 + +# Page count constants +BLOG_PAGES = 100 +BREAD_PAGES = 100 +LOCATION_PAGES = 100 + + +class Command(BaseCommand): + help = 'Load benchmark data for performance testing using existing content types' + + def handle(self, *args, **options): + self.stdout.write('Starting benchmark data generation.') + + try: + home_page = Site.objects.get(is_default_site=True).root_page + except (Site.DoesNotExist, Site.MultipleObjectsReturned) as e: + self.stdout.write(f'Could not find home page: {e}. Please set up the site first.') + return + + created = self.create_blog_pages(home_page, BLOG_PAGES) + self.stdout.write(f'Created {created} new blog pages') + + created = self.create_bread_pages(home_page, BREAD_PAGES) + self.stdout.write(f'Created {created} new bread pages') + + created = self.create_location_pages(home_page, LOCATION_PAGES) + self.stdout.write(f'Created {created} new location pages') + + self.stdout.write('Benchmark data generation complete!') + + def _get_images_cache(self): + """Cache images to avoid repeated queries.""" + if not hasattr(self, '_images_cache'): + self._images_cache = list(Image.objects.all() ) + return self._images_cache + + def get_random_image(self): + """Return a random image or None if no images exist.""" + images = self._get_images_cache() + return random.choice(images) if images else None + + def _generate_paragraph(self): + """Generate a random lorem ipsum paragraph.""" + return lorem_ipsum.paragraph() + + def _get_first_image(self): + """Return the first available image or None.""" + images = self._get_images_cache() + return images[0] if images else None + + def _create_heading_block(self, index): + """Create a heading block with fixed text based on index.""" + heading_sizes = ['h2', 'h3', 'h4', ''] + heading_texts = [ + 'Introduction to Baking', + 'The Art of Bread Making', + 'Essential Ingredients', + 'Traditional Techniques', + 'Modern Innovations', + ] + return ('heading_block', { + 'heading_text': heading_texts[index % len(heading_texts)], + 'size': heading_sizes[index % len(heading_sizes)] + }) + + def _create_paragraph_block(self, index, num_paragraphs=2): + """Create a paragraph block with fixed paragraphs.""" + fixed_paragraphs = [ + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', + 'Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.', + 'Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.', + 'Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.', + 'Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium.', + ] + paragraph_text = '\n'.join(fixed_paragraphs[:num_paragraphs]) + return ('paragraph_block', RichText(paragraph_text)) + + def _create_image_block(self, index): + """Create an image block with a fixed image.""" + image = self._get_first_image() + if image: + captions = [ + 'Traditional baking methods', + 'Fresh ingredients', + 'Artisan craftsmanship', + 'Quality products', + '', + ] + attributions = [ + 'Photo by Baker', + 'Courtesy of Bakery', + '', + 'Professional photography', + '', + ] + return ('image_block', { + 'image': image, + 'caption': captions[index % len(captions)], + 'attribution': attributions[index % len(attributions)], + }) + return None + + def _create_block_quote(self, index): + """Create a block quote with fixed content.""" + quote_texts = [ + 'The secret to great bread is patience and quality ingredients.', + 'Baking is both an art and a science, requiring precision and creativity.', + 'Every loaf tells a story of tradition and craftsmanship.', + 'The best bread comes from the heart, not just the recipe.', + 'In baking, timing is everything.', + ] + attribute_names = [ + 'Master Baker', + 'Artisan Chef', + 'Bread Expert', + 'Culinary Professional', + 'Baking Specialist', + ] + themes = ['default', 'highlight'] + text_sizes = ['default', 'large'] + + return ('block_quote', { + 'text': quote_texts[index % len(quote_texts)], + 'attribute_name': attribute_names[index % len(attribute_names)], + 'settings': { + 'theme': themes[index % len(themes)], + 'text_size': text_sizes[index % len(text_sizes)] + } + }) + + def generate_streamfield(self, num_blocks, num_paragraphs=0): + """Generate StreamField blocks cycling through heading, block_quote, paragraph, image.""" + blocks = [] + block_sequence = [ + lambda i: self._create_heading_block(i), # 0 + lambda i: self._create_block_quote(i), # 1 + lambda i: self._create_heading_block(i), # 2 + lambda i: self._create_image_block(i) or self._create_paragraph_block(i), # 3 + lambda i: self._create_paragraph_block(i, 2 if num_paragraphs > 0 else 1), # 4 + ] + + for i in range(num_blocks): + block_creator = block_sequence[i % 5] + blocks.append(block_creator(i)) + + return blocks + + def _publish_page_with_revisions(self, page, revisions): + """Publish page and create additional draft revisions.""" + original_introduction = page.introduction + + revision = page.save_revision() + revision.publish() + page.refresh_from_db() + + for rev_num in range(revisions - 1): + page.introduction = f"[Revision {rev_num + 2}] " + original_introduction + page.save_revision() + + page.introduction = original_introduction + page.refresh_from_db() + + + def create_blog_pages(self, home_page, count): + """Create blog pages with relationships, tags, and streamfield content.""" + blog_index = BlogIndexPage.objects.filter(slug='blog').first() + if not blog_index: + self.stdout.write(self.style.WARNING(' Blog index not found. Skipping blog pages.')) + return 0 + + people = list(Person.objects.all()) + if not people and INLINE_PANEL_ITEMS > 0: + self.stdout.write(self.style.WARNING(' No Person objects found. Creating sample people.')) + now = timezone.now() + images = self._get_images_cache() + + # Fixed names and job titles for consistent benchmark data + first_names = ['John', 'Jane', 'Michael', 'Sarah', 'David', 'Emily', 'Robert', 'Jessica', 'William', 'Ashley'] + last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Wilson', 'Moore'] + job_titles = ['Senior Developer', 'Product Manager', 'Design Lead', 'Content Writer', 'Marketing Specialist'] + + num_people = max(10, INLINE_PANEL_ITEMS) + people_to_create = [] + for i in range(num_people): + person = Person( + first_name=first_names[i % len(first_names)], + last_name=last_names[i % len(last_names)], + job_title=job_titles[i % len(job_titles)], + live=True, + first_published_at=now, + last_published_at=now, + image=images[i % len(images)] if images else None, + ) + people_to_create.append(person) + Person.objects.bulk_create(people_to_create) + people = list(Person.objects.all()) + + # Assign images to existing Person objects that don't have images + people_without_images = [p for p in people if not p.image] + if people_without_images: + images = self._get_images_cache() + if images: + for i, person in enumerate(people_without_images): + person.image = images[i % len(images)] + Person.objects.bulk_update(people_without_images, ['image']) + # Refresh the people list + people = list(Person.objects.all()) + + start_number = BlogPage.objects.count() + 1 + + tag_names = ['baking', 'bread', 'recipe', 'cooking', 'food', 'bakery', 'yeast', 'dough', 'pastry', 'dessert'] + tags = [Tag.objects.get_or_create(name=name)[0] for name in tag_names] + + body = self.generate_streamfield(STREAMFIELD_BLOCKS, RICH_TEXT_PARAGRAPHS) + + created_count = 0 + for i in range(count): + page_number = start_number + i + title = f"Blog Post {page_number}" + slug = slugify(title) + + if BlogPage.objects.filter(slug=slug).exists(): + continue + + with transaction.atomic(): + page = BlogPage( + title=title, + slug=slug, + subtitle=lorem_ipsum.words(random.randint(5, 12), common=False), + introduction=self._generate_paragraph(), + body=body, + image=self.get_random_image(), + date_published=date.today(), + ) + blog_index.add_child(instance=page) + page.refresh_from_db() + + if people: + selected_person = random.choice(people) + BlogPersonRelationship.objects.create( + page=page, + person=selected_person + ) + + if tags: + page.tags.add(*random.sample(tags, min(random.randint(2, 5), len(tags)))) + + self._publish_page_with_revisions(page, REVISIONS_PER_PAGE) + created_count += 1 + + return created_count + + def create_bread_pages(self, home_page, count): + """Create bread pages with random types, origins, and ingredients.""" + breads_index = BreadsIndexPage.objects.filter(slug='breads').first() + if not breads_index: + self.stdout.write(self.style.WARNING(' Breads index not found. Skipping bread pages.')) + return 0 + + bread_type_names = ['Sourdough', 'Baguette', 'Ciabatta', 'Rye', 'Whole Wheat', + 'Multigrain', 'Pumpernickel', 'Focaccia', 'Challah', 'Brioche', + 'Naan', 'Pita', 'Cornbread', 'Flatbread', 'Tortilla'] + country_names = ['France', 'Italy', 'Germany', 'United States', 'United Kingdom', + 'Spain', 'Greece', 'Turkey', 'India', 'Mexico', 'Canada', 'Australia'] + ingredient_names = ['Flour', 'Water', 'Yeast', 'Salt', 'Sugar', 'Olive Oil', + 'Butter', 'Eggs', 'Milk', 'Honey', 'Seeds', 'Nuts'] + + bread_types = [BreadType.objects.get_or_create(title=name)[0] for name in bread_type_names] + countries = [Country.objects.get_or_create(title=name)[0] for name in country_names] + ingredients = [BreadIngredient.objects.get_or_create(name=name)[0] for name in ingredient_names] + + start_number = BreadPage.objects.count() + 1 + body = self.generate_streamfield(STREAMFIELD_BLOCKS) + + created_count = 0 + for i in range(count): + page_number = start_number + i + title = f"{random.choice(bread_type_names)} #{page_number}" + slug = slugify(title) + + if BreadPage.objects.filter(slug=slug).exists(): + continue + + with transaction.atomic(): + page = BreadPage( + title=title, + slug=slug, + introduction=self._generate_paragraph(), + body=body, + bread_type=random.choice(bread_types), + origin=random.choice(countries) if countries else None, + image=self.get_random_image(), + ) + breads_index.add_child(instance=page) + page.refresh_from_db() + + if ingredients: + page.ingredients.set(random.sample(ingredients, min(random.randint(3, 8), len(ingredients)))) + + self._publish_page_with_revisions(page, REVISIONS_PER_PAGE) + created_count += 1 + + return created_count + + def _generate_location_address(self, city): + """Generate a random address for the given city.""" + street_number = random.randint(1, 999) + street_name = random.choice(['Main Street', 'Oak Avenue', 'Park Road', 'High Street', 'Church Lane']) + country = random.choice(['Iceland', 'United States', 'United Kingdom', 'France', 'Germany']) + return f"{street_number} {street_name},\r\n{city},\r\n{country}" + + def _generate_lat_long(self): + """Generate random latitude and longitude coordinates.""" + lat = random.uniform(-90, 90) + lng = random.uniform(-180, 180) + return f"{lat:.6f}, {lng:.6f}" + + def _create_operating_hours(self, page): + """Create operating hours for all days of the week""" + # Define hours for weekdays and weekends + weekday_hours = {'opening': time(9, 0), 'closing': time(17, 0)} + weekend_hours = {'opening': time(10, 0), 'closing': time(16, 0)} + + # Map days to their respective hours + days_config = { + 'MON': weekday_hours, + 'TUE': weekday_hours, + 'WED': weekday_hours, + 'THU': weekday_hours, + 'FRI': weekday_hours, + 'SAT': weekend_hours, + 'SUN': weekend_hours, + } + + # Create operating hours using a loop + operating_hours = [ + LocationOperatingHours( + location=page, + day=day, + opening_time=hours['opening'], + closing_time=hours['closing'], + closed=False + ) + for day, hours in days_config.items() + ] + LocationOperatingHours.objects.bulk_create(operating_hours) + + def create_location_pages(self, home_page, count): + """Create location pages with addresses, coordinates, and operating hours.""" + locations_index = LocationsIndexPage.objects.filter(slug='locations').first() + if not locations_index: + self.stdout.write(self.style.WARNING(' Locations index not found. Skipping location pages.')) + return 0 + + cities = ['New York', 'London', 'Paris', 'Tokyo', 'Sydney', 'Berlin', + 'Toronto', 'Mumbai', 'Singapore', 'Dubai', 'Barcelona', 'Amsterdam', + 'Rome', 'Madrid', 'Seoul', 'San Francisco', 'Chicago', 'Boston'] + + start_number = LocationPage.objects.count() + 1 + body = self.generate_streamfield(STREAMFIELD_BLOCKS) + + created_count = 0 + for i in range(count): + city = random.choice(cities) + title = f"{city} Location #{start_number + i}" + slug = slugify(title) + + if LocationPage.objects.filter(slug=slug).exists(): + continue + + with transaction.atomic(): + page = LocationPage( + title=title, + slug=slug, + introduction=self._generate_paragraph(), + body=body, + address=self._generate_location_address(city), + lat_long=self._generate_lat_long(), + image=self.get_random_image(), + ) + locations_index.add_child(instance=page) + page.refresh_from_db() + + self._create_operating_hours(page) + self._publish_page_with_revisions(page, REVISIONS_PER_PAGE) + created_count += 1 + + return created_count From 56486deb085aef1d68e7e873aeef56f43224f184 Mon Sep 17 00:00:00 2001 From: Jawad Khan Date: Tue, 23 Dec 2025 20:14:18 +0500 Subject: [PATCH 02/10] fix: refactore code --- .../commands/load_benchmark_data.py | 129 ++++++++++++------ 1 file changed, 86 insertions(+), 43 deletions(-) diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py index 24de725fe..5318b9317 100644 --- a/bakerydemo/base/management/commands/load_benchmark_data.py +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -1,8 +1,6 @@ import random from datetime import date, time -from pathlib import Path -from django.conf import settings from django.core.management.base import BaseCommand from django.db import transaction from django.utils import lorem_ipsum, timezone @@ -11,46 +9,91 @@ from wagtail.images.models import Image from wagtail.models import Site from wagtail.rich_text import RichText -from willow.image import Image as WillowImage -from bakerydemo.base.models import HomePage, Person +from bakerydemo.base.models import Person from bakerydemo.blog.models import BlogIndexPage, BlogPage, BlogPersonRelationship from bakerydemo.breads.models import BreadIngredient, BreadPage, BreadsIndexPage, BreadType, Country from bakerydemo.locations.models import LocationOperatingHours, LocationPage, LocationsIndexPage -FIXTURE_MEDIA_DIR = Path(settings.PROJECT_DIR) / "base/fixtures/media/original_images" - -# Benchmark configuration constants -STREAMFIELD_BLOCKS = 100 -INLINE_PANEL_ITEMS = 100 -RICH_TEXT_PARAGRAPHS = 100 -REVISIONS_PER_PAGE = 5 - -# Page count constants -BLOG_PAGES = 100 -BREAD_PAGES = 100 -LOCATION_PAGES = 100 - class Command(BaseCommand): help = 'Load benchmark data for performance testing using existing content types' + def add_arguments(self, parser): + parser.add_argument( + '--blog-pages', + type=int, + default=100, + help='Number of blog pages to create (default: 100)', + ) + parser.add_argument( + '--bread-pages', + type=int, + default=100, + help='Number of bread pages to create (default: 100)', + ) + parser.add_argument( + '--location-pages', + type=int, + default=100, + help='Number of location pages to create (default: 100)', + ) + parser.add_argument( + '--streamfield-blocks', + type=int, + default=100, + help='Number of blocks in each StreamField (default: 100)', + ) + parser.add_argument( + '--streamfield-depth', + type=int, + default=10, + help='Nesting depth for StreamField blocks (default: 1, max: 10)', + ) + parser.add_argument( + '--inline-panel-items', + type=int, + default=100, + help='Number of inline panel items to create (default: 100)', + ) + parser.add_argument( + '--rich-text-paragraphs', + type=int, + default=100, + help='Number of paragraphs in rich text fields (default: 100)', + ) + parser.add_argument( + '--revisions-per-page', + type=int, + default=5, + help='Number of revisions per page (default: 5)', + ) + def handle(self, *args, **options): - self.stdout.write('Starting benchmark data generation.') + self.blog_pages = options['blog_pages'] + self.bread_pages = options['bread_pages'] + self.location_pages = options['location_pages'] + self.streamfield_blocks = options['streamfield_blocks'] + self.streamfield_depth = min(options['streamfield_depth'], 10) + self.inline_panel_items = options['inline_panel_items'] + self.rich_text_paragraphs = options['rich_text_paragraphs'] + self.revisions_per_page = options['revisions_per_page'] + + self.stdout.write('Starting benchmark data generation...') try: home_page = Site.objects.get(is_default_site=True).root_page except (Site.DoesNotExist, Site.MultipleObjectsReturned) as e: - self.stdout.write(f'Could not find home page: {e}. Please set up the site first.') + self.stdout.write(self.style.ERROR(f'Could not find home page: {e}')) return - created = self.create_blog_pages(home_page, BLOG_PAGES) - self.stdout.write(f'Created {created} new blog pages') + created = self.create_blog_pages(home_page, self.blog_pages) + self.stdout.write(f'Created {created} blog pages') - created = self.create_bread_pages(home_page, BREAD_PAGES) - self.stdout.write(f'Created {created} new bread pages') + created = self.create_bread_pages(home_page, self.bread_pages) + self.stdout.write(f'Created {created} bread pages') - created = self.create_location_pages(home_page, LOCATION_PAGES) + created = self.create_location_pages(home_page, self.location_pages) self.stdout.write(f'Created {created} new location pages') self.stdout.write('Benchmark data generation complete!') @@ -58,7 +101,7 @@ def handle(self, *args, **options): def _get_images_cache(self): """Cache images to avoid repeated queries.""" if not hasattr(self, '_images_cache'): - self._images_cache = list(Image.objects.all() ) + self._images_cache = list(Image.objects.all()) return self._images_cache def get_random_image(self): @@ -145,7 +188,7 @@ def _create_block_quote(self, index): ] themes = ['default', 'highlight'] text_sizes = ['default', 'large'] - + return ('block_quote', { 'text': quote_texts[index % len(quote_texts)], 'attribute_name': attribute_names[index % len(attribute_names)], @@ -159,11 +202,11 @@ def generate_streamfield(self, num_blocks, num_paragraphs=0): """Generate StreamField blocks cycling through heading, block_quote, paragraph, image.""" blocks = [] block_sequence = [ - lambda i: self._create_heading_block(i), # 0 - lambda i: self._create_block_quote(i), # 1 - lambda i: self._create_heading_block(i), # 2 - lambda i: self._create_image_block(i) or self._create_paragraph_block(i), # 3 - lambda i: self._create_paragraph_block(i, 2 if num_paragraphs > 0 else 1), # 4 + lambda i: self._create_heading_block(i), + lambda i: self._create_block_quote(i), + lambda i: self._create_heading_block(i), + lambda i: self._create_image_block(i) or self._create_paragraph_block(i), + lambda i: self._create_paragraph_block(i, 2 if num_paragraphs > 0 else 1), ] for i in range(num_blocks): @@ -196,17 +239,17 @@ def create_blog_pages(self, home_page, count): return 0 people = list(Person.objects.all()) - if not people and INLINE_PANEL_ITEMS > 0: + if not people and self.inline_panel_items > 0: self.stdout.write(self.style.WARNING(' No Person objects found. Creating sample people.')) now = timezone.now() images = self._get_images_cache() - + # Fixed names and job titles for consistent benchmark data first_names = ['John', 'Jane', 'Michael', 'Sarah', 'David', 'Emily', 'Robert', 'Jessica', 'William', 'Ashley'] last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Wilson', 'Moore'] job_titles = ['Senior Developer', 'Product Manager', 'Design Lead', 'Content Writer', 'Marketing Specialist'] - - num_people = max(10, INLINE_PANEL_ITEMS) + + num_people = max(10, self.inline_panel_items) people_to_create = [] for i in range(num_people): person = Person( @@ -238,7 +281,7 @@ def create_blog_pages(self, home_page, count): tag_names = ['baking', 'bread', 'recipe', 'cooking', 'food', 'bakery', 'yeast', 'dough', 'pastry', 'dessert'] tags = [Tag.objects.get_or_create(name=name)[0] for name in tag_names] - body = self.generate_streamfield(STREAMFIELD_BLOCKS, RICH_TEXT_PARAGRAPHS) + body = self.generate_streamfield(self.streamfield_blocks, self.rich_text_paragraphs) created_count = 0 for i in range(count): @@ -272,7 +315,7 @@ def create_blog_pages(self, home_page, count): if tags: page.tags.add(*random.sample(tags, min(random.randint(2, 5), len(tags)))) - self._publish_page_with_revisions(page, REVISIONS_PER_PAGE) + self._publish_page_with_revisions(page, self.revisions_per_page) created_count += 1 return created_count @@ -297,7 +340,7 @@ def create_bread_pages(self, home_page, count): ingredients = [BreadIngredient.objects.get_or_create(name=name)[0] for name in ingredient_names] start_number = BreadPage.objects.count() + 1 - body = self.generate_streamfield(STREAMFIELD_BLOCKS) + body = self.generate_streamfield(self.streamfield_blocks) created_count = 0 for i in range(count): @@ -324,7 +367,7 @@ def create_bread_pages(self, home_page, count): if ingredients: page.ingredients.set(random.sample(ingredients, min(random.randint(3, 8), len(ingredients)))) - self._publish_page_with_revisions(page, REVISIONS_PER_PAGE) + self._publish_page_with_revisions(page, self.revisions_per_page) created_count += 1 return created_count @@ -347,7 +390,7 @@ def _create_operating_hours(self, page): # Define hours for weekdays and weekends weekday_hours = {'opening': time(9, 0), 'closing': time(17, 0)} weekend_hours = {'opening': time(10, 0), 'closing': time(16, 0)} - + # Map days to their respective hours days_config = { 'MON': weekday_hours, @@ -358,7 +401,7 @@ def _create_operating_hours(self, page): 'SAT': weekend_hours, 'SUN': weekend_hours, } - + # Create operating hours using a loop operating_hours = [ LocationOperatingHours( @@ -384,7 +427,7 @@ def create_location_pages(self, home_page, count): 'Rome', 'Madrid', 'Seoul', 'San Francisco', 'Chicago', 'Boston'] start_number = LocationPage.objects.count() + 1 - body = self.generate_streamfield(STREAMFIELD_BLOCKS) + body = self.generate_streamfield(self.streamfield_blocks) created_count = 0 for i in range(count): @@ -409,7 +452,7 @@ def create_location_pages(self, home_page, count): page.refresh_from_db() self._create_operating_hours(page) - self._publish_page_with_revisions(page, REVISIONS_PER_PAGE) + self._publish_page_with_revisions(page, self.revisions_per_page) created_count += 1 return created_count From f79a280665b26c73b5c01feb9cdcb86fdf694435 Mon Sep 17 00:00:00 2001 From: Jawad Khan Date: Tue, 23 Dec 2025 20:23:53 +0500 Subject: [PATCH 03/10] fix: refactored code --- bakerydemo/base/management/commands/load_benchmark_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py index 5318b9317..0632beffa 100644 --- a/bakerydemo/base/management/commands/load_benchmark_data.py +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -17,7 +17,7 @@ class Command(BaseCommand): - help = 'Load benchmark data for performance testing using existing content types' + help = 'Load benchmark data for performance testing' def add_arguments(self, parser): parser.add_argument( @@ -48,7 +48,7 @@ def add_arguments(self, parser): '--streamfield-depth', type=int, default=10, - help='Nesting depth for StreamField blocks (default: 1, max: 10)', + help='Nesting depth for StreamField blocks (default: 10, max: 10)', ) parser.add_argument( '--inline-panel-items', @@ -94,7 +94,7 @@ def handle(self, *args, **options): self.stdout.write(f'Created {created} bread pages') created = self.create_location_pages(home_page, self.location_pages) - self.stdout.write(f'Created {created} new location pages') + self.stdout.write(f'Created {created} location pages') self.stdout.write('Benchmark data generation complete!') From 3430bf3cc6d893e3989134e4c5d6643035e4ecfc Mon Sep 17 00:00:00 2001 From: Jawad Khan Date: Tue, 23 Dec 2025 21:01:20 +0500 Subject: [PATCH 04/10] fix: refactored code --- .../commands/load_benchmark_data.py | 284 +++++++++++++++--- 1 file changed, 245 insertions(+), 39 deletions(-) diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py index 0632beffa..b49db5832 100644 --- a/bakerydemo/base/management/commands/load_benchmark_data.py +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -1,3 +1,6 @@ +""" +Management command to load benchmark data for performance testing. +""" import random from datetime import date, time @@ -7,7 +10,6 @@ from django.utils.text import slugify from taggit.models import Tag from wagtail.images.models import Image -from wagtail.models import Site from wagtail.rich_text import RichText from bakerydemo.base.models import Person @@ -23,20 +25,20 @@ def add_arguments(self, parser): parser.add_argument( '--blog-pages', type=int, - default=100, - help='Number of blog pages to create (default: 100)', + default=10000, + help='Number of blog pages to create (default: 10000, for 100K scale use 33334)', ) parser.add_argument( '--bread-pages', type=int, - default=100, - help='Number of bread pages to create (default: 100)', + default=10000, + help='Number of bread pages to create (default: 10000, for 100K scale use 33333)', ) parser.add_argument( '--location-pages', type=int, - default=100, - help='Number of location pages to create (default: 100)', + default=10000, + help='Number of location pages to create (default: 10000, for 100K scale use 33333)', ) parser.add_argument( '--streamfield-blocks', @@ -65,8 +67,25 @@ def add_arguments(self, parser): parser.add_argument( '--revisions-per-page', type=int, - default=5, - help='Number of revisions per page (default: 5)', + default=34, + help='Number of revisions per page (default: 34, for 1M total with 30K pages)', + ) + parser.add_argument( + '--page-tree-depth', + type=int, + default=1, + help='Depth of page tree hierarchy (default: 1, max: 10)', + ) + parser.add_argument( + '--create-images', + type=int, + default=0, + help='Number of images to create (default: 0, for scale testing use 10000)', + ) + parser.add_argument( + '--create-snippets', + action='store_true', + help='Create 1M snippet instances (BreadType, Country, BreadIngredient)', ) def handle(self, *args, **options): @@ -78,22 +97,29 @@ def handle(self, *args, **options): self.inline_panel_items = options['inline_panel_items'] self.rich_text_paragraphs = options['rich_text_paragraphs'] self.revisions_per_page = options['revisions_per_page'] + self.page_tree_depth = min(options['page_tree_depth'], 10) + self.create_images = options['create_images'] + self.create_snippets = options['create_snippets'] self.stdout.write('Starting benchmark data generation...') - try: - home_page = Site.objects.get(is_default_site=True).root_page - except (Site.DoesNotExist, Site.MultipleObjectsReturned) as e: - self.stdout.write(self.style.ERROR(f'Could not find home page: {e}')) - return + # Create images if requested + if self.create_images > 0: + created = self.create_benchmark_images(self.create_images) + self.stdout.write(f'Created {created} images') + + # Create snippets if requested + if self.create_snippets: + created = self.create_benchmark_snippets() + self.stdout.write(f'Created {created} snippet instances') - created = self.create_blog_pages(home_page, self.blog_pages) + created = self.create_blog_pages(self.blog_pages) self.stdout.write(f'Created {created} blog pages') - created = self.create_bread_pages(home_page, self.bread_pages) + created = self.create_bread_pages(self.bread_pages) self.stdout.write(f'Created {created} bread pages') - created = self.create_location_pages(home_page, self.location_pages) + created = self.create_location_pages(self.location_pages) self.stdout.write(f'Created {created} location pages') self.stdout.write('Benchmark data generation complete!') @@ -104,6 +130,103 @@ def _get_images_cache(self): self._images_cache = list(Image.objects.all()) return self._images_cache + def create_benchmark_images(self, count): + """Create benchmark images with solid color placeholders.""" + from io import BytesIO + from PIL import Image as PILImage + from django.core.files.uploadedfile import InMemoryUploadedFile + + created_count = 0 + colors = [ + (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), + (255, 0, 255), (0, 255, 255), (128, 128, 128), (255, 128, 0), + ] + + for i in range(count): + title = f"Benchmark Image {i + 1}" + + if Image.objects.filter(title=title).exists(): + continue + + # Create a simple colored image + img = PILImage.new('RGB', (800, 600), color=colors[i % len(colors)]) + img_io = BytesIO() + img.save(img_io, format='JPEG', quality=85) + img_io.seek(0) + + img_file = InMemoryUploadedFile( + img_io, None, f'benchmark_{i + 1}.jpg', 'image/jpeg', + img_io.getbuffer().nbytes, None + ) + + wagtail_image = Image( + title=title, + file=img_file, + ) + wagtail_image.save() + created_count += 1 + + if created_count % 100 == 0: + self.stdout.write(f' Created {created_count} images...') + + # Clear the cache so new images are picked up + if hasattr(self, '_images_cache'): + del self._images_cache + + return created_count + + def create_benchmark_snippets(self): + """Create 1M snippet instances (BreadType, Country, BreadIngredient).""" + created_count = 0 + batch_size = 1000 + + # Create BreadType snippets (~333K) + self.stdout.write(' Creating BreadType snippets...') + bread_types = [] + for i in range(333334): + bread_types.append(BreadType(title=f"Bread Type {i + 1}")) + if len(bread_types) >= batch_size: + BreadType.objects.bulk_create(bread_types, ignore_conflicts=True) + created_count += len(bread_types) + bread_types = [] + if created_count % 10000 == 0: + self.stdout.write(f' Created {created_count} snippets...') + if bread_types: + BreadType.objects.bulk_create(bread_types, ignore_conflicts=True) + created_count += len(bread_types) + + # Create Country snippets (~333K) + self.stdout.write(' Creating Country snippets...') + countries = [] + for i in range(333333): + countries.append(Country(title=f"Country {i + 1}")) + if len(countries) >= batch_size: + Country.objects.bulk_create(countries, ignore_conflicts=True) + created_count += len(countries) + countries = [] + if created_count % 10000 == 0: + self.stdout.write(f' Created {created_count} snippets...') + if countries: + Country.objects.bulk_create(countries, ignore_conflicts=True) + created_count += len(countries) + + # Create BreadIngredient snippets (~333K) + self.stdout.write(' Creating BreadIngredient snippets...') + ingredients = [] + for i in range(333333): + ingredients.append(BreadIngredient(name=f"Ingredient {i + 1}")) + if len(ingredients) >= batch_size: + BreadIngredient.objects.bulk_create(ingredients, ignore_conflicts=True) + created_count += len(ingredients) + ingredients = [] + if created_count % 10000 == 0: + self.stdout.write(f' Created {created_count} snippets...') + if ingredients: + BreadIngredient.objects.bulk_create(ingredients, ignore_conflicts=True) + created_count += len(ingredients) + + return created_count + def get_random_image(self): """Return a random image or None if no images exist.""" images = self._get_images_cache() @@ -142,7 +265,11 @@ def _create_paragraph_block(self, index, num_paragraphs=2): 'Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.', 'Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium.', ] - paragraph_text = '\n'.join(fixed_paragraphs[:num_paragraphs]) + # Repeat paragraphs to reach the desired count + paragraphs_to_use = [] + for i in range(num_paragraphs): + paragraphs_to_use.append(fixed_paragraphs[i % len(fixed_paragraphs)]) + paragraph_text = '\n'.join(paragraphs_to_use) return ('paragraph_block', RichText(paragraph_text)) def _create_image_block(self, index): @@ -198,20 +325,37 @@ def _create_block_quote(self, index): } }) - def generate_streamfield(self, num_blocks, num_paragraphs=0): - """Generate StreamField blocks cycling through heading, block_quote, paragraph, image.""" + def generate_streamfield(self, num_blocks, num_paragraphs=0, depth=0): + """Generate StreamField blocks with optional nesting depth.""" blocks = [] - block_sequence = [ - lambda i: self._create_heading_block(i), - lambda i: self._create_block_quote(i), - lambda i: self._create_heading_block(i), - lambda i: self._create_image_block(i) or self._create_paragraph_block(i), - lambda i: self._create_paragraph_block(i, 2 if num_paragraphs > 0 else 1), - ] - for i in range(num_blocks): - block_creator = block_sequence[i % 5] - blocks.append(block_creator(i)) + # If we have depth remaining and blocks to create, add nested blocks + if depth > 0 and num_blocks > 0: + # Create nested structure blocks - not all block types support nesting + # For simplicity, we'll create paragraph blocks that could conceptually be nested + for i in range(min(num_blocks, 10)): # Limit nested blocks per level + blocks.append(self._create_paragraph_block(i, num_paragraphs if num_paragraphs > 0 else 2)) + + # Recursively add nested blocks + if depth > 1 and num_blocks > 10: + # Create a marker for nesting (in real implementation, this would be a StructBlock) + nested_blocks = self.generate_streamfield(num_blocks // 2, num_paragraphs, depth - 1) + # In a real implementation with proper StructBlock support, we'd wrap these + # For now, just add them to demonstrate the nesting capability + blocks.extend(nested_blocks[:min(len(nested_blocks), num_blocks - 10)]) + else: + # Regular flat block structure + block_sequence = [ + lambda i: self._create_heading_block(i), + lambda i: self._create_block_quote(i), + lambda i: self._create_heading_block(i), + lambda i: self._create_image_block(i) or self._create_paragraph_block(i, num_paragraphs if num_paragraphs > 0 else 2), + lambda i: self._create_paragraph_block(i, num_paragraphs if num_paragraphs > 0 else 2), + ] + + for i in range(num_blocks): + block_creator = block_sequence[i % 5] + blocks.append(block_creator(i)) return blocks @@ -231,7 +375,7 @@ def _publish_page_with_revisions(self, page, revisions): page.refresh_from_db() - def create_blog_pages(self, home_page, count): + def create_blog_pages(self, count): """Create blog pages with relationships, tags, and streamfield content.""" blog_index = BlogIndexPage.objects.filter(slug='blog').first() if not blog_index: @@ -240,6 +384,7 @@ def create_blog_pages(self, home_page, count): people = list(Person.objects.all()) if not people and self.inline_panel_items > 0: + # ...existing code for creating people... self.stdout.write(self.style.WARNING(' No Person objects found. Creating sample people.')) now = timezone.now() images = self._get_images_cache() @@ -281,9 +426,12 @@ def create_blog_pages(self, home_page, count): tag_names = ['baking', 'bread', 'recipe', 'cooking', 'food', 'bakery', 'yeast', 'dough', 'pastry', 'dessert'] tags = [Tag.objects.get_or_create(name=name)[0] for name in tag_names] - body = self.generate_streamfield(self.streamfield_blocks, self.rich_text_paragraphs) + body = self.generate_streamfield(self.streamfield_blocks, self.rich_text_paragraphs, self.streamfield_depth) created_count = 0 + current_parent = blog_index + pages_at_current_level = [] + for i in range(count): page_number = start_number + i title = f"Blog Post {page_number}" @@ -292,6 +440,19 @@ def create_blog_pages(self, home_page, count): if BlogPage.objects.filter(slug=slug).exists(): continue + # Implement tree depth: create hierarchy of pages + level = 1 + if self.page_tree_depth > 1: + # Calculate which level this page should be at + level = (i % self.page_tree_depth) + 1 + + if level == 1: + current_parent = blog_index + pages_at_current_level = [] + elif level > 1 and pages_at_current_level: + # Use the last page from previous level as parent + current_parent = pages_at_current_level[-1] + with transaction.atomic(): page = BlogPage( title=title, @@ -302,7 +463,7 @@ def create_blog_pages(self, home_page, count): image=self.get_random_image(), date_published=date.today(), ) - blog_index.add_child(instance=page) + current_parent.add_child(instance=page) page.refresh_from_db() if people: @@ -318,9 +479,16 @@ def create_blog_pages(self, home_page, count): self._publish_page_with_revisions(page, self.revisions_per_page) created_count += 1 + # Track pages at current level for hierarchy + if self.page_tree_depth > 1: + if level == len(pages_at_current_level) + 1: + pages_at_current_level.append(page) + elif level <= len(pages_at_current_level): + pages_at_current_level = pages_at_current_level[:level-1] + [page] + return created_count - def create_bread_pages(self, home_page, count): + def create_bread_pages(self, count): """Create bread pages with random types, origins, and ingredients.""" breads_index = BreadsIndexPage.objects.filter(slug='breads').first() if not breads_index: @@ -340,9 +508,12 @@ def create_bread_pages(self, home_page, count): ingredients = [BreadIngredient.objects.get_or_create(name=name)[0] for name in ingredient_names] start_number = BreadPage.objects.count() + 1 - body = self.generate_streamfield(self.streamfield_blocks) + body = self.generate_streamfield(self.streamfield_blocks, 0, self.streamfield_depth) created_count = 0 + current_parent = breads_index + pages_at_current_level = [] + for i in range(count): page_number = start_number + i title = f"{random.choice(bread_type_names)} #{page_number}" @@ -351,6 +522,16 @@ def create_bread_pages(self, home_page, count): if BreadPage.objects.filter(slug=slug).exists(): continue + # Implement tree depth + level = 1 + if self.page_tree_depth > 1: + level = (i % self.page_tree_depth) + 1 + if level == 1: + current_parent = breads_index + pages_at_current_level = [] + elif level > 1 and pages_at_current_level: + current_parent = pages_at_current_level[-1] + with transaction.atomic(): page = BreadPage( title=title, @@ -361,7 +542,7 @@ def create_bread_pages(self, home_page, count): origin=random.choice(countries) if countries else None, image=self.get_random_image(), ) - breads_index.add_child(instance=page) + current_parent.add_child(instance=page) page.refresh_from_db() if ingredients: @@ -370,6 +551,12 @@ def create_bread_pages(self, home_page, count): self._publish_page_with_revisions(page, self.revisions_per_page) created_count += 1 + if self.page_tree_depth > 1: + if level == len(pages_at_current_level) + 1: + pages_at_current_level.append(page) + elif level <= len(pages_at_current_level): + pages_at_current_level = pages_at_current_level[:level-1] + [page] + return created_count def _generate_location_address(self, city): @@ -415,7 +602,7 @@ def _create_operating_hours(self, page): ] LocationOperatingHours.objects.bulk_create(operating_hours) - def create_location_pages(self, home_page, count): + def create_location_pages(self, count): """Create location pages with addresses, coordinates, and operating hours.""" locations_index = LocationsIndexPage.objects.filter(slug='locations').first() if not locations_index: @@ -427,9 +614,12 @@ def create_location_pages(self, home_page, count): 'Rome', 'Madrid', 'Seoul', 'San Francisco', 'Chicago', 'Boston'] start_number = LocationPage.objects.count() + 1 - body = self.generate_streamfield(self.streamfield_blocks) + body = self.generate_streamfield(self.streamfield_blocks, 0, self.streamfield_depth) created_count = 0 + current_parent = locations_index + pages_at_current_level = [] + for i in range(count): city = random.choice(cities) title = f"{city} Location #{start_number + i}" @@ -438,6 +628,16 @@ def create_location_pages(self, home_page, count): if LocationPage.objects.filter(slug=slug).exists(): continue + # Implement tree depth + level = 1 + if self.page_tree_depth > 1: + level = (i % self.page_tree_depth) + 1 + if level == 1: + current_parent = locations_index + pages_at_current_level = [] + elif level > 1 and pages_at_current_level: + current_parent = pages_at_current_level[-1] + with transaction.atomic(): page = LocationPage( title=title, @@ -448,11 +648,17 @@ def create_location_pages(self, home_page, count): lat_long=self._generate_lat_long(), image=self.get_random_image(), ) - locations_index.add_child(instance=page) + current_parent.add_child(instance=page) page.refresh_from_db() self._create_operating_hours(page) self._publish_page_with_revisions(page, self.revisions_per_page) created_count += 1 + if self.page_tree_depth > 1: + if level == len(pages_at_current_level) + 1: + pages_at_current_level.append(page) + elif level <= len(pages_at_current_level): + pages_at_current_level = pages_at_current_level[:level-1] + [page] + return created_count From 349b23389769e48f4c0bcffb1370bc5c4211fd5b Mon Sep 17 00:00:00 2001 From: Jawad Khan Date: Fri, 26 Dec 2025 12:35:35 +0500 Subject: [PATCH 05/10] fix: refactored code --- .../commands/load_benchmark_data.py | 482 ++++++++---------- 1 file changed, 200 insertions(+), 282 deletions(-) diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py index b49db5832..b864b0e65 100644 --- a/bakerydemo/base/management/commands/load_benchmark_data.py +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -3,9 +3,11 @@ """ import random from datetime import date, time +from io import BytesIO +from PIL import Image as PILImage +from django.core.files.uploadedfile import InMemoryUploadedFile from django.core.management.base import BaseCommand -from django.db import transaction from django.utils import lorem_ipsum, timezone from django.utils.text import slugify from taggit.models import Tag @@ -23,25 +25,25 @@ class Command(BaseCommand): def add_arguments(self, parser): parser.add_argument( - '--blog-pages', + '--blog-pages-count', type=int, - default=10000, - help='Number of blog pages to create (default: 10000, for 100K scale use 33334)', + default=1000, + help='Number of blog pages to create (default: 33334, for 100K total)', ) parser.add_argument( - '--bread-pages', + '--bread-pages-count', type=int, - default=10000, - help='Number of bread pages to create (default: 10000, for 100K scale use 33333)', + default=1000, + help='Number of bread pages to create (default: 33333, for 100K total)', ) parser.add_argument( - '--location-pages', + '--location-pages-count', type=int, - default=10000, - help='Number of location pages to create (default: 10000, for 100K scale use 33333)', + default=1000, + help='Number of location pages to create (default: 33333, for 100K total)', ) parser.add_argument( - '--streamfield-blocks', + '--streamfield-blocks-count', type=int, default=100, help='Number of blocks in each StreamField (default: 100)', @@ -53,76 +55,79 @@ def add_arguments(self, parser): help='Nesting depth for StreamField blocks (default: 10, max: 10)', ) parser.add_argument( - '--inline-panel-items', + '--inline-panel-items-count', type=int, default=100, help='Number of inline panel items to create (default: 100)', ) parser.add_argument( - '--rich-text-paragraphs', + '--rich-text-paragraphs-count', type=int, default=100, help='Number of paragraphs in rich text fields (default: 100)', ) parser.add_argument( - '--revisions-per-page', + '--revisions-per-page-count', type=int, - default=34, - help='Number of revisions per page (default: 34, for 1M total with 30K pages)', + default=10000, + help='Number of revisions per page (default: 10, for 1M total with 100K pages)', ) parser.add_argument( '--page-tree-depth', type=int, - default=1, - help='Depth of page tree hierarchy (default: 1, max: 10)', + default=10, + help='Depth of page tree hierarchy (default: 10, max: 10)', ) parser.add_argument( - '--create-images', + '--images-count', type=int, - default=0, - help='Number of images to create (default: 0, for scale testing use 10000)', + default=100, + help='Number of images to create (default: 1000, range: hundreds to 10000)', ) parser.add_argument( - '--create-snippets', - action='store_true', - help='Create 1M snippet instances (BreadType, Country, BreadIngredient)', + '--snippets-count', + type=int, + default=1000000, + help='Number of snippet instances to create (default: 1000000)', ) def handle(self, *args, **options): - self.blog_pages = options['blog_pages'] - self.bread_pages = options['bread_pages'] - self.location_pages = options['location_pages'] - self.streamfield_blocks = options['streamfield_blocks'] + self.set_input_params(options) + self.print_configurations() + self.create_benchmark_images() + self.create_benchmark_snippets() + self.create_blog_pages() + self.create_bread_pages(self.bread_pages_count) + self.create_location_pages(self.location_pages_count) + self.create_revisions_for_page() + + self.stdout.write(self.style.SUCCESS('\n=== Benchmark Data Generation Complete! ===')) + + def set_input_params(self, options): + self.blog_pages_count = options['blog_pages_count'] + self.bread_pages_count = options['bread_pages_count'] + self.location_pages_count = options['location_pages_count'] + self.streamfield_blocks_count = options['streamfield_blocks_count'] self.streamfield_depth = min(options['streamfield_depth'], 10) - self.inline_panel_items = options['inline_panel_items'] - self.rich_text_paragraphs = options['rich_text_paragraphs'] - self.revisions_per_page = options['revisions_per_page'] + self.inline_panel_items_count = options['inline_panel_items_count'] + self.rich_text_paragraphs_count = options['rich_text_paragraphs_count'] + self.revisions_per_page_count = options['revisions_per_page_count'] self.page_tree_depth = min(options['page_tree_depth'], 10) - self.create_images = options['create_images'] - self.create_snippets = options['create_snippets'] - - self.stdout.write('Starting benchmark data generation...') - - # Create images if requested - if self.create_images > 0: - created = self.create_benchmark_images(self.create_images) - self.stdout.write(f'Created {created} images') - - # Create snippets if requested - if self.create_snippets: - created = self.create_benchmark_snippets() - self.stdout.write(f'Created {created} snippet instances') - - created = self.create_blog_pages(self.blog_pages) - self.stdout.write(f'Created {created} blog pages') - - created = self.create_bread_pages(self.bread_pages) - self.stdout.write(f'Created {created} bread pages') - - created = self.create_location_pages(self.location_pages) - self.stdout.write(f'Created {created} location pages') - - self.stdout.write('Benchmark data generation complete!') + self.images_count = options['images_count'] + self.snippets_count = options['snippets_count'] + + def print_configurations(self): + self.stdout.write('\nConfiguration:') + self.stdout.write(f' Blog pages: {self.blog_pages_count}') + self.stdout.write(f' Bread pages: {self.bread_pages_count}') + self.stdout.write(f' Location pages: {self.location_pages_count}') + self.stdout.write(f' StreamField blocks: {self.streamfield_blocks_count} (depth: {self.streamfield_depth})') + self.stdout.write(f' Inline panel items: {self.inline_panel_items_count}') + self.stdout.write(f' Rich text paragraphs: {self.rich_text_paragraphs_count}') + self.stdout.write(f' Revisions per page: {self.revisions_per_page_count}') + self.stdout.write(f' Page tree depth: {self.page_tree_depth}') + self.stdout.write(f' Images count: {self.images_count}') + self.stdout.write(f' Snippets count: {self.snippets_count}\n') def _get_images_cache(self): """Cache images to avoid repeated queries.""" @@ -130,22 +135,22 @@ def _get_images_cache(self): self._images_cache = list(Image.objects.all()) return self._images_cache - def create_benchmark_images(self, count): + def create_benchmark_images(self): """Create benchmark images with solid color placeholders.""" - from io import BytesIO - from PIL import Image as PILImage - from django.core.files.uploadedfile import InMemoryUploadedFile + self.stdout.write(' Initializing image creation...') created_count = 0 + skipped_count = 0 colors = [ (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255), (0, 255, 255), (128, 128, 128), (255, 128, 0), ] - for i in range(count): + for i in range(self.images_count): title = f"Benchmark Image {i + 1}" if Image.objects.filter(title=title).exists(): + skipped_count += 1 continue # Create a simple colored image @@ -166,66 +171,35 @@ def create_benchmark_images(self, count): wagtail_image.save() created_count += 1 - if created_count % 100 == 0: - self.stdout.write(f' Created {created_count} images...') - # Clear the cache so new images are picked up if hasattr(self, '_images_cache'): del self._images_cache + self.stdout.write(' Cleared image cache') - return created_count + self.stdout.write(f' Skipped {skipped_count} existing images') + self.stdout.write(self.style.SUCCESS(f'✓ Created {created_count} images\n')) def create_benchmark_snippets(self): - """Create 1M snippet instances (BreadType, Country, BreadIngredient).""" + """Create snippet instances (BreadType, Country, BreadIngredient).""" + self.stdout.write(' Starting snippet creation in bulk batches...') created_count = 0 batch_size = 1000 - # Create BreadType snippets (~333K) - self.stdout.write(' Creating BreadType snippets...') - bread_types = [] - for i in range(333334): - bread_types.append(BreadType(title=f"Bread Type {i + 1}")) - if len(bread_types) >= batch_size: - BreadType.objects.bulk_create(bread_types, ignore_conflicts=True) - created_count += len(bread_types) - bread_types = [] - if created_count % 10000 == 0: - self.stdout.write(f' Created {created_count} snippets...') - if bread_types: + for i in range(self.snippets_count // (batch_size * 3) + self.snippets_count % 3): + bread_types = [BreadType(title=f"Bread Type {i * j + 1}") for j in range(batch_size)] BreadType.objects.bulk_create(bread_types, ignore_conflicts=True) - created_count += len(bread_types) - - # Create Country snippets (~333K) - self.stdout.write(' Creating Country snippets...') - countries = [] - for i in range(333333): - countries.append(Country(title=f"Country {i + 1}")) - if len(countries) >= batch_size: - Country.objects.bulk_create(countries, ignore_conflicts=True) - created_count += len(countries) - countries = [] - if created_count % 10000 == 0: - self.stdout.write(f' Created {created_count} snippets...') - if countries: + + countries = [Country(title=f"Country {i * j + 1}") for j in range(batch_size)] Country.objects.bulk_create(countries, ignore_conflicts=True) - created_count += len(countries) - - # Create BreadIngredient snippets (~333K) - self.stdout.write(' Creating BreadIngredient snippets...') - ingredients = [] - for i in range(333333): - ingredients.append(BreadIngredient(name=f"Ingredient {i + 1}")) - if len(ingredients) >= batch_size: - BreadIngredient.objects.bulk_create(ingredients, ignore_conflicts=True) - created_count += len(ingredients) - ingredients = [] - if created_count % 10000 == 0: - self.stdout.write(f' Created {created_count} snippets...') - if ingredients: + + ingredients = [BreadIngredient(name=f"Ingredient {i * j + 1}") for j in range(batch_size)] BreadIngredient.objects.bulk_create(ingredients, ignore_conflicts=True) - created_count += len(ingredients) - return created_count + created_count += batch_size*3 + if created_count % 60000 == 0: + self.stdout.write(f' Progress: {created_count:,} total snippets created...') + + self.stdout.write(f'Created {created_count} snippet instances') def get_random_image(self): """Return a random image or None if no images exist.""" @@ -359,7 +333,13 @@ def generate_streamfield(self, num_blocks, num_paragraphs=0, depth=0): return blocks - def _publish_page_with_revisions(self, page, revisions): + def create_revisions_for_page(self): + self.stdout.write(f' Creating {self.revisions_per_page_count:,} revisions for a page...') + + page = BlogPage.objects.first() + self.publish_page_with_revisions(page, self.revisions_per_page_count) + + def publish_page_with_revisions(self, page, revisions): """Publish page and create additional draft revisions.""" original_introduction = page.introduction @@ -371,193 +351,147 @@ def _publish_page_with_revisions(self, page, revisions): page.introduction = f"[Revision {rev_num + 2}] " + original_introduction page.save_revision() + # Progress every 1000 pages + if rev_num % 1000 == 0: + self.stdout.write(f' Progress: {rev_num:,}/{revisions:,} revisions created...') + page.introduction = original_introduction page.refresh_from_db() - def create_blog_pages(self, count): + def create_blog_pages(self): """Create blog pages with relationships, tags, and streamfield content.""" + self.stdout.write(' Checking for blog index page...') blog_index = BlogIndexPage.objects.filter(slug='blog').first() if not blog_index: self.stdout.write(self.style.WARNING(' Blog index not found. Skipping blog pages.')) return 0 + self.stdout.write(f' ✓ Found blog index: {blog_index.title}') - people = list(Person.objects.all()) - if not people and self.inline_panel_items > 0: - # ...existing code for creating people... - self.stdout.write(self.style.WARNING(' No Person objects found. Creating sample people.')) + # Only load/create 10 people objects (not 100+) + self.stdout.write(' Loading existing Person objects...') + people = list(Person.objects.all()[:10]) + if not people: + self.stdout.write(self.style.WARNING(' No Person objects found. Creating 10 sample people...')) now = timezone.now() - images = self._get_images_cache() + images = self._get_images_cache()[:10] if self._get_images_cache() else [] - # Fixed names and job titles for consistent benchmark data first_names = ['John', 'Jane', 'Michael', 'Sarah', 'David', 'Emily', 'Robert', 'Jessica', 'William', 'Ashley'] last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Wilson', 'Moore'] - job_titles = ['Senior Developer', 'Product Manager', 'Design Lead', 'Content Writer', 'Marketing Specialist'] - - num_people = max(10, self.inline_panel_items) - people_to_create = [] - for i in range(num_people): - person = Person( - first_name=first_names[i % len(first_names)], - last_name=last_names[i % len(last_names)], - job_title=job_titles[i % len(job_titles)], + job_titles = ['Developer', 'Manager', 'Designer', 'Writer', 'Specialist'] + + people_to_create = [ + Person( + first_name=first_names[i], + last_name=last_names[i], + job_title=job_titles[i % 5], live=True, first_published_at=now, last_published_at=now, - image=images[i % len(images)] if images else None, + image=images[i] if images and i < len(images) else None, ) - people_to_create.append(person) + for i in range(10) + ] Person.objects.bulk_create(people_to_create) - people = list(Person.objects.all()) - - # Assign images to existing Person objects that don't have images - people_without_images = [p for p in people if not p.image] - if people_without_images: - images = self._get_images_cache() - if images: - for i, person in enumerate(people_without_images): - person.image = images[i % len(images)] - Person.objects.bulk_update(people_without_images, ['image']) - # Refresh the people list - people = list(Person.objects.all()) + people = list(Person.objects.all()[:10]) + self.stdout.write(f' ✓ Created {len(people)} Person objects') + else: + self.stdout.write(f' ✓ Found {len(people)} existing Person objects') start_number = BlogPage.objects.count() + 1 + self.stdout.write(f' Starting page number: {start_number}') - tag_names = ['baking', 'bread', 'recipe', 'cooking', 'food', 'bakery', 'yeast', 'dough', 'pastry', 'dessert'] + self.stdout.write(' Preparing tags...') + tag_names = ['baking', 'bread', 'recipe', 'cooking', 'food'] tags = [Tag.objects.get_or_create(name=name)[0] for name in tag_names] + self.stdout.write(f' ✓ Prepared {len(tags)} tags') - body = self.generate_streamfield(self.streamfield_blocks, self.rich_text_paragraphs, self.streamfield_depth) + # Create lightweight StreamField template + self.stdout.write(f' Generating lightweight StreamField template...') + body_template = self.generate_streamfield(self.streamfield_blocks_count, 2, 1) + self.stdout.write(f' ✓ Generated StreamField template (reusable)') + # Use add_child (required for Wagtail) but optimize by reducing operations + self.stdout.write(f' Creating {self.blog_pages_count:,} blog pages...') created_count = 0 - current_parent = blog_index - pages_at_current_level = [] - for i in range(count): + for i in range(self.blog_pages_count): page_number = start_number + i title = f"Blog Post {page_number}" - slug = slugify(title) - - if BlogPage.objects.filter(slug=slug).exists(): - continue - # Implement tree depth: create hierarchy of pages - level = 1 - if self.page_tree_depth > 1: - # Calculate which level this page should be at - level = (i % self.page_tree_depth) + 1 - - if level == 1: - current_parent = blog_index - pages_at_current_level = [] - elif level > 1 and pages_at_current_level: - # Use the last page from previous level as parent - current_parent = pages_at_current_level[-1] - - with transaction.atomic(): - page = BlogPage( - title=title, - slug=slug, - subtitle=lorem_ipsum.words(random.randint(5, 12), common=False), - introduction=self._generate_paragraph(), - body=body, - image=self.get_random_image(), - date_published=date.today(), - ) - current_parent.add_child(instance=page) - page.refresh_from_db() + page = BlogPage( + title=title, + slug=slugify(title), + subtitle=lorem_ipsum.words(5, common=False), + introduction=lorem_ipsum.paragraph(), + body=body_template, + image=self.get_random_image(), + date_published=date.today(), + ) - if people: - selected_person = random.choice(people) - BlogPersonRelationship.objects.create( - page=page, - person=selected_person - ) + blog_index.add_child(instance=page) - if tags: - page.tags.add(*random.sample(tags, min(random.randint(2, 5), len(tags)))) + if people: + BlogPersonRelationship.objects.create( + page=page, + person=people[i % len(people)] + ) - self._publish_page_with_revisions(page, self.revisions_per_page) - created_count += 1 + created_count += 1 - # Track pages at current level for hierarchy - if self.page_tree_depth > 1: - if level == len(pages_at_current_level) + 1: - pages_at_current_level.append(page) - elif level <= len(pages_at_current_level): - pages_at_current_level = pages_at_current_level[:level-1] + [page] + # Progress every 1000 pages + if created_count % 1000 == 0: + self.stdout.write(f' Progress: {created_count:,}/{self.blog_pages_count:,} blog pages created...') - return created_count + self.stdout.write(f' ✓ Created {created_count:,} pages with relationships') def create_bread_pages(self, count): """Create bread pages with random types, origins, and ingredients.""" + self.stdout.write(' Checking for breads index page...') breads_index = BreadsIndexPage.objects.filter(slug='breads').first() if not breads_index: self.stdout.write(self.style.WARNING(' Breads index not found. Skipping bread pages.')) return 0 + self.stdout.write(f' ✓ Found breads index: {breads_index.title}') - bread_type_names = ['Sourdough', 'Baguette', 'Ciabatta', 'Rye', 'Whole Wheat', - 'Multigrain', 'Pumpernickel', 'Focaccia', 'Challah', 'Brioche', - 'Naan', 'Pita', 'Cornbread', 'Flatbread', 'Tortilla'] - country_names = ['France', 'Italy', 'Germany', 'United States', 'United Kingdom', - 'Spain', 'Greece', 'Turkey', 'India', 'Mexico', 'Canada', 'Australia'] - ingredient_names = ['Flour', 'Water', 'Yeast', 'Salt', 'Sugar', 'Olive Oil', - 'Butter', 'Eggs', 'Milk', 'Honey', 'Seeds', 'Nuts'] + bread_type_names = ['Sourdough', 'Baguette', 'Ciabatta', 'Rye', 'Whole Wheat'] + country_names = ['France', 'Italy', 'Germany', 'United States', 'United Kingdom'] bread_types = [BreadType.objects.get_or_create(title=name)[0] for name in bread_type_names] countries = [Country.objects.get_or_create(title=name)[0] for name in country_names] - ingredients = [BreadIngredient.objects.get_or_create(name=name)[0] for name in ingredient_names] start_number = BreadPage.objects.count() + 1 - body = self.generate_streamfield(self.streamfield_blocks, 0, self.streamfield_depth) + self.stdout.write(f' Starting page number: {start_number}') + + # Reuse lightweight StreamField template + self.stdout.write(f' Generating lightweight StreamField template...') + body_template = self.generate_streamfield(self.streamfield_blocks_count, 0, 1) + self.stdout.write(f' ✓ Generated StreamField template') + self.stdout.write(f' Creating {count:,} bread pages...') created_count = 0 - current_parent = breads_index - pages_at_current_level = [] for i in range(count): page_number = start_number + i - title = f"{random.choice(bread_type_names)} #{page_number}" - slug = slugify(title) - - if BreadPage.objects.filter(slug=slug).exists(): - continue - - # Implement tree depth - level = 1 - if self.page_tree_depth > 1: - level = (i % self.page_tree_depth) + 1 - if level == 1: - current_parent = breads_index - pages_at_current_level = [] - elif level > 1 and pages_at_current_level: - current_parent = pages_at_current_level[-1] - - with transaction.atomic(): - page = BreadPage( - title=title, - slug=slug, - introduction=self._generate_paragraph(), - body=body, - bread_type=random.choice(bread_types), - origin=random.choice(countries) if countries else None, - image=self.get_random_image(), - ) - current_parent.add_child(instance=page) - page.refresh_from_db() + title = f"{bread_type_names[i % len(bread_type_names)]} #{page_number}" - if ingredients: - page.ingredients.set(random.sample(ingredients, min(random.randint(3, 8), len(ingredients)))) + page = BreadPage( + title=title, + slug=slugify(title), + introduction=lorem_ipsum.paragraph(), + body=body_template, + bread_type=bread_types[i % len(bread_types)], + origin=countries[i % len(countries)], + image=self.get_random_image(), + ) - self._publish_page_with_revisions(page, self.revisions_per_page) - created_count += 1 + breads_index.add_child(instance=page) + created_count += 1 - if self.page_tree_depth > 1: - if level == len(pages_at_current_level) + 1: - pages_at_current_level.append(page) - elif level <= len(pages_at_current_level): - pages_at_current_level = pages_at_current_level[:level-1] + [page] + if created_count % 1000 == 0: + self.stdout.write(f' Progress: {created_count:,}/{count:,} bread pages created...') - return created_count + self.stdout.write(f' ✓ Created {created_count:,} pages') def _generate_location_address(self, city): """Generate a random address for the given city.""" @@ -604,61 +538,45 @@ def _create_operating_hours(self, page): def create_location_pages(self, count): """Create location pages with addresses, coordinates, and operating hours.""" + self.stdout.write(' Checking for locations index page...') locations_index = LocationsIndexPage.objects.filter(slug='locations').first() if not locations_index: self.stdout.write(self.style.WARNING(' Locations index not found. Skipping location pages.')) return 0 + self.stdout.write(f' ✓ Found locations index: {locations_index.title}') - cities = ['New York', 'London', 'Paris', 'Tokyo', 'Sydney', 'Berlin', - 'Toronto', 'Mumbai', 'Singapore', 'Dubai', 'Barcelona', 'Amsterdam', - 'Rome', 'Madrid', 'Seoul', 'San Francisco', 'Chicago', 'Boston'] + cities = ['New York', 'London', 'Paris', 'Tokyo', 'Sydney', 'Berlin'] start_number = LocationPage.objects.count() + 1 - body = self.generate_streamfield(self.streamfield_blocks, 0, self.streamfield_depth) + self.stdout.write(f' Starting page number: {start_number}') + + self.stdout.write(f' Generating lightweight StreamField template...') + body_template = self.generate_streamfield(min(10, self.streamfield_blocks_count), 0, 1) + self.stdout.write(f' ✓ Generated StreamField template') + # Use add_child (required for Wagtail) + self.stdout.write(f' Creating {count:,} location pages...') created_count = 0 - current_parent = locations_index - pages_at_current_level = [] for i in range(count): - city = random.choice(cities) - title = f"{city} Location #{start_number + i}" - slug = slugify(title) - - if LocationPage.objects.filter(slug=slug).exists(): - continue + city = cities[i % len(cities)] + page_number = start_number + i + title = f"{city} Location #{page_number}" - # Implement tree depth - level = 1 - if self.page_tree_depth > 1: - level = (i % self.page_tree_depth) + 1 - if level == 1: - current_parent = locations_index - pages_at_current_level = [] - elif level > 1 and pages_at_current_level: - current_parent = pages_at_current_level[-1] - - with transaction.atomic(): - page = LocationPage( - title=title, - slug=slug, - introduction=self._generate_paragraph(), - body=body, - address=self._generate_location_address(city), - lat_long=self._generate_lat_long(), - image=self.get_random_image(), - ) - current_parent.add_child(instance=page) - page.refresh_from_db() + page = LocationPage( + title=title, + slug=slugify(title), + introduction=lorem_ipsum.paragraph(), + body=body_template, + address=self._generate_location_address(city), + lat_long=self._generate_lat_long(), + image=self.get_random_image(), + ) - self._create_operating_hours(page) - self._publish_page_with_revisions(page, self.revisions_per_page) - created_count += 1 + locations_index.add_child(instance=page) + created_count += 1 - if self.page_tree_depth > 1: - if level == len(pages_at_current_level) + 1: - pages_at_current_level.append(page) - elif level <= len(pages_at_current_level): - pages_at_current_level = pages_at_current_level[:level-1] + [page] + if created_count % 1000 == 0: + self.stdout.write(f' Progress: {created_count:,}/{count:,} location pages created...') - return created_count + self.stdout.write(f' ✓ Created {created_count:,} pages') From 5f9ae500b9020cec5eb9e98cafc9e5e9dc969df2 Mon Sep 17 00:00:00 2001 From: Jawad Khan Date: Fri, 26 Dec 2025 17:10:45 +0500 Subject: [PATCH 06/10] fix: refactored code --- .../commands/load_benchmark_data.py | 440 ++++++++---------- 1 file changed, 197 insertions(+), 243 deletions(-) diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py index b864b0e65..9d3033d75 100644 --- a/bakerydemo/base/management/commands/load_benchmark_data.py +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -2,22 +2,20 @@ Management command to load benchmark data for performance testing. """ import random -from datetime import date, time +from datetime import date from io import BytesIO from PIL import Image as PILImage from django.core.files.uploadedfile import InMemoryUploadedFile from django.core.management.base import BaseCommand from django.utils import lorem_ipsum, timezone -from django.utils.text import slugify -from taggit.models import Tag from wagtail.images.models import Image +from wagtail.models import Locale from wagtail.rich_text import RichText from bakerydemo.base.models import Person from bakerydemo.blog.models import BlogIndexPage, BlogPage, BlogPersonRelationship -from bakerydemo.breads.models import BreadIngredient, BreadPage, BreadsIndexPage, BreadType, Country -from bakerydemo.locations.models import LocationOperatingHours, LocationPage, LocationsIndexPage +from bakerydemo.breads.models import BreadIngredient, BreadType, Country class Command(BaseCommand): @@ -28,106 +26,100 @@ def add_arguments(self, parser): '--blog-pages-count', type=int, default=1000, - help='Number of blog pages to create (default: 33334, for 100K total)', - ) - parser.add_argument( - '--bread-pages-count', - type=int, - default=1000, - help='Number of bread pages to create (default: 33333, for 100K total)', - ) - parser.add_argument( - '--location-pages-count', - type=int, - default=1000, - help='Number of location pages to create (default: 33333, for 100K total)', + help='Number of blog pages to create', ) parser.add_argument( '--streamfield-blocks-count', type=int, default=100, - help='Number of blocks in each StreamField (default: 100)', + help='Number of blocks in each StreamField', ) parser.add_argument( '--streamfield-depth', type=int, default=10, - help='Nesting depth for StreamField blocks (default: 10, max: 10)', + help='Nesting depth for StreamField blocks', ) parser.add_argument( '--inline-panel-items-count', type=int, default=100, - help='Number of inline panel items to create (default: 100)', + help='Number of inline panel items to create', ) parser.add_argument( - '--rich-text-paragraphs-count', + '--paragraphs-count', type=int, default=100, - help='Number of paragraphs in rich text fields (default: 100)', + help='Number of paragraphs in rich text fields', ) parser.add_argument( '--revisions-per-page-count', type=int, - default=10000, - help='Number of revisions per page (default: 10, for 1M total with 100K pages)', + default=100, + help='Number of revisions per page', ) parser.add_argument( '--page-tree-depth', type=int, default=10, - help='Depth of page tree hierarchy (default: 10, max: 10)', + help='Depth of page tree hierarchy', ) parser.add_argument( '--images-count', type=int, default=100, - help='Number of images to create (default: 1000, range: hundreds to 10000)', + help='Number of images to create', ) parser.add_argument( '--snippets-count', type=int, - default=1000000, - help='Number of snippet instances to create (default: 1000000)', + default=100000, + help='Number of snippet instances to create', + ) + parser.add_argument( + '--translations-count', + type=int, + default=100, + help='Number of language translations to create', ) def handle(self, *args, **options): self.set_input_params(options) self.print_configurations() + self.create_benchmark_images() - self.create_benchmark_snippets() self.create_blog_pages() - self.create_bread_pages(self.bread_pages_count) - self.create_location_pages(self.location_pages_count) - self.create_revisions_for_page() + self.create_inline_panel_items() + self.create_benchmark_snippets() + self.create_revisionss() + self.create_translations() + self.generate_streamfield(self.streamfield_blocks_count, self.paragraphs_count, self.streamfield_depth) self.stdout.write(self.style.SUCCESS('\n=== Benchmark Data Generation Complete! ===')) def set_input_params(self, options): self.blog_pages_count = options['blog_pages_count'] - self.bread_pages_count = options['bread_pages_count'] - self.location_pages_count = options['location_pages_count'] self.streamfield_blocks_count = options['streamfield_blocks_count'] self.streamfield_depth = min(options['streamfield_depth'], 10) self.inline_panel_items_count = options['inline_panel_items_count'] - self.rich_text_paragraphs_count = options['rich_text_paragraphs_count'] + self.paragraphs_count = options['paragraphs_count'] self.revisions_per_page_count = options['revisions_per_page_count'] self.page_tree_depth = min(options['page_tree_depth'], 10) self.images_count = options['images_count'] self.snippets_count = options['snippets_count'] + self.translations_count = min(options['translations_count'], 100) def print_configurations(self): self.stdout.write('\nConfiguration:') self.stdout.write(f' Blog pages: {self.blog_pages_count}') - self.stdout.write(f' Bread pages: {self.bread_pages_count}') - self.stdout.write(f' Location pages: {self.location_pages_count}') self.stdout.write(f' StreamField blocks: {self.streamfield_blocks_count} (depth: {self.streamfield_depth})') self.stdout.write(f' Inline panel items: {self.inline_panel_items_count}') - self.stdout.write(f' Rich text paragraphs: {self.rich_text_paragraphs_count}') + self.stdout.write(f' Rich text paragraphs: {self.paragraphs_count}') self.stdout.write(f' Revisions per page: {self.revisions_per_page_count}') self.stdout.write(f' Page tree depth: {self.page_tree_depth}') self.stdout.write(f' Images count: {self.images_count}') - self.stdout.write(f' Snippets count: {self.snippets_count}\n') + self.stdout.write(f' Snippets count: {self.snippets_count}') + self.stdout.write(f' Translations count: {self.translations_count}\n') def _get_images_cache(self): """Cache images to avoid repeated queries.""" @@ -185,21 +177,22 @@ def create_benchmark_snippets(self): created_count = 0 batch_size = 1000 - for i in range(self.snippets_count // (batch_size * 3) + self.snippets_count % 3): - bread_types = [BreadType(title=f"Bread Type {i * j + 1}") for j in range(batch_size)] + for batch_num in range((self.snippets_count + batch_size * 3 - 1) // (batch_size * 3)): + bread_types = [BreadType(title=f"Bread Type {batch_num * batch_size + j + 1}") for j in range(batch_size)] BreadType.objects.bulk_create(bread_types, ignore_conflicts=True) - countries = [Country(title=f"Country {i * j + 1}") for j in range(batch_size)] + countries = [Country(title=f"Country {batch_num * batch_size + j + 1}") for j in range(batch_size)] Country.objects.bulk_create(countries, ignore_conflicts=True) - ingredients = [BreadIngredient(name=f"Ingredient {i * j + 1}") for j in range(batch_size)] + ingredients = [BreadIngredient(name=f"Ingredient {batch_num * batch_size + j + 1}") + for j in range(batch_size)] BreadIngredient.objects.bulk_create(ingredients, ignore_conflicts=True) - created_count += batch_size*3 - if created_count % 60000 == 0: + created_count += batch_size * 3 + if created_count % (batch_size * 90) == 0: self.stdout.write(f' Progress: {created_count:,} total snippets created...') - self.stdout.write(f'Created {created_count} snippet instances') + self.stdout.write(f'Total available snippet instances are: {created_count}') def get_random_image(self): """Return a random image or None if no images exist.""" @@ -233,10 +226,10 @@ def _create_heading_block(self, index): def _create_paragraph_block(self, index, num_paragraphs=2): """Create a paragraph block with fixed paragraphs.""" fixed_paragraphs = [ - 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.', - 'Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.', + 'Lorem ipsum dolor st amet, consectetur adiscing elit. Sed do eimod temport labore et dolore magna aliqua.', + 'Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo cequat.', 'Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.', - 'Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.', + 'Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt m anim id est laborum.', 'Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium doloremque laudantium.', ] # Repeat paragraphs to reach the desired count @@ -269,7 +262,6 @@ def _create_image_block(self, index): 'caption': captions[index % len(captions)], 'attribution': attributions[index % len(attributions)], }) - return None def _create_block_quote(self, index): """Create a block quote with fixed content.""" @@ -323,7 +315,8 @@ def generate_streamfield(self, num_blocks, num_paragraphs=0, depth=0): lambda i: self._create_heading_block(i), lambda i: self._create_block_quote(i), lambda i: self._create_heading_block(i), - lambda i: self._create_image_block(i) or self._create_paragraph_block(i, num_paragraphs if num_paragraphs > 0 else 2), + lambda i: self._create_image_block(i) or + self._create_paragraph_block(i, num_paragraphs if num_paragraphs > 0 else 2), lambda i: self._create_paragraph_block(i, num_paragraphs if num_paragraphs > 0 else 2), ] @@ -333,13 +326,14 @@ def generate_streamfield(self, num_blocks, num_paragraphs=0, depth=0): return blocks - def create_revisions_for_page(self): - self.stdout.write(f' Creating {self.revisions_per_page_count:,} revisions for a page...') + def create_revisionss(self): + self.stdout.write(f' Creating revisions for pages...') - page = BlogPage.objects.first() - self.publish_page_with_revisions(page, self.revisions_per_page_count) + pages = BlogPage.objects.all()[:10] + for page in pages: + self.create_page_revisions(page, self.revisions_per_page_count) - def publish_page_with_revisions(self, page, revisions): + def create_page_revisions(self, page, revisions): """Publish page and create additional draft revisions.""" original_introduction = page.introduction @@ -351,232 +345,192 @@ def publish_page_with_revisions(self, page, revisions): page.introduction = f"[Revision {rev_num + 2}] " + original_introduction page.save_revision() - # Progress every 1000 pages - if rev_num % 1000 == 0: - self.stdout.write(f' Progress: {rev_num:,}/{revisions:,} revisions created...') + if (rev_num + 1) % 1000 == 0: + self.stdout.write(f' Progress: {rev_num:,}/{revisions:,} revisions created for page ID {page.title}.') + + self.stdout.write(f' ✓ Created {revisions} for page {page.title}') page.introduction = original_introduction page.refresh_from_db() + def create_translations(self): + """Create language translations for pages.""" + self.stdout.write(f' Creating {self.translations_count} language translations...') - def create_blog_pages(self): - """Create blog pages with relationships, tags, and streamfield content.""" - self.stdout.write(' Checking for blog index page...') - blog_index = BlogIndexPage.objects.filter(slug='blog').first() - if not blog_index: - self.stdout.write(self.style.WARNING(' Blog index not found. Skipping blog pages.')) - return 0 - self.stdout.write(f' ✓ Found blog index: {blog_index.title}') - - # Only load/create 10 people objects (not 100+) - self.stdout.write(' Loading existing Person objects...') - people = list(Person.objects.all()[:10]) - if not people: - self.stdout.write(self.style.WARNING(' No Person objects found. Creating 10 sample people...')) - now = timezone.now() - images = self._get_images_cache()[:10] if self._get_images_cache() else [] - - first_names = ['John', 'Jane', 'Michael', 'Sarah', 'David', 'Emily', 'Robert', 'Jessica', 'William', 'Ashley'] - last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Wilson', 'Moore'] - job_titles = ['Developer', 'Manager', 'Designer', 'Writer', 'Specialist'] - - people_to_create = [ - Person( - first_name=first_names[i], - last_name=last_names[i], - job_title=job_titles[i % 5], - live=True, - first_published_at=now, - last_published_at=now, - image=images[i] if images and i < len(images) else None, - ) - for i in range(10) - ] - Person.objects.bulk_create(people_to_create) - people = list(Person.objects.all()[:10]) - self.stdout.write(f' ✓ Created {len(people)} Person objects') - else: - self.stdout.write(f' ✓ Found {len(people)} existing Person objects') + # Generate language codes for locales (e.g., lang-01, lang-02, ..., lang-100) + # Using synthetic language codes since we need 100 unique ones - start_number = BlogPage.objects.count() + 1 - self.stdout.write(f' Starting page number: {start_number}') + default_locale = Locale.objects.filter(language_code='en').first() or Locale.objects.first() + if not default_locale: + self.stdout.write(self.style.WARNING(' No default locale found. Skipping translations.')) + return - self.stdout.write(' Preparing tags...') - tag_names = ['baking', 'bread', 'recipe', 'cooking', 'food'] - tags = [Tag.objects.get_or_create(name=name)[0] for name in tag_names] - self.stdout.write(f' ✓ Prepared {len(tags)} tags') + self.stdout.write(' Creating locales...') + existing_locales = set(Locale.objects.values_list('language_code', flat=True)) + language_codes = [f"lg{i:03d}" for i in range(1, self.translations_count + 1)] - # Create lightweight StreamField template - self.stdout.write(f' Generating lightweight StreamField template...') - body_template = self.generate_streamfield(self.streamfield_blocks_count, 2, 1) - self.stdout.write(f' ✓ Generated StreamField template (reusable)') + locales_to_create = [Locale(language_code=lang_code) + for lang_code in language_codes if lang_code not in existing_locales] - # Use add_child (required for Wagtail) but optimize by reducing operations - self.stdout.write(f' Creating {self.blog_pages_count:,} blog pages...') - created_count = 0 + if locales_to_create: + Locale.objects.bulk_create(locales_to_create, ignore_conflicts=True) + self.stdout.write(f' ✓ Created {len(locales_to_create)} new locales') - for i in range(self.blog_pages_count): - page_number = start_number + i - title = f"Blog Post {page_number}" + locales = list(Locale.objects.filter(language_code__in=language_codes)) - page = BlogPage( - title=title, - slug=slugify(title), - subtitle=lorem_ipsum.words(5, common=False), - introduction=lorem_ipsum.paragraph(), - body=body_template, - image=self.get_random_image(), - date_published=date.today(), - ) + blog_index = BlogIndexPage.objects.filter(slug='blog').first() + sample_page = BlogPage.objects.first() - blog_index.add_child(instance=page) + if not blog_index: + self.stdout.write(self.style.WARNING(' No blog index found. Skipping translations.')) + return - if people: - BlogPersonRelationship.objects.create( - page=page, - person=people[i % len(people)] - ) + if not sample_page: + self.stdout.write(self.style.WARNING(' No pages found to translate. Skipping translations.')) + return - created_count += 1 + # Create translations for each locale + created_count = 0 + for locale in locales: + try: + # First, translate the parent page (blog index) if not already translated + if not BlogIndexPage.objects.filter(translation_key=blog_index.translation_key, locale=locale).exists(): + translated_index = blog_index.copy_for_translation(locale) + translated_index.title = f"{blog_index.title} ({locale.language_code})" + translated_index.save_revision().publish() - # Progress every 1000 pages - if created_count % 1000 == 0: - self.stdout.write(f' Progress: {created_count:,}/{self.blog_pages_count:,} blog pages created...') + if BlogPage.objects.filter(translation_key=sample_page.translation_key, locale=locale).exists(): + continue - self.stdout.write(f' ✓ Created {created_count:,} pages with relationships') + translated_page = sample_page.copy_for_translation(locale) + translated_page.title = f"{sample_page.title} ({locale.language_code})" + translated_page.save_revision().publish() + created_count += 1 - def create_bread_pages(self, count): - """Create bread pages with random types, origins, and ingredients.""" - self.stdout.write(' Checking for breads index page...') - breads_index = BreadsIndexPage.objects.filter(slug='breads').first() - if not breads_index: - self.stdout.write(self.style.WARNING(' Breads index not found. Skipping bread pages.')) - return 0 - self.stdout.write(f' ✓ Found breads index: {breads_index.title}') + if created_count % 10 == 0: + self.stdout.write(f' Progress: {created_count}/{len(locales)} translations created...') + except Exception as e: + self.stdout.write(f' Error creating translation for {locale.language_code}: {str(e)[:50]}') - bread_type_names = ['Sourdough', 'Baguette', 'Ciabatta', 'Rye', 'Whole Wheat'] - country_names = ['France', 'Italy', 'Germany', 'United States', 'United Kingdom'] + self.stdout.write(f' ✓ Created {created_count} page translations across {len(locales)} locales\n') - bread_types = [BreadType.objects.get_or_create(title=name)[0] for name in bread_type_names] - countries = [Country.objects.get_or_create(title=name)[0] for name in country_names] - start_number = BreadPage.objects.count() + 1 - self.stdout.write(f' Starting page number: {start_number}') + def create_blog_pages(self): + """Create blog pages with streamfield content.""" + blog_index = BlogIndexPage.objects.filter(slug='blog').first() + if not blog_index: + self.stdout.write(self.style.WARNING(' Blog index not found. Skipping blog pages.')) + return - # Reuse lightweight StreamField template - self.stdout.write(f' Generating lightweight StreamField template...') - body_template = self.generate_streamfield(self.streamfield_blocks_count, 0, 1) - self.stdout.write(f' ✓ Generated StreamField template') + body_template = self.generate_streamfield(1, 2, 1) + subtitle = lorem_ipsum.words(5, common=False) + introduction = lorem_ipsum.paragraph() + image = self.get_random_image() + today = date.today() - self.stdout.write(f' Creating {count:,} bread pages...') + existing_slugs = set(BlogPage.objects.values_list('slug', flat=True)) created_count = 0 + skipped_count = 0 - for i in range(count): + start_number = BlogPage.objects.count() + 1 + self.stdout.write(f' Creating {self.blog_pages_count:,} blog pages...') + + for i in range(self.blog_pages_count): page_number = start_number + i - title = f"{bread_type_names[i % len(bread_type_names)]} #{page_number}" + slug = f"blog-post-{page_number}" - page = BreadPage( - title=title, - slug=slugify(title), - introduction=lorem_ipsum.paragraph(), + if slug in existing_slugs: + skipped_count += 1 + continue + + page = BlogPage( + title=f"Blog Post {page_number}", + slug=slug, + subtitle=subtitle, + introduction=introduction, body=body_template, - bread_type=bread_types[i % len(bread_types)], - origin=countries[i % len(countries)], - image=self.get_random_image(), + image=image, + date_published=today, ) - breads_index.add_child(instance=page) + blog_index.add_child(instance=page) created_count += 1 if created_count % 1000 == 0: - self.stdout.write(f' Progress: {created_count:,}/{count:,} bread pages created...') - - self.stdout.write(f' ✓ Created {created_count:,} pages') - - def _generate_location_address(self, city): - """Generate a random address for the given city.""" - street_number = random.randint(1, 999) - street_name = random.choice(['Main Street', 'Oak Avenue', 'Park Road', 'High Street', 'Church Lane']) - country = random.choice(['Iceland', 'United States', 'United Kingdom', 'France', 'Germany']) - return f"{street_number} {street_name},\r\n{city},\r\n{country}" - - def _generate_lat_long(self): - """Generate random latitude and longitude coordinates.""" - lat = random.uniform(-90, 90) - lng = random.uniform(-180, 180) - return f"{lat:.6f}, {lng:.6f}" - - def _create_operating_hours(self, page): - """Create operating hours for all days of the week""" - # Define hours for weekdays and weekends - weekday_hours = {'opening': time(9, 0), 'closing': time(17, 0)} - weekend_hours = {'opening': time(10, 0), 'closing': time(16, 0)} - - # Map days to their respective hours - days_config = { - 'MON': weekday_hours, - 'TUE': weekday_hours, - 'WED': weekday_hours, - 'THU': weekday_hours, - 'FRI': weekday_hours, - 'SAT': weekend_hours, - 'SUN': weekend_hours, - } - - # Create operating hours using a loop - operating_hours = [ - LocationOperatingHours( - location=page, - day=day, - opening_time=hours['opening'], - closing_time=hours['closing'], - closed=False + self.stdout.write(f' Progress: {created_count:,}/{self.blog_pages_count:,} blog pages created...') + + self.stdout.write(f' ✓ Created {created_count:,} pages (skipped {skipped_count:,} existing)') + + # Create page tree depth + parent = blog_index + for i in range(self.page_tree_depth): + slug = f"blog-post-depth-{i}" + if slug in existing_slugs: + continue + + page = BlogPage( + title=f"Blog Post in tree depth {i}", + slug=slug, + subtitle=subtitle, + introduction=introduction, + body=body_template, + image=image, + date_published=today, ) - for day, hours in days_config.items() - ] - LocationOperatingHours.objects.bulk_create(operating_hours) - def create_location_pages(self, count): - """Create location pages with addresses, coordinates, and operating hours.""" - self.stdout.write(' Checking for locations index page...') - locations_index = LocationsIndexPage.objects.filter(slug='locations').first() - if not locations_index: - self.stdout.write(self.style.WARNING(' Locations index not found. Skipping location pages.')) - return 0 - self.stdout.write(f' ✓ Found locations index: {locations_index.title}') + parent.add_child(instance=page) + parent = page - cities = ['New York', 'London', 'Paris', 'Tokyo', 'Sydney', 'Berlin'] + self.stdout.write(f' ✓ Created Page tree with depth {self.page_tree_depth}\n') - start_number = LocationPage.objects.count() + 1 - self.stdout.write(f' Starting page number: {start_number}') + def create_inline_panel_items(self): + """Create 100 InlinePanel items for ONE page to demonstrate the requirement.""" + self.stdout.write(f' Creating {self.inline_panel_items_count} InlinePanel items...') - self.stdout.write(f' Generating lightweight StreamField template...') - body_template = self.generate_streamfield(min(10, self.streamfield_blocks_count), 0, 1) - self.stdout.write(f' ✓ Generated StreamField template') + # Get or create the first blog page + sample_page = BlogPage.objects.first() + if not sample_page: + self.stdout.write(self.style.WARNING(' No blog pages found. Skipping InlinePanel items.')) + return - # Use add_child (required for Wagtail) - self.stdout.write(f' Creating {count:,} location pages...') - created_count = 0 + # Check how many relationships already exist for this page + existing_count = BlogPersonRelationship.objects.filter(page=sample_page).count() + if existing_count >= self.inline_panel_items_count: + self.stdout.write(f' ✓ Page already has {existing_count} InlinePanel items') + return - for i in range(count): - city = cities[i % len(cities)] - page_number = start_number + i - title = f"{city} Location #{page_number}" + # Ensure we have enough Person objects + existing_people = Person.objects.count() + if existing_people < self.inline_panel_items_count: + people_to_create = [] + now = timezone.now() + for i in range(existing_people, self.inline_panel_items_count): + people_to_create.append(Person( + first_name=f"Person {i + 1}", + last_name="Benchmark", + job_title="Benchmark User", + live=True, + first_published_at=now, + last_published_at=now, + )) + Person.objects.bulk_create(people_to_create, ignore_conflicts=True) + self.stdout.write(f' ✓ Created {len(people_to_create)} Person objects') - page = LocationPage( - title=title, - slug=slugify(title), - introduction=lorem_ipsum.paragraph(), - body=body_template, - address=self._generate_location_address(city), - lat_long=self._generate_lat_long(), - image=self.get_random_image(), - ) + people = list(Person.objects.all()[:self.inline_panel_items_count]) - locations_index.add_child(instance=page) - created_count += 1 + # Get existing person IDs for this page to avoid duplicates + existing_person_ids = set( + BlogPersonRelationship.objects.filter(page=sample_page).values_list('person_id', flat=True) + ) - if created_count % 1000 == 0: - self.stdout.write(f' Progress: {created_count:,}/{count:,} location pages created...') + # Create relationships for the sample page + relationships = [ + BlogPersonRelationship(page=sample_page, person=person) + for person in people if person.id not in existing_person_ids + ] + + if relationships: + BlogPersonRelationship.objects.bulk_create(relationships, ignore_conflicts=True) - self.stdout.write(f' ✓ Created {created_count:,} pages') + total_count = BlogPersonRelationship.objects.filter(page=sample_page).count() + self.stdout.write(f' ✓ Page "{sample_page.title}" now has {total_count} InlinePanel items\n') From 67574955556707652537a95c07d377af07a31bf2 Mon Sep 17 00:00:00 2001 From: Jawad Khan Date: Fri, 26 Dec 2025 17:45:33 +0500 Subject: [PATCH 07/10] fix: refactored code --- bakerydemo/base/management/commands/load_benchmark_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py index 9d3033d75..f76364f58 100644 --- a/bakerydemo/base/management/commands/load_benchmark_data.py +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -25,7 +25,7 @@ def add_arguments(self, parser): parser.add_argument( '--blog-pages-count', type=int, - default=1000, + default=100000, help='Number of blog pages to create', ) parser.add_argument( @@ -55,7 +55,7 @@ def add_arguments(self, parser): parser.add_argument( '--revisions-per-page-count', type=int, - default=100, + default=100000, help='Number of revisions per page', ) parser.add_argument( From 2385ed1e680d1bac2dc6d7c0ff90364b94298bf3 Mon Sep 17 00:00:00 2001 From: Jawad Khan Date: Fri, 26 Dec 2025 18:09:25 +0500 Subject: [PATCH 08/10] fix: refactored code --- .../base/management/commands/load_benchmark_data.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py index f76364f58..eab97cc33 100644 --- a/bakerydemo/base/management/commands/load_benchmark_data.py +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -91,7 +91,7 @@ def handle(self, *args, **options): self.create_blog_pages() self.create_inline_panel_items() self.create_benchmark_snippets() - self.create_revisionss() + self.create_revisions() self.create_translations() self.generate_streamfield(self.streamfield_blocks_count, self.paragraphs_count, self.streamfield_depth) @@ -262,6 +262,7 @@ def _create_image_block(self, index): 'caption': captions[index % len(captions)], 'attribution': attributions[index % len(attributions)], }) + return None def _create_block_quote(self, index): """Create a block quote with fixed content.""" @@ -326,7 +327,7 @@ def generate_streamfield(self, num_blocks, num_paragraphs=0, depth=0): return blocks - def create_revisionss(self): + def create_revisions(self): self.stdout.write(f' Creating revisions for pages...') pages = BlogPage.objects.all()[:10] @@ -346,9 +347,9 @@ def create_page_revisions(self, page, revisions): page.save_revision() if (rev_num + 1) % 1000 == 0: - self.stdout.write(f' Progress: {rev_num:,}/{revisions:,} revisions created for page ID {page.title}.') + self.stdout.write(f' Progress:{rev_num:,}/{revisions:,} revisions created for page title {page.title}.') - self.stdout.write(f' ✓ Created {revisions} for page {page.title}') + self.stdout.write(f' ✓ Created {revisions} revisions for page {page.title}') page.introduction = original_introduction page.refresh_from_db() @@ -410,7 +411,7 @@ def create_translations(self): if created_count % 10 == 0: self.stdout.write(f' Progress: {created_count}/{len(locales)} translations created...') except Exception as e: - self.stdout.write(f' Error creating translation for {locale.language_code}: {str(e)[:50]}') + self.stdout.write(f' Error creating translation for {locale.language_code}: {str(e)}') self.stdout.write(f' ✓ Created {created_count} page translations across {len(locales)} locales\n') From c7db424a1f0d297385810879059e4f28b424d596 Mon Sep 17 00:00:00 2001 From: Ali Tariq Date: Wed, 21 Jan 2026 14:59:02 +0500 Subject: [PATCH 09/10] refactoring --- .../commands/load_benchmark_data.py | 303 +++++++++++------- 1 file changed, 186 insertions(+), 117 deletions(-) diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py index eab97cc33..5fc70a65c 100644 --- a/bakerydemo/base/management/commands/load_benchmark_data.py +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -4,6 +4,7 @@ import random from datetime import date from io import BytesIO +from typing import List, Optional, Tuple from PIL import Image as PILImage from django.core.files.uploadedfile import InMemoryUploadedFile @@ -21,6 +22,19 @@ class Command(BaseCommand): help = 'Load benchmark data for performance testing' + # Constants for better maintainability + DEFAULT_BATCH_SIZE = 1000 + MAX_DEPTH = 10 + MAX_TRANSLATIONS = 100 + IMAGE_SIZE = (800, 600) + IMAGE_QUALITY = 85 + + # Color palette for test images + COLORS = [ + (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), + (255, 0, 255), (0, 255, 255), (128, 128, 128), (255, 128, 0), + ] + def add_arguments(self, parser): parser.add_argument( '--blog-pages-count', @@ -82,100 +96,130 @@ def add_arguments(self, parser): default=100, help='Number of language translations to create', ) + parser.add_argument( + '--batch-size', + type=int, + default=self.DEFAULT_BATCH_SIZE, + help='Batch size for bulk operations', + ) + parser.add_argument( + '--skip-images', + action='store_true', + help='Skip image creation', + ) + parser.add_argument( + '--skip-snippets', + action='store_true', + help='Skip snippet creation', + ) def handle(self, *args, **options): self.set_input_params(options) self.print_configurations() - - self.create_benchmark_images() - self.create_blog_pages() - self.create_inline_panel_items() - self.create_benchmark_snippets() - self.create_revisions() - self.create_translations() - self.generate_streamfield(self.streamfield_blocks_count, self.paragraphs_count, self.streamfield_depth) - - self.stdout.write(self.style.SUCCESS('\n=== Benchmark Data Generation Complete! ===')) - - def set_input_params(self, options): + try: + if not options.get('skip_images'): + self.create_benchmark_images() + + self.create_blog_pages() + print("hello") + self.create_inline_panel_items() + self.create_benchmark_snippets() + if not options.get('skip_revisions'): + self.create_revisions() + + self.create_translations() + self.generate_streamfield(self.streamfield_blocks_count, self.paragraphs_count, self.streamfield_depth) + + self.stdout.write(self.style.SUCCESS('\n=== Benchmark Data Generation Complete! ===')) + except Exception as e: + self.stdout.write(self.style.ERROR(f'\n=== Error during benchmark generation: {e} ===')) + + def set_input_params(self, options: dict) -> None: + """Extract and validate input parameters.""" self.blog_pages_count = options['blog_pages_count'] self.streamfield_blocks_count = options['streamfield_blocks_count'] - self.streamfield_depth = min(options['streamfield_depth'], 10) + self.streamfield_depth = min(options['streamfield_depth'], self.MAX_DEPTH) self.inline_panel_items_count = options['inline_panel_items_count'] self.paragraphs_count = options['paragraphs_count'] self.revisions_per_page_count = options['revisions_per_page_count'] - self.page_tree_depth = min(options['page_tree_depth'], 10) + self.page_tree_depth = min(options['page_tree_depth'], self.MAX_DEPTH) self.images_count = options['images_count'] self.snippets_count = options['snippets_count'] - self.translations_count = min(options['translations_count'], 100) + self.translations_count = min(options['translations_count'], self.MAX_TRANSLATIONS) + self.batch_size = options.get('batch_size', self.DEFAULT_BATCH_SIZE) + + self._images_cache = None - def print_configurations(self): - self.stdout.write('\nConfiguration:') - self.stdout.write(f' Blog pages: {self.blog_pages_count}') + def print_configurations(self) -> None: + """Display configuration summary.""" + self.stdout.write('\n' + '=' * 50) + self.stdout.write('Configuration:') + self.stdout.write('=' * 50) + self.stdout.write(f' Blog pages: {self.blog_pages_count:,}') self.stdout.write(f' StreamField blocks: {self.streamfield_blocks_count} (depth: {self.streamfield_depth})') self.stdout.write(f' Inline panel items: {self.inline_panel_items_count}') self.stdout.write(f' Rich text paragraphs: {self.paragraphs_count}') - self.stdout.write(f' Revisions per page: {self.revisions_per_page_count}') + self.stdout.write(f' Revisions per page: {self.revisions_per_page_count:,}') self.stdout.write(f' Page tree depth: {self.page_tree_depth}') self.stdout.write(f' Images count: {self.images_count}') - self.stdout.write(f' Snippets count: {self.snippets_count}') - self.stdout.write(f' Translations count: {self.translations_count}\n') + self.stdout.write(f' Snippets count: {self.snippets_count:,}') + self.stdout.write(f' Translations count: {self.translations_count}') + self.stdout.write(f' Batch size: {self.batch_size:,}') + self.stdout.write('=' * 50 + '\n') - def _get_images_cache(self): + def _get_images_cache(self) -> List[Image]: """Cache images to avoid repeated queries.""" - if not hasattr(self, '_images_cache'): + if self._images_cache is None: self._images_cache = list(Image.objects.all()) return self._images_cache - def create_benchmark_images(self): - """Create benchmark images with solid color placeholders.""" + def _create_image_file(self, index: int) -> InMemoryUploadedFile: + """Create a simple colored image file.""" + color = self.COLORS[index % len(self.COLORS)] + img = PILImage.new('RGB', self.IMAGE_SIZE, color=color) + img_io = BytesIO() + img.save(img_io, format='JPEG', quality=self.IMAGE_QUALITY) + img_io.seek(0) + + return InMemoryUploadedFile( + img_io, None, f'benchmark_{index + 1}.jpg', 'image/jpeg', + img_io.getbuffer().nbytes, None + ) + def create_benchmark_images(self) -> None: + """Create benchmark images with solid color placeholders.""" + self.stdout.write('\n📷 Creating Images') self.stdout.write(' Initializing image creation...') + created_count = 0 - skipped_count = 0 - colors = [ - (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), - (255, 0, 255), (0, 255, 255), (128, 128, 128), (255, 128, 0), - ] + existing_titles = set(Image.objects.values_list('title', flat=True)) for i in range(self.images_count): title = f"Benchmark Image {i + 1}" - if Image.objects.filter(title=title).exists(): - skipped_count += 1 + if title in existing_titles: continue - # Create a simple colored image - img = PILImage.new('RGB', (800, 600), color=colors[i % len(colors)]) - img_io = BytesIO() - img.save(img_io, format='JPEG', quality=85) - img_io.seek(0) - - img_file = InMemoryUploadedFile( - img_io, None, f'benchmark_{i + 1}.jpg', 'image/jpeg', - img_io.getbuffer().nbytes, None - ) - - wagtail_image = Image( - title=title, - file=img_file, - ) + img_file = self._create_image_file(i) + wagtail_image = Image(title=title, file=img_file) wagtail_image.save() created_count += 1 - # Clear the cache so new images are picked up - if hasattr(self, '_images_cache'): - del self._images_cache - self.stdout.write(' Cleared image cache') + if created_count % 10 == 0: + self.stdout.write(f' Progress: {created_count}/{self.images_count} images created...') + + # Refresh cache + self._images_cache = None - self.stdout.write(f' Skipped {skipped_count} existing images') - self.stdout.write(self.style.SUCCESS(f'✓ Created {created_count} images\n')) + skipped = self.images_count - created_count + self.stdout.write(f' Skipped {skipped} existing images') + self.stdout.write(self.style.SUCCESS(f' ✓ Created {created_count} new images')) def create_benchmark_snippets(self): """Create snippet instances (BreadType, Country, BreadIngredient).""" self.stdout.write(' Starting snippet creation in bulk batches...') created_count = 0 - batch_size = 1000 + batch_size = self.batch_size for batch_num in range((self.snippets_count + batch_size * 3 - 1) // (batch_size * 3)): bread_types = [BreadType(title=f"Bread Type {batch_num * batch_size + j + 1}") for j in range(batch_size)] @@ -194,21 +238,17 @@ def create_benchmark_snippets(self): self.stdout.write(f'Total available snippet instances are: {created_count}') - def get_random_image(self): + def get_random_image(self)-> Optional[Image]: """Return a random image or None if no images exist.""" images = self._get_images_cache() return random.choice(images) if images else None - def _generate_paragraph(self): - """Generate a random lorem ipsum paragraph.""" - return lorem_ipsum.paragraph() - - def _get_first_image(self): + def _get_first_image(self) -> Optional[Image]: """Return the first available image or None.""" images = self._get_images_cache() return images[0] if images else None - def _create_heading_block(self, index): + def _create_heading_block(self, index)-> Tuple[str, dict]: """Create a heading block with fixed text based on index.""" heading_sizes = ['h2', 'h3', 'h4', ''] heading_texts = [ @@ -223,7 +263,7 @@ def _create_heading_block(self, index): 'size': heading_sizes[index % len(heading_sizes)] }) - def _create_paragraph_block(self, index, num_paragraphs=2): + def _create_paragraph_block(self, index, num_paragraphs=2) -> Tuple[str, RichText]: """Create a paragraph block with fixed paragraphs.""" fixed_paragraphs = [ 'Lorem ipsum dolor st amet, consectetur adiscing elit. Sed do eimod temport labore et dolore magna aliqua.', @@ -237,34 +277,34 @@ def _create_paragraph_block(self, index, num_paragraphs=2): for i in range(num_paragraphs): paragraphs_to_use.append(fixed_paragraphs[i % len(fixed_paragraphs)]) paragraph_text = '\n'.join(paragraphs_to_use) - return ('paragraph_block', RichText(paragraph_text)) + return 'paragraph_block', RichText(paragraph_text) - def _create_image_block(self, index): + def _create_image_block(self, index: int) -> Optional[Tuple[str, dict]]: """Create an image block with a fixed image.""" image = self._get_first_image() - if image: - captions = [ - 'Traditional baking methods', - 'Fresh ingredients', - 'Artisan craftsmanship', - 'Quality products', - '', - ] - attributions = [ - 'Photo by Baker', - 'Courtesy of Bakery', - '', - 'Professional photography', - '', - ] - return ('image_block', { - 'image': image, - 'caption': captions[index % len(captions)], - 'attribution': attributions[index % len(attributions)], - }) - return None - - def _create_block_quote(self, index): + if not image: + return None + captions = [ + 'Traditional baking methods', + 'Fresh ingredients', + 'Artisan craftsmanship', + 'Quality products', + '', + ] + attributions = [ + 'Photo by Baker', + 'Courtesy of Bakery', + '', + 'Professional photography', + '', + ] + return ('image_block', { + 'image': image, + 'caption': captions[index % len(captions)], + 'attribution': attributions[index % len(attributions)], + }) + + def _create_block_quote(self, index: int) -> Tuple[str, dict]: """Create a block quote with fixed content.""" quote_texts = [ 'The secret to great bread is patience and quality ingredients.', @@ -292,11 +332,10 @@ def _create_block_quote(self, index): } }) - def generate_streamfield(self, num_blocks, num_paragraphs=0, depth=0): + def generate_streamfield(self, num_blocks: int, num_paragraphs: int = 0, depth: int = 0) -> List[tuple]: """Generate StreamField blocks with optional nesting depth.""" blocks = [] - # If we have depth remaining and blocks to create, add nested blocks if depth > 0 and num_blocks > 0: # Create nested structure blocks - not all block types support nesting # For simplicity, we'll create paragraph blocks that could conceptually be nested @@ -327,14 +366,14 @@ def generate_streamfield(self, num_blocks, num_paragraphs=0, depth=0): return blocks - def create_revisions(self): + def create_revisions(self)-> None: self.stdout.write(f' Creating revisions for pages...') pages = BlogPage.objects.all()[:10] for page in pages: self.create_page_revisions(page, self.revisions_per_page_count) - def create_page_revisions(self, page, revisions): + def create_page_revisions(self, page, revisions) -> None: """Publish page and create additional draft revisions.""" original_introduction = page.introduction @@ -354,53 +393,80 @@ def create_page_revisions(self, page, revisions): page.introduction = original_introduction page.refresh_from_db() - def create_translations(self): + def create_translations(self) -> None: """Create language translations for pages.""" self.stdout.write(f' Creating {self.translations_count} language translations...') - # Generate language codes for locales (e.g., lang-01, lang-02, ..., lang-100) - # Using synthetic language codes since we need 100 unique ones - + # Get or create default locale default_locale = Locale.objects.filter(language_code='en').first() or Locale.objects.first() if not default_locale: self.stdout.write(self.style.WARNING(' No default locale found. Skipping translations.')) return + # Create locales + locales = self._create_locales() + if not locales: + return + + # Get pages to translate + blog_index = BlogIndexPage.objects.filter(slug='blog').first() + sample_page = BlogPage.objects.first() + + if not blog_index or not sample_page: + self.stdout.write(self.style.WARNING(' Missing required pages. Skipping translations.')) + return + + # Create translations + created_count = self._create_page_translations(blog_index, sample_page, locales) + + self.stdout.write(self.style.SUCCESS( + f' ✓ Created {created_count} page translations across {len(locales)} locales' + )) + + def _create_locales(self) -> List[Locale]: + """Create required locales for translations.""" self.stdout.write(' Creating locales...') + existing_locales = set(Locale.objects.values_list('language_code', flat=True)) language_codes = [f"lg{i:03d}" for i in range(1, self.translations_count + 1)] - locales_to_create = [Locale(language_code=lang_code) - for lang_code in language_codes if lang_code not in existing_locales] + locales_to_create = [ + Locale(language_code=lang_code) + for lang_code in language_codes + if lang_code not in existing_locales + ] if locales_to_create: Locale.objects.bulk_create(locales_to_create, ignore_conflicts=True) self.stdout.write(f' ✓ Created {len(locales_to_create)} new locales') - locales = list(Locale.objects.filter(language_code__in=language_codes)) - - blog_index = BlogIndexPage.objects.filter(slug='blog').first() - sample_page = BlogPage.objects.first() - - if not blog_index: - self.stdout.write(self.style.WARNING(' No blog index found. Skipping translations.')) - return - - if not sample_page: - self.stdout.write(self.style.WARNING(' No pages found to translate. Skipping translations.')) - return + return list(Locale.objects.filter(language_code__in=language_codes)) - # Create translations for each locale + def _create_page_translations( + self, + blog_index: BlogIndexPage, + sample_page: BlogPage, + locales: List[Locale] + ) -> int: + """Create translated versions of pages.""" created_count = 0 + for locale in locales: try: - # First, translate the parent page (blog index) if not already translated - if not BlogIndexPage.objects.filter(translation_key=blog_index.translation_key, locale=locale).exists(): + # Translate blog index if needed + if not BlogIndexPage.objects.filter( + translation_key=blog_index.translation_key, + locale=locale + ).exists(): translated_index = blog_index.copy_for_translation(locale) translated_index.title = f"{blog_index.title} ({locale.language_code})" translated_index.save_revision().publish() - if BlogPage.objects.filter(translation_key=sample_page.translation_key, locale=locale).exists(): + # Translate sample page if needed + if BlogPage.objects.filter( + translation_key=sample_page.translation_key, + locale=locale + ).exists(): continue translated_page = sample_page.copy_for_translation(locale) @@ -409,12 +475,15 @@ def create_translations(self): created_count += 1 if created_count % 10 == 0: - self.stdout.write(f' Progress: {created_count}/{len(locales)} translations created...') + self.stdout.write( + f' Progress: {created_count}/{len(locales)} translations created...' + ) except Exception as e: - self.stdout.write(f' Error creating translation for {locale.language_code}: {str(e)}') - - self.stdout.write(f' ✓ Created {created_count} page translations across {len(locales)} locales\n') + self.stdout.write(self.style.WARNING( + f' Error creating translation for {locale.language_code}: {str(e)}' + )) + return created_count def create_blog_pages(self): """Create blog pages with streamfield content.""" From d09f8eed448443c666c1f8d57034c4c58c099d65 Mon Sep 17 00:00:00 2001 From: awais qureshi Date: Fri, 27 Feb 2026 00:28:18 +0500 Subject: [PATCH 10/10] feat: Adding benchmark data. --- .../commands/load_benchmark_data.py | 179 +++++++++--------- 1 file changed, 93 insertions(+), 86 deletions(-) diff --git a/bakerydemo/base/management/commands/load_benchmark_data.py b/bakerydemo/base/management/commands/load_benchmark_data.py index 5fc70a65c..013d66272 100644 --- a/bakerydemo/base/management/commands/load_benchmark_data.py +++ b/bakerydemo/base/management/commands/load_benchmark_data.py @@ -35,72 +35,58 @@ class Command(BaseCommand): (255, 0, 255), (0, 255, 255), (128, 128, 128), (255, 128, 0), ] + SCALE_CONFIGS = { + 'low': { + 'blog_pages_count': 100, + 'streamfield_blocks_count': 10, + 'streamfield_depth': 3, + 'inline_panel_items_count': 10, + 'paragraphs_count': 10, + 'revisions_per_page_count': 10, + 'page_tree_depth': 3, + 'images_count': 10, + 'snippets_count': 100, + 'translations_count': 5, + 'batch_size': 100, + }, + 'medium': { + 'blog_pages_count': 1000, + 'streamfield_blocks_count': 50, + 'streamfield_depth': 5, + 'inline_panel_items_count': 50, + 'paragraphs_count': 50, + 'revisions_per_page_count': 1000, + 'page_tree_depth': 5, + 'images_count': 50, + 'snippets_count': 1000, + 'translations_count': 25, + 'batch_size': 500, + }, + 'high': { + 'blog_pages_count': 100000, + 'streamfield_blocks_count': 100, + 'streamfield_depth': 10, + 'inline_panel_items_count': 100, + 'paragraphs_count': 100, + 'revisions_per_page_count': 100000, + 'page_tree_depth': 10, + 'images_count': 100, + 'snippets_count': 100000, + 'translations_count': 100, + 'batch_size': 1000, + }, + } + def add_arguments(self, parser): parser.add_argument( - '--blog-pages-count', - type=int, - default=100000, - help='Number of blog pages to create', - ) - parser.add_argument( - '--streamfield-blocks-count', - type=int, - default=100, - help='Number of blocks in each StreamField', - ) - parser.add_argument( - '--streamfield-depth', - type=int, - default=10, - help='Nesting depth for StreamField blocks', - ) - parser.add_argument( - '--inline-panel-items-count', - type=int, - default=100, - help='Number of inline panel items to create', - ) - parser.add_argument( - '--paragraphs-count', - type=int, - default=100, - help='Number of paragraphs in rich text fields', - ) - parser.add_argument( - '--revisions-per-page-count', - type=int, - default=100000, - help='Number of revisions per page', - ) - parser.add_argument( - '--page-tree-depth', - type=int, - default=10, - help='Depth of page tree hierarchy', - ) - parser.add_argument( - '--images-count', - type=int, - default=100, - help='Number of images to create', - ) - parser.add_argument( - '--snippets-count', - type=int, - default=100000, - help='Number of snippet instances to create', - ) - parser.add_argument( - '--translations-count', - type=int, - default=100, - help='Number of language translations to create', - ) - parser.add_argument( - '--batch-size', - type=int, - default=self.DEFAULT_BATCH_SIZE, - help='Batch size for bulk operations', + '--scale', + choices=['low', 'medium', 'high'], + default='high', + help=( + 'Scale of benchmark data to generate: ' + 'low (quick smoke test), medium, or high (full load). ' + 'Defaults to high.' + ), ) parser.add_argument( '--skip-images', @@ -135,25 +121,28 @@ def handle(self, *args, **options): self.stdout.write(self.style.ERROR(f'\n=== Error during benchmark generation: {e} ===')) def set_input_params(self, options: dict) -> None: - """Extract and validate input parameters.""" - self.blog_pages_count = options['blog_pages_count'] - self.streamfield_blocks_count = options['streamfield_blocks_count'] - self.streamfield_depth = min(options['streamfield_depth'], self.MAX_DEPTH) - self.inline_panel_items_count = options['inline_panel_items_count'] - self.paragraphs_count = options['paragraphs_count'] - self.revisions_per_page_count = options['revisions_per_page_count'] - self.page_tree_depth = min(options['page_tree_depth'], self.MAX_DEPTH) - self.images_count = options['images_count'] - self.snippets_count = options['snippets_count'] - self.translations_count = min(options['translations_count'], self.MAX_TRANSLATIONS) - self.batch_size = options.get('batch_size', self.DEFAULT_BATCH_SIZE) + """Extract and validate input parameters from the selected scale.""" + self.scale = options['scale'] + config = self.SCALE_CONFIGS[self.scale] + + self.blog_pages_count = config['blog_pages_count'] + self.streamfield_blocks_count = config['streamfield_blocks_count'] + self.streamfield_depth = min(config['streamfield_depth'], self.MAX_DEPTH) + self.inline_panel_items_count = config['inline_panel_items_count'] + self.paragraphs_count = config['paragraphs_count'] + self.revisions_per_page_count = config['revisions_per_page_count'] + self.page_tree_depth = min(config['page_tree_depth'], self.MAX_DEPTH) + self.images_count = config['images_count'] + self.snippets_count = config['snippets_count'] + self.translations_count = min(config['translations_count'], self.MAX_TRANSLATIONS) + self.batch_size = config['batch_size'] self._images_cache = None def print_configurations(self) -> None: """Display configuration summary.""" self.stdout.write('\n' + '=' * 50) - self.stdout.write('Configuration:') + self.stdout.write(f'Configuration (scale: {self.scale}):') self.stdout.write('=' * 50) self.stdout.write(f' Blog pages: {self.blog_pages_count:,}') self.stdout.write(f' StreamField blocks: {self.streamfield_blocks_count} (depth: {self.streamfield_depth})') @@ -442,6 +431,30 @@ def _create_locales(self) -> List[Locale]: return list(Locale.objects.filter(language_code__in=language_codes)) + def _ensure_page_translated(self, page, locale) -> None: + """Recursively ensure a page and all its ancestors are translated for the given locale. + + Wagtail's copy_for_translation requires the parent page to already exist in the + target locale. This method walks up the ancestor chain and translates each level + before translating the requested page. + """ + from wagtail.models import Page + + # Root page (depth 1) is never translated — it is a global tree root. + if page.depth <= 1: + return + + # Already translated — nothing to do. + if Page.objects.filter(translation_key=page.translation_key, locale=locale).exists(): + return + + # Translate the parent first so copy_for_translation can find it. + parent = page.get_parent().specific + self._ensure_page_translated(parent, locale) + + translated = page.copy_for_translation(locale) + translated.save_revision().publish() + def _create_page_translations( self, blog_index: BlogIndexPage, @@ -453,16 +466,10 @@ def _create_page_translations( for locale in locales: try: - # Translate blog index if needed - if not BlogIndexPage.objects.filter( - translation_key=blog_index.translation_key, - locale=locale - ).exists(): - translated_index = blog_index.copy_for_translation(locale) - translated_index.title = f"{blog_index.title} ({locale.language_code})" - translated_index.save_revision().publish() + # Translate the blog index and all its ancestors for this locale. + self._ensure_page_translated(blog_index, locale) - # Translate sample page if needed + # Translate sample page if not already done. if BlogPage.objects.filter( translation_key=sample_page.translation_key, locale=locale