diff --git a/include/mem_root_deque.h b/include/mem_root_deque.h
index ea325cd33e3c..b7ac8984e278 100644
--- a/include/mem_root_deque.h
+++ b/include/mem_root_deque.h
@@ -35,19 +35,32 @@
 
 template <class Element_type>
 static constexpr size_t FindElementsPerBlock() {
-  // Aim for 1 kB.
-  size_t base_number_elems =
-      1024 / sizeof(Element_type);  // NOLINT(bugprone-sizeof-expression)
+  // Upstream aims for 1 kB. In Percona Server we prefer 256-byte blocks
+  // instead.
+  //
+  // In the majority of cases mem_root_deque is used to store pointers
+  // (e.g. to Item objects). Also, in many cases the number of those
+  // pointers is going to be much lower than 128 (the number of pointers
+  // that fit in a 1 kB block on a 64-bit system). This means that most of
+  // the 1 kB block is going to be wasted in those cases. The effect is
+  // multiplied by the fact that even a relatively simple query can use
+  // 5-10 mem_root_deque instances. Allocating those unnecessary 5-10 kB on
+  // the main MEM_ROOT often triggers allocation of a new MEM_ROOT block,
+  // creating a small but visible performance overhead.
+  const size_t target_block_bytes = 256;
+  const size_t base_number_elems =
+      target_block_bytes /
+      sizeof(Element_type);  // NOLINT(bugprone-sizeof-expression)
 
   // Find the next power of two, rounded up. We should have at least 16 elements
   // per block to avoid allocating way too often (although the code itself
   // should work fine with 1, for debugging purposes).
-  for (size_t block_size = 16; block_size < 1024; ++block_size) {
+  for (size_t block_size = 16; block_size < target_block_bytes; ++block_size) {
     if (block_size >= base_number_elems) {
       return block_size;
     }
   }
-  return 1024;
+  return target_block_bytes;
 }
 
 /**
diff --git a/unittest/gunit/mem_root_deque-t.cc b/unittest/gunit/mem_root_deque-t.cc
index 9f6eb3dbc746..d13f87fece5e 100644
--- a/unittest/gunit/mem_root_deque-t.cc
+++ b/unittest/gunit/mem_root_deque-t.cc
@@ -281,10 +281,11 @@ TEST(MemRootDequeTest, ExponentialGrowth) {
   }
 
   // Check exponential growth occurred (not linear).
-  // With ~256 elements per block (1024 bytes / 4 bytes per int),
-  // 1000 elements need ~4 blocks.
-  // Growth sequence: 1 -> 2 -> 4 blocks allocated, exp goes 0 -> 1 -> 2
-  EXPECT_GE(d.back_growth_exp(), 2);   // At least 2 growth events
+  // With ~64 elements per block (256 bytes / 4 bytes per int),
+  // 1000 elements need ~16 blocks.
+  // Growth sequence: 1 -> 2 -> 4 -> 8 -> 16 blocks allocated,
+  // exp goes 0 -> 1 -> 2 -> 3 -> 4
+  EXPECT_GE(d.back_growth_exp(), 4);   // At least 4 growth events
   EXPECT_EQ(0, d.front_growth_exp());  // Front not grown yet
   EXPECT_EQ(0, d.first_block_idx());   // Blocks start at index 0
 
@@ -307,7 +308,7 @@ TEST(MemRootDequeTest, ExponentialGrowth) {
   }
 
   // Check front exponential growth occurred independently
-  EXPECT_GE(d.front_growth_exp(), 2);  // At least 2 growth events
+  EXPECT_GE(d.front_growth_exp(), 4);  // At least 4 growth events
   // Back exponent should not have changed (back didn't need more blocks)
   EXPECT_EQ(back_exp_after_push_back, d.back_growth_exp());
   // first_block_idx should have moved to accommodate front spare slots
@@ -319,7 +320,7 @@ TEST(MemRootDequeTest, ExponentialGrowth) {
   // blocks_allocated should be at most ~3x the minimum needed
   // (due to spare slots for future growth)
   size_t min_blocks_needed =
-      (d.size() + 255) / 256;  // ~8 blocks for 2000 elements
+      (d.size() + 63) / 64;  // ~32 blocks for 2000 elements
   EXPECT_LE(d.block_slots(), min_blocks_needed * 3);  // Allow 3x for spare
 }
 
@@ -356,6 +357,12 @@ TEST(MemRootDequeTest, MemoryEfficiency) {
 // Verifies exponential growth pattern (not quadratic).
 TEST(MemRootDequeTest, MemoryGrowthPushBack) {
   MEM_ROOT mem_root;
+  // Set the MEM_ROOT block size low enough to avoid the situation where
+  // two mem_root_deque blocks fit into a single MEM_ROOT block (this is
+  // necessary because MEM_ROOT::allocated_size() returns the total size
+  // of memory used by MEM_ROOT blocks, and not the net size of memory
+  // allocated on them).
+  mem_root.set_block_size(128);
   mem_root_deque<int> d(&mem_root);
 
   std::vector<size_t> memory_at_growth;