diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp
index 39b7eb218e69..ecdf18fa1a06 100644
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -2597,6 +2597,16 @@ struct server_context_impl {
                     slot->prompt.tokens.clear();
                     slot->prompt.tokens.insert(tokens);
 
+                    // A restored slot has no context checkpoint, so the next
+                    // request with cache_prompt finds no checkpoint to resume
+                    // from and reprocesses the entire restored prefix. Create
+                    // a checkpoint covering the restored token positions so
+                    // the restored KV is actually reused.
+                    if (params_base.n_ctx_checkpoints > 0) {
+                        create_checkpoint(*slot, (int64_t) 0, 0,
+                                          (llama_pos) (token_count > 0 ? token_count - 1 : 0));
+                    }
+
                     const int64_t t_end = ggml_time_us();
                     const double t_restore_ms = (t_end - t_start) / 1000.0;