datalab-to · Br1an67 · Mar 1, 2026
diff --git a/marker/processors/llm/llm_image_description.py b/marker/processors/llm/llm_image_description.py
@@ -24,6 +24,7 @@ class LLMImageDescriptionProcessor(BaseLLMSimpleBlockProcessor):
 1. Carefully examine the provided image.
 2. Analyze any text that was extracted from within the image.
 3. Output a faithful description of the image.  Make sure there is enough specific detail to accurately reconstruct the image.  If the image is a figure or contains numeric data, include the numeric data in the output.
+4. Output the description in the same language as the input text. If the input text is in French, write the description in French. If the input text is in German, write the description in German, and so on. Only use English if the input text is in English or if there is no input text.
 **Example:**
 Input:
 ```text
@@ -49,8 +50,15 @@ def block_prompts(self, document: Document) -> List[PromptData]:
         prompt_data = []
         for block_data in self.inference_blocks(document):
             block = block_data["block"]
+            page = block_data["page"]
+            raw_text = block.raw_text(document)
+            # Include surrounding page text for language context if image has little text
+            if len(raw_text.strip()) < 20:
+                page_text = page.raw_text(document)[:200]
+                if page_text.strip():
+                    raw_text = raw_text + "\n\n[Surrounding document text for language context:]\n" + page_text
             prompt = self.image_description_prompt.replace(
-                "{raw_text}", block.raw_text(document)
+                "{raw_text}", raw_text
             )
             image = self.extract_image(document, block)