diff --git a/nvdaHelper/localWin10/sconscript b/nvdaHelper/localWin10/sconscript index bad1a590f49..837e8562a02 100644 --- a/nvdaHelper/localWin10/sconscript +++ b/nvdaHelper/localWin10/sconscript @@ -28,9 +28,12 @@ localWin10Lib = env.SharedLibrary( env["projectResFile"], "oneCoreSpeech.cpp", "uwpOcr.cpp", + "wgcCapture.cpp", ], LIBS=[ "WindowsApp", + "d3d11", + "dxgi", # Ignoring Flake8 F821: 'undefined name' due to nonstandard SCons import localLib[2], ], # noqa: F821 diff --git a/nvdaHelper/localWin10/wgcCapture.cpp b/nvdaHelper/localWin10/wgcCapture.cpp new file mode 100644 index 00000000000..90c4dce2c73 --- /dev/null +++ b/nvdaHelper/localWin10/wgcCapture.cpp @@ -0,0 +1,402 @@ +// wgcCapture.cpp +// Windows Graphics Capture + Windows.Media.Ocr integration for NVDA. +// CreateForWindow captures from the DWM compositor, before the +// Magnification API color transform, so OCR works with screen curtain. + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "wgcCapture.h" + +#pragma comment(lib, "d3d11.lib") +#pragma comment(lib, "dxgi.lib") + +using namespace winrt; +using namespace Windows::Foundation; +using namespace Windows::Graphics; +using namespace Windows::Graphics::Capture; +using namespace Windows::Graphics::DirectX; +using namespace Windows::Graphics::DirectX::Direct3D11; +using namespace Windows::Graphics::Imaging; +using namespace Windows::Media::Ocr; +using namespace Windows::Globalization; +using namespace Windows::Data::Json; + +class WgcCapture { +private: + OcrEngine m_ocrEngine{ nullptr }; + wgcCapture_Callback m_callback; + IDirect3DDevice m_device{ nullptr }; + com_ptr m_d3dDevice; + std::atomic m_cancelled{ false }; + + void createDevice() { + check_hresult(D3D11CreateDevice( + nullptr, + D3D_DRIVER_TYPE_HARDWARE, + nullptr, + D3D11_CREATE_DEVICE_BGRA_SUPPORT, + nullptr, 0, + D3D11_SDK_VERSION, 
+ m_d3dDevice.put(), + nullptr, + nullptr + )); + + auto dxgiDevice = m_d3dDevice.as(); + com_ptr inspectable; + check_hresult(CreateDirect3D11DeviceFromDXGIDevice( + dxgiDevice.get(), inspectable.put())); + m_device = inspectable.as(); + } + + GraphicsCaptureItem createItemForWindow(HWND hwnd) { + auto interopFactory = get_activation_factory< + GraphicsCaptureItem, + IGraphicsCaptureItemInterop>(); + + GraphicsCaptureItem item{ nullptr }; + check_hresult(interopFactory->CreateForWindow( + hwnd, + guid_of(), + put_abi(item) + )); + return item; + } + + hstring serializeOcrResult(OcrResult const& result) { + JsonArray jLines; + for (auto const& line : result.Lines()) { + JsonArray jWords; + for (auto const& word : line.Words()) { + JsonObject jWord; + auto rect = word.BoundingRect(); + jWord.Insert(L"x", + JsonValue::CreateNumberValue(rect.X)); + jWord.Insert(L"y", + JsonValue::CreateNumberValue(rect.Y)); + jWord.Insert(L"width", + JsonValue::CreateNumberValue(rect.Width)); + jWord.Insert(L"height", + JsonValue::CreateNumberValue(rect.Height)); + jWord.Insert(L"text", + JsonValue::CreateStringValue(word.Text())); + jWords.Append(jWord); + } + jLines.Append(jWords); + } + return jLines.Stringify(); + } + + // Convert a SoftwareBitmap to BGRA8 format suitable for OCR + SoftwareBitmap toBgra8(SoftwareBitmap const& bitmap) { + return SoftwareBitmap::Convert( + bitmap, + BitmapPixelFormat::Bgra8, + BitmapAlphaMode::Premultiplied + ); + } + + // Crop a BGRA8 bitmap to the specified region (clamped to bounds) + SoftwareBitmap cropBitmap( + SoftwareBitmap const& src, + unsigned int regionX, + unsigned int regionY, + unsigned int regionW, + unsigned int regionH) + { + unsigned int bmpW = src.PixelWidth(); + unsigned int bmpH = src.PixelHeight(); + unsigned int cx = min(regionX, bmpW); + unsigned int cy = min(regionY, bmpH); + unsigned int cw = min(regionW, bmpW - cx); + unsigned int ch = min(regionH, bmpH - cy); + + if (cw == 0 || ch == 0) { + return nullptr; + } + + auto 
srcBuffer = src.LockBuffer(BitmapBufferAccessMode::Read); + auto srcRef = srcBuffer.CreateReference(); + auto srcAccess = srcRef.as< + Windows::Foundation::IMemoryBufferByteAccess>(); + + BYTE* srcData = nullptr; + UINT32 srcCapacity = 0; + check_hresult(srcAccess->GetBuffer(&srcData, &srcCapacity)); + int srcStride = srcBuffer.GetPlaneDescription(0).Stride; + + SoftwareBitmap dst( + BitmapPixelFormat::Bgra8, cw, ch, + BitmapAlphaMode::Premultiplied); + + auto dstBuffer = dst.LockBuffer(BitmapBufferAccessMode::Write); + auto dstRef = dstBuffer.CreateReference(); + auto dstAccess = dstRef.as< + Windows::Foundation::IMemoryBufferByteAccess>(); + + BYTE* dstData = nullptr; + UINT32 dstCapacity = 0; + check_hresult(dstAccess->GetBuffer(&dstData, &dstCapacity)); + int dstStride = dstBuffer.GetPlaneDescription(0).Stride; + + constexpr int bytesPerPixel = 4; // BGRA8 + for (unsigned int row = 0; row < ch; ++row) { + BYTE* srcRow = srcData + + (cy + row) * srcStride + + cx * bytesPerPixel; + BYTE* dstRow = dstData + row * dstStride; + memcpy(dstRow, srcRow, cw * bytesPerPixel); + } + + dstRef.Close(); + dstBuffer.Close(); + srcRef.Close(); + srcBuffer.Close(); + + return dst; + } + +public: + WgcCapture( + OcrEngine const& engine, + wgcCapture_Callback callback + ) : m_ocrEngine(engine), m_callback(callback), m_cancelled(false) { + createDevice(); + } + + void markCancelled() { + m_cancelled.store(true, std::memory_order_release); + } + + fire_and_forget recognizeWindow( + HWND hwnd, + bool useRegion, + unsigned int regionX, + unsigned int regionY, + unsigned int regionW, + unsigned int regionH) + { + try { + co_await resume_background(); + + if (m_cancelled.load(std::memory_order_acquire)) { + co_return; + } + + if (!IsWindow(hwnd)) { + LOG_ERROR(L"wgcCapture: invalid HWND"); + if (!m_cancelled.load(std::memory_order_acquire)) { + m_callback(nullptr); + } + co_return; + } + + auto item = createItemForWindow(hwnd); + auto framePool = + 
Direct3D11CaptureFramePool::CreateFreeThreaded( + m_device, + DirectXPixelFormat::B8G8R8A8UIntNormalized, + 1, + item.Size() + ); + + auto session = framePool.CreateCaptureSession(item); + + // Hide yellow border on Win11 (no-op on Win10) + if (auto session3 = + session.try_as()) { + session3.IsBorderRequired(false); + } + + session.StartCapture(); + + Direct3D11CaptureFrame frame{ nullptr }; + for (int attempt = 0; attempt < 20 && !frame; ++attempt) { + frame = framePool.TryGetNextFrame(); + if (!frame) { + co_await resume_after( + std::chrono::milliseconds(50)); + } + } + + session.Close(); + framePool.Close(); + + if (!frame) { + LOG_ERROR(L"wgcCapture: no frame received"); + if (!m_cancelled.load(std::memory_order_acquire)) { + m_callback(nullptr); + } + co_return; + } + + auto surface = frame.Surface(); + auto fullBitmap = + co_await SoftwareBitmap::CreateCopyFromSurfaceAsync( + surface, + BitmapAlphaMode::Premultiplied + ); + frame.Close(); + + SoftwareBitmap ocrBitmap{ nullptr }; + + if (useRegion) { + auto converted = toBgra8(fullBitmap); + ocrBitmap = cropBitmap( + converted, regionX, regionY, regionW, regionH); + if (!ocrBitmap) { + LOG_ERROR( + L"wgcCapture: region out of bounds"); + if (!m_cancelled.load(std::memory_order_acquire)) { + m_callback(nullptr); + } + co_return; + } + } else { + ocrBitmap = toBgra8(fullBitmap); + } + + unsigned int maxDim = m_ocrEngine.MaxImageDimension(); + unsigned int ocrW = ocrBitmap.PixelWidth(); + unsigned int ocrH = ocrBitmap.PixelHeight(); + if (ocrW > maxDim || ocrH > maxDim) { + LOG_WARNING( + L"wgcCapture: bitmap %ux%u exceeds " + L"MaxImageDimension %u, OCR may fail", + ocrW, ocrH, maxDim); + } + + auto ocrResult = + co_await m_ocrEngine.RecognizeAsync(ocrBitmap); + + if (m_cancelled.load(std::memory_order_acquire)) { + co_return; + } + + auto json = serializeOcrResult(ocrResult); + m_callback(json.c_str()); + + } catch (winrt::hresult_error const& ex) { + LOG_ERROR( + L"wgcCapture error: 0x%08X %s", + ex.code(), 
+ ex.message().c_str() + ); + if (!m_cancelled.load(std::memory_order_acquire)) { + m_callback(nullptr); + } + } catch (...) { + LOG_ERROR(L"wgcCapture: unknown exception"); + if (!m_cancelled.load(std::memory_order_acquire)) { + m_callback(nullptr); + } + } + } +}; + + +// ---- Exported C functions ---- + +bool __stdcall wgcCapture_isSupported() { + try { + if (!GraphicsCaptureSession::IsSupported()) { + return false; + } + auto interopFactory = get_activation_factory< + GraphicsCaptureItem, + IGraphicsCaptureItemInterop>(); + return interopFactory != nullptr; + } catch (...) { + return false; + } +} + +WgcCapture_H __stdcall wgcCapture_initialize( + const wchar_t* language, + wgcCapture_Callback callback) +{ + if (!callback) { + LOG_ERROR(L"wgcCapture_initialize: null callback"); + return nullptr; + } + + try { + try { winrt::init_apartment(winrt::apartment_type::multi_threaded); } // the calling thread may already have initialised COM + catch (winrt::hresult_error const& initErr) { if (initErr.code() != RPC_E_CHANGED_MODE) throw; } // an existing apartment of another model is usable as-is + OcrEngine engine{ nullptr }; + if (language && language[0] != L'\0') { + engine = OcrEngine::TryCreateFromLanguage( + Language{ language }); + } else { + engine = + OcrEngine::TryCreateFromUserProfileLanguages(); + } + + if (!engine) { + LOG_ERROR( + L"wgcCapture_initialize: " + L"failed to create OcrEngine"); + return nullptr; + } + + return static_cast<WgcCapture_H>( + new WgcCapture(engine, callback)); + + } catch (winrt::hresult_error const& ex) { + LOG_ERROR( + L"wgcCapture_initialize error: 0x%08X %s", + ex.code(), ex.message().c_str()); + return nullptr; + } catch (...) 
{ + LOG_ERROR( + L"wgcCapture_initialize: unknown exception"); + return nullptr; + } +} + +void __stdcall wgcCapture_recognizeWindow( + WgcCapture_H handle, HWND hwnd) +{ + if (!handle) return; // a null hwnd fails IsWindow() inside recognizeWindow, which reports it via the callback + static_cast<WgcCapture*>(handle)->recognizeWindow( + hwnd, false, 0, 0, 0, 0); +} + +void __stdcall wgcCapture_recognizeWindowRegion( + WgcCapture_H handle, HWND hwnd, + unsigned int x, unsigned int y, + unsigned int width, unsigned int height) +{ + if (!handle) return; + // A null hwnd or an empty region is rejected inside recognizeWindow, so the callback still fires with nullptr. + static_cast<WgcCapture*>(handle)->recognizeWindow( + hwnd, true, x, y, width, height); +} + +void __stdcall wgcCapture_terminate(WgcCapture_H handle) { + if (!handle) return; + auto* instance = static_cast<WgcCapture*>(handle); + // Mark cancelled so the coroutine stops calling back into Python. + // The Python side is responsible for calling terminate only after the + // recognition callback has fired (matching the uwpOcr pattern), + // so by this point the coroutine has completed and delete is safe. + instance->markCancelled(); + delete instance; +} diff --git a/nvdaHelper/localWin10/wgcCapture.h b/nvdaHelper/localWin10/wgcCapture.h new file mode 100644 index 00000000000..9da2b268289 --- /dev/null +++ b/nvdaHelper/localWin10/wgcCapture.h @@ -0,0 +1,58 @@ +// wgcCapture.h +// Windows Graphics Capture integration for NVDA. +// Captures window content via CreateForWindow, bypassing +// Magnification API color transforms (screen curtain). + +#pragma once +#include +#define export __declspec(dllexport) + +#ifdef __cplusplus +extern "C" { +#endif + +// Callback receiving JSON OCR results (nullptr on failure). +// Format: [[{"x","y","width","height","text"}, ...], ...] +typedef void (*wgcCapture_Callback)(const wchar_t*); + +// Opaque handle to a WGC capture + OCR instance. +typedef void* WgcCapture_H; + +// True if Windows.Graphics.Capture is available (Win10 1903+). +export bool __stdcall wgcCapture_isSupported(); + +// Create a WGC capture + OCR instance. +// language: BCP-47 tag (e.g. 
L"en-US"), or nullptr for user profile language. +// Returns handle, or nullptr on failure. +export WgcCapture_H __stdcall wgcCapture_initialize( + const wchar_t* language, + wgcCapture_Callback callback +); + +// Capture entire window by HWND and run OCR asynchronously. +// Works even when screen curtain is active (captures from compositor). +export void __stdcall wgcCapture_recognizeWindow( + WgcCapture_H handle, + HWND hwnd +); + +// Capture a sub-region of a window and run OCR asynchronously. +// Coordinates are relative to the top-left of the window (including +// non-client area such as title bar and borders), matching the coordinate +// system of CreateForWindow's captured surface. +export void __stdcall wgcCapture_recognizeWindowRegion( + WgcCapture_H handle, + HWND hwnd, + unsigned int x, + unsigned int y, + unsigned int width, + unsigned int height +); + +// Terminate and free a WGC capture + OCR instance. +// Must only be called after the recognition callback has fired. +export void __stdcall wgcCapture_terminate(WgcCapture_H handle); + +#ifdef __cplusplus +} +#endif diff --git a/source/NVDAHelper/localWin10.py b/source/NVDAHelper/localWin10.py index 890eca6592f..a5efafbfe5b 100644 --- a/source/NVDAHelper/localWin10.py +++ b/source/NVDAHelper/localWin10.py @@ -5,7 +5,8 @@ """Functions exported by nvdaHelperLocalWin10.dll, and supporting definitions.""" -from ctypes import CFUNCTYPE, POINTER, c_uint, c_void_p, c_wchar_p, windll +from ctypes import CFUNCTYPE, POINTER, c_bool, c_uint, c_void_p, c_wchar_p, windll +from ctypes.wintypes import HWND from comtypes import BSTR import NVDAState @@ -68,3 +69,76 @@ c_uint, # height ) uwpOcr_recognize.restype = None + +# --- Windows Graphics Capture functions --- + +WgcCapture_H = c_void_p +"""Opaque handle to a WgcCapture instance.""" + +wgcCapture_Callback = CFUNCTYPE(None, c_wchar_p) +"""Function called when WGC recognition is complete.""" + +wgcCapture_isSupported = dll.wgcCapture_isSupported +""" +Check whether 
Windows.Graphics.Capture is available (Win10 1903+). + +.. seealso:: + ``nvdaHelper/localWin10/wgcCapture.h`` +""" +wgcCapture_isSupported.argtypes = () +wgcCapture_isSupported.restype = c_bool + +wgcCapture_initialize = dll.wgcCapture_initialize +""" +Initialise a WGC capture and OCR instance. + +.. seealso:: + ``nvdaHelper/localWin10/wgcCapture.h`` +""" +wgcCapture_initialize.argtypes = ( + c_wchar_p, # language + wgcCapture_Callback, # callback +) +wgcCapture_initialize.restype = WgcCapture_H + +wgcCapture_recognizeWindow = dll.wgcCapture_recognizeWindow +""" +Capture an entire window by HWND and run OCR asynchronously. + +.. seealso:: + ``nvdaHelper/localWin10/wgcCapture.h`` +""" +wgcCapture_recognizeWindow.argtypes = ( + WgcCapture_H, # handle + HWND, # hwnd +) +wgcCapture_recognizeWindow.restype = None + +wgcCapture_recognizeWindowRegion = dll.wgcCapture_recognizeWindowRegion +""" +Capture a sub-region of a window and run OCR asynchronously. + +.. seealso:: + ``nvdaHelper/localWin10/wgcCapture.h`` +""" +wgcCapture_recognizeWindowRegion.argtypes = ( + WgcCapture_H, # handle + HWND, # hwnd + c_uint, # x + c_uint, # y + c_uint, # width + c_uint, # height +) +wgcCapture_recognizeWindowRegion.restype = None + +wgcCapture_terminate = dll.wgcCapture_terminate +""" +Terminate and free a WGC capture and OCR instance. + +.. 
seealso:: + ``nvdaHelper/localWin10/wgcCapture.h`` +""" +wgcCapture_terminate.argtypes = ( + WgcCapture_H, # handle +) +wgcCapture_terminate.restype = None diff --git a/source/config/configSpec.py b/source/config/configSpec.py index a425c3a566b..3307f820713 100644 --- a/source/config/configSpec.py +++ b/source/config/configSpec.py @@ -543,6 +543,12 @@ enabled = boolean(default=false) warnOnLoad = boolean(default=true) playToggleSounds = boolean(default=true) + +[wgcCapture] + # auto: use WGC only when Screen Curtain is active (recommended) + # always: always use WGC (requires Win10 1903+) + # never: disable WGC, use legacy GDI capture + captureMode = option("auto", "always", "never", default="auto") """ #: The configuration specification diff --git a/source/contentRecog/recogUi.py b/source/contentRecog/recogUi.py index 86042fac9fe..b83a84919d1 100644 --- a/source/contentRecog/recogUi.py +++ b/source/contentRecog/recogUi.py @@ -136,14 +136,20 @@ def _recognize(self, onResult: onRecognizeResultCallbackT): # shouldn't recognize again. return imgInfo = self.imageInfo - sb = screenBitmap.ScreenBitmap(imgInfo.recogWidth, imgInfo.recogHeight) - pixels = sb.captureImage( - imgInfo.screenLeft, - imgInfo.screenTop, - imgInfo.screenWidth, - imgInfo.screenHeight, - ) - self.recognizer.recognize(pixels, self.imageInfo, onResult) + from contentRecog.wgcCapture import WgcOcr + + if isinstance(self.recognizer, WgcOcr): + # WGC captures its own frames via HWND; skip GDI screen capture. 
+ self.recognizer.recognize(None, self.imageInfo, onResult) + else: + sb = screenBitmap.ScreenBitmap(imgInfo.recogWidth, imgInfo.recogHeight) + pixels = sb.captureImage( + imgInfo.screenLeft, + imgInfo.screenTop, + imgInfo.screenWidth, + imgInfo.screenHeight, + ) + self.recognizer.recognize(pixels, self.imageInfo, onResult) def _onFirstResult(self, result: Union[RecognitionResult, Exception]): global _activeRecog @@ -233,6 +239,11 @@ def start(self): def recognizeNavigatorObject(recognizer: ContentRecognizer): """User interface function to recognize content in the navigator object. This should be called from a script or in response to a GUI action. + + When Screen Curtain is active and Windows Graphics Capture is supported, + the recognizer is automatically switched to L{wgcCapture.WgcOcr} so OCR + works without disabling the curtain. + @param recognizer: The content recognizer to use. """ global _activeRecog @@ -241,6 +252,33 @@ def recognizeNavigatorObject(recognizer: ContentRecognizer): # but the user is already reading a content recognition result. ui.message(_("Already in a content recognition result")) return + # When Screen Curtain is active, switch to WGC-based OCR if available. + import screenCurtain + + isScreenCurtainActive = screenCurtain.screenCurtain is not None and screenCurtain.screenCurtain.enabled + if isScreenCurtainActive: + from contentRecog import wgcCapture + + if wgcCapture.isSupported(): + language = None + from contentRecog.uwpOcr import UwpOcr + + if isinstance(recognizer, UwpOcr): + language = recognizer.language + recognizer = wgcCapture.WgcOcr(language=language) + log.debug("recogUi: Screen Curtain active, using WGC capture") + else: + ui.message( + # Translators: Message when OCR cannot work with Screen Curtain + # on older Windows versions. + _( + "Screen curtain is active. " + "OCR requires Windows 10 version 1903 or later " + "to work with screen curtain enabled. 
" + "Please disable screen curtain or upgrade Windows.", + ), + ) + return nav = api.getNavigatorObject() if not recognizer.validateObject(nav): return diff --git a/source/contentRecog/wgcCapture.py b/source/contentRecog/wgcCapture.py new file mode 100644 index 00000000000..d167864a976 --- /dev/null +++ b/source/contentRecog/wgcCapture.py @@ -0,0 +1,171 @@ +# A part of NonVisual Desktop Access (NVDA) +# Copyright (C) 2026 NV Access Limited, Pratik Patel +# This file is covered by the GNU General Public License. +# See the file COPYING for more details. + +"""Content recognizer using Windows.Graphics.Capture. + +Captures window content via CreateForWindow, which reads from the DWM compositor +before the Magnification API color transform. This allows OCR to work while +Screen Curtain is active, without exposing screen content to sighted observers. +""" + +import json +from collections.abc import Callable +from ctypes import c_uint + +import api +import config +import winUser +from NVDAHelper.localWin10 import ( + wgcCapture_isSupported, + wgcCapture_initialize, + wgcCapture_recognizeWindow, + wgcCapture_recognizeWindowRegion, + wgcCapture_terminate, + wgcCapture_Callback as _wgcCapture_Callback, +) +from logHandler import log +from . import ContentRecognizer, LinesWordsResult, RecogImageInfo + + +def isSupported() -> bool: + """Check whether WGC capture and OCR are available (Win10 1903+).""" + try: + return bool(wgcCapture_isSupported()) + except (OSError, AttributeError): + return False + + +def _getRootWindow(hwnd: int) -> int: + """Walk up to the root owner window required by CreateForWindow.""" + root = winUser.getAncestor(hwnd, winUser.GA_ROOT) + return root if root else hwnd + + +class WgcOcr(ContentRecognizer): + """OCR recognizer using Windows.Graphics.Capture. + + Works when Screen Curtain is active because WGC captures from the + DWM compositor before the Magnification API transform. 
+ """ + + @classmethod + def _get_allowAutoRefresh(cls) -> bool: + return config.conf["uwpOcr"]["autoRefresh"] + + @classmethod + def _get_autoRefreshInterval(cls) -> int: + return config.conf["uwpOcr"]["autoRefreshInterval"] + + @classmethod + def _get_autoSayAllOnResult(cls) -> bool: + return config.conf["uwpOcr"]["autoSayAllOnResult"] + + def getResizeFactor(self, width: int, height: int) -> int: + """WGC captures at native resolution; no resize needed.""" + return 1 + + def __init__(self, language: str | None = None): + """ + :param language: BCP-47 language code for OCR, or C{None} to use + the user's configured Windows OCR language. + """ + from contentRecog.uwpOcr import getConfigLanguage + + self.language: str = language or getConfigLanguage() + self._handle = None + self._onResult: Callable | None = None + self._cCallbackRef = None + + def recognize(self, pixels, imageInfo: RecogImageInfo, onResult: Callable) -> None: + """Capture the target window via HWND and run OCR. + + :param pixels: Ignored. WGC captures its own frames via HWND. + Kept for ContentRecognizer interface compatibility. + :param imageInfo: Screen region information for the recognition area. + :param onResult: Callback invoked with a L{LinesWordsResult} or C{Exception}. 
+ """ + self._onResult = onResult + + hwnd = self._getTargetHwnd(imageInfo) + if not hwnd: + log.error("wgcCapture: could not find target HWND") + onResult(RuntimeError("wgcCapture: no target HWND")) # report the failure through the result callback + return + + @_wgcCapture_Callback + def callback(resultJson): + self._onCppResult(resultJson, imageInfo, hwnd) + + self._cCallbackRef = callback + + self._handle = wgcCapture_initialize(self.language, self._cCallbackRef) + if not self._handle: + log.error("wgcCapture: failed to initialize (language=%s)", self.language) + onResult(RuntimeError("WGC OCR initialization failed")) # report the failure through the result callback + return + + windowRect = winUser.getWindowRect(hwnd) + if windowRect: + relX = max(0, imageInfo.screenLeft - windowRect[0]) + relY = max(0, imageInfo.screenTop - windowRect[1]) + wgcCapture_recognizeWindowRegion( + self._handle, + hwnd, + c_uint(relX), + c_uint(relY), + c_uint(imageInfo.screenWidth), + c_uint(imageInfo.screenHeight), + ) + else: + wgcCapture_recognizeWindow(self._handle, hwnd) + + def _getTargetHwnd(self, imageInfo: RecogImageInfo) -> int | None: + """Return the root window of the current navigator object, or C{None} if it has no window handle; imageInfo is not consulted.""" + nav = api.getNavigatorObject() + if nav and hasattr(nav, "windowHandle") and nav.windowHandle: + return _getRootWindow(nav.windowHandle) + return None + + def _onCppResult( + self, + resultJson: str | None, + imageInfo: RecogImageInfo, + hwnd: int, + ) -> None: + """Parse C++ OCR JSON results into L{LinesWordsResult}. + + Called from the C++ completion callback. Handles cleanup of the + native instance (matching the uwpOcr pattern where terminate is + called inside the callback, never while async work is in-flight). + """ + # If _onResult is None, recognition was cancelled. 
+ if self._onResult: + if resultJson: + try: + data = json.loads(resultJson) + self._onResult(LinesWordsResult(data, imageInfo)) + except (json.JSONDecodeError, KeyError, TypeError) as e: + log.error("wgcCapture: failed to parse OCR result: %s", e) + self._onResult(RuntimeError(f"WGC OCR parse error: {e}")) + else: + log.debugWarning("wgcCapture: OCR returned no results") + self._onResult(RuntimeError("WGC OCR returned no results")) + # Clean up the native instance now that the coroutine has completed. + wgcCapture_terminate(self._handle) + self._cCallbackRef = None + self._handle = None + + def cancel(self) -> None: + """Cancel pending recognition. + + Marks recognition as cancelled so results are ignored when the + C++ callback fires. Does not terminate the native instance + (the callback handles cleanup when C++ signals completion). + """ + self._onResult = None + + def validateObject(self, nav) -> bool: + """WGC requires a valid HWND on the navigator object.""" + return bool(getattr(nav, "windowHandle", None)) diff --git a/source/globalCommands.py b/source/globalCommands.py index d09e4c57349..d67a12c43b6 100755 --- a/source/globalCommands.py +++ b/source/globalCommands.py @@ -4726,13 +4726,6 @@ def script_recognizeWithUwpOcr(self, gesture): # Translators: Reported when Windows OCR is not available. ui.message(_("Windows OCR not available")) return - from screenCurtain import screenCurtain - - isScreenCurtainRunning = screenCurtain is not None and screenCurtain.enabled - if isScreenCurtainRunning: - # Translators: Reported when screen curtain is enabled. 
- ui.message(_("Please disable screen curtain before using Windows OCR.")) - return from contentRecog import uwpOcr, recogUi recog = uwpOcr.UwpOcr() @@ -4960,11 +4953,19 @@ def _enableScreenCurtain(doEnable: bool = True): isinstance(focusObj, RefreshableRecogResultNVDAObject) and focusObj.recognizer.allowAutoRefresh ): - ui.message( - screenCurtain._screenCurtain.UNAVAILABLE_WHEN_RECOGNISING_CONTENT_MESSAGE, - speechPriority=speech.priorities.Spri.NOW, + # WGC-based OCR works with Screen Curtain; only block for legacy GDI OCR. + from contentRecog import wgcCapture + + isWgcRecognizer = wgcCapture.isSupported() and isinstance( + focusObj.recognizer, + wgcCapture.WgcOcr, ) - return + if not isWgcRecognizer: + ui.message( + screenCurtain._screenCurtain.UNAVAILABLE_WHEN_RECOGNISING_CONTENT_MESSAGE, + speechPriority=speech.priorities.Spri.NOW, + ) + return _enableScreenCurtain() @script( diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index 5d53a5bafa0..a70308a7c5a 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -6,6 +6,7 @@ ### New Features +* Windows OCR now works while Screen Curtain is active on Windows 10 version 1903 and later, using Windows Graphics Capture to read window content directly from the compositor without exposing screen content visually. (#19164, @pratikp1) * NVDA now includes a built-in Magnifier feature that allows you to zoom and magnify parts of the screen. (#19228, @Boumtchack) * The magnifier supports various zoom levels, color filters (normal, grayscale, inverted), and different focus tracking modes. * Color filters can help users with visual impairments or light sensitivity by inverting or desaturating screen colors. @@ -43,6 +44,9 @@ The triple-press keyboard shortcut (`NVDA+ctrl+r`) is not affected, as it is int ### Changes for Developers +* Added `contentRecog.wgcCapture` module with `WgcOcr` recognizer using Windows Graphics Capture API (`CreateForWindow`). 
This captures window content from the DWM compositor, bypassing the Magnification API transform used by Screen Curtain. (#19164, @pratikp1) +* Added WGC capture functions to `NVDAHelper.localWin10`: `wgcCapture_isSupported`, `wgcCapture_initialize`, `wgcCapture_recognizeWindow`, `wgcCapture_recognizeWindowRegion`, `wgcCapture_terminate`. (#19164, @pratikp1) +* `recogUi.recognizeNavigatorObject` now auto-switches to WGC-based OCR when Screen Curtain is active and WGC is available. (#19164, @pratikp1) * NVDA libraries built by the build system are now linked with the [/SETCOMPAT](https://learn.microsoft.com/en-us/cpp/build/reference/cetcompat) flag, improving protection against certain malware attacks. (#19435, @LeonarddeR) Please refer to [the developer guide](https://download.nvaccess.org/documentation/developerGuide.html#API) for information on NVDA's API deprecation and removal process. diff --git a/user_docs/en/userGuide.md b/user_docs/en/userGuide.md index b4ed87ee2ef..39b74f5fc4c 100644 --- a/user_docs/en/userGuide.md +++ b/user_docs/en/userGuide.md @@ -1503,9 +1503,13 @@ You can enable Screen Curtain in the [Privacy and Security category](#PrivacyAnd When Screen Curtain is enabled, features that rely on what is literally on screen will not function. -For example, you cannot [use OCR](#Win10Ocr). Some screenshot utilities also may not work. +On Windows 10 version 1903 and later, [Windows OCR](#Win10Ocr) will continue to work while Screen Curtain is active. +NVDA automatically uses Windows Graphics Capture to read window content directly from the compositor, bypassing the screen blackout. +The physical display remains black, preserving your privacy. +On older versions of Windows, you will need to disable Screen Curtain before using OCR. + Please note that while Windows Magnifier is running and inverted screen colors are being used, Screen Curtain cannot be enabled. 
## Magnifier {#Magnifier} @@ -1642,7 +1646,10 @@ Additional languages can be installed by opening the Start menu, choosing Settin When you want to monitor constantly changing content, such as when watching a video with subtitles, you can optionally enable automatic refresh of the recognized content. This can also be done in the [Windows OCR category](#Win10OcrSettings) of the [NVDA Settings](#NVDASettings) dialog. -Windows OCR may be partially or fully incompatible with [NVDA vision enhancements](#Vision) or other external visual aids. You will need to disable these aids before proceeding to a recognition. +Windows OCR may be partially or fully incompatible with some [NVDA vision enhancements](#Vision) or other external visual aids. +However, on Windows 10 version 1903 and later, OCR works while [Screen Curtain](#VisionScreenCurtain) is active. +When Screen Curtain is enabled, NVDA automatically uses Windows Graphics Capture to read window content directly from the desktop compositor, preserving your visual privacy while still performing recognition. +For other vision enhancements, you may need to disable them before proceeding to a recognition. To recognize the text in the current navigator object using Windows OCR, press NVDA+r.