diff --git a/ALL.sln b/ALL.sln
index cdfe3282919..a0d80166361 100644
--- a/ALL.sln
+++ b/ALL.sln
@@ -153,8 +153,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Scripts", "Scripts", "{0AB2
Externals\winimerge\ExpandEnvironmenStrings.js = Externals\winimerge\ExpandEnvironmenStrings.js
EndProjectSection
EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SampleStatic", "Externals\crystaledit\Sample\SampleStatic.vcxproj", "{C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}"
-EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "WinIMerge", "WinIMerge", "{0B7E421E-A8CA-444C-B650-D1D7F1D55688}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Frhed", "Frhed", "{97E9C1F1-F87E-4BE6-A66C-6B97DCD80873}"
@@ -506,6 +504,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Translations", "Translation
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CrashLogger", "Src\CrashLogger\CrashLogger.vcxitems", "{CDEDDAC4-79B9-4A34-A564-8019F60D241F}"
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tree-sitter", "Externals\crystaledit\editlib\tree-sitter\tree-sitter.vcxitems", "{A3E7E04F-5D2B-4F3A-B725-8F5C4B1E9D3C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SampleStatic", "Externals\crystaledit\Sample\SampleStatic.vcxproj", "{C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|ARM = Debug|ARM
@@ -1137,26 +1139,6 @@ Global
{49A7104F-08CD-48B1-99E6-6A7C3FFB0EEC}.Test|ARM64.ActiveCfg = Debug|ARM64
{49A7104F-08CD-48B1-99E6-6A7C3FFB0EEC}.Test|x64.ActiveCfg = Debug|x64
{49A7104F-08CD-48B1-99E6-6A7C3FFB0EEC}.Test|x86.ActiveCfg = Debug|Win32
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|ARM.ActiveCfg = Debug|ARM
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|ARM.Build.0 = Debug|ARM
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|ARM64.ActiveCfg = Debug|ARM64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|ARM64.Build.0 = Debug|ARM64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|x64.ActiveCfg = Debug|x64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|x64.Build.0 = Debug|x64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|x86.ActiveCfg = Debug|Win32
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|x86.Build.0 = Debug|Win32
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|ARM.ActiveCfg = Release|ARM
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|ARM.Build.0 = Release|ARM
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|ARM64.ActiveCfg = Release|ARM64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|ARM64.Build.0 = Release|ARM64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|x64.ActiveCfg = Release|x64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|x64.Build.0 = Release|x64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|x86.ActiveCfg = Release|Win32
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|x86.Build.0 = Release|Win32
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|ARM.ActiveCfg = Debug|ARM
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|ARM64.ActiveCfg = Debug|ARM64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|x64.ActiveCfg = Debug|x64
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|x86.ActiveCfg = Debug|Win32
{139D8239-98B4-4B29-A199-B27CDB8B4708}.Debug|ARM.ActiveCfg = Debug|ARM
{139D8239-98B4-4B29-A199-B27CDB8B4708}.Debug|ARM.Build.0 = Debug|ARM
{139D8239-98B4-4B29-A199-B27CDB8B4708}.Debug|ARM64.ActiveCfg = Debug|ARM64
@@ -1389,6 +1371,30 @@ Global
{40AF6A7D-B4E7-4919-B38A-75F625E36AF5}.Test|x64.Build.0 = Debug|x64
{40AF6A7D-B4E7-4919-B38A-75F625E36AF5}.Test|x86.ActiveCfg = Debug|Win32
{40AF6A7D-B4E7-4919-B38A-75F625E36AF5}.Test|x86.Build.0 = Debug|Win32
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|ARM.ActiveCfg = Debug|ARM
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|ARM.Build.0 = Debug|ARM
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|ARM64.ActiveCfg = Debug|ARM64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|ARM64.Build.0 = Debug|ARM64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|x64.ActiveCfg = Debug|x64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|x64.Build.0 = Debug|x64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|x86.ActiveCfg = Debug|Win32
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Debug|x86.Build.0 = Debug|Win32
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|ARM.ActiveCfg = Release|ARM
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|ARM.Build.0 = Release|ARM
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|ARM64.ActiveCfg = Release|ARM64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|ARM64.Build.0 = Release|ARM64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|x64.ActiveCfg = Release|x64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|x64.Build.0 = Release|x64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|x86.ActiveCfg = Release|Win32
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Release|x86.Build.0 = Release|Win32
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|ARM.ActiveCfg = Debug|ARM
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|ARM.Build.0 = Debug|ARM
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|ARM64.ActiveCfg = Debug|ARM64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|ARM64.Build.0 = Debug|ARM64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|x64.ActiveCfg = Debug|x64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|x64.Build.0 = Debug|x64
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|x86.ActiveCfg = Debug|Win32
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}.Test|x86.Build.0 = Debug|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -1427,7 +1433,6 @@ Global
{49A7104F-08CD-48B1-99E6-6A7C3FFB0EEC} = {0B7E421E-A8CA-444C-B650-D1D7F1D55688}
{A9E83C9D-8AD6-4C87-888D-3F91B5656CD1} = {0B7E421E-A8CA-444C-B650-D1D7F1D55688}
{0AB212AD-1F11-4BAB-BAD5-8BFC4435F453} = {A9E83C9D-8AD6-4C87-888D-3F91B5656CD1}
- {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD} = {6BBF0DEA-C0B8-4B73-B540-3BF8297B49B4}
{0B7E421E-A8CA-444C-B650-D1D7F1D55688} = {CE514278-A13F-4F6A-93EB-5653410AC214}
{97E9C1F1-F87E-4BE6-A66C-6B97DCD80873} = {CE514278-A13F-4F6A-93EB-5653410AC214}
{6BBF0DEA-C0B8-4B73-B540-3BF8297B49B4} = {CE514278-A13F-4F6A-93EB-5653410AC214}
@@ -1476,6 +1481,8 @@ Global
{DAFD8D9E-6509-4482-A533-44C843C3BCC7} = {220B870C-D051-463E-997B-8C392081EE15}
{40AF6A7D-B4E7-4919-B38A-75F625E36AF5} = {02EA681E-C7D8-13C7-8484-4AC65E1B71E8}
{02828B8D-E4B9-41FE-84C4-D5CDA7761627} = {5360DC20-6552-4F74-8D4D-BF88B23E0B3A}
+ {A3E7E04F-5D2B-4F3A-B725-8F5C4B1E9D3C} = {6BBF0DEA-C0B8-4B73-B540-3BF8297B49B4}
+ {C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD} = {6BBF0DEA-C0B8-4B73-B540-3BF8297B49B4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CC2E4F75-FADC-4F44-BD62-47A321828081}
@@ -1501,6 +1508,7 @@ Global
Externals\googletest\googletest\googletest.vcxitems*{9ee35458-b145-444f-92b7-27ff72112c42}*SharedItemsImports = 9
Externals\crystaledit\editlib\editlib.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Externals\crystaledit\editlib\editlibparsers.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
+ Externals\crystaledit\editlib\tree-sitter\tree-sitter.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Externals\poco\Foundation\Foundation.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Externals\poco\XML\XML.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Externals\xdiff\xdiff.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
@@ -1508,6 +1516,7 @@ Global
Src\CrashLogger\CrashLogger.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Src\diffutils\diffutils.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Src\FilterEngine\FilterEngine.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
+ Externals\crystaledit\editlib\tree-sitter\tree-sitter.vcxitems*{a3e7e04f-5d2b-4f3a-b725-8f5c4b1e9d3c}*SharedItemsImports = 9
Plugins\src_VCPP\Common\Common.vcxitems*{a644fba4-d76e-4500-b4b7-04d7a245359a}*SharedItemsImports = 4
Plugins\src_VCPP\Common\Common.vcxitems*{aa88b46e-b2e2-4b03-8cd5-1e9d60db6ab2}*SharedItemsImports = 4
Externals\crystaledit\editlib\editlibparsers.vcxitems*{ab827c6b-5116-408f-b453-e2075e9b73b4}*SharedItemsImports = 4
@@ -1523,6 +1532,7 @@ Global
Plugins\src_VCPP\Common\Common.vcxitems*{bd0c5fe1-8457-49c2-8801-0c99a6e6cc03}*SharedItemsImports = 4
Externals\crystaledit\editlib\editlib.vcxitems*{c347d6ae-7a2b-4ed0-97ad-2595e1c5d7dd}*SharedItemsImports = 4
Externals\crystaledit\editlib\editlibparsers.vcxitems*{c347d6ae-7a2b-4ed0-97ad-2595e1c5d7dd}*SharedItemsImports = 4
+ Externals\crystaledit\editlib\tree-sitter\tree-sitter.vcxitems*{c347d6ae-7a2b-4ed0-97ad-2595e1c5d7dd}*SharedItemsImports = 4
Src\CrashLogger\CrashLogger.vcxitems*{cdeddac4-79b9-4a34-a564-8019f60d241f}*SharedItemsImports = 9
Externals\poco\XML\XML.vcxitems*{dafd8d9e-6509-4482-a533-44c843c3bcc7}*SharedItemsImports = 9
Src\diffutils\diffutils.vcxitems*{fc3b9df3-2854-4264-ab4b-ee8c43982513}*SharedItemsImports = 9
diff --git a/ALL.vs2017.sln b/ALL.vs2017.sln
index 0b09fa68fe6..771fa6ce0e8 100644
--- a/ALL.vs2017.sln
+++ b/ALL.vs2017.sln
@@ -153,8 +153,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Scripts", "Scripts", "{0AB2
Externals\winimerge\ExpandEnvironmenStrings.js = Externals\winimerge\ExpandEnvironmenStrings.js
EndProjectSection
EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SampleStatic", "Externals\crystaledit\Sample\SampleStatic.vcxproj", "{C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}"
-EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "WinIMerge", "WinIMerge", "{0B7E421E-A8CA-444C-B650-D1D7F1D55688}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Frhed", "Frhed", "{97E9C1F1-F87E-4BE6-A66C-6B97DCD80873}"
@@ -437,6 +435,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Manual", "Manual", "{7DF650
Translations\Docs\Manual\English.pot = Translations\Docs\Manual\English.pot
Translations\Docs\Manual\French.po = Translations\Docs\Manual\French.po
Translations\Docs\Manual\Hebrew.po = Translations\Docs\Manual\Hebrew.po
+ Translations\Docs\Manual\Italian.po = Translations\Docs\Manual\Italian.po
Translations\Docs\Manual\Japanese.po = Translations\Docs\Manual\Japanese.po
EndProjectSection
EndProject
@@ -490,6 +489,10 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Translations", "Translation
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CrashLogger", "Src\CrashLogger\CrashLogger.vcxitems", "{CDEDDAC4-79B9-4A34-A564-8019F60D241F}"
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "tree-sitter", "Externals\crystaledit\editlib\tree-sitter\tree-sitter.vcxitems", "{A3E7E04F-5D2B-4F3A-B725-8F5C4B1E9D3C}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SampleStatic", "Externals\crystaledit\Sample\SampleStatic.vcxproj", "{C347D6AE-7A2B-4ED0-97AD-2595E1C5D7DD}"
+EndProject
Global
GlobalSection(SharedMSBuildProjectFiles) = preSolution
Plugins\src_VCPP\Common\Common.vcxitems*{098b241d-63af-4683-baf8-6da57fd8fe75}*SharedItemsImports = 4
@@ -514,11 +517,13 @@ Global
Externals\crystaledit\editlib\editlibparsers.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Externals\poco\Foundation\Foundation.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Externals\poco\XML\XML.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
+ Externals\crystaledit\editlib\tree-sitter\tree-sitter.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Externals\xdiff\xdiff.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Src\CompareEngines\CompareEngines.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Src\CrashLogger\CrashLogger.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Src\diffutils\diffutils.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
Src\FilterEngine\FilterEngine.vcxitems*{9fda4af0-ccfd-4812-bdb9-53efedb32bde}*SharedItemsImports = 4
+ Externals\crystaledit\editlib\tree-sitter\tree-sitter.vcxitems*{a3e7e04f-5d2b-4f3a-b725-8f5c4b1e9d3c}*SharedItemsImports = 9
Plugins\src_VCPP\Common\Common.vcxitems*{a644fba4-d76e-4500-b4b7-04d7a245359a}*SharedItemsImports = 4
Plugins\src_VCPP\Common\Common.vcxitems*{aa88b46e-b2e2-4b03-8cd5-1e9d60db6ab2}*SharedItemsImports = 4
Externals\crystaledit\editlib\editlibparsers.vcxitems*{ab827c6b-5116-408f-b453-e2075e9b73b4}*SharedItemsImports = 4
@@ -534,6 +539,7 @@ Global
Plugins\src_VCPP\Common\Common.vcxitems*{bd0c5fe1-8457-49c2-8801-0c99a6e6cc03}*SharedItemsImports = 4
Externals\crystaledit\editlib\editlib.vcxitems*{c347d6ae-7a2b-4ed0-97ad-2595e1c5d7dd}*SharedItemsImports = 4
Externals\crystaledit\editlib\editlibparsers.vcxitems*{c347d6ae-7a2b-4ed0-97ad-2595e1c5d7dd}*SharedItemsImports = 4
+ Externals\crystaledit\editlib\tree-sitter\tree-sitter.vcxitems*{c347d6ae-7a2b-4ed0-97ad-2595e1c5d7dd}*SharedItemsImports = 4
Src\CrashLogger\CrashLogger.vcxitems*{cdeddac4-79b9-4a34-a564-8019f60d241f}*SharedItemsImports = 9
Externals\poco\XML\XML.vcxitems*{dafd8d9e-6509-4482-a533-44c843c3bcc7}*SharedItemsImports = 9
Src\diffutils\diffutils.vcxitems*{fc3b9df3-2854-4264-ab4b-ee8c43982513}*SharedItemsImports = 9
@@ -1423,6 +1429,7 @@ Global
{B30837C4-F99F-46F9-ADFB-5FAEFB1D2FBB} = {220B870C-D051-463E-997B-8C392081EE15}
{DAFD8D9E-6509-4482-A533-44C843C3BCC7} = {220B870C-D051-463E-997B-8C392081EE15}
{02828B8D-E4B9-41FE-84C4-D5CDA7761627} = {5360DC20-6552-4F74-8D4D-BF88B23E0B3A}
+ {A3E7E04F-5D2B-4F3A-B725-8F5C4B1E9D3C} = {6BBF0DEA-C0B8-4B73-B540-3BF8297B49B4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {CC2E4F75-FADC-4F44-BD62-47A321828081}
diff --git a/Docs/Manual/Italian/Filters.xml b/Docs/Manual/Italian/Filters.xml
index 67bf2545950..04d2a15952c 100644
--- a/Docs/Manual/Italian/Filters.xml
+++ b/Docs/Manual/Italian/Filters.xml
@@ -591,7 +591,7 @@ I seguenti attributi sono progettati per essere usati solo nei filtri
i prefissi e:, e!:,
fe:, fe!:,
de: o de!: in il campo
-Filtro). Non devono essere usati nelle regole di inclusione/esclusione del
+Filtro). Non devono essere usati nelle regole di inclusione/esclusione del
filtro file (.flt), poiché richiedono che il confronto
venga prima completato e potrebbero causare comportamenti imprevisti.
diff --git a/Externals/crystaledit/Sample/SampleStatic.vcxproj b/Externals/crystaledit/Sample/SampleStatic.vcxproj
index 31e7032a148..ee108d31f76 100644
--- a/Externals/crystaledit/Sample/SampleStatic.vcxproj
+++ b/Externals/crystaledit/Sample/SampleStatic.vcxproj
@@ -127,6 +127,7 @@
+
diff --git a/Externals/crystaledit/editlib/TreeSitterParser.cpp b/Externals/crystaledit/editlib/TreeSitterParser.cpp
new file mode 100644
index 00000000000..5238e9130bd
--- /dev/null
+++ b/Externals/crystaledit/editlib/TreeSitterParser.cpp
@@ -0,0 +1,2115 @@
+////////////////////////////////////////////////////////////////////////////
+// File: TreeSitterParser.cpp
+// Version: 1.0.1
+// Created: 2026-03-26
+//
+// Tree-sitter based syntax highlighting bridge for CrystalEdit.
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+////////////////////////////////////////////////////////////////////////////
+
+#include "StdAfx.h"
+#include "TreeSitterParser.h"
+#include "ccrystaltextbuffer.h"
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _DEBUG
+#define new DEBUG_NEW
+#endif
+
+namespace
+{
+/**
+ * @brief Test whether a capture name is a prefix itself or extends it by a dotted segment.
+ *
+ * "keyword" and "keyword.function" both match the prefix "keyword";
+ * "keywords" does not, because the character right after the prefix must be '.'.
+ *
+ * @param captureName Capture name to inspect.
+ * @param prefix NUL-terminated prefix to compare against.
+ * @return true on an exact match, or when captureName starts with prefix followed by '.'.
+ */
+bool HasCapturePrefix(const std::string& captureName, const char* prefix)
+{
+	if (captureName == prefix)
+		return true;
+
+	const size_t prefixLen = strlen(prefix);
+	if (captureName.size() <= prefixLen)
+		return false;
+	if (captureName[prefixLen] != '.')
+		return false;
+	return captureName.compare(0, prefixLen, prefix) == 0;
+}
+
+/**
+ * @brief Count the dot-separated segments of a capture name.
+ *
+ * @param captureName Capture name such as "keyword.function".
+ * @return Number of '.' characters plus one ("a.b.c" yields 3, an empty name yields 1).
+ */
+int CountCaptureSegments(const std::string& captureName)
+{
+	// std::count returns the iterator difference type; narrow it explicitly to int.
+	return static_cast<int>(std::count(captureName.begin(), captureName.end(), '.')) + 1;
+}
+
+/**
+ * @brief Compute a priority value for a capture.
+ *
+ * The result grows with the number of dotted segments in the capture name
+ * (200000 per segment) and shrinks with the byte span of the capture
+ * (100000 minus the span, with the span clamped to 100000).
+ *
+ * @param captureName Capture name whose segment count is scored.
+ * @param startByte Start byte offset of the capture.
+ * @param endByte End byte offset of the capture; a value not greater than
+ *        startByte is treated as an empty span.
+ * @return Sum of the specificity score and the span score.
+ */
+int MakeCapturePriority(const std::string& captureName, uint32_t startByte, uint32_t endByte)
+{
+	const uint32_t span = (endByte > startByte) ? (endByte - startByte) : 0;
+	const int specificityScore = CountCaptureSegments(captureName) * 200000;
+	// Clamp in uint32_t first so the narrowing cast to int can never overflow.
+	const int spanScore = 100000 - static_cast<int>(std::min<uint32_t>(span, 100000));
+	return specificityScore + spanScore;
+}
+}
+
+// ============================================================================
+// CTreeSitterColorMap
+// ============================================================================
+
+CTreeSitterColorMap::CTreeSitterColorMap()
+{
+	// Tree-sitter highlight queries use capture names like @keyword, @string,
+	// @comment, etc. (nvim-treesitter conventions). The table below collapses
+	// them into WinMerge's available COLORINDEX categories.
+	struct CaptureColor
+	{
+		const char* name;
+		decltype(COLORINDEX_NORMALTEXT) color;
+	};
+
+	static const CaptureColor kCaptureColors[] =
+	{
+		// Keywords: control flow, storage, operators as keywords
+		{ "keyword",               COLORINDEX_KEYWORD },
+		{ "keyword.function",      COLORINDEX_KEYWORD },
+		{ "keyword.operator",      COLORINDEX_KEYWORD },
+		{ "keyword.import",        COLORINDEX_KEYWORD },
+		{ "keyword.type",          COLORINDEX_KEYWORD },
+		{ "keyword.modifier",      COLORINDEX_KEYWORD },
+		{ "keyword.repeat",        COLORINDEX_KEYWORD },
+		{ "keyword.return",        COLORINDEX_KEYWORD },
+		{ "keyword.conditional",   COLORINDEX_KEYWORD },
+		{ "keyword.exception",     COLORINDEX_KEYWORD },
+		{ "keyword.directive",     COLORINDEX_PREPROCESSOR },
+		{ "keyword.coroutine",     COLORINDEX_KEYWORD },
+		{ "include",               COLORINDEX_KEYWORD },
+		{ "repeat",                COLORINDEX_KEYWORD },
+		{ "conditional",           COLORINDEX_KEYWORD },
+		{ "exception",             COLORINDEX_KEYWORD },
+
+		// Functions
+		{ "function",              COLORINDEX_FUNCNAME },
+		{ "function.call",         COLORINDEX_FUNCNAME },
+		{ "function.builtin",      COLORINDEX_FUNCNAME },
+		{ "function.macro",        COLORINDEX_FUNCNAME },
+		{ "method",                COLORINDEX_FUNCNAME },
+		{ "method.call",           COLORINDEX_FUNCNAME },
+		{ "constructor",           COLORINDEX_FUNCNAME },
+
+		// Comments
+		{ "comment",               COLORINDEX_COMMENT },
+		{ "comment.documentation", COLORINDEX_COMMENT },
+
+		// Strings and characters
+		{ "string",                COLORINDEX_STRING },
+		{ "string.documentation",  COLORINDEX_STRING },
+		{ "string.regex",          COLORINDEX_STRING },
+		{ "string.escape",         COLORINDEX_STRING },
+		{ "string.special",        COLORINDEX_STRING },
+		{ "character",             COLORINDEX_STRING },
+		{ "character.special",     COLORINDEX_STRING },
+
+		// Numbers
+		{ "number",                COLORINDEX_NUMBER },
+		{ "number.float",          COLORINDEX_NUMBER },
+		{ "float",                 COLORINDEX_NUMBER },
+		{ "boolean",               COLORINDEX_NUMBER },
+
+		// Operators and punctuation
+		{ "operator",              COLORINDEX_OPERATOR },
+		{ "punctuation",           COLORINDEX_OPERATOR },
+		{ "punctuation.bracket",   COLORINDEX_OPERATOR },
+		{ "punctuation.delimiter", COLORINDEX_OPERATOR },
+		{ "punctuation.special",   COLORINDEX_OPERATOR },
+
+		// Preprocessor / attributes
+		{ "preproc",               COLORINDEX_PREPROCESSOR },
+		{ "define",                COLORINDEX_PREPROCESSOR },
+		{ "attribute",             COLORINDEX_PREPROCESSOR },
+		{ "attribute.builtin",     COLORINDEX_PREPROCESSOR },
+
+		// Types -> USER1 (types are important in F# for merge understanding)
+		{ "type",                  COLORINDEX_USER1 },
+		{ "type.builtin",          COLORINDEX_USER1 },
+		{ "type.definition",       COLORINDEX_USER1 },
+		{ "type.qualifier",        COLORINDEX_USER1 },
+		{ "storageclass",          COLORINDEX_USER1 },
+
+		// Variables / properties / modules -> USER1, USER2 or NORMALTEXT
+		{ "variable",              COLORINDEX_NORMALTEXT },
+		{ "variable.builtin",      COLORINDEX_USER2 },
+		{ "variable.parameter",    COLORINDEX_NORMALTEXT },
+		{ "variable.member",       COLORINDEX_USER2 },
+		{ "property",              COLORINDEX_USER2 },
+		{ "field",                 COLORINDEX_USER2 },
+		{ "constant",              COLORINDEX_USER2 },
+		{ "constant.builtin",      COLORINDEX_USER2 },
+		{ "constant.macro",        COLORINDEX_USER2 },
+		{ "module",                COLORINDEX_USER1 },
+		{ "namespace",             COLORINDEX_USER1 },
+		{ "label",                 COLORINDEX_USER2 },
+		{ "tag",                   COLORINDEX_KEYWORD },
+		{ "tag.attribute",         COLORINDEX_USER2 },
+		{ "tag.delimiter",         COLORINDEX_OPERATOR },
+	};
+
+	for (const CaptureColor& entry : kCaptureColors)
+		m_map[entry.name] = entry.color;
+}
+
+/**
+ * @brief Resolve a capture name to a color index.
+ *
+ * The full name is looked up first; on a miss the last dotted segment is
+ * dropped and the lookup repeated (e.g. "keyword.control.fsharp" ->
+ * "keyword.control" -> "keyword") until a mapping is found or no '.' remains.
+ *
+ * @param sCaptureName Capture name to resolve.
+ * @return The mapped color index, or COLORINDEX_NORMALTEXT when nothing matches.
+ */
+int CTreeSitterColorMap::MapCapture(const std::string& sCaptureName) const
+{
+	std::string candidate = sCaptureName;
+	for (;;)
+	{
+		const auto found = m_map.find(candidate);
+		if (found != m_map.end())
+			return found->second;
+
+		// No entry for this name: strip the trailing ".segment" and retry.
+		const auto lastDot = candidate.rfind('.');
+		if (lastDot == std::string::npos)
+			return COLORINDEX_NORMALTEXT;
+		candidate.erase(lastDot);
+	}
+}
+
+
+// ============================================================================
+// CTreeSitterLanguage
+// ============================================================================
+
+/**
+ * @brief Construct an empty language holder: no DLL handle, no language, no queries.
+ */
+CTreeSitterLanguage::CTreeSitterLanguage()
+	: m_hDll{ nullptr }
+	, m_pLanguage{ nullptr }
+	, m_pHighlightQuery{ nullptr }
+	, m_pLocalsQuery{ nullptr }
+	, m_pTagsQuery{ nullptr }
+	, m_pInjectionQuery{ nullptr }
+{
+}
+
+/**
+ * @brief Release every compiled query, then unload the grammar DLL.
+ */
+CTreeSitterLanguage::~CTreeSitterLanguage()
+{
+	// Queries are deleted before the DLL is freed, in the same order they are loaded.
+	TSQuery* const queries[] =
+	{
+		m_pHighlightQuery,
+		m_pLocalsQuery,
+		m_pTagsQuery,
+		m_pInjectionQuery,
+	};
+	for (TSQuery* pQuery : queries)
+	{
+		if (pQuery != nullptr)
+			ts_query_delete(pQuery);
+	}
+
+	if (m_hDll != nullptr)
+		FreeLibrary(m_hDll);
+}
+
+/**
+ * @brief Read a query (.scm) file and compile it against the loaded language.
+ *
+ * @param sPath Path of the query file; it is read in binary mode, in full.
+ * @return The compiled query, or nullptr when the file cannot be opened or
+ *         ts_query_new() reports a compilation error.
+ */
+TSQuery* CTreeSitterLanguage::LoadQuery(const std::wstring& sPath)
+{
+	std::ifstream file(sPath, std::ios::binary);
+	if (!file.is_open())
+		return nullptr;
+
+	// Extra parentheses around the first argument avoid the most vexing parse.
+	const std::string source((std::istreambuf_iterator<char>(file)),
+		std::istreambuf_iterator<char>());
+
+	uint32_t errorOffset = 0;
+	TSQueryError errorType = TSQueryErrorNone;
+	TSQuery* pQuery = ts_query_new(
+		m_pLanguage,
+		source.c_str(),
+		static_cast<uint32_t>(source.size()),
+		&errorOffset,
+		&errorType);
+
+	if (errorType != TSQueryErrorNone)
+	{
+		// Query compilation failed; the caller continues without this query.
+		// NOTE(review): errorOffset/errorType are not reported anywhere yet.
+		return nullptr;
+	}
+
+	return pQuery;
+}
+
+/**
+ * @brief Load a grammar DLL and its query files from a directory.
+ *
+ * Loads "<dir>\tree-sitter-<language>.dll", resolves its
+ * "tree_sitter_<language>" export, and then tries to compile the
+ * highlights, locals, tags and injections queries found next to it.
+ *
+ * @param sGrammarDir Directory containing the DLL and the .scm files.
+ * @param sLanguage Language name (e.g. "fsharp", "c-sharp"); ASCII only.
+ * @return false when the DLL cannot be loaded, the export is missing, or the
+ *         export returns a null language; true otherwise. A missing or
+ *         invalid query file does not make this function fail.
+ */
+bool CTreeSitterLanguage::Load(const std::wstring& sGrammarDir, const std::wstring& sLanguage)
+{
+	m_sName = sLanguage;
+
+	// Load the grammar DLL
+	// Expected name: tree-sitter-<language>.dll (e.g. tree-sitter-fsharp.dll)
+	const std::wstring sDllPath = sGrammarDir + L"\\tree-sitter-" + sLanguage + L".dll";
+	m_hDll = LoadLibraryW(sDllPath.c_str());
+	if (!m_hDll)
+		return false;
+
+	// Get the language function
+	// Expected export: tree_sitter_<language> (e.g. tree_sitter_fsharp)
+	// Note: hyphens in language names are converted to underscores for the
+	// C export name (e.g. "c-sharp" -> "tree_sitter_c_sharp")
+	std::string sFuncName = "tree_sitter_";
+	for (wchar_t ch : sLanguage)
+	{
+		if (ch == L'-')
+			sFuncName += '_';
+		else
+			sFuncName += static_cast<char>(ch); // ASCII language names only
+	}
+
+	using TSLanguageFunc = const TSLanguage* (*)();
+	TSLanguageFunc pfnLanguage = reinterpret_cast<TSLanguageFunc>(
+		GetProcAddress(m_hDll, sFuncName.c_str()));
+	if (!pfnLanguage)
+	{
+		FreeLibrary(m_hDll);
+		m_hDll = nullptr;
+		return false;
+	}
+
+	m_pLanguage = pfnLanguage();
+	if (!m_pLanguage)
+	{
+		FreeLibrary(m_hDll);
+		m_hDll = nullptr;
+		return false;
+	}
+
+	// Load highlight query (.scm file)
+	// Expected name: <language>-highlights.scm (e.g. fsharp-highlights.scm)
+	// NOTE(review): originally described as "required", but a failed load is
+	// not treated as an error here -- confirm that callers check for it.
+	const std::wstring sHighlightPath = sGrammarDir + L"\\" + sLanguage + L"-highlights.scm";
+	m_pHighlightQuery = LoadQuery(sHighlightPath);
+
+	// Load locals query (.scm file) - optional
+	// Expected name: <language>-locals.scm (e.g. fsharp-locals.scm)
+	const std::wstring sLocalsPath = sGrammarDir + L"\\" + sLanguage + L"-locals.scm";
+	m_pLocalsQuery = LoadQuery(sLocalsPath);
+
+	// Load tags query (.scm file) - optional
+	// Expected name: <language>-tags.scm (e.g. python-tags.scm)
+	const std::wstring sTagsPath = sGrammarDir + L"\\" + sLanguage + L"-tags.scm";
+	m_pTagsQuery = LoadQuery(sTagsPath);
+
+	// Load injection query (.scm file) - optional
+	// Expected name: <language>-injections.scm (e.g. html-injections.scm)
+	const std::wstring sInjectionPath = sGrammarDir + L"\\" + sLanguage + L"-injections.scm";
+	m_pInjectionQuery = LoadQuery(sInjectionPath);
+
+	return true;
+}
+
+
+// ============================================================================
+// CTreeSitterParser
+// ============================================================================
+
+/**
+ * @brief Construct an idle parser with no tree and no language.
+ *
+ * The TSParser itself is not allocated here; EnsureParser() creates it on
+ * first use.
+ */
+CTreeSitterParser::CTreeSitterParser()
+	: m_pParser{ nullptr }
+	, m_pTree{ nullptr }
+	, m_pLang{ nullptr }
+	, m_bDirty{ false }
+	, m_nLineCount{ 0 }
+{
+}
+
+/**
+ * @brief Delete the syntax tree (if any) and then the parser (if any).
+ */
+CTreeSitterParser::~CTreeSitterParser()
+{
+	if (m_pTree != nullptr)
+	{
+		ts_tree_delete(m_pTree);
+	}
+	if (m_pParser != nullptr)
+	{
+		ts_parser_delete(m_pParser);
+	}
+}
+
+/**
+ * @brief Create the TSParser instance on demand.
+ *
+ * ts_parser_new() is deliberately kept out of the constructor so that it is
+ * not called for every CMergeEditView even when tree-sitter isn't needed.
+ * Calling this again once a parser exists does nothing.
+ */
+void CTreeSitterParser::EnsureParser()
+{
+	if (m_pParser != nullptr)
+		return;
+
+	m_pParser = ts_parser_new();
+}
+
+/**
+ * @brief Select the language used for subsequent parses.
+ *
+ * The pointer is stored as given. When it refers to a language whose
+ * GetLanguage() is non-null, the parser is created if necessary and switched
+ * to that language. Cached parse state is always dropped via Invalidate().
+ *
+ * @param pLang Language to use; may be nullptr.
+ */
+void CTreeSitterParser::SetLanguage(const CTreeSitterLanguage* pLang)
+{
+	m_pLang = pLang;
+
+	const bool bUsable = (pLang != nullptr) && (pLang->GetLanguage() != nullptr);
+	if (bUsable)
+	{
+		EnsureParser();
+		if (m_pParser != nullptr)
+			ts_parser_set_language(m_pParser, pLang->GetLanguage());
+	}
+
+	Invalidate();
+}
+
+/**
+ * @brief Discard the syntax tree and every cache derived from it.
+ *
+ * After this call the parser holds no tree, all per-document containers are
+ * empty, the counters are zero and the dirty flag is cleared.
+ */
+void CTreeSitterParser::Invalidate()
+{
+	if (m_pTree != nullptr)
+	{
+		ts_tree_delete(m_pTree);
+		m_pTree = nullptr;
+	}
+
+	// Scalar state
+	m_bDirty = false;
+	m_nLineCount = 0;
+	m_nextBlockOrder = 0;
+
+	// Document text and per-line caches
+	m_documentText.clear();
+	m_lineUtf8.clear();
+	m_lineBlocks.clear();
+
+	// Locals-query and tags-query results
+	m_localScopes.clear();
+	m_localRefHighlights.clear();
+	m_pendingRefs.clear();
+	m_tagDefs.clear();
+	m_tagRefs.clear();
+}
+
+/**
+ * @brief Parse a whole document and rebuild the highlight caches.
+ *
+ * The lines are converted to UTF-8 (from UTF-16 in _UNICODE builds), stored
+ * per line in m_lineUtf8 and joined with '\n' into m_documentText (no
+ * trailing newline after the last line). The joined text is handed to
+ * tree-sitter; on success the locals, tags, highlight and injection queries
+ * are run in that order and the per-line cache is rebuilt.
+ *
+ * Does nothing when no parser or no language is available.
+ *
+ * @param ppszLines Array of nLineCount line pointers; entries may be null.
+ * @param pnLineLengths Array of nLineCount line lengths in tchar_t units.
+ * @param nLineCount Number of lines in both arrays.
+ */
+void CTreeSitterParser::ParseDocument(const tchar_t* const* ppszLines,
+	const int* pnLineLengths,
+	int nLineCount)
+{
+	EnsureParser();
+	if (!m_pParser || !m_pLang || !m_pLang->GetLanguage())
+		return;
+
+	// Build contiguous document text from line pointers.
+	// Tree-sitter requires UTF-8 input, so we convert from tchar_t (wchar_t on Windows).
+	m_documentText.clear();
+	m_lineUtf8.clear();
+	m_nLineCount = nLineCount;
+
+	// Pre-calculate a size estimate: at most 3 UTF-8 bytes per UTF-16 unit,
+	// plus one byte per line for the '\n' separator. Non-positive lengths
+	// contribute no text (the conversion loop below skips them as well).
+	size_t totalEstimate = 0;
+	for (int i = 0; i < nLineCount; i++)
+	{
+		if (pnLineLengths[i] > 0)
+			totalEstimate += static_cast<size_t>(pnLineLengths[i]) * 3;
+		totalEstimate += 1;
+	}
+	m_documentText.reserve(totalEstimate);
+	// A negative count must not wrap around to a huge unsigned size.
+	m_lineUtf8.resize(nLineCount > 0 ? static_cast<size_t>(nLineCount) : 0);
+
+	for (int i = 0; i < nLineCount; i++)
+	{
+		if (ppszLines[i] && pnLineLengths[i] > 0)
+		{
+#ifdef _UNICODE
+			// Convert UTF-16 to UTF-8: first call sizes the buffer, second call fills it.
+			const int nLen = WideCharToMultiByte(CP_UTF8, 0,
+				ppszLines[i], pnLineLengths[i],
+				nullptr, 0, nullptr, nullptr);
+			if (nLen > 0)
+			{
+				// Convert straight into the per-line cache to avoid an extra copy.
+				std::string& lineUtf8 = m_lineUtf8[i];
+				lineUtf8.assign(static_cast<size_t>(nLen), '\0');
+				WideCharToMultiByte(CP_UTF8, 0,
+					ppszLines[i], pnLineLengths[i],
+					&lineUtf8[0], nLen,
+					nullptr, nullptr);
+				m_documentText.append(lineUtf8);
+			}
+#else
+			m_lineUtf8[i].assign(ppszLines[i], pnLineLengths[i]);
+			m_documentText.append(ppszLines[i], pnLineLengths[i]);
+#endif
+		}
+		if (i < nLineCount - 1)
+			m_documentText += '\n';
+	}
+
+	// Parse the document, handing the previous tree (if any) to tree-sitter
+	// so unchanged subtrees can be reused.
+	// NOTE(review): the tree-sitter API documents that an old tree passed here
+	// must already have been updated with ts_tree_edit() to match the new
+	// text exactly. Confirm that every path reaching this point with a
+	// non-null m_pTree has gone through NotifyEdit(); otherwise pass nullptr.
+	TSTree* pOldTree = m_pTree;
+	m_pTree = ts_parser_parse_string(
+		m_pParser,
+		pOldTree,
+		m_documentText.c_str(),
+		static_cast<uint32_t>(m_documentText.size()));
+
+	if (pOldTree)
+		ts_tree_delete(pOldTree);
+
+	if (m_pTree)
+	{
+		m_nextBlockOrder = 0;
+		// 1. Run locals query first to build scope/def/ref information
+		RunLocalsQuery();
+		// 2. Run tags query for same-file symbol definitions/references
+		RunTagsQuery();
+		// 3. Run highlight query (uses locals info for scope-aware coloring)
+		RunHighlightQuery();
+		// 4. Run injection query to handle embedded languages
+		RunInjectionQuery();
+		BuildLineCache(nLineCount);
+	}
+
+	// Cache is now fresh
+	m_bDirty = false;
+}
+
+/**
+ * @brief Notify the parser of an edit for incremental reparsing.
+ *
+ * Reads the last UndoRecord from the buffer to get the edit position,
+ * then calls ts_tree_edit() on the existing tree. This allows tree-sitter
+ * to reuse unchanged subtrees during the next reparse, which is
+ * significantly faster for large documents.
+ *
+ * Falls back to a simple MarkDirty() if the tree or undo info is unavailable.
+ */
+void CTreeSitterParser::NotifyEdit(CCrystalTextBuffer* pBuf)
+{
+ m_bDirty = true;
+
+ // If we don't have a tree, there's nothing to edit incrementally
+ if (!m_pTree || !pBuf || !pBuf->CanUndo())
+ return;
+
+ int nUndoPos = pBuf->GetUndoPosition();
+ if (nUndoPos <= 0)
+ return;
+
+ UndoRecord ur = pBuf->GetUndoRecord(nUndoPos - 1);
+ bool bInsert = (ur.m_dwFlags & UNDO_INSERT) != 0;
+
+ // Convert CEPoint (char-based, line/col) to UTF-8 byte offsets.
+ // m_lineUtf8 holds the per-line UTF-8 from the previous parse.
+ // We need:
+ // start_byte: absolute byte offset of the edit start in old doc
+ // old_end_byte: absolute byte offset of the old content end
+ // new_end_byte: absolute byte offset of the new content end
+
+ // Helper: compute absolute byte offset from (line, charPos) using old m_lineUtf8
+ // Each line in m_documentText is followed by '\n' (except the last)
+ auto charPosToByteOffset = [this](int line, int charPos) -> uint32_t
+ {
+ uint32_t byteOffset = 0;
+ int nLines = static_cast(m_lineUtf8.size());
+
+ // Sum up all lines before 'line'
+ for (int i = 0; i < line && i < nLines; i++)
+ {
+ byteOffset += static_cast(m_lineUtf8[i].size());
+ byteOffset += 1; // for '\n' separator
+ }
+
+ // Add the byte offset within the target line
+ if (line >= 0 && line < nLines && charPos > 0)
+ {
+#ifdef _UNICODE
+ const std::string& utf8Line = m_lineUtf8[line];
+ // Convert charPos (UTF-16 units) to UTF-8 byte count
+ // We need the original line text to do this properly.
+ // Use the stored UTF-8 line and convert back to count bytes.
+ int nUtf16Len = MultiByteToWideChar(CP_UTF8, 0,
+ utf8Line.c_str(), static_cast(utf8Line.size()),
+ nullptr, 0);
+ if (charPos >= nUtf16Len)
+ {
+ byteOffset += static_cast(utf8Line.size());
+ }
+ else
+ {
+ // Walk the UTF-8 bytes counting UTF-16 chars until we reach charPos
+ int utf16Count = 0;
+ uint32_t byteIdx = 0;
+ const uint8_t* p = reinterpret_cast(utf8Line.c_str());
+ uint32_t lineLen = static_cast(utf8Line.size());
+ while (byteIdx < lineLen && utf16Count < charPos)
+ {
+ uint8_t ch = p[byteIdx];
+ uint32_t seqLen;
+ if (ch < 0x80)
+ seqLen = 1;
+ else if (ch < 0xE0)
+ seqLen = 2;
+ else if (ch < 0xF0)
+ seqLen = 3;
+ else
+ seqLen = 4;
+
+ byteIdx += seqLen;
+ // A 4-byte UTF-8 sequence produces a surrogate pair (2 UTF-16 units)
+ utf16Count += (seqLen == 4) ? 2 : 1;
+ }
+ byteOffset += byteIdx;
+ }
+#else
+ byteOffset += static_cast(charPos);
+#endif
+ }
+ else if (line >= nLines && line > 0)
+ {
+ // Line is beyond our cached data -- use end of document
+ byteOffset = static_cast(m_documentText.size());
+ }
+
+ return byteOffset;
+ };
+
+ // Helper: compute TSPoint (row, column in bytes) from (line, charPos)
+ auto charPosToTSPoint = [this](int line, int charPos) -> TSPoint
+ {
+ TSPoint pt;
+ pt.row = static_cast(line);
+ pt.column = 0;
+
+ if (charPos > 0 && line >= 0 && line < static_cast(m_lineUtf8.size()))
+ {
+ const std::string& utf8Line = m_lineUtf8[line];
+#ifdef _UNICODE
+ int nUtf16Len = MultiByteToWideChar(CP_UTF8, 0,
+ utf8Line.c_str(), static_cast(utf8Line.size()),
+ nullptr, 0);
+ if (charPos >= nUtf16Len)
+ {
+ pt.column = static_cast(utf8Line.size());
+ }
+ else
+ {
+ int utf16Count = 0;
+ uint32_t byteIdx = 0;
+ const uint8_t* p = reinterpret_cast(utf8Line.c_str());
+ uint32_t lineLen = static_cast(utf8Line.size());
+ while (byteIdx < lineLen && utf16Count < charPos)
+ {
+ uint8_t ch = p[byteIdx];
+ uint32_t seqLen;
+ if (ch < 0x80)
+ seqLen = 1;
+ else if (ch < 0xE0)
+ seqLen = 2;
+ else if (ch < 0xF0)
+ seqLen = 3;
+ else
+ seqLen = 4;
+ byteIdx += seqLen;
+ utf16Count += (seqLen == 4) ? 2 : 1;
+ }
+ pt.column = byteIdx;
+ }
+#else
+ pt.column = static_cast(charPos);
+#endif
+ }
+ return pt;
+ };
+
+ TSInputEdit edit;
+ memset(&edit, 0, sizeof(edit));
+
+ if (bInsert)
+ {
+ // Insert: old range is empty (start == old_end), new range is the inserted text
+ edit.start_byte = charPosToByteOffset(ur.m_ptStartPos.y, ur.m_ptStartPos.x);
+ edit.old_end_byte = edit.start_byte;
+ edit.start_point = charPosToTSPoint(ur.m_ptStartPos.y, ur.m_ptStartPos.x);
+ edit.old_end_point = edit.start_point;
+
+ // For new_end, we need the end position after insert.
+ // The UndoRecord's m_ptEndPos gives us the end position in the *new* document.
+ // But our m_lineUtf8 is from the *old* document, so we can't use
+ // charPosToByteOffset for the end position directly.
+ // Instead, compute new_end_byte = start_byte + utf8_length_of_inserted_text.
+ // Normalize to LF-only: ParseDocument() concatenates lines with '\n' (no '\r'),
+ // so '\r' bytes must be excluded from all byte-offset calculations.
+ const tchar_t* pInsText = ur.GetText();
+ size_t nInsLen = ur.GetTextLength();
+#ifdef _UNICODE
+ // Convert to UTF-8 and strip '\r' to match ParseDocument()'s representation.
+ int nRawUtf8Len = WideCharToMultiByte(CP_UTF8, 0,
+ pInsText, static_cast(nInsLen),
+ nullptr, 0, nullptr, nullptr);
+ std::string insUtf8;
+ if (nRawUtf8Len > 0)
+ {
+ insUtf8.resize(nRawUtf8Len);
+ WideCharToMultiByte(CP_UTF8, 0, pInsText, static_cast(nInsLen),
+ &insUtf8[0], nRawUtf8Len, nullptr, nullptr);
+ insUtf8.erase(std::remove(insUtf8.begin(), insUtf8.end(), '\r'), insUtf8.end());
+ }
+ int nUtf8Len = static_cast(insUtf8.size());
+ edit.new_end_byte = edit.start_byte + static_cast(nUtf8Len);
+#else
+ // Exclude '\r' characters to match ParseDocument()'s LF-only representation.
+ int nCrCount = static_cast(std::count(pInsText, pInsText + nInsLen, static_cast('\r')));
+ edit.new_end_byte = edit.start_byte + static_cast(nInsLen - nCrCount);
+#endif
+ edit.new_end_point.row = static_cast(ur.m_ptEndPos.y);
+ // For the column, we can compute it from the text: count bytes after last newline
+ uint32_t lastNewlineBytes = 0;
+ bool foundNewline = false;
+#ifdef _UNICODE
+ // Use the already-normalized UTF-8 string to find the last newline position.
+ if (nUtf8Len > 0)
+ {
+ auto lastNL = insUtf8.rfind('\n');
+ if (lastNL != std::string::npos)
+ {
+ foundNewline = true;
+ lastNewlineBytes = static_cast(nUtf8Len - lastNL - 1);
+ }
+ }
+#else
+ {
+ const char* pText = pInsText;
+ for (int i = static_cast(nInsLen) - 1; i >= 0; i--)
+ {
+ if (pText[i] == '\n') { foundNewline = true; lastNewlineBytes = nInsLen - i - 1; break; }
+ }
+ }
+#endif
+ if (foundNewline)
+ {
+ edit.new_end_point.column = lastNewlineBytes;
+ }
+ else
+ {
+ // No newline in inserted text: column = start column + inserted byte length
+ edit.new_end_point.column = edit.start_point.column +
+ (edit.new_end_byte - edit.start_byte);
+ }
+ }
+ else
+ {
+ // Delete: old range is the deleted text, new range is empty (start == new_end)
+ edit.start_byte = charPosToByteOffset(ur.m_ptStartPos.y, ur.m_ptStartPos.x);
+ edit.start_point = charPosToTSPoint(ur.m_ptStartPos.y, ur.m_ptStartPos.x);
+
+ // For old_end, we use the old document positions
+ edit.old_end_byte = charPosToByteOffset(ur.m_ptEndPos.y, ur.m_ptEndPos.x);
+ edit.old_end_point = charPosToTSPoint(ur.m_ptEndPos.y, ur.m_ptEndPos.x);
+
+ // After deletion, the cursor is at start
+ edit.new_end_byte = edit.start_byte;
+ edit.new_end_point = edit.start_point;
+ }
+
+ ts_tree_edit(m_pTree, &edit);
+}
+
+/**
+ * @brief Reparse the document if the cache is dirty and a language is set.
+ *
+ * Called lazily from ParseLine during the paint cycle, so we reparse at most once per paint, not once per keystroke.
+ * @param pBuffer Text buffer handed unchanged to ParseFromBuffer().
+ */
+void CTreeSitterParser::EnsureParsed(CCrystalTextBuffer* pBuffer)
+{
+ if (m_bDirty && m_pLang)
+ {
+ ParseFromBuffer(pBuffer);
+ // Nothing here clears m_bDirty; ParseFromBuffer is expected to do so via ParseDocument.
+ }
+}
+
+/**
+ * @brief Convert a UTF-8 byte offset within a line to a UTF-16 character position.
+ *
+ * Tree-sitter reports column positions as byte offsets in the UTF-8 text.
+ * WinMerge's TEXTBLOCK.m_nCharPos expects tchar_t (wchar_t) character indices.
+ * For pure ASCII, these are identical. For multi-byte UTF-8 / surrogate pairs,
+ * the UTF-8 prefix up to the offset is measured in UTF-16 code units instead.
+ *
+ * @param nLine Zero-based line index; if it is outside m_lineUtf8, byteCol is returned unconverted.
+ * @param byteCol Byte offset within the line's UTF-8 representation (clamped to the line length).
+ * @return Character position (index into the wchar_t line); 0 for an empty line or byteCol == 0.
+ */
+int CTreeSitterParser::Utf8ByteOffsetToCharPos(int nLine, uint32_t byteCol) const
+{
+ if (nLine < 0 || nLine >= static_cast(m_lineUtf8.size()))
+ return static_cast(byteCol);
+
+ const std::string& utf8Line = m_lineUtf8[nLine];
+ if (utf8Line.empty() || byteCol == 0)
+ return 0;
+
+ // Clamp to line length so the conversion below never reads past the line
+ uint32_t maxByte = static_cast(utf8Line.size());
+ if (byteCol > maxByte)
+ byteCol = maxByte;
+
+#ifdef _UNICODE
+ // Size-only call (no output buffer): yields the UTF-16 length of the prefix [0..byteCol)
+ int nChars = MultiByteToWideChar(CP_UTF8, 0,
+ utf8Line.c_str(), static_cast(byteCol),
+ nullptr, 0);
+ return nChars;
+#else
+ return static_cast(byteCol);
+#endif
+}
+
+/**
+ * @brief Extract a #set! predicate property value from a query pattern.
+ *
+ * Tree-sitter predicates like (#set! injection.language "javascript") are
+ * encoded as sequences of TSQueryPredicateStep:
+ * [String "set!", String "injection.language", String "javascript", Done]
+ *
+ * @param pQuery The query containing the pattern.
+ * @param patternIndex The pattern index.
+ * @param key The property key to look for (e.g. "injection.language").
+ * @return The value of the first #set! for @p key that has a string value, or an empty string if none is found.
+ */
+std::string CTreeSitterParser::GetSetProperty(const TSQuery* pQuery,
+ uint32_t patternIndex,
+ const std::string& key)
+{
+ uint32_t stepCount = 0;
+ const TSQueryPredicateStep* steps =
+ ts_query_predicates_for_pattern(pQuery, patternIndex, &stepCount);
+ if (!steps || stepCount == 0)
+ return std::string();
+
+ // Scan for the step triple "set!", key, value (the loop bound keeps i + 2 in range)
+ for (uint32_t i = 0; i + 2 < stepCount; i++)
+ {
+ // A candidate starts at a string step equal to "set!"
+ if (steps[i].type != TSQueryPredicateStepTypeString)
+ continue;
+
+ uint32_t nameLen = 0;
+ const char* name = ts_query_string_value_for_id(pQuery, steps[i].value_id, &nameLen);
+ if (!name || std::string(name, nameLen) != "set!")
+ continue;
+
+ // Next step should be the property key (string)
+ if (i + 1 >= stepCount || steps[i + 1].type != TSQueryPredicateStepTypeString)
+ continue;
+
+ uint32_t keyLen = 0;
+ const char* keyStr = ts_query_string_value_for_id(pQuery, steps[i + 1].value_id, &keyLen);
+ if (!keyStr || std::string(keyStr, keyLen) != key)
+ {
+ // Different key: skip to this predicate's Done sentinel (the loop's i++ then moves past it)
+ while (i < stepCount && steps[i].type != TSQueryPredicateStepTypeDone)
+ i++;
+ continue;
+ }
+
+ // Next step should be the property value (string). NOTE(review): a value-less "#set! key" (Done right after the key) is reported as not found -- confirm that is intended.
+ if (i + 2 >= stepCount || steps[i + 2].type != TSQueryPredicateStepTypeString)
+ continue;
+
+ uint32_t valLen = 0;
+ const char* valStr = ts_query_string_value_for_id(pQuery, steps[i + 2].value_id, &valLen);
+ if (valStr)
+ return std::string(valStr, valLen);
+ }
+
+ return std::string();
+}
+// Rebuild m_tagDefs and m_tagRefs from the language's tags query over the current tree.
+void CTreeSitterParser::RunTagsQuery()
+{
+ m_tagDefs.clear();
+ m_tagRefs.clear();
+
+ if (!m_pTree || !m_pLang || !m_pLang->GetTagsQuery())
+ return;
+
+ const TSQuery* pQuery = m_pLang->GetTagsQuery();
+ TSNode rootNode = ts_tree_root_node(m_pTree);
+
+ TSQueryCursor* pCursor = ts_query_cursor_new();
+ if (!pCursor)
+ return;
+
+ ts_query_cursor_exec(pCursor, pQuery, rootNode);
+
+ TSQueryMatch match;
+ while (ts_query_cursor_next_match(pCursor, &match))
+ {
+ std::string name;
+ uint32_t nameStart = 0;
+ uint32_t nameEnd = 0;
+ bool hasName = false;
+
+ struct Range
+ {
+ uint32_t startByte;
+ uint32_t endByte;
+ };
+ std::vector defs;
+ std::vector refs;
+
+ for (uint16_t i = 0; i < match.capture_count; ++i)
+ {
+ TSQueryCapture capture = match.captures[i];
+ TSNode node = capture.node;
+
+ uint32_t captureNameLen = 0;
+ const char* captureName = ts_query_capture_name_for_id(
+ pQuery, capture.index, &captureNameLen);
+ std::string sCapture(captureName, captureNameLen);
+
+ const uint32_t nodeStart = ts_node_start_byte(node);
+ const uint32_t nodeEnd = ts_node_end_byte(node);
+
+ if (sCapture == "name")
+ {
+ if (nodeStart < m_documentText.size() && nodeEnd <= m_documentText.size())
+ {
+ name = m_documentText.substr(nodeStart, nodeEnd - nodeStart);
+ nameStart = nodeStart;
+ nameEnd = nodeEnd;
+ hasName = true;
+ }
+ }
+ else if (HasCapturePrefix(sCapture, "definition"))
+ {
+ defs.push_back({ nodeStart, nodeEnd });
+ }
+ else if (HasCapturePrefix(sCapture, "reference"))
+ {
+ refs.push_back({ nodeStart, nodeEnd });
+ }
+ }
+ // A match without a "name" capture inside m_documentText contributes nothing.
+ if (!hasName)
+ continue;
+ // References are recorded only when the match has no definition capture; the name's own range is used when it lies inside the captured range.
+ if (defs.empty() && !refs.empty())
+ {
+ for (const auto& ref : refs)
+ {
+ const bool useNameRange =
+ nameStart >= ref.startByte && nameEnd <= ref.endByte;
+ m_tagRefs.push_back({
+ name,
+ useNameRange ? nameStart : ref.startByte,
+ useNameRange ? nameEnd : ref.endByte,
+ });
+ }
+ }
+
+ for (const auto& def : defs)
+ {
+ const bool useNameRange =
+ nameStart >= def.startByte && nameEnd <= def.endByte;
+ m_tagDefs.push_back({
+ name,
+ useNameRange ? nameStart : def.startByte,
+ useNameRange ? nameEnd : def.endByte,
+ });
+ }
+ }
+
+ ts_query_cursor_delete(pCursor);
+}
+
+/**
+ * @brief Run the locals query to build scope/definition/reference information.
+ *
+ * Processes locals.scm captures:
+ * @local.scope - Defines a scope boundary
+ * @local.definition - Defines a local variable/symbol
+ * @local.reference - References a local variable/symbol
+ *
+ * Scopes (with their definitions) go to m_localScopes and unresolved references to
+ * m_pendingRefs; RunHighlightQuery later resolves them into m_localRefHighlights (cleared here).
+ */
+void CTreeSitterParser::RunLocalsQuery()
+{
+ m_localScopes.clear();
+ m_localRefHighlights.clear();
+
+ if (!m_pTree || !m_pLang || !m_pLang->GetLocalsQuery())
+ return;
+
+ const TSQuery* pQuery = m_pLang->GetLocalsQuery();
+ TSNode rootNode = ts_tree_root_node(m_pTree);
+
+ TSQueryCursor* pCursor = ts_query_cursor_new();
+ if (!pCursor)
+ return;
+
+ ts_query_cursor_exec(pCursor, pQuery, rootNode);
+
+ // Temporary storage for references (resolved after all defs are collected)
+ std::vector references;
+
+ TSQueryMatch match;
+ while (ts_query_cursor_next_match(pCursor, &match))
+ {
+ for (uint16_t i = 0; i < match.capture_count; i++)
+ {
+ TSQueryCapture capture = match.captures[i];
+ TSNode node = capture.node;
+
+ uint32_t nameLen = 0;
+ const char* captureName = ts_query_capture_name_for_id(
+ pQuery, capture.index, &nameLen);
+ std::string sCapture(captureName, nameLen);
+
+ uint32_t nodeStart = ts_node_start_byte(node);
+ uint32_t nodeEnd = ts_node_end_byte(node);
+
+ if (HasCapturePrefix(sCapture, "local.scope"))
+ {
+ // Scopes inherit by default; only (#set! local.scope-inherits "false") on the pattern turns that off
+ bool inherits = true;
+ std::string inheritVal = GetSetProperty(pQuery, match.pattern_index, "local.scope-inherits");
+ if (inheritVal == "false")
+ inherits = false;
+
+ LocalScope scope;
+ scope.startByte = nodeStart;
+ scope.endByte = nodeEnd;
+ scope.inherits = inherits;
+ m_localScopes.push_back(scope);
+ }
+ else if (HasCapturePrefix(sCapture, "local.definition"))
+ {
+ // Extract the text of the definition node as the symbol name
+ if (nodeStart < m_documentText.size() && nodeEnd <= m_documentText.size())
+ {
+ std::string defName = m_documentText.substr(nodeStart, nodeEnd - nodeStart);
+
+ // Find the innermost enclosing scope collected so far and add this definition; with no such scope the definition is dropped
+ LocalScope* pBestScope = nullptr;
+ for (auto& scope : m_localScopes)
+ {
+ if (nodeStart >= scope.startByte && nodeEnd <= scope.endByte)
+ {
+ if (!pBestScope ||
+ (scope.endByte - scope.startByte) < (pBestScope->endByte - pBestScope->startByte))
+ {
+ pBestScope = &scope;
+ }
+ }
+ }
+ if (pBestScope)
+ {
+ LocalDef def;
+ def.name = defName;
+ def.startByte = nodeStart;
+ def.endByte = nodeEnd;
+ def.highlight = -1; // Will be resolved during RunHighlightQuery
+ pBestScope->defs.push_back(def);
+ }
+ }
+ }
+ else if (HasCapturePrefix(sCapture, "local.reference"))
+ {
+ if (nodeStart < m_documentText.size() && nodeEnd <= m_documentText.size())
+ {
+ PendingRef ref;
+ ref.name = m_documentText.substr(nodeStart, nodeEnd - nodeStart);
+ ref.startByte = nodeStart;
+ ref.endByte = nodeEnd;
+ ref.scopeStartByte = nodeStart;
+ references.push_back(ref);
+ }
+ }
+ }
+ }
+
+ ts_query_cursor_delete(pCursor);
+
+ // Sort scopes by start byte, then by size (smallest/innermost first for lookup)
+ std::sort(m_localScopes.begin(), m_localScopes.end(),
+ [](const LocalScope& a, const LocalScope& b)
+ {
+ if (a.startByte != b.startByte)
+ return a.startByte < b.startByte;
+ return (a.endByte - a.startByte) < (b.endByte - b.startByte);
+ });
+
+ // Hand the collected references over for resolution in RunHighlightQuery.
+ // They cannot be resolved yet: every definition's highlight is still -1
+ // until RunHighlightQuery has assigned highlights to the definition nodes.
+ //
+ // The resolution is therefore a two-pass approach inside RunHighlightQuery:
+ // first assign highlights to definition nodes, then resolve the pending
+ // references against them.
+ //
+ // Each pending reference keeps its name and byte range. The resolved result
+ // is stored in m_localRefHighlights under the key (startByte << 32 | endByte).
+ // m_pendingRefs is only replaced here, i.e. on the path that ran the query.
+ m_pendingRefs = std::move(references);
+}
+
+/**
+ * @brief Run the highlight query against the parsed tree and collect token ranges.
+ *
+ * This produces per-line block arrays by walking all highlight query matches
+ * and mapping tree-sitter byte positions to WinMerge character positions. Returns early, leaving m_lineBlocks untouched, if the tree, language, query or cursor is unavailable.
+ *
+ * If locals information is available, definition nodes get their highlights
+ * recorded, and references are resolved to use the same highlight as their
+ * matching definition.
+ */
+void CTreeSitterParser::RunHighlightQuery()
+{
+ if (!m_pTree || !m_pLang || !m_pLang->GetHighlightQuery())
+ return;
+
+ const TSQuery* pQuery = m_pLang->GetHighlightQuery();
+ TSNode rootNode = ts_tree_root_node(m_pTree);
+
+ TSQueryCursor* pCursor = ts_query_cursor_new();
+ if (!pCursor)
+ return;
+
+ ts_query_cursor_exec(pCursor, pQuery, rootNode);
+
+ // Temporary structure to collect all highlights
+ struct HighlightEntry
+ {
+ uint32_t startRow;
+ uint32_t startCol; // UTF-8 byte offset in line
+ uint32_t endRow;
+ uint32_t endCol; // UTF-8 byte offset in line
+ uint32_t startByte;
+ uint32_t endByte;
+ int colorIndex;
+ int priority;
+ uint32_t order;
+ };
+ std::vector highlights;
+
+ // Map from (startByte, endByte) to colorIndex for definition resolution.
+ // Key: (startByte << 32 | endByte). This assumes files < 4GB and that
+ // nodes with identical byte ranges should share the same highlight.
+ std::unordered_map nodeHighlightMap;
+
+ TSQueryMatch match;
+ while (ts_query_cursor_next_match(pCursor, &match))
+ {
+ for (uint16_t i = 0; i < match.capture_count; i++)
+ {
+ TSQueryCapture capture = match.captures[i];
+ TSNode node = capture.node;
+
+ uint32_t captureNameLen = 0;
+ const char* captureName = ts_query_capture_name_for_id(
+ pQuery, capture.index, &captureNameLen);
+
+ std::string sName(captureName, captureNameLen);
+ int colorIndex = m_colorMap.MapCapture(sName);
+
+ uint32_t nodeStartByte = ts_node_start_byte(node);
+ uint32_t nodeEndByte = ts_node_end_byte(node);
+ TSPoint startPoint = ts_node_start_point(node);
+ TSPoint endPoint = ts_node_end_point(node);
+
+ HighlightEntry entry;
+ entry.startRow = startPoint.row;
+ entry.startCol = startPoint.column;
+ entry.endRow = endPoint.row;
+ entry.endCol = endPoint.column;
+ entry.startByte = nodeStartByte;
+ entry.endByte = nodeEndByte;
+ entry.colorIndex = colorIndex;
+ entry.priority = MakeCapturePriority(sName, nodeStartByte, nodeEndByte);
+ entry.order = NextBlockOrder();
+
+ highlights.push_back(entry);
+
+ // Record this node's highlight for locals resolution (a later capture of the same byte range overwrites it)
+ uint64_t nodeKey = (static_cast(nodeStartByte) << 32) |
+ static_cast(nodeEndByte);
+ nodeHighlightMap[nodeKey] = colorIndex;
+ }
+ }
+
+ ts_query_cursor_delete(pCursor);
+
+ // --- Locals resolution ---
+ // Now that we know the highlight for each node, assign highlights to
+ // definitions and resolve references.
+ if (!m_localScopes.empty())
+ {
+ // Pass 1: Assign highlights to definitions based on their node's highlight
+ for (auto& scope : m_localScopes)
+ {
+ for (auto& def : scope.defs)
+ {
+ uint64_t defKey = (static_cast(def.startByte) << 32) |
+ static_cast(def.endByte);
+ auto it = nodeHighlightMap.find(defKey);
+ if (it != nodeHighlightMap.end())
+ def.highlight = it->second;
+ }
+ }
+
+ // Pass 2: Resolve references — find matching definition in enclosing scopes
+ for (const auto& ref : m_pendingRefs)
+ {
+ int resolvedHighlight = -1;
+
+ // Search scopes from innermost to outermost
+ // Find all scopes that contain this reference
+ std::vector enclosingScopes;
+ for (const auto& scope : m_localScopes)
+ {
+ if (ref.startByte >= scope.startByte && ref.endByte <= scope.endByte)
+ enclosingScopes.push_back(&scope);
+ }
+
+ // Sort by size (smallest = innermost first)
+ std::sort(enclosingScopes.begin(), enclosingScopes.end(),
+ [](const LocalScope* a, const LocalScope* b)
+ {
+ return (a->endByte - a->startByte) < (b->endByte - b->startByte);
+ });
+
+ // Search for a matching definition
+ for (const auto* pScope : enclosingScopes)
+ {
+ for (const auto& def : pScope->defs)
+ {
+ // Match by name; the definition must start at or before the reference and have a resolved highlight
+ if (def.name == ref.name && def.startByte <= ref.startByte && def.highlight >= 0)
+ {
+ resolvedHighlight = def.highlight;
+ break;
+ }
+ }
+ if (resolvedHighlight >= 0)
+ break;
+ // If this scope doesn't inherit, stop searching
+ if (!pScope->inherits)
+ break;
+ }
+
+ if (resolvedHighlight >= 0)
+ {
+ uint64_t refKey = (static_cast(ref.startByte) << 32) |
+ static_cast(ref.endByte);
+ m_localRefHighlights[refKey] = resolvedHighlight;
+ }
+ }
+
+ // Pass 3: Apply resolved reference highlights to the highlight entries
+ for (auto& h : highlights)
+ {
+ uint64_t key = (static_cast(h.startByte) << 32) |
+ static_cast(h.endByte);
+ auto it = m_localRefHighlights.find(key);
+ if (it != m_localRefHighlights.end())
+ h.colorIndex = it->second;
+ }
+ }
+
+ // Sort by start position (row, then column)
+ std::sort(highlights.begin(), highlights.end(),
+ [](const HighlightEntry& a, const HighlightEntry& b)
+ {
+ if (a.startRow != b.startRow)
+ return a.startRow < b.startRow;
+ return a.startCol < b.startCol;
+ });
+
+ // Build per-line block arrays (rebuilt from scratch with m_nLineCount entries)
+ m_lineBlocks.clear();
+ m_lineBlocks.resize(m_nLineCount);
+
+ for (const auto& h : highlights)
+ {
+ // Handle tokens that span multiple lines (e.g. multi-line strings/comments)
+ for (uint32_t row = h.startRow; row <= h.endRow && row < static_cast(m_nLineCount); row++)
+ {
+ uint32_t byteCol = (row == h.startRow) ? h.startCol : 0;
+
+ // Convert UTF-8 byte offset to UTF-16 character position
+ int charPos = Utf8ByteOffsetToCharPos(static_cast(row), byteCol);
+
+ TreeSitterLineBlock block;
+ block.nCharPos = charPos;
+ block.nColorIndex = h.colorIndex;
+ block.nPriority = h.priority;
+ block.nOrder = h.order;
+ m_lineBlocks[row].push_back(block);
+ }
+
+ // Emit a block at the end of the capture to restore normal color, so the token's color
+ // does not "bleed" past its end. INT_MIN priority lets any other block at that position win in BuildLineCache.
+ if (h.endRow < static_cast(m_nLineCount))
+ {
+ uint32_t endByteCol = h.endCol;
+
+ // Convert UTF-8 byte offset at token end to UTF-16 character position
+ int endCharPos = Utf8ByteOffsetToCharPos(static_cast(h.endRow), endByteCol);
+
+ TreeSitterLineBlock endBlock;
+ endBlock.nCharPos = endCharPos;
+ endBlock.nColorIndex = COLORINDEX_NORMALTEXT;
+ endBlock.nPriority = INT_MIN;
+ endBlock.nOrder = h.order;
+ m_lineBlocks[h.endRow].push_back(endBlock);
+ }
+ }
+}
+
+/**
+ * @brief Run the injection query to find embedded language regions.
+ *
+ * Processes injections.scm captures:
+ * @injection.content - The node whose content should be parsed as another language
+ * @injection.language - The node whose text specifies the language name
+ * (#set! injection.language "xxx") - Hard-coded language name
+ *
+ * For each injection region, this spawns a sub-parser with the appropriate
+ * language grammar, runs highlights on the injected content, and merges
+ * the results into the main highlight blocks.
+ */
+void CTreeSitterParser::RunInjectionQuery()
+{
+ if (!m_pTree || !m_pLang || !m_pLang->GetInjectionQuery())
+ return;
+
+ const TSQuery* pQuery = m_pLang->GetInjectionQuery();
+ TSNode rootNode = ts_tree_root_node(m_pTree);
+
+ TSQueryCursor* pCursor = ts_query_cursor_new();
+ if (!pCursor)
+ return;
+
+ ts_query_cursor_exec(pCursor, pQuery, rootNode);
+
+ // Collect injection regions
+ struct InjectionRegion
+ {
+ std::string language; // Target language name
+ uint32_t contentStart; // Byte offset in document
+ uint32_t contentEnd;
+ TSPoint startPoint;
+ TSPoint endPoint;
+ };
+ std::vector injections;
+
+ TSQueryMatch match;
+ while (ts_query_cursor_next_match(pCursor, &match))
+ {
+ std::string language;
+ TSNode contentNode = {};
+ bool hasContent = false;
+
+ // Check for #set! injection.language predicate first
+ language = GetSetProperty(pQuery, match.pattern_index, "injection.language");
+
+ // Also check for #set! injection.self. NOTE(review): GetSetProperty only reports keys followed by a string value -- confirm a bare (#set! injection.self) is detected.
+ if (language.empty())
+ {
+ std::string selfVal = GetSetProperty(pQuery, match.pattern_index, "injection.self");
+ if (!selfVal.empty())
+ {
+ // Use the current language
+ const std::wstring& wname = m_pLang->GetName();
+ for (wchar_t ch : wname)
+ language += static_cast(ch); // ASCII only
+ }
+ }
+
+ for (uint16_t i = 0; i < match.capture_count; i++)
+ {
+ TSQueryCapture capture = match.captures[i];
+
+ uint32_t nameLen = 0;
+ const char* captureName = ts_query_capture_name_for_id(
+ pQuery, capture.index, &nameLen);
+ std::string sCapture(captureName, nameLen);
+
+ if (sCapture == "injection.content")
+ {
+ contentNode = capture.node;
+ hasContent = true;
+ }
+ else if (sCapture == "injection.language" && language.empty())
+ {
+ // The captured node's text is the language name
+ uint32_t start = ts_node_start_byte(capture.node);
+ uint32_t end = ts_node_end_byte(capture.node);
+ if (start < m_documentText.size() && end <= m_documentText.size())
+ language = m_documentText.substr(start, end - start);
+ }
+ }
+
+ if (hasContent && !language.empty())
+ {
+ InjectionRegion region;
+ region.language = language;
+ region.contentStart = ts_node_start_byte(contentNode);
+ region.contentEnd = ts_node_end_byte(contentNode);
+ region.startPoint = ts_node_start_point(contentNode);
+ region.endPoint = ts_node_end_point(contentNode);
+ injections.push_back(region);
+ }
+ }
+
+ ts_query_cursor_delete(pCursor);
+
+ if (injections.empty())
+ return;
+
+ // Process each injection: look up the language, parse the content, run highlights
+ TreeSitterRegistry& registry = TreeSitterRegistry::Instance();
+
+ for (const auto& inj : injections)
+ {
+ // Convert language name to wstring for registry lookup
+ std::wstring wLangName;
+ for (char ch : inj.language)
+ wLangName += static_cast(ch);
+
+ // Resolve the injected language by name through the registry. The name
+ // from injections.scm is typically the tree-sitter language name
+ // (e.g. "javascript", "css"). Regions whose language cannot be found,
+ // or has no highlight query, are skipped.
+ // NOTE(review): GetLanguageForName() is defined outside this chunk --
+ // confirm whether it matches grammar names, extensions, or both.
+ const CTreeSitterLanguage* pInjLang = registry.GetLanguageForName(wLangName);
+ if (!pInjLang || !pInjLang->GetHighlightQuery())
+ continue;
+
+ // Extract the injection content
+ if (inj.contentStart >= m_documentText.size() || inj.contentEnd > m_documentText.size())
+ continue;
+
+ std::string injContent = m_documentText.substr(inj.contentStart, inj.contentEnd - inj.contentStart);
+ if (injContent.empty())
+ continue;
+
+ // Create a temporary parser for the injected content
+ TSParser* pInjParser = ts_parser_new();
+ if (!pInjParser)
+ continue;
+
+ ts_parser_set_language(pInjParser, pInjLang->GetLanguage());
+
+ TSTree* pInjTree = ts_parser_parse_string(
+ pInjParser, nullptr,
+ injContent.c_str(),
+ static_cast(injContent.size()));
+
+ if (pInjTree)
+ {
+ // Run highlight query on the injected tree
+ const TSQuery* pInjQuery = pInjLang->GetHighlightQuery();
+ TSNode injRoot = ts_tree_root_node(pInjTree);
+ TSQueryCursor* pInjCursor = ts_query_cursor_new();
+
+ if (pInjCursor)
+ {
+ ts_query_cursor_exec(pInjCursor, pInjQuery, injRoot);
+
+ TSQueryMatch injMatch;
+ while (ts_query_cursor_next_match(pInjCursor, &injMatch))
+ {
+ for (uint16_t ci = 0; ci < injMatch.capture_count; ci++)
+ {
+ TSQueryCapture cap = injMatch.captures[ci];
+ TSNode capNode = cap.node;
+
+ uint32_t capNameLen = 0;
+ const char* capName = ts_query_capture_name_for_id(
+ pInjQuery, cap.index, &capNameLen);
+ std::string sCapName(capName, capNameLen);
+ int colorIndex = m_colorMap.MapCapture(sCapName);
+
+ // Map the injected node's position back to the parent document.
+ // The injection content starts at inj.startPoint in the parent doc.
+ TSPoint capStart = ts_node_start_point(capNode);
+ TSPoint capEnd = ts_node_end_point(capNode);
+
+ // Translate rows/columns to parent document coordinates.
+ // Since the injection content is a contiguous substring of
+ // m_documentText, only a capture starting on the injection's first
+ // row needs the injection's start column added; later rows already
+ // use parent-line columns.
+ // NOTE: injection.combined (disjoint ranges parsed as one) is not supported.
+ // NOTE(review): capEnd.column is unused and no end block is emitted here, unlike RunHighlightQuery.
+ uint32_t parentStartRow = inj.startPoint.row + capStart.row;
+ uint32_t parentEndRow = inj.startPoint.row + capEnd.row;
+ uint32_t parentStartCol = (capStart.row == 0)
+ ? inj.startPoint.column + capStart.column
+ : capStart.column;
+
+ // Add to parent's line blocks. NOTE(review): rows are bounded by m_nLineCount, not m_lineBlocks.size() -- confirm they always agree.
+ for (uint32_t row = parentStartRow;
+ row <= parentEndRow && row < static_cast(m_nLineCount);
+ row++)
+ {
+ uint32_t byteCol = (row == parentStartRow) ? parentStartCol : 0;
+ int charPos = Utf8ByteOffsetToCharPos(static_cast(row), byteCol);
+
+ TreeSitterLineBlock block;
+ block.nCharPos = charPos;
+ block.nColorIndex = colorIndex;
+ block.nPriority = MakeCapturePriority(sCapName,
+ ts_node_start_byte(capNode), ts_node_end_byte(capNode));
+ block.nOrder = NextBlockOrder();
+ m_lineBlocks[row].push_back(block);
+ }
+ }
+ }
+
+ ts_query_cursor_delete(pInjCursor);
+ }
+
+ ts_tree_delete(pInjTree);
+ }
+
+ ts_parser_delete(pInjParser);
+ }
+}
+// Sort each cached line's blocks and keep one block per character position; nLineCount caps how many lines are processed.
+void CTreeSitterParser::BuildLineCache(int nLineCount)
+{
+ // Sort each line's blocks by character position, then ascending nPriority, then nOrder,
+ // so among blocks at one position the highest-priority one (latest nOrder on ties) sorts last.
+ for (int i = 0; i < nLineCount && i < static_cast(m_lineBlocks.size()); i++)
+ {
+ auto& blocks = m_lineBlocks[i];
+ std::sort(blocks.begin(), blocks.end(),
+ [](const TreeSitterLineBlock& a, const TreeSitterLineBlock& b)
+ {
+ if (a.nCharPos != b.nCharPos)
+ return a.nCharPos < b.nCharPos;
+ if (a.nPriority != b.nPriority)
+ return a.nPriority < b.nPriority;
+ return a.nOrder < b.nOrder;
+ });
+
+ // Collapse runs of entries at the same position down to the last one,
+ // i.e. the winner under the ordering established by the sort above
+ if (blocks.size() > 1)
+ {
+ std::vector deduped;
+ deduped.reserve(blocks.size());
+ for (size_t j = 0; j < blocks.size(); j++)
+ {
+ if (j + 1 < blocks.size() && blocks[j].nCharPos == blocks[j + 1].nCharPos)
+ continue; // skip, keep the later one
+ deduped.push_back(blocks[j]);
+ }
+ blocks = std::move(deduped);
+ }
+ }
+}
+// Append the cached blocks of line nLineIndex to pBuf starting at nActualItems (updated in place); nMaxBlocks > 0 caps the total.
+void CTreeSitterParser::GetLineBlocks(int nLineIndex,
+ CrystalLineParser::TEXTBLOCK* pBuf,
+ int& nActualItems,
+ int nMaxBlocks) const
+{
+ // Cookie-only mode (pBuf == nullptr): caller just wants the cookie.
+ // Tree-sitter doesn't use cookies, so nothing to do.
+ if (!pBuf)
+ return;
+
+ // Lines outside the cached range get no blocks; pBuf and nActualItems are left unchanged
+ if (nLineIndex < 0 || nLineIndex >= static_cast(m_lineBlocks.size()))
+ return;
+
+ const auto& blocks = m_lineBlocks[nLineIndex];
+
+ // The caller (GetTextBlocks) pre-inserts a NORMALTEXT block at position 0
+ // and sets nActualItems = 1 before calling ParseLine. We follow the same
+ // convention as existing parsers: append our blocks starting at the current
+ // nActualItems, but overwrite the caller's default block if we have our own
+ // block at position 0.
+
+ for (const auto& block : blocks)
+ {
+ // If the caller's last block is at the same position, overwrite it
+ // (same logic as DEFINE_BLOCK macro in crystallineparser.h)
+ if (nActualItems > 0 && pBuf[nActualItems - 1].m_nCharPos == block.nCharPos)
+ {
+ pBuf[nActualItems - 1].m_nColorIndex = block.nColorIndex;
+ pBuf[nActualItems - 1].m_nBgColorIndex = COLORINDEX_BKGND;
+ continue;
+ }
+
+ // Skip if same color as previous block (no visible change)
+ if (nActualItems > 0 && pBuf[nActualItems - 1].m_nColorIndex == block.nColorIndex)
+ continue;
+
+ // Bounds check: stop if we'd overflow the buffer (only enforced when nMaxBlocks > 0)
+ if (nMaxBlocks > 0 && nActualItems >= nMaxBlocks)
+ break;
+
+ pBuf[nActualItems].m_nCharPos = block.nCharPos;
+ pBuf[nActualItems].m_nColorIndex = block.nColorIndex;
+ pBuf[nActualItems].m_nBgColorIndex = COLORINDEX_BKGND;
+ nActualItems++;
+ }
+}
+
+
+// ============================================================================
+// TreeSitterRegistry
+// ============================================================================
+
+// Default extension -> language mappings (extensions are listed without a leading dot).
+// Each language value must equal NAME of a discovered tree-sitter-NAME.dll to be usable. Users can extend this via configuration.
+static const struct
+{
+ const wchar_t* ext;
+ const wchar_t* language;
+} s_defaultExtMap[] =
+{
+ // F# (primary target)
+ { L"fs", L"fsharp" },
+ { L"fsx", L"fsharp" },
+ { L"fsi", L"fsharp_signature" },
+
+ // Common languages
+ { L"c", L"c" },
+ { L"h", L"c" },
+ { L"cpp", L"cpp" },
+ { L"cxx", L"cpp" },
+ { L"cc", L"cpp" },
+ { L"hpp", L"cpp" },
+ { L"hxx", L"cpp" },
+ { L"cs", L"c-sharp" },
+ { L"py", L"python" },
+ { L"js", L"javascript" },
+ { L"ts", L"typescript" },
+ { L"tsx", L"tsx" },
+ { L"jsx", L"javascript" },
+ { L"java", L"java" },
+ { L"go", L"go" },
+ { L"rs", L"rust" },
+ { L"rb", L"ruby" },
+ { L"lua", L"lua" },
+ { L"sh", L"bash" },
+ { L"bash", L"bash" },
+ { L"json", L"json" },
+ { L"yaml", L"yaml" },
+ { L"yml", L"yaml" },
+ { L"xml", L"xml" },
+ { L"html", L"html" },
+ { L"htm", L"html" },
+ { L"css", L"css" },
+ { L"sql", L"sql" },
+ { L"ps1", L"powershell" },
+ { L"psm1", L"powershell" },
+ { L"php", L"php" },
+ { L"pl", L"perl" },
+ { L"swift", L"swift" },
+ { L"kt", L"kotlin" },
+ { L"scala", L"scala" },
+ { L"hs", L"haskell" },
+ { L"ml", L"ocaml" },
+ { L"mli", L"ocaml" },
+ { L"ex", L"elixir" },
+ { L"exs", L"elixir" },
+ { L"zig", L"zig" },
+ { L"nim", L"nim" },
+ { L"toml", L"toml" },
+ { L"md", L"markdown" },
+};
+// Returns the single shared registry, a function-local static constructed on first call.
+TreeSitterRegistry& TreeSitterRegistry::Instance()
+{
+ static TreeSitterRegistry instance;
+ return instance;
+}
+// One-time setup (no-op once m_bInitialized is set): choose the grammar directory, register the default extension map, and record which grammar DLLs exist.
+void TreeSitterRegistry::Initialize(const std::wstring& sGrammarDir)
+{
+ if (m_bInitialized)
+ return;
+
+ // Determine grammar directory: an explicit sGrammarDir wins, otherwise fall back to a default
+ if (sGrammarDir.empty())
+ {
+ // Default: the TreeSitterGrammars folder beside the executable. NOTE(review): GetModuleFileNameW's result is not checked (MAX_PATH truncation).
+ wchar_t szExePath[MAX_PATH] = {};
+ GetModuleFileNameW(nullptr, szExePath, MAX_PATH);
+ std::wstring sExeDir(szExePath);
+ auto pos = sExeDir.rfind(L'\\');
+ if (pos != std::wstring::npos)
+ sExeDir = sExeDir.substr(0, pos);
+ m_sGrammarDir = sExeDir + L"\\TreeSitterGrammars";
+ }
+ else
+ {
+ m_sGrammarDir = sGrammarDir;
+ }
+
+ // Register default extension mappings
+ for (const auto& mapping : s_defaultExtMap)
+ {
+ m_extMap[mapping.ext] = mapping.language;
+ }
+
+ // Check if the grammar directory exists
+ DWORD dwAttrib = GetFileAttributesW(m_sGrammarDir.c_str());
+ if (dwAttrib == INVALID_FILE_ATTRIBUTES || !(dwAttrib & FILE_ATTRIBUTE_DIRECTORY))
+ {
+ // Directory doesn't exist - that's OK, just means no tree-sitter support
+ m_bInitialized = true;
+ return;
+ }
+
+ // Only discover available grammar DLLs here; nothing is loaded yet.
+ // Grammars are loaded lazily on first request in GetLanguageForExt().
+ WIN32_FIND_DATAW findData;
+ std::wstring searchPattern = m_sGrammarDir + L"\\tree-sitter-*.dll";
+ HANDLE hFind = FindFirstFileW(searchPattern.c_str(), &findData);
+ if (hFind != INVALID_HANDLE_VALUE)
+ {
+ do
+ {
+ // Extract NAME from a file called "tree-sitter-NAME.dll" (names with an empty NAME are ignored)
+ std::wstring sFileName(findData.cFileName);
+ const std::wstring prefix = L"tree-sitter-";
+ const std::wstring suffix = L".dll";
+ if (sFileName.size() > prefix.size() + suffix.size())
+ {
+ std::wstring sLangName = sFileName.substr(
+ prefix.size(),
+ sFileName.size() - prefix.size() - suffix.size());
+
+ // Just record that this language is available
+ m_availableLanguages.insert(sLangName);
+ }
+ } while (FindNextFileW(hFind, &findData));
+ FindClose(hFind);
+ }
+
+ m_bInitialized = true;
+}
+
+/**
+ * @brief Resolve a file extension to a grammar, loading it on first use.
+ *
+ * @param sExt File extension without the dot. The lookup in the extension
+ *             map is an exact (case-sensitive) match.
+ * @return The language, or nullptr when the extension is not mapped or the
+ *         mapped language cannot be provided (see GetLanguageForName()).
+ */
+const CTreeSitterLanguage* TreeSitterRegistry::GetLanguageForExt(const std::wstring& sExt)
+{
+    // Look up extension -> language name
+    const auto itExt = m_extMap.find(sExt);
+    if (itExt == m_extMap.end())
+        return nullptr;
+
+    // Everything past the extension lookup (cache hit, failure memo,
+    // availability check, lazy load) is exactly what the by-name lookup
+    // does, so share that single implementation.
+    return GetLanguageForName(itExt->second);
+}
+
+/**
+ * @brief Return the grammar for a language name, loading it on first use.
+ *
+ * A language that fails to load, or loads without a highlight query, is
+ * remembered in m_failedLanguages so the load is not attempted again.
+ *
+ * @param sLangName Language name as it appears in the grammar DLL file name.
+ * @return The language, or nullptr when it is unknown, unavailable, or
+ *         previously failed to load.
+ */
+const CTreeSitterLanguage* TreeSitterRegistry::GetLanguageForName(const std::wstring& sLangName)
+{
+    // Already loaded?
+    const auto itLang = m_languages.find(sLangName);
+    if (itLang != m_languages.end())
+        return itLang->second->GetHighlightQuery() ? itLang->second.get() : nullptr;
+
+    // Known failure, or no DLL was discovered for this language
+    if (m_failedLanguages.count(sLangName) > 0)
+        return nullptr;
+    if (m_availableLanguages.count(sLangName) == 0)
+        return nullptr;
+
+    // Lazy load: load the grammar DLL now. A grammar without a highlight
+    // query is treated the same as a failed load.
+    auto pLang = std::make_unique<CTreeSitterLanguage>();
+    if (!pLang->Load(m_sGrammarDir, sLangName) || !pLang->GetHighlightQuery())
+    {
+        m_failedLanguages.insert(sLangName);
+        return nullptr;
+    }
+
+    const CTreeSitterLanguage* pResult = pLang.get();
+    m_languages[sLangName] = std::move(pLang);
+    return pResult;
+}
+
+/**
+ * @brief Add or replace the language mapped to a file extension.
+ * @param sExt      Extension key, stored exactly as given.
+ * @param sLanguage Language name the extension resolves to.
+ */
+void TreeSitterRegistry::RegisterExtension(const std::wstring& sExt, const std::wstring& sLanguage)
+{
+    m_extMap.insert_or_assign(sExt, sLanguage);
+}
+
+// ============================================================================
+// CTreeSitterParser - Additional Methods
+// ============================================================================
+
+/**
+ * @brief Convenience: parse document from a text buffer.
+ *
+ * Gathers the pointer and length of every line in the buffer and hands
+ * them to ParseDocument(). Does nothing when the buffer is null or reports
+ * no lines.
+ *
+ * @param pBuffer The text buffer to read line data from (may be nullptr).
+ */
+void CTreeSitterParser::ParseFromBuffer(CCrystalTextBuffer* pBuffer)
+{
+    if (!pBuffer)
+        return;
+
+    const int nLineCount = pBuffer->GetLineCount();
+    if (nLineCount <= 0)
+        return;
+
+    // Collect line pointers and lengths
+    std::vector<const tchar_t*> ppszLines;
+    std::vector<int> pnLineLengths;
+    ppszLines.reserve(nLineCount);
+    pnLineLengths.reserve(nLineCount);
+
+    for (int i = 0; i < nLineCount; i++)
+    {
+        ppszLines.push_back(pBuffer->GetLineChars(i));
+        pnLineLengths.push_back(pBuffer->GetLineLength(i));
+    }
+
+    ParseDocument(ppszLines.data(), pnLineLengths.data(), nLineCount);
+}
+
+/**
+ * @brief Get the node type name at a specific position.
+ *
+ * This is used for comment filtering and other syntax-aware operations.
+ *
+ * The position is converted to a byte offset into the parsed document by
+ * summing the UTF-8 length of every preceding line (plus one byte per line
+ * for the newline) and then walking the target line's UTF-8 text.
+ *
+ * @param nLineIndex Zero-based line index.
+ * @param nCharPos   Zero-based position in the line. Counted in UTF-16 code
+ *                   units, so a 4-byte UTF-8 sequence (surrogate pair)
+ *                   advances the position by two.
+ * @return The tree-sitter node type at that position, or an empty string
+ *         when there is no tree, the line is out of range, or no node is found.
+ */
+std::wstring CTreeSitterParser::GetNodeTypeAt(int nLineIndex, int nCharPos) const
+{
+    if (!m_pTree || nLineIndex < 0 || nLineIndex >= m_nLineCount ||
+        nLineIndex >= static_cast<int>(m_lineUtf8.size()))
+        return std::wstring();
+
+    // Bytes of all previous lines, each followed by one newline byte
+    uint32_t byteOffset = 0;
+    for (int i = 0; i < nLineIndex; i++)
+        byteOffset += static_cast<uint32_t>(m_lineUtf8[i].size()) + 1;
+
+    // Bytes of the current line up to nCharPos
+    const std::string& lineUtf8 = m_lineUtf8[nLineIndex];
+    size_t nBytePos = 0;
+    int nUnits = 0;
+    while (nBytePos < lineUtf8.size() && nUnits < nCharPos)
+    {
+        const unsigned char lead = static_cast<unsigned char>(lineUtf8[nBytePos]);
+
+        // UTF-8 sequence length from the lead byte (stray continuation
+        // bytes are stepped over one at a time)
+        size_t seqLen = 1;
+        if ((lead & 0xE0) == 0xC0)
+            seqLen = 2;
+        else if ((lead & 0xF0) == 0xE0)
+            seqLen = 3;
+        else if ((lead & 0xF8) == 0xF0)
+            seqLen = 4;
+
+        // A 4-byte sequence is a surrogate pair, i.e. two UTF-16 units.
+        const int nWidth = (seqLen == 4) ? 2 : 1;
+        if (nUnits + nWidth > nCharPos)
+            break; // nCharPos points into the middle of this character
+
+        // A truncated sequence must not carry the offset into the next line.
+        const size_t nRemaining = lineUtf8.size() - nBytePos;
+        if (seqLen > nRemaining)
+            seqLen = nRemaining;
+
+        nBytePos += seqLen;
+        nUnits += nWidth;
+    }
+    byteOffset += static_cast<uint32_t>(nBytePos);
+
+    // Get the tree-sitter node at this byte position
+    const TSNode rootNode = ts_tree_root_node(m_pTree);
+    const TSNode node = ts_node_descendant_for_byte_range(rootNode, byteOffset, byteOffset);
+    if (ts_node_is_null(node))
+        return std::wstring();
+
+    const char* pszType = ts_node_type(node);
+    if (!pszType)
+        return std::wstring();
+
+    // The return type is std::wstring regardless of the build's character
+    // set, so always convert UTF-8 -> UTF-16. With cbMultiByte = -1 the
+    // reported length includes the terminating null.
+    const int nLen = MultiByteToWideChar(CP_UTF8, 0, pszType, -1, nullptr, 0);
+    if (nLen <= 0)
+        return std::wstring();
+
+    std::wstring sType(static_cast<size_t>(nLen), L'\0');
+    if (MultiByteToWideChar(CP_UTF8, 0, pszType, -1, &sType[0], nLen) <= 0)
+        return std::wstring();
+    sType.resize(static_cast<size_t>(nLen) - 1); // drop the terminator
+    return sType;
+}
+
+/**
+ * @brief Tell whether the node at a position has "comment" in its type name.
+ * @param nLineIndex Zero-based line index.
+ * @param nCharPos   Zero-based position in the line.
+ * @return true when the node type reported by GetNodeTypeAt() contains the
+ *         substring "comment".
+ */
+bool CTreeSitterParser::IsCommentPosition(int nLineIndex, int nCharPos) const
+{
+    return GetNodeTypeAt(nLineIndex, nCharPos).find(L"comment") != std::wstring::npos;
+}
+
+/**
+ * @brief Find the definition that the symbol at a byte offset refers to.
+ *
+ * The lookup runs in four stages and stops at the first one that applies:
+ *  1. the offset lies inside a local definition -> that definition;
+ *  2. the offset lies inside a pending local reference -> the same-named
+ *     definition (starting at or before the reference) in the smallest
+ *     enclosing scope, walking outwards while scopes inherit; if none is
+ *     found the lookup fails without trying the tag stages;
+ *  3. the offset lies inside a tag definition -> that definition;
+ *  4. the offset lies inside a tag reference -> the same-named tag
+ *     definition with the lowest start byte.
+ *
+ * @param byteOffset   Byte offset into the parsed document.
+ * @param defStartByte Receives the definition's start byte on success.
+ * @param defEndByte   Receives the definition's end byte on success.
+ * @return true when a definition was found; outputs are untouched otherwise.
+ */
+bool CTreeSitterParser::TryGetDefinitionByteRangeAt(uint32_t byteOffset, uint32_t& defStartByte, uint32_t& defEndByte) const
+{
+    // Stage 1: offset is on a local definition itself
+    for (const auto& scope : m_localScopes)
+    {
+        for (const auto& def : scope.defs)
+        {
+            if (byteOffset >= def.startByte && byteOffset < def.endByte)
+            {
+                defStartByte = def.startByte;
+                defEndByte = def.endByte;
+                return true;
+            }
+        }
+    }
+
+    // Stage 2: offset is on a local reference
+    for (const auto& ref : m_pendingRefs)
+    {
+        if (byteOffset < ref.startByte || byteOffset >= ref.endByte)
+            continue;
+
+        std::vector<const LocalScope*> enclosingScopes;
+        for (const auto& scope : m_localScopes)
+        {
+            if (ref.startByte >= scope.startByte && ref.endByte <= scope.endByte)
+                enclosingScopes.push_back(&scope);
+        }
+
+        // Smallest (innermost) scope first
+        std::sort(enclosingScopes.begin(), enclosingScopes.end(),
+            [](const LocalScope* a, const LocalScope* b)
+            {
+                return (a->endByte - a->startByte) < (b->endByte - b->startByte);
+            });
+
+        for (const auto* pScope : enclosingScopes)
+        {
+            for (const auto& def : pScope->defs)
+            {
+                if (def.name == ref.name && def.startByte <= ref.startByte)
+                {
+                    defStartByte = def.startByte;
+                    defEndByte = def.endByte;
+                    return true;
+                }
+            }
+            // A scope that does not inherit hides everything outside it.
+            if (!pScope->inherits)
+                break;
+        }
+
+        return false;
+    }
+
+    // Stage 3: offset is on a tag definition itself
+    for (const auto& def : m_tagDefs)
+    {
+        if (byteOffset >= def.startByte && byteOffset < def.endByte)
+        {
+            defStartByte = def.startByte;
+            defEndByte = def.endByte;
+            return true;
+        }
+    }
+
+    // Stage 4: offset is on a tag reference
+    for (const auto& ref : m_tagRefs)
+    {
+        if (byteOffset < ref.startByte || byteOffset >= ref.endByte)
+            continue;
+
+        // Earliest definition carrying the referenced name
+        const TagDef* pBestDef = nullptr;
+        for (const auto& def : m_tagDefs)
+        {
+            if (def.name != ref.name)
+                continue;
+            if (!pBestDef || def.startByte < pBestDef->startByte)
+                pBestDef = &def;
+        }
+
+        if (pBestDef)
+        {
+            defStartByte = pBestDef->startByte;
+            defEndByte = pBestDef->endByte;
+            return true;
+        }
+
+        return false;
+    }
+
+    return false;
+}
+
+/**
+ * @brief Convert a document byte offset to a line index and in-line position.
+ *
+ * Lines are laid out as their UTF-8 bytes followed by one newline byte. An
+ * offset equal to a line's end (the newline position) maps to that line.
+ *
+ * @param byteOffset Byte offset into the parsed document.
+ * @param nLineIndex Receives the zero-based line index on success.
+ * @param nCharPos   Receives the position within the line, as computed by
+ *                   Utf8ByteOffsetToCharPos(), on success.
+ * @return true when the offset falls within the cached lines; outputs are
+ *         untouched otherwise.
+ */
+bool CTreeSitterParser::ByteOffsetToLineChar(uint32_t byteOffset, int& nLineIndex, int& nCharPos) const
+{
+    uint32_t lineStart = 0;
+    for (int i = 0; i < static_cast<int>(m_lineUtf8.size()); ++i)
+    {
+        const uint32_t lineEnd = lineStart + static_cast<uint32_t>(m_lineUtf8[i].size());
+        if (byteOffset <= lineEnd)
+        {
+            nLineIndex = i;
+            nCharPos = Utf8ByteOffsetToCharPos(i, byteOffset - lineStart);
+            return true;
+        }
+        lineStart = lineEnd + 1; // skip the newline byte
+    }
+
+    // Every offset up to and including the end of the last line is handled
+    // inside the loop, so reaching this point means the offset is past the
+    // end of the document.
+    return false;
+}
+
+/**
+ * @brief Look up a tag definition by the identifier under a buffer position.
+ *
+ * Reads the line text from m_pBuffer, expands the position to the
+ * surrounding identifier (alphanumerics, '_', '`' and '\''), and returns the
+ * same-named tag definition with the lowest start byte.
+ *
+ * @param nLineIndex   Zero-based line index in the buffer.
+ * @param nCharPos     Position in the line; values past the end are clamped
+ *                     to the last character. When the character at the
+ *                     position is not part of an identifier but the one
+ *                     before it is, the preceding identifier is used.
+ * @param defStartByte Receives the definition's start byte on success.
+ * @param defEndByte   Receives the definition's end byte on success.
+ * @return true when an identifier was found and a tag definition matches it.
+ */
+bool CTreeSitterParser::TryGetTagDefinitionByNameAt(int nLineIndex, int nCharPos, uint32_t& defStartByte, uint32_t& defEndByte) const
+{
+    if (!m_pBuffer || nLineIndex < 0 || nLineIndex >= m_pBuffer->GetLineCount())
+        return false;
+
+    const tchar_t* pszLine = m_pBuffer->GetLineChars(nLineIndex);
+    const int nLineLength = m_pBuffer->GetLineLength(nLineIndex);
+    if (!pszLine || nLineLength <= 0)
+        return false;
+
+    auto IsIdentChar = [](tchar_t ch)
+    {
+        return _istalnum(ch) || ch == _T('_') || ch == _T('`') || ch == _T('\'');
+    };
+
+    int nIndex = nCharPos;
+    if (nIndex >= nLineLength)
+        nIndex = nLineLength - 1;
+    if (nIndex < 0)
+        return false;
+
+    // Caret just past the end of an identifier: step back onto it
+    if (!IsIdentChar(pszLine[nIndex]) && nIndex > 0 && IsIdentChar(pszLine[nIndex - 1]))
+        --nIndex;
+    if (!IsIdentChar(pszLine[nIndex]))
+        return false;
+
+    // Expand to the full identifier [nStart, nEnd)
+    int nStart = nIndex;
+    while (nStart > 0 && IsIdentChar(pszLine[nStart - 1]))
+        --nStart;
+
+    int nEnd = nIndex + 1;
+    while (nEnd < nLineLength && IsIdentChar(pszLine[nEnd]))
+        ++nEnd;
+
+    // Tag names are compared as UTF-8
+#ifdef _UNICODE
+    const wchar_t* pszSymbol = pszLine + nStart;
+    const int nSymbolLen = nEnd - nStart;
+    const int nUtf8Len = WideCharToMultiByte(CP_UTF8, 0, pszSymbol, nSymbolLen, nullptr, 0, nullptr, nullptr);
+    if (nUtf8Len <= 0)
+        return false;
+    std::string symbol(static_cast<size_t>(nUtf8Len), '\0');
+    if (WideCharToMultiByte(CP_UTF8, 0, pszSymbol, nSymbolLen, &symbol[0], nUtf8Len, nullptr, nullptr) != nUtf8Len)
+        return false;
+#else
+    const std::string symbol(pszLine + nStart, pszLine + nEnd);
+#endif
+
+    // Earliest definition carrying that name
+    const TagDef* pBestDef = nullptr;
+    for (const auto& def : m_tagDefs)
+    {
+        if (def.name != symbol)
+            continue;
+        if (!pBestDef || def.startByte < pBestDef->startByte)
+            pBestDef = &def;
+    }
+
+    if (!pBestDef)
+        return false;
+
+    defStartByte = pBestDef->startByte;
+    defEndByte = pBestDef->endByte;
+    return true;
+}
+
+/**
+ * @brief Find where the symbol at a position is defined.
+ *
+ * First tries the position-based lookup (TryGetDefinitionByteRangeAt). If
+ * that finds nothing, falls back to a by-name tag lookup. If it resolves
+ * back to the queried position itself (or cannot be converted to a
+ * line/char), a by-name tag definition that points elsewhere is preferred.
+ *
+ * @param nLineIndex Zero-based line index of the symbol.
+ * @param nCharPos   Zero-based position in the line, in UTF-16 code units
+ *                   (a 4-byte UTF-8 sequence counts as two).
+ * @param nDefLine   Receives the definition's line index on success.
+ * @param nDefChar   Receives the definition's in-line position on success.
+ * @return true when a definition was found and its offset could be mapped
+ *         to a line/char position.
+ */
+bool CTreeSitterParser::FindDefinition(int nLineIndex, int nCharPos, int& nDefLine, int& nDefChar) const
+{
+    // m_lineUtf8 is indexed below, so it is bounds-checked as well as the
+    // cached line count.
+    if (!m_pTree || nLineIndex < 0 || nLineIndex >= m_nLineCount ||
+        nLineIndex >= static_cast<int>(m_lineUtf8.size()))
+        return false;
+
+    // Bytes of all previous lines, each followed by one newline byte
+    uint32_t byteOffset = 0;
+    for (int i = 0; i < nLineIndex; ++i)
+        byteOffset += static_cast<uint32_t>(m_lineUtf8[i].size()) + 1;
+
+    // Bytes of the current line up to nCharPos
+    const std::string& lineUtf8 = m_lineUtf8[nLineIndex];
+    size_t nBytePos = 0;
+    int nUnits = 0;
+    while (nBytePos < lineUtf8.size() && nUnits < nCharPos)
+    {
+        const unsigned char lead = static_cast<unsigned char>(lineUtf8[nBytePos]);
+        size_t seqLen = 1;
+        if ((lead & 0xE0) == 0xC0)
+            seqLen = 2;
+        else if ((lead & 0xF0) == 0xE0)
+            seqLen = 3;
+        else if ((lead & 0xF8) == 0xF0)
+            seqLen = 4;
+
+        // A 4-byte sequence is a surrogate pair, i.e. two UTF-16 units.
+        const int nWidth = (seqLen == 4) ? 2 : 1;
+        if (nUnits + nWidth > nCharPos)
+            break; // nCharPos points into the middle of this character
+
+        // A truncated sequence must not carry the offset into the next line.
+        const size_t nRemaining = lineUtf8.size() - nBytePos;
+        if (seqLen > nRemaining)
+            seqLen = nRemaining;
+
+        nBytePos += seqLen;
+        nUnits += nWidth;
+    }
+    byteOffset += static_cast<uint32_t>(nBytePos);
+
+    uint32_t defStartByte = 0;
+    uint32_t defEndByte = 0;
+    const bool foundAtPosition = TryGetDefinitionByteRangeAt(byteOffset, defStartByte, defEndByte);
+
+    uint32_t tagDefStartByte = 0;
+    uint32_t tagDefEndByte = 0;
+    if (!foundAtPosition)
+    {
+        if (!TryGetTagDefinitionByNameAt(
+                nLineIndex, nCharPos, tagDefStartByte, tagDefEndByte))
+            return false;
+
+        defStartByte = tagDefStartByte;
+        defEndByte = tagDefEndByte;
+    }
+    else
+    {
+        int posDefLine = 0;
+        int posDefChar = 0;
+        const bool positionResolved =
+            ByteOffsetToLineChar(defStartByte, posDefLine, posDefChar);
+
+        // If the position-based lookup resolves back to the clicked symbol,
+        // prefer the tag-definition target when it points somewhere else.
+        // This avoids getting stuck on the clicked type reference after the
+        // context-menu click has moved the caret onto the symbol.
+        if (!positionResolved || (posDefLine == nLineIndex && posDefChar == nCharPos))
+        {
+            if (TryGetTagDefinitionByNameAt(
+                    nLineIndex, nCharPos, tagDefStartByte, tagDefEndByte) &&
+                (!positionResolved || tagDefStartByte != defStartByte))
+            {
+                defStartByte = tagDefStartByte;
+                defEndByte = tagDefEndByte;
+            }
+        }
+    }
+
+    return ByteOffsetToLineChar(defStartByte, nDefLine, nDefChar);
+}
diff --git a/Externals/crystaledit/editlib/TreeSitterParser.h b/Externals/crystaledit/editlib/TreeSitterParser.h
new file mode 100644
index 00000000000..af3dafb4574
--- /dev/null
+++ b/Externals/crystaledit/editlib/TreeSitterParser.h
@@ -0,0 +1,441 @@
+////////////////////////////////////////////////////////////////////////////
+// File: TreeSitterParser.h
+// Version: 1.0.1
+// Created: 2026-03-26
+//
+// Tree-sitter based syntax highlighting bridge for CrystalEdit.
+//
+// Loads tree-sitter grammar DLLs and highlight query (.scm) files
+// at runtime. Parses full documents into ASTs and extracts per-line
+// color blocks mapped to WinMerge's COLORINDEX values.
+//
+// Grammar DLLs are loaded from a "TreeSitterGrammars" directory
+// alongside the WinMerge executable. Each DLL exports a function
+// named tree_sitter_<language>() returning const TSLanguage*.
+//
+// Highlight queries are loaded from .scm files in the same directory.
+//
+// SPDX-License-Identifier: GPL-2.0-or-later
+////////////////////////////////////////////////////////////////////////////
+
+#pragma once
+
+#include "SyntaxColors.h"
+#include "parsers/crystallineparser.h"
+
+#include
+#include
+#include
+#include
+#include
+
+// Forward declarations for tree-sitter C API types.
+// The actual tree_sitter/api.h header is included only in the .cpp file.
+typedef struct TSParser TSParser;
+typedef struct TSTree TSTree;
+typedef struct TSQuery TSQuery;
+typedef struct TSLanguage TSLanguage;
+
+class CCrystalTextBuffer;
+
+/**
+ * @brief Manages a tree-sitter grammar loaded from a DLL.
+ *
+ * Each instance holds a loaded grammar DLL, the TSLanguage pointer,
+ * and the compiled highlight, locals, tags, and injection queries for that language.
+ *
+ * Instances cannot be copied (copy construction and assignment are deleted).
+ */
+class CTreeSitterLanguage
+{
+public:
+ CTreeSitterLanguage();
+ ~CTreeSitterLanguage();
+
+ // Non-copyable; presumably because the instance owns the DLL handle and
+ // the compiled queries - confirm against the destructor.
+ CTreeSitterLanguage(const CTreeSitterLanguage&) = delete;
+ CTreeSitterLanguage& operator=(const CTreeSitterLanguage&) = delete;
+
+ /**
+ * @brief Load a grammar DLL and its query files.
+ * @param sGrammarDir Directory containing grammar DLLs and .scm files.
+ * @param sLanguage Language name (e.g. "fsharp", "python", "cpp").
+ * @return true if both the DLL and highlight query loaded successfully.
+ *
+ * Also attempts to load locals.scm, tags.scm, and injections.scm if present.
+ * Failure to load optional queries is not fatal.
+ */
+ bool Load(const std::wstring& sGrammarDir, const std::wstring& sLanguage);
+
+ /** @brief Get the loaded TSLanguage pointer (or nullptr). */
+ const TSLanguage* GetLanguage() const { return m_pLanguage; }
+
+ /** @brief Get the compiled highlight TSQuery (or nullptr). */
+ const TSQuery* GetHighlightQuery() const { return m_pHighlightQuery; }
+
+ /** @brief Get the compiled locals TSQuery (or nullptr). */
+ const TSQuery* GetLocalsQuery() const { return m_pLocalsQuery; }
+
+ /** @brief Get the compiled tags TSQuery (or nullptr). */
+ const TSQuery* GetTagsQuery() const { return m_pTagsQuery; }
+
+ /** @brief Get the compiled injection TSQuery (or nullptr). */
+ const TSQuery* GetInjectionQuery() const { return m_pInjectionQuery; }
+
+ /** @brief Get the language name. */
+ const std::wstring& GetName() const { return m_sName; }
+
+private:
+ /**
+ * @brief Helper to load and compile a .scm query file.
+ * @param sPath Path of the query file.
+ * @return The compiled query; presumably nullptr on failure - TODO confirm.
+ */
+ TSQuery* LoadQuery(const std::wstring& sPath);
+
+ HMODULE m_hDll; // Module handle of the grammar DLL
+ const TSLanguage* m_pLanguage; // Returned by GetLanguage()
+ TSQuery* m_pHighlightQuery; // Returned by GetHighlightQuery(); required for Load() to succeed
+ TSQuery* m_pLocalsQuery; // Returned by GetLocalsQuery(); optional
+ TSQuery* m_pTagsQuery; // Returned by GetTagsQuery(); optional
+ TSQuery* m_pInjectionQuery; // Returned by GetInjectionQuery(); optional
+ std::wstring m_sName; // Returned by GetName()
+};
+
+
+/**
+ * @brief Maps tree-sitter highlight capture names to COLORINDEX values.
+ *
+ * Standard capture names from nvim-treesitter / tree-sitter highlight queries:
+ *   @keyword, @function, @function.call, @string, @comment, @number,
+ *   @operator, @type, @variable, @property, @constant, @punctuation,
+ *   @constructor, @module, @attribute, @label, @preproc, etc.
+ *
+ * These are collapsed into WinMerge's 9 syntax COLORINDEX values.
+ */
+class CTreeSitterColorMap
+{
+public:
+    CTreeSitterColorMap();
+
+    /**
+     * @brief Map a tree-sitter capture name to a COLORINDEX.
+     * @param sCaptureName The capture name (e.g. "keyword", "string").
+     * @return The COLORINDEX, or COLORINDEX_NORMALTEXT if unknown.
+     */
+    int MapCapture(const std::string& sCaptureName) const;
+
+private:
+    // capture name -> COLORINDEX value
+    std::unordered_map<std::string, int> m_map;
+};
+
+
+/**
+ * @brief Per-line cached highlight result.
+ *
+ * NOTE(review): the field descriptions below are inferred from the field
+ * names and from the TEXTBLOCK output of CTreeSitterParser::GetLineBlocks();
+ * confirm against the code that fills the line cache.
+ */
+struct TreeSitterLineBlock
+{
+ int nCharPos; // presumably the position in the line where this block starts
+ int nColorIndex; // presumably a COLORINDEX value as produced by CTreeSitterColorMap::MapCapture()
+ int nPriority; // presumably ranks overlapping captures - TODO confirm which value wins
+ uint32_t nOrder; // presumably the sequence number handed out by CTreeSitterParser::NextBlockOrder()
+};
+
+
+/**
+ * @brief Tree-sitter based syntax parser for CrystalEdit views.
+ *
+ * This class replaces the line-by-line keyword parsers with full-document
+ * AST-based parsing via tree-sitter. It:
+ *   1. Parses the entire document into a tree-sitter AST.
+ *   2. Runs highlight queries to extract token ranges.
+ *   3. Caches per-line color block arrays.
+ *   4. Serves cached results on per-line ParseLine() calls.
+ *   5. Supports lazy re-parsing: edits mark the cache dirty,
+ *      and the next paint cycle triggers a single reparse.
+ *
+ * Override ParseLine() in the view to call this parser's GetLineBlocks().
+ *
+ * Usage:
+ * @code
+ *   auto pLang = TreeSitterRegistry::Instance().GetLanguageForExt(L"fs");
+ *   if (pLang) {
+ *       m_treeSitterParser.SetLanguage(pLang);
+ *       m_treeSitterParser.ParseFromBuffer(pBuffer);
+ *       // In ParseLine override:
+ *       m_treeSitterParser.GetLineBlocks(nLineIndex, pBuf, nActualItems);
+ *   }
+ * @endcode
+ */
+class CTreeSitterParser
+{
+public:
+    CTreeSitterParser();
+    ~CTreeSitterParser();
+
+    CTreeSitterParser(const CTreeSitterParser&) = delete;
+    CTreeSitterParser& operator=(const CTreeSitterParser&) = delete;
+
+    /**
+     * @brief Set the language to use for parsing.
+     * @param pLang Pointer to a loaded CTreeSitterLanguage.
+     */
+    void SetLanguage(const CTreeSitterLanguage* pLang);
+
+    /**
+     * @brief Parse (or re-parse) the full document.
+     * @param ppszLines Array of line pointers (from CCrystalTextBuffer).
+     * @param pnLineLengths Array of line lengths.
+     * @param nLineCount Number of lines in the document.
+     */
+    void ParseDocument(const tchar_t* const* ppszLines, const int* pnLineLengths, int nLineCount);
+
+    /**
+     * @brief Mark the parse cache as dirty (e.g. after an edit).
+     *
+     * The next call to EnsureParsed() (from ParseLine/GetLineBlocks)
+     * will trigger a single reparse. This avoids reparsing on every
+     * keystroke and instead defers to the next paint cycle.
+     */
+    void MarkDirty() { m_bDirty = true; }
+
+    /**
+     * @brief Notify the parser of an edit for incremental reparsing.
+     *
+     * Extracts the last edit's position from the buffer's UndoRecord
+     * and calls ts_tree_edit() on the existing tree so tree-sitter
+     * can reuse unchanged subtrees during the next reparse.
+     *
+     * Also marks the cache dirty.
+     *
+     * @param pBuf The text buffer that was edited.
+     */
+    void NotifyEdit(class CCrystalTextBuffer* pBuf);
+
+    /**
+     * @brief Ensure the document is parsed and cache is up-to-date.
+     * @param pBuf The text buffer to read line data from (if reparse needed).
+     *
+     * Called lazily from ParseLine. Only reparses if marked dirty.
+     */
+    void EnsureParsed(class CCrystalTextBuffer* pBuf);
+
+    /**
+     * @brief Get the cached color blocks for a specific line.
+     * @param nLineIndex Zero-based line index.
+     * @param pBuf Output buffer for TEXTBLOCK entries (may be nullptr for cookie-only).
+     * @param nActualItems In/out: on entry, the number of blocks already in pBuf
+     *        (caller pre-inserts a default NORMALTEXT block at position 0);
+     *        on return, the total number of blocks.
+     * @param nMaxBlocks Maximum number of TEXTBLOCK entries that fit in pBuf.
+     *        Pass 0 to skip bounds checking (legacy behavior).
+     */
+    void GetLineBlocks(int nLineIndex,
+        CrystalLineParser::TEXTBLOCK* pBuf,
+        int& nActualItems,
+        int nMaxBlocks = 0) const;
+
+    /** @brief Check if a valid tree is available. */
+    bool HasTree() const { return m_pTree != nullptr; }
+
+    /** @brief Check if a language is set. */
+    bool HasLanguage() const { return m_pLang != nullptr; }
+
+    /** @brief Get the number of lines in the cached parse result. */
+    int GetCachedLineCount() const { return m_nLineCount; }
+
+    /** @brief Check if cache needs rebuilding. */
+    bool IsDirty() const { return m_bDirty; }
+
+    /** @brief Invalidate cached results and free tree. */
+    void Invalidate();
+
+    /** @brief Get the language object (for service layer). */
+    const CTreeSitterLanguage* GetLanguage() const { return m_pLang; }
+
+    /**
+     * @brief Get the node type name at a specific position.
+     * @param nLineIndex Zero-based line index.
+     * @param nCharPos Zero-based character position in line.
+     * @return Node type name (e.g. "comment", "string_literal"), or empty string.
+     *
+     * This is used by TreeSitterHighlightService to determine if a position
+     * is within a comment or string for filtering purposes.
+     */
+    std::wstring GetNodeTypeAt(int nLineIndex, int nCharPos) const;
+
+    /**
+     * @brief Check whether the node type at a position contains "comment".
+     * @param nLineIndex Zero-based line index.
+     * @param nCharPos Zero-based character position in line.
+     */
+    bool IsCommentPosition(int nLineIndex, int nCharPos) const;
+
+    /**
+     * @brief Find the definition of the symbol at a position.
+     * @param nLineIndex Zero-based line index of the symbol.
+     * @param nCharPos Zero-based character position of the symbol.
+     * @param nDefLine Receives the definition's line index on success.
+     * @param nDefChar Receives the definition's character position on success.
+     * @return true if a definition was found.
+     */
+    bool FindDefinition(int nLineIndex, int nCharPos, int& nDefLine, int& nDefChar) const;
+
+    /**
+     * @brief Convenience: parse document from a text buffer.
+     * @param pBuffer The text buffer to read line data from.
+     */
+    void ParseFromBuffer(class CCrystalTextBuffer* pBuffer);
+
+    /** @brief Remember the buffer this parser reads from; the pointer is stored as-is. */
+    void SetBuffer(class CCrystalTextBuffer* pBuffer) { m_pBuffer = pBuffer; }
+
+    /** @brief Get the buffer set with SetBuffer(). */
+    CCrystalTextBuffer* GetBuffer() const { return m_pBuffer; }
+
+private:
+    void EnsureParser();
+    void RunHighlightQuery();
+    void RunLocalsQuery();
+    void RunTagsQuery();
+    void RunInjectionQuery();
+    void BuildLineCache(int nLineCount);
+    int Utf8ByteOffsetToCharPos(int nLine, uint32_t byteCol) const;
+    bool TryGetDefinitionByteRangeAt(uint32_t byteOffset, uint32_t& defStartByte, uint32_t& defEndByte) const;
+    bool ByteOffsetToLineChar(uint32_t byteOffset, int& nLineIndex, int& nCharPos) const;
+    bool TryGetTagDefinitionByNameAt(int nLineIndex, int nCharPos, uint32_t& defStartByte, uint32_t& defEndByte) const;
+    uint32_t NextBlockOrder() { return m_nextBlockOrder++; }
+
+    /**
+     * @brief Extract #set! predicate properties from a query pattern.
+     * @param pQuery The query containing the pattern.
+     * @param patternIndex The pattern index.
+     * @param key The property key to look for (e.g. "injection.language").
+     * @return The property value, or empty string if not found.
+     */
+    static std::string GetSetProperty(const TSQuery* pQuery,
+        uint32_t patternIndex,
+        const std::string& key);
+
+    TSParser* m_pParser;  // Created lazily on first use
+    TSTree* m_pTree;
+    const CTreeSitterLanguage* m_pLang;
+    CTreeSitterColorMap m_colorMap;
+    bool m_bDirty;  // True when cache needs rebuild
+
+    // Cached per-line highlight blocks
+    std::vector<std::vector<TreeSitterLineBlock>> m_lineBlocks;
+
+    // Per-line UTF-8 content (for byte offset -> character position mapping)
+    std::vector<std::string> m_lineUtf8;
+
+    // Full document text (needed for tree-sitter, which requires contiguous input
+    // or a custom read callback)
+    std::string m_documentText;
+    int m_nLineCount;
+    uint32_t m_nextBlockOrder;
+
+    // --- Locals support ---
+    // Maps (startByte, endByte) of definition nodes to their highlight color.
+    // Built by RunLocalsQuery + RunHighlightQuery cross-referencing.
+    struct LocalDef
+    {
+        std::string name;    // Variable/symbol name
+        uint32_t startByte;  // Definition node start
+        uint32_t endByte;    // Definition node end
+        int highlight;       // COLORINDEX from highlights.scm (-1 = unknown)
+    };
+
+    struct LocalScope
+    {
+        uint32_t startByte;
+        uint32_t endByte;
+        bool inherits;
+        std::vector<LocalDef> defs;
+    };
+
+    // Scopes from locals.scm, sorted by startByte
+    std::vector<LocalScope> m_localScopes;
+
+    // Map from reference node byte range -> resolved highlight color.
+    // Key: (startByte << 32 | endByte). Assumes files < 4GB.
+    std::unordered_map<uint64_t, int> m_localRefHighlights;
+
+    // Pending references from RunLocalsQuery, resolved during RunHighlightQuery
+    struct PendingRef
+    {
+        std::string name;
+        uint32_t startByte;
+        uint32_t endByte;
+        uint32_t scopeStartByte;
+    };
+    std::vector<PendingRef> m_pendingRefs;
+
+    struct TagDef
+    {
+        std::string name;
+        uint32_t startByte;
+        uint32_t endByte;
+    };
+
+    struct TagRef
+    {
+        std::string name;
+        uint32_t startByte;
+        uint32_t endByte;
+    };
+
+    std::vector<TagDef> m_tagDefs;
+    std::vector<TagRef> m_tagRefs;
+    CCrystalTextBuffer* m_pBuffer; // Needed to get line/char info for unresolved references
+};
+
+
+/**
+ * @brief Global registry for tree-sitter grammars.
+ *
+ * Scans the grammar directory on first use to discover available grammar DLLs,
+ * but loads them lazily on first request. Maps file extensions to languages.
+ */
+class TreeSitterRegistry
+{
+public:
+    /**
+     * @brief Get the singleton instance.
+     */
+    static TreeSitterRegistry& Instance();
+
+    // There is exactly one registry; it is reached through Instance().
+    TreeSitterRegistry(const TreeSitterRegistry&) = delete;
+    TreeSitterRegistry& operator=(const TreeSitterRegistry&) = delete;
+
+    /**
+     * @brief Initialize the registry by scanning the grammar directory.
+     * @param sGrammarDir Path to directory containing grammar DLLs and .scm files.
+     *        If empty, uses "<exe_dir>/TreeSitterGrammars/".
+     *
+     * This only discovers available DLL filenames. Grammars are loaded
+     * lazily on first request via GetLanguageForExt().
+     */
+    void Initialize(const std::wstring& sGrammarDir = L"");
+
+    /**
+     * @brief Find a loaded language for a file extension.
+     * @param sExt File extension without dot (e.g. "fs", "py", "cpp").
+     * @return Pointer to the language, or nullptr if not available.
+     *
+     * Loads the grammar DLL lazily on first request for each language.
+     */
+    const CTreeSitterLanguage* GetLanguageForExt(const std::wstring& sExt);
+
+    /**
+     * @brief Find a loaded language by language name.
+     * @param sLangName Language name (e.g. "javascript", "css", "python").
+     * @return Pointer to the language, or nullptr if not available.
+     *
+     * Used by injection processing to look up grammars for embedded languages.
+     * Loads the grammar DLL lazily on first request.
+     */
+    const CTreeSitterLanguage* GetLanguageForName(const std::wstring& sLangName);
+
+    /**
+     * @brief Register a file extension mapping to a language name.
+     * @param sExt Extension (e.g. "fs").
+     * @param sLanguage Language name (e.g. "fsharp").
+     */
+    void RegisterExtension(const std::wstring& sExt, const std::wstring& sLanguage);
+
+    /** @brief Check if the registry has been initialized. */
+    bool IsInitialized() const { return m_bInitialized; }
+
+private:
+    TreeSitterRegistry() : m_bInitialized(false) {}
+
+    bool m_bInitialized;
+    std::wstring m_sGrammarDir;
+
+    // language name -> loaded grammar (loaded lazily)
+    std::unordered_map<std::wstring, std::unique_ptr<CTreeSitterLanguage>> m_languages;
+
+    // language names that have a DLL available but haven't been loaded yet
+    std::unordered_set<std::wstring> m_availableLanguages;
+
+    // language names that failed to load (don't retry)
+    std::unordered_set<std::wstring> m_failedLanguages;
+
+    // file extension -> language name
+    std::unordered_map<std::wstring, std::wstring> m_extMap;
+};
diff --git a/Externals/crystaledit/editlib/TreeSitterWrapper.cpp b/Externals/crystaledit/editlib/TreeSitterWrapper.cpp
new file mode 100644
index 00000000000..c5bc3626828
--- /dev/null
+++ b/Externals/crystaledit/editlib/TreeSitterWrapper.cpp
@@ -0,0 +1,201 @@
+/**
+ * @file TreeSitterWrapper.cpp
+ *
+ * @brief TreeSitter wrapper implementation for integration with TextDefinition::ParseLineX
+ */
+
+#include "pch.h"
+#include
+#include "TreeSitterWrapper.h"
+#include "TreeSitterParser.h"
+#include "utils/ctchar.h"
+#include
+
+/**
+ * @brief TreeSitter-based ParseLineX implementation
+ *
+ * Can be registered as TextDefinition::ParseLineX. The line is identified by
+ * nLineIndex and the parser by pContext; the cookie is not used and the
+ * returned cookie is always 0.
+ *
+ * @param dwCookie State cookie (unused)
+ * @param nLineIndex Zero-based index of the line to colorize
+ * @param pszChars Line text (unused - TreeSitter parses the entire content)
+ * @param nLength Line length; only used to bound the number of output blocks
+ * @param pBuf Output buffer (may be nullptr)
+ * @param nActualItems In/out block count, see CTreeSitterParser::GetLineBlocks()
+ * @param pContext The CTreeSitterParser to use, or nullptr
+ * @return Cookie for the next line (always 0)
+ */
+unsigned ParseLineTreeSitter(unsigned dwCookie, int nLineIndex, const tchar_t* pszChars,
+    int nLength, CrystalLineParser::TEXTBLOCK* pBuf,
+    int& nActualItems, void* pContext)
+{
+    auto* pParser = static_cast<CTreeSitterParser*>(pContext);
+    if (pParser != nullptr)
+    {
+        // Lazy reparse: if cache is dirty, reparse now (once per paint cycle)
+        if (pParser->IsDirty())
+            pParser->EnsureParsed(pParser->GetBuffer());
+
+        // Bounds safety: use the parser's cached line count to avoid
+        // accessing stale data. We intentionally avoid calling
+        // LocateTextBuffer() here since ParseLine may be called
+        // before the window handle is fully set up.
+        if (pParser->HasTree() &&
+            nLineIndex >= 0 && nLineIndex < pParser->GetCachedLineCount())
+        {
+            // Buffer size: GetTextBlocks allocates (nLength+1)*3 TEXTBLOCK
+            // entries. Hand that capacity to GetLineBlocks so it cannot
+            // write past the end of pBuf (0 would disable the check).
+            const int nMaxBlocks = ((nLength > 0 ? nLength : 0) + 1) * 3;
+
+            pParser->GetLineBlocks(nLineIndex, pBuf, nActualItems, nMaxBlocks);
+
+            // Return cookie for next line
+            return 0;
+        }
+    }
+
+    // If parser is not available, set default NORMALTEXT block
+    if (pBuf != nullptr)
+    {
+        nActualItems = 1;
+        pBuf[0].m_nCharPos = 0;
+        pBuf[0].m_nColorIndex = COLORINDEX_NORMALTEXT;
+        pBuf[0].m_nBgColorIndex = COLORINDEX_BKGND;
+    }
+
+    // Return cookie for next line
+    return 0;
+}
+
+/**
+ * @brief Dynamically create TextDefinition for TreeSitter
+ *
+ * @param pszExt File extension (without dot, e.g., "cpp", "fs")
+ * @param pszName Language name (for display, e.g., "C++", "F#")
+ * @param nBufferId Buffer ID (for context identification; currently unused)
+ * @return New TextDefinition if TreeSitter is available, nullptr otherwise
+ *
+ * @note The returned TextDefinition must be freed by the caller using FreeTreeSitterTextDefinition()
+ */
+CrystalLineParser::TextDefinition* CreateTreeSitterTextDefinition(
+    const tchar_t* pszExt, const tchar_t* pszName, int nBufferId)
+{
+    if (pszExt == nullptr || pszName == nullptr)
+        return nullptr;
+
+    // Check if TreeSitterRegistry has a language for this extension
+    const CTreeSitterLanguage* pLang = TreeSitterRegistry::Instance().GetLanguageForExt(pszExt);
+    if (pLang == nullptr || pLang->GetLanguage() == nullptr)
+    {
+        // Return nullptr if TreeSitter language is not available
+        return nullptr;
+    }
+
+    // Heap copy of a null-terminated string, released with delete[]
+    auto DuplicateString = [](const tchar_t* pszSrc)
+    {
+        const size_t nLen = tc::tcslen(pszSrc);
+        std::unique_ptr<tchar_t[]> pCopy(new tchar_t[nLen + 1]);
+        std::char_traits<tchar_t>::copy(pCopy.get(), pszSrc, nLen + 1);
+        return pCopy;
+    };
+
+    // Build every piece under RAII ownership first, so that an allocation
+    // failure part-way through releases what was already allocated.
+    // (operator new throws on failure; it never returns nullptr.)
+    std::unique_ptr<tchar_t[]> pName = DuplicateString(pszName);
+    std::unique_ptr<tchar_t[]> pExts = DuplicateString(pszExt);
+    std::unique_ptr<CrystalLineParser::TextDefinition> pDef(new CrystalLineParser::TextDefinition());
+
+    // Initialize TextDefinition fields
+    pDef->type = CrystalLineParser::SRC_PLAIN;  // TreeSitter does not have a specific type
+    pDef->ParseLineX = ParseLineTreeSitter;
+    pDef->flags = 0;
+    pDef->encoding = 0;
+
+    // Hand the string copies over to the definition
+    pDef->name = pName.release();
+    pDef->exts = pExts.release();
+    pDef->extsIsDynamic = true;
+
+    // Comment syntax is empty (TreeSitter is AST-based, so not needed)
+    pDef->opencomment[0] = _T('\0');
+    pDef->closecomment[0] = _T('\0');
+    pDef->commentline[0] = _T('\0');
+
+    return pDef.release();
+}
+
+/**
+ * @brief Release a TextDefinition and the strings it owns.
+ *
+ * The name is always released; the extension string only when the
+ * definition is flagged as owning it (extsIsDynamic).
+ *
+ * @param pDef TextDefinition to free (nullptr is ignored)
+ */
+void FreeTreeSitterTextDefinition(CrystalLineParser::TextDefinition* pDef)
+{
+    if (pDef == nullptr)
+        return;
+
+    // delete[] on a null pointer is a no-op, so no extra null tests are needed
+    delete[] pDef->name;
+    if (pDef->extsIsDynamic)
+        delete[] pDef->exts;
+
+    delete pDef;
+}
+
+/**
+ * @brief Create a parser for a file's lines, for use by the diff code.
+ *
+ * Initializes the registry with its default directory if that has not
+ * happened yet, picks the language from the file name's extension, and
+ * parses the given lines.
+ *
+ * @param filePath Path or name of the file; only its extension is used.
+ * @param lines    The file's text, one entry per line.
+ * @return An opaque parser context to pass to
+ *         IsTreeSitterCommentPositionForDiff() and to release with
+ *         DestroyTreeSitterParseContextForDiff(), or nullptr when no
+ *         grammar is available for the extension.
+ */
+void* CreateTreeSitterParseContextForDiff(const std::wstring& filePath, const std::vector<std::wstring>& lines)
+{
+    // The extension is whatever follows the last dot of the file name. A dot
+    // that belongs to a directory component does not count.
+    std::wstring ext;
+    const size_t posOfDot = filePath.rfind(L'.');
+    const size_t posOfSep = filePath.find_last_of(L"\\/");
+    if (posOfDot != std::wstring::npos &&
+        (posOfSep == std::wstring::npos || posOfDot > posOfSep))
+        ext = filePath.substr(posOfDot + 1);
+
+    TreeSitterRegistry& registry = TreeSitterRegistry::Instance();
+    if (!registry.IsInitialized())
+        registry.Initialize();
+
+    const CTreeSitterLanguage* pLang = registry.GetLanguageForExt(ext);
+    if (pLang == nullptr || pLang->GetLanguage() == nullptr)
+        return nullptr;
+
+    // Owned by a smart pointer until it is handed to the caller, so the
+    // parser is not leaked if collecting or parsing the lines throws.
+    auto pParser = std::make_unique<CTreeSitterParser>();
+    pParser->SetLanguage(pLang);
+
+    std::vector<const tchar_t*> linePtrs;
+    std::vector<int> lineLens;
+    linePtrs.reserve(lines.size());
+    lineLens.reserve(lines.size());
+    for (const auto& line : lines)
+    {
+        linePtrs.push_back(line.c_str());
+        lineLens.push_back(static_cast<int>(line.size()));
+    }
+    pParser->ParseDocument(linePtrs.data(), lineLens.data(), static_cast<int>(linePtrs.size()));
+    return pParser.release();
+}
+
+/**
+ * @brief Release a context created by CreateTreeSitterParseContextForDiff().
+ * @param parseContext The context to destroy (nullptr is allowed).
+ */
+void DestroyTreeSitterParseContextForDiff(void* parseContext)
+{
+    // The context is a CTreeSitterParser; deleting through void* would skip
+    // its destructor, so cast back to the real type first.
+    delete static_cast<CTreeSitterParser*>(parseContext);
+}
+
+/**
+ * @brief Ask a diff parse context whether a position lies in a comment.
+ * @param parseContext Context from CreateTreeSitterParseContextForDiff(), or nullptr.
+ * @param nLineIndex   Zero-based line index.
+ * @param nCharPos     Zero-based position in the line.
+ * @return The result of CTreeSitterParser::IsCommentPosition(); false when
+ *         the context is nullptr.
+ */
+bool IsTreeSitterCommentPositionForDiff(void* parseContext, int nLineIndex, int nCharPos)
+{
+    const auto* pParser = static_cast<const CTreeSitterParser*>(parseContext);
+    if (pParser == nullptr)
+        return false;
+
+    return pParser->IsCommentPosition(nLineIndex, nCharPos);
+}
diff --git a/Externals/crystaledit/editlib/TreeSitterWrapper.h b/Externals/crystaledit/editlib/TreeSitterWrapper.h
new file mode 100644
index 00000000000..0c532998a14
--- /dev/null
+++ b/Externals/crystaledit/editlib/TreeSitterWrapper.h
@@ -0,0 +1,53 @@
+/**
+ * @file TreeSitterWrapper.h
+ *
+ * @brief TreeSitter wrapper for integration with TextDefinition::ParseLineX
+ */
+#pragma once
+
+#include
+#include
+#include "parsers/crystallineparser.h"
+#include