From 243273758b51e9d2aa1b426434d51cb25aff4ce1 Mon Sep 17 00:00:00 2001 From: Joe Wicentowski Date: Fri, 24 Apr 2026 15:59:32 -0400 Subject: [PATCH 1/4] [feature] Add EXPath Binary Module 4.0 as built-in extension module Port the EXPath Binary Module (http://expath.org/ns/binary) from a standalone XAR package (eXist-db/exist-binary) to a built-in extension module. This makes the module available out of the box without requiring XAR installation. The EXPath Binary Module implements the EXPath Binary Module 4.0 specification for binary data manipulation (hex/bin/octal conversion, packing/unpacking, bitwise operations, text encoding/decoding). Source: https://github.com/eXist-db/exist-binary Co-Authored-By: Claude Opus 4.6 (1M context) --- exist-distribution/pom.xml | 6 + exist-distribution/src/main/config/conf.xml | 1 + extensions/modules/expath-binary/pom.xml | 67 ++++ .../modules/binary/BinaryBasicFunctions.java | 326 +++++++++++++++++ .../binary/BinaryBitwiseFunctions.java | 199 ++++++++++ .../binary/BinaryConversionFunctions.java | 258 +++++++++++++ .../xquery/modules/binary/BinaryModule.java | 119 ++++++ .../modules/binary/BinaryModuleErrorCode.java | 68 ++++ .../modules/binary/BinaryModuleHelper.java | 119 ++++++ .../binary/BinaryPackingFunctions.java | 339 ++++++++++++++++++ .../modules/binary/BinaryTextFunctions.java | 194 ++++++++++ extensions/modules/pom.xml | 1 + 12 files changed, 1697 insertions(+) create mode 100644 extensions/modules/expath-binary/pom.xml create mode 100644 extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java create mode 100644 extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java create mode 100644 extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java create mode 100644 extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java create mode 
100644 extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleErrorCode.java create mode 100644 extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java create mode 100644 extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryPackingFunctions.java create mode 100644 extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java diff --git a/exist-distribution/pom.xml b/exist-distribution/pom.xml index 2369cda5832..a7d1683b598 100644 --- a/exist-distribution/pom.xml +++ b/exist-distribution/pom.xml @@ -188,6 +188,12 @@ ${project.version} runtime + + ${project.groupId} + exist-expath-binary + ${project.version} + runtime + ${project.groupId} exist-counter diff --git a/exist-distribution/src/main/config/conf.xml b/exist-distribution/src/main/config/conf.xml index 6a9937c0e03..0045adb6448 100644 --- a/exist-distribution/src/main/config/conf.xml +++ b/exist-distribution/src/main/config/conf.xml @@ -1048,6 +1048,7 @@ + diff --git a/extensions/modules/expath-binary/pom.xml b/extensions/modules/expath-binary/pom.xml new file mode 100644 index 00000000000..6f0723e05de --- /dev/null +++ b/extensions/modules/expath-binary/pom.xml @@ -0,0 +1,67 @@ + + + + 4.0.0 + + + org.exist-db + exist-parent + 7.0.0-SNAPSHOT + ../../../exist-parent + + + exist-expath-binary + jar + + eXist-db EXPath Binary Module + EXPath Binary Module 4.0 for eXist-db (http://expath.org/ns/binary) + + + scm:git:https://github.com/exist-db/exist.git + scm:git:https://github.com/exist-db/exist.git + scm:git:https://github.com/exist-db/exist.git + HEAD + + + + + org.exist-db + exist-core + ${project.version} + + + + commons-io + commons-io + + + + com.google.code.findbugs + jsr305 + + + + + diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java 
b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java new file mode 100644 index 00000000000..f78789e2f06 --- /dev/null +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java @@ -0,0 +1,326 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; + +import java.io.IOException; +import java.util.Arrays; + +import static org.exist.xquery.FunctionDSL.*; + +/** + * EXPath Binary Module 4.0 — Basic Operations (Section 5). + * + *
    + *
  • bin:length
  • + *
  • bin:part
  • + *
  • bin:join
  • + *
  • bin:insert-before
  • + *
  • bin:pad-left
  • + *
  • bin:pad-right
  • + *
  • bin:find
  • + *
+ *
+ * @see EXPath Binary Module 4.0 §5
+ */
+public class BinaryBasicFunctions extends BasicFunction {
+
+    private static final QName QN_LENGTH = new QName("length", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_PART = new QName("part", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_JOIN = new QName("join", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_INSERT_BEFORE = new QName("insert-before", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_PAD_LEFT = new QName("pad-left", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_PAD_RIGHT = new QName("pad-right", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_FIND = new QName("find", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+
+    static final FunctionSignature FS_LENGTH = functionSignature(
+            QN_LENGTH,
+            "Returns the size of binary data in octets.",
+            returns(Type.INTEGER),
+            param("value", Type.BASE64_BINARY, "The binary data")
+    );
+
+    static final FunctionSignature[] FS_PART = functionSignatures(
+            QN_PART,
+            "Returns a specified part of binary data.",
+            returnsOpt(Type.BASE64_BINARY),
+            arities(
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based offset")
+                    ),
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based offset"),
+                            param("size", Type.INTEGER, "The number of octets to return")
+                    )
+            )
+    );
+
+    static final FunctionSignature FS_JOIN = functionSignature(
+            QN_JOIN,
+            "Returns the concatenation of binary data.",
+            returns(Type.BASE64_BINARY),
+            optManyParam("values", Type.BASE64_BINARY, "The binary data items to join")
+    );
+
+    static final FunctionSignature FS_INSERT_BEFORE = functionSignature(
+            QN_INSERT_BEFORE,
+            "Inserts additional binary data at a given point in other binary data.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value", Type.BASE64_BINARY, "The binary data"),
+            param("offset", Type.INTEGER, "The zero-based offset for insertion"),
+            optParam("extra", Type.BASE64_BINARY, "The binary data to insert")
+    );
+
+    static final FunctionSignature[] FS_PAD_LEFT = functionSignatures(
+            QN_PAD_LEFT,
+            "Pads binary data on the left to a specified size.",
+            returnsOpt(Type.BASE64_BINARY),
+            arities(
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data"),
+                            param("count", Type.INTEGER, "The number of octets to pad")
+                    ),
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data"),
+                            param("count", Type.INTEGER, "The number of octets to pad"),
+                            param("octet", Type.INTEGER, "The octet value to use for padding (0-255)")
+                    )
+            )
+    );
+
+    static final FunctionSignature[] FS_PAD_RIGHT = functionSignatures(
+            QN_PAD_RIGHT,
+            "Pads binary data on the right to a specified size.",
+            returnsOpt(Type.BASE64_BINARY),
+            arities(
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data"),
+                            param("count", Type.INTEGER, "The number of octets to pad")
+                    ),
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data"),
+                            param("count", Type.INTEGER, "The number of octets to pad"),
+                            param("octet", Type.INTEGER, "The octet value to use for padding (0-255)")
+                    )
+            )
+    );
+
+    static final FunctionSignature FS_FIND = functionSignature(
+            QN_FIND,
+            "Returns the first location of a binary search sequence within binary data.",
+            returnsOpt(Type.INTEGER),
+            optParam("value", Type.BASE64_BINARY, "The binary data to search"),
+            param("offset", Type.INTEGER, "The zero-based offset to start searching from"),
+            param("search", Type.BASE64_BINARY, "The binary data to search for")
+    );
+
+    public BinaryBasicFunctions(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Dispatches on the local name this instance was called as.
+     * Any name other than the six tested explicitly falls through to {@code bin:find}.
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        if (isCalledAs("length")) {
+            return length(args);
+        } else if (isCalledAs("part")) {
+            return part(args);
+        } else if (isCalledAs("join")) {
+            return join(args);
+        } else if (isCalledAs("insert-before")) {
+            return insertBefore(args);
+        } else if (isCalledAs("pad-left")) {
+            return padLeft(args);
+        } else if (isCalledAs("pad-right")) {
+            return padRight(args);
+        } else {
+            return find(args);
+        }
+    }
+
+    /**
+     * bin:length — number of octets in {@code args[0]}.
+     *
+     * @throws XPathException XPTY0004 when the helper yields no data for the argument
+     */
+    private Sequence length(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            throw new XPathException(this, org.exist.xquery.ErrorCodes.XPTY0004,
+                    "Empty sequence is not allowed as argument to bin:length()");
+        }
+        return new IntegerValue(this, data.length);
+    }
+
+    /**
+     * bin:part — copies {@code size} octets starting at {@code offset}; without a size
+     * argument everything from {@code offset} to the end is returned.
+     *
+     * @throws XPathException INDEX_OUT_OF_RANGE when offset or offset + size lies outside the data,
+     *                        NEGATIVE_SIZE when size is negative
+     */
+    private Sequence part(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        if (offset < 0 || offset > data.length) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " is out of range for binary data of length " + data.length);
+        }
+
+        final int size;
+        if (args.length > 2 && !args[2].isEmpty()) {
+            size = ((IntegerValue) args[2].itemAt(0)).getInt();
+            if (size < 0) {
+                throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                        "Size must not be negative: " + size);
+            }
+            // Compare against the remaining length instead of computing offset + size:
+            // the sum can overflow int for a large size and would then slip past this
+            // check, surfacing as an IllegalArgumentException from Arrays.copyOfRange.
+            if (size > data.length - offset) {
+                throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                        "Offset " + offset + " + size " + size + " exceeds binary data length " + data.length);
+            }
+        } else {
+            size = data.length - offset;
+        }
+
+        final byte[] result = Arrays.copyOfRange(data, offset, offset + size);
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /**
+     * bin:join — concatenates every item of {@code args[0]} in order; an empty
+     * sequence yields zero-length binary data.
+     */
+    private Sequence join(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        try (final UnsynchronizedByteArrayOutputStream os = new UnsynchronizedByteArrayOutputStream()) {
+            for (int i = 0; i < args[0].getItemCount(); i++) {
+                final byte[] data = BinaryModuleHelper.getBinaryData(args[0].itemAt(i).toSequence());
+                if (data != null) {
+                    os.write(data);
+                }
+            }
+            return BinaryModuleHelper.createBinaryResult(context, this, os.toByteArray());
+        } catch (final IOException e) {
+            throw new XPathException(this, "Failed to join binary data: " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * bin:insert-before — splices {@code args[2]} into {@code args[0]} at {@code offset}.
+     * Missing or zero-length extra data returns the original data unchanged.
+     *
+     * @throws XPathException INDEX_OUT_OF_RANGE when offset lies outside the data
+     */
+    private Sequence insertBefore(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        if (offset < 0 || offset > data.length) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " is out of range for binary data of length " + data.length);
+        }
+
+        final byte[] extra = BinaryModuleHelper.getBinaryData(args[2]);
+        if (extra == null || extra.length == 0) {
+            return BinaryModuleHelper.createBinaryResult(context, this, data);
+        }
+
+        // Layout of the result: data[0, offset) | extra | data[offset, end)
+        final byte[] result = new byte[data.length + extra.length];
+        System.arraycopy(data, 0, result, 0, offset);
+        System.arraycopy(extra, 0, result, offset, extra.length);
+        System.arraycopy(data, offset, result, offset + extra.length, data.length - offset);
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /** bin:pad-left — prepends {@code count} padding octets to the data. */
+    private Sequence padLeft(final Sequence[] args) throws XPathException {
+        return pad(args, true);
+    }
+
+    /** bin:pad-right — appends {@code count} padding octets to the data. */
+    private Sequence padRight(final Sequence[] args) throws XPathException {
+        return pad(args, false);
+    }
+
+    /**
+     * Shared implementation of bin:pad-left and bin:pad-right.
+     *
+     * @param args    value, count and the optional padding octet (0 when absent)
+     * @param padLeft {@code true} to put the padding before the data, {@code false} after it
+     * @return the padded binary data, or the empty sequence when no data was supplied
+     * @throws XPathException NEGATIVE_SIZE when count is negative
+     */
+    private Sequence pad(final Sequence[] args, final boolean padLeft) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int count = ((IntegerValue) args[1].itemAt(0)).getInt();
+        if (count < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Pad count must not be negative: " + count);
+        }
+
+        // NOTE(review): an octet outside 0-255 is narrowed by the (byte) cast rather than
+        // rejected — confirm whether an octet-out-of-range error should be raised here.
+        final byte octet = (args.length > 2 && !args[2].isEmpty())
+                ? (byte) ((IntegerValue) args[2].itemAt(0)).getInt()
+                : 0;
+
+        final byte[] result = new byte[data.length + count];
+        final int dataStart = padLeft ? count : 0;
+        final int padStart = padLeft ? 0 : data.length;
+        System.arraycopy(data, 0, result, dataStart, data.length);
+        Arrays.fill(result, padStart, padStart + count, octet);
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /**
+     * bin:find — index of the first occurrence of {@code args[2]} in {@code args[0]} at or
+     * after {@code offset}. A missing or zero-length search value matches at {@code offset};
+     * no match yields the empty sequence.
+     *
+     * @throws XPathException INDEX_OUT_OF_RANGE when offset lies outside the data
+     */
+    private Sequence find(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        if (offset < 0 || offset > data.length) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " is out of range for binary data of length " + data.length);
+        }
+
+        final byte[] search = BinaryModuleHelper.getBinaryData(args[2]);
+        if (search == null || search.length == 0) {
+            return new IntegerValue(this, offset);
+        }
+
+        // Naive byte subsequence search: try every start position that leaves room for the needle
+        outer:
+        for (int i = offset; i <= data.length - search.length; i++) {
+            for (int j = 0; j < search.length; j++) {
+                if (data[i + j] != search[j]) {
+                    continue outer;
+                }
+            }
+            return new IntegerValue(this, i);
+        }
+
+        return Sequence.EMPTY_SEQUENCE;
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java
new file mode 100644
index 00000000000..ab13dc209ec
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java
@@ -0,0 +1,199 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery.modules.binary;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.IntegerValue;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.Type;
+
+import java.math.BigInteger;
+import java.util.Arrays;
+
+import static org.exist.xquery.FunctionDSL.*;
+
+/**
+ * EXPath Binary Module 4.0 — Bitwise Operations (Section 8).
+ *
+ *
    + *
  • bin:or
  • + *
  • bin:xor
  • + *
  • bin:and
  • + *
  • bin:not
  • + *
  • bin:shift
  • + *
+ *
+ * @see EXPath Binary Module 4.0 §8
+ */
+public class BinaryBitwiseFunctions extends BasicFunction {
+
+    private static final QName QN_OR = new QName("or", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_XOR = new QName("xor", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_AND = new QName("and", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_NOT = new QName("not", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_SHIFT = new QName("shift", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+
+    static final FunctionSignature FS_OR = functionSignature(
+            QN_OR,
+            "Returns the bitwise OR of two binary values.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value1", Type.BASE64_BINARY, "The first binary value"),
+            optParam("value2", Type.BASE64_BINARY, "The second binary value")
+    );
+
+    static final FunctionSignature FS_XOR = functionSignature(
+            QN_XOR,
+            "Returns the bitwise XOR of two binary values.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value1", Type.BASE64_BINARY, "The first binary value"),
+            optParam("value2", Type.BASE64_BINARY, "The second binary value")
+    );
+
+    static final FunctionSignature FS_AND = functionSignature(
+            QN_AND,
+            "Returns the bitwise AND of two binary values.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value1", Type.BASE64_BINARY, "The first binary value"),
+            optParam("value2", Type.BASE64_BINARY, "The second binary value")
+    );
+
+    static final FunctionSignature FS_NOT = functionSignature(
+            QN_NOT,
+            "Returns the bitwise NOT of a binary value.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value", Type.BASE64_BINARY, "The binary value")
+    );
+
+    static final FunctionSignature FS_SHIFT = functionSignature(
+            QN_SHIFT,
+            "Shifts bits in binary data. Positive shifts left, negative shifts right.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value", Type.BASE64_BINARY, "The binary value"),
+            param("by", Type.INTEGER, "The number of bits to shift (positive = left, negative = right)")
+    );
+
+    public BinaryBitwiseFunctions(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Dispatches on the local name this instance was called as.
+     * Any name other than or/xor/and/not falls through to {@code bin:shift}.
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        if (isCalledAs("or")) {
+            return bitwiseOp(args, BitwiseOp.OR);
+        } else if (isCalledAs("xor")) {
+            return bitwiseOp(args, BitwiseOp.XOR);
+        } else if (isCalledAs("and")) {
+            return bitwiseOp(args, BitwiseOp.AND);
+        } else if (isCalledAs("not")) {
+            return bitwiseNot(args);
+        } else {
+            return bitwiseShift(args);
+        }
+    }
+
+    /** The two-operand operations handled by {@link #bitwiseOp}. */
+    private enum BitwiseOp { OR, XOR, AND }
+
+    /**
+     * bin:or / bin:xor / bin:and — combines the two operands octet by octet.
+     *
+     * @param args the two binary operands
+     * @param op   which operation to apply
+     * @return the combined data, or the empty sequence when either operand yields no data
+     * @throws XPathException DIFFERING_LENGTH_ARGUMENTS when the operands differ in length
+     */
+    private Sequence bitwiseOp(final Sequence[] args, final BitwiseOp op) throws XPathException {
+        final byte[] data1 = BinaryModuleHelper.getBinaryData(args[0]);
+        final byte[] data2 = BinaryModuleHelper.getBinaryData(args[1]);
+
+        if (data1 == null || data2 == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        if (data1.length != data2.length) {
+            throw new XPathException(this, BinaryModuleErrorCode.DIFFERING_LENGTH_ARGUMENTS,
+                    "Arguments to bin:" + op.name().toLowerCase() + "() must have equal length, but got "
+                            + data1.length + " and " + data2.length);
+        }
+
+        final byte[] result = new byte[data1.length];
+        for (int i = 0; i < data1.length; i++) {
+            switch (op) {
+                case OR:
+                    result[i] = (byte) (data1[i] | data2[i]);
+                    break;
+                case XOR:
+                    result[i] = (byte) (data1[i] ^ data2[i]);
+                    break;
+                case AND:
+                    result[i] = (byte) (data1[i] & data2[i]);
+                    break;
+            }
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /** bin:not — inverts every bit of the data; the empty sequence is passed through. */
+    private Sequence bitwiseNot(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final byte[] result = new byte[data.length];
+        for (int i = 0; i < data.length; i++) {
+            result[i] = (byte) ~data[i];
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /**
+     * bin:shift — shifts the data, read as one big-endian bit string, by {@code by} bits
+     * (positive = left, negative = right). The result always has the length of the input;
+     * bits shifted past either end are dropped and vacated bits are zero.
+     */
+    private Sequence bitwiseShift(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int by = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final int originalLength = data.length;
+
+        if (originalLength == 0) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        final byte[] result = new byte[originalLength];
+
+        // Widen before taking the magnitude so that Integer.MIN_VALUE cannot overflow.
+        final long distance = Math.abs((long) by);
+        final long bitLength = (long) originalLength * Byte.SIZE;
+        if (distance >= bitLength) {
+            // Every bit is shifted out, so the answer is all zeros. Returning early avoids
+            // building a BigInteger as wide as the shift distance, which for a large left
+            // shift would exhaust memory or exceed BigInteger's supported range.
+            return BinaryModuleHelper.createBinaryResult(context, this, result);
+        }
+
+        // Interpret the data as an unsigned magnitude; BigInteger.shiftLeft performs a
+        // right shift when the distance is negative, so one call covers both directions.
+        final byte[] shifted = new BigInteger(1, data).shiftLeft(by).toByteArray();
+
+        // Convert back to a byte array of the original length
+        if (shifted.length <= originalLength) {
+            // Right-align in result; the leading octets stay zero
+            System.arraycopy(shifted, 0, result, originalLength - shifted.length, shifted.length);
+        } else {
+            // Truncate from the left (overflowed bits and/or the sign octet) to keep the original length
+            System.arraycopy(shifted, shifted.length - originalLength, result, 0, originalLength);
+        }
+
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java
new file mode 100644
index 00000000000..8ec6f9614fa
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java
@@ -0,0 +1,258 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ *
http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import static org.exist.xquery.FunctionDSL.*; + +/** + * EXPath Binary Module 4.0 — Constants and Conversions (Section 4). + * + *
    + *
  • bin:hex
  • + *
  • bin:bin
  • + *
  • bin:octal
  • + *
  • bin:to-octets
  • + *
  • bin:from-octets
  • + *
+ *
+ * @see EXPath Binary Module 4.0 §4
+ */
+public class BinaryConversionFunctions extends BasicFunction {
+
+    private static final QName QN_HEX = new QName("hex", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_BIN = new QName("bin", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_OCTAL = new QName("octal", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_TO_OCTETS = new QName("to-octets", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_FROM_OCTETS = new QName("from-octets", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+
+    /** Characters ignored in hex/bin/octal input: whitespace and underscores. Compiled once and reused. */
+    private static final java.util.regex.Pattern SEPARATORS = java.util.regex.Pattern.compile("[\\s_]");
+
+    static final FunctionSignature FS_HEX = functionSignature(
+            QN_HEX,
+            "Creates an xs:base64Binary value from a hexadecimal string.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value", Type.STRING, "The hexadecimal string")
+    );
+
+    static final FunctionSignature FS_BIN = functionSignature(
+            QN_BIN,
+            "Creates an xs:base64Binary value from a binary (0/1) string.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value", Type.STRING, "The binary digit string")
+    );
+
+    static final FunctionSignature FS_OCTAL = functionSignature(
+            QN_OCTAL,
+            "Creates an xs:base64Binary value from an octal string.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value", Type.STRING, "The octal string")
+    );
+
+    static final FunctionSignature FS_TO_OCTETS = functionSignature(
+            QN_TO_OCTETS,
+            "Returns the binary data as a sequence of octets.",
+            returnsOptMany(Type.INTEGER),
+            param("value", Type.BASE64_BINARY, "The binary data")
+    );
+
+    static final FunctionSignature FS_FROM_OCTETS = functionSignature(
+            QN_FROM_OCTETS,
+            "Converts a sequence of octets into binary data.",
+            returns(Type.BASE64_BINARY),
+            optManyParam("values", Type.INTEGER, "The octet values (0-255)")
+    );
+
+    public BinaryConversionFunctions(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Dispatches on the local name this instance was called as.
+     * Any name other than hex/bin/octal/to-octets falls through to {@code bin:from-octets}.
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        if (isCalledAs("hex")) {
+            return hexToBinary(args);
+        } else if (isCalledAs("bin")) {
+            return binToBinary(args);
+        } else if (isCalledAs("octal")) {
+            return octalToBinary(args);
+        } else if (isCalledAs("to-octets")) {
+            return toOctets(args);
+        } else {
+            return fromOctets(args);
+        }
+    }
+
+    /** Removes the ignorable separator characters (whitespace and underscores) from digit-string input. */
+    private static String stripSeparators(final String value) {
+        return SEPARATORS.matcher(value).replaceAll("");
+    }
+
+    /**
+     * Packs a string of '0'/'1' characters into octets, most significant bit first.
+     * The string is zero-padded on the left to a multiple of 8 bits.
+     */
+    private static byte[] packBits(final String bits) {
+        final int remainder = bits.length() % 8;
+        final String padded = remainder == 0 ? bits : "0".repeat(8 - remainder) + bits;
+
+        final byte[] data = new byte[padded.length() / 8];
+        for (int i = 0; i < data.length; i++) {
+            data[i] = (byte) Integer.parseInt(padded.substring(i * 8, i * 8 + 8), 2);
+        }
+        return data;
+    }
+
+    /**
+     * bin:hex — parses hexadecimal digits into octets. An odd number of digits is
+     * zero-padded on the left; input that is empty after removing separators yields
+     * zero-length binary data.
+     *
+     * @throws XPathException NON_NUMERIC_CHARACTER on any character outside [0-9a-fA-F]
+     */
+    private Sequence hexToBinary(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        String hex = stripSeparators(args[0].getStringValue());
+
+        if (hex.isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        // Validate characters
+        for (int i = 0; i < hex.length(); i++) {
+            final char c = hex.charAt(i);
+            if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
+                throw new XPathException(this, BinaryModuleErrorCode.NON_NUMERIC_CHARACTER,
+                        "Invalid hexadecimal character: '" + c + "'");
+            }
+        }
+
+        // Prepend "0" if odd length
+        if (hex.length() % 2 != 0) {
+            hex = "0" + hex;
+        }
+
+        final byte[] data = new byte[hex.length() / 2];
+        for (int i = 0; i < data.length; i++) {
+            data[i] = (byte) Integer.parseInt(hex.substring(i * 2, i * 2 + 2), 16);
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+
+    /**
+     * bin:bin — parses a string of binary digits into octets, zero-padding on the left
+     * to a whole number of octets.
+     *
+     * @throws XPathException NON_NUMERIC_CHARACTER on any character other than '0' or '1'
+     */
+    private Sequence binToBinary(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final String bin = stripSeparators(args[0].getStringValue());
+
+        if (bin.isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        // Validate characters
+        for (int i = 0; i < bin.length(); i++) {
+            final char c = bin.charAt(i);
+            if (c != '0' && c != '1') {
+                throw new XPathException(this, BinaryModuleErrorCode.NON_NUMERIC_CHARACTER,
+                        "Invalid binary character: '" + c + "'");
+            }
+        }
+
+        return BinaryModuleHelper.createBinaryResult(context, this, packBits(bin));
+    }
+
+    /**
+     * bin:octal — parses a string of octal digits into octets. Each digit contributes
+     * three bits; up to two leading zero bits are dropped when that brings the bit count
+     * closer to a whole number of octets, and the rest is zero-padded on the left.
+     *
+     * @throws XPathException NON_NUMERIC_CHARACTER on any character outside [0-7]
+     */
+    private Sequence octalToBinary(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final String octal = stripSeparators(args[0].getStringValue());
+
+        if (octal.isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        // Validate characters
+        for (int i = 0; i < octal.length(); i++) {
+            final char c = octal.charAt(i);
+            if (c < '0' || c > '7') {
+                throw new XPathException(this, BinaryModuleErrorCode.NON_NUMERIC_CHARACTER,
+                        "Invalid octal character: '" + c + "'");
+            }
+        }
+
+        // Convert each octal digit to its 3-bit binary form, most significant bit first
+        final StringBuilder bits = new StringBuilder(octal.length() * 3);
+        for (int i = 0; i < octal.length(); i++) {
+            final int digit = octal.charAt(i) - '0';
+            for (int shift = 2; shift >= 0; shift--) {
+                bits.append((digit >> shift) & 1);
+            }
+        }
+
+        // Skip up to 2 leading zero bits, stopping as soon as the remaining length is a
+        // multiple of 8. There are always at least 3 bits, so the index stays in range.
+        int start = 0;
+        while (start < 2 && bits.charAt(start) == '0' && (bits.length() - start) % 8 != 0) {
+            start++;
+        }
+
+        return BinaryModuleHelper.createBinaryResult(context, this, packBits(bits.substring(start)));
+    }
+
+    /**
+     * bin:to-octets — returns each octet as an unsigned integer (0-255); missing or
+     * zero-length data yields the empty sequence.
+     */
+    private Sequence toOctets(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null || data.length == 0) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final ValueSequence result = new ValueSequence(data.length);
+        for (final byte b : data) {
+            // mask to undo Java's sign extension of byte
+            result.add(new IntegerValue(this, b & 0xFF));
+        }
+        return result;
+    }
+
+    /**
+     * bin:from-octets — builds binary data from a sequence of integers, one octet per item;
+     * an empty sequence yields zero-length binary data.
+     */
+    private Sequence fromOctets(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        final int len = args[0].getItemCount();
+        final byte[] data = new byte[len];
+        for (int i = 0; i < len; i++) {
+            // NOTE(review): values outside 0-255 are narrowed by the (byte) cast rather than
+            // rejected — confirm whether an octet-out-of-range error should be raised here.
+            data[i] = (byte) ((IntegerValue) args[0].itemAt(i)).getInt();
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java
new file mode 100644
index 00000000000..28d6f3eb820
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java
@@ -0,0 +1,119 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery.modules.binary;
+
+import org.exist.xquery.AbstractInternalModule;
+import org.exist.xquery.FunctionDef;
+
+import java.util.List;
+import java.util.Map;
+
+import static org.exist.xquery.FunctionDSL.functionDefs;
+
+/**
+ * EXPath Binary Module 4.0.
+ *
+ * <p>Collects the function signatures declared by the conversion, basic,
+ * text, packing and bitwise function classes of this package and hands them
+ * to {@link AbstractInternalModule} under the {@code bin} prefix.</p>
+ *
+ * @see <a href="https://qt4cg.org/specifications/expath-binary-40/Overview.html">EXPath Binary Module 4.0</a>
+ */
+public class BinaryModule extends AbstractInternalModule {
+
+    /** Namespace URI of the module's functions and error codes. */
+    public static final String NAMESPACE_URI = "http://expath.org/ns/binary";
+
+    /** Default namespace prefix returned by {@link #getDefaultPrefix()}. */
+    public static final String PREFIX = "bin";
+
+    /** Inclusion date string; not referenced anywhere else in this class. */
+    public static final String INCLUSION_DATE = "2026-03-04";
+
+    /** Release identifier returned by {@link #getReleaseVersion()}. */
+    public static final String RELEASED_IN_VERSION = "eXist-7.0.0";
+
+    // One functionDefs(...) group per implementing class; the outer call merges the groups.
+    private static final FunctionDef[] functions = functionDefs(
+            functionDefs(BinaryConversionFunctions.class,
+                    BinaryConversionFunctions.FS_HEX,
+                    BinaryConversionFunctions.FS_BIN,
+                    BinaryConversionFunctions.FS_OCTAL,
+                    BinaryConversionFunctions.FS_TO_OCTETS,
+                    BinaryConversionFunctions.FS_FROM_OCTETS),
+
+            functionDefs(BinaryBasicFunctions.class,
+                    BinaryBasicFunctions.FS_LENGTH,
+                    BinaryBasicFunctions.FS_PART[0],
+                    BinaryBasicFunctions.FS_PART[1],
+                    BinaryBasicFunctions.FS_JOIN,
+                    BinaryBasicFunctions.FS_INSERT_BEFORE,
+                    BinaryBasicFunctions.FS_PAD_LEFT[0],
+                    BinaryBasicFunctions.FS_PAD_LEFT[1],
+                    BinaryBasicFunctions.FS_PAD_RIGHT[0],
+                    BinaryBasicFunctions.FS_PAD_RIGHT[1],
+                    BinaryBasicFunctions.FS_FIND),
+
+            functionDefs(BinaryTextFunctions.class,
+                    BinaryTextFunctions.FS_DECODE_STRING[0],
+                    BinaryTextFunctions.FS_DECODE_STRING[1],
+                    BinaryTextFunctions.FS_DECODE_STRING[2],
+                    BinaryTextFunctions.FS_DECODE_STRING[3],
+                    BinaryTextFunctions.FS_ENCODE_STRING[0],
+                    BinaryTextFunctions.FS_ENCODE_STRING[1]),
+
+            functionDefs(BinaryPackingFunctions.class,
+                    BinaryPackingFunctions.FS_PACK_DOUBLE[0],
+                    BinaryPackingFunctions.FS_PACK_DOUBLE[1],
+                    BinaryPackingFunctions.FS_PACK_FLOAT[0],
+                    BinaryPackingFunctions.FS_PACK_FLOAT[1],
+                    BinaryPackingFunctions.FS_PACK_INTEGER[0],
+                    BinaryPackingFunctions.FS_PACK_INTEGER[1],
+                    BinaryPackingFunctions.FS_UNPACK_DOUBLE[0],
+                    BinaryPackingFunctions.FS_UNPACK_DOUBLE[1],
+                    BinaryPackingFunctions.FS_UNPACK_FLOAT[0],
+                    BinaryPackingFunctions.FS_UNPACK_FLOAT[1],
+                    BinaryPackingFunctions.FS_UNPACK_INTEGER[0],
+                    BinaryPackingFunctions.FS_UNPACK_INTEGER[1],
+                    BinaryPackingFunctions.FS_UNPACK_UNSIGNED_INTEGER[0],
+                    BinaryPackingFunctions.FS_UNPACK_UNSIGNED_INTEGER[1]),
+
+            functionDefs(BinaryBitwiseFunctions.class,
+                    BinaryBitwiseFunctions.FS_OR,
+                    BinaryBitwiseFunctions.FS_XOR,
+                    BinaryBitwiseFunctions.FS_AND,
+                    BinaryBitwiseFunctions.FS_NOT,
+                    BinaryBitwiseFunctions.FS_SHIFT)
+    );
+
+    /**
+     * Creates the module with the function table above.
+     *
+     * @param parameters module parameters, forwarded unchanged to the superclass
+     */
+    public BinaryModule(final Map<String, List<? extends Object>> parameters) {
+        super(functions, parameters);
+    }
+
+    @Override
+    public String getNamespaceURI() {
+        return NAMESPACE_URI;
+    }
+
+    @Override
+    public String getDefaultPrefix() {
+        return PREFIX;
+    }
+
+    @Override
+    public String getDescription() {
+        return "EXPath Binary Module 4.0 https://qt4cg.org/specifications/expath-binary-40/Overview.html";
+    }
+
+    @Override
+    public String getReleaseVersion() {
+        return RELEASED_IN_VERSION;
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleErrorCode.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleErrorCode.java
new file mode 100644
index 00000000000..cd2b0ad8daa
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleErrorCode.java
@@ -0,0 +1,68 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it
under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery.modules.binary;
+
+import org.exist.dom.QName;
+import org.exist.xquery.ErrorCodes.ErrorCode;
+
+/**
+ * Error codes for the EXPath Binary Module 4.0.
+ *
+ * <p>Every code is a QName in the {@link BinaryModule#NAMESPACE_URI} namespace
+ * with the {@link BinaryModule#PREFIX} prefix.</p>
+ *
+ * @see <a href="https://qt4cg.org/specifications/expath-binary-40/Overview.html">EXPath Binary Module 4.0 - Errors</a>
+ */
+public class BinaryModuleErrorCode {
+
+    /** {@code bin:non-numeric-character} */
+    public static final ErrorCode NON_NUMERIC_CHARACTER = define(
+            "non-numeric-character",
+            "The argument to bin:hex(), bin:bin(), or bin:octal() contains a character that is not valid for the specified notation.");
+
+    /** {@code bin:index-out-of-range} */
+    public static final ErrorCode INDEX_OUT_OF_RANGE = define(
+            "index-out-of-range",
+            "Offset and/or size is out of range for the given binary data.");
+
+    /** {@code bin:negative-size} */
+    public static final ErrorCode NEGATIVE_SIZE = define(
+            "negative-size",
+            "Size, count, or padding is negative.");
+
+    /** {@code bin:unknown-encoding} */
+    public static final ErrorCode UNKNOWN_ENCODING = define(
+            "unknown-encoding",
+            "The specified encoding is not supported.");
+
+    /** {@code bin:conversion-error} */
+    public static final ErrorCode CONVERSION_ERROR = define(
+            "conversion-error",
+            "An error occurred during encoding or decoding of a string.");
+
+    /** {@code bin:differing-length-arguments} */
+    public static final ErrorCode DIFFERING_LENGTH_ARGUMENTS = define(
+            "differing-length-arguments",
+            "The arguments to a bitwise operation are of differing length.");
+
+    /** {@code bin:invalid-encoding} */
+    public static final ErrorCode INVALID_ENCODING = define(
+            "invalid-encoding",
+            "The encoding is invalid for the given data.");
+
+    /** {@code bin:integer-too-large} */
+    public static final ErrorCode INTEGER_TOO_LARGE = define(
+            "integer-too-large",
+            "Integer value exceeds the implementation-defined maximum.");
+
+    private BinaryModuleErrorCode() {
+    }
+
+    /**
+     * Builds an error code in the module namespace.
+     *
+     * @param localName the local part of the error QName
+     * @param message the text handed to the {@link ErrorCode} constructor
+     * @return the new error code
+     */
+    private static ErrorCode define(final String localName, final String message) {
+        final QName name = new QName(localName, BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+        return new ErrorCode(name, message);
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java
new file mode 100644
index 00000000000..0b365bcba25
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java
@@ -0,0 +1,119 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery.modules.binary;
+
+import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
+import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
+import org.exist.xquery.ErrorCodes;
+import org.exist.xquery.Expression;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.Base64BinaryValueType;
+import org.exist.xquery.value.BinaryValue;
+import org.exist.xquery.value.BinaryValueFromInputStream;
+import org.exist.xquery.value.Sequence;
+
+import javax.annotation.Nullable;
+import java.io.IOException;
+
+/**
+ * Shared utility methods for the EXPath Binary Module functions.
+ */
+final class BinaryModuleHelper {
+
+    /**
+     * Extracts a byte array from a binary sequence argument.
+     *
+     * <p>Read failures are reported without a source expression; use
+     * {@link #getBinaryData(Expression, Sequence)} to attach one.</p>
+     *
+     * @param arg the sequence argument (expected to contain a single binary value)
+     * @return the byte array, or null if the argument is an empty sequence
+     * @throws XPathException if the binary data cannot be read
+     */
+    @Nullable
+    static byte[] getBinaryData(final Sequence arg) throws XPathException {
+        return getBinaryData(null, arg);
+    }
+
+    /**
+     * Extracts a byte array from a binary sequence argument, attributing any
+     * read failure to the calling expression.
+     *
+     * @param expr the calling expression used for error reporting, may be null
+     * @param arg the sequence argument (expected to contain a single binary value)
+     * @return the byte array, or null if the argument is an empty sequence
+     * @throws XPathException if the binary data cannot be read
+     */
+    @Nullable
+    static byte[] getBinaryData(@Nullable final Expression expr, final Sequence arg) throws XPathException {
+        if (arg.isEmpty()) {
+            return null;
+        }
+        // Only the first item is read; further items in the sequence are ignored.
+        final BinaryValue binary = (BinaryValue) arg.itemAt(0);
+        try (final UnsynchronizedByteArrayOutputStream os = new UnsynchronizedByteArrayOutputStream()) {
+            binary.streamBinaryTo(os);
+            return os.toByteArray();
+        } catch (final IOException e) {
+            throw new XPathException(expr, "Failed to read binary data: " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Creates an xs:base64Binary value from a byte array.
+     *
+     * @param context the XQuery context
+     * @param expr the calling expression (for error reporting)
+     * @param data the byte array
+     * @return the base64Binary value
+     * @throws XPathException if the value cannot be created
+     */
+    static BinaryValue createBinaryResult(final XQueryContext context, final Expression expr, final byte[] data) throws XPathException {
+        return BinaryValueFromInputStream.getInstance(
+                context,
+                new Base64BinaryValueType(),
+                new UnsynchronizedByteArrayInputStream(data),
+                expr
+        );
+    }
+
+    /**
+     * Validates the octet-order parameter string.
+     *
+     * @param expr the calling expression (for error reporting)
+     * @param order the order string
+     * @return true if little-endian, false if big-endian
+     * @throws XPathException with XPTY0004 if the value is not a valid octet order
+     */
+    static boolean isLittleEndian(final Expression expr, final String order) throws XPathException {
+        switch (order) {
+            case "most-significant-first":
+            case "big-endian":
+            case "BE":
+                return false;
+            case "least-significant-first":
+            case "little-endian":
+            case "LE":
+                return true;
+            default:
+                throw new XPathException(expr,
+                        ErrorCodes.XPTY0004,
+                        "Invalid octet order: '" + order + "'. Expected one of: most-significant-first, big-endian, BE, least-significant-first, little-endian, LE");
+        }
+    }
+
+    /**
+     * Reverses a byte array in place.
+     *
+     * @param data the array whose elements are swapped end-for-end
+     */
+    static void reverseBytes(final byte[] data) {
+        for (int i = 0, j = data.length - 1; i < j; i++, j--) {
+            final byte tmp = data[i];
+            data[i] = data[j];
+            data[j] = tmp;
+        }
+    }
+
+    private BinaryModuleHelper() {
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryPackingFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryPackingFunctions.java
new file mode 100644
index 00000000000..f29577e28e1
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryPackingFunctions.java
@@ -0,0 +1,339 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery.modules.binary;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.DoubleValue;
+import org.exist.xquery.value.FloatValue;
+import org.exist.xquery.value.IntegerValue;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.Type;
+
+import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import static org.exist.xquery.FunctionDSL.*;
+
+/**
+ * EXPath Binary Module 4.0 — Numeric Packing and Unpacking (Section 7).
+ *
+ * <ul>
+ *   <li>bin:pack-double</li>
+ *   <li>bin:pack-float</li>
+ *   <li>bin:pack-integer</li>
+ *   <li>bin:unpack-double</li>
+ *   <li>bin:unpack-float</li>
+ *   <li>bin:unpack-integer</li>
+ *   <li>bin:unpack-unsigned-integer</li>
+ * </ul>
+ *
+ * <p>All functions work on a big-endian byte array internally and reverse it
+ * when a little-endian octet order is requested.</p>
+ *
+ * @see <a href="https://qt4cg.org/specifications/expath-binary-40/Overview.html">EXPath Binary Module 4.0 §7</a>
+ */
+public class BinaryPackingFunctions extends BasicFunction {
+
+    private static final QName QN_PACK_DOUBLE = new QName("pack-double", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_PACK_FLOAT = new QName("pack-float", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_PACK_INTEGER = new QName("pack-integer", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_UNPACK_DOUBLE = new QName("unpack-double", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_UNPACK_FLOAT = new QName("unpack-float", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_UNPACK_INTEGER = new QName("unpack-integer", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_UNPACK_UNSIGNED_INTEGER = new QName("unpack-unsigned-integer", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+
+    static final FunctionSignature[] FS_PACK_DOUBLE = functionSignatures(
+            QN_PACK_DOUBLE,
+            "Returns the 8-octet binary representation of an xs:double value.",
+            returns(Type.BASE64_BINARY),
+            arities(
+                    arity(param("value", Type.DOUBLE, "The double value to pack")),
+                    arity(param("value", Type.DOUBLE, "The double value to pack"),
+                            param("order", Type.STRING, "The octet order: 'most-significant-first' (default), 'big-endian', 'BE', 'least-significant-first', 'little-endian', 'LE'"))
+            )
+    );
+
+    static final FunctionSignature[] FS_PACK_FLOAT = functionSignatures(
+            QN_PACK_FLOAT,
+            "Returns the 4-octet binary representation of an xs:float value.",
+            returns(Type.BASE64_BINARY),
+            arities(
+                    arity(param("value", Type.FLOAT, "The float value to pack")),
+                    arity(param("value", Type.FLOAT, "The float value to pack"),
+                            param("order", Type.STRING, "The octet order"))
+            )
+    );
+
+    static final FunctionSignature[] FS_PACK_INTEGER = functionSignatures(
+            QN_PACK_INTEGER,
+            "Returns the two's-complement binary representation of an xs:integer value.",
+            returns(Type.BASE64_BINARY),
+            arities(
+                    arity(param("value", Type.INTEGER, "The integer value to pack"),
+                            param("size", Type.INTEGER, "The number of octets in the result")),
+                    arity(param("value", Type.INTEGER, "The integer value to pack"),
+                            param("size", Type.INTEGER, "The number of octets in the result"),
+                            param("order", Type.STRING, "The octet order"))
+            )
+    );
+
+    static final FunctionSignature[] FS_UNPACK_DOUBLE = functionSignatures(
+            QN_UNPACK_DOUBLE,
+            "Extracts an xs:double value from binary data.",
+            returns(Type.DOUBLE),
+            arities(
+                    arity(param("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset")),
+                    arity(param("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset"),
+                            param("order", Type.STRING, "The octet order"))
+            )
+    );
+
+    static final FunctionSignature[] FS_UNPACK_FLOAT = functionSignatures(
+            QN_UNPACK_FLOAT,
+            "Extracts an xs:float value from binary data.",
+            returns(Type.FLOAT),
+            arities(
+                    arity(param("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset")),
+                    arity(param("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset"),
+                            param("order", Type.STRING, "The octet order"))
+            )
+    );
+
+    static final FunctionSignature[] FS_UNPACK_INTEGER = functionSignatures(
+            QN_UNPACK_INTEGER,
+            "Extracts a signed xs:integer value from binary data.",
+            returns(Type.INTEGER),
+            arities(
+                    arity(param("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset"),
+                            param("size", Type.INTEGER, "The number of octets to read")),
+                    arity(param("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset"),
+                            param("size", Type.INTEGER, "The number of octets to read"),
+                            param("order", Type.STRING, "The octet order"))
+            )
+    );
+
+    static final FunctionSignature[] FS_UNPACK_UNSIGNED_INTEGER = functionSignatures(
+            QN_UNPACK_UNSIGNED_INTEGER,
+            "Extracts an unsigned xs:integer value from binary data.",
+            returns(Type.INTEGER),
+            arities(
+                    arity(param("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset"),
+                            param("size", Type.INTEGER, "The number of octets to read")),
+                    arity(param("value", Type.BASE64_BINARY, "The binary data"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset"),
+                            param("size", Type.INTEGER, "The number of octets to read"),
+                            param("order", Type.STRING, "The octet order"))
+            )
+    );
+
+    public BinaryPackingFunctions(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Dispatches on the local name this function was called as; any name other
+     * than the six tested explicitly is treated as {@code unpack-unsigned-integer}.
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        if (isCalledAs("pack-double")) {
+            return packDouble(args);
+        } else if (isCalledAs("pack-float")) {
+            return packFloat(args);
+        } else if (isCalledAs("pack-integer")) {
+            return packInteger(args);
+        } else if (isCalledAs("unpack-double")) {
+            return unpackDouble(args);
+        } else if (isCalledAs("unpack-float")) {
+            return unpackFloat(args);
+        } else if (isCalledAs("unpack-integer")) {
+            return unpackInteger(args, true);
+        } else {
+            return unpackInteger(args, false);
+        }
+    }
+
+    /**
+     * Reads the optional octet-order argument.
+     *
+     * @param args the function arguments
+     * @param orderArgIndex the position the order argument has in the longer arity
+     * @return true for little-endian; false for big-endian or when the argument is absent or empty
+     * @throws XPathException if the order string is not recognised
+     */
+    private boolean getByteOrder(final Sequence[] args, final int orderArgIndex) throws XPathException {
+        if (args.length > orderArgIndex && !args[orderArgIndex].isEmpty()) {
+            return BinaryModuleHelper.isLittleEndian(this, args[orderArgIndex].getStringValue());
+        }
+        return false; // big-endian by default
+    }
+
+    /** Packs the raw IEEE 754 bit pattern of an xs:double into 8 octets. */
+    private Sequence packDouble(final Sequence[] args) throws XPathException {
+        final double value = ((DoubleValue) args[0].itemAt(0)).getDouble();
+        final boolean le = getByteOrder(args, 1);
+
+        final byte[] data = new byte[8];
+        ByteBuffer.wrap(data).putLong(Double.doubleToRawLongBits(value));
+        if (le) {
+            BinaryModuleHelper.reverseBytes(data);
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+
+    /** Packs the raw IEEE 754 bit pattern of an xs:float into 4 octets. */
+    private Sequence packFloat(final Sequence[] args) throws XPathException {
+        final float value = ((FloatValue) args[0].itemAt(0)).getValue();
+        final boolean le = getByteOrder(args, 1);
+
+        final byte[] data = new byte[4];
+        ByteBuffer.wrap(data).putInt(Float.floatToRawIntBits(value));
+        if (le) {
+            BinaryModuleHelper.reverseBytes(data);
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+
+    /**
+     * Packs an xs:integer as {@code size} two's-complement octets. Narrower
+     * values are sign-extended; wider values keep only their low-order
+     * {@code size} octets.
+     *
+     * @throws XPathException with bin:negative-size if {@code size} is negative
+     */
+    private Sequence packInteger(final Sequence[] args) throws XPathException {
+        final BigInteger value = ((IntegerValue) args[0].itemAt(0)).toJavaObject(BigInteger.class);
+        final int size = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final boolean le = getByteOrder(args, 2);
+
+        if (size < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Size must not be negative: " + size);
+        }
+
+        if (size == 0) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        final byte[] twosComplement = value.toByteArray();
+        final byte[] data = new byte[size];
+
+        // Fill with sign extension byte (0x00 for positive, 0xFF for negative)
+        if (value.signum() < 0) {
+            Arrays.fill(data, (byte) 0xFF);
+        }
+
+        // Copy the significant bytes into the result, right-aligned (big-endian)
+        if (twosComplement.length <= size) {
+            System.arraycopy(twosComplement, 0, data, size - twosComplement.length, twosComplement.length);
+        } else {
+            // Truncate from the left (most significant bytes)
+            System.arraycopy(twosComplement, twosComplement.length - size, data, 0, size);
+        }
+
+        if (le) {
+            BinaryModuleHelper.reverseBytes(data);
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+
+    /** Reads 8 octets at the given offset and reinterprets them as an xs:double. */
+    private Sequence unpackDouble(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final boolean le = getByteOrder(args, 2);
+
+        validateUnpackRange(data, offset, 8);
+
+        final byte[] slice = Arrays.copyOfRange(data, offset, offset + 8);
+        if (le) {
+            BinaryModuleHelper.reverseBytes(slice);
+        }
+        final long bits = ByteBuffer.wrap(slice).getLong();
+        return new DoubleValue(this, Double.longBitsToDouble(bits));
+    }
+
+    /** Reads 4 octets at the given offset and reinterprets them as an xs:float. */
+    private Sequence unpackFloat(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final boolean le = getByteOrder(args, 2);
+
+        validateUnpackRange(data, offset, 4);
+
+        final byte[] slice = Arrays.copyOfRange(data, offset, offset + 4);
+        if (le) {
+            BinaryModuleHelper.reverseBytes(slice);
+        }
+        final int bits = ByteBuffer.wrap(slice).getInt();
+        return new FloatValue(this, Float.intBitsToFloat(bits));
+    }
+
+    /**
+     * Shared implementation of bin:unpack-integer and bin:unpack-unsigned-integer.
+     *
+     * @param args value, offset, size and optional octet order
+     * @param signed true to read the octets as two's-complement, false to read them as a magnitude
+     * @return the integer read, or 0 when {@code size} is 0
+     * @throws XPathException with bin:negative-size or bin:index-out-of-range
+     */
+    private Sequence unpackInteger(final Sequence[] args, final boolean signed) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final int size = ((IntegerValue) args[2].itemAt(0)).getInt();
+        final boolean le = getByteOrder(args, 3);
+
+        if (size < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Size must not be negative: " + size);
+        }
+
+        validateUnpackRange(data, offset, size);
+
+        if (size == 0) {
+            return new IntegerValue(this, 0);
+        }
+
+        final byte[] slice = Arrays.copyOfRange(data, offset, offset + size);
+        if (le) {
+            BinaryModuleHelper.reverseBytes(slice);
+        }
+        // BigInteger(byte[]) is signed two's-complement; BigInteger(1, byte[]) forces a positive sign.
+        final BigInteger result = signed ? new BigInteger(slice) : new BigInteger(1, slice);
+        return new IntegerValue(this, result);
+    }
+
+    /**
+     * Checks that {@code size} octets starting at {@code offset} lie inside {@code data}.
+     *
+     * @param data the binary data, or null when the argument was an empty sequence
+     * @param offset the zero-based start offset
+     * @param size the number of octets to read; callers pass a non-negative value
+     * @throws XPathException with bin:index-out-of-range if the data is null or the range does not fit
+     */
+    private void validateUnpackRange(final byte[] data, final int offset, final int size) throws XPathException {
+        if (data == null) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Binary data is empty");
+        }
+        // Compare against the remaining length instead of computing offset + size,
+        // which wraps to a negative int for large offsets and would pass the check.
+        if (offset < 0 || offset > data.length || size > data.length - offset) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " + size " + size + " exceeds binary data length " + data.length);
+        }
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java
new file mode 100644
index 00000000000..aa759c93485
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java
@@ -0,0 +1,194 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any
later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+package org.exist.xquery.modules.binary;
+
+import org.exist.dom.QName;
+import org.exist.xquery.BasicFunction;
+import org.exist.xquery.FunctionSignature;
+import org.exist.xquery.XPathException;
+import org.exist.xquery.XQueryContext;
+import org.exist.xquery.value.IntegerValue;
+import org.exist.xquery.value.Sequence;
+import org.exist.xquery.value.StringValue;
+import org.exist.xquery.value.Type;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+
+import static org.exist.xquery.FunctionDSL.*;
+
+/**
+ * EXPath Binary Module 4.0 — Text Encoding and Decoding (Section 6).
+ *
+ * <ul>
+ *   <li>bin:decode-string</li>
+ *   <li>bin:encode-string</li>
+ * </ul>
+ *
+ * <p>Both functions default to UTF-8 and report malformed or unmappable
+ * input as bin:conversion-error instead of substituting replacement
+ * characters.</p>
+ *
+ * @see <a href="https://qt4cg.org/specifications/expath-binary-40/Overview.html">EXPath Binary Module 4.0 §6</a>
+ */
+public class BinaryTextFunctions extends BasicFunction {
+
+    private static final QName QN_DECODE_STRING = new QName("decode-string", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_ENCODE_STRING = new QName("encode-string", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+
+    static final FunctionSignature[] FS_DECODE_STRING = functionSignatures(
+            QN_DECODE_STRING,
+            "Decodes binary data to an xs:string using the specified encoding.",
+            returnsOpt(Type.STRING),
+            arities(
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data to decode")
+                    ),
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data to decode"),
+                            param("encoding", Type.STRING, "The character encoding (default: UTF-8)")
+                    ),
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data to decode"),
+                            param("encoding", Type.STRING, "The character encoding (default: UTF-8)"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset to start decoding")
+                    ),
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data to decode"),
+                            param("encoding", Type.STRING, "The character encoding (default: UTF-8)"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset to start decoding"),
+                            param("size", Type.INTEGER, "The number of bytes to decode")
+                    )
+            )
+    );
+
+    static final FunctionSignature[] FS_ENCODE_STRING = functionSignatures(
+            QN_ENCODE_STRING,
+            "Encodes an xs:string to binary data using the specified encoding.",
+            returnsOpt(Type.BASE64_BINARY),
+            arities(
+                    arity(
+                            optParam("value", Type.STRING, "The string to encode")
+                    ),
+                    arity(
+                            optParam("value", Type.STRING, "The string to encode"),
+                            param("encoding", Type.STRING, "The character encoding (default: UTF-8)")
+                    )
+            )
+    );
+
+    public BinaryTextFunctions(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /** Dispatches to decode-string when called under that name, otherwise to encode-string. */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        if (isCalledAs("decode-string")) {
+            return decodeString(args);
+        } else {
+            return encodeString(args);
+        }
+    }
+
+    /**
+     * Decodes {@code size} octets starting at {@code offset} into a string.
+     *
+     * @param args value, then optional encoding, offset and size
+     * @return the decoded string, or the empty sequence when the value is empty
+     * @throws XPathException with bin:index-out-of-range, bin:negative-size,
+     *         bin:unknown-encoding or bin:conversion-error
+     */
+    private Sequence decodeString(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final String encoding = (args.length > 1 && !args[1].isEmpty())
+                ? args[1].getStringValue()
+                : "UTF-8";
+
+        final int offset = (args.length > 2 && !args[2].isEmpty())
+                ? ((IntegerValue) args[2].itemAt(0)).getInt()
+                : 0;
+
+        // Without an explicit size, decode everything from offset to the end.
+        final int size = (args.length > 3 && !args[3].isEmpty())
+                ? ((IntegerValue) args[3].itemAt(0)).getInt()
+                : data.length - offset;
+
+        if (offset < 0 || offset > data.length) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " is out of range for binary data of length " + data.length);
+        }
+
+        if (size < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Size must not be negative: " + size);
+        }
+
+        // offset is already within [0, data.length], so the subtraction cannot
+        // overflow; offset + size could wrap negative and slip past the check.
+        if (size > data.length - offset) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " + size " + size + " exceeds binary data length " + data.length);
+        }
+
+        final Charset charset = resolveCharset(encoding);
+
+        try {
+            final CharsetDecoder decoder = charset.newDecoder()
+                    .onMalformedInput(CodingErrorAction.REPORT)
+                    .onUnmappableCharacter(CodingErrorAction.REPORT);
+            final CharBuffer result = decoder.decode(ByteBuffer.wrap(data, offset, size));
+            return new StringValue(this, result.toString());
+        } catch (final CharacterCodingException e) {
+            throw new XPathException(this, BinaryModuleErrorCode.CONVERSION_ERROR,
+                    "Failed to decode binary data using encoding '" + encoding + "': " + e.getMessage());
+        }
+    }
+
+    /**
+     * Encodes a string into binary data.
+     *
+     * @param args value, then optional encoding
+     * @return the encoded octets, or the empty sequence when the value is empty
+     * @throws XPathException with bin:unknown-encoding or bin:conversion-error
+     */
+    private Sequence encodeString(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final String value = args[0].getStringValue();
+        final String encoding = (args.length > 1 && !args[1].isEmpty())
+                ? args[1].getStringValue()
+                : "UTF-8";
+
+        final Charset charset = resolveCharset(encoding);
+
+        try {
+            final ByteBuffer encoded = charset.newEncoder()
+                    .onMalformedInput(CodingErrorAction.REPORT)
+                    .onUnmappableCharacter(CodingErrorAction.REPORT)
+                    .encode(CharBuffer.wrap(value));
+            // Copy exactly the readable region; this does not depend on the
+            // buffer being array-backed or on its array offset being zero.
+            final byte[] data = new byte[encoded.remaining()];
+            encoded.get(data);
+            return BinaryModuleHelper.createBinaryResult(context, this, data);
+        } catch (final CharacterCodingException e) {
+            throw new XPathException(this, BinaryModuleErrorCode.CONVERSION_ERROR,
+                    "Failed to encode string using encoding '" + encoding + "': " + e.getMessage());
+        }
+    }
+
+    /**
+     * Looks up a charset by name.
+     *
+     * @param encoding the charset name supplied by the caller
+     * @return the matching charset
+     * @throws XPathException with bin:unknown-encoding if the name is syntactically
+     *         illegal or no such charset is available
+     */
+    private Charset resolveCharset(final String encoding) throws XPathException {
+        try {
+            return Charset.forName(encoding);
+        } catch (final IllegalCharsetNameException | UnsupportedCharsetException e) {
+            // forName rejects malformed names (e.g. containing a space) with
+            // IllegalCharsetNameException before it ever looks the charset up.
+            throw new XPathException(this, BinaryModuleErrorCode.UNKNOWN_ENCODING,
+                    "Unknown encoding: '" + encoding + "'");
+        }
+    }
+}
diff --git a/extensions/modules/pom.xml b/extensions/modules/pom.xml
index 0f8bf723555..8f48305a083 100644
--- a/extensions/modules/pom.xml
+++ b/extensions/modules/pom.xml
@@ -52,6 +52,7 @@
 cqlparser
 example
 exi
+ expath-binary
 expathrepo
 expathrepo/expathrepo-trigger-test
 file
From f4ea0912c2942b3f87856025d0ed949088be3bfe Mon Sep 17 00:00:00 2001
From: Joe Wicentowski
Date: Fri, 24 Apr 2026 18:47:13 -0400
Subject: [PATCH 2/4] [bugfix] Register EXPath Binary module in all test conf.xml files

The module was only registered in the distribution conf.xml but not in any test conf.xml files. The XQTS runner uses the test conf.xml, so all EXPath Binary tests fail with "unknown function" errors without this.
Co-Authored-By: Claude Opus 4.6 (1M context) --- exist-ant/src/test/resources-filtered/conf.xml | 1 + exist-core/src/test/resources-filtered/conf.xml | 1 + .../resources-filtered/org/exist/storage/statistics/conf.xml | 1 + exist-core/src/test/resources-filtered/org/exist/xquery/conf.xml | 1 + .../org/exist/xquery/functions/transform/conf.xml | 1 + .../contentextraction/src/test/resources-filtered/conf.xml | 1 + extensions/debuggee/src/test/resources-filtered/conf.xml | 1 + extensions/expath/src/test/resources-filtered/conf.xml | 1 + extensions/exquery/restxq/src/test/resources-filtered/conf.xml | 1 + .../src/test/resources-filtered/conf.xml | 1 + extensions/indexes/lucene/src/test/resources-filtered/conf.xml | 1 + extensions/indexes/ngram/src/test/resources-filtered/conf.xml | 1 + extensions/indexes/range/src/test/resources-filtered/conf.xml | 1 + extensions/indexes/sort/src/test/resources-filtered/conf.xml | 1 + extensions/indexes/spatial/src/test/resources-filtered/conf.xml | 1 + .../indexes/vector-it/src/test/resources-filtered/conf.xml | 1 + extensions/modules/cache/src/test/resources-filtered/conf.xml | 1 + .../modules/compression/src/test/resources-filtered/conf.xml | 1 + extensions/modules/counter/src/test/resources-filtered/conf.xml | 1 + .../expathrepo-trigger-test/src/test/resources/conf.xml | 1 + .../modules/expathrepo/src/test/resources-filtered/conf.xml | 1 + extensions/modules/file/src/test/resources-filtered/conf.xml | 1 + extensions/modules/image/src/test/resources-filtered/conf.xml | 1 + extensions/modules/mail/src/test/resources-filtered/conf.xml | 1 + .../modules/persistentlogin/src/test/resources-filtered/conf.xml | 1 + extensions/modules/sql/src/test/resources-filtered/conf.xml | 1 + extensions/modules/xmldiff/src/test/resources-filtered/conf.xml | 1 + extensions/modules/xslfo/src/test/resources-filtered/conf.xml | 1 + extensions/webdav/src/test/resources-filtered/conf.xml | 1 + extensions/xqdoc/src/test/resources-filtered/conf.xml | 1 + 30 
files changed, 30 insertions(+) diff --git a/exist-ant/src/test/resources-filtered/conf.xml b/exist-ant/src/test/resources-filtered/conf.xml index 52cac5dde3f..d631056d7b3 100644 --- a/exist-ant/src/test/resources-filtered/conf.xml +++ b/exist-ant/src/test/resources-filtered/conf.xml @@ -753,6 +753,7 @@
+ diff --git a/exist-core/src/test/resources-filtered/conf.xml b/exist-core/src/test/resources-filtered/conf.xml index 9a76f8c79a5..d52a2b21e75 100644 --- a/exist-core/src/test/resources-filtered/conf.xml +++ b/exist-core/src/test/resources-filtered/conf.xml @@ -910,6 +910,7 @@ + diff --git a/exist-core/src/test/resources-filtered/org/exist/storage/statistics/conf.xml b/exist-core/src/test/resources-filtered/org/exist/storage/statistics/conf.xml index 15d68dea5fb..e7beac537dc 100644 --- a/exist-core/src/test/resources-filtered/org/exist/storage/statistics/conf.xml +++ b/exist-core/src/test/resources-filtered/org/exist/storage/statistics/conf.xml @@ -901,6 +901,7 @@ + diff --git a/exist-core/src/test/resources-filtered/org/exist/xquery/conf.xml b/exist-core/src/test/resources-filtered/org/exist/xquery/conf.xml index b9bc14f5b53..b71958d4e6e 100644 --- a/exist-core/src/test/resources-filtered/org/exist/xquery/conf.xml +++ b/exist-core/src/test/resources-filtered/org/exist/xquery/conf.xml @@ -920,6 +920,7 @@ + diff --git a/exist-core/src/test/resources-filtered/org/exist/xquery/functions/transform/conf.xml b/exist-core/src/test/resources-filtered/org/exist/xquery/functions/transform/conf.xml index 7f2354f9f40..9404c4001d8 100644 --- a/exist-core/src/test/resources-filtered/org/exist/xquery/functions/transform/conf.xml +++ b/exist-core/src/test/resources-filtered/org/exist/xquery/functions/transform/conf.xml @@ -912,6 +912,7 @@ + diff --git a/extensions/contentextraction/src/test/resources-filtered/conf.xml b/extensions/contentextraction/src/test/resources-filtered/conf.xml index 1311e06f555..2f3ca467f56 100644 --- a/extensions/contentextraction/src/test/resources-filtered/conf.xml +++ b/extensions/contentextraction/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git a/extensions/debuggee/src/test/resources-filtered/conf.xml b/extensions/debuggee/src/test/resources-filtered/conf.xml index 5dc0efc380a..18570098dc1 100644 --- 
a/extensions/debuggee/src/test/resources-filtered/conf.xml +++ b/extensions/debuggee/src/test/resources-filtered/conf.xml @@ -743,6 +743,7 @@ + diff --git a/extensions/expath/src/test/resources-filtered/conf.xml b/extensions/expath/src/test/resources-filtered/conf.xml index a0e02a2c06d..9a5ca96f5ce 100644 --- a/extensions/expath/src/test/resources-filtered/conf.xml +++ b/extensions/expath/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git a/extensions/exquery/restxq/src/test/resources-filtered/conf.xml b/extensions/exquery/restxq/src/test/resources-filtered/conf.xml index 697afdbf11b..e2304f08ce0 100644 --- a/extensions/exquery/restxq/src/test/resources-filtered/conf.xml +++ b/extensions/exquery/restxq/src/test/resources-filtered/conf.xml @@ -738,6 +738,7 @@ + diff --git a/extensions/indexes/indexes-integration-tests/src/test/resources-filtered/conf.xml b/extensions/indexes/indexes-integration-tests/src/test/resources-filtered/conf.xml index 2aae0f7d207..72dac23139c 100644 --- a/extensions/indexes/indexes-integration-tests/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/indexes-integration-tests/src/test/resources-filtered/conf.xml @@ -906,6 +906,7 @@ + diff --git a/extensions/indexes/lucene/src/test/resources-filtered/conf.xml b/extensions/indexes/lucene/src/test/resources-filtered/conf.xml index 4eaa2642bde..375fd1dc11a 100644 --- a/extensions/indexes/lucene/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/lucene/src/test/resources-filtered/conf.xml @@ -905,6 +905,7 @@ + diff --git a/extensions/indexes/ngram/src/test/resources-filtered/conf.xml b/extensions/indexes/ngram/src/test/resources-filtered/conf.xml index 7b290c22429..0d3c8899b02 100644 --- a/extensions/indexes/ngram/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/ngram/src/test/resources-filtered/conf.xml @@ -903,6 +903,7 @@ + diff --git a/extensions/indexes/range/src/test/resources-filtered/conf.xml 
b/extensions/indexes/range/src/test/resources-filtered/conf.xml index a22d440f625..b6c17cf73f0 100644 --- a/extensions/indexes/range/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/range/src/test/resources-filtered/conf.xml @@ -908,6 +908,7 @@ + diff --git a/extensions/indexes/sort/src/test/resources-filtered/conf.xml b/extensions/indexes/sort/src/test/resources-filtered/conf.xml index e6d70cea684..9c58ec76fb7 100644 --- a/extensions/indexes/sort/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/sort/src/test/resources-filtered/conf.xml @@ -903,6 +903,7 @@ + diff --git a/extensions/indexes/spatial/src/test/resources-filtered/conf.xml b/extensions/indexes/spatial/src/test/resources-filtered/conf.xml index b3ea3200f72..3bcb1927a96 100644 --- a/extensions/indexes/spatial/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/spatial/src/test/resources-filtered/conf.xml @@ -889,6 +889,7 @@ + diff --git a/extensions/indexes/vector-it/src/test/resources-filtered/conf.xml b/extensions/indexes/vector-it/src/test/resources-filtered/conf.xml index 2c516c836c9..1939512eb44 100644 --- a/extensions/indexes/vector-it/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/vector-it/src/test/resources-filtered/conf.xml @@ -74,6 +74,7 @@ + diff --git a/extensions/modules/cache/src/test/resources-filtered/conf.xml b/extensions/modules/cache/src/test/resources-filtered/conf.xml index af9663be608..44458834dda 100644 --- a/extensions/modules/cache/src/test/resources-filtered/conf.xml +++ b/extensions/modules/cache/src/test/resources-filtered/conf.xml @@ -760,6 +760,7 @@ + diff --git a/extensions/modules/compression/src/test/resources-filtered/conf.xml b/extensions/modules/compression/src/test/resources-filtered/conf.xml index 0bdebfee2d6..2032502e355 100644 --- a/extensions/modules/compression/src/test/resources-filtered/conf.xml +++ b/extensions/modules/compression/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git 
a/extensions/modules/counter/src/test/resources-filtered/conf.xml b/extensions/modules/counter/src/test/resources-filtered/conf.xml index 1a31ae00a0e..df5ba61ccb5 100644 --- a/extensions/modules/counter/src/test/resources-filtered/conf.xml +++ b/extensions/modules/counter/src/test/resources-filtered/conf.xml @@ -746,6 +746,7 @@ + diff --git a/extensions/modules/expathrepo/expathrepo-trigger-test/src/test/resources/conf.xml b/extensions/modules/expathrepo/expathrepo-trigger-test/src/test/resources/conf.xml index 399137a7230..ffb375210bd 100644 --- a/extensions/modules/expathrepo/expathrepo-trigger-test/src/test/resources/conf.xml +++ b/extensions/modules/expathrepo/expathrepo-trigger-test/src/test/resources/conf.xml @@ -750,6 +750,7 @@ + diff --git a/extensions/modules/expathrepo/src/test/resources-filtered/conf.xml b/extensions/modules/expathrepo/src/test/resources-filtered/conf.xml index 0203297b9dd..8fc7015bcc6 100644 --- a/extensions/modules/expathrepo/src/test/resources-filtered/conf.xml +++ b/extensions/modules/expathrepo/src/test/resources-filtered/conf.xml @@ -760,6 +760,7 @@ + diff --git a/extensions/modules/file/src/test/resources-filtered/conf.xml b/extensions/modules/file/src/test/resources-filtered/conf.xml index 11c020c728e..32dcc9017ff 100644 --- a/extensions/modules/file/src/test/resources-filtered/conf.xml +++ b/extensions/modules/file/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git a/extensions/modules/image/src/test/resources-filtered/conf.xml b/extensions/modules/image/src/test/resources-filtered/conf.xml index 9df613700e8..1131923b2a1 100644 --- a/extensions/modules/image/src/test/resources-filtered/conf.xml +++ b/extensions/modules/image/src/test/resources-filtered/conf.xml @@ -760,6 +760,7 @@ + diff --git a/extensions/modules/mail/src/test/resources-filtered/conf.xml b/extensions/modules/mail/src/test/resources-filtered/conf.xml index cfebd73a39d..8c8a943dae1 100644 --- 
a/extensions/modules/mail/src/test/resources-filtered/conf.xml +++ b/extensions/modules/mail/src/test/resources-filtered/conf.xml @@ -749,6 +749,7 @@ + diff --git a/extensions/modules/persistentlogin/src/test/resources-filtered/conf.xml b/extensions/modules/persistentlogin/src/test/resources-filtered/conf.xml index 6850c1477fe..67d1ff8c774 100644 --- a/extensions/modules/persistentlogin/src/test/resources-filtered/conf.xml +++ b/extensions/modules/persistentlogin/src/test/resources-filtered/conf.xml @@ -753,6 +753,7 @@ + diff --git a/extensions/modules/sql/src/test/resources-filtered/conf.xml b/extensions/modules/sql/src/test/resources-filtered/conf.xml index 09ba6545e1e..856d8eb692b 100644 --- a/extensions/modules/sql/src/test/resources-filtered/conf.xml +++ b/extensions/modules/sql/src/test/resources-filtered/conf.xml @@ -753,6 +753,7 @@ + diff --git a/extensions/modules/xmldiff/src/test/resources-filtered/conf.xml b/extensions/modules/xmldiff/src/test/resources-filtered/conf.xml index a1a95c324d6..4b8ef88cae7 100644 --- a/extensions/modules/xmldiff/src/test/resources-filtered/conf.xml +++ b/extensions/modules/xmldiff/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git a/extensions/modules/xslfo/src/test/resources-filtered/conf.xml b/extensions/modules/xslfo/src/test/resources-filtered/conf.xml index 3e14e631740..d3ee6da9421 100644 --- a/extensions/modules/xslfo/src/test/resources-filtered/conf.xml +++ b/extensions/modules/xslfo/src/test/resources-filtered/conf.xml @@ -759,6 +759,7 @@ + diff --git a/extensions/webdav/src/test/resources-filtered/conf.xml b/extensions/webdav/src/test/resources-filtered/conf.xml index 5dc0efc380a..18570098dc1 100644 --- a/extensions/webdav/src/test/resources-filtered/conf.xml +++ b/extensions/webdav/src/test/resources-filtered/conf.xml @@ -743,6 +743,7 @@ + diff --git a/extensions/xqdoc/src/test/resources-filtered/conf.xml b/extensions/xqdoc/src/test/resources-filtered/conf.xml index 7c96ef98809..0fe40cf5e0c 100644 
--- a/extensions/xqdoc/src/test/resources-filtered/conf.xml +++ b/extensions/xqdoc/src/test/resources-filtered/conf.xml @@ -759,6 +759,7 @@ + From b47f0510d3e08064921879c52c8e5de6be0bec55 Mon Sep 17 00:00:00 2001 From: Joe Wicentowski Date: Sat, 25 Apr 2026 17:43:00 -0400 Subject: [PATCH 3/4] [refactor] Address review feedback from @reinhapa - Remove labeled continue pattern in BinaryBasicFunctions (Codacy branching warning) - Convert switch statements to switch expressions in BinaryBitwiseFunctions and BinaryModuleHelper - Reduce NPath complexity in BinaryConversionFunctions.octalToBinary by extracting helpers - Reduce NPath complexity in BinaryTextFunctions.decodeString by extracting helpers Co-Authored-By: Claude Opus 4.6 (1M context) --- .../modules/binary/BinaryBasicFunctions.java | 9 ++++-- .../binary/BinaryBitwiseFunctions.java | 16 +++------- .../binary/BinaryConversionFunctions.java | 24 +++++++++----- .../modules/binary/BinaryModuleHelper.java | 21 ++++-------- .../modules/binary/BinaryTextFunctions.java | 32 +++++++++++-------- 5 files changed, 53 insertions(+), 49 deletions(-) diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java index f78789e2f06..203f8cca59c 100644 --- a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java @@ -311,14 +311,17 @@ private Sequence find(final Sequence[] args) throws XPathException { } // Naive byte subsequence search - outer: for (int i = offset; i <= data.length - search.length; i++) { + boolean match = true; for (int j = 0; j < search.length; j++) { if (data[i + j] != search[j]) { - continue outer; + match = false; + break; } } - return new IntegerValue(this, i); + if (match) { + return new 
IntegerValue(this, i); + } } return Sequence.EMPTY_SEQUENCE; diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java index ab13dc209ec..c9354f94a4e 100644 --- a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java @@ -132,17 +132,11 @@ private Sequence bitwiseOp(final Sequence[] args, final BitwiseOp op) throws XPa final byte[] result = new byte[data1.length]; for (int i = 0; i < data1.length; i++) { - switch (op) { - case OR: - result[i] = (byte) (data1[i] | data2[i]); - break; - case XOR: - result[i] = (byte) (data1[i] ^ data2[i]); - break; - case AND: - result[i] = (byte) (data1[i] & data2[i]); - break; - } + result[i] = switch (op) { + case OR -> (byte) (data1[i] | data2[i]); + case XOR -> (byte) (data1[i] ^ data2[i]); + case AND -> (byte) (data1[i] & data2[i]); + }; } return BinaryModuleHelper.createBinaryResult(context, this, result); } diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java index 8ec6f9614fa..2ef6c934ec0 100644 --- a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java @@ -181,14 +181,17 @@ private Sequence octalToBinary(final Sequence[] args) throws XPathException { return Sequence.EMPTY_SEQUENCE; } - String octal = args[0].getStringValue(); - octal = octal.replaceAll("[\\s_]", ""); - + final String octal = stripAndValidateOctal(args[0].getStringValue()); if 
(octal.isEmpty()) { return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]); } - // Validate characters + final String binaryStr = octalToBinaryString(octal); + return BinaryModuleHelper.createBinaryResult(context, this, binaryStringToBytes(binaryStr)); + } + + private String stripAndValidateOctal(final String input) throws XPathException { + final String octal = input.replaceAll("[\\s_]", ""); for (int i = 0; i < octal.length(); i++) { final char c = octal.charAt(i); if (c < '0' || c > '7') { @@ -196,7 +199,10 @@ private Sequence octalToBinary(final Sequence[] args) throws XPathException { "Invalid octal character: '" + c + "'"); } } + return octal; + } + private static String octalToBinaryString(final String octal) { // Convert each octal digit to 3-bit binary final StringBuilder bits = new StringBuilder(); for (int i = 0; i < octal.length(); i++) { @@ -207,7 +213,7 @@ private Sequence octalToBinary(final Sequence[] args) throws XPathException { // Strip up to 2 leading zeros (octal digit = 3 bits, but only multiples of 8 matter) String binaryStr = bits.toString(); int stripCount = 0; - while (stripCount < 2 && binaryStr.length() > 0 && binaryStr.charAt(0) == '0' + while (stripCount < 2 && !binaryStr.isEmpty() && binaryStr.charAt(0) == '0' && (binaryStr.length() - 1) % 8 != 7) { binaryStr = binaryStr.substring(1); stripCount++; @@ -218,16 +224,18 @@ private Sequence octalToBinary(final Sequence[] args) throws XPathException { if (remainder != 0) { binaryStr = "0".repeat(8 - remainder) + binaryStr; } + return binaryStr; + } + private static byte[] binaryStringToBytes(final String binaryStr) { if (binaryStr.isEmpty()) { - return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]); + return new byte[0]; } - final byte[] data = new byte[binaryStr.length() / 8]; for (int i = 0; i < data.length; i++) { data[i] = (byte) Integer.parseInt(binaryStr.substring(i * 8, i * 8 + 8), 2); } - return BinaryModuleHelper.createBinaryResult(context, 
this, data); + return data; } private Sequence toOctets(final Sequence[] args) throws XPathException { diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java index 0b365bcba25..1d27b8e8e45 100644 --- a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java @@ -87,20 +87,13 @@ static BinaryValue createBinaryResult(final XQueryContext context, final Express * @throws XPathException if the value is not a valid octet order */ static boolean isLittleEndian(final Expression expr, final String order) throws XPathException { - switch (order) { - case "most-significant-first": - case "big-endian": - case "BE": - return false; - case "least-significant-first": - case "little-endian": - case "LE": - return true; - default: - throw new XPathException(expr, - org.exist.xquery.ErrorCodes.XPTY0004, - "Invalid octet order: '" + order + "'. Expected one of: most-significant-first, big-endian, BE, least-significant-first, little-endian, LE"); - } + return switch (order) { + case "most-significant-first", "big-endian", "BE" -> false; + case "least-significant-first", "little-endian", "LE" -> true; + default -> throw new XPathException(expr, + org.exist.xquery.ErrorCodes.XPTY0004, + "Invalid octet order: '" + order + "'. 
Expected one of: most-significant-first, big-endian, BE, least-significant-first, little-endian, LE"); + }; } /** diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java index aa759c93485..9d6d08d1260 100644 --- a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java @@ -117,35 +117,33 @@ private Sequence decodeString(final Sequence[] args) throws XPathException { return Sequence.EMPTY_SEQUENCE; } - final String encoding = (args.length > 1 && !args[1].isEmpty()) - ? args[1].getStringValue() - : "UTF-8"; + final String encoding = getOptionalStringArg(args, 1, "UTF-8"); + final int offset = getOptionalIntArg(args, 2, 0); + final int size = getOptionalIntArg(args, 3, data.length - offset); - final int offset = (args.length > 2 && !args[2].isEmpty()) - ? ((IntegerValue) args[2].itemAt(0)).getInt() - : 0; + validateOffsetAndSize(data, offset, size); - final int size = (args.length > 3 && !args[3].isEmpty()) - ? 
((IntegerValue) args[3].itemAt(0)).getInt()
-                : data.length - offset;
+        final Charset charset = resolveCharset(encoding);
+        return decodeBytes(data, offset, size, charset, encoding);
+    }

+    private void validateOffsetAndSize(final byte[] data, final int offset, final int size) throws XPathException {
         if (offset < 0 || offset > data.length) {
             throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
                     "Offset " + offset + " is out of range for binary data of length " + data.length);
         }
-
         if (size < 0) {
             throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
                     "Size must not be negative: " + size);
         }
-
-        if (offset + size > data.length) {
+        if (size > data.length - offset) { // subtraction cannot overflow: 0 <= offset <= data.length was checked above
             throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
                     "Offset " + offset + " + size " + size + " exceeds binary data length " + data.length);
         }
+    }

-        final Charset charset = resolveCharset(encoding);
-
+    private Sequence decodeBytes(final byte[] data, final int offset, final int size,
+                                 final Charset charset, final String encoding) throws XPathException {
         try {
             final CharsetDecoder decoder = charset.newDecoder()
                     .onMalformedInput(CodingErrorAction.REPORT)
@@ -158,6 +156,14 @@ private Sequence decodeString(final Sequence[] args) throws XPathException {
         }
     }

+    private static String getOptionalStringArg(final Sequence[] args, final int index, final String defaultValue) throws XPathException {
+        return (args.length > index && !args[index].isEmpty()) ? args[index].getStringValue() : defaultValue;
+    }
+
+    private static int getOptionalIntArg(final Sequence[] args, final int index, final int defaultValue) throws XPathException {
+        return (args.length > index && !args[index].isEmpty()) ?
((IntegerValue) args[index].itemAt(0)).getInt() : defaultValue; + } + private Sequence encodeString(final Sequence[] args) throws XPathException { if (args[0].isEmpty()) { return Sequence.EMPTY_SEQUENCE; From 866f8455ed3253f43df33315d913f480e69e91d1 Mon Sep 17 00:00:00 2001 From: Joe Wicentowski Date: Sun, 26 Apr 2026 03:18:38 -0400 Subject: [PATCH 4/4] [feature] Implement bin:infer-encoding for EXPath Binary Module 4.0 Adds bin:infer-encoding($data, $encoding?) which detects BOMs in binary data and returns a map with "encoding" (resolved encoding name) and "offset" (byte offset past any BOM). Handles UTF-8 BOM (EF BB BF), UTF-16 BE BOM (FE FF), and UTF-16 LE BOM (FF FE). For unambiguous encodings without BOM, returns the declared encoding with offset 0. Targets 33 XQTS tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- .../binary/BinaryInferEncodingFunction.java | 152 ++++++++++++++++++ .../xquery/modules/binary/BinaryModule.java | 6 +- 2 files changed, 157 insertions(+), 1 deletion(-) create mode 100644 extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryInferEncodingFunction.java diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryInferEncodingFunction.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryInferEncodingFunction.java new file mode 100644 index 00000000000..d0c93fd202a --- /dev/null +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryInferEncodingFunction.java @@ -0,0 +1,152 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import java.nio.charset.Charset; +import java.nio.charset.UnsupportedCharsetException; + +import static org.exist.xquery.FunctionDSL.*; + +/** + * EXPath Binary Module 4.0 — bin:infer-encoding (Section 6.3). + * + *

<p>Infers the actual encoding and byte offset of text data within binary data, + * based on BOM detection and the declared encoding.</p>

+ *
+ * @see EXPath Binary Module 4.0 §6.3
+ */
+public class BinaryInferEncodingFunction extends BasicFunction {
+
+    private static final QName QN_INFER_ENCODING = new QName("infer-encoding", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+
+    private static final byte[] UTF8_BOM = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
+    private static final byte[] UTF16_BE_BOM = {(byte) 0xFE, (byte) 0xFF};
+    private static final byte[] UTF16_LE_BOM = {(byte) 0xFF, (byte) 0xFE};
+
+    static final FunctionSignature[] FS_INFER_ENCODING = functionSignatures(
+            QN_INFER_ENCODING,
+            "Infers the actual encoding and data offset from binary data, detecting BOMs and resolving encoding families.",
+            returns(Type.MAP_ITEM),
+            arities(
+                    arity(
+                            param("data", Type.BASE64_BINARY, "The binary data to analyze")
+                    ),
+                    arity(
+                            param("data", Type.BASE64_BINARY, "The binary data to analyze"),
+                            optParam("encoding", Type.STRING, "The declared encoding (default: UTF-8)")
+                    )
+            )
+    );
+
+    public BinaryInferEncodingFunction(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            throw new XPathException(this, org.exist.xquery.ErrorCodes.XPTY0004,
+                    "Empty sequence is not allowed as the first argument of bin:infer-encoding");
+        }
+
+        final String declaredEncoding;
+        if (args.length > 1 && !args[1].isEmpty()) {
+            declaredEncoding = args[1].getStringValue();
+        } else {
+            declaredEncoding = "UTF-8";
+        }
+
+        validateEncoding(declaredEncoding);
+
+        final String normalizedEncoding = normalizeEncodingFamily(declaredEncoding);
+        String resultEncoding = declaredEncoding;
+        int resultOffset = 0;
+
+        if (isUtf8Family(normalizedEncoding)) {
+            if (startsWith(data, UTF8_BOM)) {
+                resultEncoding = "UTF-8";
+                resultOffset = 3;
+            }
+        } else if (isUtf16Family(normalizedEncoding)) {
+            if (startsWith(data, UTF16_BE_BOM)) {
+                resultEncoding = "UTF-16BE";
+                resultOffset = 2;
+            } else if (startsWith(data, UTF16_LE_BOM)) {
+                resultEncoding = "UTF-16LE";
+                resultOffset = 2;
+            } else if ("UTF16".equals(normalizedEncoding)) { // normalized names carry no hyphen, so compare against "UTF16"
+                resultEncoding = "UTF-16BE";
+                resultOffset = 0;
+            }
+        }
+
+        final MapType result = new MapType(this, context);
+        result.add(new StringValue(this, "encoding"), new StringValue(this, resultEncoding));
+        result.add(new StringValue(this, "offset"), new IntegerValue(this, resultOffset));
+        return result;
+    }
+
+    private void validateEncoding(final String encoding) throws XPathException {
+        try {
+            Charset.forName(encoding); // throws for syntactically illegal names as well as unsupported ones
+        } catch (final java.nio.charset.IllegalCharsetNameException | UnsupportedCharsetException e) {
+            throw new XPathException(this, BinaryModuleErrorCode.UNKNOWN_ENCODING,
+                    "Unknown encoding: '" + encoding + "'");
+        }
+    }
+
+    private static String normalizeEncodingFamily(final String encoding) {
+        return encoding.toUpperCase(java.util.Locale.ROOT).replace("-", "").replace("_", ""); // locale-independent casing
+    }
+
+    private static boolean isUtf8Family(final String normalized) {
+        return "UTF8".equals(normalized);
+    }
+
+    private static boolean isUtf16Family(final String normalized) {
+        return "UTF16".equals(normalized) || "UTF16BE".equals(normalized) || "UTF16LE".equals(normalized);
+    }
+
+    private static boolean startsWith(final byte[] data, final byte[] prefix) {
+        if (data.length < prefix.length) {
+            return false;
+        }
+        for (int i = 0; i < prefix.length; i++) {
+            if (data[i] != prefix[i]) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java index 28d6f3eb820..18c4c905645 100644 --- a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java +++
b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java @@ -90,7 +90,11 @@ public class BinaryModule extends AbstractInternalModule { BinaryBitwiseFunctions.FS_XOR, BinaryBitwiseFunctions.FS_AND, BinaryBitwiseFunctions.FS_NOT, - BinaryBitwiseFunctions.FS_SHIFT) + BinaryBitwiseFunctions.FS_SHIFT), + + functionDefs(BinaryInferEncodingFunction.class, + BinaryInferEncodingFunction.FS_INFER_ENCODING[0], + BinaryInferEncodingFunction.FS_INFER_ENCODING[1]) ); public BinaryModule(final Map> parameters) {