diff --git a/exist-ant/src/test/resources-filtered/conf.xml b/exist-ant/src/test/resources-filtered/conf.xml index 52cac5dde3f..d631056d7b3 100644 --- a/exist-ant/src/test/resources-filtered/conf.xml +++ b/exist-ant/src/test/resources-filtered/conf.xml @@ -753,6 +753,7 @@ + diff --git a/exist-core/src/test/resources-filtered/conf.xml b/exist-core/src/test/resources-filtered/conf.xml index 9a76f8c79a5..d52a2b21e75 100644 --- a/exist-core/src/test/resources-filtered/conf.xml +++ b/exist-core/src/test/resources-filtered/conf.xml @@ -910,6 +910,7 @@ + diff --git a/exist-core/src/test/resources-filtered/org/exist/storage/statistics/conf.xml b/exist-core/src/test/resources-filtered/org/exist/storage/statistics/conf.xml index 15d68dea5fb..e7beac537dc 100644 --- a/exist-core/src/test/resources-filtered/org/exist/storage/statistics/conf.xml +++ b/exist-core/src/test/resources-filtered/org/exist/storage/statistics/conf.xml @@ -901,6 +901,7 @@ + diff --git a/exist-core/src/test/resources-filtered/org/exist/xquery/conf.xml b/exist-core/src/test/resources-filtered/org/exist/xquery/conf.xml index b9bc14f5b53..b71958d4e6e 100644 --- a/exist-core/src/test/resources-filtered/org/exist/xquery/conf.xml +++ b/exist-core/src/test/resources-filtered/org/exist/xquery/conf.xml @@ -920,6 +920,7 @@ + diff --git a/exist-core/src/test/resources-filtered/org/exist/xquery/functions/transform/conf.xml b/exist-core/src/test/resources-filtered/org/exist/xquery/functions/transform/conf.xml index 7f2354f9f40..9404c4001d8 100644 --- a/exist-core/src/test/resources-filtered/org/exist/xquery/functions/transform/conf.xml +++ b/exist-core/src/test/resources-filtered/org/exist/xquery/functions/transform/conf.xml @@ -912,6 +912,7 @@ + diff --git a/exist-distribution/pom.xml b/exist-distribution/pom.xml index 2369cda5832..a7d1683b598 100644 --- a/exist-distribution/pom.xml +++ b/exist-distribution/pom.xml @@ -188,6 +188,12 @@ ${project.version} runtime + + ${project.groupId} + exist-expath-binary + 
${project.version} + runtime + ${project.groupId} exist-counter diff --git a/exist-distribution/src/main/config/conf.xml b/exist-distribution/src/main/config/conf.xml index 6a9937c0e03..0045adb6448 100644 --- a/exist-distribution/src/main/config/conf.xml +++ b/exist-distribution/src/main/config/conf.xml @@ -1048,6 +1048,7 @@ + diff --git a/extensions/contentextraction/src/test/resources-filtered/conf.xml b/extensions/contentextraction/src/test/resources-filtered/conf.xml index 1311e06f555..2f3ca467f56 100644 --- a/extensions/contentextraction/src/test/resources-filtered/conf.xml +++ b/extensions/contentextraction/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git a/extensions/debuggee/src/test/resources-filtered/conf.xml b/extensions/debuggee/src/test/resources-filtered/conf.xml index 5dc0efc380a..18570098dc1 100644 --- a/extensions/debuggee/src/test/resources-filtered/conf.xml +++ b/extensions/debuggee/src/test/resources-filtered/conf.xml @@ -743,6 +743,7 @@ + diff --git a/extensions/expath/src/test/resources-filtered/conf.xml b/extensions/expath/src/test/resources-filtered/conf.xml index a0e02a2c06d..9a5ca96f5ce 100644 --- a/extensions/expath/src/test/resources-filtered/conf.xml +++ b/extensions/expath/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git a/extensions/exquery/restxq/src/test/resources-filtered/conf.xml b/extensions/exquery/restxq/src/test/resources-filtered/conf.xml index 697afdbf11b..e2304f08ce0 100644 --- a/extensions/exquery/restxq/src/test/resources-filtered/conf.xml +++ b/extensions/exquery/restxq/src/test/resources-filtered/conf.xml @@ -738,6 +738,7 @@ + diff --git a/extensions/indexes/indexes-integration-tests/src/test/resources-filtered/conf.xml b/extensions/indexes/indexes-integration-tests/src/test/resources-filtered/conf.xml index 2aae0f7d207..72dac23139c 100644 --- a/extensions/indexes/indexes-integration-tests/src/test/resources-filtered/conf.xml +++ 
b/extensions/indexes/indexes-integration-tests/src/test/resources-filtered/conf.xml @@ -906,6 +906,7 @@ + diff --git a/extensions/indexes/lucene/src/test/resources-filtered/conf.xml b/extensions/indexes/lucene/src/test/resources-filtered/conf.xml index 4eaa2642bde..375fd1dc11a 100644 --- a/extensions/indexes/lucene/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/lucene/src/test/resources-filtered/conf.xml @@ -905,6 +905,7 @@ + diff --git a/extensions/indexes/ngram/src/test/resources-filtered/conf.xml b/extensions/indexes/ngram/src/test/resources-filtered/conf.xml index 7b290c22429..0d3c8899b02 100644 --- a/extensions/indexes/ngram/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/ngram/src/test/resources-filtered/conf.xml @@ -903,6 +903,7 @@ + diff --git a/extensions/indexes/range/src/test/resources-filtered/conf.xml b/extensions/indexes/range/src/test/resources-filtered/conf.xml index a22d440f625..b6c17cf73f0 100644 --- a/extensions/indexes/range/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/range/src/test/resources-filtered/conf.xml @@ -908,6 +908,7 @@ + diff --git a/extensions/indexes/sort/src/test/resources-filtered/conf.xml b/extensions/indexes/sort/src/test/resources-filtered/conf.xml index e6d70cea684..9c58ec76fb7 100644 --- a/extensions/indexes/sort/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/sort/src/test/resources-filtered/conf.xml @@ -903,6 +903,7 @@ + diff --git a/extensions/indexes/spatial/src/test/resources-filtered/conf.xml b/extensions/indexes/spatial/src/test/resources-filtered/conf.xml index b3ea3200f72..3bcb1927a96 100644 --- a/extensions/indexes/spatial/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/spatial/src/test/resources-filtered/conf.xml @@ -889,6 +889,7 @@ + diff --git a/extensions/indexes/vector-it/src/test/resources-filtered/conf.xml b/extensions/indexes/vector-it/src/test/resources-filtered/conf.xml index 2c516c836c9..1939512eb44 100644 --- 
a/extensions/indexes/vector-it/src/test/resources-filtered/conf.xml +++ b/extensions/indexes/vector-it/src/test/resources-filtered/conf.xml @@ -74,6 +74,7 @@ + diff --git a/extensions/modules/cache/src/test/resources-filtered/conf.xml b/extensions/modules/cache/src/test/resources-filtered/conf.xml index af9663be608..44458834dda 100644 --- a/extensions/modules/cache/src/test/resources-filtered/conf.xml +++ b/extensions/modules/cache/src/test/resources-filtered/conf.xml @@ -760,6 +760,7 @@ + diff --git a/extensions/modules/compression/src/test/resources-filtered/conf.xml b/extensions/modules/compression/src/test/resources-filtered/conf.xml index 0bdebfee2d6..2032502e355 100644 --- a/extensions/modules/compression/src/test/resources-filtered/conf.xml +++ b/extensions/modules/compression/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git a/extensions/modules/counter/src/test/resources-filtered/conf.xml b/extensions/modules/counter/src/test/resources-filtered/conf.xml index 1a31ae00a0e..df5ba61ccb5 100644 --- a/extensions/modules/counter/src/test/resources-filtered/conf.xml +++ b/extensions/modules/counter/src/test/resources-filtered/conf.xml @@ -746,6 +746,7 @@ + diff --git a/extensions/modules/expath-binary/pom.xml b/extensions/modules/expath-binary/pom.xml new file mode 100644 index 00000000000..6f0723e05de --- /dev/null +++ b/extensions/modules/expath-binary/pom.xml @@ -0,0 +1,67 @@ + + + + 4.0.0 + + + org.exist-db + exist-parent + 7.0.0-SNAPSHOT + ../../../exist-parent + + + exist-expath-binary + jar + + eXist-db EXPath Binary Module + EXPath Binary Module 4.0 for eXist-db (http://expath.org/ns/binary) + + + scm:git:https://github.com/exist-db/exist.git + scm:git:https://github.com/exist-db/exist.git + scm:git:https://github.com/exist-db/exist.git + HEAD + + + + + org.exist-db + exist-core + ${project.version} + + + + commons-io + commons-io + + + + com.google.code.findbugs + jsr305 + + + + + diff --git 
a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java new file mode 100644 index 00000000000..203f8cca59c --- /dev/null +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBasicFunctions.java @@ -0,0 +1,329 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; + +import java.io.IOException; +import java.util.Arrays; + +import static org.exist.xquery.FunctionDSL.*; + +/** + * EXPath Binary Module 4.0 — Basic Operations (Section 5). + * + *
<ul>
+ *   <li>bin:length</li>
+ *   <li>bin:part</li>
+ *   <li>bin:join</li>
+ *   <li>bin:insert-before</li>
+ *   <li>bin:pad-left</li>
+ *   <li>bin:pad-right</li>
+ *   <li>bin:find</li>
+ * </ul>
+ * + * @see EXPath Binary Module 4.0 §5 + */ +public class BinaryBasicFunctions extends BasicFunction { + + private static final QName QN_LENGTH = new QName("length", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_PART = new QName("part", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_JOIN = new QName("join", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_INSERT_BEFORE = new QName("insert-before", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_PAD_LEFT = new QName("pad-left", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_PAD_RIGHT = new QName("pad-right", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_FIND = new QName("find", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + + static final FunctionSignature FS_LENGTH = functionSignature( + QN_LENGTH, + "Returns the size of binary data in octets.", + returns(Type.INTEGER), + param("value", Type.BASE64_BINARY, "The binary data") + ); + + static final FunctionSignature[] FS_PART = functionSignatures( + QN_PART, + "Returns a specified part of binary data.", + returnsOpt(Type.BASE64_BINARY), + arities( + arity( + optParam("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based offset") + ), + arity( + optParam("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based offset"), + param("size", Type.INTEGER, "The number of octets to return") + ) + ) + ); + + static final FunctionSignature FS_JOIN = functionSignature( + QN_JOIN, + "Returns the concatenation of binary data.", + returns(Type.BASE64_BINARY), + optManyParam("values", Type.BASE64_BINARY, "The binary data items to join") + ); + + static final FunctionSignature FS_INSERT_BEFORE = functionSignature( + QN_INSERT_BEFORE, + "Inserts additional binary data at a given point in 
other binary data.", + returnsOpt(Type.BASE64_BINARY), + optParam("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based offset for insertion"), + optParam("extra", Type.BASE64_BINARY, "The binary data to insert") + ); + + static final FunctionSignature[] FS_PAD_LEFT = functionSignatures( + QN_PAD_LEFT, + "Pads binary data on the left to a specified size.", + returnsOpt(Type.BASE64_BINARY), + arities( + arity( + optParam("value", Type.BASE64_BINARY, "The binary data"), + param("count", Type.INTEGER, "The number of octets to pad") + ), + arity( + optParam("value", Type.BASE64_BINARY, "The binary data"), + param("count", Type.INTEGER, "The number of octets to pad"), + param("octet", Type.INTEGER, "The octet value to use for padding (0-255)") + ) + ) + ); + + static final FunctionSignature[] FS_PAD_RIGHT = functionSignatures( + QN_PAD_RIGHT, + "Pads binary data on the right to a specified size.", + returnsOpt(Type.BASE64_BINARY), + arities( + arity( + optParam("value", Type.BASE64_BINARY, "The binary data"), + param("count", Type.INTEGER, "The number of octets to pad") + ), + arity( + optParam("value", Type.BASE64_BINARY, "The binary data"), + param("count", Type.INTEGER, "The number of octets to pad"), + param("octet", Type.INTEGER, "The octet value to use for padding (0-255)") + ) + ) + ); + + static final FunctionSignature FS_FIND = functionSignature( + QN_FIND, + "Returns the first location of a binary search sequence within binary data.", + returnsOpt(Type.INTEGER), + optParam("value", Type.BASE64_BINARY, "The binary data to search"), + param("offset", Type.INTEGER, "The zero-based offset to start searching from"), + param("search", Type.BASE64_BINARY, "The binary data to search for") + ); + + public BinaryBasicFunctions(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws 
XPathException {
+        if (isCalledAs("length")) {
+            return length(args);
+        } else if (isCalledAs("part")) {
+            return part(args);
+        } else if (isCalledAs("join")) {
+            return join(args);
+        } else if (isCalledAs("insert-before")) {
+            return insertBefore(args);
+        } else if (isCalledAs("pad-left")) {
+            return padLeft(args);
+        } else if (isCalledAs("pad-right")) {
+            return padRight(args);
+        } else {
+            return find(args);
+        }
+    }
+
+    /**
+     * Implements bin:length: the number of octets in the binary argument.
+     *
+     * @param args the function arguments; args[0] is the binary value
+     * @return the octet count as an integer value
+     * @throws XPathException with XPTY0004 when {@code BinaryModuleHelper.getBinaryData}
+     *                        returns {@code null} for args[0]
+     */
+    private Sequence length(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            throw new XPathException(this, org.exist.xquery.ErrorCodes.XPTY0004,
+                    "Empty sequence is not allowed as argument to bin:length()");
+        }
+        return new IntegerValue(this, data.length);
+    }
+
+    /**
+     * Implements bin:part: extracts {@code size} octets starting at the zero-based
+     * {@code offset}. When the size argument is absent or empty, everything from
+     * {@code offset} to the end of the data is returned.
+     *
+     * @param args args[0] is the binary value, args[1] the offset, args[2] (optional) the size
+     * @return the selected octets, or the empty sequence when no data is available for args[0]
+     * @throws XPathException INDEX_OUT_OF_RANGE if the offset is negative or beyond the end of
+     *                        the data, or if offset + size reaches past the end of the data;
+     *                        NEGATIVE_SIZE if the size is negative
+     */
+    private Sequence part(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        if (offset < 0 || offset > data.length) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " is out of range for binary data of length " + data.length);
+        }
+
+        final int size;
+        if (args.length > 2 && !args[2].isEmpty()) {
+            size = ((IntegerValue) args[2].itemAt(0)).getInt();
+            if (size < 0) {
+                throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                        "Size must not be negative: " + size);
+            }
+            // Compare against the remaining length rather than computing offset + size:
+            // the sum overflows int for large sizes and would slip past the check, letting
+            // Arrays.copyOfRange fail with an unchecked exception. offset <= data.length
+            // was verified above, so the subtraction cannot go negative.
+            if (size > data.length - offset) {
+                throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                        "Offset " + offset + " + size " + size + " exceeds binary data length " + data.length);
+            }
+        } else {
+            size = data.length - offset;
+        }
+
+        final byte[] result = Arrays.copyOfRange(data, offset, offset + size);
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /**
+     * Implements bin:join: concatenates every item of args[0] in sequence order.
+     * An empty input sequence produces a zero-length binary value.
+     */
+    private Sequence join(final Sequence[] args) throws XPathException {
+        if 
(args[0].isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        try (final UnsynchronizedByteArrayOutputStream os = new UnsynchronizedByteArrayOutputStream()) {
+            for (int i = 0; i < args[0].getItemCount(); i++) {
+                final byte[] data = BinaryModuleHelper.getBinaryData(args[0].itemAt(i).toSequence());
+                // Items for which the helper returns null contribute nothing to the output.
+                if (data != null) {
+                    os.write(data);
+                }
+            }
+            return BinaryModuleHelper.createBinaryResult(context, this, os.toByteArray());
+        } catch (final IOException e) {
+            throw new XPathException(this, "Failed to join binary data: " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Implements bin:insert-before: returns a copy of args[0] with the octets of args[2]
+     * spliced in at the zero-based offset args[1].
+     *
+     * @return the combined data; the original data when args[2] yields no octets; the empty
+     *         sequence when no data is available for args[0]
+     * @throws XPathException INDEX_OUT_OF_RANGE if the offset is negative or greater than the
+     *                        length of the data
+     */
+    private Sequence insertBefore(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        if (offset < 0 || offset > data.length) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " is out of range for binary data of length " + data.length);
+        }
+
+        final byte[] extra = BinaryModuleHelper.getBinaryData(args[2]);
+        if (extra == null || extra.length == 0) {
+            return BinaryModuleHelper.createBinaryResult(context, this, data);
+        }
+
+        // Result layout: data[0, offset) | extra | data[offset, end)
+        final byte[] result = new byte[data.length + extra.length];
+        System.arraycopy(data, 0, result, 0, offset);
+        System.arraycopy(extra, 0, result, offset, extra.length);
+        System.arraycopy(data, offset, result, offset + extra.length, data.length - offset);
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /**
+     * Implements bin:pad-left: prepends {@code count} copies of the padding octet to the data,
+     * so the result is {@code data.length + count} octets long. The padding octet defaults
+     * to 0 when the third argument is absent or empty.
+     *
+     * @return the padded data, or the empty sequence when no data is available for args[0]
+     * @throws XPathException NEGATIVE_SIZE if the count is negative
+     */
+    private Sequence padLeft(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int count = ((IntegerValue) args[1].itemAt(0)).getInt();
+        if (count < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Pad count must not be negative: " + count);
+        }
+
+        // The (byte) cast keeps only the low 8 bits, so values outside 0-255 are narrowed
+        // silently. NOTE(review): the EXPath spec defines bin:octet-out-of-range for this
+        // case - confirm whether an explicit range check should be added here.
+        final byte octet = (args.length > 2 && !args[2].isEmpty())
+                ? (byte) ((IntegerValue) args[2].itemAt(0)).getInt()
+                : 0;
+
+        // NOTE(review): data.length + count is an int sum and is not guarded against overflow.
+        final byte[] result = new byte[data.length + count];
+        Arrays.fill(result, 0, count, octet);
+        System.arraycopy(data, 0, result, count, data.length);
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /**
+     * Implements bin:pad-right: appends {@code count} copies of the padding octet to the data,
+     * so the result is {@code data.length + count} octets long. The padding octet defaults
+     * to 0 when the third argument is absent or empty.
+     *
+     * @return the padded data, or the empty sequence when no data is available for args[0]
+     * @throws XPathException NEGATIVE_SIZE if the count is negative
+     */
+    private Sequence padRight(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int count = ((IntegerValue) args[1].itemAt(0)).getInt();
+        if (count < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Pad count must not be negative: " + count);
+        }
+
+        // Same silent narrowing as in padLeft. NOTE(review): confirm whether values outside
+        // 0-255 should be rejected instead.
+        final byte octet = (args.length > 2 && !args[2].isEmpty())
+                ? (byte) ((IntegerValue) args[2].itemAt(0)).getInt()
+                : 0;
+
+        final byte[] result = new byte[data.length + count];
+        System.arraycopy(data, 0, result, 0, data.length);
+        Arrays.fill(result, data.length, result.length, octet);
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /**
+     * Implements bin:find: returns the zero-based position of the first occurrence of
+     * args[2] within args[0] at or after the offset args[1]. An empty search value matches
+     * immediately at the offset.
+     *
+     * @return the match position; the empty sequence when there is no match or no data is
+     *         available for args[0]
+     * @throws XPathException INDEX_OUT_OF_RANGE if the offset is negative or greater than the
+     *                        length of the data
+     */
+    private Sequence find(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        if (offset < 0 || offset > data.length) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " is out of range for binary data of length " + data.length);
+        }
+
+        final byte[] search = BinaryModuleHelper.getBinaryData(args[2]);
+        if (search == null || search.length == 0) {
+            return new IntegerValue(this, offset);
+        }
+
+        // Naive byte subsequence search
+        for (int i = offset; i <= data.length - search.length; i++) {
+            boolean match = true;
+            for (int j = 0; j < search.length; j++) {
+                if (data[i + j] != search[j]) {
+                    match = false;
+                    
break; + } + } + if (match) { + return new IntegerValue(this, i); + } + } + + return Sequence.EMPTY_SEQUENCE; + } +} diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java new file mode 100644 index 00000000000..c9354f94a4e --- /dev/null +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryBitwiseFunctions.java @@ -0,0 +1,193 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; + +import java.math.BigInteger; +import java.util.Arrays; + +import static org.exist.xquery.FunctionDSL.*; + +/** + * EXPath Binary Module 4.0 — Bitwise Operations (Section 8). + * + *
<ul>
+ *   <li>bin:or</li>
+ *   <li>bin:xor</li>
+ *   <li>bin:and</li>
+ *   <li>bin:not</li>
+ *   <li>bin:shift</li>
+ * </ul>
+ * + * @see EXPath Binary Module 4.0 §8 + */ +public class BinaryBitwiseFunctions extends BasicFunction { + + private static final QName QN_OR = new QName("or", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_XOR = new QName("xor", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_AND = new QName("and", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_NOT = new QName("not", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_SHIFT = new QName("shift", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + + static final FunctionSignature FS_OR = functionSignature( + QN_OR, + "Returns the bitwise OR of two binary values.", + returnsOpt(Type.BASE64_BINARY), + optParam("value1", Type.BASE64_BINARY, "The first binary value"), + optParam("value2", Type.BASE64_BINARY, "The second binary value") + ); + + static final FunctionSignature FS_XOR = functionSignature( + QN_XOR, + "Returns the bitwise XOR of two binary values.", + returnsOpt(Type.BASE64_BINARY), + optParam("value1", Type.BASE64_BINARY, "The first binary value"), + optParam("value2", Type.BASE64_BINARY, "The second binary value") + ); + + static final FunctionSignature FS_AND = functionSignature( + QN_AND, + "Returns the bitwise AND of two binary values.", + returnsOpt(Type.BASE64_BINARY), + optParam("value1", Type.BASE64_BINARY, "The first binary value"), + optParam("value2", Type.BASE64_BINARY, "The second binary value") + ); + + static final FunctionSignature FS_NOT = functionSignature( + QN_NOT, + "Returns the bitwise NOT of a binary value.", + returnsOpt(Type.BASE64_BINARY), + optParam("value", Type.BASE64_BINARY, "The binary value") + ); + + static final FunctionSignature FS_SHIFT = functionSignature( + QN_SHIFT, + "Shifts bits in binary data. 
Positive shifts left, negative shifts right.",
+            returnsOpt(Type.BASE64_BINARY),
+            optParam("value", Type.BASE64_BINARY, "The binary value"),
+            param("by", Type.INTEGER, "The number of bits to shift (positive = left, negative = right)")
+    );
+
+    public BinaryBitwiseFunctions(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        // Dispatch on the name the function was invoked under; bin:shift is the fall-through case.
+        if (isCalledAs("or")) {
+            return bitwiseOp(args, BitwiseOp.OR);
+        } else if (isCalledAs("xor")) {
+            return bitwiseOp(args, BitwiseOp.XOR);
+        } else if (isCalledAs("and")) {
+            return bitwiseOp(args, BitwiseOp.AND);
+        } else if (isCalledAs("not")) {
+            return bitwiseNot(args);
+        } else {
+            return bitwiseShift(args);
+        }
+    }
+
+    /** The two-operand bitwise operations that share the bitwiseOp implementation. */
+    private enum BitwiseOp { OR, XOR, AND }
+
+    /**
+     * Applies {@code op} octet by octet to two binary values of equal length.
+     *
+     * @param args args[0] and args[1] are the two binary operands
+     * @param op   the operation to apply to each pair of octets
+     * @return the combined data, or the empty sequence when no data is available for either
+     *         operand
+     * @throws XPathException DIFFERING_LENGTH_ARGUMENTS if the operands differ in length
+     */
+    private Sequence bitwiseOp(final Sequence[] args, final BitwiseOp op) throws XPathException {
+        final byte[] data1 = BinaryModuleHelper.getBinaryData(args[0]);
+        final byte[] data2 = BinaryModuleHelper.getBinaryData(args[1]);
+
+        if (data1 == null || data2 == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        if (data1.length != data2.length) {
+            // The enum constant name, lower-cased, doubles as the function's local name.
+            throw new XPathException(this, BinaryModuleErrorCode.DIFFERING_LENGTH_ARGUMENTS,
+                    "Arguments to bin:" + op.name().toLowerCase() + "() must have equal length, but got " +
+                            data1.length + " and " + data2.length);
+        }
+
+        final byte[] result = new byte[data1.length];
+        for (int i = 0; i < data1.length; i++) {
+            result[i] = switch (op) {
+                case OR -> (byte) (data1[i] | data2[i]);
+                case XOR -> (byte) (data1[i] ^ data2[i]);
+                case AND -> (byte) (data1[i] & data2[i]);
+            };
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /**
+     * Implements bin:not: inverts every bit of the binary value.
+     *
+     * @return the inverted data, or the empty sequence when no data is available for args[0]
+     */
+    private Sequence bitwiseNot(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final byte[] result = new 
byte[data.length];
+        for (int i = 0; i < data.length; i++) {
+            result[i] = (byte) ~data[i];
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+
+    /**
+     * Implements bin:shift: shifts the bits of the binary value left (positive {@code by})
+     * or right (negative {@code by}), filling with zero bits. The result always has the
+     * same length as the input; bits shifted past either end are discarded.
+     *
+     * @param args args[0] is the binary value, args[1] the shift distance in bits
+     * @return the shifted data, or the empty sequence when no data is available for args[0]
+     */
+    private Sequence bitwiseShift(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final int by = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final int originalLength = data.length;
+
+        // Empty input, or a shift distance of at least the total bit-length: every bit is
+        // shifted out, so the result is all zeros of the original length. Handling this up
+        // front keeps BigInteger from allocating a value sized by an arbitrarily large shift
+        // distance. The comparison is done in long so that Integer.MIN_VALUE and
+        // 8 * length cannot overflow.
+        if (originalLength == 0 || Math.abs((long) by) >= 8L * originalLength) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[originalLength]);
+        }
+
+        // Use BigInteger for bit shifting, then maintain original length
+        BigInteger bigInt = new BigInteger(1, data);
+
+        if (by > 0) {
+            bigInt = bigInt.shiftLeft(by);
+        } else if (by == Integer.MIN_VALUE) {
+            // -Integer.MIN_VALUE overflows back to a negative int, so split this one
+            // distance (2^31 bits) into two shifts that are each representable.
+            bigInt = bigInt.shiftRight(Integer.MAX_VALUE).shiftRight(1);
+        } else if (by < 0) {
+            bigInt = bigInt.shiftRight(-by);
+        }
+
+        // Convert back to byte array of original length
+        final byte[] shifted = bigInt.toByteArray();
+        final byte[] result = new byte[originalLength];
+
+        if (shifted.length <= originalLength) {
+            // Right-align in result
+            System.arraycopy(shifted, 0, result, originalLength - shifted.length, shifted.length);
+        } else {
+            // Truncate from the left to maintain original length; this also drops the
+            // leading sign byte that toByteArray() may prepend.
+            System.arraycopy(shifted, shifted.length - originalLength, result, 0, originalLength);
+        }
+
+        return BinaryModuleHelper.createBinaryResult(context, this, result);
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java
new file mode 100644
index 00000000000..2ef6c934ec0
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryConversionFunctions.java
@@ -0,0 +1,266 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you 
can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; +import org.exist.xquery.value.ValueSequence; + +import static org.exist.xquery.FunctionDSL.*; + +/** + * EXPath Binary Module 4.0 — Constants and Conversions (Section 4). + * + *
<ul>
+ *   <li>bin:hex</li>
+ *   <li>bin:bin</li>
+ *   <li>bin:octal</li>
+ *   <li>bin:to-octets</li>
+ *   <li>bin:from-octets</li>
+ * </ul>
+ * + * @see EXPath Binary Module 4.0 §4 + */ +public class BinaryConversionFunctions extends BasicFunction { + + private static final QName QN_HEX = new QName("hex", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_BIN = new QName("bin", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_OCTAL = new QName("octal", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_TO_OCTETS = new QName("to-octets", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_FROM_OCTETS = new QName("from-octets", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + + static final FunctionSignature FS_HEX = functionSignature( + QN_HEX, + "Creates an xs:base64Binary value from a hexadecimal string.", + returnsOpt(Type.BASE64_BINARY), + optParam("value", Type.STRING, "The hexadecimal string") + ); + + static final FunctionSignature FS_BIN = functionSignature( + QN_BIN, + "Creates an xs:base64Binary value from a binary (0/1) string.", + returnsOpt(Type.BASE64_BINARY), + optParam("value", Type.STRING, "The binary digit string") + ); + + static final FunctionSignature FS_OCTAL = functionSignature( + QN_OCTAL, + "Creates an xs:base64Binary value from an octal string.", + returnsOpt(Type.BASE64_BINARY), + optParam("value", Type.STRING, "The octal string") + ); + + static final FunctionSignature FS_TO_OCTETS = functionSignature( + QN_TO_OCTETS, + "Returns the binary data as a sequence of octets.", + returnsOptMany(Type.INTEGER), + param("value", Type.BASE64_BINARY, "The binary data") + ); + + static final FunctionSignature FS_FROM_OCTETS = functionSignature( + QN_FROM_OCTETS, + "Converts a sequence of octets into binary data.", + returns(Type.BASE64_BINARY), + optManyParam("values", Type.INTEGER, "The octet values (0-255)") + ); + + public BinaryConversionFunctions(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override 
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        // Dispatch on the name the function was invoked under; bin:from-octets is the
+        // fall-through case.
+        if (isCalledAs("hex")) {
+            return hexToBinary(args);
+        } else if (isCalledAs("bin")) {
+            return binToBinary(args);
+        } else if (isCalledAs("octal")) {
+            return octalToBinary(args);
+        } else if (isCalledAs("to-octets")) {
+            return toOctets(args);
+        } else {
+            return fromOctets(args);
+        }
+    }
+
+    /**
+     * Implements bin:hex: parses a string of hexadecimal digits into binary data.
+     * Whitespace and underscores are ignored; an odd number of digits is left-padded
+     * with a single "0".
+     *
+     * @return the decoded data; a zero-length value when no digits remain after stripping;
+     *         the empty sequence when args[0] is empty
+     * @throws XPathException NON_NUMERIC_CHARACTER if any other character is present
+     */
+    private Sequence hexToBinary(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        String hex = args[0].getStringValue();
+        // Strip whitespace and underscores per spec
+        hex = hex.replaceAll("[\\s_]", "");
+
+        if (hex.isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        // Validate characters
+        for (int i = 0; i < hex.length(); i++) {
+            final char c = hex.charAt(i);
+            if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) {
+                throw new XPathException(this, BinaryModuleErrorCode.NON_NUMERIC_CHARACTER,
+                        "Invalid hexadecimal character: '" + c + "'");
+            }
+        }
+
+        // Prepend "0" if odd length
+        if (hex.length() % 2 != 0) {
+            hex = "0" + hex;
+        }
+
+        // Two hex digits per output octet.
+        final byte[] data = new byte[hex.length() / 2];
+        for (int i = 0; i < data.length; i++) {
+            data[i] = (byte) Integer.parseInt(hex.substring(i * 2, i * 2 + 2), 16);
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+
+    /**
+     * Implements bin:bin: parses a string of binary digits (0/1) into binary data.
+     * Whitespace and underscores are ignored; the digit string is left-padded with zeros
+     * to a multiple of 8 bits.
+     *
+     * @return the decoded data; a zero-length value when no digits remain after stripping;
+     *         the empty sequence when args[0] is empty
+     * @throws XPathException NON_NUMERIC_CHARACTER if any other character is present
+     */
+    private Sequence binToBinary(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        String bin = args[0].getStringValue();
+        bin = bin.replaceAll("[\\s_]", "");
+
+        if (bin.isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        // Validate characters
+        for (int i = 0; i < bin.length(); i++) {
+            final char c = bin.charAt(i);
+            if (c != '0' && c != '1') {
+                throw new XPathException(this, BinaryModuleErrorCode.NON_NUMERIC_CHARACTER,
+                        "Invalid binary character: '" + c + "'");
+            }
+        }
+
+        // Pad to 8-bit multiple
+        final int remainder = bin.length() % 8;
+        if (remainder != 0) {
+            bin = "0".repeat(8 - remainder) + bin;
+        }
+
+        // Eight binary digits per output octet.
+        final byte[] data = new byte[bin.length() / 8];
+        for (int i = 0; i < data.length; i++) {
+            data[i] = (byte) Integer.parseInt(bin.substring(i * 8, i * 8 + 8), 2);
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+
+    /**
+     * Implements bin:octal: parses a string of octal digits into binary data by expanding
+     * the digits to bits and regrouping them into octets.
+     *
+     * @return the decoded data; a zero-length value when no digits remain after stripping;
+     *         the empty sequence when args[0] is empty
+     * @throws XPathException NON_NUMERIC_CHARACTER if a character other than 0-7, whitespace
+     *                        or underscore is present
+     */
+    private Sequence octalToBinary(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final String octal = stripAndValidateOctal(args[0].getStringValue());
+        if (octal.isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        final String binaryStr = octalToBinaryString(octal);
+        return BinaryModuleHelper.createBinaryResult(context, this, binaryStringToBytes(binaryStr));
+    }
+
+    /**
+     * Removes whitespace and underscores from {@code input} and checks that only the
+     * digits 0-7 remain.
+     *
+     * @return the stripped digit string (possibly empty)
+     * @throws XPathException NON_NUMERIC_CHARACTER on the first character outside 0-7
+     */
+    private String stripAndValidateOctal(final String input) throws XPathException {
+        final String octal = input.replaceAll("[\\s_]", "");
+        for (int i = 0; i < octal.length(); i++) {
+            final char c = octal.charAt(i);
+            if (c < '0' || c > '7') {
+                throw new XPathException(this, BinaryModuleErrorCode.NON_NUMERIC_CHARACTER,
+                        "Invalid octal character: '" + c + "'");
+            }
+        }
+        return octal;
+    }
+
+    /**
+     * Expands validated octal digits into a bit string whose length is a multiple of 8.
+     * Each digit becomes 3 bits; up to two leading zero bits are then dropped while the
+     * length is not byte-aligned, and whatever misalignment remains is fixed by
+     * left-padding with zeros.
+     *
+     * @param octal a string consisting only of the digits 0-7
+     * @return the corresponding bit string, byte-aligned
+     */
+    private static String octalToBinaryString(final String octal) {
+        // Convert each octal digit to 3-bit binary
+        final StringBuilder bits = new StringBuilder();
+        for (int i = 0; i < octal.length(); i++) {
+            final int digit = octal.charAt(i) - '0';
+            bits.append(String.format("%3s", Integer.toBinaryString(digit)).replace(' ', '0'));
+        }
+
+        // Strip up to 2 leading zeros (octal digit = 3 bits, but only multiples of 8 matter)
+        // For a non-empty string, (length - 1) % 8 != 7 holds exactly when the length is not
+        // a multiple of 8, so stripping stops as soon as the bit string is byte-aligned.
+        String binaryStr = bits.toString();
+        int stripCount = 0;
+        while (stripCount < 2 && !binaryStr.isEmpty() && binaryStr.charAt(0) == '0'
+                && (binaryStr.length() - 1) % 8 != 7) {
+            binaryStr = binaryStr.substring(1);
+            stripCount++;
+        }
+
+        // Pad to 8-bit multiple
+        final int remainder = binaryStr.length() % 8;
+        if (remainder != 0) {
+            binaryStr = "0".repeat(8 - remainder) + binaryStr;
+        }
+        return binaryStr;
+    }
+
+    /**
+     * Converts a bit string into octets, eight characters per octet, most significant
+     * bit first.
+     *
+     * @param binaryStr a string of '0'/'1' characters; callers in this class pass a length
+     *                  that is a multiple of 8 (any trailing partial group would be ignored)
+     * @return the decoded octets; a zero-length array for an empty string
+     */
+    private static byte[] binaryStringToBytes(final String binaryStr) {
+        if (binaryStr.isEmpty()) {
+            return new byte[0];
+        }
+        final byte[] data = new byte[binaryStr.length() / 8];
+        for (int i = 0; i < data.length; i++) {
+            data[i] = (byte) Integer.parseInt(binaryStr.substring(i * 8, i * 8 + 8), 2);
+        }
+        return data;
+    }
+
+    /**
+     * Implements bin:to-octets: returns each octet of the binary value as an integer
+     * in the range 0-255.
+     *
+     * @return one integer per octet, or the empty sequence when there is no data or the
+     *         data is zero-length
+     */
+    private Sequence toOctets(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null || data.length == 0) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final ValueSequence result = new ValueSequence(data.length);
+        for (final byte b : data) {
+            // Mask to read the signed Java byte as an unsigned octet.
+            result.add(new IntegerValue(this, b & 0xFF));
+        }
+        return result;
+    }
+
+    /**
+     * Implements bin:from-octets: builds binary data from a sequence of integers, one
+     * octet per item. An empty input sequence produces a zero-length binary value.
+     */
+    private Sequence fromOctets(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        final int len = args[0].getItemCount();
+        final byte[] data = new byte[len];
+        for (int i = 0; i < len; i++) {
+            // The (byte) cast keeps only the low 8 bits, so values outside 0-255 are narrowed
+            // silently. NOTE(review): the EXPath spec defines bin:octet-out-of-range for this
+            // case - confirm whether an explicit range check should be added here.
+            data[i] = (byte) ((IntegerValue) args[0].itemAt(i)).getInt();
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryInferEncodingFunction.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryInferEncodingFunction.java
new file mode 100644
index 00000000000..d0c93fd202a
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryInferEncodingFunction.java
@@ -0,0 +1,152 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the
GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.functions.map.MapType; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import java.nio.charset.Charset; +import java.nio.charset.UnsupportedCharsetException; + +import static org.exist.xquery.FunctionDSL.*; + +/** + * EXPath Binary Module 4.0 — bin:infer-encoding (Section 6.3). + * + *

Infers the actual encoding and byte offset of text data within binary data, + * based on BOM detection and the declared encoding.

+ * + * @see EXPath Binary Module 4.0 §6.3 + */ +public class BinaryInferEncodingFunction extends BasicFunction { + + private static final QName QN_INFER_ENCODING = new QName("infer-encoding", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + + private static final byte[] UTF8_BOM = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF}; + private static final byte[] UTF16_BE_BOM = {(byte) 0xFE, (byte) 0xFF}; + private static final byte[] UTF16_LE_BOM = {(byte) 0xFF, (byte) 0xFE}; + + static final FunctionSignature[] FS_INFER_ENCODING = functionSignatures( + QN_INFER_ENCODING, + "Infers the actual encoding and data offset from binary data, detecting BOMs and resolving encoding families.", + returns(Type.MAP_ITEM), + arities( + arity( + param("data", Type.BASE64_BINARY, "The binary data to analyze") + ), + arity( + param("data", Type.BASE64_BINARY, "The binary data to analyze"), + optParam("encoding", Type.STRING, "The declared encoding (default: UTF-8)") + ) + ) + ); + + public BinaryInferEncodingFunction(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + final byte[] data = BinaryModuleHelper.getBinaryData(args[0]); + if (data == null) { + throw new XPathException(this, org.exist.xquery.ErrorCodes.XPTY0004, + "Empty sequence is not allowed as the first argument of bin:infer-encoding"); + } + + final String declaredEncoding; + if (args.length > 1 && !args[1].isEmpty()) { + declaredEncoding = args[1].getStringValue(); + } else { + declaredEncoding = "UTF-8"; + } + + validateEncoding(declaredEncoding); + + final String normalizedEncoding = normalizeEncodingFamily(declaredEncoding); + String resultEncoding = declaredEncoding; + int resultOffset = 0; + + if (isUtf8Family(normalizedEncoding)) { + if (startsWith(data, UTF8_BOM)) { + resultEncoding = "UTF-8"; + resultOffset = 3; + } + } else if 
(isUtf16Family(normalizedEncoding)) { + if (startsWith(data, UTF16_BE_BOM)) { + resultEncoding = "UTF-16BE"; + resultOffset = 2; + } else if (startsWith(data, UTF16_LE_BOM)) { + resultEncoding = "UTF-16LE"; + resultOffset = 2; + } else if ("UTF-16".equalsIgnoreCase(normalizedEncoding)) { + resultEncoding = "UTF-16BE"; + resultOffset = 0; + } + } + + final MapType result = new MapType(this, context); + result.add(new StringValue(this, "encoding"), new StringValue(this, resultEncoding)); + result.add(new StringValue(this, "offset"), new IntegerValue(this, resultOffset)); + return result; + } + + private void validateEncoding(final String encoding) throws XPathException { + try { + Charset.forName(encoding); + } catch (final UnsupportedCharsetException e) { + throw new XPathException(this, BinaryModuleErrorCode.UNKNOWN_ENCODING, + "Unknown encoding: '" + encoding + "'"); + } + } + + private static String normalizeEncodingFamily(final String encoding) { + return encoding.toUpperCase().replace("-", "").replace("_", ""); + } + + private static boolean isUtf8Family(final String normalized) { + return "UTF8".equals(normalized); + } + + private static boolean isUtf16Family(final String normalized) { + return "UTF16".equals(normalized) || "UTF16BE".equals(normalized) || "UTF16LE".equals(normalized); + } + + private static boolean startsWith(final byte[] data, final byte[] prefix) { + if (data.length < prefix.length) { + return false; + } + for (int i = 0; i < prefix.length; i++) { + if (data[i] != prefix[i]) { + return false; + } + } + return true; + } +} diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java new file mode 100644 index 00000000000..18c4c905645 --- /dev/null +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModule.java @@ -0,0 +1,123 @@ +/* + * eXist-db Open Source 
Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.exist.xquery.AbstractInternalModule; +import org.exist.xquery.FunctionDef; + +import java.util.List; +import java.util.Map; + +import static org.exist.xquery.FunctionDSL.functionDefs; + +/** + * EXPath Binary Module 4.0. 
+ * + * @see EXPath Binary Module 4.0 + */ +public class BinaryModule extends AbstractInternalModule { + + public static final String NAMESPACE_URI = "http://expath.org/ns/binary"; + public static final String PREFIX = "bin"; + public static final String INCLUSION_DATE = "2026-03-04"; + public static final String RELEASED_IN_VERSION = "eXist-7.0.0"; + + private static final FunctionDef[] functions = functionDefs( + functionDefs(BinaryConversionFunctions.class, + BinaryConversionFunctions.FS_HEX, + BinaryConversionFunctions.FS_BIN, + BinaryConversionFunctions.FS_OCTAL, + BinaryConversionFunctions.FS_TO_OCTETS, + BinaryConversionFunctions.FS_FROM_OCTETS), + + functionDefs(BinaryBasicFunctions.class, + BinaryBasicFunctions.FS_LENGTH, + BinaryBasicFunctions.FS_PART[0], + BinaryBasicFunctions.FS_PART[1], + BinaryBasicFunctions.FS_JOIN, + BinaryBasicFunctions.FS_INSERT_BEFORE, + BinaryBasicFunctions.FS_PAD_LEFT[0], + BinaryBasicFunctions.FS_PAD_LEFT[1], + BinaryBasicFunctions.FS_PAD_RIGHT[0], + BinaryBasicFunctions.FS_PAD_RIGHT[1], + BinaryBasicFunctions.FS_FIND), + + functionDefs(BinaryTextFunctions.class, + BinaryTextFunctions.FS_DECODE_STRING[0], + BinaryTextFunctions.FS_DECODE_STRING[1], + BinaryTextFunctions.FS_DECODE_STRING[2], + BinaryTextFunctions.FS_DECODE_STRING[3], + BinaryTextFunctions.FS_ENCODE_STRING[0], + BinaryTextFunctions.FS_ENCODE_STRING[1]), + + functionDefs(BinaryPackingFunctions.class, + BinaryPackingFunctions.FS_PACK_DOUBLE[0], + BinaryPackingFunctions.FS_PACK_DOUBLE[1], + BinaryPackingFunctions.FS_PACK_FLOAT[0], + BinaryPackingFunctions.FS_PACK_FLOAT[1], + BinaryPackingFunctions.FS_PACK_INTEGER[0], + BinaryPackingFunctions.FS_PACK_INTEGER[1], + BinaryPackingFunctions.FS_UNPACK_DOUBLE[0], + BinaryPackingFunctions.FS_UNPACK_DOUBLE[1], + BinaryPackingFunctions.FS_UNPACK_FLOAT[0], + BinaryPackingFunctions.FS_UNPACK_FLOAT[1], + BinaryPackingFunctions.FS_UNPACK_INTEGER[0], + BinaryPackingFunctions.FS_UNPACK_INTEGER[1], + 
BinaryPackingFunctions.FS_UNPACK_UNSIGNED_INTEGER[0], + BinaryPackingFunctions.FS_UNPACK_UNSIGNED_INTEGER[1]), + + functionDefs(BinaryBitwiseFunctions.class, + BinaryBitwiseFunctions.FS_OR, + BinaryBitwiseFunctions.FS_XOR, + BinaryBitwiseFunctions.FS_AND, + BinaryBitwiseFunctions.FS_NOT, + BinaryBitwiseFunctions.FS_SHIFT), + + functionDefs(BinaryInferEncodingFunction.class, + BinaryInferEncodingFunction.FS_INFER_ENCODING[0], + BinaryInferEncodingFunction.FS_INFER_ENCODING[1]) + ); + + public BinaryModule(final Map> parameters) { + super(functions, parameters); + } + + @Override + public String getNamespaceURI() { + return NAMESPACE_URI; + } + + @Override + public String getDefaultPrefix() { + return PREFIX; + } + + @Override + public String getDescription() { + return "EXPath Binary Module 4.0 https://qt4cg.org/specifications/expath-binary-40/Overview.html"; + } + + @Override + public String getReleaseVersion() { + return RELEASED_IN_VERSION; + } +} diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleErrorCode.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleErrorCode.java new file mode 100644 index 00000000000..cd2b0ad8daa --- /dev/null +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleErrorCode.java @@ -0,0 +1,68 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.exist.dom.QName; +import org.exist.xquery.ErrorCodes.ErrorCode; + +/** + * Error codes for the EXPath Binary Module 4.0. + * + * @see EXPath Binary Module 4.0 - Errors + */ +public class BinaryModuleErrorCode { + + public static final ErrorCode NON_NUMERIC_CHARACTER = new ErrorCode( + new QName("non-numeric-character", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX), + "The argument to bin:hex(), bin:bin(), or bin:octal() contains a character that is not valid for the specified notation."); + + public static final ErrorCode INDEX_OUT_OF_RANGE = new ErrorCode( + new QName("index-out-of-range", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX), + "Offset and/or size is out of range for the given binary data."); + + public static final ErrorCode NEGATIVE_SIZE = new ErrorCode( + new QName("negative-size", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX), + "Size, count, or padding is negative."); + + public static final ErrorCode UNKNOWN_ENCODING = new ErrorCode( + new QName("unknown-encoding", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX), + "The specified encoding is not supported."); + + public static final ErrorCode CONVERSION_ERROR = new ErrorCode( + new QName("conversion-error", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX), + "An error occurred during encoding or decoding of a string."); + + public static final ErrorCode DIFFERING_LENGTH_ARGUMENTS = new ErrorCode( + new QName("differing-length-arguments", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX), + "The arguments to a bitwise operation are of differing length."); + + public static final ErrorCode INVALID_ENCODING = new ErrorCode( + new 
QName("invalid-encoding", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX), + "The encoding is invalid for the given data."); + + public static final ErrorCode INTEGER_TOO_LARGE = new ErrorCode( + new QName("integer-too-large", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX), + "Integer value exceeds the implementation-defined maximum."); + + private BinaryModuleErrorCode() { + } +} diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java new file mode 100644 index 00000000000..1d27b8e8e45 --- /dev/null +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryModuleHelper.java @@ -0,0 +1,112 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream; +import org.exist.xquery.Expression; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.Base64BinaryValueType; +import org.exist.xquery.value.BinaryValue; +import org.exist.xquery.value.BinaryValueFromInputStream; +import org.exist.xquery.value.Sequence; + +import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream; + +import javax.annotation.Nullable; +import java.io.IOException; + +/** + * Shared utility methods for the EXPath Binary Module functions. + */ +class BinaryModuleHelper { + + /** + * Extracts a byte array from a binary sequence argument. + * + * @param arg the sequence argument (expected to contain a single binary value) + * @return the byte array, or null if the argument is an empty sequence + * @throws XPathException if the binary data cannot be read + */ + @Nullable + static byte[] getBinaryData(final Sequence arg) throws XPathException { + if (arg.isEmpty()) { + return null; + } + final BinaryValue binary = (BinaryValue) arg.itemAt(0); + try (final UnsynchronizedByteArrayOutputStream os = new UnsynchronizedByteArrayOutputStream()) { + binary.streamBinaryTo(os); + return os.toByteArray(); + } catch (final IOException e) { + throw new XPathException((Expression) null, "Failed to read binary data: " + e.getMessage(), e); + } + } + + /** + * Creates an xs:base64Binary value from a byte array. 
+ * + * @param context the XQuery context + * @param expr the calling expression (for error reporting) + * @param data the byte array + * @return the base64Binary value + * @throws XPathException if the value cannot be created + */ + static BinaryValue createBinaryResult(final XQueryContext context, final Expression expr, final byte[] data) throws XPathException { + return BinaryValueFromInputStream.getInstance( + context, + new Base64BinaryValueType(), + new UnsynchronizedByteArrayInputStream(data), + expr + ); + } + + /** + * Validates the octet-order parameter string. + * + * @param order the order string + * @return true if little-endian, false if big-endian + * @throws XPathException if the value is not a valid octet order + */ + static boolean isLittleEndian(final Expression expr, final String order) throws XPathException { + return switch (order) { + case "most-significant-first", "big-endian", "BE" -> false; + case "least-significant-first", "little-endian", "LE" -> true; + default -> throw new XPathException(expr, + org.exist.xquery.ErrorCodes.XPTY0004, + "Invalid octet order: '" + order + "'. Expected one of: most-significant-first, big-endian, BE, least-significant-first, little-endian, LE"); + }; + } + + /** + * Reverses a byte array in place. 
+ */ + static void reverseBytes(final byte[] data) { + for (int i = 0, j = data.length - 1; i < j; i++, j--) { + final byte tmp = data[i]; + data[i] = data[j]; + data[j] = tmp; + } + } + + private BinaryModuleHelper() { + } +} diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryPackingFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryPackingFunctions.java new file mode 100644 index 00000000000..f29577e28e1 --- /dev/null +++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryPackingFunctions.java @@ -0,0 +1,339 @@ +/* + * eXist-db Open Source Native XML Database + * Copyright (C) 2001 The eXist-db Authors + * + * info@exist-db.org + * http://www.exist-db.org + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.DoubleValue; +import org.exist.xquery.value.FloatValue; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.Type; + +import java.math.BigInteger; +import java.nio.ByteBuffer; +import java.util.Arrays; + +import static org.exist.xquery.FunctionDSL.*; + +/** + * EXPath Binary Module 4.0 — Numeric Packing and Unpacking (Section 7). + * + *
    + *
  • bin:pack-double
  • + *
  • bin:pack-float
  • + *
  • bin:pack-integer
  • + *
  • bin:unpack-double
  • + *
  • bin:unpack-float
  • + *
  • bin:unpack-integer
  • + *
  • bin:unpack-unsigned-integer
  • + *
+ * + * @see EXPath Binary Module 4.0 §7 + */ +public class BinaryPackingFunctions extends BasicFunction { + + private static final QName QN_PACK_DOUBLE = new QName("pack-double", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_PACK_FLOAT = new QName("pack-float", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_PACK_INTEGER = new QName("pack-integer", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_UNPACK_DOUBLE = new QName("unpack-double", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_UNPACK_FLOAT = new QName("unpack-float", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_UNPACK_INTEGER = new QName("unpack-integer", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + private static final QName QN_UNPACK_UNSIGNED_INTEGER = new QName("unpack-unsigned-integer", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX); + + static final FunctionSignature[] FS_PACK_DOUBLE = functionSignatures( + QN_PACK_DOUBLE, + "Returns the 8-octet binary representation of an xs:double value.", + returns(Type.BASE64_BINARY), + arities( + arity(param("value", Type.DOUBLE, "The double value to pack")), + arity(param("value", Type.DOUBLE, "The double value to pack"), + param("order", Type.STRING, "The octet order: 'most-significant-first' (default), 'big-endian', 'BE', 'least-significant-first', 'little-endian', 'LE'")) + ) + ); + + static final FunctionSignature[] FS_PACK_FLOAT = functionSignatures( + QN_PACK_FLOAT, + "Returns the 4-octet binary representation of an xs:float value.", + returns(Type.BASE64_BINARY), + arities( + arity(param("value", Type.FLOAT, "The float value to pack")), + arity(param("value", Type.FLOAT, "The float value to pack"), + param("order", Type.STRING, "The octet order")) + ) + ); + + static final FunctionSignature[] FS_PACK_INTEGER = functionSignatures( + QN_PACK_INTEGER, + "Returns the 
two's-complement binary representation of an xs:integer value.", + returns(Type.BASE64_BINARY), + arities( + arity(param("value", Type.INTEGER, "The integer value to pack"), + param("size", Type.INTEGER, "The number of octets in the result")), + arity(param("value", Type.INTEGER, "The integer value to pack"), + param("size", Type.INTEGER, "The number of octets in the result"), + param("order", Type.STRING, "The octet order")) + ) + ); + + static final FunctionSignature[] FS_UNPACK_DOUBLE = functionSignatures( + QN_UNPACK_DOUBLE, + "Extracts an xs:double value from binary data.", + returns(Type.DOUBLE), + arities( + arity(param("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based byte offset")), + arity(param("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based byte offset"), + param("order", Type.STRING, "The octet order")) + ) + ); + + static final FunctionSignature[] FS_UNPACK_FLOAT = functionSignatures( + QN_UNPACK_FLOAT, + "Extracts an xs:float value from binary data.", + returns(Type.FLOAT), + arities( + arity(param("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based byte offset")), + arity(param("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based byte offset"), + param("order", Type.STRING, "The octet order")) + ) + ); + + static final FunctionSignature[] FS_UNPACK_INTEGER = functionSignatures( + QN_UNPACK_INTEGER, + "Extracts a signed xs:integer value from binary data.", + returns(Type.INTEGER), + arities( + arity(param("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based byte offset"), + param("size", Type.INTEGER, "The number of octets to read")), + arity(param("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based byte offset"), + param("size", Type.INTEGER, "The number of octets to read"), + 
param("order", Type.STRING, "The octet order")) + ) + ); + + static final FunctionSignature[] FS_UNPACK_UNSIGNED_INTEGER = functionSignatures( + QN_UNPACK_UNSIGNED_INTEGER, + "Extracts an unsigned xs:integer value from binary data.", + returns(Type.INTEGER), + arities( + arity(param("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based byte offset"), + param("size", Type.INTEGER, "The number of octets to read")), + arity(param("value", Type.BASE64_BINARY, "The binary data"), + param("offset", Type.INTEGER, "The zero-based byte offset"), + param("size", Type.INTEGER, "The number of octets to read"), + param("order", Type.STRING, "The octet order")) + ) + ); + + public BinaryPackingFunctions(final XQueryContext context, final FunctionSignature signature) { + super(context, signature); + } + + @Override + public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException { + if (isCalledAs("pack-double")) { + return packDouble(args); + } else if (isCalledAs("pack-float")) { + return packFloat(args); + } else if (isCalledAs("pack-integer")) { + return packInteger(args); + } else if (isCalledAs("unpack-double")) { + return unpackDouble(args); + } else if (isCalledAs("unpack-float")) { + return unpackFloat(args); + } else if (isCalledAs("unpack-integer")) { + return unpackInteger(args); + } else { + return unpackUnsignedInteger(args); + } + } + + private boolean getByteOrder(final Sequence[] args, final int orderArgIndex) throws XPathException { + if (args.length > orderArgIndex && !args[orderArgIndex].isEmpty()) { + return BinaryModuleHelper.isLittleEndian(this, args[orderArgIndex].getStringValue()); + } + return false; // big-endian by default + } + + private Sequence packDouble(final Sequence[] args) throws XPathException { + final double value = ((DoubleValue) args[0].itemAt(0)).getDouble(); + final boolean le = getByteOrder(args, 1); + + final byte[] data = new byte[8]; + 
ByteBuffer.wrap(data).putLong(Double.doubleToRawLongBits(value));
+        if (le) {
+            BinaryModuleHelper.reverseBytes(data);
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+
+    /**
+     * Packs the float in {@code args[0]} into its 4-byte IEEE 754 bit pattern.
+     *
+     * @param args the call arguments; index 0 holds the value, index 1 is handed to
+     *             {@code getByteOrder} to decide whether the bytes are reversed
+     * @return the four packed bytes wrapped by {@code BinaryModuleHelper.createBinaryResult}
+     * @throws XPathException if an argument cannot be read
+     */
+    private Sequence packFloat(final Sequence[] args) throws XPathException {
+        final float value = ((FloatValue) args[0].itemAt(0)).getValue();
+        final boolean le = getByteOrder(args, 1);
+
+        // A freshly wrapped ByteBuffer writes most-significant byte first; the raw-bits
+        // conversion does not collapse NaN values to the canonical NaN.
+        final byte[] data = new byte[4];
+        ByteBuffer.wrap(data).putInt(Float.floatToRawIntBits(value));
+        if (le) {
+            BinaryModuleHelper.reverseBytes(data);
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+
+    /**
+     * Packs the integer in {@code args[0]} into exactly {@code args[1]} bytes of
+     * two's-complement representation.
+     *
+     * <p>Values shorter than the requested size are sign-extended; values that need more
+     * bytes lose their most significant bytes. A size of zero yields an empty binary.</p>
+     *
+     * @param args the call arguments; index 0 is the value, index 1 the size in bytes,
+     *             index 2 is handed to {@code getByteOrder}
+     * @return the packed bytes wrapped by {@code BinaryModuleHelper.createBinaryResult}
+     * @throws XPathException with {@code NEGATIVE_SIZE} if the size is negative
+     */
+    private Sequence packInteger(final Sequence[] args) throws XPathException {
+        final BigInteger value = ((IntegerValue) args[0].itemAt(0)).toJavaObject(BigInteger.class);
+        final int size = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final boolean le = getByteOrder(args, 2);
+
+        if (size < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Size must not be negative: " + size);
+        }
+
+        if (size == 0) {
+            return BinaryModuleHelper.createBinaryResult(context, this, new byte[0]);
+        }
+
+        final byte[] twosComplement = value.toByteArray();
+        final byte[] data = new byte[size];
+
+        // Fill with sign extension byte (0x00 for positive, 0xFF for negative)
+        if (value.signum() < 0) {
+            Arrays.fill(data, (byte) 0xFF);
+        }
+
+        // Copy the significant bytes into the result, right-aligned (big-endian)
+        if (twosComplement.length <= size) {
+            System.arraycopy(twosComplement, 0, data, size - twosComplement.length, twosComplement.length);
+        } else {
+            // Truncate from the left (most significant bytes)
+            System.arraycopy(twosComplement, twosComplement.length - size, data, 0, size);
+        }
+
+        if (le) {
+            BinaryModuleHelper.reverseBytes(data);
+        }
+        return BinaryModuleHelper.createBinaryResult(context, this, data);
+    }
+
+    /**
+     * Reads 8 bytes at the offset in {@code args[1]} and interprets them as an IEEE 754 double.
+     *
+     * @param args the call arguments; index 0 is the binary data, index 1 the byte offset,
+     *             index 2 is handed to {@code getByteOrder}
+     * @return the decoded value as a {@link DoubleValue}
+     * @throws XPathException with {@code INDEX_OUT_OF_RANGE} if the 8 bytes are not available
+     */
+    private Sequence unpackDouble(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final boolean le = getByteOrder(args, 2);
+
+        validateUnpackRange(data, offset, 8);
+
+        // Work on a copy so that reversing for little-endian input never touches the source bytes.
+        final byte[] slice = Arrays.copyOfRange(data, offset, offset + 8);
+        if (le) {
+            BinaryModuleHelper.reverseBytes(slice);
+        }
+        final long bits = ByteBuffer.wrap(slice).getLong();
+        return new DoubleValue(this, Double.longBitsToDouble(bits));
+    }
+
+    /**
+     * Reads 4 bytes at the offset in {@code args[1]} and interprets them as an IEEE 754 float.
+     *
+     * @param args the call arguments; index 0 is the binary data, index 1 the byte offset,
+     *             index 2 is handed to {@code getByteOrder}
+     * @return the decoded value as a {@link FloatValue}
+     * @throws XPathException with {@code INDEX_OUT_OF_RANGE} if the 4 bytes are not available
+     */
+    private Sequence unpackFloat(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final boolean le = getByteOrder(args, 2);
+
+        validateUnpackRange(data, offset, 4);
+
+        final byte[] slice = Arrays.copyOfRange(data, offset, offset + 4);
+        if (le) {
+            BinaryModuleHelper.reverseBytes(slice);
+        }
+        final int bits = ByteBuffer.wrap(slice).getInt();
+        return new FloatValue(this, Float.intBitsToFloat(bits));
+    }
+
+    /**
+     * Reads {@code args[2]} bytes at the offset in {@code args[1]} and interprets them as a
+     * signed two's-complement integer. A size of zero yields 0.
+     *
+     * @param args the call arguments; index 0 is the binary data, index 1 the byte offset,
+     *             index 2 the size in bytes, index 3 is handed to {@code getByteOrder}
+     * @return the decoded value as an {@link IntegerValue}
+     * @throws XPathException with {@code NEGATIVE_SIZE} for a negative size, or
+     *                        {@code INDEX_OUT_OF_RANGE} if the requested bytes are not available
+     */
+    private Sequence unpackInteger(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final int size = ((IntegerValue) args[2].itemAt(0)).getInt();
+        final boolean le = getByteOrder(args, 3);
+
+        if (size < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Size must not be negative: " + size);
+        }
+
+        // The range is checked before the zero-size shortcut, so a bad offset is still reported.
+        validateUnpackRange(data, offset, size);
+
+        if (size == 0) {
+            return new IntegerValue(this, 0);
+        }
+
+        final byte[] slice = Arrays.copyOfRange(data, offset, offset + size);
+        if (le) {
+            BinaryModuleHelper.reverseBytes(slice);
+        }
+        // BigInteger(byte[]) interprets as signed two's-complement
+        final BigInteger result = new BigInteger(slice);
+        return new IntegerValue(this, result);
+    }
+
+    /**
+     * Reads {@code args[2]} bytes at the offset in {@code args[1]} and interprets them as an
+     * unsigned integer. A size of zero yields 0.
+     *
+     * @param args the call arguments; index 0 is the binary data, index 1 the byte offset,
+     *             index 2 the size in bytes, index 3 is handed to {@code getByteOrder}
+     * @return the decoded non-negative value as an {@link IntegerValue}
+     * @throws XPathException with {@code NEGATIVE_SIZE} for a negative size, or
+     *                        {@code INDEX_OUT_OF_RANGE} if the requested bytes are not available
+     */
+    private Sequence unpackUnsignedInteger(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        final int offset = ((IntegerValue) args[1].itemAt(0)).getInt();
+        final int size = ((IntegerValue) args[2].itemAt(0)).getInt();
+        final boolean le = getByteOrder(args, 3);
+
+        if (size < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Size must not be negative: " + size);
+        }
+
+        validateUnpackRange(data, offset, size);
+
+        if (size == 0) {
+            return new IntegerValue(this, 0);
+        }
+
+        final byte[] slice = Arrays.copyOfRange(data, offset, offset + size);
+        if (le) {
+            BinaryModuleHelper.reverseBytes(slice);
+        }
+        // BigInteger(1, byte[]) interprets as unsigned (positive signum)
+        final BigInteger result = new BigInteger(1, slice);
+        return new IntegerValue(this, result);
+    }
+
+    /**
+     * Verifies that {@code size} bytes starting at {@code offset} lie inside {@code data}.
+     *
+     * @param data   the binary data being read; {@code null} is rejected
+     * @param offset the zero-based start position
+     * @param size   the number of bytes to read; every caller in this class passes a
+     *               non-negative value
+     * @throws XPathException with {@code INDEX_OUT_OF_RANGE} if {@code data} is {@code null},
+     *                        the offset is negative, or the range runs past the end of the data
+     */
+    private void validateUnpackRange(final byte[] data, final int offset, final int size) throws XPathException {
+        if (data == null) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Binary data is empty");
+        }
+        // Compare against the remaining length instead of computing offset + size: that sum
+        // wraps to a negative int for offsets near Integer.MAX_VALUE, slips past the check and
+        // later fails inside Arrays.copyOfRange with an unchecked exception.
+        if (offset < 0 || size > data.length - offset) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " + size " + size + " exceeds binary data length " + data.length);
+        }
+    }
+}
diff --git a/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java
new file mode 100644
index 00000000000..9d6d08d1260
--- /dev/null
+++ b/extensions/modules/expath-binary/src/main/java/org/exist/xquery/modules/binary/BinaryTextFunctions.java
@@ -0,0 +1,200 @@
+/*
+ * eXist-db Open Source Native XML Database
+ * Copyright (C) 2001 The eXist-db Authors
+ *
+ * info@exist-db.org
+ * http://www.exist-db.org
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any
later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +package org.exist.xquery.modules.binary; + +import org.exist.dom.QName; +import org.exist.xquery.BasicFunction; +import org.exist.xquery.FunctionSignature; +import org.exist.xquery.XPathException; +import org.exist.xquery.XQueryContext; +import org.exist.xquery.value.IntegerValue; +import org.exist.xquery.value.Sequence; +import org.exist.xquery.value.StringValue; +import org.exist.xquery.value.Type; + +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.UnsupportedCharsetException; +import java.util.Arrays; + +import static org.exist.xquery.FunctionDSL.*; + +/** + * EXPath Binary Module 4.0 — Text Encoding and Decoding (Section 6). + * + *
<ul>
+ *   <li>bin:decode-string</li>
+ *   <li>bin:encode-string</li>
+ * </ul>
+ *
+ * @see EXPath Binary Module 4.0 §6
+ */
+public class BinaryTextFunctions extends BasicFunction {
+
+    private static final QName QN_DECODE_STRING = new QName("decode-string", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+    private static final QName QN_ENCODE_STRING = new QName("encode-string", BinaryModule.NAMESPACE_URI, BinaryModule.PREFIX);
+
+    /** Signatures of bin:decode-string for arities 1 to 4. */
+    static final FunctionSignature[] FS_DECODE_STRING = functionSignatures(
+            QN_DECODE_STRING,
+            "Decodes binary data to an xs:string using the specified encoding.",
+            returnsOpt(Type.STRING),
+            arities(
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data to decode")
+                    ),
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data to decode"),
+                            param("encoding", Type.STRING, "The character encoding (default: UTF-8)")
+                    ),
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data to decode"),
+                            param("encoding", Type.STRING, "The character encoding (default: UTF-8)"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset to start decoding")
+                    ),
+                    arity(
+                            optParam("value", Type.BASE64_BINARY, "The binary data to decode"),
+                            param("encoding", Type.STRING, "The character encoding (default: UTF-8)"),
+                            param("offset", Type.INTEGER, "The zero-based byte offset to start decoding"),
+                            param("size", Type.INTEGER, "The number of bytes to decode")
+                    )
+            )
+    );
+
+    /** Signatures of bin:encode-string for arities 1 and 2. */
+    static final FunctionSignature[] FS_ENCODE_STRING = functionSignatures(
+            QN_ENCODE_STRING,
+            "Encodes an xs:string to binary data using the specified encoding.",
+            returnsOpt(Type.BASE64_BINARY),
+            arities(
+                    arity(
+                            optParam("value", Type.STRING, "The string to encode")
+                    ),
+                    arity(
+                            optParam("value", Type.STRING, "The string to encode"),
+                            param("encoding", Type.STRING, "The character encoding (default: UTF-8)")
+                    )
+            )
+    );
+
+    public BinaryTextFunctions(final XQueryContext context, final FunctionSignature signature) {
+        super(context, signature);
+    }
+
+    /**
+     * Dispatches to the decode or encode implementation based on the name the function
+     * was called as; anything other than {@code decode-string} is treated as
+     * {@code encode-string}.
+     */
+    @Override
+    public Sequence eval(final Sequence[] args, final Sequence contextSequence) throws XPathException {
+        if (isCalledAs("decode-string")) {
+            return decodeString(args);
+        } else {
+            return encodeString(args);
+        }
+    }
+
+    /**
+     * Implements bin:decode-string.
+     *
+     * @param args the call arguments: binary value, then optionally encoding, offset and size
+     * @return the decoded string, or the empty sequence when
+     *         {@code BinaryModuleHelper.getBinaryData} returns {@code null} for the value
+     * @throws XPathException if the range is invalid, the encoding cannot be resolved,
+     *                        or the bytes are not valid in that encoding
+     */
+    private Sequence decodeString(final Sequence[] args) throws XPathException {
+        final byte[] data = BinaryModuleHelper.getBinaryData(args[0]);
+        if (data == null) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final String encoding = getOptionalStringArg(args, 1, "UTF-8");
+        final int offset = getOptionalIntArg(args, 2, 0);
+        // The default size may be negative when the offset is past the end; the offset
+        // check in validateOffsetAndSize runs first and reports that case.
+        final int size = getOptionalIntArg(args, 3, data.length - offset);
+
+        validateOffsetAndSize(data, offset, size);
+
+        final Charset charset = resolveCharset(encoding);
+        return decodeBytes(data, offset, size, charset, encoding);
+    }
+
+    /**
+     * Verifies that {@code size} bytes starting at {@code offset} lie inside {@code data}.
+     *
+     * @throws XPathException with {@code INDEX_OUT_OF_RANGE} for a bad offset or a range
+     *                        running past the end, or {@code NEGATIVE_SIZE} for a negative size
+     */
+    private void validateOffsetAndSize(final byte[] data, final int offset, final int size) throws XPathException {
+        if (offset < 0 || offset > data.length) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " is out of range for binary data of length " + data.length);
+        }
+        if (size < 0) {
+            throw new XPathException(this, BinaryModuleErrorCode.NEGATIVE_SIZE,
+                    "Size must not be negative: " + size);
+        }
+        // Compare against the remaining length: offset + size can wrap to a negative int
+        // for a very large size, which would let an out-of-range request through.
+        if (size > data.length - offset) {
+            throw new XPathException(this, BinaryModuleErrorCode.INDEX_OUT_OF_RANGE,
+                    "Offset " + offset + " + size " + size + " exceeds binary data length " + data.length);
+        }
+    }
+
+    /**
+     * Decodes the given byte range strictly: malformed or unmappable input is reported
+     * as {@code CONVERSION_ERROR} instead of being replaced.
+     *
+     * @param encoding the encoding name as supplied by the caller, used only in the error message
+     */
+    private Sequence decodeBytes(final byte[] data, final int offset, final int size,
+                                 final Charset charset, final String encoding) throws XPathException {
+        try {
+            final CharsetDecoder decoder = charset.newDecoder()
+                    .onMalformedInput(CodingErrorAction.REPORT)
+                    .onUnmappableCharacter(CodingErrorAction.REPORT);
+            final CharBuffer result = decoder.decode(ByteBuffer.wrap(data, offset, size));
+            return new StringValue(this, result.toString());
+        } catch (final CharacterCodingException e) {
+            throw new XPathException(this, BinaryModuleErrorCode.CONVERSION_ERROR,
+                    "Failed to decode binary data using encoding '" + encoding + "': " + e.getMessage());
+        }
+    }
+
+    /** Returns the string value of {@code args[index]}, or {@code defaultValue} when it is absent or empty. */
+    private static String getOptionalStringArg(final Sequence[] args, final int index, final String defaultValue) throws XPathException {
+        return (args.length > index && !args[index].isEmpty()) ? args[index].getStringValue() : defaultValue;
+    }
+
+    /** Returns the int value of {@code args[index]}, or {@code defaultValue} when it is absent or empty. */
+    private static int getOptionalIntArg(final Sequence[] args, final int index, final int defaultValue) throws XPathException {
+        return (args.length > index && !args[index].isEmpty()) ? ((IntegerValue) args[index].itemAt(0)).getInt() : defaultValue;
+    }
+
+    /**
+     * Implements bin:encode-string.
+     *
+     * @param args the call arguments: string value, then optionally the encoding
+     * @return the encoded bytes as a binary result, or the empty sequence for an empty value
+     * @throws XPathException if the encoding cannot be resolved or the string cannot be
+     *                        represented in it
+     */
+    private Sequence encodeString(final Sequence[] args) throws XPathException {
+        if (args[0].isEmpty()) {
+            return Sequence.EMPTY_SEQUENCE;
+        }
+
+        final String value = args[0].getStringValue();
+        final String encoding = getOptionalStringArg(args, 1, "UTF-8");
+
+        final Charset charset = resolveCharset(encoding);
+
+        try {
+            final ByteBuffer encoded = charset.newEncoder()
+                    .onMalformedInput(CodingErrorAction.REPORT)
+                    .onUnmappableCharacter(CodingErrorAction.REPORT)
+                    .encode(CharBuffer.wrap(value));
+            // Copy exactly the readable region of the backing array instead of assuming
+            // that it starts at index 0.
+            final int start = encoded.arrayOffset() + encoded.position();
+            final byte[] data = Arrays.copyOfRange(encoded.array(), start, start + encoded.remaining());
+            return BinaryModuleHelper.createBinaryResult(context, this, data);
+        } catch (final CharacterCodingException e) {
+            throw new XPathException(this, BinaryModuleErrorCode.CONVERSION_ERROR,
+                    "Failed to encode string using encoding '" + encoding + "': " + e.getMessage());
+        }
+    }
+
+    /**
+     * Looks up the charset for an encoding name.
+     *
+     * @throws XPathException with {@code UNKNOWN_ENCODING} if the name is not supported by
+     *                        this JVM or is not a legal charset name
+     */
+    private Charset resolveCharset(final String encoding) throws XPathException {
+        try {
+            return Charset.forName(encoding);
+        } catch (final java.nio.charset.IllegalCharsetNameException | UnsupportedCharsetException e) {
+            // Charset.forName rejects syntactically illegal names (e.g. "" or "utf 8") with
+            // IllegalCharsetNameException, which is not an UnsupportedCharsetException; both
+            // mean the caller named an encoding that cannot be used.
+            throw new XPathException(this, BinaryModuleErrorCode.UNKNOWN_ENCODING,
+                    "Unknown encoding: '" + encoding + "'");
+        }
+    }
+}
diff --git a/extensions/modules/expathrepo/expathrepo-trigger-test/src/test/resources/conf.xml b/extensions/modules/expathrepo/expathrepo-trigger-test/src/test/resources/conf.xml
index 399137a7230..ffb375210bd 100644
---
a/extensions/modules/expathrepo/expathrepo-trigger-test/src/test/resources/conf.xml +++ b/extensions/modules/expathrepo/expathrepo-trigger-test/src/test/resources/conf.xml @@ -750,6 +750,7 @@ + diff --git a/extensions/modules/expathrepo/src/test/resources-filtered/conf.xml b/extensions/modules/expathrepo/src/test/resources-filtered/conf.xml index 0203297b9dd..8fc7015bcc6 100644 --- a/extensions/modules/expathrepo/src/test/resources-filtered/conf.xml +++ b/extensions/modules/expathrepo/src/test/resources-filtered/conf.xml @@ -760,6 +760,7 @@
+ diff --git a/extensions/modules/file/src/test/resources-filtered/conf.xml b/extensions/modules/file/src/test/resources-filtered/conf.xml index 11c020c728e..32dcc9017ff 100644 --- a/extensions/modules/file/src/test/resources-filtered/conf.xml +++ b/extensions/modules/file/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git a/extensions/modules/image/src/test/resources-filtered/conf.xml b/extensions/modules/image/src/test/resources-filtered/conf.xml index 9df613700e8..1131923b2a1 100644 --- a/extensions/modules/image/src/test/resources-filtered/conf.xml +++ b/extensions/modules/image/src/test/resources-filtered/conf.xml @@ -760,6 +760,7 @@ + diff --git a/extensions/modules/mail/src/test/resources-filtered/conf.xml b/extensions/modules/mail/src/test/resources-filtered/conf.xml index cfebd73a39d..8c8a943dae1 100644 --- a/extensions/modules/mail/src/test/resources-filtered/conf.xml +++ b/extensions/modules/mail/src/test/resources-filtered/conf.xml @@ -749,6 +749,7 @@ + diff --git a/extensions/modules/persistentlogin/src/test/resources-filtered/conf.xml b/extensions/modules/persistentlogin/src/test/resources-filtered/conf.xml index 6850c1477fe..67d1ff8c774 100644 --- a/extensions/modules/persistentlogin/src/test/resources-filtered/conf.xml +++ b/extensions/modules/persistentlogin/src/test/resources-filtered/conf.xml @@ -753,6 +753,7 @@ + diff --git a/extensions/modules/pom.xml b/extensions/modules/pom.xml index 0f8bf723555..8f48305a083 100644 --- a/extensions/modules/pom.xml +++ b/extensions/modules/pom.xml @@ -52,6 +52,7 @@ cqlparser example exi + expath-binary expathrepo expathrepo/expathrepo-trigger-test file diff --git a/extensions/modules/sql/src/test/resources-filtered/conf.xml b/extensions/modules/sql/src/test/resources-filtered/conf.xml index 09ba6545e1e..856d8eb692b 100644 --- a/extensions/modules/sql/src/test/resources-filtered/conf.xml +++ b/extensions/modules/sql/src/test/resources-filtered/conf.xml @@ -753,6 +753,7 @@ + diff --git 
a/extensions/modules/xmldiff/src/test/resources-filtered/conf.xml b/extensions/modules/xmldiff/src/test/resources-filtered/conf.xml index a1a95c324d6..4b8ef88cae7 100644 --- a/extensions/modules/xmldiff/src/test/resources-filtered/conf.xml +++ b/extensions/modules/xmldiff/src/test/resources-filtered/conf.xml @@ -757,6 +757,7 @@ + diff --git a/extensions/modules/xslfo/src/test/resources-filtered/conf.xml b/extensions/modules/xslfo/src/test/resources-filtered/conf.xml index 3e14e631740..d3ee6da9421 100644 --- a/extensions/modules/xslfo/src/test/resources-filtered/conf.xml +++ b/extensions/modules/xslfo/src/test/resources-filtered/conf.xml @@ -759,6 +759,7 @@ + diff --git a/extensions/webdav/src/test/resources-filtered/conf.xml b/extensions/webdav/src/test/resources-filtered/conf.xml index 5dc0efc380a..18570098dc1 100644 --- a/extensions/webdav/src/test/resources-filtered/conf.xml +++ b/extensions/webdav/src/test/resources-filtered/conf.xml @@ -743,6 +743,7 @@ + diff --git a/extensions/xqdoc/src/test/resources-filtered/conf.xml b/extensions/xqdoc/src/test/resources-filtered/conf.xml index 7c96ef98809..0fe40cf5e0c 100644 --- a/extensions/xqdoc/src/test/resources-filtered/conf.xml +++ b/extensions/xqdoc/src/test/resources-filtered/conf.xml @@ -759,6 +759,7 @@ +