Prepare to update spec test suite (#1909)

alexcrichton · web-flow · commit 09175fa31ed7 · 2024-11-18T17:31:46.000Z
* Prepare to update spec test suite Some minor updates in preparation for WebAssembly/testsuite#110: * Don't limit the size of tables in the validator and instead defer such maximal validation to runtimes themselves. * Parse the `(pagesize N)` syntax on "inline memories" * Always parse table/memory limits as 64-bit integers * Stop matching spec test suite error messages This last point is something I've tried to hold off on doing for a long time but as the giant `error_matches` function continues to grow and become more unwieldy it has become less and less tenable to do this. Overall it doesn't seem too beneficial to keep maintaining this especially in the face of numerous proposals, so instead basically remove it entirely. * Rename test
diff --git a/crates/wasmparser/src/validator/core.rs b/crates/wasmparser/src/validator/core.rs
@@ -821,12 +821,6 @@ impl Module {
         }
 
         self.check_limits(ty.initial, ty.maximum, offset)?;
-        if ty.initial > MAX_WASM_TABLE_ENTRIES as u64 {
-            return Err(BinaryReaderError::new(
-                "minimum table size is out of bounds",
-                offset,
-            ));
-        }
 
         if ty.shared {
             if !features.shared_everything_threads() {
diff --git a/crates/wast/src/core/memory.rs b/crates/wast/src/core/memory.rs
@@ -38,6 +38,8 @@ pub enum MemoryKind<'a> {
         is64: bool,
         /// The inline data specified for this memory
         data: Vec<DataVal<'a>>,
+        /// Optional page size for this inline memory.
+        page_size_log2: Option<u32>,
     },
 }
 
@@ -68,6 +70,7 @@ impl<'a> Parse<'a> for Memory<'a> {
             } else {
                 parser.parse::<Option<kw::i64>>()?.is_some()
             };
+            let page_size_log2 = page_size(parser)?;
             let data = parser.parens(|parser| {
                 parser.parse::<kw::data>()?;
                 let mut data = Vec::new();
@@ -76,7 +79,11 @@ impl<'a> Parse<'a> for Memory<'a> {
                 }
                 Ok(data)
             })?;
-            MemoryKind::Inline { data, is64 }
+            MemoryKind::Inline {
+                data,
+                is64,
+                page_size_log2,
+            }
         } else if l.peek::<u32>()? || l.peek::<kw::i32>()? || l.peek::<kw::i64>()? {
             MemoryKind::Normal(parser.parse()?)
         } else {
diff --git a/crates/wast/src/core/resolve/deinline_import_export.rs b/crates/wast/src/core/resolve/deinline_import_export.rs
@@ -48,7 +48,11 @@ pub fn run(fields: &mut Vec<ModuleField>) {
                     }
                     // If data is defined inline insert an explicit `data` module
                     // field here instead, switching this to a `Normal` memory.
-                    MemoryKind::Inline { is64, ref data } => {
+                    MemoryKind::Inline {
+                        is64,
+                        ref data,
+                        page_size_log2,
+                    } => {
                         let len = data.iter().map(|l| l.len()).sum::<usize>() as u64;
                         let pages = (len + default_page_size() - 1) / default_page_size();
                         let kind = MemoryKind::Normal(MemoryType {
@@ -58,7 +62,7 @@ pub fn run(fields: &mut Vec<ModuleField>) {
                                 max: Some(pages),
                             },
                             shared: false,
-                            page_size_log2: None,
+                            page_size_log2,
                         });
                         let data = match mem::replace(&mut m.kind, kind) {
                             MemoryKind::Inline { data, .. } => data,
diff --git a/crates/wast/src/core/types.rs b/crates/wast/src/core/types.rs
@@ -613,17 +613,9 @@ impl<'a> Parse<'a> for Limits {
             false
         };
 
-        let parse = || {
-            if is64 {
-                parser.parse::<u64>()
-            } else {
-                parser.parse::<u32>().map(|x| x.into())
-            }
-        };
-
-        let min = parse()?;
+        let min = parser.parse()?;
         let max = if parser.peek::<u64>()? {
-            Some(parse()?)
+            Some(parser.parse()?)
         } else {
             None
         };
@@ -663,8 +655,9 @@ pub struct MemoryType {
     pub page_size_log2: Option<u32>,
 }
 
-fn page_size(parser: Parser<'_>) -> Result<Option<u32>> {
-    if parser.peek::<LParen>()? {
+/// Parse `(pagesize N)` or nothing.
+pub fn page_size(parser: Parser<'_>) -> Result<Option<u32>> {
+    if parser.peek::<LParen>()? && parser.peek2::<kw::pagesize>()? {
         Ok(Some(parser.parens(|parser| {
             parser.parse::<kw::pagesize>()?;
             let span = parser.cur_span();
diff --git a/tests/local/table-big.wast b/tests/local/table-big.wast
@@ -0,0 +1,3 @@
+(module definition
+  (table 1_000_000_000 funcref)
+)
diff --git a/tests/local/table-too-big.wast b/tests/local/table-too-big.wast
diff --git a/tests/roundtrip.rs b/tests/roundtrip.rs
@@ -697,173 +697,19 @@ fn error_matches(test: &Path, error: &str, message: &str) -> bool {
     if error.contains(message) {
         return true;
     }
-    // we are in control over all tsets in `tests/local/*` so all the error
-    // messages there should exactly match the `assert_invalid` or such. No need
-    // for fuzzy matching on error messages.
+
+    // we are in control over all tests in `tests/local/*` so all the error
+    // messages there should exactly match the `assert_invalid` or such
     if test.starts_with("tests/local") {
         return false;
     }
 
-    if message == "unknown operator"
-        || message == "unexpected token"
-        || message == "wrong number of lane literals"
-        || message == "type mismatch"
-        || message == "malformed lane index"
-        || message == "expected i8 literal"
-        || message == "invalid lane length"
-        || message == "unclosed annotation"
-        || message == "malformed annotation id"
-        || message == "alignment must be a power of two"
-        || message == "i32 constant out of range"
-        || message == "constant expression required"
-        || message == "legacy exceptions support is not enabled"
-    {
-        return error.contains("expected ")
-            || error.contains("constant out of range")
-            || error.contains("extra tokens remaining")
-            || error.contains("unimplemented validation of deprecated opcode")
-            || error.contains("legacy exceptions support is not enabled");
-    }
-
-    if message == "illegal character" {
-        return error.contains("unexpected character");
-    }
-
-    if message == "unclosed string" {
-        return error.contains("unexpected end-of-file");
-    }
-
-    if message == "duplicate identifier" {
-        return error.contains("duplicate") && error.contains("identifier");
-    }
-
-    // wasmparser differentiates these cases, the spec interpreter apparently
-    // doesn't
-    if message == "function and code section have inconsistent lengths" {
-        return error.contains("code section without function section");
-    }
-
-    // This test in binary.wast uses a section id implemented by other
-    // proposals, so it's valid from wasmparser's point of view
-    if message == "malformed section id" {
-        return error.contains("unexpected end-of-file");
-    }
-
-    // The spec interpreter will apparently read beyond the limits of a section
-    // as defined by its size to parse a function, wasmparser doesn't do that.
-    // That means that the error message here is legitimately different.
-    if message == "section size mismatch" {
-        return error.contains("control frames remain at end of function");
-    }
-
-    if message == "malformed import kind" {
-        return error.contains("invalid leading byte")
-            // wasmparser understands more import kinds than the default spec
-            // interpreter
-            || error.contains("unexpected end-of-file");
-    }
-
-    if message == "integer representation too long" {
-        // wasmparser implements more features than the default spec
-        // interpreter, so these error looks different.
-        return error.contains("invalid memory limits flags")
-            || error.contains("invalid table resizable limits flags")
-            // different error message for types
-            || error.contains("invalid leading byte")
-            // the spec interpreter will read past section boundaries when
-            // decoding, wasmparser won't, producing different errors.
-            || error.contains("unexpected end-of-file")
-            || error.contains("malformed section id");
-    }
-
-    if message == "integer too large" {
-        // wasmparser implements more features than the default spec
-        // interpreter, so these error looks different.
-        return error.contains("threads must be enabled for shared memories")
-            || error.contains("shared tables require the shared-everything-threads proposal")
-            || error.contains("invalid table resizable limits flags")
-            // honestly this feels like the spec interpreter is just weird
-            || error.contains("unexpected end-of-file")
-            // This mostly comes from the memory64/binary-leb128.wast test file
-            // which I think is largely busted as it looks like a bunch of lebs
-            // were inflated to a larger size while not updating the binary
-            // encoding of the size of the section.
-            || error.contains("invalid var_u32: integer representation too long")
-            || error.contains("malformed section id");
-    }
-
-    // wasmparser blames a truncated file here, the spec interpreter blames the
-    // section counts/lengths.
-    if message == "length out of bounds" || message == "unexpected end of section or function" {
-        return error.contains("unexpected end-of-file")
-            || error.contains("control frames remain at end of function");
-    }
-
-    // binary.wast includes a test in which a 0b (End) is eaten by a botched
-    // br_table.  The test assumes that the parser (not the validator) errors on
-    // a missing End before failing to validate the botched instruction.  However
-    // wasmparser fails to validate the botched instruction first
-    if message == "unexpected end" {
-        return error.contains("type index out of bounds") || error.contains("END opcode expected");
-    }
-
-    if message == "unexpected content after last section" {
-        return error.contains("section out of order")
-            || error.contains("function and code section have inconsistent lengths")
-            || error.contains("type index out of bounds");
-    }
-
-    if message == "malformed limits flags" {
-        return error.contains("invalid memory limits flags")
-            || error.contains("invalid table resizable limits flags")
-            // These tests need to be updated for the new limits flags in the
-            // custom-page-sizes-proposal.
-            || error.contains("unexpected end-of-file");
-    }
-
-    if message == "malformed memop flags" {
-        return error.contains("malformed memop alignment");
-    }
-
-    // Our error for these tests is happening as a parser error of
-    // the text file, not a validation error of the binary.
-    if message == "memory size must be at most 65536 pages (4GiB)" {
-        return error.contains("invalid u32 number: constant out of range");
-    }
-
-    if message == "illegal opcode" {
-        // The test suite contains a test with a global section where the
-        // init expression is truncated and doesn't have an "end"
-        // instruction. That's reported with wasmparser as end-of-file
-        // because the end of the section was reached while the spec
-        // interpreter says "illegal opcode".
-        return error.contains("unexpected end-of-file");
-    }
-    if message == "unknown global" {
-        return error.contains("global.get of locally defined global");
-    }
-
-    if message == "immutable global" {
-        return error.contains("global is immutable");
-    }
-
-    if message.starts_with("unknown operator") {
-        return error.starts_with("unknown operator") || error.starts_with("unexpected token");
-    }
-
-    if message.starts_with("type mismatch") {
-        return error.starts_with("type mismatch");
-    }
-
-    if message == "table size must be at most 2^32-1" {
-        return error.contains("invalid u32 number: constant out of range");
-    }
-
-    // WebAssembly/annotations#25 - the spec interpreter's lexer is different
-    // than ours which produces a different error message.
-    if message == "empty identifier" || message == "empty annotation id" {
-        return error.contains("invalid character in string");
-    }
-
-    return false;
+    // Historically wasm-tools tried to match the upstream error message. This
+    // generally led to a large sequence of matches here which is not easy to
+    // maintain and is particularly difficult when test suites and proposals
+    // conflict with each other (e.g. one asserts one error message and another
+    // asserts a different error message). Overall we didn't benefit a whole lot
+    // from trying to match errors so just assume the error is roughly the same
+    // and otherwise don't try to match it.
+    true
 }
diff --git a/tests/snapshots/local/table-big.wast.json b/tests/snapshots/local/table-big.wast.json
@@ -0,0 +1,11 @@
+{
+  "source_filename": "tests/local/table-big.wast",
+  "commands": [
+    {
+      "type": "module_definition",
+      "line": 1,
+      "filename": "table-big.0.wasm",
+      "module_type": "binary"
+    }
+  ]
+}
diff --git a/tests/snapshots/local/table-big.wast/0.print b/tests/snapshots/local/table-big.wast/0.print
@@ -0,0 +1,3 @@
+(module
+  (table (;0;) 1000000000 funcref)
+)
diff --git a/tests/snapshots/local/table-too-big.wast.json b/tests/snapshots/local/table-too-big.wast.json
diff --git a/tests/snapshots/testsuite/proposals/memory64/memory.wast.json b/tests/snapshots/testsuite/proposals/memory64/memory.wast.json
@@ -217,20 +217,23 @@
       "line": 77,
       "filename": "memory.25.wat",
       "module_type": "text",
+      "binary_filename": "memory.25.wasm",
       "text": "memory size must be at most 65536 pages (4GiB)"
     },
     {
       "type": "assert_invalid",
       "line": 81,
       "filename": "memory.26.wat",
       "module_type": "text",
+      "binary_filename": "memory.26.wasm",
       "text": "memory size must be at most 65536 pages (4GiB)"
     },
     {
       "type": "assert_invalid",
       "line": 85,
       "filename": "memory.27.wat",
       "module_type": "text",
+      "binary_filename": "memory.27.wasm",
       "text": "memory size must be at most 65536 pages (4GiB)"
     },
     {
diff --git a/tests/snapshots/testsuite/proposals/memory64/table.wast.json b/tests/snapshots/testsuite/proposals/memory64/table.wast.json
@@ -124,20 +124,23 @@
       "line": 35,
       "filename": "table.19.wat",
       "module_type": "text",
+      "binary_filename": "table.19.wasm",
       "text": "table size must be at most 2^32-1"
     },
     {
       "type": "assert_invalid",
       "line": 39,
       "filename": "table.20.wat",
       "module_type": "text",
+      "binary_filename": "table.20.wasm",
       "text": "table size must be at most 2^32-1"
     },
     {
       "type": "assert_invalid",
       "line": 43,
       "filename": "table.21.wat",
       "module_type": "text",
+      "binary_filename": "table.21.wasm",
       "text": "table size must be at most 2^32-1"
     },
     {

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+(module definition`
	`2`	`+ (table 1_000_000_000 funcref)`
	`3`	`+)`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+(module`
	`2`	`+ (table (;0;) 1000000000 funcref)`
	`3`	`+)`