From 51d5e2e06e19274fef73e444099323d0f2519157 Mon Sep 17 00:00:00 2001 From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com> Date: Mon, 27 Apr 2026 18:54:30 +0000 Subject: [PATCH] Implement Python-like built-in methods for bytes type - Added split, splitlines, rsplit, strip, lstrip, rstrip, startswith, endswith, removeprefix, removesuffix, find, index, rfind, rindex, count, replace, join, partition, and rpartition to bytes type. - Added comprehensive tests in tests/bytes_methods.rs. - Ensured methods handle edge cases safely (empty pattern, pattern longer than source). Co-authored-by: nullmonk <15130760+nullmonk@users.noreply.github.com> --- .../src/interpreter/methods/bytes.rs | 423 ++++++++++++++++++ .../src/interpreter/methods/mod.rs | 23 +- .../eldritch-core/tests/bytes_methods.rs | 147 ++++++ 3 files changed, 592 insertions(+), 1 deletion(-) create mode 100644 implants/lib/eldritch/eldritch-core/tests/bytes_methods.rs diff --git a/implants/lib/eldritch/eldritch-core/src/interpreter/methods/bytes.rs b/implants/lib/eldritch/eldritch-core/src/interpreter/methods/bytes.rs index 551955fdd..ad5c855ef 100644 --- a/implants/lib/eldritch/eldritch-core/src/interpreter/methods/bytes.rs +++ b/implants/lib/eldritch/eldritch-core/src/interpreter/methods/bytes.rs @@ -1,6 +1,10 @@ use super::ArgCheck; use crate::ast::Value; +use crate::interpreter::introspection::is_truthy; use alloc::string::String; +use alloc::sync::Arc; +use alloc::vec::Vec; +use spin::RwLock; pub fn handle_bytes_methods( b: &[u8], @@ -15,6 +19,425 @@ pub fn handle_bytes_methods( Err(e) => Err(alloc::format!("UnicodeDecodeError: {}", e)), } })()), + "split" => Some((|| { + args.require_range(0, 1, "split")?; + let parts: Vec = if args.is_empty() { + b.split(|&c| (c as char).is_ascii_whitespace()) + .filter(|p| !p.is_empty()) + .map(|p| Value::Bytes(p.to_vec())) + .collect() + } else { + match &args[0] { + Value::Bytes(delim) => { + if delim.is_empty() { + return Err("ValueError: empty separator".into()); + } + let mut result = Vec::new(); + let mut last = 0; + let mut i = 0; + while i + delim.len() <= b.len() { + if &b[i..i + delim.len()] == delim { + result.push(Value::Bytes(b[last..i].to_vec())); + i += delim.len(); + last = i; + } else { + i += 1; + } + } + result.push(Value::Bytes(b[last..].to_vec())); + result + } + _ => return Err("TypeError: expected bytes".into()), + } + }; + Ok(Value::List(Arc::new(RwLock::new(parts)))) + })()), + "splitlines" => Some((|| { + args.require_range(0, 1, "splitlines")?; + let keepends = if !args.is_empty() { + is_truthy(&args[0]) + } else { + false + }; + + let mut lines = Vec::new(); + let mut start = 0; + let mut i = 0; + while i < b.len() { + if b[i] == b'\n' || b[i] == b'\r' { + let mut end = i + 1; + if b[i] == b'\r' && i + 1 < b.len() && b[i + 1] == b'\n' { + end += 1; + } + if keepends { + lines.push(Value::Bytes(b[start..end].to_vec())); + } else { + lines.push(Value::Bytes(b[start..i].to_vec())); + } + start = end; + i = end; + } else { + i += 1; + } + } + if start < b.len() { + lines.push(Value::Bytes(b[start..].to_vec())); + } + + Ok(Value::List(Arc::new(RwLock::new(lines)))) + })()), + "rsplit" => Some((|| { + args.require_range(0, 1, "rsplit")?; + let parts: Vec = if args.is_empty() { + b.split(|&c| (c as char).is_ascii_whitespace()) + .filter(|p| !p.is_empty()) + .map(|p| Value::Bytes(p.to_vec())) + .collect() + } else { + match &args[0] { + Value::Bytes(delim) => { + if delim.is_empty() { + return Err("ValueError: empty separator".into()); + } + let mut result = Vec::new(); + let mut last = b.len(); + if b.len() >= delim.len() { + let mut i = b.len() - delim.len(); + loop { + if &b[i..i + delim.len()] == delim { + result.push(Value::Bytes(b[i + delim.len()..last].to_vec())); + last = i; + if i < delim.len() { + break; + } + i -= delim.len(); + } else { + if i == 0 { + break; + } + i -= 1; + } + } + } + result.push(Value::Bytes(b[..last].to_vec())); + result.reverse(); + result + } + _ => return Err("TypeError: expected bytes".into()), + } + }; + Ok(Value::List(Arc::new(RwLock::new(parts)))) + })()), + "strip" => Some((|| { + args.require_range(0, 1, "strip")?; + if args.is_empty() { + let start = b + .iter() + .position(|&c| !(c as char).is_ascii_whitespace()) + .unwrap_or(b.len()); + let end = b[start..] + .iter() + .rposition(|&c| !(c as char).is_ascii_whitespace()) + .map(|pos| start + pos + 1) + .unwrap_or(start); + Ok(Value::Bytes(b[start..end].to_vec())) + } else { + match &args[0] { + Value::Bytes(chars) => { + let start = b.iter().position(|c| !chars.contains(c)).unwrap_or(b.len()); + let end = b[start..] + .iter() + .rposition(|c| !chars.contains(c)) + .map(|pos| start + pos + 1) + .unwrap_or(start); + Ok(Value::Bytes(b[start..end].to_vec())) + } + _ => Err("TypeError: expected bytes".into()), + } + } + })()), + "lstrip" => Some((|| { + args.require_range(0, 1, "lstrip")?; + if args.is_empty() { + let start = b + .iter() + .position(|&c| !(c as char).is_ascii_whitespace()) + .unwrap_or(b.len()); + Ok(Value::Bytes(b[start..].to_vec())) + } else { + match &args[0] { + Value::Bytes(chars) => { + let start = b.iter().position(|c| !chars.contains(c)).unwrap_or(b.len()); + Ok(Value::Bytes(b[start..].to_vec())) + } + _ => Err("TypeError: expected bytes".into()), + } + } + })()), + "rstrip" => Some((|| { + args.require_range(0, 1, "rstrip")?; + if args.is_empty() { + let end = b + .iter() + .rposition(|&c| !(c as char).is_ascii_whitespace()) + .map(|pos| pos + 1) + .unwrap_or(0); + Ok(Value::Bytes(b[..end].to_vec())) + } else { + match &args[0] { + Value::Bytes(chars) => { + let end = b + .iter() + .rposition(|c| !chars.contains(c)) + .map(|pos| pos + 1) + .unwrap_or(0); + Ok(Value::Bytes(b[..end].to_vec())) + } + _ => Err("TypeError: expected bytes".into()), + } + } + })()), + "startswith" => Some((|| { + args.require(1, "startswith")?; + match &args[0] { + Value::Bytes(prefix) => Ok(Value::Bool(b.starts_with(prefix))), + _ => Err("TypeError: expected bytes".into()), + } + })()), + "endswith" => Some((|| { + args.require(1, "endswith")?; + match &args[0] { + Value::Bytes(suffix) => Ok(Value::Bool(b.ends_with(suffix))), + _ => Err("TypeError: expected bytes".into()), + } + })()), + "removeprefix" => Some((|| { + args.require(1, "removeprefix")?; + match &args[0] { + Value::Bytes(prefix) => { + if b.starts_with(prefix) { + Ok(Value::Bytes(b[prefix.len()..].to_vec())) + } else { + Ok(Value::Bytes(b.to_vec())) + } + } + _ => Err("TypeError: expected bytes".into()), + } + })()), + "removesuffix" => Some((|| { + args.require(1, "removesuffix")?; + match &args[0] { + Value::Bytes(suffix) => { + if b.ends_with(suffix) { + Ok(Value::Bytes(b[..b.len() - suffix.len()].to_vec())) + } else { + Ok(Value::Bytes(b.to_vec())) + } + } + _ => Err("TypeError: expected bytes".into()), + } + })()), + "find" => Some((|| { + args.require(1, "find")?; + match &args[0] { + Value::Bytes(sub) => { + if sub.is_empty() { + return Ok(Value::Int(0)); + } + if sub.len() <= b.len() { + for i in 0..=b.len() - sub.len() { + if &b[i..i + sub.len()] == sub { + return Ok(Value::Int(i as i64)); + } + } + } + Ok(Value::Int(-1)) + } + _ => Err("TypeError: expected bytes".into()), + } + })()), + "index" => Some((|| { + args.require(1, "index")?; + match &args[0] { + Value::Bytes(sub) => { + if sub.is_empty() { + return Ok(Value::Int(0)); + } + if sub.len() <= b.len() { + for i in 0..=b.len() - sub.len() { + if &b[i..i + sub.len()] == sub { + return Ok(Value::Int(i as i64)); + } + } + } + Err("ValueError: substring not found".into()) + } + _ => Err("TypeError: expected bytes".into()), + } + })()), + "rfind" => Some((|| { + args.require(1, "rfind")?; + match &args[0] { + Value::Bytes(sub) => { + if sub.is_empty() { + return Ok(Value::Int(b.len() as i64)); + } + if sub.len() <= b.len() { + for i in (0..=b.len() - sub.len()).rev() { + if &b[i..i + sub.len()] == sub { + return Ok(Value::Int(i as i64)); + } + } + } + Ok(Value::Int(-1)) + } + _ => Err("TypeError: expected bytes".into()), + } + })()), + "rindex" => Some((|| { + args.require(1, "rindex")?; + match &args[0] { + Value::Bytes(sub) => { + if sub.is_empty() { + return Ok(Value::Int(b.len() as i64)); + } + if sub.len() <= b.len() { + for i in (0..=b.len() - sub.len()).rev() { + if &b[i..i + sub.len()] == sub { + return Ok(Value::Int(i as i64)); + } + } + } + Err("ValueError: substring not found".into()) + } + _ => Err("TypeError: expected bytes".into()), + } + })()), + "count" => Some((|| { + args.require(1, "count")?; + match &args[0] { + Value::Bytes(sub) => { + if sub.is_empty() { + return Ok(Value::Int((b.len() + 1) as i64)); + } + let mut count = 0; + let mut i = 0; + while i + sub.len() <= b.len() { + if &b[i..i + sub.len()] == sub { + count += 1; + i += sub.len(); + } else { + i += 1; + } + } + Ok(Value::Int(count)) + } + _ => Err("TypeError: expected bytes".into()), + } + })()), + "replace" => Some((|| { + args.require(2, "replace")?; + match (&args[0], &args[1]) { + (Value::Bytes(old), Value::Bytes(new)) => { + if old.is_empty() { + let mut result = Vec::new(); + for &byte in b { + result.extend_from_slice(new); + result.push(byte); + } + result.extend_from_slice(new); + return Ok(Value::Bytes(result)); + } + let mut result = Vec::new(); + let mut i = 0; + while i < b.len() { + if i + old.len() <= b.len() && &b[i..i + old.len()] == old { + result.extend_from_slice(new); + i += old.len(); + } else { + result.push(b[i]); + i += 1; + } + } + Ok(Value::Bytes(result)) + } + _ => Err("TypeError: expected bytes".into()), + } + })()), + "join" => Some((|| { + args.require(1, "join")?; + match &args[0] { + Value::List(l) => { + let list = l.read(); + let mut result = Vec::new(); + for (i, v) in list.iter().enumerate() { + if i > 0 { + result.extend_from_slice(b); + } + match v { + Value::Bytes(bb) => result.extend_from_slice(bb), + _ => return Err("TypeError: join() expects list of bytes".to_string()), + } + } + Ok(Value::Bytes(result)) + } + _ => Err("TypeError: join() expects a list".into()), + } + })()), + "partition" => Some((|| { + args.require(1, "partition")?; + match &args[0] { + Value::Bytes(sep) => { + if sep.is_empty() { + return Err("ValueError: empty separator".into()); + } + if sep.len() <= b.len() { + for i in 0..=b.len() - sep.len() { + if &b[i..i + sep.len()] == sep { + return Ok(Value::Tuple(vec![ + Value::Bytes(b[..i].to_vec()), + Value::Bytes(sep.clone()), + Value::Bytes(b[i + sep.len()..].to_vec()), + ])); + } + } + } + Ok(Value::Tuple(vec![ + Value::Bytes(b.to_vec()), + Value::Bytes(Vec::new()), + Value::Bytes(Vec::new()), + ])) + } + _ => Err("TypeError: expected bytes".into()), + } + })()), + "rpartition" => Some((|| { + args.require(1, "rpartition")?; + match &args[0] { + Value::Bytes(sep) => { + if sep.is_empty() { + return Err("ValueError: empty separator".into()); + } + if sep.len() <= b.len() { + for i in (0..=b.len() - sep.len()).rev() { + if &b[i..i + sep.len()] == sep { + return Ok(Value::Tuple(vec![ + Value::Bytes(b[..i].to_vec()), + Value::Bytes(sep.clone()), + Value::Bytes(b[i + sep.len()..].to_vec()), + ])); + } + } + } + Ok(Value::Tuple(vec![ + Value::Bytes(Vec::new()), + Value::Bytes(Vec::new()), + Value::Bytes(b.to_vec()), + ])) + } + _ => Err("TypeError: expected bytes".into()), + } + })()), _ => None, } } diff --git a/implants/lib/eldritch/eldritch-core/src/interpreter/methods/mod.rs b/implants/lib/eldritch/eldritch-core/src/interpreter/methods/mod.rs index b360e1bda..d8bfa9093 100644 --- a/implants/lib/eldritch/eldritch-core/src/interpreter/methods/mod.rs +++ b/implants/lib/eldritch/eldritch-core/src/interpreter/methods/mod.rs @@ -122,7 +122,28 @@ pub fn get_native_methods(value: &Value) -> Vec { "istitle".to_string(), "encode".to_string(), ], - Value::Bytes(_) => vec!["decode".to_string()], + Value::Bytes(_) => vec![ + "decode".to_string(), + "split".to_string(), + "splitlines".to_string(), + "rsplit".to_string(), + "strip".to_string(), + "lstrip".to_string(), + "rstrip".to_string(), + "startswith".to_string(), + "endswith".to_string(), + "removeprefix".to_string(), + "removesuffix".to_string(), + "find".to_string(), + "index".to_string(), + "rfind".to_string(), + "rindex".to_string(), + "count".to_string(), + "replace".to_string(), + "join".to_string(), + "partition".to_string(), + "rpartition".to_string(), + ], _ => Vec::new(), } } diff --git a/implants/lib/eldritch/eldritch-core/tests/bytes_methods.rs b/implants/lib/eldritch/eldritch-core/tests/bytes_methods.rs new file mode 100644 index 000000000..d40d8d9cd --- /dev/null +++ b/implants/lib/eldritch/eldritch-core/tests/bytes_methods.rs @@ -0,0 +1,147 @@ +mod assert; + +#[test] +fn test_bytes_split() { + assert::pass( + r#" + b = b"a,b,c" + assert_eq(b.split(b","), [b"a", b"b", b"c"]) + assert_eq(b" a b ".split(), [b"a", b"b"]) + assert_eq(b"a--b--c".split(b"--"), [b"a", b"b", b"c"]) + # Edge case: delim longer than bytes + assert_eq(b"abc".split(b"abcdef"), [b"abc"]) + "#, + ); +} + +#[test] +fn test_bytes_splitlines() { + assert::pass( + r#" + b = b"line1\nline2\r\nline3" + assert_eq(b.splitlines(), [b"line1", b"line2", b"line3"]) + assert_eq(b.splitlines(True), [b"line1\n", b"line2\r\n", b"line3"]) + "#, + ); +} + +#[test] +fn test_bytes_rsplit() { + assert::pass( + r#" + b = b"a,b,c" + assert_eq(b.rsplit(b","), [b"a", b"b", b"c"]) + assert_eq(b" a b ".rsplit(), [b"a", b"b"]) + # Edge case: delim longer than bytes + assert_eq(b"abc".rsplit(b"abcdef"), [b"abc"]) + "#, + ); +} + +#[test] +fn test_bytes_strip() { + assert::pass( + r#" + assert_eq(b" abc ".strip(), b"abc") + assert_eq(b"xxabcyy".strip(b"xy"), b"abc") + assert_eq(b" abc ".lstrip(), b"abc ") + assert_eq(b"xxabcyy".lstrip(b"x"), b"abcyy") + assert_eq(b" abc ".rstrip(), b" abc") + assert_eq(b"xxabcyy".rstrip(b"y"), b"xxabc") + "#, + ); +} + +#[test] +fn test_bytes_startswith_endswith() { + assert::pass( + r#" + b = b"hello" + assert(b.startswith(b"he")) + assert(not b.startswith(b"ho")) + assert(b.endswith(b"lo")) + assert(not b.endswith(b"la")) + "#, + ); +} + +#[test] +fn test_bytes_removeprefix_removesuffix() { + assert::pass( + r#" + b = b"hello" + assert_eq(b.removeprefix(b"he"), b"llo") + assert_eq(b.removeprefix(b"lo"), b"hello") + assert_eq(b.removesuffix(b"lo"), b"hel") + assert_eq(b.removesuffix(b"he"), b"hello") + "#, + ); +} + +#[test] +fn test_bytes_find_index() { + assert::pass( + r#" + b = b"hello" + assert_eq(b.find(b"l"), 2) + assert_eq(b.find(b"z"), -1) + assert_eq(b.index(b"l"), 2) + assert_eq(b.rfind(b"l"), 3) + assert_eq(b.rindex(b"l"), 3) + # Edge cases: pattern longer than bytes + assert_eq(b"abc".find(b"abcdef"), -1) + assert_eq(b"abc".rfind(b"abcdef"), -1) + "#, + ); +} + +#[test] +fn test_bytes_count() { + assert::pass( + r#" + b = b"hello" + assert_eq(b.count(b"l"), 2) + assert_eq(b.count(b"o"), 1) + assert_eq(b.count(b"z"), 0) + # Edge case: pattern longer than bytes + assert_eq(b"abc".count(b"abcdef"), 0) + "#, + ); +} + +#[test] +fn test_bytes_replace() { + assert::pass( + r#" + b = b"hello" + assert_eq(b.replace(b"l", b"p"), b"heppo") + assert_eq(b.replace(b"he", b"ha"), b"hallo") + # Edge case: old longer than bytes + assert_eq(b"abc".replace(b"abcdef", b"x"), b"abc") + "#, + ); +} + +#[test] +fn test_bytes_join() { + assert::pass( + r#" + sep = b"," + assert_eq(sep.join([b"a", b"b", b"c"]), b"a,b,c") + "#, + ); +} + +#[test] +fn test_bytes_partition() { + assert::pass( + r#" + b = b"a,b,c" + assert_eq(b.partition(b","), (b"a", b",", b"b,c")) + assert_eq(b.rpartition(b","), (b"a,b", b",", b"c")) + # Edge cases: sep longer than bytes + assert_eq(b"abc".partition(b"abcdef"), (b"abc", b"", b"")) + assert_eq(b"abc".rpartition(b"abcdef"), (b"", b"", b"abc")) + "#, + ); +}