Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6502c1c
sar -> shr
HodanPlodky May 27, 2026
0afb59e
tests
HodanPlodky May 27, 2026
111b1f2
Revert "sar -> shr"
HodanPlodky May 27, 2026
1cdc683
Merge branch 'master' into feat/codegen/changed_the_convert_from_bytes
HodanPlodky May 29, 2026
b4c04d3
change for venom codegen
HodanPlodky May 29, 2026
af57b24
legacy fix
HodanPlodky May 29, 2026
0ef9d80
added stray return from test
HodanPlodky May 29, 2026
4755251
propagate input type in legacy
HodanPlodky May 29, 2026
24a4a8e
small fix
HodanPlodky May 29, 2026
bea1841
full propagete
HodanPlodky May 29, 2026
e902103
literal bytes to int fix legacy
HodanPlodky May 29, 2026
1ab91c3
lint
HodanPlodky May 29, 2026
f9814dd
more expansive test
HodanPlodky May 31, 2026
4c94532
Merge branch 'master' into feat/codegen/changed_the_convert_from_bytes
HodanPlodky Jun 2, 2026
d7f001d
first widen test
HodanPlodky Jun 4, 2026
adcff64
legacy fix
HodanPlodky Jun 4, 2026
f1fd92b
venom codegen
HodanPlodky Jun 4, 2026
273673b
decimal fix
HodanPlodky Jun 5, 2026
7d7da6a
convert in tests
HodanPlodky Jun 5, 2026
a18e618
Merge branch 'master' into feat/codegen/changed_the_convert_from_bytes
HodanPlodky Jun 8, 2026
4d7d5bb
used bits
HodanPlodky Jun 8, 2026
b72d8bb
Merge branch 'master' into feat/codegen/changed_the_convert_from_bytes
HodanPlodky Jun 9, 2026
a8043e9
did in for other bytes
HodanPlodky Jun 9, 2026
1c06eb8
small opt for info we know at compile time
HodanPlodky Jun 9, 2026
28c3d81
lint
HodanPlodky Jun 9, 2026
d74e3fd
venom codegen match legacy
HodanPlodky Jun 9, 2026
1ce1906
lint
HodanPlodky Jun 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 65 additions & 1 deletion tests/functional/builtins/codegen/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,9 @@ def _py_convert(val, i_typ, o_typ):
val_bits = _padconvert(val_bits, _padding_direction(o_typ), n, padding_byte)

if getattr(o_typ, "is_signed", False) and isinstance(i_typ, BytesM_T):
n_bits = _bits_of_type(i_typ)
out_size = _bits_of_type(o_typ)
in_size = _bits_of_type(i_typ)
n_bits = max(out_size, in_size)
val_bits = _signextend(val_bits, n_bits)

try:
Expand Down Expand Up @@ -426,6 +428,68 @@ def _vyper_literal(val, typ):
return str(val)


def test_bytes_to_int_different_sizes(get_contract):
    """Check bytes -> int16 conversions whose input is known at compile time.

    Three contracts are compiled in turn: a one-byte literal, a two-byte
    literal with a leading zero byte, and a ``Bytes[2]`` constant holding a
    one-byte value. Each contract's ``foo()`` is expected to return 255.
    """
    one_byte_literal = r"""
@external
def foo() -> int16:
    return convert(b'\xff', int16)
    """

    two_byte_literal = r"""
@external
def foo() -> int16:
    return convert(b'\x00\xff', int16)
    """

    wider_constant = r"""
FOO: constant(Bytes[2]) = b'\xff'

@external
def foo() -> int16:
    return convert(FOO, int16)
    """

    # Compile and check one contract at a time, in the order listed above.
    for source in (one_byte_literal, two_byte_literal, wider_constant):
        contract = get_contract(source)
        assert contract.foo() == 255


def test_bytes_to_int_different_sizes_bytes3(get_contract):
    """Check that a runtime ``bytes3`` of all 0xff bytes converts to int96 as 0xFFFFFF."""
    source = r"""
@external
def foo(x: bytes3) -> int96:
    return convert(x, int96)
    """
    expected = 0xFF_FF_FF

    contract = get_contract(source)
    result = contract.foo(b"\xff\xff\xff")
    assert result == expected


def test_bytes_to_int_different_sizes_runtime(get_contract):
    """Check bytes -> int16 conversions whose input is only known at runtime.

    The same one-byte payload is passed to a contract taking ``Bytes[1]`` and
    to one taking ``Bytes[2]``; both are expected to return 255.
    """
    takes_bytes1 = """
@external
def foo(x: Bytes[1]) -> int16:
    return convert(x, int16)
    """

    takes_bytes2 = """
@external
def foo(x: Bytes[2]) -> int16:
    return convert(x, int16)
    """

    payload = b"\xff"
    # Compile and check one contract at a time, narrowest input type first.
    for source in (takes_bytes1, takes_bytes2):
        contract = get_contract(source)
        assert contract.foo(payload) == 255


@pytest.mark.parametrize("i_typ,o_typ,val", generate_passing_cases())
@pytest.mark.fuzzing
def test_convert_passing(get_contract, assert_compile_failed, i_typ, o_typ, val):
Expand Down
104 changes: 65 additions & 39 deletions vyper/builtins/_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
)
from vyper.semantics.types.bytestrings import _BytestringT
from vyper.semantics.types.infinity import is_bounded_length
from vyper.semantics.types.primitives import NumericT

Check notice

Code scanning / CodeQL

Cyclic import Note

Import of module
vyper.semantics.types.primitives
begins an import cycle.
from vyper.semantics.types.shortcuts import INT256_T, UINT160_T, UINT256_T
from vyper.utils import DECIMAL_DIVISOR, round_towards_zero, unsigned_to_signed

Expand All @@ -56,7 +57,7 @@
def _input_types(*allowed_types):
def decorator(f):
@functools.wraps(f)
def check_input_type(expr, arg, out_typ):
def check_input_type(expr, arg, in_typ, out_typ):
# convert arg to out_typ.
# (expr is the AST corresponding to `arg`)
ok = isinstance(arg.typ, allowed_types)
Expand All @@ -69,34 +70,55 @@
if arg.typ == out_typ and arg.typ not in (UINT256_T, INT256_T):
raise InvalidType(f"value and target are both {out_typ}", expr)

return f(expr, arg, out_typ)
return f(expr, arg, in_typ, out_typ)

return check_input_type

return decorator


def _bytes_to_num(arg, out_typ, signed):
def _bits_count(typ) -> int | None:
    """Return the bit width used for ``typ``, or ``None`` if it has none here.

    A ``BoolT`` is reported as 8 bits, a ``NumericT`` reports its own
    ``bits`` attribute, and any other type yields ``None``.
    """
    if isinstance(typ, BoolT):
        return 8
    return typ.bits if isinstance(typ, NumericT) else None


def _bytes_to_num(arg, in_typ, out_typ, signed):
# converting a bytestring to a number:
# bytestring and bytes_m are right-padded with zeroes, int is left-padded.
# convert by shr or sar the number of zero bytes (converted to bits)
# e.g. "abcd000000000000" -> bitcast(000000000000abcd, output_type)
out_size = _bits_count(out_typ)
assert out_size is not None
out_size = out_size // 8

if isinstance(arg.typ, _BytestringT):
if not is_bounded_length(arg.typ.maxlen):
if isinstance(in_typ, _BytestringT):
if not is_bounded_length(in_typ.maxlen):
raise CodegenPanic("convert: unbounded bytestring type")
_len = get_bytearray_length(arg)
assert isinstance(in_typ, _BytestringT)
if in_typ.maxlen > out_size:
out_size = in_typ.maxlen

arg = LOAD(bytes_data_ptr(arg))
num_zero_bits = ["mul", 8, ["sub", 32, _len]]
elif is_bytes_m_type(arg.typ):
num_zero_bits = 8 * (32 - arg.typ.m)
runtime_compile_diff = ["sub", out_size, _len]
val = shr(["mul", runtime_compile_diff, 8], arg)
num_zero_bits = 8 * (32 - out_size)
elif is_bytes_m_type(in_typ):
if in_typ.m > out_size:
out_size = in_typ.m
runtime_compile_diff = out_size - in_typ.m
val = shr(["mul", runtime_compile_diff, 8], arg)
num_zero_bits = 8 * (32 - out_size)
else: # pragma: nocover
raise CompilerPanic("unreachable")

if signed:
ret = sar(num_zero_bits, arg)
ret = sar(num_zero_bits, val)
else:
ret = shr(num_zero_bits, arg)
ret = shr(num_zero_bits, val)

annotation = (f"__intrinsic__byte_array_to_num({out_typ})",)
return IRnode.from_list(ret, annotation=annotation)
Expand Down Expand Up @@ -208,14 +230,17 @@
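# apply sign extension, if expected. note that the sign bit is taken at
# the width of the input bytestring, widened to the width of the output
# type when the output type is larger.
# (e.g. convert(0xff <bytes1>, int8) == -1, but
#  convert(0xff <bytes1>, int16) == 255)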
def _signextend(expr, val, arg_typ):
def _signextend(expr, val, arg_typ, out_size):
if isinstance(expr, vy_ast.Hex):
assert len(expr.value[2:]) // 2 == arg_typ.m
n_bits = arg_typ.m_bits
else:
assert len(expr.value) == arg_typ.maxlen
assert len(expr.value) <= arg_typ.maxlen
n_bits = arg_typ.maxlen * 8

if n_bits < out_size:
n_bits = out_size

return unsigned_to_signed(val, n_bits)


Expand All @@ -231,7 +256,7 @@
raise CompilerPanic("unreachable")

if isinstance(expr, (vy_ast.Hex, vy_ast.Bytes, vy_ast.HexBytes)) and out_typ.is_signed:
val = _signextend(expr, val, arg_typ)
val = _signextend(expr, val, arg_typ, out_size=_bits_count(out_typ))

lo, hi = out_typ.int_bounds
if not (lo <= val <= hi):
Expand All @@ -258,7 +283,7 @@

# apply sign extension, if expected
if isinstance(expr, vy_ast.Hex) and out_typ.is_signed:
val = _signextend(expr, val, arg_typ)
val = _signextend(expr, val, arg_typ, out_size=_bits_count(out_typ))

lo, hi = out_typ.int_bounds
if not lo <= val <= hi:
Expand All @@ -269,12 +294,12 @@

# any base type or bytes/string
@_input_types(IntegerT, DecimalT, BytesM_T, AddressT, BoolT, BytesT, StringT)
def to_bool(expr, arg, out_typ):
def to_bool(expr, arg, in_typ, out_typ):
_check_bytes(expr, arg, out_typ, 32) # should we restrict to Bytes[1]?

if isinstance(arg.typ, _BytestringT):
# no clamp. checks for any nonzero bytes.
arg = _bytes_to_num(arg, out_typ, signed=False)
arg = _bytes_to_num(arg, in_typ, out_typ, signed=False)

# NOTE: for decimal, the behavior is x != 0.0,
# (we do not issue an `sdiv DECIMAL_DIVISOR`)
Expand All @@ -283,27 +308,27 @@


@_input_types(IntegerT, DecimalT, BytesM_T, AddressT, BoolT, FlagT, BytesT)
def to_int(expr, arg, out_typ):
return _to_int(expr, arg, out_typ)
def to_int(expr, arg, in_typ, out_typ):
return _to_int(expr, arg, in_typ, out_typ)


# an internal version of to_int without input validation
def _to_int(expr, arg, out_typ):
def _to_int(expr, arg, in_typ, out_typ):
assert out_typ.bits % 8 == 0
_check_bytes(expr, arg, out_typ, 32)

if isinstance(expr, vy_ast.Constant):
return _literal_int(expr, arg.typ, out_typ)
return _literal_int(expr, in_typ, out_typ)

elif isinstance(arg.typ, BytesT):
arg_typ = arg.typ
arg = _bytes_to_num(arg, out_typ, signed=out_typ.is_signed)
arg = _bytes_to_num(arg, in_typ, out_typ, signed=out_typ.is_signed)
if arg_typ.maxlen * 8 > out_typ.bits:
arg = int_clamp(arg, out_typ.bits, signed=out_typ.is_signed)

elif is_bytes_m_type(arg.typ):
arg_typ = arg.typ
arg = _bytes_to_num(arg, out_typ, signed=out_typ.is_signed)
arg = _bytes_to_num(arg, in_typ, out_typ, signed=out_typ.is_signed)
if arg_typ.m_bits > out_typ.bits:
arg = int_clamp(arg, out_typ.bits, signed=out_typ.is_signed)

Expand Down Expand Up @@ -332,15 +357,15 @@


@_input_types(IntegerT, BoolT, BytesM_T, BytesT)
def to_decimal(expr, arg, out_typ):
def to_decimal(expr, arg, in_typ, out_typ):
_check_bytes(expr, arg, out_typ, 32)

if isinstance(expr, vy_ast.Constant):
return _literal_decimal(expr, arg.typ, out_typ)

if isinstance(arg.typ, BytesT):
arg_typ = arg.typ
arg = _bytes_to_num(arg, out_typ, signed=True)
arg = _bytes_to_num(arg, in_typ, out_typ, signed=True)
if arg_typ.maxlen * 8 > 168:
arg = IRnode.from_list(arg, typ=out_typ)
arg = clamp_basetype(arg)
Expand All @@ -349,7 +374,7 @@

elif is_bytes_m_type(arg.typ):
arg_typ = arg.typ
arg = _bytes_to_num(arg, out_typ, signed=True)
arg = _bytes_to_num(arg, in_typ, out_typ, signed=True)
if arg_typ.m_bits > 168:
arg = IRnode.from_list(arg, typ=out_typ)
arg = clamp_basetype(arg)
Expand All @@ -369,7 +394,7 @@


@_input_types(IntegerT, DecimalT, BytesM_T, AddressT, BytesT, BoolT)
def to_bytes_m(expr, arg, out_typ):
def to_bytes_m(expr, arg, in_typ, out_typ):
_check_bytes(expr, arg, out_typ, max_bytes_allowed=out_typ.m)

if isinstance(arg.typ, BytesT):
Expand Down Expand Up @@ -417,13 +442,13 @@


@_input_types(BytesM_T, IntegerT, BytesT)
def to_address(expr, arg, out_typ):
def to_address(expr, arg, in_typ, out_typ):
# question: should this be allowed?
if is_integer_type(arg.typ):
if arg.typ.is_signed:
_FAIL(arg.typ, out_typ, expr)

ret = _to_int(expr, arg, UINT160_T)
ret = _to_int(expr, arg, in_typ, UINT160_T)
return IRnode.from_list(ret, out_typ)


Expand All @@ -445,17 +470,17 @@

# question: should we allow bytesM -> String?
@_input_types(BytesT, StringT)
def to_string(expr, arg, out_typ):
def to_string(expr, arg, in_typ, out_typ):
return _cast_bytestring(expr, arg, out_typ)


@_input_types(StringT, BytesT)
def to_bytes(expr, arg, out_typ):
def to_bytes(expr, arg, in_typ, out_typ):
return _cast_bytestring(expr, arg, out_typ)


@_input_types(IntegerT)
def to_flag(expr, arg, out_typ):
def to_flag(expr, arg, in_typ, out_typ):
if arg.typ != UINT256_T:

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When is arg.typ different from in_typ ?

If the answer is "sometimes", this feels like something which should be fixed in the typer

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They should only differ when the argument is a constant.

FOO: constant(Bytes[2]) = b'\xff'

@external
def foo() -> int16:
    return convert(FOO, int16)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this example, what are the arg.typ and in_typ ?

@HodanPlodky HodanPlodky Jun 2, 2026

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

arg.typ would be Bytes[1] and in_typ would be Bytes[2], since the argument is reduced to b'\xff' at the start.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be better to have arg.typ be Bytes[2], which makes sense since the arg is FOO and its type is Bytes[2]

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be somewhere in the typechecker, I think. Do you think it should be done like that, given that there was discussion about what semantics convert should have? Or maybe I could reduce it later in convert.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that your implementation also assumes a specific semantics !
I'm not sure what the semantics of convert should be, but I am 100% sure the following should be equivalent:

FOO: constant(Bytes[2]) = b'\xff'
FOO: constant(Bytes[2]) = b'\x00\xff'

Both FOOs should have type Bytes[2] even if their expression has a stricter type.

So we should fix this where appropriate, probably in the typer, since this is not related only to conversions.
(Conversions might be the only place for now where it produces an observable difference however)

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait, what I said is wrong, I am 100% sure the following should be equivalent:

FOO: constant(Bytes[2]) = b'\xff'

_FOO: constant(Bytes[1]) = b'\xff'
FOO: constant(Bytes[2]) = _FOO

Both FOOs should have type Bytes[2].

But in my previous example, both should still have type Bytes[2]

_FAIL(arg.typ, out_typ, expr)

Expand All @@ -468,6 +493,7 @@
def convert(expr, context):
assert len(expr.args) == 2, "bad typecheck: convert"

in_typ = expr.args[0]._metadata["type"]
arg_ast = expr.args[0].reduced()
arg = Expr(arg_ast, context).ir_node
original_arg = arg
Expand All @@ -478,21 +504,21 @@
arg = unwrap_location(arg)
with arg.cache_when_complex("arg") as (b, arg):
if out_typ == BoolT():
ret = to_bool(arg_ast, arg, out_typ)
ret = to_bool(arg_ast, arg, in_typ, out_typ)
elif out_typ == AddressT():
ret = to_address(arg_ast, arg, out_typ)
ret = to_address(arg_ast, arg, in_typ, out_typ)
elif is_flag_type(out_typ):
ret = to_flag(arg_ast, arg, out_typ)
ret = to_flag(arg_ast, arg, in_typ, out_typ)
elif is_integer_type(out_typ):
ret = to_int(arg_ast, arg, out_typ)
ret = to_int(arg_ast, arg, in_typ, out_typ)
elif is_bytes_m_type(out_typ):
ret = to_bytes_m(arg_ast, arg, out_typ)
ret = to_bytes_m(arg_ast, arg, in_typ, out_typ)
elif is_decimal_type(out_typ):
ret = to_decimal(arg_ast, arg, out_typ)
ret = to_decimal(arg_ast, arg, in_typ, out_typ)
elif isinstance(out_typ, BytesT):
ret = to_bytes(arg_ast, arg, out_typ)
ret = to_bytes(arg_ast, arg, in_typ, out_typ)
elif isinstance(out_typ, StringT):
ret = to_string(arg_ast, arg, out_typ)
ret = to_string(arg_ast, arg, in_typ, out_typ)
else:
raise StructureException(f"Conversion to {out_typ} is invalid.", arg_ast)

Expand Down
Loading
Loading