Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions loopy/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
from typing import TYPE_CHECKING
from typing import (
TYPE_CHECKING,
)
Comment thread
nkoskelo marked this conversation as resolved.
Outdated

import numpy as np

Expand Down Expand Up @@ -162,10 +164,7 @@ def map_constant(self, expr: object) -> bool:

def map_variable(self, expr: p.Variable) -> bool:
if expr.name == self.vec_iname:
# Technically, this is doable. But we're not going there.
raise UnvectorizableError()

# A single variable is always a scalar.
Comment thread
nkoskelo marked this conversation as resolved.
return True
return False

map_tagged_variable = map_variable
Expand Down
73 changes: 72 additions & 1 deletion loopy/target/opencl.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

from loopy.codegen import CodeGenerationState
from loopy.codegen.result import CodeGenerationResult
from loopy.kernel import LoopKernel


# {{{ dtype registry wrappers
Expand Down Expand Up @@ -456,7 +457,7 @@ def get_opencl_callables():

# {{{ symbol mangler

def opencl_symbol_mangler(kernel, name):
def opencl_symbol_mangler(kernel: LoopKernel, name: str):
Comment thread
nkoskelo marked this conversation as resolved.
Outdated
# FIXME: should be more picky about exact names
if name.startswith("FLT_"):
return NumpyType(np.dtype(np.float32)), name
Expand Down Expand Up @@ -545,6 +546,20 @@ def wrap_in_typecast(self, actual_type, needed_dtype, s):
from pymbolic.primitives import Comparison
return Comparison(s, "!=", 0)

if needed_dtype == actual_type:
return s

registry = self.codegen_state.ast_builder.target.get_dtype_registry()
if self.codegen_state.target.is_vector_dtype(needed_dtype):
# OpenCL does not let you do explicit vector type casts between vector
# types. Instead you need to call their function which is of the form
# <desttype> convert_<desttype><n>(src) where n
# is the number of elements in the vector which is the same as in src.
if self.codegen_state.target.is_vector_dtype(actual_type) or \
actual_type.dtype.kind == "b":

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this bool handling doing here?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The type `actual_type` is computed from the expression before the vector literal is inserted, so it may not reflect the vectorized form of the expression. I think a better solution would be to update the type inference system to use the vectorized version of the expression instead.

cast = var("convert_%s" % registry.dtype_to_ctype(needed_dtype))
return cast(s)

return super().wrap_in_typecast(actual_type, needed_dtype, s)

def map_group_hw_index(self, expr, type_context):
Expand All @@ -553,6 +568,62 @@ def map_group_hw_index(self, expr, type_context):
def map_local_hw_index(self, expr, type_context):
return var("lid")(expr.axis)

def map_variable(self, expr, type_context):
    """Map a variable, expanding the vectorization iname to a vector literal.

    If vectorization is active and *expr* names the vectorization iname,
    return a :class:`loopy.symbolic.Literal` whose text is a cast vector
    literal with lanes ``0, 1, ..., length - 1``. The vector's element type
    is the kernel's index dtype. Every other variable is handed to the
    superclass implementation unchanged.
    """
    vec_info = self.codegen_state.vectorization_info
    if not vec_info or vec_info.iname != expr.name:
        return super().map_variable(expr, type_context)

    # The vectorization iname itself has to become a vector literal.
    from loopy.symbolic import Literal

    lane_count = vec_info.length
    target = self.codegen_state.target
    vec_dtype = target.vector_dtype(self.codegen_state.kernel.index_dtype,
                                    lane_count)
    type_name = target.dtype_to_typename(vec_dtype)
    lanes = ",".join(str(lane) for lane in range(lane_count))
    return Literal(f"(({type_name}) ({lanes}))")

def map_if(self, expr, type_context):
from loopy.types import to_loopy_type
result_type = self.infer_type(expr)
conditional_needed_loopy_type = to_loopy_type(np.bool_)
if self.codegen_state.vectorization_info:
from loopy.codegen import UnvectorizableError
from loopy.expression import VectorizabilityChecker
checker = VectorizabilityChecker(self.codegen_state.kernel,
self.codegen_state.vectorization_info.iname,
self.codegen_state.vectorization_info.length)

try:
is_vector = checker(expr)

if is_vector:
"""
We could have a vector literal here.
Comment thread
nkoskelo marked this conversation as resolved.
Outdated
So we may need to type cast the condition.
OpenCL specification states that for ( c ? a : b)
to be vectorized appropriately c must have the same
Comment thread
nkoskelo marked this conversation as resolved.
Outdated
number of elements in the vector as that of a and b.
Also each element must have the same number of bits,
and c must be an integral type.
Comment thread
nkoskelo marked this conversation as resolved.
Outdated
"""
index_type = to_loopy_type(np.int64)
Comment thread
nkoskelo marked this conversation as resolved.
Outdated
if type_context == "f":
Comment thread
nkoskelo marked this conversation as resolved.
Outdated
index_type = to_loopy_type(np.int32)
length = self.codegen_state.vectorization_info.length
vector_type = self.codegen_state.target.vector_dtype(index_type,
length)
conditional_needed_loopy_type = to_loopy_type(vector_type)
except UnvectorizableError:
Comment thread
nkoskelo marked this conversation as resolved.
Outdated

Copy link
Copy Markdown
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we suppressing exceptions here in the first place?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We know that VectorizabilityChecker has succeeded at least once before. However, it is unclear whether it will pass for the current expression, which may come from a different part of the code. So, out of caution, I thought it best to rerun the VectorizabilityChecker here. If the VectorizabilityChecker succeeds, we need to ensure the vector conditional is typed properly. If it fails, the expression is not a vector, so we just handle the case as normal.

Comment thread
nkoskelo marked this conversation as resolved.
Outdated
pass

return type(expr)(
self.rec(expr.condition, type_context,
conditional_needed_loopy_type),
self.rec(expr.then, type_context, result_type),
self.rec(expr.else_, type_context, result_type),
)
# }}}


Expand Down
30 changes: 30 additions & 0 deletions test/test_target.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,36 @@ def test_float3():
assert "float3" in device_code


def test_cl_vectorize_index_variable(ctx_factory):
    """Check a vectorized kernel whose conditional depends on the loop index.

    The kernel computes ``a[i]*3`` where ``i < 32`` and ``sin(a[i])``
    elsewhere. Both arrays are split along axis 0 by 4 and the inner iname
    is tagged ``vec``. The device result is compared against a reference
    computed element by element with NumPy.
    """
    kernel = lp.make_kernel(
        "{ [i]: 0<=i<n }",
        "\nb[i] = a[i]*3 if i < 32 else sin(a[i])\n")

    # Lay out a and b as (outer, 4) and vectorize over the inner axis.
    kernel = lp.split_array_axis(kernel, "a,b", 0, 4)
    kernel = lp.split_iname(kernel, "i", 4)
    kernel = lp.tag_inames(kernel, {"i_inner": "vec"})
    kernel = lp.tag_array_axes(kernel, "a,b", "c,vec")
    kernel = lp.set_options(kernel, write_code=True)
    kernel = lp.assume(kernel, "n % 4 = 0 and n>0")

    rng = np.random.default_rng(seed=12)
    a = rng.normal(size=(16, 4))
    queue = cl.CommandQueue(ctx_factory())
    kernel = lp.add_and_infer_dtypes(kernel, {"a": np.float64, "n": np.int64})
    _evt, (result,) = kernel(queue, a=a, n=a.size)

    # Reference: recover the flat index from (row, col) and apply the same
    # branch the kernel uses.
    expected = np.zeros(a.shape, dtype=np.float64)
    for row, col in np.ndindex(16, 4):
        flat_index = row*4 + col
        entry = a[row, col]
        expected[row, col] = entry * 3 if flat_index < 32 else np.sin(entry)

    assert np.allclose(result, expected)


if __name__ == "__main__":
import sys
if len(sys.argv) > 1:
Expand Down