Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
connorjward committed Jun 29, 2023
2 parents cd388f7 + 93fcd9c commit 8158afd
Show file tree
Hide file tree
Showing 23 changed files with 489 additions and 105 deletions.
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ jobs:
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_conda_env
rewrite_pyopencl_test
. ./.ci/examples-funcs.sh
install_example_prereqs
Expand Down
1 change: 1 addition & 0 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ Pytest POCL Examples:
curl -L -O https://tiker.net/ci-support-v0
. ./ci-support-v0
build_py_project_in_venv
rewrite_pyopencl_test
. ./.ci/examples-funcs.sh
install_example_prereqs
Expand Down
4 changes: 2 additions & 2 deletions loopy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@
make_reduction_inames_unique,
has_schedulable_iname_nesting, get_iname_duplication_options,
add_inames_to_insn, add_inames_for_unused_hw_axes, map_domain,
remove_inames_from_insn)
remove_inames_from_insn, remove_predicates_from_insn)

from loopy.transform.instruction import (
find_instructions, map_instructions,
Expand Down Expand Up @@ -212,7 +212,7 @@
"make_reduction_inames_unique",
"has_schedulable_iname_nesting", "get_iname_duplication_options",
"add_inames_to_insn", "add_inames_for_unused_hw_axes", "map_domain",
"remove_inames_from_insn",
"remove_inames_from_insn", "remove_predicates_from_insn",

"add_prefetch", "change_arg_to_image",
"tag_array_axes", "tag_data_axes",
Expand Down
28 changes: 18 additions & 10 deletions loopy/codegen/bounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,12 @@
"""


from typing import FrozenSet
import islpy as isl
from islpy import dim_type
from loopy.codegen.tools import CodegenOperationCacheManager

from loopy.kernel import LoopKernel


# {{{ approximate, convex bounds check generator
Expand Down Expand Up @@ -55,35 +59,39 @@ def get_approximate_convex_bounds_checks(domain, check_inames,

# {{{ on which inames may a conditional depend?

def get_usable_inames_for_conditional(
        kernel: "LoopKernel", sched_index: int,
        op_cache_manager: "CodegenOperationCacheManager") -> FrozenSet[str]:
    """Return the inames on which a conditional at *sched_index* may depend.

    :arg kernel: the kernel being generated; queried for iname tags.
    :arg sched_index: index of the schedule item under consideration.
    :arg op_cache_manager: provides the cached per-schedule-index lookups
        (``active_inames``, ``has_barrier_within``, ``callkernel_index``) and
        the concurrent inames of the enclosing subkernel.
    :returns: a :class:`frozenset` of iname names. Outside of any subkernel
        this is exactly the set of active inames at *sched_index*.
    """
    active_inames = op_cache_manager.active_inames[sched_index]
    crosses_barrier = op_cache_manager.has_barrier_within[sched_index]

    # Find our containing subkernel. Grab inames for all insns from there.
    subkernel_index = op_cache_manager.callkernel_index[sched_index]

    if subkernel_index is None:
        # Outside all subkernels - use only inames available to host.
        assert isinstance(active_inames, frozenset)
        return active_inames

    concurrent_inames_in_subkernel = (
        op_cache_manager.get_concurrent_inames_in_a_callkernel(
            subkernel_index))

    # not all concurrent inames are usable:
    # - local indices may not be used in conditionals that cross barriers.
    # - ILP indices and vector lane indices are not available in loop
    #   bounds, they only get defined at the innermost level of nesting.
    from loopy.schedule import find_used_inames_within
    from loopy.kernel.data import VectorizeTag, LocalInameTagBase, IlpBaseTag
    usable_concurrent_inames_in_subkernel = frozenset(
            iname for iname in concurrent_inames_in_subkernel
            if (not (kernel.iname_tags_of_type(iname, LocalInameTagBase)
                    and crosses_barrier)
                and not kernel.iname_tags_of_type(iname, VectorizeTag)
                and not kernel.iname_tags_of_type(iname, IlpBaseTag))
            # Restrict further to the inames reported as used within the
            # schedule item at sched_index.
            ) & find_used_inames_within(kernel, sched_index)

    return active_inames | usable_concurrent_inames_in_subkernel

# }}}

Expand Down
17 changes: 14 additions & 3 deletions loopy/codegen/instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,17 @@
from pytools import memoize_on_first_arg


# These 'id' arguments are here because Set has a __hash__ supplied by isl,
# which ignores names. This may lead to incorrect things being returned from
# the cache. Passing Set id()s breaks that cache aliasing.
# This should be removed once there is a proper solution for the cache
# aliasing, such as what's under discussion in
# https://github.com/inducer/islpy/pull/103/.
@memoize_on_first_arg
def _get_new_implemented_domain(kernel, chk_domain, implemented_domain):
def _get_new_implemented_domain(
kernel,
id_chk_domain, chk_domain,
id_implemented_domain, implemented_domain):

chk_domain, implemented_domain = isl.align_two(
chk_domain, implemented_domain)
Expand All @@ -51,7 +60,9 @@ def to_codegen_result(
check_inames, (dim_type.set,))

chk_domain, new_implemented_domain = _get_new_implemented_domain(
codegen_state.kernel, chk_domain, codegen_state.implemented_domain)
codegen_state.kernel,
id(chk_domain), chk_domain,
id(codegen_state.implemented_domain), codegen_state.implemented_domain)

if chk_domain.is_empty():
return None
Expand Down Expand Up @@ -286,7 +297,7 @@ def generate_c_instruction_code(codegen_state, insn):
def generate_nop_instruction_code(codegen_state, insn):
    """Generate code for a no-op instruction.

    :arg codegen_state: the current code generation state; its
        ``vectorization_info`` must be *None* and its ``ast_builder`` is used
        to emit the result.
    :arg insn: the no-op instruction; only its ``id`` is used, to label the
        emitted comment.
    :returns: whatever ``ast_builder.emit_noop_with_comment`` returns for the
        comment text ``"no-op (insn=<id>)"``.
    :raises UnvectorizableError: if code is being generated in a vectorized
        context.
    """
    if codegen_state.vectorization_info is not None:
        raise UnvectorizableError("C instructions cannot be vectorized")
    return codegen_state.ast_builder.emit_noop_with_comment(
            "no-op (insn=%s)" % (insn.id))

# vim: foldmethod=marker
1 change: 1 addition & 0 deletions loopy/codegen/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,7 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):
# Note: this does not include loop_iname itself!
usable_inames = get_usable_inames_for_conditional(kernel, sched_index,
codegen_state.codegen_cachemanager)

domain = kernel.get_inames_domain(loop_iname)

result = []
Expand Down
9 changes: 5 additions & 4 deletions loopy/codegen/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
Barrier, BeginBlockItem, gather_schedule_block,
ScheduleItem)
from dataclasses import dataclass
from typing import List, Dict
from typing import FrozenSet, List, Dict
from loopy.kernel.instruction import InstructionBase
from loopy.kernel import LoopKernel
from loopy.kernel.data import Iname
Expand Down Expand Up @@ -86,7 +86,7 @@ class CodegenOperationCacheManager:
An instance of :class:`KernelProxyForCodegenOperationCacheManager`.
.. automethod:: with_kernel
.. automethod:: get_parallel_inames_in_a_callkernel
.. automethod:: get_concurrent_inames_in_a_callkernel
"""
def __init__(self, kernel_proxy):
assert isinstance(kernel_proxy, KernelProxyForCodegenOperationCacheManager)
Expand Down Expand Up @@ -199,9 +199,10 @@ def get_insn_ids_for_block_at(self, sched_index):
sched_index)

@memoize_method
def get_parallel_inames_in_a_callkernel(self, callkernel_index):
def get_concurrent_inames_in_a_callkernel(
self, callkernel_index: int) -> FrozenSet[str]:
"""
Returns a :class:`frozenset` of parallel inames in a callkernel
Returns a :class:`frozenset` of concurrent inames in a callkernel
:arg callkernel_index: Index of the :class:`loopy.schedule.CallKernel`
in the :attr:`CodegenOperationCacheManager.kernel_proxy`'s
Expand Down
16 changes: 15 additions & 1 deletion loopy/kernel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,20 @@ def combine_domains(self, domains: Sequence[int]) -> isl.BasicSet:
dom, result)
result = aligned_result & aligned_dom

assert result is not None
# Subdomains may carry other domains' inames as parameters.
# Move them back into the 'set' part of the space.
param_names = {
result.get_dim_name(dim_type.param, i)
for i in range(result.dim(dim_type.param))}
for actual_iname in param_names - self.all_params():
result = result.move_dims(
dim_type.set,
result.dim(dim_type.set),
dim_type.param,
result.find_dim_by_name(dim_type.param, actual_iname),
1)

return result

def get_inames_domain(self, inames: FrozenSet[str]) -> isl.BasicSet:
Expand Down Expand Up @@ -556,7 +570,7 @@ def all_inames(self):
return frozenset(self.inames.keys())

@memoize_method
def all_params(self):
def all_params(self) -> FrozenSet[str]:
all_inames = self.all_inames()

result = set()
Expand Down
2 changes: 1 addition & 1 deletion loopy/kernel/creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def parse_nosync_option(opt_value):

elif opt_key == "if" and opt_value is not None:
predicates = opt_value.split(":")
new_predicates = set()
new_predicates = set(result["predicates"])

for pred in predicates:
from pymbolic.primitives import LogicalNot
Expand Down
Loading

0 comments on commit 8158afd

Please sign in to comment.