From 75c1772581e03f299fd2e108523d87197dd16450 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Apr 2021 13:54:04 -0500 Subject: [PATCH 01/17] if iname is an IlpBaseTag then iname is also a ConcurrentTag --- loopy/schedule/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index c6a9ec3ac..26772c70d 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -195,12 +195,11 @@ def find_loop_nest_with_map(kernel): """ result = {} - from loopy.kernel.data import ConcurrentTag, IlpBaseTag + from loopy.kernel.data import ConcurrentTag all_nonpar_inames = { iname for iname in kernel.all_inames() - if not kernel.iname_tags_of_type(iname, - (ConcurrentTag, IlpBaseTag))} + if not kernel.iname_tags_of_type(iname, ConcurrentTag)} iname_to_insns = kernel.iname_to_insns() From e94cf786d69a34f092c19f55b3191b2582ad2e39 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Apr 2021 17:21:47 -0500 Subject: [PATCH 02/17] improves the performance of find_loop_nest_around_map --- loopy/schedule/__init__.py | 41 +++++----- loopy/schedule/tools.py | 159 +++++++++++++++++++++++++++++++++++++ 2 files changed, 179 insertions(+), 21 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 26772c70d..a2762024b 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -215,30 +215,27 @@ def find_loop_nest_around_map(kernel): """Returns a dictionary mapping inames to other inames that are always nested around them. """ - result = {} + from functools import reduce + from collections import defaultdict + from loopy.schedule.tools import get_loop_nest_tree - all_inames = kernel.all_inames() + tree = get_loop_nest_tree(kernel) - iname_to_insns = kernel.iname_to_insns() + loop_nest_around_map = defaultdict(frozenset) - # examine pairs of all inames--O(n**2), I know. 
- from loopy.kernel.data import IlpBaseTag - for inner_iname in all_inames: - result[inner_iname] = set() - for outer_iname in all_inames: - if inner_iname == outer_iname: - continue + for node in tree.all_nodes_itr(): + nest = node.identifier + depth = tree.depth(nest) + all_ancestors = reduce(frozenset.union, (tree.ancestor(nest, d).identifier + for d in range(depth)), + frozenset()) - if kernel.iname_tags_of_type(outer_iname, IlpBaseTag): - # ILP tags are special because they are parallel tags - # and therefore 'in principle' nest around everything. - # But they're realized by the scheduler as a loop - # at the innermost level, so we'll cut them some - # slack here. - continue + for iname in nest: + loop_nest_around_map[iname] = all_ancestors - if iname_to_insns[inner_iname] < iname_to_insns[outer_iname]: - result[inner_iname].add(outer_iname) + # {{{ impose constraints by the domain tree + + all_inames = kernel.all_inames() for dom_idx, dom in enumerate(kernel.domains): for outer_iname in dom.get_var_names(isl.dim_type.param): @@ -246,9 +243,11 @@ def find_loop_nest_around_map(kernel): continue for inner_iname in dom.get_var_names(isl.dim_type.set): - result[inner_iname].add(outer_iname) + loop_nest_around_map[inner_iname] |= {outer_iname} - return result + # }}} + + return loop_nest_around_map def find_loop_insn_dep_map(kernel, loop_nest_with_map, loop_nest_around_map): diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index afcdfb07b..8d3b07371 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -21,6 +21,10 @@ """ from loopy.kernel.data import AddressSpace +from loopy.diagnostic import LoopyError +from treelib import Tree +from collections import defaultdict +from functools import reduce # {{{ block boundary finder @@ -104,3 +108,158 @@ def add_extra_args_to_schedule(kernel): return kernel.copy(schedule=new_schedule) # }}} + + +class _not_seen: # noqa: N801 + pass + + +def pullout_loop_nest(tree, loop_nests, 
inames_to_pull_out): + """ + Updates *tree* to make *inames_to_pull_out* a loop nesting level in + *loop_nests* + + :returns: a :class:`tuple` ``(outer_loop_nest, inner_loop_nest)``, where + outer_loop_nest is the identifier for the new outer and inner loop + nests so that *inames_to_pull_out* is a valid nesting. + """ + assert all(isinstance(loop_nest, frozenset) for loop_nest in loop_nests) + assert inames_to_pull_out <= reduce(frozenset.union, loop_nests, frozenset()) + + # {{{ sanity check to ensure the loop nest *inames_to_pull_out* is possible + + err = LoopyError(f"Cannot schedule loop nest {inames_to_pull_out} " + f" in the nesting tree:\n{tree}") + + loop_nests = sorted(loop_nests, key=lambda nest: tree.depth(nest)) + + for outer, inner in zip(loop_nests[:-1], loop_nests[1:]): + if outer != tree.parent(inner).identifier: + raise err + + if tree.depth(loop_nests[0]) != 0: + raise err + + # }}} + + innermost_loop_nest = loop_nests[-1] + new_outer_loop_nest = inames_to_pull_out - reduce(frozenset.union, + loop_nests[:-1], + frozenset()) + new_inner_loop_nest = innermost_loop_nest - inames_to_pull_out + + if new_outer_loop_nest == innermost_loop_nest: + # such a loop nesting already exists => do nothing + return new_outer_loop_nest, None + + # add the outer loop to our loop nest tree + tree.create_node(identifier=new_outer_loop_nest, + parent=tree.parent(innermost_loop_nest).identifier) + + # rename the old loop to the inner loop + tree.update_node(innermost_loop_nest, + identifier=new_inner_loop_nest, + tag=new_inner_loop_nest) + + # set the parent of inner loop to be the outer loop + tree.move_node(new_inner_loop_nest, new_outer_loop_nest) + + return new_outer_loop_nest, new_inner_loop_nest + + +def add_inner_loops(tree, outer_loop_nest, inner_loop_nest): + """ + Update *tree* to nest *inner_loop_nest* inside *outer_loop_nest*. 
+ """ + # add the outer loop to our loop nest tree + tree.create_node(identifier=inner_loop_nest, parent=outer_loop_nest) + + +def get_loop_nest_tree(kernel): + """ + Returns an instance of :class:`treelib.Tree` denoting the kernel's loop + nestings. + + Each node of the returned tree has a :class:`frozenset` of inames. + All the inames in the identifier of a parent node of a loop nest in the + tree must be nested outside all the iname in identifier of the loop nest. + + .. note:: + + This routine only takes into account the nesting dependency + constraints of :attr:`loopy.InstructionBase.within_inames` of all the + *kernel*'s instructions and the iname tags. This routine does *NOT* + include the nesting constraints imposed by the dependencies between the + instructions and the dependencies imposed by the kernel's domain tree. + """ + from loopy.kernel.data import ConcurrentTag, IlpBaseTag + + concurrent_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, ConcurrentTag)} + ilp_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, IlpBaseTag)} + + # figuring the possible loop nestings minus the concurrent_inames as they + # are never realized as actual loops + iname_chains = {insn.within_inames - concurrent_inames + for insn in kernel.instructions} + + tree = Tree() + root = frozenset() + + # mapping from iname to the innermost loop nest they are part of in *tree*. + iname_to_tree_node_id = defaultdict(lambda: _not_seen) + + tree.create_node(identifier=root) + + # if there were any loop with no inames, those have been already account + # for as the root. 
+ iname_chains = iname_chains - {root} + + for iname_chain in iname_chains: + not_seen_inames = frozenset(iname for iname in iname_chain + if iname_to_tree_node_id[iname] is _not_seen) + seen_inames = iname_chain - not_seen_inames + + all_nests = {iname_to_tree_node_id[iname] for iname in seen_inames} + + outer_loop, inner_loop = pullout_loop_nest(tree, + (all_nests | {frozenset()}), + seen_inames) + if not_seen_inames: + add_inner_loops(tree, outer_loop, not_seen_inames) + + # {{{ update iname to node id + + for iname in outer_loop: + iname_to_tree_node_id[iname] = outer_loop + + if inner_loop is not None: + for iname in inner_loop: + iname_to_tree_node_id[iname] = inner_loop + + for iname in not_seen_inames: + iname_to_tree_node_id[iname] = not_seen_inames + + # }}} + + # {{{ make ILP tagged inames innermost + + for iname_chain in iname_chains: + for ilp_iname in (ilp_inames & iname_chains): + # pull out other loops so that ilp_iname is the innermost + all_nests = {iname_to_tree_node_id[iname] for iname in seen_inames} + outer_loop, inner_loop = pullout_loop_nest(tree, + (all_nests | {frozenset()}), + iname_chain - {ilp_iname}) + + for iname in outer_loop: + iname_to_tree_node_id[iname] = outer_loop + + if inner_loop is not None: + for iname in inner_loop: + iname_to_tree_node_id[iname] = inner_loop + + # }}} + + return tree From 03f39a19d1eeb149aabd3e80d9ef400b06cfe711 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Apr 2021 17:23:19 -0500 Subject: [PATCH 03/17] add treelib as a dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 89927f28c..0002e37cf 100644 --- a/setup.py +++ b/setup.py @@ -92,6 +92,7 @@ def write_git_revision(package_name): "codepy>=2017.1", "colorama", "Mako", + "treelib", ], extras_require={ From 9934dc646d35eb71e643f2153346dcef49fddec8 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Apr 2021 18:11:01 -0500 Subject: [PATCH 04/17] convert a LoopyError to AssertionError 
to differentiate between user/developer error --- loopy/schedule/tools.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index 8d3b07371..1b3969d4a 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -128,17 +128,14 @@ def pullout_loop_nest(tree, loop_nests, inames_to_pull_out): # {{{ sanity check to ensure the loop nest *inames_to_pull_out* is possible - err = LoopyError(f"Cannot schedule loop nest {inames_to_pull_out} " - f" in the nesting tree:\n{tree}") - loop_nests = sorted(loop_nests, key=lambda nest: tree.depth(nest)) for outer, inner in zip(loop_nests[:-1], loop_nests[1:]): if outer != tree.parent(inner).identifier: - raise err + raise LoopyError(f"Cannot schedule loop nest {inames_to_pull_out} " + f" in the nesting tree:\n{tree}") - if tree.depth(loop_nests[0]) != 0: - raise err + assert tree.depth(loop_nests[0]) == 0 # }}} From 32cb2824d480871aae6ea5d4b8a93af2af56bb92 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 28 Apr 2021 18:16:00 -0500 Subject: [PATCH 05/17] noun -> verb for function names --- loopy/schedule/tools.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index 1b3969d4a..97908921e 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -114,7 +114,7 @@ class _not_seen: # noqa: N801 pass -def pullout_loop_nest(tree, loop_nests, inames_to_pull_out): +def pull_out_loop_nest(tree, loop_nests, inames_to_pull_out): """ Updates *tree* to make *inames_to_pull_out* a loop nesting level in *loop_nests* @@ -220,9 +220,9 @@ def get_loop_nest_tree(kernel): all_nests = {iname_to_tree_node_id[iname] for iname in seen_inames} - outer_loop, inner_loop = pullout_loop_nest(tree, - (all_nests | {frozenset()}), - seen_inames) + outer_loop, inner_loop = pull_out_loop_nest(tree, + (all_nests | {frozenset()}), + seen_inames) if not_seen_inames: 
add_inner_loops(tree, outer_loop, not_seen_inames) @@ -246,9 +246,9 @@ def get_loop_nest_tree(kernel): for ilp_iname in (ilp_inames & iname_chains): # pull out other loops so that ilp_iname is the innermost all_nests = {iname_to_tree_node_id[iname] for iname in seen_inames} - outer_loop, inner_loop = pullout_loop_nest(tree, - (all_nests | {frozenset()}), - iname_chain - {ilp_iname}) + outer_loop, inner_loop = pull_out_loop_nest(tree, + (all_nests | {frozenset()}), + iname_chain - {ilp_iname}) for iname in outer_loop: iname_to_tree_node_id[iname] = outer_loop From 94058bb13239a35f308bb9717d3924fb31ab3498 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 4 May 2021 10:10:20 -0500 Subject: [PATCH 06/17] use topological sort to get the schedule --- loopy/schedule/__init__.py | 105 ++++++++++++++++++++++++++++++++++++- loopy/schedule/tools.py | 68 +++++++++++++++++++++++- 2 files changed, 170 insertions(+), 3 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index a2762024b..9b2bd8e31 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -802,8 +802,109 @@ def is_similar_to_template(insn): # {{{ scheduling algorithm -def generate_loop_schedules_internal( - sched_state, debug=None): +def _get_dep_equivalent_nests(tree, within1, within2): + common_ancestors = (within1 & within2) | {""} + + innermost_parent = max(common_ancestors, + key=lambda k: tree.depth(k)) + iname1, = [iname.identifier + for iname in tree.children(innermost_parent) + if iname.identifier in within1] + + iname2, = [iname.identifier + for iname in tree.children(innermost_parent) + if iname.identifier in within2] + + return iname1, iname2 + + +def no_recurse_schedule(kernel): + from loopy.schedule.tools import get_loop_nest_tree + from functools import reduce + from pytools.graph import compute_topological_order + from loopy.kernel.data import (filter_iname_tags_by_type, + ConcurrentTag) + + if any(insn.priority != 0 for insn in 
kernel.instructions): + raise NotImplementedError + + parallel_inames = {name + for name, iname in kernel.inames.items() + if filter_iname_tags_by_type(iname.tags, ConcurrentTag)} + + # the first step is to figure out the loop nest trees + # I would rather get the loop nest tree first + loop_nest_tree = get_loop_nest_tree(kernel) + + # loop_inames: inames that are realized as loops. Concurrent inames aren't + # realized as a loop in the generated code for a loopy.TargetBase. + loop_inames = (reduce(frozenset.union, (insn.within_inames + for insn in kernel.instructions), + frozenset()) + - parallel_inames) + + dag = {} + dag.update({EnterLoop(iname=iname): frozenset({LeaveLoop(iname=iname)}) + for iname in loop_inames}) + dag.update({LeaveLoop(iname=iname): frozenset() + for iname in loop_inames}) + dag.update({RunInstruction(insn_id=insn.id): frozenset() + for insn in kernel.instructions}) + + for parent in loop_nest_tree.all_nodes_itr(): + outer_loop = parent.identifier + if outer_loop == "": + continue + + for child in loop_nest_tree.children(outer_loop): + inner_loop = child.identifier + dag[EnterLoop(iname=outer_loop)] |= {EnterLoop(iname=inner_loop)} + dag[LeaveLoop(iname=inner_loop)] |= {LeaveLoop(iname=outer_loop)} + + for insn in kernel.instructions: + for dep_id in insn.depends_on: + dep = kernel.id_to_insn[dep_id] + dag[RunInstruction(insn_id=dep_id)] |= {RunInstruction(insn_id=insn.id)} + if dep.within_inames < insn.within_inames: + for iname in insn.within_inames - dep.within_inames: + dag[RunInstruction(insn_id=dep.id)] |= {EnterLoop(iname=iname)} + elif insn.within_inames < dep.within_inames: + for iname in dep.within_inames - insn.within_inames: + dag[LeaveLoop(iname=iname)] |= {RunInstruction(insn_id=insn.id)} + elif dep.within_inames != insn.within_inames: + insn_iname, dep_iname = _get_dep_equivalent_nests(loop_nest_tree, + insn.within_inames, + dep.within_inames) + dag[LeaveLoop(iname=dep_iname)] |= {EnterLoop(iname=insn_iname)} + else: + pass + 
+ for iname in insn.within_inames: + dag[EnterLoop(iname=iname)] |= {RunInstruction(insn_id=insn.id)} + dag[RunInstruction(insn_id=insn.id)] |= {LeaveLoop(iname=iname)} + + def iname_key(iname): + all_ancestors = [loop_nest_tree.ancestor(iname, i).identifier + for i in range(1, loop_nest_tree.depth(iname))] + return ",".join(all_ancestors+[iname]) + + def key(x): + if isinstance(x, RunInstruction): + iname = max(kernel.id_to_insn[x.insn_id].within_inames, + key=lambda k: loop_nest_tree.depth(k), + default="") + result = (iname_key(iname), x.insn_id) + elif isinstance(x, (EnterLoop, LeaveLoop)): + result = (iname_key(x.iname),) + else: + raise NotImplementedError + + return result + + return compute_topological_order(dag, key=key) + + +def generate_loop_schedules_internal(sched_state, debug=None): # allow_insn is set to False initially and after entering each loop # to give loops containing high-priority instructions a chance. kernel = sched_state.kernel diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index 97908921e..ef5dd6e87 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -189,6 +189,7 @@ def get_loop_nest_tree(kernel): include the nesting constraints imposed by the dependencies between the instructions and the dependencies imposed by the kernel's domain tree. 
""" + from islpy import dim_type from loopy.kernel.data import ConcurrentTag, IlpBaseTag concurrent_inames = {iname for iname in kernel.all_inames() @@ -259,4 +260,69 @@ def get_loop_nest_tree(kernel): # }}} - return tree + loop_priorities = kernel.loop_priority.copy() + + # {{{ impose constraints by the domain tree + + all_inames = kernel.all_inames() + + for dom_idx, dom in enumerate(kernel.domains): + for outer_iname in dom.get_var_names(dim_type.param): + if outer_iname not in all_inames: + continue + + for inner_iname in dom.get_var_names(dim_type.set): + # either outer_iname and inner_iname should belong to the same + # loop nest level or outer should be strictly outside inner + # iname + + inner_iname_nest = iname_to_tree_node_id[inner_iname] + outer_iname_nest = iname_to_tree_node_id[outer_iname] + + if inner_iname_nest == outer_iname_nest: + loop_priorities |= {(outer_iname, inner_iname)} + else: + ancestors_of_inner_iname = { + tree.ancestor(inner_iname_nest, k) + for k in range(tree.depth(inner_iname_nest))} + if outer_iname_nest not in ancestors_of_inner_iname: + raise LoopyError(f"Loop '{outer_iname}' cannot be nested" + f" outside '{inner_iname}'.") + + # }}} + + if loop_priorities: + raise NotImplementedError + + # {{{ just choose one of the possible loop nestings + + # Either all of these loop nestings would be valid or all would invalid => + # we aren't marking any schedulable kernel as unschedulable. 
+ + new_tree = Tree() + + old_to_new_parent = {} + + new_tree.create_node(identifier="") + old_to_new_parent[root] = "" + + # traversing 'tree' in an BFS fashion to create 'new_tree' + queue = [node.identifier for node in tree.children(root)] + + while queue: + current_node = queue.pop(0) + + sorted_inames = sorted(current_node) + new_tree.create_node(identifier=sorted_inames[0], + parent=old_to_new_parent[tree.parent(current_node) + .identifier]) + for new_parent, new_child in zip(sorted_inames[:-1], sorted_inames[1:]): + new_tree.create_node(identifier=new_child, parent=new_parent) + + old_to_new_parent[current_node] = sorted_inames[-1] + + queue.extend([child.identifier for child in tree.children(current_node)]) + + # }}} + + return new_tree From 8230994a1e0b1c62748172dcc1ce632a33eccc25 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 4 May 2021 13:57:45 -0500 Subject: [PATCH 07/17] factor out generate_schedule_v2 --- loopy/schedule/__init__.py | 316 ++++++++++++++++++++----------------- 1 file changed, 168 insertions(+), 148 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 9b2bd8e31..d21a92ea0 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -818,7 +818,7 @@ def _get_dep_equivalent_nests(tree, within1, within2): return iname1, iname2 -def no_recurse_schedule(kernel): +def generate_loop_schedules_v2(kernel): from loopy.schedule.tools import get_loop_nest_tree from functools import reduce from pytools.graph import compute_topological_order @@ -828,6 +828,10 @@ def no_recurse_schedule(kernel): if any(insn.priority != 0 for insn in kernel.instructions): raise NotImplementedError + if kernel.schedule is not None: + # handle preschedule ?? 
+ raise NotImplementedError + parallel_inames = {name for name, iname in kernel.inames.items() if filter_iname_tags_by_type(iname.tags, ConcurrentTag)} @@ -2041,171 +2045,187 @@ def generate_loop_schedules(kernel, debug_args={}): yield from generate_loop_schedules_inner(kernel, debug_args=debug_args) -def generate_loop_schedules_inner(kernel, debug_args={}): +def postprocess_schedule(kernel, gen_sched): from loopy.kernel import KernelState - if kernel.state not in (KernelState.PREPROCESSED, KernelState.LINEARIZED): - raise LoopyError("cannot schedule a kernel that has not been " - "preprocessed") - - from loopy.check import pre_schedule_checks - pre_schedule_checks(kernel) - - schedule_count = 0 - - debug = ScheduleDebugger(**debug_args) - - preschedule = kernel.schedule if kernel.state == KernelState.LINEARIZED else () + gen_sched = convert_barrier_instructions_to_barriers( + kernel, gen_sched) - prescheduled_inames = { - insn.iname - for insn in preschedule - if isinstance(insn, EnterLoop)} + gsize, lsize = kernel.get_grid_size_upper_bounds() - prescheduled_insn_ids = { - insn_id - for item in preschedule - for insn_id in sched_item_to_insn_id(item)} - - from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, - filter_iname_tags_by_type) - ilp_inames = { - name - for name, iname in kernel.inames.items() - if filter_iname_tags_by_type(iname.tags, IlpBaseTag)} - vec_inames = { - name - for name, iname in kernel.inames.items() - if filter_iname_tags_by_type(iname.tags, VectorizeTag)} - parallel_inames = { - name - for name, iname in kernel.inames.items() - if filter_iname_tags_by_type(iname.tags, ConcurrentTag)} - - loop_nest_with_map = find_loop_nest_with_map(kernel) - loop_nest_around_map = find_loop_nest_around_map(kernel) - sched_state = SchedulerState( - kernel=kernel, - loop_nest_around_map=loop_nest_around_map, - loop_insn_dep_map=find_loop_insn_dep_map( - kernel, - loop_nest_with_map=loop_nest_with_map, - 
loop_nest_around_map=loop_nest_around_map), - breakable_inames=ilp_inames, - ilp_inames=ilp_inames, - vec_inames=vec_inames, - - prescheduled_inames=prescheduled_inames, - prescheduled_insn_ids=prescheduled_insn_ids, - - # time-varying part - active_inames=(), - entered_inames=frozenset(), - enclosing_subkernel_inames=(), - - schedule=(), - - unscheduled_insn_ids={insn.id for insn in kernel.instructions}, - scheduled_insn_ids=frozenset(), - within_subkernel=kernel.state != KernelState.LINEARIZED, - may_schedule_global_barriers=True, - - preschedule=preschedule, - insn_ids_to_try=None, + if (gsize or lsize): + if not kernel.options.disable_global_barriers: + logger.debug("%s: barrier insertion: global" % kernel.name) + gen_sched = insert_barriers(kernel, gen_sched, + synchronization_kind="global", verify_only=True) - # ilp and vec are not parallel for the purposes of the scheduler - parallel_inames=parallel_inames - ilp_inames - vec_inames, + logger.debug("%s: barrier insertion: local" % kernel.name) + gen_sched = insert_barriers(kernel, gen_sched, + synchronization_kind="local", verify_only=False) + logger.debug("%s: barrier insertion: done" % kernel.name) - group_insn_counts=group_insn_counts(kernel), - active_group_counts={}, + new_kernel = kernel.copy( + schedule=gen_sched, + state=KernelState.LINEARIZED) - insns_in_topologically_sorted_order=( - get_insns_in_topologically_sorted_order(kernel)), - ) - - schedule_gen_kwargs = {} - - def print_longest_dead_end(): - if debug.interactive: - print("Loopy will now show you the scheduler state at the point") - print("where the longest (dead-end) schedule was generated, in the") - print("the hope that some of this makes sense and helps you find") - print("the issue.") - print() - print("To disable this interactive behavior, pass") - print(" debug_args=dict(interactive=False)") - print("to generate_loop_schedules().") - print(75*"-") - input("Enter:") - print() - print() + from loopy.schedule.device_mapping import \ + 
map_schedule_onto_host_or_device + if kernel.state != KernelState.LINEARIZED: + # Device mapper only gets run once. + new_kernel = map_schedule_onto_host_or_device(new_kernel) - debug.debug_length = len(debug.longest_rejected_schedule) - while True: - try: - for _ in generate_loop_schedules_internal( - sched_state, debug=debug, **schedule_gen_kwargs): - pass + from loopy.schedule.tools import add_extra_args_to_schedule + return add_extra_args_to_schedule(new_kernel) - except ScheduleDebugInput as e: - debug.debug_length = int(str(e)) - continue - break - - try: - for gen_sched in generate_loop_schedules_internal( - sched_state, debug=debug, **schedule_gen_kwargs): - debug.stop() - - gen_sched = convert_barrier_instructions_to_barriers( - kernel, gen_sched) - - gsize, lsize = kernel.get_grid_size_upper_bounds() +def generate_loop_schedules_inner(kernel, debug_args={}): + from loopy.kernel import KernelState + if kernel.state not in (KernelState.PREPROCESSED, KernelState.LINEARIZED): + raise LoopyError("cannot schedule a kernel that has not been " + "preprocessed") - if (gsize or lsize): - if not kernel.options.disable_global_barriers: - logger.debug("%s: barrier insertion: global" % kernel.name) - gen_sched = insert_barriers(kernel, gen_sched, - synchronization_kind="global", verify_only=True) + from loopy.check import pre_schedule_checks + pre_schedule_checks(kernel) - logger.debug("%s: barrier insertion: local" % kernel.name) - gen_sched = insert_barriers(kernel, gen_sched, - synchronization_kind="local", verify_only=False) - logger.debug("%s: barrier insertion: done" % kernel.name) + can_v2_scheduler_handle = ( # v2-scheduler cannot handle insn groups + all(len(insn.conflicts_with_groups) == 0 + for insn in kernel.instructions)) + if can_v2_scheduler_handle: + gen_sched = generate_loop_schedules_v2(kernel) + yield postprocess_schedule(kernel, gen_sched) + else: + schedule_count = 0 + + debug = ScheduleDebugger(**debug_args) + + preschedule = (kernel.schedule + + 
if kernel.state == KernelState.LINEARIZED + + else ()) + + prescheduled_inames = { + insn.iname + for insn in preschedule + if isinstance(insn, EnterLoop)} + + prescheduled_insn_ids = { + insn_id + for item in preschedule + for insn_id in sched_item_to_insn_id(item)} + + from loopy.kernel.data import (IlpBaseTag, ConcurrentTag, VectorizeTag, + filter_iname_tags_by_type) + ilp_inames = { + name + for name, iname in kernel.inames.items() + if filter_iname_tags_by_type(iname.tags, IlpBaseTag)} + vec_inames = { + name + for name, iname in kernel.inames.items() + if filter_iname_tags_by_type(iname.tags, VectorizeTag)} + parallel_inames = { + name + for name, iname in kernel.inames.items() + if filter_iname_tags_by_type(iname.tags, ConcurrentTag)} + + loop_nest_with_map = find_loop_nest_with_map(kernel) + loop_nest_around_map = find_loop_nest_around_map(kernel) + sched_state = SchedulerState( + kernel=kernel, + loop_nest_around_map=loop_nest_around_map, + loop_insn_dep_map=find_loop_insn_dep_map( + kernel, + loop_nest_with_map=loop_nest_with_map, + loop_nest_around_map=loop_nest_around_map), + breakable_inames=ilp_inames, + ilp_inames=ilp_inames, + vec_inames=vec_inames, + + prescheduled_inames=prescheduled_inames, + prescheduled_insn_ids=prescheduled_insn_ids, + + # time-varying part + active_inames=(), + entered_inames=frozenset(), + enclosing_subkernel_inames=(), + + schedule=(), + + unscheduled_insn_ids={insn.id for insn in kernel.instructions}, + scheduled_insn_ids=frozenset(), + within_subkernel=kernel.state != KernelState.LINEARIZED, + may_schedule_global_barriers=True, + + preschedule=preschedule, + insn_ids_to_try=None, + + # ilp and vec are not parallel for the purposes of the scheduler + parallel_inames=parallel_inames - ilp_inames - vec_inames, + + group_insn_counts=group_insn_counts(kernel), + active_group_counts={}, + + insns_in_topologically_sorted_order=( + get_insns_in_topologically_sorted_order(kernel)), + ) + + schedule_gen_kwargs = {} + + def 
print_longest_dead_end(): + if debug.interactive: + print("Loopy will now show you the scheduler state at the point") + print("where the longest (dead-end) schedule was generated, in the") + print("the hope that some of this makes sense and helps you find") + print("the issue.") + print() + print("To disable this interactive behavior, pass") + print(" debug_args=dict(interactive=False)") + print("to generate_loop_schedules().") + print(75*"-") + input("Enter:") + print() + print() + + debug.debug_length = len(debug.longest_rejected_schedule) + while True: + try: + for _ in generate_loop_schedules_internal( + sched_state, debug=debug, **schedule_gen_kwargs): + pass + + except ScheduleDebugInput as e: + debug.debug_length = int(str(e)) + continue - new_kernel = kernel.copy( - schedule=gen_sched, - state=KernelState.LINEARIZED) + break - from loopy.schedule.device_mapping import \ - map_schedule_onto_host_or_device - if kernel.state != KernelState.LINEARIZED: - # Device mapper only gets run once. 
- new_kernel = map_schedule_onto_host_or_device(new_kernel) + try: + for gen_sched in generate_loop_schedules_internal( + sched_state, debug=debug, **schedule_gen_kwargs): + debug.stop() - from loopy.schedule.tools import add_extra_args_to_schedule - new_kernel = add_extra_args_to_schedule(new_kernel) - yield new_kernel + new_kernel = postprocess_schedule(kernel, gen_sched) + yield new_kernel - debug.start() + debug.start() - schedule_count += 1 + schedule_count += 1 - except KeyboardInterrupt: - print() - print(75*"-") - print("Interrupted during scheduling") - print(75*"-") - print_longest_dead_end() - raise + except KeyboardInterrupt: + print() + print(75*"-") + print("Interrupted during scheduling") + print(75*"-") + print_longest_dead_end() + raise - debug.done_scheduling() - if not schedule_count: - print(75*"-") - print("ERROR: Sorry--loopy did not find a schedule for your kernel.") - print(75*"-") - print_longest_dead_end() - raise RuntimeError("no valid schedules found") + debug.done_scheduling() + if not schedule_count: + print(75*"-") + print("ERROR: Sorry--loopy did not find a schedule for your kernel.") + print(75*"-") + print_longest_dead_end() + raise RuntimeError("no valid schedules found") logger.info("%s: schedule done" % kernel.name) From 05c9ce73a2f8746393942bd86d8742e34ba7058f Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Tue, 4 May 2021 14:22:21 -0500 Subject: [PATCH 08/17] only consider sequential loops for entry/exit --- loopy/schedule/__init__.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index d21a92ea0..fe431f81b 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -866,24 +866,26 @@ def generate_loop_schedules_v2(kernel): dag[LeaveLoop(iname=inner_loop)] |= {LeaveLoop(iname=outer_loop)} for insn in kernel.instructions: + insn_loop_inames = insn.within_inames & loop_inames for dep_id in insn.depends_on: dep = 
kernel.id_to_insn[dep_id] + dep_loop_inames = dep.within_inames & loop_inames dag[RunInstruction(insn_id=dep_id)] |= {RunInstruction(insn_id=insn.id)} - if dep.within_inames < insn.within_inames: - for iname in insn.within_inames - dep.within_inames: + if dep_loop_inames < insn_loop_inames: + for iname in insn_loop_inames - dep_loop_inames: dag[RunInstruction(insn_id=dep.id)] |= {EnterLoop(iname=iname)} - elif insn.within_inames < dep.within_inames: - for iname in dep.within_inames - insn.within_inames: + elif insn_loop_inames < dep_loop_inames: + for iname in dep_loop_inames - insn_loop_inames: dag[LeaveLoop(iname=iname)] |= {RunInstruction(insn_id=insn.id)} - elif dep.within_inames != insn.within_inames: + elif dep_loop_inames != insn_loop_inames: insn_iname, dep_iname = _get_dep_equivalent_nests(loop_nest_tree, - insn.within_inames, - dep.within_inames) + insn_loop_inames, + dep_loop_inames) dag[LeaveLoop(iname=dep_iname)] |= {EnterLoop(iname=insn_iname)} else: pass - for iname in insn.within_inames: + for iname in insn_loop_inames: dag[EnterLoop(iname=iname)] |= {RunInstruction(insn_id=insn.id)} dag[RunInstruction(insn_id=insn.id)] |= {LeaveLoop(iname=iname)} @@ -894,7 +896,7 @@ def iname_key(iname): def key(x): if isinstance(x, RunInstruction): - iname = max(kernel.id_to_insn[x.insn_id].within_inames, + iname = max((kernel.id_to_insn[x.insn_id].within_inames & loop_inames), key=lambda k: loop_nest_tree.depth(k), default="") result = (iname_key(iname), x.insn_id) From 69e6749816dadf7fd2e47c47cb7c57a9d60e625d Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 8 May 2021 14:22:06 -0500 Subject: [PATCH 09/17] [schedule_v2]: be more specific in what kernels to handle, implements the nesting constraints --- loopy/schedule/__init__.py | 58 +++++++------- loopy/schedule/tools.py | 156 +++++++++++++++++++++++++++---------- 2 files changed, 140 insertions(+), 74 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 
fe431f81b..3b572e310 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -23,7 +23,6 @@ from pytools import ImmutableRecord import sys -import islpy as isl from loopy.diagnostic import warn_with_kernel, LoopyError # noqa from pytools import MinRecursionLimit, ProcessLogger @@ -215,7 +214,6 @@ def find_loop_nest_around_map(kernel): """Returns a dictionary mapping inames to other inames that are always nested around them. """ - from functools import reduce from collections import defaultdict from loopy.schedule.tools import get_loop_nest_tree @@ -224,28 +222,14 @@ def find_loop_nest_around_map(kernel): loop_nest_around_map = defaultdict(frozenset) for node in tree.all_nodes_itr(): - nest = node.identifier - depth = tree.depth(nest) - all_ancestors = reduce(frozenset.union, (tree.ancestor(nest, d).identifier - for d in range(depth)), - frozenset()) - - for iname in nest: - loop_nest_around_map[iname] = all_ancestors - - # {{{ impose constraints by the domain tree - - all_inames = kernel.all_inames() - - for dom_idx, dom in enumerate(kernel.domains): - for outer_iname in dom.get_var_names(isl.dim_type.param): - if outer_iname not in all_inames: - continue + if node.identifier == tree.root: + continue + iname = node.identifier + depth = tree.depth(iname) + all_ancestors = frozenset(tree.ancestor(iname, d).identifier + for d in range(1, depth)) - for inner_iname in dom.get_var_names(isl.dim_type.set): - loop_nest_around_map[inner_iname] |= {outer_iname} - - # }}} + loop_nest_around_map[iname] = all_ancestors return loop_nest_around_map @@ -822,19 +806,21 @@ def generate_loop_schedules_v2(kernel): from loopy.schedule.tools import get_loop_nest_tree from functools import reduce from pytools.graph import compute_topological_order - from loopy.kernel.data import (filter_iname_tags_by_type, - ConcurrentTag) + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag if any(insn.priority != 0 for insn in kernel.instructions): raise 
NotImplementedError if kernel.schedule is not None: - # handle preschedule ?? raise NotImplementedError - parallel_inames = {name - for name, iname in kernel.inames.items() - if filter_iname_tags_by_type(iname.tags, ConcurrentTag)} + concurrent_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, ConcurrentTag)} + ilp_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, IlpBaseTag)} + vec_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, VectorizeTag)} + parallel_inames = (concurrent_inames - ilp_inames - vec_inames) # the first step is to figure out the loop nest trees # I would rather get the loop nest tree first @@ -2088,9 +2074,17 @@ def generate_loop_schedules_inner(kernel, debug_args={}): from loopy.check import pre_schedule_checks pre_schedule_checks(kernel) - can_v2_scheduler_handle = ( # v2-scheduler cannot handle insn groups - all(len(insn.conflicts_with_groups) == 0 - for insn in kernel.instructions)) + can_v2_scheduler_handle = ( + # v2-scheduler cannot handle insn groups + all(len(insn.conflicts_with_groups) == 0 + for insn in kernel.instructions) + # v2-scheduler cannot handle prescheduled kernel + and (not kernel.schedule) + # v2-scheduler cannot handle instruction priorities + and all(insn.priority == 0 + for insn in kernel.instructions) + ) + if can_v2_scheduler_handle: gen_sched = generate_loop_schedules_v2(kernel) yield postprocess_schedule(kernel, gen_sched) diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index ef5dd6e87..8e09d59d1 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -172,6 +172,106 @@ def add_inner_loops(tree, outer_loop_nest, inner_loop_nest): tree.create_node(identifier=inner_loop_nest, parent=outer_loop_nest) +def _order_loop_nests(loop_nest_tree, + strict_priorities, + relaxed_priorities, + iname_to_tree_node_id): + """ + Returns a loop nest where all nodes in the tree are instances of + 
:class:`str` denoting inames. Unlike *loop_nest_tree* which corresponds to + multiple loop nesting, this routine returns a unique loop nest that is + obtained after constraining *loop_nest_tree* with the constraints enforced + by *priorities*. + """ + from pytools.graph import compute_topological_order as toposort + from warnings import warn + + loop_nests = set(iname_to_tree_node_id.values()) + + flow_requirements = {loop_nest: {iname: frozenset() + for iname in loop_nest} + for loop_nest in loop_nests} + + def _update_flow_requirements(priorities, cannot_satisfy_callback): + for priority in priorities: + for outer_iname, inner_iname in zip(priority[:-1], priority[1:]): + inner_iname_nest = iname_to_tree_node_id[inner_iname] + outer_iname_nest = iname_to_tree_node_id[outer_iname] + if inner_iname_nest == outer_iname_nest: + flow_requirements[inner_iname_nest][outer_iname] |= {inner_iname} + else: + ancestors_of_inner_iname = reduce( + frozenset.union, + (loop_nest_tree.ancestor(inner_iname_nest, k).identifier + for k in range(loop_nest_tree.depth(inner_iname_nest))), + frozenset()) + ancestors_of_outer_iname = reduce( + frozenset.union, + (loop_nest_tree.ancestor(outer_iname_nest, k).identifier + for k in range(loop_nest_tree.depth(outer_iname_nest))), + frozenset()) + if outer_iname in ancestors_of_inner_iname: + # constraint already satisfied => do nothing + pass + elif inner_iname in ancestors_of_outer_iname: + cannot_satisfy_callback("Cannot satisfy constraint that" + f" iname '{inner_iname}' must be" + f" nested within '{outer_iname}''.") + else: + # inner iname and outer iname are indirect family members + # => must be realized via dependencies in the linearization + # phase + raise NotImplementedError + + def _raise_loopy_err(x): + raise LoopyError(x) + + _update_flow_requirements(strict_priorities, _raise_loopy_err) + _update_flow_requirements(relaxed_priorities, warn) + + ordered_loop_nests = {unordered_nest: toposort(flow, + key=lambda x: x) + for 
unordered_nest, flow in flow_requirements.items()} + + # {{{ just choose one of the possible loop nestings + + assert loop_nest_tree.root == frozenset() + + # Either all of these loop nestings would be valid or all would invalid => + # we aren't marking any schedulable kernel as unschedulable. + + new_tree = Tree() + + old_to_new_parent = {} + + new_tree.create_node(identifier="") + old_to_new_parent[loop_nest_tree.root] = "" + + # traversing 'tree' in an BFS fashion to create 'new_tree' + queue = [node.identifier + for node in loop_nest_tree.children(loop_nest_tree.root)] + + while queue: + current_nest = queue.pop(0) + + ordered_nest = ordered_loop_nests[current_nest] + new_tree.create_node(identifier=ordered_nest[0], + parent=old_to_new_parent[loop_nest_tree + .parent(current_nest) + .identifier]) + for new_parent, new_child in zip(ordered_nest[:-1], ordered_nest[1:]): + new_tree.create_node(identifier=new_child, parent=new_parent) + + old_to_new_parent[current_nest] = ordered_nest[-1] + + queue.extend([child.identifier + for child in loop_nest_tree.children(current_nest)]) + + # }}} + + return new_tree + + def get_loop_nest_tree(kernel): """ Returns an instance of :class:`treelib.Tree` denoting the kernel's loop @@ -190,16 +290,19 @@ def get_loop_nest_tree(kernel): instructions and the dependencies imposed by the kernel's domain tree. 
""" from islpy import dim_type - from loopy.kernel.data import ConcurrentTag, IlpBaseTag + from loopy.kernel.data import ConcurrentTag, IlpBaseTag, VectorizeTag concurrent_inames = {iname for iname in kernel.all_inames() if kernel.iname_tags_of_type(iname, ConcurrentTag)} ilp_inames = {iname for iname in kernel.all_inames() if kernel.iname_tags_of_type(iname, IlpBaseTag)} + vec_inames = {iname for iname in kernel.all_inames() + if kernel.iname_tags_of_type(iname, VectorizeTag)} + parallel_inames = (concurrent_inames - ilp_inames - vec_inames) # figuring the possible loop nestings minus the concurrent_inames as they # are never realized as actual loops - iname_chains = {insn.within_inames - concurrent_inames + iname_chains = {insn.within_inames - parallel_inames for insn in kernel.instructions} tree = Tree() @@ -260,15 +363,15 @@ def get_loop_nest_tree(kernel): # }}} - loop_priorities = kernel.loop_priority.copy() + strict_loop_priorities = frozenset() # {{{ impose constraints by the domain tree - all_inames = kernel.all_inames() + loop_inames = kernel.all_inames() - parallel_inames for dom_idx, dom in enumerate(kernel.domains): - for outer_iname in dom.get_var_names(dim_type.param): - if outer_iname not in all_inames: + for outer_iname in set(dom.get_var_names(dim_type.param)): + if outer_iname not in loop_inames: continue for inner_iname in dom.get_var_names(dim_type.set): @@ -280,7 +383,7 @@ def get_loop_nest_tree(kernel): outer_iname_nest = iname_to_tree_node_id[outer_iname] if inner_iname_nest == outer_iname_nest: - loop_priorities |= {(outer_iname, inner_iname)} + strict_loop_priorities |= {(outer_iname, inner_iname)} else: ancestors_of_inner_iname = { tree.ancestor(inner_iname_nest, k) @@ -291,38 +394,7 @@ def get_loop_nest_tree(kernel): # }}} - if loop_priorities: - raise NotImplementedError - - # {{{ just choose one of the possible loop nestings - - # Either all of these loop nestings would be valid or all would invalid => - # we aren't marking any 
schedulable kernel as unschedulable. - - new_tree = Tree() - - old_to_new_parent = {} - - new_tree.create_node(identifier="") - old_to_new_parent[root] = "" - - # traversing 'tree' in an BFS fashion to create 'new_tree' - queue = [node.identifier for node in tree.children(root)] - - while queue: - current_node = queue.pop(0) - - sorted_inames = sorted(current_node) - new_tree.create_node(identifier=sorted_inames[0], - parent=old_to_new_parent[tree.parent(current_node) - .identifier]) - for new_parent, new_child in zip(sorted_inames[:-1], sorted_inames[1:]): - new_tree.create_node(identifier=new_child, parent=new_parent) - - old_to_new_parent[current_node] = sorted_inames[-1] - - queue.extend([child.identifier for child in tree.children(current_node)]) - - # }}} - - return new_tree + return _order_loop_nests(tree, + strict_loop_priorities, + kernel.loop_priority, + iname_to_tree_node_id) From 77459e10dcbe6275263da9a0cdea3898d809bc96 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Sat, 8 May 2021 14:39:10 -0500 Subject: [PATCH 10/17] [bugfix]: fixes typos --- loopy/schedule/tools.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index 8e09d59d1..29b92dcfa 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -375,10 +375,12 @@ def get_loop_nest_tree(kernel): continue for inner_iname in dom.get_var_names(dim_type.set): + if inner_iname not in loop_inames: + continue + # either outer_iname and inner_iname should belong to the same # loop nest level or outer should be strictly outside inner # iname - inner_iname_nest = iname_to_tree_node_id[inner_iname] outer_iname_nest = iname_to_tree_node_id[outer_iname] @@ -386,7 +388,7 @@ def get_loop_nest_tree(kernel): strict_loop_priorities |= {(outer_iname, inner_iname)} else: ancestors_of_inner_iname = { - tree.ancestor(inner_iname_nest, k) + tree.ancestor(inner_iname_nest, k).identifier for k in range(tree.depth(inner_iname_nest))} 
if outer_iname_nest not in ancestors_of_inner_iname: raise LoopyError(f"Loop '{outer_iname}' cannot be nested" From e2b41e58f678f1c13bd0cde8f2ffc9f659f40281 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 22:47:43 -0500 Subject: [PATCH 11/17] import constrain_loop_nesting --- loopy/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/__init__.py b/loopy/__init__.py index b6008df3c..8a7d082ba 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -67,7 +67,8 @@ from loopy.version import VERSION, MOST_RECENT_LANGUAGE_VERSION from loopy.transform.iname import ( - set_loop_priority, prioritize_loops, untag_inames, + set_loop_priority, prioritize_loops, constrain_loop_nesting, + untag_inames, split_iname, chunk_iname, join_inames, tag_inames, duplicate_inames, rename_iname, remove_unused_inames, split_reduction_inward, split_reduction_outward, @@ -184,7 +185,8 @@ # {{{ transforms - "set_loop_priority", "prioritize_loops", "untag_inames", + "set_loop_priority", "prioritize_loops", "constrain_loop_nesting", + "untag_inames", "split_iname", "chunk_iname", "join_inames", "tag_inames", "duplicate_inames", "rename_iname", "remove_unused_inames", From dbd01cab79a1745573620f4f698deeaaee9e4ee7 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 22:49:43 -0500 Subject: [PATCH 12/17] add loop_nest_constraints attribute to kernel --- loopy/kernel/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 9b022936b..5aec9c752 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -272,6 +272,7 @@ def __init__(self, domains, instructions, args=None, iname_slab_increments=None, loop_priority=frozenset(), + loop_nest_constraints=None, silenced_warnings=None, applied_iname_rewrites=None, @@ -417,6 +418,7 @@ def __init__(self, domains, instructions, args=None, assumptions=assumptions, iname_slab_increments=iname_slab_increments, 
loop_priority=loop_priority, + loop_nest_constraints=loop_nest_constraints, silenced_warnings=silenced_warnings, temporary_variables=temporary_variables, local_sizes=local_sizes, @@ -1550,6 +1552,7 @@ def __setstate__(self, state): "substitutions", "iname_slab_increments", "loop_priority", + "loop_nest_constraints", "silenced_warnings", "options", "state", From 01c8583fe439c6ab858b7038ae6000c9147e73b1 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 22:53:35 -0500 Subject: [PATCH 13/17] copy in classes to contain loop nest constraints from branch loop-nest-constraints-v2 --- loopy/transform/iname.py | 95 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 32c56a5a3..3d09daa93 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -28,6 +28,7 @@ RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, SubstitutionRuleMappingContext) from loopy.diagnostic import LoopyError +from pytools import Record __doc__ = """ @@ -112,6 +113,100 @@ def prioritize_loops(kernel, loop_priority): # }}} +# {{{ Handle loop nest constraints + +# {{{ Classes to house loop nest constraints + +# {{{ UnexpandedInameSet + +class UnexpandedInameSet(Record): + def __init__(self, inames, complement=False): + Record.__init__( + self, + inames=inames, + complement=complement, + ) + + def contains(self, inames): + if isinstance(inames, set): + return (not (inames & self.inames) if self.complement + else inames.issubset(self.inames)) + else: + return (inames not in self.inames if self.complement + else inames in self.inames) + + def get_inames_represented(self, iname_universe=None): + """Return the set of inames represented by the UnexpandedInameSet + """ + if self.complement: + if not iname_universe: + raise ValueError( + "Cannot expand UnexpandedInameSet %s without " + "iname_universe." 
% (self)) + return iname_universe-self.inames + else: + return self.inames.copy() + + def __lt__(self, other): + # FIXME is this function really necessary? If so, what should it return? + return self.__hash__() < other.__hash__() + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. + """ + + key_builder.rec(key_hash, self.inames) + key_builder.rec(key_hash, self.complement) + + def __str__(self): + return "%s{%s}" % ("~" if self.complement else "", + ",".join(i for i in sorted(self.inames))) + +# }}} + + +# {{{ LoopNestConstraints + +class LoopNestConstraints(Record): + def __init__(self, must_nest=None, must_not_nest=None, + must_nest_graph=None): + Record.__init__( + self, + must_nest=must_nest, + must_not_nest=must_not_nest, + must_nest_graph=must_nest_graph, + ) + + def __hash__(self): + return hash(repr(self)) + + def update_persistent_hash(self, key_hash, key_builder): + """Custom hash computation function for use with + :class:`pytools.persistent_dict.PersistentDict`. 
+ """ + + key_builder.rec(key_hash, self.must_nest) + key_builder.rec(key_hash, self.must_not_nest) + key_builder.rec(key_hash, self.must_nest_graph) + + def __str__(self): + return "LoopNestConstraints(\n" \ + " must_nest = " + str(self.must_nest) + "\n" \ + " must_not_nest = " + str(self.must_not_nest) + "\n" \ + " must_nest_graph = " + str(self.must_nest_graph) + "\n" \ + ")" + +# }}} + +# }}} + +# }}} + + # {{{ split/chunk inames # {{{ backend From f144896453d4a703659ef644ae90d5c0071e796b Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 22:56:11 -0500 Subject: [PATCH 14/17] copy in functions for initial loop nest constraint creation from branch loop-nest-constraints-v2 --- loopy/transform/iname.py | 396 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 396 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 3d09daa93..2d96c602e 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -204,6 +204,402 @@ def __str__(self): # }}} + +# {{{ Initial loop nest constraint creation + +# {{{ process_loop_nest_specification + +def process_loop_nest_specification( + nesting, + max_tuple_size=None, + complement_sets_allowed=True, + ): + + # Ensure that user-supplied nesting conforms to syntax rules, and + # convert string representations of nestings to tuple of UnexpandedInameSets + + import re + + def _raise_loop_nest_input_error(msg): + valid_prio_rules = ( + "Valid `must_nest` description formats: " # noqa + "\"iname, iname, ...\" or (str, str, str, ...), " # noqa + "where str can be of form " # noqa + "\"iname\" or \"{iname, iname, ...}\". " # noqa + "No set complements allowed.\n" # noqa + "Valid `must_not_nest` description tuples must have length 2: " # noqa + "\"iname, iname\", \"iname, ~iname\", or " # noqa + "(str, str), where str can be of form " # noqa + "\"iname\", \"~iname\", \"{iname, iname, ...}\", or " # noqa + "\"~{iname, iname, ...}\"." 
# noqa + ) + raise ValueError( + "Invalid loop nest prioritization: %s\n" + "Loop nest prioritization formatting rules:\n%s" + % (msg, valid_prio_rules)) + + def _error_on_regex_match(match_str, target_str): + if re.findall(match_str, target_str): + _raise_loop_nest_input_error( + "Unrecognized character(s) %s in nest string %s" + % (re.findall(match_str, target_str), target_str)) + + def _process_iname_set_str(iname_set_str): + # Convert something like ~{i,j} or ~i or "i,j" to an UnexpandedInameSet + + # Remove leading/trailing whitespace + iname_set_str_stripped = iname_set_str.strip() + + if not iname_set_str_stripped: + _raise_loop_nest_input_error( + "Found 0 inames in string %s." + % (iname_set_str)) + + # Process complement sets + if iname_set_str_stripped[0] == "~": + # Make sure complement is allowed + if not complement_sets_allowed: + _raise_loop_nest_input_error( + "Complement (~) not allowed in this loop nest string %s. " + "If you have a use-case where allowing a currently " + "disallowed set complement would be helpful, and the " + "desired nesting constraint cannot easily be expressed " + "another way, " + "please contact the Loo.py maintainers." + % (iname_set_str)) + + # Remove tilde + iname_set_str_stripped = iname_set_str_stripped[1:] + if "~" in iname_set_str_stripped: + _raise_loop_nest_input_error( + "Multiple complement symbols found in iname set string %s" + % (iname_set_str)) + + # Make sure that braces are included if multiple inames present + if "," in iname_set_str_stripped and not ( + iname_set_str_stripped.startswith("{") and + iname_set_str_stripped.endswith("}")): + _raise_loop_nest_input_error( + "Complements of sets containing multiple inames must " + "enclose inames in braces: %s is not valid."
+ % (iname_set_str)) + + complement = True + else: + complement = False + + # Remove leading/trailing spaces + iname_set_str_stripped = iname_set_str_stripped.strip(" ") + + # Make sure braces are valid and strip them + if iname_set_str_stripped[0] == "{": + if not iname_set_str_stripped[-1] == "}": + _raise_loop_nest_input_error( + "Invalid braces: %s" % (iname_set_str)) + else: + # Remove enclosing braces + iname_set_str_stripped = iname_set_str_stripped[1:-1] + # (If there are dangling braces around, they will be caught next) + + # Remove any more spaces + iname_set_str_stripped = iname_set_str_stripped.strip() + + # Should be no remaining special characters besides comma and space + _error_on_regex_match(r"([^,\w ])", iname_set_str_stripped) + + # Split by commas or spaces to get inames + inames = re.findall(r"([\w]+)(?:[ |,]*|$)", iname_set_str_stripped) + + # Make sure iname count matches what we expect from comma count + if len(inames) != iname_set_str_stripped.count(",") + 1: + _raise_loop_nest_input_error( + "Found %d inames but expected %d in string %s." + % (len(inames), iname_set_str_stripped.count(",") + 1, + iname_set_str)) + + if len(inames) == 0: + _raise_loop_nest_input_error( + "Found empty set in string %s." + % (iname_set_str)) + + # NOTE this won't catch certain cases of bad syntax, e.g., ("{h i j,,}", "k") + + return UnexpandedInameSet( + set([s.strip() for s in iname_set_str_stripped.split(",")]), + complement=complement) + + if isinstance(nesting, str): + # Enforce that constraints involving iname sets be passed as tuple. + # Iname sets defined negatively with a *single* iname are allowed here. + + # Check for any special characters besides comma, space, and tilde. + # E.g., curly braces would indicate that an iname set was NOT + # passed as a tuple, which is not allowed. 
+ _error_on_regex_match(r"([^,\w~ ])", nesting) + + # Split by comma and process each tier + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting.split(",")) + else: + assert isinstance(nesting, (tuple, list)) + # Process each tier + nesting_as_tuple = tuple( + _process_iname_set_str(set_str) for set_str in nesting) + + # Check max_tuple_size + if max_tuple_size and len(nesting_as_tuple) > max_tuple_size: + _raise_loop_nest_input_error( + "Loop nest prioritization tuple %s exceeds max tuple size %d." + % (nesting_as_tuple)) + + # Make sure nesting has len > 1 + if len(nesting_as_tuple) <= 1: + _raise_loop_nest_input_error( + "Loop nest prioritization tuple %s must have length > 1." + % (nesting_as_tuple)) + + # Return tuple of UnexpandedInameSets + return nesting_as_tuple + +# }}} + + +# {{{ constrain_loop_nesting + +def constrain_loop_nesting( + kernel, must_nest=None, must_not_nest=None): + r"""Add the provided constraints to the kernel. + :arg must_nest: A tuple or comma-separated string representing + an ordering of loop nesting tiers that must appear in the + linearized kernel. Each item in the tuple represents a + :class:`UnexpandedInameSet`\ s. + :arg must_not_nest: A two-tuple or comma-separated string representing + an ordering of loop nesting tiers that must not appear in the + linearized kernel. Each item in the tuple represents a + :class:`UnexpandedInameSet`\ s.
+ """ + + # {{{ Get any current constraints, if they exist + if kernel.loop_nest_constraints: + if kernel.loop_nest_constraints.must_nest: + must_nest_constraints_old = kernel.loop_nest_constraints.must_nest + else: + must_nest_constraints_old = set() + + if kernel.loop_nest_constraints.must_not_nest: + must_not_nest_constraints_old = \ + kernel.loop_nest_constraints.must_not_nest + else: + must_not_nest_constraints_old = set() + + if kernel.loop_nest_constraints.must_nest_graph: + must_nest_graph_old = kernel.loop_nest_constraints.must_nest_graph + else: + must_nest_graph_old = {} + else: + must_nest_constraints_old = set() + must_not_nest_constraints_old = set() + must_nest_graph_old = {} + + # }}} + + # {{{ Process must_nest + + if must_nest: + # {{{ Parse must_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_nest (no complements allowed) + must_nest_tuple = process_loop_nest_specification( + must_nest, complement_sets_allowed=False) + # }}} + + # {{{ Error if someone prioritizes concurrent iname + + from loopy.kernel.data import ConcurrentTag + for iname_set in must_nest_tuple: + for iname in iname_set.inames: + if kernel.iname_tags_of_type(iname, ConcurrentTag): + raise ValueError( + "iname %s tagged with ConcurrentTag, " + "cannot use iname in must-nest constraint %s." 
+ % (iname, must_nest_tuple)) + + # }}} + + # {{{ Update must_nest graph (and check for cycles) + + must_nest_graph_new = update_must_nest_graph( + must_nest_graph_old, must_nest_tuple, kernel.all_inames()) + + # }}} + + # {{{ Make sure must_nest constraints don't violate must_not_nest + # (this may not catch all problems) + check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints_old, must_nest_graph_new) + # }}} + + # {{{ Check for conflicts with inames tagged 'vec' (must be innermost) + + from loopy.kernel.data import VectorizeTag + for iname in kernel.all_inames(): + if kernel.iname_tags_of_type(iname, VectorizeTag) and ( + must_nest_graph_new.get(iname, set())): + # Must-nest graph doesn't allow iname to be a leaf, error + raise ValueError( + "Iname %s tagged as 'vec', but loop nest constraints " + "%s require that iname %s nest outside of inames %s. " + "Vectorized inames must nest innermost; cannot " + "impose loop nest specification." + % (iname, must_nest, iname, + must_nest_graph_new.get(iname, set()))) + + # }}} + + # {{{ Add new must_nest constraints to existing must_nest constraints + must_nest_constraints_new = must_nest_constraints_old | set( + [must_nest_tuple, ]) + # }}} + + # }}} + else: + # {{{ No new must_nest constraints, just keep the old ones + + must_nest_constraints_new = must_nest_constraints_old + must_nest_graph_new = must_nest_graph_old + + # }}} + + # }}} + + # {{{ Process must_not_nest + + if must_not_nest: + # {{{ Parse must_not_nest, check for conflicts, combine with old constraints + + # {{{ Parse must_not_nest; complements allowed; max_tuple_size=2 + + must_not_nest_tuple = process_loop_nest_specification( + must_not_nest, max_tuple_size=2) + + # }}} + + # {{{ Make sure must_not_nest constraints don't violate must_nest + + # (cycles are allowed in must_not_nest constraints) + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph_new.items(): + 
must_pairs.extend(list(itertools.product([iname_before], inames_after))) + + if not check_must_not_nest(must_pairs, must_not_nest_tuple): + raise ValueError( + "constrain_loop_nesting: nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest constraints %s." + % (must_not_nest_tuple, must_nest_constraints_new)) + + # }}} + + # {{{ Add new must_not_nest constraints to existing must_not_nest constraints + must_not_nest_constraints_new = must_not_nest_constraints_old | set([ + must_not_nest_tuple, ]) + # }}} + + # }}} + else: + # {{{ No new must_not_nest constraints, just keep the old ones + + must_not_nest_constraints_new = must_not_nest_constraints_old + + # }}} + + # }}} + + nest_constraints = LoopNestConstraints( + must_nest=must_nest_constraints_new, + must_not_nest=must_not_nest_constraints_new, + must_nest_graph=must_nest_graph_new, + ) + + return kernel.copy(loop_nest_constraints=nest_constraints) + +# }}} + + +# {{{ update_must_nest_graph + +def update_must_nest_graph(must_nest_graph, must_nest, all_inames): + # Note: there should *not* be any complements in the must_nest tuples + + from copy import deepcopy + new_graph = deepcopy(must_nest_graph) + + # First, each iname must be a node in the graph + for missing_iname in all_inames - new_graph.keys(): + new_graph[missing_iname] = set() + + # Expand must_nest into (before, after) pairs + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest, all_inames) + + # Update must_nest_graph with new pairs + for before, after in must_nest_expanded: + new_graph[before].add(after) + + # Compute transitive closure + from pytools.graph import compute_transitive_closure, contains_cycle + new_graph_closure = compute_transitive_closure(new_graph) + # Note: compute_transitive_closure now allows cycles, will not error + + # Check for inconsistent must_nest constraints by checking for cycle: + if contains_cycle(new_graph_closure): + raise ValueError( + "update_must_nest_graph:
Nest constraint cycle detected. " + "must_nest constraints %s inconsistent with existing " + "must_nest constraints %s." + % (must_nest, must_nest_graph)) + + return new_graph_closure + +# }}} + + +# {{{ _expand_iname_sets_in_tuple + +def _expand_iname_sets_in_tuple( + iname_sets_tuple, + iname_universe=None, + ): + + # First convert UnexpandedInameSets to sets. + # Note that must_nest constraints cannot be negatively defined. + positively_defined_iname_sets = [ + iname_set.get_inames_represented(iname_universe) + for iname_set in iname_sets_tuple] + + # Now expand all priority tuples into (before, after) pairs using + # Cartesian product of all pairs of sets + # (Assumes prio_sets length > 1) + import itertools + loop_priority_pairs = set() + for i, before_set in enumerate(positively_defined_iname_sets[:-1]): + for after_set in positively_defined_iname_sets[i+1:]: + loop_priority_pairs.update( + list(itertools.product(before_set, after_set))) + + # Make sure no priority tuple contains an iname twice + for prio_tuple in loop_priority_pairs: + if len(set(prio_tuple)) != len(prio_tuple): + raise ValueError( + "Loop nesting %s contains cycle: %s. " + % (iname_sets_tuple, prio_tuple)) + + return loop_priority_pairs + +# }}} + +# }}} + # }}} From b8bde22ba8b63405ee078cfaa5498bcb073bfa19 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 23:01:38 -0500 Subject: [PATCH 15/17] copy in functions for checking loop nest constraints from branch loop-nest-constraints-v2 --- loopy/transform/iname.py | 179 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 2d96c602e..d7c6897f2 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -369,10 +369,12 @@ def _process_iname_set_str(iname_set_str): def constrain_loop_nesting( kernel, must_nest=None, must_not_nest=None): r"""Add the provided constraints to the kernel. 
+ :arg must_nest: A tuple or comma-separated string representing an ordering of loop nesting tiers that must appear in the linearized kernel. Each item in the tuple represents a :class:`UnexpandedInameSet`\ s. + :arg must_not_nest: A two-tuple or comma-separated string representing an ordering of loop nesting tiers that must not appear in the linearized kernel. Each item in the tuple represents a @@ -600,6 +602,183 @@ def _expand_iname_sets_in_tuple( # }}} + +# {{{ Checking constraints + +# {{{ check_must_nest + +def check_must_nest(all_loop_nests, must_nest, all_inames): + r"""Determine whether must_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. + + :arg must_nest: A tuple of :class:`UnexpandedInameSet`\ s describing + nestings that must appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must nest constraints + are satisfied by the provided loop nesting. + """ + + # In order to make sure must_nest is satisfied, we + # need to expand all must_nest tiers + + # FIXME instead of expanding tiers into all pairs up front, + # create these pairs one at a time so that we can stop as soon as we fail + + must_nest_expanded = _expand_iname_sets_in_tuple(must_nest) + + # must_nest_expanded contains pairs + for before, after in must_nest_expanded: + found = False + for nesting in all_loop_nests: + if before in nesting and after in nesting and ( + nesting.index(before) < nesting.index(after)): + found = True + break + if not found: + return False + return True + +# }}} + + +# {{{ check_must_not_nest + +def check_must_not_nest(all_loop_nests, must_not_nest): + r"""Determine whether must_not_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A list of lists of inames, each representing + the nesting order of nested loops. 
+ + :arg must_not_nest: A two-tuple of :class:`UnexpandedInameSet`\ s + describing nestings that must not appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the must_not_nest constraints + are satisfied by the provided loop nesting. + """ + + # Note that must_not_nest may only contain two tiers + + for nesting in all_loop_nests: + + # Go through each pair in all_loop_nests + for i, iname_before in enumerate(nesting): + for iname_after in nesting[i+1:]: + + # Check whether it violates must not nest + if (must_not_nest[0].contains(iname_before) + and must_not_nest[1].contains(iname_after)): + # Stop as soon as we fail + return False + return True + +# }}} + + +# {{{ loop_nest_constraints_satisfied + +def loop_nest_constraints_satisfied( + all_loop_nests, + must_nest_constraints=None, + must_not_nest_constraints=None, + all_inames=None): + r"""Determine whether must_not_nest constraint is satisfied by + all_loop_nests + + :arg all_loop_nests: A set of lists of inames, each representing + the nesting order of loops. + + :arg must_nest_constraints: An iterable of tuples of + :class:`UnexpandedInameSet`\ s, each describing nestings that must + appear in all_loop_nests. + + :arg must_not_nest_constraints: An iterable of two-tuples of + :class:`UnexpandedInameSet`\ s, each describing nestings that must not + appear in all_loop_nests. + + :returns: A :class:`bool` indicating whether the constraints + are satisfied by the provided loop nesting. 
+ """ + + # Check must-nest constraints + if must_nest_constraints: + for must_nest in must_nest_constraints: + if not check_must_nest( + all_loop_nests, must_nest, all_inames): + return False + + # Check must-not-nest constraints + if must_not_nest_constraints: + for must_not_nest in must_not_nest_constraints: + if not check_must_not_nest( + all_loop_nests, must_not_nest): + return False + + return True + +# }}} + + +# {{{ check_must_not_nest_against_must_nest_graph + +def check_must_not_nest_against_must_nest_graph( + must_not_nest_constraints, must_nest_graph): + r"""Ensure none of the must_not_nest constraints are violated by + nestings represented in the must_nest_graph + + :arg must_not_nest_constraints: A set of two-tuples of + :class:`UnexpandedInameSet`\ s describing nestings that must not appear + in loop nestings. + + :arg must_nest_graph: A :class:`dict` mapping each iname to other inames + that must be nested inside it. + """ + + if must_not_nest_constraints and must_nest_graph: + import itertools + must_pairs = [] + for iname_before, inames_after in must_nest_graph.items(): + must_pairs.extend( + list(itertools.product([iname_before], inames_after))) + if any(not check_must_not_nest(must_pairs, must_not_nest_tuple) + for must_not_nest_tuple in must_not_nest_constraints): + raise ValueError( + "Nest constraint conflict detected. " + "must_not_nest constraints %s inconsistent with " + "must_nest relationships (must_nest graph: %s)." + % (must_not_nest_constraints, must_nest_graph)) + +# }}} + + +# {{{ get_iname_nestings + +def get_iname_nestings(linearization): + """Return a list of iname tuples representing the deepest loop nestings + in a kernel linearization. 
+ """ + from loopy.schedule import EnterLoop, LeaveLoop + nestings = [] + current_tiers = [] + already_exiting_loops = False + for lin_item in linearization: + if isinstance(lin_item, EnterLoop): + already_exiting_loops = False + current_tiers.append(lin_item.iname) + elif isinstance(lin_item, LeaveLoop): + if not already_exiting_loops: + nestings.append(tuple(current_tiers)) + already_exiting_loops = True + del current_tiers[-1] + return nestings + +# }}} + +# }}} + # }}} From 0721a2c9672a5eb9af8c5f9459efd20684426b63 Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 23:16:15 -0500 Subject: [PATCH 16/17] add tests for loop nest constraint creation from branch loop-nest-constraints-v2 --- test/test_nest_constraints.py | 376 ++++++++++++++++++++++++++++++++++ 1 file changed, 376 insertions(+) create mode 100644 test/test_nest_constraints.py diff --git a/test/test_nest_constraints.py b/test/test_nest_constraints.py new file mode 100644 index 000000000..6bc1e8ef4 --- /dev/null +++ b/test/test_nest_constraints.py @@ -0,0 +1,376 @@ +__copyright__ = "Copyright (C) 2021 James Stevens" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import sys +import loopy as lp +import numpy as np +import pyopencl as cl + +import logging +logger = logging.getLogger(__name__) + +try: + import faulthandler +except ImportError: + pass +else: + faulthandler.enable() + +from pyopencl.tools import pytest_generate_tests_for_pyopencl \ + as pytest_generate_tests + +__all__ = [ + "pytest_generate_tests", + "cl" # "cl.create_some_context" + ] + + +from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2 # noqa + + +# {{{ test_loop_constraint_string_parsing + +def test_loop_constraint_string_parsing(): + ref_knl = lp.make_kernel( + "{ [g,h,i,j,k,xx]: 0<=g,h,i,j,k,xx 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: foldmethod=marker From 04219ab4d473d57c6c03fb364cc71a284c2c5d3e Mon Sep 17 00:00:00 2001 From: jdsteve2 Date: Sat, 8 May 2021 23:38:45 -0500 Subject: [PATCH 17/17] fix typo in comment --- loopy/transform/iname.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index d7c6897f2..4bee180c3 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -346,7 +346,7 @@ def _process_iname_set_str(iname_set_str): nesting_as_tuple = tuple( _process_iname_set_str(set_str) for set_str in nesting) - # Check max_inames_per_set + # Check max_tuple_size if max_tuple_size and len(nesting_as_tuple) > max_tuple_size: _raise_loop_nest_input_error( "Loop nest prioritization tuple %s exceeds max tuple size %d."