From a81889b4c7ab1f0681bda6da726f12f15a2cabcd Mon Sep 17 00:00:00 2001 From: Nick Date: Tue, 12 Nov 2024 12:50:19 -0600 Subject: [PATCH 01/24] Add a new failing test case for tag_inames. --- test/test_transform.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/test/test_transform.py b/test/test_transform.py index 98398fef..11ec3715 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -377,6 +377,35 @@ def test_set_arg_order(): knl = lp.set_argument_order(knl, "out,a,n,b") +def test_tag_inames_keeps_all_tags_if_able(): + t_unit = lp.make_kernel( + "{ [i,j]: 0<=i,j Date: Tue, 12 Nov 2024 15:38:33 -0600 Subject: [PATCH 02/24] Fix tag_inames to apply multiple tags, type it --- loopy/transform/iname.py | 91 +++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 47 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 97257745..1f318313 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -21,10 +21,14 @@ """ +from collections.abc import Iterable, Mapping, Sequence from typing import Any, FrozenSet, Optional +from typing_extensions import TypeAlias + import islpy as isl from islpy import dim_type +from pytools.tag import Tag from loopy.diagnostic import LoopyError from loopy.kernel import LoopKernel @@ -675,9 +679,18 @@ def untag_inames(kernel, iname_to_untag, tag_type): # {{{ tag inames +_Tags_ish: TypeAlias = Tag | Sequence[Tag] | str | Sequence[str] + + @for_each_kernel -def tag_inames(kernel, iname_to_tag, force=False, - ignore_nonexistent=False): +def tag_inames( + kernel: LoopKernel, + iname_to_tag: (Mapping[str, _Tags_ish] + | Sequence[tuple[str, _Tags_ish]] + | str), + force: bool = False, + ignore_nonexistent: bool = False + ) -> LoopKernel: """Tag an iname :arg iname_to_tag: a list of tuples ``(iname, new_tag)``. *new_tag* is given @@ -697,74 +710,67 @@ def tag_inames(kernel, iname_to_tag, force=False, """ if isinstance(iname_to_tag, str): - def parse_kv(s): + def parse_kv(s: str) -> tuple[str, str]: colon_index = s.find(":") if colon_index == -1: raise ValueError("tag decl '%s' has no colon" % s) return (s[:colon_index].strip(), s[colon_index+1:].strip()) - iname_to_tag = [ + iname_to_tags_seq: Sequence[tuple[str, _Tags_ish]] = [ parse_kv(s) for s in iname_to_tag.split(",") if s.strip()] + elif isinstance(iname_to_tag, Mapping): + iname_to_tags_seq = list(iname_to_tag.items()) + else: + iname_to_tags_seq = iname_to_tag if not iname_to_tag: return kernel - # convert dict to list of tuples - if isinstance(iname_to_tag, dict): - iname_to_tag = list(iname_to_tag.items()) - # flatten iterables of tags for each iname - try: - from collections.abc import Iterable - except ImportError: - from collections import Iterable # pylint:disable=no-name-in-module - - unpack_iname_to_tag = [] - for iname, tags in iname_to_tag: + unpack_iname_to_tag: list[tuple[str, Tag | str]] = [] + for iname, tags in iname_to_tags_seq: if isinstance(tags, Iterable) and not isinstance(tags, str): for tag in tags: unpack_iname_to_tag.append((iname, tag)) else: unpack_iname_to_tag.append((iname, tags)) - iname_to_tag = unpack_iname_to_tag from loopy.kernel.data import parse_tag as inner_parse_tag - def parse_tag(tag): + def parse_tag(tag: Tag | str) -> Iterable[Tag]: if isinstance(tag, str): if tag.startswith("like."): - tags = kernel.iname_tags(tag[5:]) - if len(tags) == 0: - return None - if len(tags) == 1: - return tags[0] - else: - raise LoopyError("cannot use like for multiple tags (for now)") + return kernel.iname_tags(tag[5:]) elif tag == "unused.g": return find_unused_axis_tag(kernel, "g") elif tag == "unused.l": return find_unused_axis_tag(kernel, "l") - return inner_parse_tag(tag) - - iname_to_tag = [(iname, parse_tag(tag)) for iname, tag in iname_to_tag] + result = inner_parse_tag(tag) + if result is None: + return [] + else: + return [result] - # {{{ globbing + iname_to_parsed_tag = [ + (iname, subtag) + for iname, tag in unpack_iname_to_tag + for subtag in parse_tag(tag) + ] + knl_inames = dict(kernel.inames) all_inames = kernel.all_inames() from loopy.match import re_from_glob - new_iname_to_tag = {} - for iname, new_tag in iname_to_tag: + + for iname, new_tag in iname_to_parsed_tag: if "*" in iname or "?" in iname: match_re = re_from_glob(iname) - for sub_iname in all_inames: - if match_re.match(sub_iname): - new_iname_to_tag[sub_iname] = new_tag - + inames = [sub_iname for sub_iname in all_inames + if match_re.match(sub_iname)] else: if iname not in all_inames: if ignore_nonexistent: @@ -772,22 +778,13 @@ def parse_tag(tag): else: raise LoopyError("iname '%s' does not exist" % iname) - new_iname_to_tag[iname] = new_tag - - iname_to_tag = new_iname_to_tag - del new_iname_to_tag + inames = [iname] - # }}} - - knl_inames = kernel.inames.copy() - for name, new_tag in iname_to_tag.items(): - if not new_tag: + if new_tag is None: continue - if name not in kernel.all_inames(): - raise ValueError("cannot tag '%s'--not known" % name) - - knl_inames[name] = knl_inames[name].tagged(new_tag) + for sub_iname in inames: + knl_inames[sub_iname] = knl_inames[sub_iname].tagged(new_tag) return kernel.copy(inames=knl_inames) From 038793df683442a5d0a36a7969c24ff826fafe51 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 13 Nov 2024 11:07:23 -0600 Subject: [PATCH 03/24] Fix type errors from more precise types in DependencyMapper --- loopy/kernel/data.py | 4 ++-- loopy/target/execution.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 31c06fdb..5d1de0e5 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -45,7 +45,7 @@ import numpy as np from immutables import Map -from pymbolic import ArithmeticExpressionT +from pymbolic import ArithmeticExpressionT, Variable from pytools import ImmutableRecord from pytools.tag import Tag, Taggable, UniqueTag as UniqueTagBase @@ -113,7 +113,7 @@ def _names_from_expr(expr: Union[None, ExpressionT, str]) -> FrozenSet[str]: if isinstance(expr, str): return frozenset({expr}) elif isinstance(expr, Expression): - return frozenset(v.name for v in dep_mapper(expr)) + return frozenset(cast(Variable, v).name for v in dep_mapper(expr)) elif expr is None: return frozenset() elif isinstance(expr, Number): diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 1b62be8c..2443a142 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -36,11 +36,12 @@ Set, Tuple, Union, + cast, ) from immutables import Map -from pymbolic import var +from pymbolic import Variable, var from pytools.codegen import CodeGenerator, Indentation from pytools.py_codegen import PythonFunctionGenerator @@ -260,7 +261,7 @@ def generate_integer_arg_finding_from_array_data( unknown_var, = deps order_to_unknown_to_equations \ .setdefault(eqn.order, {}) \ - .setdefault(unknown_var.name, []) \ + .setdefault(cast(Variable, unknown_var).name, []) \ .append((eqn)) else: # Zero deps: nothing to determine, forget about it. @@ -274,7 +275,6 @@ def generate_integer_arg_finding_from_array_data( # {{{ generate arg finding code from pymbolic.algorithm import solve_affine_equations_for - from pymbolic.primitives import Variable from pytools.codegen import CodeGenerator gen("# {{{ find integer arguments from array data") From d52c2909ee7fce7a6088685f155013c0eca550ce Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 13 Nov 2024 11:16:03 -0600 Subject: [PATCH 04/24] Type re_from_glob --- loopy/match.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/match.py b/loopy/match.py index 5e409791..ae52e6c6 100644 --- a/loopy/match.py +++ b/loopy/match.py @@ -24,6 +24,7 @@ THE SOFTWARE. """ +import re from abc import ABC, abstractmethod from dataclasses import dataclass from sys import intern @@ -66,8 +67,7 @@ """ -def re_from_glob(s): - import re +def re_from_glob(s: str) -> re.Pattern: from fnmatch import translate return re.compile("^"+translate(s.strip())+"$") From 920cb49887ef815eac2debf7aa2a4bc128f6e6a0 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 13 Nov 2024 11:16:08 -0600 Subject: [PATCH 05/24] Type rename_inames --- loopy/transform/iname.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 1f318313..79515409 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -21,7 +21,7 @@ """ -from collections.abc import Iterable, Mapping, Sequence +from collections.abc import Collection, Iterable, Mapping, Sequence from typing import Any, FrozenSet, Optional from typing_extensions import TypeAlias @@ -34,6 +34,7 @@ from loopy.kernel import LoopKernel from loopy.kernel.function_interface import CallableKernel from loopy.kernel.instruction import InstructionBase +from loopy.match import ToStackMatchCovertible from loopy.symbolic import ( RuleAwareIdentityMapper, RuleAwareSubstitutionMapper, @@ -2369,8 +2370,14 @@ def add_inames_for_unused_hw_axes(kernel, within=None): @for_each_kernel @remove_any_newly_unused_inames -def rename_inames(kernel, old_inames, new_iname, existing_ok=False, - within=None, raise_on_domain_mismatch: Optional[bool] = None): +def rename_inames( + kernel: LoopKernel, + old_inames: Collection[str], + new_iname: str, + existing_ok: bool = False, + within: ToStackMatchCovertible = None, + raise_on_domain_mismatch: Optional[bool] = None + ) -> LoopKernel: r""" :arg old_inames: A collection of inames that must be renamed to **new_iname**. :arg within: a stack match as understood by @@ -2380,7 +2387,6 @@ def rename_inames(kernel, old_inames, new_iname, existing_ok=False, :math:`\exists (i_1,i_2) \in \{\text{old\_inames}\}^2 | \mathcal{D}_{i_1} \neq \mathcal{D}_{i_2}`. """ - from collections.abc import Collection if (isinstance(old_inames, str) or not isinstance(old_inames, Collection)): raise LoopyError("'old_inames' must be a collection of strings, " @@ -2508,9 +2514,15 @@ def does_insn_involve_iname(kernel, insn, *args): @for_each_kernel -def rename_iname(kernel, old_iname, new_iname, existing_ok=False, - within=None, preserve_tags=True, - raise_on_domain_mismatch: Optional[bool] = None): +def rename_iname( + kernel: LoopKernel, + old_iname: str, + new_iname: str, + existing_ok: bool = False, + within: ToStackMatchCovertible = None, + preserve_tags: bool = True, + raise_on_domain_mismatch: Optional[bool] = None + ) -> LoopKernel: r""" Single iname version of :func:`loopy.rename_inames`. :arg existing_ok: execute even if *new_iname* already exists. @@ -2528,7 +2540,7 @@ def rename_iname(kernel, old_iname, new_iname, existing_ok=False, kernel = rename_inames(kernel, [old_iname], new_iname, existing_ok, within, raise_on_domain_mismatch) if preserve_tags: - kernel = tag_inames(kernel, product([new_iname], tags)) + kernel = tag_inames(kernel, list(product([new_iname], tags))) return kernel # }}} From 57f6654662dcf0188ae4d9976e166d59addb019a Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Wed, 13 Nov 2024 14:02:54 -0600 Subject: [PATCH 06/24] schedule: update types --- loopy/schedule/__init__.py | 85 ++++++++++++++++------------------- loopy/schedule/tree.py | 23 +++++----- loopy/transform/precompute.py | 5 ++- 3 files changed, 53 insertions(+), 60 deletions(-) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 1364be85..a9121de8 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -27,20 +27,11 @@ import logging import sys +from collections.abc import Hashable, Iterator, Mapping, Sequence, Set from dataclasses import dataclass, replace from typing import ( TYPE_CHECKING, - AbstractSet, Any, - Dict, - FrozenSet, - Hashable, - Iterator, - Mapping, - Optional, - Sequence, - Set, - Tuple, TypeVar, ) @@ -155,7 +146,7 @@ class Barrier(ScheduleItem): def gather_schedule_block( schedule: Sequence[ScheduleItem], start_idx: int - ) -> Tuple[Sequence[ScheduleItem], int]: + ) -> tuple[Sequence[ScheduleItem], int]: assert isinstance(schedule[start_idx], BeginBlockItem) level = 0 @@ -176,7 +167,7 @@ def gather_schedule_block( def generate_sub_sched_items( schedule: Sequence[ScheduleItem], start_idx: int - ) -> Iterator[Tuple[int, ScheduleItem]]: + ) -> Iterator[tuple[int, ScheduleItem]]: if not isinstance(schedule[start_idx], BeginBlockItem): yield start_idx, schedule[start_idx] @@ -203,7 +194,7 @@ def generate_sub_sched_items( def get_insn_ids_for_block_at( schedule: Sequence[ScheduleItem], start_idx: int - ) -> FrozenSet[str]: + ) -> frozenset[str]: return frozenset( sub_sched_item.insn_id for i, sub_sched_item in generate_sub_sched_items( @@ -212,7 +203,7 @@ def get_insn_ids_for_block_at( def find_used_inames_within( - kernel: LoopKernel, sched_index: int) -> AbstractSet[str]: + kernel: LoopKernel, sched_index: int) -> set[str]: assert kernel.linearization is not None sched_item = kernel.linearization[sched_index] @@ -234,7 +225,7 @@ def find_used_inames_within( return result -def find_loop_nest_with_map(kernel: LoopKernel) -> Mapping[str, AbstractSet[str]]: +def find_loop_nest_with_map(kernel: LoopKernel) -> Mapping[str, set[str]]: """Returns a dictionary mapping inames to other inames that are always nested with them. """ @@ -257,11 +248,11 @@ def find_loop_nest_with_map(kernel: LoopKernel) -> Mapping[str, AbstractSet[str] return result -def find_loop_nest_around_map(kernel: LoopKernel) -> Mapping[str, AbstractSet[str]]: +def find_loop_nest_around_map(kernel: LoopKernel) -> Mapping[str, set[str]]: """Returns a dictionary mapping inames to other inames that are always nested around them. """ - result: Dict[str, Set[str]] = {} + result: dict[str, set[str]] = {} all_inames = kernel.all_inames() @@ -299,14 +290,14 @@ def find_loop_nest_around_map(kernel: LoopKernel) -> Mapping[str, AbstractSet[st def find_loop_insn_dep_map( kernel: LoopKernel, - loop_nest_with_map: Mapping[str, AbstractSet[str]], - loop_nest_around_map: Mapping[str, AbstractSet[str]] - ) -> Mapping[str, AbstractSet[str]]: + loop_nest_with_map: Mapping[str, Set[str]], + loop_nest_around_map: Mapping[str, Set[str]] + ) -> Mapping[str, set[str]]: """Returns a dictionary mapping inames to other instruction ids that need to be scheduled before the iname should be eligible for scheduling. """ - result: Dict[str, Set[str]] = {} + result: dict[str, set[str]] = {} from loopy.kernel.data import ConcurrentTag, IlpBaseTag for insn in kernel.instructions: @@ -372,7 +363,7 @@ def find_loop_insn_dep_map( def group_insn_counts(kernel: LoopKernel) -> Mapping[str, int]: - result: Dict[str, int] = {} + result: dict[str, int] = {} for insn in kernel.instructions: for grp in insn.groups: @@ -382,7 +373,7 @@ def group_insn_counts(kernel: LoopKernel) -> Mapping[str, int]: def gen_dependencies_except( - kernel: LoopKernel, insn_id: str, except_insn_ids: AbstractSet[str] + kernel: LoopKernel, insn_id: str, except_insn_ids: Set[str] ) -> Iterator[str]: insn = kernel.id_to_insn[insn_id] for dep_id in insn.depends_on: @@ -396,9 +387,9 @@ def gen_dependencies_except( def get_priority_tiers( - wanted: AbstractSet[int], - priorities: AbstractSet[Sequence[int]] - ) -> Iterator[AbstractSet[int]]: + wanted: Set[int], + priorities: Set[Sequence[int]] + ) -> Iterator[set[int]]: # Get highest priority tier candidates: These are the first inames # of all the given priority constraints candidates = set() @@ -677,24 +668,24 @@ class SchedulerState: order with instruction priorities as tie breaker. """ kernel: LoopKernel - loop_nest_around_map: Mapping[str, AbstractSet[str]] - loop_insn_dep_map: Mapping[str, AbstractSet[str]] + loop_nest_around_map: Mapping[str, set[str]] + loop_insn_dep_map: Mapping[str, set[str]] - breakable_inames: AbstractSet[str] - ilp_inames: AbstractSet[str] - vec_inames: AbstractSet[str] - concurrent_inames: AbstractSet[str] + breakable_inames: set[str] + ilp_inames: set[str] + vec_inames: set[str] + concurrent_inames: set[str] - insn_ids_to_try: Optional[AbstractSet[str]] + insn_ids_to_try: set[str] | None active_inames: Sequence[str] - entered_inames: FrozenSet[str] - enclosing_subkernel_inames: Tuple[str, ...] + entered_inames: frozenset[str] + enclosing_subkernel_inames: tuple[str, ...] schedule: Sequence[ScheduleItem] - scheduled_insn_ids: AbstractSet[str] - unscheduled_insn_ids: AbstractSet[str] + scheduled_insn_ids: frozenset[str] + unscheduled_insn_ids: set[str] preschedule: Sequence[ScheduleItem] - prescheduled_insn_ids: AbstractSet[str] - prescheduled_inames: AbstractSet[str] + prescheduled_insn_ids: set[str] + prescheduled_inames: set[str] may_schedule_global_barriers: bool within_subkernel: bool group_insn_counts: Mapping[str, int] @@ -702,7 +693,7 @@ class SchedulerState: insns_in_topologically_sorted_order: Sequence[InstructionBase] @property - def last_entered_loop(self) -> Optional[str]: + def last_entered_loop(self) -> str | None: if self.active_inames: return self.active_inames[-1] else: @@ -718,7 +709,7 @@ def get_insns_in_topologically_sorted_order( kernel: LoopKernel) -> Sequence[InstructionBase]: from pytools.graph import compute_topological_order - rev_dep_map: Dict[str, Set[str]] = { + rev_dep_map: dict[str, set[str]] = { not_none(insn.id): set() for insn in kernel.instructions} for insn in kernel.instructions: for dep in insn.depends_on: @@ -733,7 +724,7 @@ def get_insns_in_topologically_sorted_order( # Instead of returning these features as a key, we assign an id to # each set of features to avoid comparing them which can be expensive. insn_id_to_feature_id = {} - insn_features: Dict[Hashable, int] = {} + insn_features: dict[Hashable, int] = {} for insn in kernel.instructions: feature = (insn.within_inames, insn.groups, insn.conflicts_with_groups) if feature not in insn_features: @@ -890,7 +881,7 @@ def _get_outermost_diverging_inames( tree: LoopTree, within1: InameStrSet, within2: InameStrSet - ) -> Tuple[InameStr, InameStr]: + ) -> tuple[InameStr, InameStr]: """ For loop nestings *within1* and *within2*, returns the first inames at which the loops nests diverge in the loop nesting tree *tree*. @@ -2180,7 +2171,7 @@ def __init__(self, kernel): def generate_loop_schedules( kernel: LoopKernel, callables_table: CallablesTable, - debug_args: Optional[Dict[str, Any]] = None) -> Iterator[LoopKernel]: + debug_args: Mapping[str, Any] | None = None) -> Iterator[LoopKernel]: """ .. warning:: @@ -2236,7 +2227,7 @@ def _postprocess_schedule(kernel, callables_table, gen_sched): def _generate_loop_schedules_inner( kernel: LoopKernel, callables_table: CallablesTable, - debug_args: Optional[Dict[str, Any]]) -> Iterator[LoopKernel]: + debug_args: Mapping[str, Any] | None) -> Iterator[LoopKernel]: if debug_args is None: debug_args = {} @@ -2337,7 +2328,7 @@ def _generate_loop_schedules_inner( get_insns_in_topologically_sorted_order(kernel)), ) - schedule_gen_kwargs: Dict[str, Any] = {} + schedule_gen_kwargs: dict[str, Any] = {} def print_longest_dead_end(): if debug.interactive: @@ -2402,7 +2393,7 @@ def print_longest_dead_end(): schedule_cache: WriteOncePersistentDict[ - Tuple[LoopKernel, CallablesTable], + tuple[LoopKernel, CallablesTable], LoopKernel ] = WriteOncePersistentDict( "loopy-schedule-cache-v4-"+DATA_MODEL_VERSION, diff --git a/loopy/schedule/tree.py b/loopy/schedule/tree.py index 253ff5f8..e98724f8 100644 --- a/loopy/schedule/tree.py +++ b/loopy/schedule/tree.py @@ -34,9 +34,10 @@ THE SOFTWARE. """ +from collections.abc import Hashable, Iterator, Sequence from dataclasses import dataclass from functools import cached_property -from typing import Generic, Hashable, Iterator, List, Optional, Sequence, Tuple, TypeVar +from typing import Generic, TypeVar from immutables import Map @@ -70,11 +71,11 @@ class Tree(Generic[NodeT]): this allocates a new stack frame for each iteration of the operation. """ - _parent_to_children: Map[NodeT, Tuple[NodeT, ...]] - _child_to_parent: Map[NodeT, Optional[NodeT]] + _parent_to_children: Map[NodeT, tuple[NodeT, ...]] + _child_to_parent: Map[NodeT, NodeT | None] @staticmethod - def from_root(root: NodeT) -> "Tree[NodeT]": + def from_root(root: NodeT) -> Tree[NodeT]: return Tree(Map({root: ()}), Map({root: None})) @@ -89,7 +90,7 @@ def root(self) -> NodeT: return guess @memoize_method - def ancestors(self, node: NodeT) -> Tuple[NodeT, ...]: + def ancestors(self, node: NodeT) -> tuple[NodeT, ...]: """ Returns a :class:`tuple` of nodes that are ancestors of *node*. """ @@ -104,7 +105,7 @@ def ancestors(self, node: NodeT) -> Tuple[NodeT, ...]: return (parent,) + self.ancestors(parent) - def parent(self, node: NodeT) -> Optional[NodeT]: + def parent(self, node: NodeT) -> NodeT | None: """ Returns the parent of *node*. """ @@ -112,7 +113,7 @@ def parent(self, node: NodeT) -> Optional[NodeT]: return self._child_to_parent[node] - def children(self, node: NodeT) -> Tuple[NodeT, ...]: + def children(self, node: NodeT) -> tuple[NodeT, ...]: """ Returns the children of *node*. """ @@ -150,7 +151,7 @@ def __contains__(self, node: NodeT) -> bool: """Return *True* if *node* is a node in the tree.""" return node in self._child_to_parent - def add_node(self, node: NodeT, parent: NodeT) -> "Tree[NodeT]": + def add_node(self, node: NodeT, parent: NodeT) -> Tree[NodeT]: """ Returns a :class:`Tree` with added node *node* having a parent *parent*. @@ -165,7 +166,7 @@ def add_node(self, node: NodeT, parent: NodeT) -> "Tree[NodeT]": .set(node, ())), self._child_to_parent.set(node, parent)) - def replace_node(self, node: NodeT, new_node: NodeT) -> "Tree[NodeT]": + def replace_node(self, node: NodeT, new_node: NodeT) -> Tree[NodeT]: """ Returns a copy of *self* with *node* replaced with *new_node*. """ @@ -207,7 +208,7 @@ def replace_node(self, node: NodeT, new_node: NodeT) -> "Tree[NodeT]": return Tree(parent_to_children_mut.finish(), child_to_parent_mut.finish()) - def move_node(self, node: NodeT, new_parent: Optional[NodeT]) -> "Tree[NodeT]": + def move_node(self, node: NodeT, new_parent: NodeT | None) -> Tree[NodeT]: """ Returns a copy of *self* with node *node* as a child of *new_parent*. """ @@ -262,7 +263,7 @@ def __str__(self) -> str: ├── D └── E """ - def rec(node: NodeT) -> List[str]: + def rec(node: NodeT) -> list[str]: children_result = [rec(c) for c in self.children(node)] def post_process_non_last_child(children: Sequence[str]) -> list[str]: diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index c2cd0a5c..b0fbb546 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -155,7 +155,8 @@ def storage_axis_exprs(storage_axis_sources, args) -> Sequence[ExpressionT]: # {{{ gather rule invocations class RuleInvocationGatherer(RuleAwareIdentityMapper): - def __init__(self, rule_mapping_context, kernel, subst_name, subst_tag, within): + def __init__(self, rule_mapping_context, kernel, subst_name, subst_tag, within) \ + -> None: super().__init__(rule_mapping_context) from loopy.symbolic import SubstitutionRuleExpander @@ -167,7 +168,7 @@ def __init__(self, rule_mapping_context, kernel, subst_name, subst_tag, within): self.subst_tag = subst_tag self.within = within - self.access_descriptors: List[RuleAccessDescriptor] = [] + self.access_descriptors: list[RuleAccessDescriptor] = [] def map_substitution(self, name, tag, arguments, expn_state): process_me = name == self.subst_name From 41b328882172aafddd6c16ee6260df1177a5319b Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Thu, 14 Nov 2024 10:18:48 -0600 Subject: [PATCH 07/24] LazilyUnpickling{Dict,List}: add __repr__ (#817) * LazilyUnpickling{Dict,List}: better repr * add type to repr of PickledObject --- loopy/tools.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/loopy/tools.py b/loopy/tools.py index bf7785fc..bb4904bf 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -348,6 +348,9 @@ def unpickle(self): def __getstate__(self): return {"objstring": self.objstring} + def __repr__(self) -> str: + return type(self).__name__ + "(" + repr(self.unpickle()) + ")" + class _PickledObjectWithEqAndPersistentHashKeys(_PickledObject): """Like :class:`_PickledObject`, with two additional attributes: @@ -406,6 +409,9 @@ def __getstate__(self): key: _PickledObject(val) for key, val in self._map.items()}} + def __repr__(self) -> str: + return type(self).__name__ + "(" + repr(self._map) + ")" + # }}} @@ -444,6 +450,9 @@ def __add__(self, other): def __mul__(self, other): return self._list * other + def __repr__(self) -> str: + return type(self).__name__ + "(" + repr(self._list) + ")" + class LazilyUnpicklingListWithEqAndPersistentHashing(LazilyUnpicklingList): """A list which lazily unpickles its values, and supports equality comparison From 5fe8255b1c3217ba310cc6a2951158272df6c966 Mon Sep 17 00:00:00 2001 From: Connor Ward Date: Tue, 19 Nov 2024 10:58:56 +0000 Subject: [PATCH 08/24] Remove kernel.schedule (deprecated) --- loopy/kernel/__init__.py | 8 -------- loopy/schedule/__init__.py | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 617ac4ed..4a92d12a 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -530,14 +530,6 @@ def _get_inames_domain_backend(self, inames): # }}} - @property - def schedule(self): - warn( - "'LoopKernel.schedule' is deprecated and will be removed in 2022. " - "Call 'LoopKernel.linearization' instead.", - DeprecationWarning, stacklevel=2) - return self.linearization - # {{{ iname wrangling def iname_tags(self, iname): diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index a9121de8..2460f5ed 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -923,7 +923,7 @@ def _generate_loop_schedules_v2(kernel: LoopKernel) -> Sequence[ScheduleItem]: raise V2SchedulerNotImplementedError("v2 scheduler cannot schedule" " kernels with instruction priorities set.") - if kernel.schedule is not None: + if kernel.linearization is not None: # cannot handle preschedule yet raise V2SchedulerNotImplementedError("v2 scheduler cannot schedule" " prescheduled kernels.") From d72326629d778321a7d55ad6a8edd150a86b4ad9 Mon Sep 17 00:00:00 2001 From: Connor Ward Date: Tue, 19 Nov 2024 11:08:38 +0000 Subject: [PATCH 09/24] fix docs --- loopy/kernel/__init__.py | 2 +- loopy/target/c/compyte | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 4a92d12a..96764026 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -121,7 +121,7 @@ class LoopKernel(Taggable): .. autoattribute:: domains .. autoattribute:: instructions .. autoattribute:: args - .. autoattribute:: schedule + .. autoattribute:: linearization .. autoattribute:: name .. autoattribute:: preambles .. autoattribute:: preamble_generators diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte index fcb59401..d4549d4c 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit fcb59401cd61704037002b714519d0f7af2c4c59 +Subproject commit d4549d4c711513e2cc098d3f5d4e918eac53ee7a From c80684d6cb4de8d9933b10a913f4bf1f94440c12 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Tue, 19 Nov 2024 08:55:08 -0600 Subject: [PATCH 10/24] Update compyte --- loopy/target/c/compyte | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/target/c/compyte b/loopy/target/c/compyte index d4549d4c..955160ac 160000 --- a/loopy/target/c/compyte +++ b/loopy/target/c/compyte @@ -1 +1 @@ -Subproject commit d4549d4c711513e2cc098d3f5d4e918eac53ee7a +Subproject commit 955160ac2f504dabcd8641471a56146fa1afe35d From 3d16d57d9fd3de152f45725fa823c7ab7928c7e9 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Thu, 21 Nov 2024 10:18:56 -0600 Subject: [PATCH 11/24] Track renames in pymbolic 2024.2 --- doc/conf.py | 2 +- doc/tutorial.rst | 4 +-- loopy/check.py | 4 +-- loopy/codegen/__init__.py | 12 +++---- loopy/codegen/tools.py | 2 -- loopy/isl_helpers.py | 6 ++-- loopy/kernel/__init__.py | 10 +++--- loopy/kernel/array.py | 36 +++++++++---------- loopy/kernel/data.py | 20 +++++------ loopy/kernel/instruction.py | 18 +++++----- loopy/preprocess.py | 6 ++-- loopy/statistics.py | 4 +-- loopy/symbolic.py | 54 ++++++++++++++++------------ loopy/target/__init__.py | 6 ++-- loopy/target/c/__init__.py | 6 ++-- loopy/target/c/c_execution.py | 4 +-- loopy/target/c/codegen/expression.py | 4 +-- loopy/target/execution.py | 12 +++---- loopy/target/ispc.py | 6 ++-- loopy/target/pyopencl.py | 4 +-- loopy/target/pyopencl_execution.py | 4 +-- loopy/transform/array_buffer_map.py | 16 ++++----- loopy/transform/data.py | 6 ++-- loopy/transform/precompute.py | 12 +++---- loopy/transform/realize_reduction.py | 4 +-- loopy/typing.py | 14 ++++---- pyproject.toml | 2 +- 27 files changed, 140 insertions(+), 138 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index c4a13c44..951b0221 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -62,7 +62,7 @@ ["py:class", r"immutables\.(.+)"], # Reference not found from ""? I'm not even sure where to look. - ["py:class", r"Expression"], + ["py:class", r"ExpressionNode"], ] autodoc_type_aliases = { diff --git a/doc/tutorial.rst b/doc/tutorial.rst index 4aeb4242..dad7c171 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1681,7 +1681,7 @@ Each line of output will look roughly like:: data type accessed. - lid_strides: A :class:`dict` of **{** :class:`int` **:** - :class:`pymbolic.primitives.Expression` or :class:`int` **}** that specifies + :attr:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies local strides for each local id in the memory access index. Local ids not found will not be present in ``lid_strides.keys()``. Uniform access (i.e. work-items within a sub-group access the same item) is indicated by setting @@ -1689,7 +1689,7 @@ Each line of output will look roughly like:: which case the 0 key will not be present in lid_strides. - gid_strides: A :class:`dict` of **{** :class:`int` **:** - :class:`pymbolic.primitives.Expression` or :class:`int` **}** that specifies + :attr:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies global strides for each global id in the memory access index. Global ids not found will not be present in ``gid_strides.keys()``. diff --git a/loopy/check.py b/loopy/check.py index ee24d6e4..f355e99c 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -68,7 +68,7 @@ check_each_kernel, ) from loopy.type_inference import TypeReader -from loopy.typing import ExpressionT, not_none +from loopy.typing import not_none logger = logging.getLogger(__name__) @@ -221,7 +221,7 @@ def check_offsets_and_dim_tags(kernel: LoopKernel) -> None: dep_mapper = DependencyMapper() def ensure_depends_only_on_arguments( - what: str, expr: Union[str, ExpressionT]) -> None: + what: str, expr: Union[str, Expression]) -> None: if isinstance(expr, str): expr = Variable(expr) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index 2e39d89b..d460dd54 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -57,7 +57,7 @@ from loopy.target import TargetBase from loopy.tools import LoopyKeyBuilder, caches from loopy.types import LoopyType -from loopy.typing import ExpressionT +from loopy.typing import Expression from loopy.version import DATA_MODEL_VERSION @@ -90,9 +90,9 @@ References ^^^^^^^^^^ -.. class:: Expression +.. class:: ExpressionNode - See :class:`pymbolic.Expression`. + See :class:`pymbolic.primitives.ExpressionNode`. """ @@ -200,14 +200,14 @@ class CodeGenerationState: kernel: LoopKernel target: TargetBase implemented_domain: isl.Set - implemented_predicates: FrozenSet[Union[str, ExpressionT]] + implemented_predicates: FrozenSet[Union[str, Expression]] # /!\ mutable seen_dtypes: Set[LoopyType] seen_functions: Set[SeenFunction] seen_atomic_dtypes: Set[LoopyType] - var_subst_map: Map[str, ExpressionT] + var_subst_map: Map[str, Expression] allow_complex: bool callables_table: CallablesTable is_entrypoint: bool @@ -231,7 +231,7 @@ def copy(self, **kwargs: Any) -> "CodeGenerationState": return replace(self, **kwargs) def copy_and_assign( - self, name: str, value: ExpressionT) -> "CodeGenerationState": + self, name: str, value: Expression) -> "CodeGenerationState": """Make a copy of self with variable *name* fixed to *value*.""" return self.copy(var_subst_map=self.var_subst_map.set(name, value)) diff --git a/loopy/codegen/tools.py b/loopy/codegen/tools.py index cb6285b0..783892f8 100644 --- a/loopy/codegen/tools.py +++ b/loopy/codegen/tools.py @@ -42,8 +42,6 @@ __doc__ = """ -.. currentmodule:: loopy.codegen.tools - .. autoclass:: KernelProxyForCodegenOperationCacheManager .. autoclass:: CodegenOperationCacheManager diff --git a/loopy/isl_helpers.py b/loopy/isl_helpers.py index 28aa3be3..9fbb3c9d 100644 --- a/loopy/isl_helpers.py +++ b/loopy/isl_helpers.py @@ -93,12 +93,12 @@ def make_slab(space, iname, start, stop, iname_multiplier=1): space = zero.get_domain_space() - from pymbolic.primitives import Expression + from pymbolic.primitives import ExpressionNode from loopy.symbolic import aff_from_expr - if isinstance(start, Expression): + if isinstance(start, ExpressionNode): start = aff_from_expr(space, start) - if isinstance(stop, Expression): + if isinstance(stop, ExpressionNode): stop = aff_from_expr(space, stop) if isinstance(start, int): diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 96764026..4f392edd 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -49,7 +49,7 @@ import islpy as isl from islpy import dim_type -from pymbolic import ArithmeticExpressionT +from pymbolic import ArithmeticExpression from pytools import ( UniqueNameGenerator, generate_unique_names, @@ -75,7 +75,7 @@ from loopy.target import TargetBase from loopy.tools import update_persistent_hash from loopy.types import LoopyType, NumpyType -from loopy.typing import ExpressionT, InameStr +from loopy.typing import Expression, InameStr if TYPE_CHECKING: @@ -193,7 +193,7 @@ class LoopKernel(Taggable): with non-parallel implementation tags. """ - applied_iname_rewrites: Tuple[Dict[InameStr, ExpressionT], ...] = () + applied_iname_rewrites: Tuple[Dict[InameStr, Expression], ...] = () """ A list of past substitution dictionaries that were applied to the kernel. These are stored so that they may be repeated @@ -1036,8 +1036,8 @@ def get_grid_size_upper_bounds_as_exprs( self, callables_table, ignore_auto=False, return_dict=False ) -> Tuple[ - Tuple[ArithmeticExpressionT, ...], - Tuple[ArithmeticExpressionT, ...]]: + Tuple[ArithmeticExpression, ...], + Tuple[ArithmeticExpression, ...]]: """Return a tuple (global_size, local_size) containing a grid that could accommodate execution of *all* instructions in the kernel. diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 8cabbec2..64a9b857 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -45,7 +45,7 @@ import numpy as np # noqa from typing_extensions import TypeAlias -from pymbolic import ArithmeticExpressionT +from pymbolic import ArithmeticExpression from pymbolic.primitives import is_arithmetic_expression from pytools import ImmutableRecord from pytools.tag import Tag, Taggable @@ -53,7 +53,7 @@ from loopy.diagnostic import LoopyError from loopy.symbolic import flatten from loopy.types import LoopyType -from loopy.typing import ExpressionT, ShapeType, auto, is_integer +from loopy.typing import Expression, ShapeType, auto, is_integer if TYPE_CHECKING: @@ -93,10 +93,6 @@ See :class:`loopy.typing.ShapeType` -.. class:: ExpressionT - - See :class:`loopy.typing.ExpressionT` - .. class:: Tag See :class:`pytools.tag.Tag` @@ -150,7 +146,7 @@ class FixedStrideArrayDimTag(_StrideArrayDimTagBase): May be one of the following: - - A :class:`pymbolic.primitives.Expression`, including an + - A :attr:`~pymbolic.typing.Expression`, including an integer, indicating the stride in units of the underlying array's :attr:`ArrayBase.dtype`. @@ -609,8 +605,8 @@ def convert_computed_to_fixed_dim_tags(name, num_user_axes, num_target_axes, # {{{ array base class (for arguments and temporary arrays) -ToShapeLikeConvertible: TypeAlias = (Tuple[ExpressionT | str, ...] - | ExpressionT | type[auto] | str | tuple[str, ...]) +ToShapeLikeConvertible: TypeAlias = (Tuple[Expression | str, ...] + | Expression | type[auto] | str | tuple[str, ...]) def _parse_shape_or_strides( @@ -634,12 +630,12 @@ def _parse_shape_or_strides( raise ValueError("shape can't be a list") if isinstance(x_parsed, tuple): - x_tup: tuple[ExpressionT | str, ...] = x_parsed + x_tup: tuple[Expression | str, ...] = x_parsed else: assert x_parsed is not auto - x_tup = (cast(ExpressionT, x_parsed),) + x_tup = (cast(Expression, x_parsed),) - def parse_arith(x: ExpressionT | str) -> ArithmeticExpressionT: + def parse_arith(x: Expression | str) -> ArithmeticExpression: if isinstance(x, str): res = parse(x) else: @@ -714,7 +710,7 @@ class ArrayBase(ImmutableRecord, Taggable): """See :ref:`data-dim-tags`. """ - offset: Union[ExpressionT, str, None] + offset: Union[Expression, str, None] """Offset from the beginning of the buffer to the point from which the strides are counted, in units of the :attr:`dtype`. May be one of @@ -1158,9 +1154,9 @@ def drop_vec_dims( if not isinstance(dim_tag, VectorArrayDimTag)) -def get_strides(array: ArrayBase) -> Tuple[ExpressionT, ...]: +def get_strides(array: ArrayBase) -> Tuple[Expression, ...]: from pymbolic import var - result: List[ExpressionT] = [] + result: List[Expression] = [] if array.dim_tags is None: return () @@ -1188,10 +1184,10 @@ def get_strides(array: ArrayBase) -> Tuple[ExpressionT, ...]: class AccessInfo(ImmutableRecord): array_name: str vector_index: Optional[int] - subscripts: Tuple[ExpressionT, ...] + subscripts: Tuple[Expression, ...] -def _apply_offset(sub: ExpressionT, ary: ArrayBase) -> ExpressionT: +def _apply_offset(sub: Expression, ary: ArrayBase) -> Expression: """ Helper for :func:`get_access_info`. Augments *ary*'s subscript index expression (*sub*) with its offset info. @@ -1228,8 +1224,8 @@ def _apply_offset(sub: ExpressionT, ary: ArrayBase) -> ExpressionT: def get_access_info(kernel: "LoopKernel", ary: Union["ArrayArg", "TemporaryVariable"], - index: Union[ExpressionT, Tuple[ExpressionT, ...]], - eval_expr: Callable[[ExpressionT], int], + index: Union[Expression, Tuple[Expression, ...]], + eval_expr: Callable[[Expression], int], vectorization_info: "VectorizationInfo") -> AccessInfo: """ :arg ary: an object of type :class:`ArrayBase` @@ -1283,7 +1279,7 @@ def eval_expr_assert_integer_constant(i, expr) -> int: num_target_axes = ary.num_target_axes() vector_index = None - subscripts: List[ExpressionT] = [0] * num_target_axes + subscripts: List[Expression] = [0] * num_target_axes vector_size = ary.vector_size(kernel.target) diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 5d1de0e5..8ca5aa87 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -45,7 +45,7 @@ import numpy as np from immutables import Map -from pymbolic import ArithmeticExpressionT, Variable +from pymbolic import ArithmeticExpression, Variable from pytools import ImmutableRecord from pytools.tag import Tag, Taggable, UniqueTag as UniqueTagBase @@ -65,7 +65,7 @@ make_assignment, ) from loopy.types import LoopyType, ToLoopyTypeConvertible -from loopy.typing import ExpressionT, ShapeType, auto +from loopy.typing import Expression, ShapeType, auto __doc__ = """ @@ -103,7 +103,7 @@ # {{{ utilities -def _names_from_expr(expr: Union[None, ExpressionT, str]) -> FrozenSet[str]: +def _names_from_expr(expr: Union[None, Expression, str]) -> FrozenSet[str]: from numbers import Number from loopy.symbolic import DependencyMapper @@ -651,7 +651,7 @@ class TemporaryVariable(ArrayBase): """ storage_shape: Optional[ShapeType] - base_indices: Optional[Tuple[ExpressionT, ...]] + base_indices: Optional[Tuple[Expression, ...]] address_space: Union[AddressSpace, Type[auto]] base_storage: Optional[str] """The name of a storage array that is to be used to actually @@ -698,12 +698,12 @@ def __init__( shape: Union[ShapeType, Type["auto"], None] = auto, address_space: Union[AddressSpace, Type[auto], None] = None, dim_tags: Optional[Sequence[ArrayDimImplementationTag]] = None, - offset: Union[ExpressionT, str, None] = 0, + offset: Union[Expression, str, None] = 0, dim_names: Optional[Tuple[str, ...]] = None, - strides: Optional[Tuple[ExpressionT, ...]] = None, + strides: Optional[Tuple[Expression, ...]] = None, order: str | None = None, - base_indices: Optional[Tuple[ExpressionT, ...]] = None, + base_indices: Optional[Tuple[Expression, ...]] = None, storage_shape: ShapeType | None = None, base_storage: Optional[str] = None, @@ -809,7 +809,7 @@ def copy(self, **kwargs: Any) -> TemporaryVariable: return super().copy(**kwargs) @property - def nbytes(self) -> ExpressionT: + def nbytes(self) -> Expression: if self.storage_shape is not None: shape = self.storage_shape else: @@ -817,7 +817,7 @@ def nbytes(self) -> ExpressionT: raise ValueError("shape is None") if self.shape is auto: raise ValueError("shape is auto") - shape = cast(Tuple[ArithmeticExpressionT], self.shape) + shape = cast(Tuple[ArithmeticExpression], self.shape) if self.dtype is None: raise ValueError("data type is indeterminate") @@ -898,7 +898,7 @@ class SubstitutionRule: name: str arguments: Sequence[str] - expression: ExpressionT + expression: Expression def copy(self, **kwargs: Any) -> SubstitutionRule: return replace(self, **kwargs) diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 51d4856d..32cf664b 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -37,7 +37,7 @@ from loopy.diagnostic import LoopyError from loopy.tools import Optional as LoopyOptional from loopy.types import LoopyType -from loopy.typing import ExpressionT, InameStr +from loopy.typing import Expression, InameStr # {{{ instruction tags @@ -250,7 +250,7 @@ class InstructionBase(ImmutableRecord, Taggable): groups: FrozenSet[str] conflicts_with_groups: FrozenSet[str] no_sync_with: FrozenSet[Tuple[str, str]] - predicates: FrozenSet[ExpressionT] + predicates: FrozenSet[Expression] within_inames: FrozenSet[InameStr] within_inames_is_final: bool priority: int @@ -901,8 +901,8 @@ class Assignment(MultiAssignmentBase): .. automethod:: __init__ """ - assignee: ExpressionT - expression: ExpressionT + assignee: Expression + expression: Expression temp_var_type: LoopyOptional atomicity: Tuple[VarAtomicity, ...] @@ -910,8 +910,8 @@ class Assignment(MultiAssignmentBase): set("assignee temp_var_type atomicity".split()) def __init__(self, - assignee: Union[str, ExpressionT], - expression: Union[str, ExpressionT], + assignee: Union[str, Expression], + expression: Union[str, Expression], id: Optional[str] = None, happens_after: Union[ Mapping[str, HappensAfter], FrozenSet[str], str, None] = None, @@ -1271,8 +1271,8 @@ def modify_assignee_for_array_call(assignee): "SubArrayRef as its inputs") -def make_assignment(assignees: tuple[ExpressionT, ...], - expression: ExpressionT, +def make_assignment(assignees: tuple[Expression, ...], + expression: Expression, temp_var_types: ( Sequence[LoopyType | None] | None) = None, **kwargs: Any) -> Assignment | CallInstruction: @@ -1372,7 +1372,7 @@ class CInstruction(InstructionBase): .. attribute:: assignees A sequence (typically a :class:`tuple`) of variable references (with or - without subscript) as :class:`pymbolic.primitives.Expression` instances + without subscript) as :attr:`pymbolic.typing.Expression` instances that :attr:`code` writes to. This is optional and only used for figuring out dependencies. """ diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 3293e9a1..4dc824ea 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -68,7 +68,7 @@ # for the benefit of loopy.statistics, for now from loopy.type_inference import infer_unknown_types -from loopy.typing import ExpressionT +from loopy.typing import Expression # {{{ check for writes to predicates @@ -174,14 +174,14 @@ def make_arrays_for_sep_arrays(kernel: LoopKernel) -> LoopKernel: sep_axis_indices_set = frozenset(sep_axis_indices) assert isinstance(arg.shape, tuple) - new_shape: Optional[Tuple[ExpressionT, ...]] = \ + new_shape: Optional[Tuple[Expression, ...]] = \ _remove_at_indices(sep_axis_indices_set, arg.shape) new_dim_tags: Optional[Tuple[ArrayDimImplementationTag, ...]] = \ _remove_at_indices(sep_axis_indices_set, arg.dim_tags) new_dim_names: Optional[Tuple[Optional[str], ...]] = \ _remove_at_indices(sep_axis_indices_set, arg.dim_names) - sep_shape: List[ExpressionT] = [arg.shape[i] for i in sep_axis_indices] + sep_shape: List[Expression] = [arg.shape[i] for i in sep_axis_indices] for i, sep_shape_i in enumerate(sep_shape): if not isinstance(sep_shape_i, (int, np.integer)): raise LoopyError( diff --git a/loopy/statistics.py b/loopy/statistics.py index 99b163f8..94f82058 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -681,7 +681,7 @@ class MemAccess(ImmutableRecord): .. attribute:: lid_strides A :class:`dict` of **{** :class:`int` **:** - :class:`pymbolic.primitives.Expression` or :class:`int` **}** that + :attr:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies local strides for each local id in the memory access index. Local ids not found will not be present in ``lid_strides.keys()``. Uniform access (i.e. work-items within a sub-group access the same @@ -692,7 +692,7 @@ class MemAccess(ImmutableRecord): .. attribute:: gid_strides A :class:`dict` of **{** :class:`int` **:** - :class:`pymbolic.primitives.Expression` or :class:`int` **}** that + :attr:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies global strides for each global id in the memory access index. global ids not found will not be present in ``gid_strides.keys()``. diff --git a/loopy/symbolic.py b/loopy/symbolic.py index ad502e1a..1ef933ad 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -47,7 +47,7 @@ import pymbolic.primitives as p import pytools.lex from islpy import dim_type -from pymbolic import ArithmeticExpressionT, Variable +from pymbolic import ArithmeticExpression, Variable from pymbolic.mapper import ( CachedCombineMapper as CombineMapperBase, CachedIdentityMapper as IdentityMapperBase, @@ -81,7 +81,7 @@ UnableToDetermineAccessRangeError, ) from loopy.types import LoopyType, NumpyType, ToLoopyTypeConvertible -from loopy.typing import ExpressionT, auto +from loopy.typing import Expression, auto if TYPE_CHECKING: @@ -124,12 +124,20 @@ .. class:: Variable See :class:`pymbolic.Variable`. + +.. class:: Expression + + See :attr:`pymbolic.typing.Expression`. + +.. class:: _Expression + + See :class:`pymbolic.primitives.ExpressionNode`. """ # {{{ mappers with support for loopy-specific primitives -class IdentityMapperMixin(Mapper[ExpressionT, P]): +class IdentityMapperMixin(Mapper[Expression, P]): def map_literal(self, expr: Literal, *args, **kwargs): return expr @@ -206,7 +214,7 @@ def map_resolved_function(self, expr, *args, **kwargs): class FlattenMapper(FlattenMapperBase, IdentityMapperMixin): # FIXME: Lies! This needs to be made precise. - def is_expr_integer_valued(self, expr: ExpressionT) -> bool: + def is_expr_integer_valued(self, expr: Expression) -> bool: return True @@ -505,7 +513,7 @@ def map_substitution(self, name, rule, arguments): # {{{ loopy-specific primitives -class LoopyExpressionBase(p.Expression): +class LoopyExpressionBase(p.ExpressionNode): def stringifier(self): from loopy.diagnostic import LoopyError raise LoopyError("pymbolic < 2019.1 is in use. Please upgrade.") @@ -539,7 +547,7 @@ class ArrayLiteral(LoopyExpressionBase): similar mappers). Not for use in Loopy source representation. """ - children: tuple[ExpressionT, ...] + children: tuple[Expression, ...] @p.expr_dataclass() @@ -602,7 +610,7 @@ class TypeAnnotation(LoopyExpressionBase): """ type: LoopyType - child: ExpressionT + child: Expression @p.expr_dataclass(init=False) @@ -618,10 +626,10 @@ class TypeCast(LoopyExpressionBase): # numpy pickling bug madness. (see loopy.types) _type_name: str - child: ExpressionT + child: Expression """The expression to be cast.""" - def __init__(self, type: ToLoopyTypeConvertible, child: ExpressionT): + def __init__(self, type: ToLoopyTypeConvertible, child: Expression): super().__init__() from loopy.types import NumpyType, to_loopy_type @@ -700,11 +708,11 @@ class Reduction(LoopyExpressionBase): carried out. """ - expr: ExpressionT + expr: Expression """An expression which may have tuple type. If the expression has tuple type, it must be one of the following: - * a :class:`tuple` of :class:`pymbolic.primitives.Expression`, or + * a :class:`tuple` of :attr:`pymbolic.typing.Expression`, or * a :class:`loopy.symbolic.Reduction`, or * a function call or substitution rule invocation. """ @@ -718,7 +726,7 @@ def __init__(self, operation: ReductionOperation | str, inames: (tuple[str | pymbolic.primitives.Variable, ...] | pymbolic.primitives.Variable | str), - expr: ExpressionT, + expr: Expression, allow_simultaneous: bool = False ) -> None: if isinstance(inames, str): @@ -780,8 +788,8 @@ class LinearSubscript(LoopyExpressionBase): """Represents a linear index into a multi-dimensional array, completely ignoring any multi-dimensional layout. """ - aggregate: ExpressionT - index: ExpressionT + aggregate: Expression + index: Expression @p.expr_dataclass() @@ -966,11 +974,11 @@ def _get_dependencies_and_reduction_inames(expr): return deps, reduction_inames -def get_dependencies(expr: ExpressionT | type[auto]) -> AbstractSet[str]: +def get_dependencies(expr: Expression | type[auto]) -> AbstractSet[str]: return _get_dependencies_and_reduction_inames(expr)[0] -def get_reduction_inames(expr: ExpressionT) -> AbstractSet[str]: +def get_reduction_inames(expr: Expression) -> AbstractSet[str]: return _get_dependencies_and_reduction_inames(expr)[1] @@ -1255,9 +1263,9 @@ def map_call(self, expr, expn_state, *args, **kwargs): def make_new_arg_context( rule_name: str, arg_names: Sequence[str], - arguments: Sequence[ExpressionT], - arg_context: Mapping[str, ExpressionT] - ) -> Mapping[str, ExpressionT]: + arguments: Sequence[Expression], + arg_context: Mapping[str, Expression] + ) -> Mapping[str, Expression]: if len(arg_names) != len(arguments): raise RuntimeError("Rule '%s' invoked with %d arguments (needs %d)" % (rule_name, len(arguments), len(arg_names), )) @@ -1709,7 +1717,7 @@ def map_subscript(self, expr): # {{{ (pw)aff to expr conversion -def aff_to_expr(aff: isl.Aff) -> ArithmeticExpressionT: +def aff_to_expr(aff: isl.Aff) -> ArithmeticExpression: from pymbolic import var denom = aff.get_denominator_val().to_python() @@ -1730,7 +1738,7 @@ def aff_to_expr(aff: isl.Aff) -> ArithmeticExpressionT: return flatten(result // denom) -def pw_aff_to_expr(pw_aff: isl.PwAff, int_ok: bool = False) -> ExpressionT: +def pw_aff_to_expr(pw_aff: isl.PwAff, int_ok: bool = False) -> Expression: if isinstance(pw_aff, int): if not int_ok: warn("expected PwAff, got int", stacklevel=2) @@ -1844,7 +1852,7 @@ def map_call(self, expr): "for as-pwaff evaluation") -def aff_from_expr(space: isl.Space, expr: ExpressionT, vars_to_zero=None) -> isl.Aff: +def aff_from_expr(space: isl.Space, expr: Expression, vars_to_zero=None) -> isl.Aff: if vars_to_zero is None: vars_to_zero = frozenset() @@ -1985,7 +1993,7 @@ def simplify_using_aff(kernel, expr): """ Simplifies *expr* on *kernel*'s domain. - :arg expr: An instance of :class:`pymbolic.primitives.Expression`. + :arg expr: An instance of :attr:`pymbolic.typing.Expression`. """ deps = get_dependencies(expr) diff --git a/loopy/target/__init__.py b/loopy/target/__init__.py index 48ab04f8..b8ada88d 100644 --- a/loopy/target/__init__.py +++ b/loopy/target/__init__.py @@ -64,7 +64,7 @@ from loopy.codegen.result import CodeGenerationResult from loopy.target.execution import ExecutorBase from loopy.translation_unit import FunctionIdT, TranslationUnit - from loopy.typing import ExpressionT + from loopy.typing import Expression ASTType = TypeVar("ASTType") @@ -240,8 +240,8 @@ def get_temporary_decls(self, codegen_state: CodeGenerationState, def get_kernel_call(self, codegen_state: CodeGenerationState, subkernel_name: str, - gsize: Tuple[ExpressionT, ...], - lsize: Tuple[ExpressionT, ...]) -> Optional[ASTType]: + gsize: Tuple[Expression, ...], + lsize: Tuple[Expression, ...]) -> Optional[ASTType]: raise NotImplementedError() @property diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 9f227bd3..a2961eee 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -64,7 +64,7 @@ from loopy.tools import remove_common_indentation from loopy.translation_unit import FunctionIdT, TranslationUnit from loopy.types import LoopyType, NumpyType, to_loopy_type -from loopy.typing import ExpressionT, auto +from loopy.typing import Expression, auto __doc__ = """ @@ -880,8 +880,8 @@ def get_function_declaration( def get_kernel_call(self, codegen_state: CodeGenerationState, subkernel_name: str, - gsize: Tuple[ExpressionT, ...], - lsize: Tuple[ExpressionT, ...]) -> Optional[Generable]: + gsize: Tuple[Expression, ...], + lsize: Tuple[Expression, ...]) -> Optional[Generable]: return None def emit_temp_var_decl_for_tv_with_base_storage(self, diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 9cde501a..6bc496f5 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -48,7 +48,7 @@ ) from loopy.translation_unit import TranslationUnit from loopy.types import LoopyType -from loopy.typing import ExpressionT +from loopy.typing import Expression logger = logging.getLogger(__name__) @@ -105,7 +105,7 @@ def handle_non_numpy_arg(self, gen, arg): def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, - strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str], + strify: Callable[[Union[Expression, Tuple[Expression]]], str], skip_arg_checks: bool) -> None: """ Handle allocation of non-specified arguments for C-execution diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 0c15faa5..e201326a 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -48,7 +48,7 @@ from loopy.target.c import CExpression from loopy.type_inference import TypeReader from loopy.types import LoopyType -from loopy.typing import ExpressionT, is_integer +from loopy.typing import Expression, is_integer __doc__ = """ @@ -92,7 +92,7 @@ def with_assignments(self, names_to_vars): type_inf_mapper = self.type_inf_mapper.with_assignments(names_to_vars) return type(self)(self.codegen_state, self.fortran_abi, type_inf_mapper) - def infer_type(self, expr: ExpressionT) -> LoopyType: + def infer_type(self, expr: Expression) -> LoopyType: result = self.type_inf_mapper(expr) assert isinstance(result, LoopyType) diff --git a/loopy/target/execution.py b/loopy/target/execution.py index 2443a142..eaeb76b4 100644 --- a/loopy/target/execution.py +++ b/loopy/target/execution.py @@ -58,7 +58,7 @@ from loopy.tools import LoopyKeyBuilder, caches from loopy.translation_unit import TranslationUnit from loopy.types import LoopyType, NumpyType -from loopy.typing import ExpressionT, integer_expr_or_err +from loopy.typing import Expression, integer_expr_or_err from loopy.version import DATA_MODEL_VERSION @@ -109,7 +109,7 @@ def __call__(self, kernel_kwargs: Dict[str, Any]) -> Dict[str, Any]: # {{{ ExecutionWrapperGeneratorBase -def _str_to_expr(name_or_expr: Union[str, ExpressionT]) -> ExpressionT: +def _str_to_expr(name_or_expr: Union[str, Expression]) -> Expression: if isinstance(name_or_expr, str): return var(name_or_expr) else: @@ -118,8 +118,8 @@ def _str_to_expr(name_or_expr: Union[str, ExpressionT]) -> ExpressionT: @dataclass(frozen=True) class _ArgFindingEquation: - lhs: ExpressionT - rhs: ExpressionT + lhs: Expression + rhs: Expression # Arg finding code is sorted by priority, all equations (across all unknowns) # of lowest priority first. @@ -389,7 +389,7 @@ def handle_non_numpy_arg(self, gen: CodeGenerator, arg): def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, - strify: Callable[[Union[ExpressionT, Tuple[ExpressionT]]], str], + strify: Callable[[Union[Expression, Tuple[Expression]]], str], skip_arg_checks: bool) -> None: """ Handle allocation of non-specified arguments for C-execution @@ -534,7 +534,7 @@ def strify_allowing_none(shape_axis): else: return strify(shape_axis) - def strify_tuple(t: Optional[Tuple[ExpressionT, ...]]) -> str: + def strify_tuple(t: Optional[Tuple[Expression, ...]]) -> str: if t is None: return "None" if len(t) == 0: diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 1cd7a5bd..4200a4b2 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -43,7 +43,7 @@ from loopy.target.c import CFamilyASTBuilder, CFamilyTarget from loopy.target.c.codegen.expression import ExpressionToCExpressionMapper from loopy.types import LoopyType -from loopy.typing import ExpressionT +from loopy.typing import Expression # {{{ expression mapper @@ -252,8 +252,8 @@ def get_function_declaration( def get_kernel_call(self, codegen_state: CodeGenerationState, subkernel_name: str, - gsize: Tuple[ExpressionT, ...], - lsize: Tuple[ExpressionT, ...]) -> Generable: + gsize: Tuple[Expression, ...], + lsize: Tuple[Expression, ...]) -> Generable: kernel = codegen_state.kernel ecm = self.get_expression_to_code_mapper(codegen_state) diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index e4da6cd8..fa7fd20e 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -67,7 +67,7 @@ from loopy.target.python import PythonASTBuilderBase from loopy.translation_unit import FunctionIdT, TranslationUnit from loopy.types import NumpyType -from loopy.typing import ExpressionT +from loopy.typing import Expression logger = logging.getLogger(__name__) @@ -855,7 +855,7 @@ def get_temporary_decls(self, codegen_state, schedule_index): def get_kernel_call( self, codegen_state: CodeGenerationState, subkernel_name: str, - gsize: Tuple[ExpressionT, ...], lsize: Tuple[ExpressionT, ...] + gsize: Tuple[Expression, ...], lsize: Tuple[Expression, ...] ) -> genpy.Suite: from genpy import Assert, Assign, Comment, Line, Suite diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index be859ab7..248f5f2e 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -37,7 +37,7 @@ from loopy.schedule.tools import KernelArgInfo from loopy.target.execution import ExecutionWrapperGeneratorBase, ExecutorBase from loopy.types import LoopyType -from loopy.typing import ExpressionT, integer_expr_or_err +from loopy.typing import Expression, integer_expr_or_err logger = logging.getLogger(__name__) @@ -109,7 +109,7 @@ def handle_non_numpy_arg(self, gen: CodeGenerator, arg: ArrayArg) -> None: def handle_alloc( self, gen: CodeGenerator, arg: ArrayArg, - strify: Callable[[ExpressionT], str], + strify: Callable[[Expression], str], skip_arg_checks: bool) -> None: """ Handle allocation of non-specified arguments for pyopencl execution diff --git a/loopy/transform/array_buffer_map.py b/loopy/transform/array_buffer_map.py index 81b5c933..5e8e5623 100644 --- a/loopy/transform/array_buffer_map.py +++ b/loopy/transform/array_buffer_map.py @@ -29,12 +29,12 @@ import islpy as isl from islpy import dim_type -from pymbolic import ArithmeticExpressionT, var +from pymbolic import ArithmeticExpression, var from pymbolic.mapper.substitutor import make_subst_func from pytools import memoize_method from loopy.symbolic import SubstitutionMapper, get_dependencies -from loopy.typing import ExpressionT +from loopy.typing import Expression @dataclass(frozen=True) @@ -47,7 +47,7 @@ class AccessDescriptor: """ identifier: Any = None - storage_axis_exprs: Optional[Sequence[ArithmeticExpressionT]] = None + storage_axis_exprs: Optional[Sequence[ArithmeticExpression]] = None def copy(self, **kwargs) -> Self: return replace(self, **kwargs) @@ -72,10 +72,10 @@ def to_parameters_or_project_out(param_inames, set_inames, set): # {{{ construct storage->sweep map def build_per_access_storage_to_domain_map( - storage_axis_exprs: Sequence[ExpressionT], + storage_axis_exprs: Sequence[Expression], domain: isl.BasicSet, storage_axis_names: Sequence[str], - prime_sweep_inames: Callable[[ExpressionT], ExpressionT] + prime_sweep_inames: Callable[[Expression], Expression] ) -> isl.BasicMap: map_space = domain.space @@ -204,9 +204,9 @@ def compute_bounds(kernel, domain, stor2sweep, class ArrayToBufferMapBase(ABC): non1_storage_axis_names: Tuple[str, ...] - storage_base_indices: Tuple[ArithmeticExpressionT, ...] - non1_storage_shape: Tuple[ArithmeticExpressionT, ...] - non1_storage_axis_flags: Tuple[ArithmeticExpressionT, ...] + storage_base_indices: Tuple[ArithmeticExpression, ...] + non1_storage_shape: Tuple[ArithmeticExpression, ...] + non1_storage_axis_flags: Tuple[ArithmeticExpression, ...] @abstractmethod def is_access_descriptor_in_footprint(self, accdesc: AccessDescriptor) -> bool: diff --git a/loopy/transform/data.py b/loopy/transform/data.py index c63604f8..73971786 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -36,7 +36,7 @@ from loopy.kernel.function_interface import CallableKernel, ScalarCallable from loopy.translation_unit import TranslationUnit, for_each_kernel from loopy.types import LoopyType -from loopy.typing import ExpressionT +from loopy.typing import Expression # {{{ convenience: add_prefetch @@ -984,11 +984,11 @@ def add_padding_to_avoid_bank_conflicts(kernel, device): @dataclass(frozen=True) class _BaseStorageInfo: name: str - next_offset: ExpressionT + next_offset: Expression approx_nbytes: Optional[int] = None -def _sym_max(a: ExpressionT, b: ExpressionT) -> ExpressionT: +def _sym_max(a: Expression, b: Expression) -> Expression: from numbers import Number if isinstance(a, Number) and isinstance(b, Number): return max(a, b) diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index b0fbb546..0982c43f 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -28,7 +28,7 @@ from immutables import Map import islpy as isl -from pymbolic import ArithmeticExpressionT, var +from pymbolic import ArithmeticExpression, var from pymbolic.mapper.substitutor import make_subst_func from pytools import memoize_on_first_arg from pytools.tag import Tag @@ -60,7 +60,7 @@ from loopy.translation_unit import CallablesTable, TranslationUnit from loopy.types import LoopyType, ToLoopyTypeConvertible, to_loopy_type from loopy.typing import ( - ExpressionT, + Expression, auto, integer_expr_or_err, integer_or_err, @@ -133,14 +133,14 @@ def contains_a_subst_rule_invocation(kernel, insn): @dataclass(frozen=True) class RuleAccessDescriptor(AccessDescriptor): - args: Optional[Sequence[ArithmeticExpressionT]] = None + args: Optional[Sequence[ArithmeticExpression]] = None def access_descriptor_id(args, expansion_stack): return (args, expansion_stack) -def storage_axis_exprs(storage_axis_sources, args) -> Sequence[ExpressionT]: +def storage_axis_exprs(storage_axis_sources, args) -> Sequence[Expression]: result = [] for saxis_source in storage_axis_sources: @@ -577,9 +577,9 @@ def precompute_for_single_kernel( for fpg in footprint_generators: if isinstance(fpg, Variable): - args: tuple[ArithmeticExpressionT, ...] = () + args: tuple[ArithmeticExpression, ...] = () elif isinstance(fpg, Call): - args = cast(tuple[ArithmeticExpressionT, ...], fpg.parameters) + args = cast(tuple[ArithmeticExpression, ...], fpg.parameters) else: raise ValueError("footprint generator must " "be substitution rule invocation") diff --git a/loopy/transform/realize_reduction.py b/loopy/transform/realize_reduction.py index 7d1f3c87..e981ad4b 100644 --- a/loopy/transform/realize_reduction.py +++ b/loopy/transform/realize_reduction.py @@ -34,7 +34,7 @@ from immutables import Map import islpy as isl -from pymbolic.primitives import Expression +from pymbolic.primitives import ExpressionNode from pytools import memoize_on_first_arg from pytools.tag import Tag @@ -103,7 +103,7 @@ class _ReductionRealizationContext: surrounding_within_inames: FrozenSet[str] surrounding_depends_on: FrozenSet[str] surrounding_no_sync_with: FrozenSet[Tuple[str, str]] - surrounding_predicates: FrozenSet[Expression] + surrounding_predicates: FrozenSet[ExpressionNode] # }}} diff --git a/loopy/typing.py b/loopy/typing.py index 7cc7209b..1bf5150d 100644 --- a/loopy/typing.py +++ b/loopy/typing.py @@ -1,5 +1,5 @@ """ -.. autoclass:: ExpressionT +.. autoclass:: Expression .. autoclass:: ShapeType .. autoclass:: auto """ @@ -36,13 +36,13 @@ import numpy as np from typing_extensions import TypeAlias, TypeIs -from pymbolic.primitives import Expression -from pymbolic.typing import ArithmeticExpressionT, ExpressionT, IntegerT +from pymbolic.primitives import ExpressionNode +from pymbolic.typing import ArithmeticExpression, Expression, Integer # The Fortran parser may insert dimensions of 'None', but I'd like to phase # that out, so we're not encoding that in the type. -ShapeType: TypeAlias = Tuple[ArithmeticExpressionT, ...] +ShapeType: TypeAlias = Tuple[ArithmeticExpression, ...] StridesType: TypeAlias = ShapeType InameStr: TypeAlias = str @@ -67,15 +67,15 @@ def is_integer(obj: object) -> TypeIs[int | np.integer]: return isinstance(obj, (int, np.integer)) -def integer_or_err(expr: ExpressionT) -> IntegerT: +def integer_or_err(expr: Expression) -> Integer: if isinstance(expr, (int, np.integer)): return expr else: raise ValueError(f"expected integer, got {type(expr)}") -def integer_expr_or_err(expr: ExpressionT) -> IntegerT | Expression: - if isinstance(expr, (int, np.integer, Expression)): +def integer_expr_or_err(expr: Expression) -> Integer | ExpressionNode: + if isinstance(expr, (int, np.integer, ExpressionNode)): return expr else: raise ValueError(f"expected integer or expression, got {type(expr)}") diff --git a/pyproject.toml b/pyproject.toml index 3204163f..57b6ba44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ classifiers = [ ] dependencies = [ "pytools>=2024.1.5", - "pymbolic>=2024.1", + "pymbolic>=2024.2", "genpy>=2016.1.2", # https://github.com/inducer/loopy/pull/419 From 37da494b559145b651a156c742221aeaa7682bd3 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 22 Nov 2024 13:59:14 -0600 Subject: [PATCH 12/24] More deprecation fixes for pymbolic 2024.2 --- doc/tutorial.rst | 4 ++-- loopy/check.py | 11 ++++++----- loopy/kernel/array.py | 2 +- loopy/kernel/data.py | 4 ++-- loopy/kernel/instruction.py | 2 +- loopy/preprocess.py | 10 +++++----- loopy/statistics.py | 4 ++-- loopy/symbolic.py | 8 ++++---- loopy/transform/data.py | 6 +++--- loopy/transform/pack_and_unpack_args.py | 6 +++--- loopy/transform/precompute.py | 10 +++++----- 11 files changed, 34 insertions(+), 33 deletions(-) diff --git a/doc/tutorial.rst b/doc/tutorial.rst index dad7c171..a791bc85 100644 --- a/doc/tutorial.rst +++ b/doc/tutorial.rst @@ -1681,7 +1681,7 @@ Each line of output will look roughly like:: data type accessed. - lid_strides: A :class:`dict` of **{** :class:`int` **:** - :attr:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies + :data:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies local strides for each local id in the memory access index. Local ids not found will not be present in ``lid_strides.keys()``. Uniform access (i.e. work-items within a sub-group access the same item) is indicated by setting @@ -1689,7 +1689,7 @@ Each line of output will look roughly like:: which case the 0 key will not be present in lid_strides. - gid_strides: A :class:`dict` of **{** :class:`int` **:** - :attr:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies + :data:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies global strides for each global id in the memory access index. Global ids not found will not be present in ``gid_strides.keys()``. diff --git a/loopy/check.py b/loopy/check.py index f355e99c..5e489723 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -213,7 +213,8 @@ def check_separated_array_consistency(kernel: LoopKernel) -> None: @check_each_kernel def check_offsets_and_dim_tags(kernel: LoopKernel) -> None: - from pymbolic.primitives import Expression, Variable + from pymbolic.primitives import ExpressionNode, Variable + from pymbolic.typing import Expression from loopy.symbolic import DependencyMapper @@ -241,7 +242,7 @@ def ensure_depends_only_on_arguments( continue if arg.offset is auto: pass - elif isinstance(arg.offset, (int, np.integer, Expression, str)): + elif isinstance(arg.offset, (int, np.integer, ExpressionNode, str)): ensure_depends_only_on_arguments(what, arg.offset) else: @@ -259,7 +260,7 @@ def ensure_depends_only_on_arguments( if dim_tag.stride is auto: pass elif isinstance( - dim_tag.stride, (int, np.integer, Expression)): + dim_tag.stride, (int, np.integer, ExpressionNode)): ensure_depends_only_on_arguments(what, dim_tag.stride) else: raise LoopyError(f"invalid value of {what}") @@ -281,7 +282,7 @@ def ensure_depends_only_on_arguments( pass if tv.offset is auto: pass - elif isinstance(tv.offset, (int, np.integer, Expression, str)): + elif isinstance(tv.offset, (int, np.integer, ExpressionNode, str)): ensure_depends_only_on_arguments(what, tv.offset) else: raise LoopyError(f"invalid value of offset for '{tv.name}'") @@ -294,7 +295,7 @@ def ensure_depends_only_on_arguments( if dim_tag.stride is auto: raise LoopyError(f"The {what}" f" is 'auto', " "which is not allowed.") - elif isinstance(dim_tag.stride, (int, np.integer, Expression)): + elif isinstance(dim_tag.stride, (int, np.integer, ExpressionNode)): ensure_depends_only_on_arguments(what, dim_tag.stride) else: raise LoopyError(f"invalid value of {what}") diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index 64a9b857..fa5ae6b1 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -146,7 +146,7 @@ class FixedStrideArrayDimTag(_StrideArrayDimTagBase): May be one of the following: - - A :attr:`~pymbolic.typing.Expression`, including an + - A :data:`~pymbolic.typing.Expression`, including an integer, indicating the stride in units of the underlying array's :attr:`ArrayBase.dtype`. diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 8ca5aa87..9761a294 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -109,10 +109,10 @@ def _names_from_expr(expr: Union[None, Expression, str]) -> FrozenSet[str]: from loopy.symbolic import DependencyMapper dep_mapper = DependencyMapper() - from pymbolic.primitives import Expression + from pymbolic.primitives import ExpressionNode if isinstance(expr, str): return frozenset({expr}) - elif isinstance(expr, Expression): + elif isinstance(expr, ExpressionNode): return frozenset(cast(Variable, v).name for v in dep_mapper(expr)) elif expr is None: return frozenset() diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index 32cf664b..d6517adc 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -1372,7 +1372,7 @@ class CInstruction(InstructionBase): .. attribute:: assignees A sequence (typically a :class:`tuple`) of variable references (with or - without subscript) as :attr:`pymbolic.typing.Expression` instances + without subscript) as :data:`pymbolic.typing.Expression` instances that :attr:`code` writes to. This is optional and only used for figuring out dependencies. """ diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 4dc824ea..7eeae715 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -224,7 +224,7 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: vng = kernel.get_var_name_generator() - from pymbolic.primitives import Expression, Variable + from pymbolic.primitives import ExpressionNode, Variable from loopy.kernel.array import FixedStrideArrayDimTag @@ -241,7 +241,7 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: additional_args.append(ValueArg( offset_name, kernel.index_dtype)) arg = arg.copy(offset=offset_name) - elif isinstance(arg.offset, (int, np.integer, Expression, str)): + elif isinstance(arg.offset, (int, np.integer, ExpressionNode, str)): pass else: raise LoopyError(f"invalid value of {what}") @@ -261,7 +261,7 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: additional_args.append(ValueArg( stride_name, kernel.index_dtype)) elif isinstance( - dim_tag.stride, (int, np.integer, Expression)): + dim_tag.stride, (int, np.integer, ExpressionNode)): pass else: raise LoopyError(f"invalid value of {what}") @@ -286,7 +286,7 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: def zero_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: made_changes = False - from pymbolic.primitives import Expression + from pymbolic.primitives import ExpressionNode # {{{ process arguments @@ -298,7 +298,7 @@ def zero_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: if arg.offset is auto: made_changes = True arg = arg.copy(offset=0) - elif isinstance(arg.offset, (int, np.integer, Expression, str)): + elif isinstance(arg.offset, (int, np.integer, ExpressionNode, str)): from pymbolic.primitives import is_zero if not is_zero(arg.offset): raise LoopyError( diff --git a/loopy/statistics.py b/loopy/statistics.py index 94f82058..fd697bc4 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -681,7 +681,7 @@ class MemAccess(ImmutableRecord): .. attribute:: lid_strides A :class:`dict` of **{** :class:`int` **:** - :attr:`~pymbolic.typing.Expression` or :class:`int` **}** that + :data:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies local strides for each local id in the memory access index. Local ids not found will not be present in ``lid_strides.keys()``. Uniform access (i.e. work-items within a sub-group access the same @@ -692,7 +692,7 @@ class MemAccess(ImmutableRecord): .. attribute:: gid_strides A :class:`dict` of **{** :class:`int` **:** - :attr:`~pymbolic.typing.Expression` or :class:`int` **}** that + :data:`~pymbolic.typing.Expression` or :class:`int` **}** that specifies global strides for each global id in the memory access index. global ids not found will not be present in ``gid_strides.keys()``. diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 1ef933ad..d30581db 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -127,7 +127,7 @@ .. class:: Expression - See :attr:`pymbolic.typing.Expression`. + See :data:`pymbolic.typing.Expression`. .. class:: _Expression @@ -712,7 +712,7 @@ class Reduction(LoopyExpressionBase): """An expression which may have tuple type. If the expression has tuple type, it must be one of the following: - * a :class:`tuple` of :attr:`pymbolic.typing.Expression`, or + * a :class:`tuple` of :data:`pymbolic.typing.Expression`, or * a :class:`loopy.symbolic.Reduction`, or * a function call or substitution rule invocation. """ @@ -1993,7 +1993,7 @@ def simplify_using_aff(kernel, expr): """ Simplifies *expr* on *kernel*'s domain. - :arg expr: An instance of :attr:`pymbolic.typing.Expression`. + :arg expr: An instance of :data:`pymbolic.typing.Expression`. """ deps = get_dependencies(expr) @@ -2707,7 +2707,7 @@ def is_expression_equal(a, b): if a == b: return True - if isinstance(a, p.Expression) or isinstance(b, p.Expression): + if isinstance(a, p.ExpressionNode) or isinstance(b, p.ExpressionNode): if a is None or b is None: return False diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 73971786..2e19eea7 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -229,10 +229,10 @@ def add_prefetch_for_single_kernel(kernel, callables_table, var_name, from pymbolic import var uni_template = parsed_var_name if len(parameters) > 1: - uni_template = uni_template.index( - tuple(var(par_name) for par_name in parameters)) + uni_template = uni_template[ + tuple(var(par_name) for par_name in parameters)] elif len(parameters) == 1: - uni_template = uni_template.index(var(parameters[0])) + uni_template = uni_template[var(parameters[0])] # }}} diff --git a/loopy/transform/pack_and_unpack_args.py b/loopy/transform/pack_and_unpack_args.py index 2a82952c..ae5339b5 100644 --- a/loopy/transform/pack_and_unpack_args.py +++ b/loopy/transform/pack_and_unpack_args.py @@ -222,9 +222,9 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, new_indices = tuple(simplify_via_aff(i) for i in new_indices) pack_lhs_assignee = pack_subst_mapper( - var(pack_name).index(new_indices)) + var(pack_name)[new_indices]) unpack_rhs = unpack_subst_mapper( - var(pack_name).index(new_indices)) + var(pack_name)[new_indices]) # }}} @@ -272,7 +272,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, new_id_to_parameters[arg_id] = SubArrayRef( tuple(updated_swept_inames), - (var(pack_name).index(tuple(updated_swept_inames)))) + (var(pack_name)[tuple(updated_swept_inames)])) else: new_id_to_parameters[arg_id] = p diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 0982c43f..147b6265 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -101,9 +101,9 @@ def _get_called_names(insn): assert isinstance(insn, MultiAssignmentBase) from functools import reduce - from pymbolic.primitives import Expression + from pymbolic.primitives import ExpressionNode return ((_get_calls_in_expr(insn.expression) - if isinstance(insn.expression, Expression) + if isinstance(insn.expression, ExpressionNode) else frozenset()) # indices of assignees might call the subst rules | reduce(frozenset.union, @@ -113,7 +113,7 @@ def _get_called_names(insn): | reduce(frozenset.union, (_get_calls_in_expr(pred) for pred in insn.predicates - if isinstance(pred, Expression)), + if isinstance(pred, ExpressionNode)), frozenset()) ) @@ -922,8 +922,8 @@ def add_assumptions(d): # should. if _enable_mirgecom_workaround: - from pymbolic.primitives import Expression - if is_length_1 and not isinstance(base_index, Expression): + from pymbolic.primitives import ExpressionNode + if is_length_1 and not isinstance(base_index, ExpressionNode): # I.e. base_index is an integer. from pytools import is_single_valued if is_single_valued( From f53ff4c69d8daff4393779a114f1c4cb133c7a76 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 22 Nov 2024 14:03:24 -0600 Subject: [PATCH 13/24] Switch to building with hatchling --- MANIFEST.in | 23 ----------------------- pyproject.toml | 26 ++++++++++++-------------- 2 files changed, 12 insertions(+), 37 deletions(-) delete mode 100644 MANIFEST.in diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index a87cfef7..00000000 --- a/MANIFEST.in +++ /dev/null @@ -1,23 +0,0 @@ -include test/*.py -include test/*.f90 -recursive-include examples *.py *.cl *.floopy *.sh *.ipynb *.cpp *.loopy -recursive-include contrib *.vim *.py - -include build-helpers/*.sh -include build-helpers/*.spec - -include doc/*.rst -include doc/Makefile -include doc/*.py -include doc/images/*.png -include doc/_static/*.css -include doc/_templates/*.html -include doc/images/*.svg -include doc/images/*.png - -include configure.py -include Makefile.in -include README.rst -include LICENSE -include CITATION.cff -include requirements*.txt diff --git a/pyproject.toml b/pyproject.toml index 57b6ba44..c23c2973 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,6 @@ [build-system] -build-backend = "setuptools.build_meta" -requires = [ - "setuptools>=63", -] +requires = ["hatchling"] +build-backend = "hatchling.build" [project] name = "loopy" @@ -62,22 +60,22 @@ fortran = [ "ply>=3.6", ] +[tool.hatch.metadata] +allow-direct-references = true + +[tool.hatch.build.targets.sdist] +exclude = [ + "/.git*", + "/doc/_build", + "/run-*.sh", +] + [project.scripts] [project.urls] Documentation = "https://documen.tician.de/loopy" Homepage = "https://github.com/inducer/loopy" -[tool.setuptools.packages.find] -include = [ - "loopy*", -] - -[tool.setuptools.package-data] -loopy = [ - "py.typed", -] - [tool.setuptools.package-dir] # https://github.com/Infleqtion/client-superstaq/pull/715 "" = "." From 9c171287e179c18ef54b665a54981693a63e3994 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 22 Nov 2024 14:19:27 -0600 Subject: [PATCH 14/24] Switch version regex to raw, to preemptively placate ruff --- loopy/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loopy/version.py b/loopy/version.py index 609e6c17..2ed932d5 100644 --- a/loopy/version.py +++ b/loopy/version.py @@ -26,7 +26,7 @@ VERSION_TEXT = metadata.version("loopy") -_match = re.match("^([0-9.]+)([a-z0-9]*?)$", VERSION_TEXT) +_match = re.match(r"^([0-9.]+)([a-z0-9]*?)$", VERSION_TEXT) assert _match is not None VERSION_STATUS = _match.group(2) VERSION = tuple(int(nr) for nr in _match.group(1).split(".")) From 70cc100753981e80aac60f93aa5c2ed62230a1e7 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Fri, 22 Nov 2024 16:15:51 -0600 Subject: [PATCH 15/24] Enable, fix RUF lint rules, a bit more dataclass conversion --- contrib/c-integer-semantics.py | 4 +- contrib/mem-pattern-explorer/pattern_vis.py | 6 +- doc/conf.py | 2 +- examples/python/ispc-stream-harness.py | 2 +- loopy/__init__.py | 4 +- loopy/auto_test.py | 4 +- loopy/check.py | 4 +- loopy/codegen/__init__.py | 6 +- loopy/codegen/control.py | 2 +- loopy/codegen/instruction.py | 2 +- loopy/codegen/result.py | 4 +- loopy/frontend/fortran/__init__.py | 2 +- loopy/frontend/fortran/expression.py | 7 +- loopy/frontend/fortran/translator.py | 11 +- loopy/frontend/fortran/tree.py | 2 +- loopy/kernel/__init__.py | 7 +- loopy/kernel/array.py | 4 +- loopy/kernel/creation.py | 16 +- loopy/kernel/data.py | 4 +- loopy/kernel/function_interface.py | 258 ++++++++++---------- loopy/kernel/instruction.py | 22 +- loopy/library/random123.py | 43 ++-- loopy/library/reduction.py | 11 +- loopy/options.py | 7 +- loopy/preprocess.py | 4 +- loopy/schedule/__init__.py | 25 +- loopy/schedule/device_mapping.py | 30 +-- loopy/schedule/tree.py | 11 +- loopy/statistics.py | 28 ++- loopy/symbolic.py | 14 +- loopy/target/c/__init__.py | 27 +- loopy/target/c/c_execution.py | 8 +- loopy/target/c/codegen/expression.py | 2 +- loopy/target/cuda.py | 3 +- loopy/target/ispc.py | 4 +- loopy/target/opencl.py | 7 +- loopy/target/pyopencl.py | 23 +- loopy/target/pyopencl_execution.py | 5 +- loopy/target/python.py | 6 +- loopy/tools.py | 25 +- loopy/transform/add_barrier.py | 2 +- loopy/transform/batch.py | 10 +- loopy/transform/data.py | 2 +- loopy/transform/iname.py | 16 +- loopy/transform/pack_and_unpack_args.py | 6 +- loopy/transform/precompute.py | 6 +- loopy/transform/realize_reduction.py | 41 ++-- loopy/transform/save.py | 4 +- loopy/transform/subst.py | 2 +- loopy/translation_unit.py | 9 +- loopy/types.py | 4 +- proto-tests/test_fem_assembly.py | 4 +- proto-tests/test_sem.py | 10 +- proto-tests/test_sem_tim.py | 10 +- proto-tests/test_tim.py | 10 +- pyproject.toml | 4 +- test/test_apps.py | 2 +- test/test_callables.py | 2 +- test/test_dg.py | 2 +- test/test_diff.py | 4 +- test/test_domain.py | 6 +- test/test_expression.py | 4 +- test/test_fortran.py | 4 +- test/test_isl.py | 2 +- test/test_loopy.py | 8 +- test/test_reduction.py | 4 +- test/test_scan.py | 6 +- test/test_sem_reagan.py | 2 +- test/test_split_iname_slabs.py | 2 +- test/test_statistics.py | 4 +- test/test_target.py | 4 +- test/test_transform.py | 18 +- 72 files changed, 431 insertions(+), 439 deletions(-) diff --git a/contrib/c-integer-semantics.py b/contrib/c-integer-semantics.py index 8556430d..62a980f0 100644 --- a/contrib/c-integer-semantics.py +++ b/contrib/c-integer-semantics.py @@ -95,8 +95,8 @@ def main(): func.argtypes = [ctypes.c_longlong, ctypes.c_longlong] func.restype = ctypes.c_longlong - cdiv = int_exp.cdiv # noqa - cmod = int_exp.cmod # noqa + cdiv = int_exp.cdiv + cmod = int_exp.cmod int_floor_div = int_exp.loopy_floor_div_int64 int_floor_div_pos_b = int_exp.loopy_floor_div_pos_b_int64 int_mod_pos_b = int_exp.loopy_mod_pos_b_int64 diff --git a/contrib/mem-pattern-explorer/pattern_vis.py b/contrib/mem-pattern-explorer/pattern_vis.py index bbde2317..f285dbb8 100644 --- a/contrib/mem-pattern-explorer/pattern_vis.py +++ b/contrib/mem-pattern-explorer/pattern_vis.py @@ -27,7 +27,7 @@ def __init__(self, gsize, lsize, subgroup_size=32, decay_constant=0.75): self.arrays = [] - def l(self, index): # noqa: E741,E743 + def l(self, index): # noqa: E743 subscript = [np.newaxis] * self.ind_length subscript[len(self.gsize) + index] = slice(None) @@ -147,7 +147,7 @@ def get_plot_data(self): div_ceil(nelements, self.elements_per_row), self.elements_per_row,) shaped_array = np.zeros( - base_shape + (self.nattributes,), + (*base_shape, self.nattributes), dtype=np.float32) shaped_array.reshape(-1, self.nattributes)[:nelements] = self.array @@ -160,7 +160,7 @@ def get_plot_data(self): else: subgroup.fill(1) - rgb_array = np.zeros(base_shape + (3,)) + rgb_array = np.zeros((*base_shape, 3)) if 1: if len(self.ctx.gsize) > 1: # g.0 -> red diff --git a/doc/conf.py b/doc/conf.py index 951b0221..70f7121b 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -2,7 +2,7 @@ from urllib.request import urlopen -_conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py" # noqa +_conf_url = "https://raw.githubusercontent.com/inducer/sphinxconfig/main/sphinxconfig.py" with urlopen(_conf_url) as _inf: exec(compile(_inf.read(), _conf_url, "exec"), globals()) diff --git a/examples/python/ispc-stream-harness.py b/examples/python/ispc-stream-harness.py index bf6e29e4..ce61b16b 100644 --- a/examples/python/ispc-stream-harness.py +++ b/examples/python/ispc-stream-harness.py @@ -26,7 +26,7 @@ def transform(knl, vars, stream_dtype): knl = lp.add_and_infer_dtypes(knl, dict.fromkeys(vars, stream_dtype)) - knl = lp.set_argument_order(knl, vars + ["n"]) + knl = lp.set_argument_order(knl, [*vars, "n"]) return knl diff --git a/loopy/__init__.py b/loopy/__init__.py index 07f06a02..01d69cf1 100644 --- a/loopy/__init__.py +++ b/loopy/__init__.py @@ -465,7 +465,7 @@ def register_preamble_generators(kernel: LoopKernel, preamble_generators): "and would thus disrupt loopy's caches" % pgen) - new_pgens = (pgen,) + new_pgens + new_pgens = (pgen, *new_pgens) return kernel.copy(preamble_generators=new_pgens) @@ -483,7 +483,7 @@ def register_symbol_manglers(kernel, manglers): "and would disrupt loopy's caches" % m) - new_manglers = (m,) + new_manglers + new_manglers = (m, *new_manglers) return kernel.copy(symbol_manglers=new_manglers) diff --git a/loopy/auto_test.py b/loopy/auto_test.py index 6ee76255..5b411658 100644 --- a/loopy/auto_test.py +++ b/loopy/auto_test.py @@ -410,12 +410,12 @@ def auto_test_vs_ref( if ref_entrypoint is None: if len(ref_prog.entrypoints) != 1: raise LoopyError("Unable to guess entrypoint for ref_prog.") - ref_entrypoint = list(ref_prog.entrypoints)[0] + ref_entrypoint = next(iter(ref_prog.entrypoints)) if test_entrypoint is None: if len(test_prog.entrypoints) != 1: raise LoopyError("Unable to guess entrypoint for ref_prog.") - test_entrypoint = list(test_prog.entrypoints)[0] + test_entrypoint = next(iter(test_prog.entrypoints)) ref_prog = lp.preprocess_kernel(ref_prog) test_prog = lp.preprocess_kernel(test_prog) diff --git a/loopy/check.py b/loopy/check.py index 5e489723..f96123de 100644 --- a/loopy/check.py +++ b/loopy/check.py @@ -206,7 +206,7 @@ def check_separated_array_consistency(kernel: LoopKernel) -> None: for attr_name in ["address_space", "is_input", "is_output"]: if getattr(arg, attr_name) != getattr(sub_arg, attr_name): raise LoopyError( - "Attribute '{attr_name}' of " + f"Attribute '{attr_name}' of " f"'{arg.name}' and associated sep array " f"'{sub_arg.name}' is not consistent.") @@ -266,7 +266,7 @@ def ensure_depends_only_on_arguments( raise LoopyError(f"invalid value of {what}") assert new_dim_tags is not None - new_dim_tags = new_dim_tags + (dim_tag,) + new_dim_tags = (*new_dim_tags, dim_tag) arg = arg.copy(dim_tags=new_dim_tags) diff --git a/loopy/codegen/__init__.py b/loopy/codegen/__init__.py index d460dd54..e9c19b30 100644 --- a/loopy/codegen/__init__.py +++ b/loopy/codegen/__init__.py @@ -67,8 +67,8 @@ if getattr(sys, "_BUILDING_SPHINX_DOCS", False): - from loopy.codegen.result import GeneratedProgram # noqa: F811 - from loopy.codegen.tools import CodegenOperationCacheManager # noqa: F811 + from loopy.codegen.result import GeneratedProgram + from loopy.codegen.tools import CodegenOperationCacheManager __doc__ = """ @@ -666,7 +666,7 @@ def generate_code_v2(t_unit: TranslationUnit) -> CodeGenerationResult: # adding the callee fdecls to the device_programs device_programs = ([device_programs[0].copy( ast=t_unit.target.get_device_ast_builder().ast_module.Collection( - callee_fdecls+[device_programs[0].ast]))] + + [*callee_fdecls, device_programs[0].ast]))] + device_programs[1:]) def not_reduction_op(name: str | ReductionOpFunction) -> str: diff --git a/loopy/codegen/control.py b/loopy/codegen/control.py index bee09229..26e1b8f3 100644 --- a/loopy/codegen/control.py +++ b/loopy/codegen/control.py @@ -470,7 +470,7 @@ def gen_code(inner_codegen_state): prev_gen_code = gen_code - def gen_code(inner_codegen_state): # noqa pylint:disable=function-redefined + def gen_code(inner_codegen_state): # pylint: disable=function-redefined condition_exprs = [ constraint_to_cond_expr(cns) for cns in bounds_checks] + list(pred_checks) diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 1bc26733..84dedc3e 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -191,7 +191,7 @@ def generate_assignment_instruction_code(codegen_state, insn): from pymbolic.mapper.stringifier import PREC_NONE lhs_code = codegen_state.expression_to_code_mapper(insn.assignee, PREC_NONE) - from cgen import Statement as S # noqa + from cgen import Statement as S gs, ls = kernel.get_grid_size_upper_bounds(codegen_state.callables_table) diff --git a/loopy/codegen/result.py b/loopy/codegen/result.py index 7fcb4294..0f534592 100644 --- a/loopy/codegen/result.py +++ b/loopy/codegen/result.py @@ -210,9 +210,7 @@ def with_new_program(self, codegen_state, program): assert program.is_device_program return self.copy( device_programs=( - list(self.device_programs[:-1]) - + - [program])) + [*list(self.device_programs[:-1]), program])) else: assert program.name == codegen_state.gen_program_name assert not program.is_device_program diff --git a/loopy/frontend/fortran/__init__.py b/loopy/frontend/fortran/__init__.py index 5e6ff24d..29986ddd 100644 --- a/loopy/frontend/fortran/__init__.py +++ b/loopy/frontend/fortran/__init__.py @@ -225,7 +225,7 @@ def parse_transformed_fortran(source, free_form=True, strict=True, prev_sys_path = sys.path try: if infile_dirname: - sys.path = prev_sys_path + [infile_dirname] + sys.path = [*prev_sys_path, infile_dirname] if pre_transform_code is not None: proc_dict["_MODULE_SOURCE_CODE"] = pre_transform_code diff --git a/loopy/frontend/fortran/expression.py b/loopy/frontend/fortran/expression.py index bb839451..7b3200a9 100644 --- a/loopy/frontend/fortran/expression.py +++ b/loopy/frontend/fortran/expression.py @@ -21,7 +21,9 @@ """ import re +from collections.abc import Mapping from sys import intern +from typing import ClassVar import numpy as np @@ -29,6 +31,7 @@ from pymbolic.parser import Parser as ExpressionParserBase from loopy.frontend.fortran.diagnostic import TranslationError +from loopy.symbolic import LexTable _less_than = intern("less_than") @@ -65,7 +68,7 @@ def tuple_to_complex_literal(expr): # {{{ expression parser class FortranExpressionParser(ExpressionParserBase): - lex_table = [ + lex_table: ClassVar[LexTable] = [ (_less_than, pytools.lex.RE(r"\.lt\.", re.I)), (_greater_than, pytools.lex.RE(r"\.gt\.", re.I)), (_less_equal, pytools.lex.RE(r"\.le\.", re.I)), @@ -142,7 +145,7 @@ def parse_terminal(self, pstate): return ExpressionParserBase.parse_terminal( self, pstate) - COMP_MAP = { + COMP_MAP: ClassVar[Mapping[str, str]] = { _less_than: "<", _less_equal: "<=", _greater_than: ">", diff --git a/loopy/frontend/fortran/translator.py b/loopy/frontend/fortran/translator.py index fc9eace8..860ed723 100644 --- a/loopy/frontend/fortran/translator.py +++ b/loopy/frontend/fortran/translator.py @@ -22,6 +22,7 @@ import re from sys import intern +from typing import ClassVar from warnings import warn import numpy as np @@ -53,7 +54,7 @@ def __init__(self, scope): super().__init__() def get_cache_key(self, expr): - return super().get_cache_key(expr) + (self.scope,) + return (*super().get_cache_key(expr), self.scope) def map_subscript(self, expr): from pymbolic.primitives import Variable @@ -441,7 +442,7 @@ def map_Implicit(self, node): def map_Equivalence(self, node): raise NotImplementedError("equivalence") - TYPE_MAP = { + TYPE_MAP: ClassVar[dict[tuple[str, str], type[np.generic]]] = { ("real", ""): np.float32, ("real", "4"): np.float32, ("real", "8"): np.float64, @@ -455,9 +456,9 @@ def map_Equivalence(self, node): ("integer", "8"): np.int64, } if hasattr(np, "float128"): - TYPE_MAP[("real", "16")] = np.float128 # pylint:disable=no-member + TYPE_MAP["real", "16"] = np.float128 # pylint:disable=no-member if hasattr(np, "complex256"): - TYPE_MAP[("complex", "32")] = np.complex256 # pylint:disable=no-member + TYPE_MAP["complex", "32"] = np.complex256 # pylint:disable=no-member def dtype_from_stmt(self, stmt): length, kind = stmt.selector @@ -471,7 +472,7 @@ def dtype_from_stmt(self, stmt): else: raise RuntimeError("both length and kind specified") - return np.dtype(self.TYPE_MAP[(type(stmt).__name__.lower(), length)]) + return np.dtype(self.TYPE_MAP[type(stmt).__name__.lower(), length]) def map_type_decl(self, node): scope = self.scope_stack[-1] diff --git a/loopy/frontend/fortran/tree.py b/loopy/frontend/fortran/tree.py index b2af66f0..4abef510 100644 --- a/loopy/frontend/fortran/tree.py +++ b/loopy/frontend/fortran/tree.py @@ -62,7 +62,7 @@ def rec(self, expr, *args, **kwargs): r"^(?P[_0-9a-zA-Z]+)\s*" r"(\((?P[-+*/0-9:a-zA-Z, \t]+)\))?" r"(\s*=\s*(?P.+))?" - "$") + r"$") def parse_dimension_specs(self, node, dim_decls): def parse_bounds(bounds_str): diff --git a/loopy/kernel/__init__.py b/loopy/kernel/__init__.py index 4f392edd..a3fa94b3 100644 --- a/loopy/kernel/__init__.py +++ b/loopy/kernel/__init__.py @@ -31,6 +31,7 @@ TYPE_CHECKING, Any, Callable, + ClassVar, Dict, FrozenSet, Iterator, @@ -85,7 +86,7 @@ # {{{ loop kernel object -class KernelState(IntEnum): # noqa +class KernelState(IntEnum): INITIAL = 0 CALLS_RESOLVED = 1 PREPROCESSED = 2 @@ -199,7 +200,7 @@ class LoopKernel(Taggable): were applied to the kernel. These are stored so that they may be repeated on expressions the user specifies later. """ - index_dtype: NumpyType = NumpyType(np.dtype(np.int32)) + index_dtype: NumpyType = NumpyType(np.dtype(np.int32)) # noqa: RUF009 silenced_warnings: FrozenSet[str] = frozenset() # FIXME Yuck, this should go. @@ -1310,7 +1311,7 @@ def __setstate__(self, state): # {{{ persistent hash key generation / comparison - hash_fields = [ + hash_fields: ClassVar[Sequence[str]] = [ "domains", "instructions", "args", diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index fa5ae6b1..1c59a9ae 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -63,7 +63,7 @@ from loopy.target import TargetBase if getattr(sys, "_BUILDING_SPHINX_DOCS", False): - from loopy.target import TargetBase # noqa: F811 + from loopy.target import TargetBase T = TypeVar("T") @@ -1217,7 +1217,7 @@ def _apply_offset(sub: Expression, ary: ArrayBase) -> Expression: else: # assume it's an expression # FIXME: mypy can't figure out that ExpressionT + ExpressionT works - return ary.offset + sub # type: ignore[call-overload, arg-type, operator] # noqa: E501 + return ary.offset + sub # type: ignore[call-overload, arg-type, operator] else: return sub diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 4f1803f2..43c4a4ee 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -155,13 +155,13 @@ def expand_defines(insn, defines, single_valued=True): "in this context (when expanding '%s')" % define_name) replacements = [ - rep+((replace_pattern % define_name, subval),) + (*rep, (replace_pattern % define_name, subval)) for rep in replacements for subval in value ] else: replacements = [ - rep+((replace_pattern % define_name, value),) + (*rep, (replace_pattern % define_name, value)) for rep in replacements] for rep in replacements: @@ -285,14 +285,12 @@ def parse_nosync_option(opt_value): arrow_idx = value.find("->") if arrow_idx >= 0: result["inames_to_dup"] = ( - result.get("inames_to_dup", []) - + - [(value[:arrow_idx], value[arrow_idx+2:])]) + [*result.get("inames_to_dup", []), + (value[:arrow_idx], value[arrow_idx + 2:]) + ]) else: result["inames_to_dup"] = ( - result.get("inames_to_dup", []) - + - [(value, None)]) + [*result.get("inames_to_dup", []), (value, None)]) elif opt_key == "dep" and opt_value is not None: if opt_value.startswith("*"): @@ -2403,7 +2401,7 @@ def make_function(domains, instructions, kernel_data=None, **kwargs): kernel_args.append(dat) continue - if isinstance(dat, ArrayBase) and isinstance(dat.shape, tuple): # noqa pylint:disable=no-member + if isinstance(dat, ArrayBase) and isinstance(dat.shape, tuple): # pylint: disable=no-member new_shape = [] for shape_axis in dat.shape: # pylint:disable=no-member if shape_axis is not None: diff --git a/loopy/kernel/data.py b/loopy/kernel/data.py index 9761a294..01ce9f95 100644 --- a/loopy/kernel/data.py +++ b/loopy/kernel/data.py @@ -103,7 +103,7 @@ # {{{ utilities -def _names_from_expr(expr: Union[None, Expression, str]) -> FrozenSet[str]: +def _names_from_expr(expr: Union[Expression, str, None]) -> FrozenSet[str]: from numbers import Number from loopy.symbolic import DependencyMapper @@ -303,7 +303,7 @@ def __str__(self): return "ord" -ToInameTagConvertible = Union[str, None, Tag] +ToInameTagConvertible = Union[str, Tag, None] def parse_tag(tag: ToInameTagConvertible) -> Optional[Tag]: diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 22abeb8a..33dfd73f 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -22,38 +22,40 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ - from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Callable, ClassVar, FrozenSet, Tuple, TypeVar +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, replace +from typing import TYPE_CHECKING, Any, Callable, FrozenSet, TypeVar +from warnings import warn -from pytools import ImmutableRecord +from immutabledict import immutabledict +from typing_extensions import Self from loopy.diagnostic import LoopyError -from loopy.kernel import LoopKernel -from loopy.kernel.array import ArrayBase -from loopy.kernel.data import ArrayArg, ValueArg +from loopy.kernel.array import ArrayBase, ArrayDimImplementationTag +from loopy.kernel.data import AddressSpace, ArrayArg, ValueArg from loopy.symbolic import DependencyMapper, WalkMapper -from loopy.tools import update_persistent_hash +from loopy.types import LoopyType +from loopy.typing import ShapeType if TYPE_CHECKING: from typing_extensions import Self + from loopy.kernel import LoopKernel from loopy.translation_unit import CallablesTable, FunctionIdT __doc__ = """ .. currentmodule:: loopy.kernel.function_interface +.. autoclass:: ArgDescriptor .. autoclass:: ValueArgDescriptor - .. autoclass:: ArrayArgDescriptor .. currentmodule:: loopy .. autoclass:: InKernelCallable - .. autoclass:: CallableKernel - .. autoclass:: ScalarCallable """ @@ -63,7 +65,7 @@ ArgDescriptorT = TypeVar("ArgDescriptorT", bound="ArgDescriptor") -class ArgDescriptor(ABC, ImmutableRecord): +class ArgDescriptor(ABC): @abstractmethod def map_expr( self, @@ -75,19 +77,25 @@ def map_expr( def depends_on(self) -> frozenset[str]: ... + @abstractmethod + def copy(self, **kwargs: Any) -> Self: + ... + +@dataclass(frozen=True) class ValueArgDescriptor(ArgDescriptor): - hash_fields = () def map_expr(self, subst_mapper): - return self.copy() + return self def depends_on(self): return frozenset() - update_persistent_hash = update_persistent_hash + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) +@dataclass(frozen=True) class ArrayArgDescriptor(ArgDescriptor): """ Records information about an array argument to an in-kernel callable. To be @@ -95,46 +103,39 @@ class ArrayArgDescriptor(ArgDescriptor): :meth:`~loopy.InKernelCallable.with_descrs`, used for matching shape and address space of caller and callee kernels. - .. attribute:: shape - - Shape of the array. - - .. attribute:: address_space - - An attribute of :class:`loopy.AddressSpace`. - - .. attribute:: dim_tags - - A tuple of instances of - :class:`loopy.kernel.array.ArrayDimImplementationTag` + .. autoattribute:: shape + .. autoattribute:: address_space + .. autoattribute:: dim_tags .. automethod:: map_expr .. automethod:: depends_on """ - fields = {"shape", "address_space", "dim_tags"} + shape: ShapeType | None + address_space: AddressSpace + dim_tags: Sequence[ArrayDimImplementationTag] | None + """See :ref:`data-dim-tags`. + """ - def __init__(self, shape, address_space, dim_tags): + if __debug__: + def __post_init__(self): + # {{{ sanity checks - # {{{ sanity checks + from loopy.kernel.array import ArrayDimImplementationTag + from loopy.kernel.data import auto - from loopy.kernel.array import ArrayDimImplementationTag - from loopy.kernel.data import auto + assert isinstance(self.shape, tuple) or self.shape in [None, auto] + assert isinstance(self.dim_tags, tuple) or self.dim_tags is None - assert isinstance(shape, tuple) or shape in [None, auto] - assert isinstance(dim_tags, tuple) or dim_tags is None + if self.dim_tags: + # FIXME at least vector dim tags should be supported + assert all(isinstance(dim_tag, ArrayDimImplementationTag) for dim_tag in + self.dim_tags) - if dim_tags: - # FIXME at least vector dim tags should be supported - assert all(isinstance(dim_tag, ArrayDimImplementationTag) for dim_tag in - dim_tags) + # }}} - # }}} - - super().__init__( - shape=shape, - address_space=address_space, - dim_tags=dim_tags) + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) def map_expr(self, f): """ @@ -173,11 +174,6 @@ def depends_on(self): return frozenset(var.name for var in result) - def update_persistent_hash(self, key_hash, key_builder): - key_builder.rec(key_hash, self.shape) - key_builder.rec(key_hash, self.address_space) - key_builder.rec(key_hash, self.dim_tags) - class ExpressionIsScalarChecker(WalkMapper): def __init__(self, kernel): @@ -308,25 +304,14 @@ def get_kw_pos_association(kernel): # {{{ template class -class InKernelCallable(ImmutableRecord): +@dataclass(frozen=True, init=False) +class InKernelCallable(ABC): """ An abstract interface to define a callable encountered in a kernel. - .. attribute:: name - - The name of the callable which can be encountered within expressions in - a kernel. - - .. attribute:: arg_id_to_dtype - - A mapping which indicates the arguments types and result types of the - callable. - - .. attribute:: arg_id_to_descr - - A mapping which gives indicates the argument shape and ``dim_tags`` it - would be responsible for generating code. - + .. autoattribute:: name + .. autoattribute:: arg_id_to_dtype + .. autoattribute:: arg_id_to_descr .. automethod:: __init__ .. automethod:: with_types @@ -352,17 +337,39 @@ class InKernelCallable(ImmutableRecord): return value with (0-based) index *i*. """ + arg_id_to_dtype: Mapping[int | str, LoopyType] | None + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None + + def __init__(self, + arg_id_to_dtype: Mapping[int | str, LoopyType] | None = None, + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None = None, + ) -> None: + try: + hash(arg_id_to_dtype) + except TypeError: + arg_id_to_dtype = immutabledict(arg_id_to_dtype) + warn("arg_id_to_dtype passed to InKernelCallable was not hashable. " + "This usage is deprecated and will stop working in 2026.", + DeprecationWarning, stacklevel=3) + + try: + hash(arg_id_to_descr) + except TypeError: + arg_id_to_descr = immutabledict(arg_id_to_descr) + warn("arg_id_to_descr passed to InKernelCallable was not hashable. " + "This usage is deprecated and will stop working in 2026.", + DeprecationWarning, stacklevel=3) + + object.__setattr__(self, "arg_id_to_dtype", arg_id_to_dtype) + object.__setattr__(self, "arg_id_to_descr", arg_id_to_descr) + + if TYPE_CHECKING: + @property + def name(self) -> str: + raise NotImplementedError() - hash_fields: ClassVar[Tuple[str, ...]] = ( - "name", "arg_id_to_dtype", "arg_id_to_descr") - - def __init__(self, name, arg_id_to_dtype=None, arg_id_to_descr=None): - - super().__init__(name=name, - arg_id_to_dtype=arg_id_to_dtype, - arg_id_to_descr=arg_id_to_descr) - - update_persistent_hash = update_persistent_hash + def copy(self, **kwargs: Any) -> Self: + return replace(self, **kwargs) def with_types(self, arg_id_to_dtype, clbl_inf_ctx): """ @@ -391,6 +398,7 @@ def with_types(self, arg_id_to_dtype, clbl_inf_ctx): raise NotImplementedError() + @abstractmethod def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): """ :arg arg_id_to_descr: a mapping from argument identifiers (integers for @@ -418,12 +426,11 @@ def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): other callables within it, then *clbl_inf_ctx* is returned as is. """ - raise NotImplementedError() - - def is_ready_for_codegen(self): + def is_ready_for_codegen(self) -> bool: return (self.arg_id_to_dtype is not None and self.arg_id_to_descr is not None) + @abstractmethod def get_hw_axes_sizes(self, arg_id_to_arg, space, callables_table): """ Returns ``gsizes, lsizes``, where *gsizes* and *lsizes* are mappings @@ -435,26 +442,28 @@ def get_hw_axes_sizes(self, arg_id_to_arg, space, callables_table): arguments at a call-site. :arg space: An instance of :class:`islpy.Space`. """ - raise NotImplementedError + ... + @abstractmethod def get_used_hw_axes(self, callables_table): """ Returns a tuple ``group_axes_used, local_axes_used``, where ``(group|local)_axes_used`` are :class:`frozenset` of hardware axes indices used by the callable. """ - raise NotImplementedError + @abstractmethod def generate_preambles(self, target): """ Yields the target specific preamble. """ raise NotImplementedError() + @abstractmethod def emit_call(self, expression_to_code_mapper, expression, target): + ... - raise NotImplementedError() - + @abstractmethod def emit_call_insn(self, insn, target, expression_to_code_mapper): """ Returns a tuple of ``(call, assignee_is_returned)`` which is the target @@ -469,18 +478,14 @@ def emit_call_insn(self, insn, target, expression_to_code_mapper): in the target as the statement ``f(c, d, &a, &b)``. """ - raise NotImplementedError() - - def __hash__(self): - return hash(self.hash_fields) - + @abstractmethod def with_added_arg(self, arg_dtype, arg_descr): """ Registers a new argument to the callable and returns the name of the argument in the callable's namespace. """ - raise NotImplementedError() + @abstractmethod def get_called_callables( self, callables_table: CallablesTable, @@ -496,27 +501,27 @@ def get_called_callables( callables, else only returns the callables directly called by *self*. """ - raise NotImplementedError + @abstractmethod def with_name(self, name): """ Returns a copy of *self* so that it could be referred by *name* in a :attr:`loopy.TranslationUnit.callables_table`'s namespace. """ - raise NotImplementedError + @abstractmethod def is_type_specialized(self): """ Returns *True* iff *self*'s type signature is known, else returns *False*. """ - raise NotImplementedError # }}} # {{{ scalar callable +@dataclass(frozen=True, init=False) class ScalarCallable(InKernelCallable): """ An abstract interface to a scalar callable encountered in a kernel. @@ -537,15 +542,20 @@ class ScalarCallable(InKernelCallable): The :meth:`ScalarCallable.with_types` is intended to assist with type specialization of the function and sub-classes must define it. """ - fields = {"name", "arg_id_to_dtype", "arg_id_to_descr", "name_in_target"} - hash_fields = InKernelCallable.hash_fields + ("name_in_target",) - - def __init__(self, name, arg_id_to_dtype=None, - arg_id_to_descr=None, name_in_target=None): - super().__init__(name=name, - arg_id_to_dtype=arg_id_to_dtype, - arg_id_to_descr=arg_id_to_descr) - self.name_in_target = name_in_target + name: str + name_in_target: str | None + + def __init__(self, + name: str, + arg_id_to_dtype: Mapping[int | str, LoopyType] | None = None, + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None = None, + name_in_target: str | None = None) -> None: + super().__init__( + arg_id_to_dtype=arg_id_to_dtype, + arg_id_to_descr=arg_id_to_descr, + ) + object.__setattr__(self, "name", name) + object.__setattr__(self, "name_in_target", name_in_target) def with_types(self, arg_id_to_dtype, callables_table): raise LoopyError("No type inference information present for " @@ -689,6 +699,7 @@ def is_type_specialized(self): # {{{ callable kernel +@dataclass(frozen=True, init=False) class CallableKernel(InKernelCallable): """ Records information about a callee kernel. Also provides interface through @@ -702,35 +713,27 @@ class CallableKernel(InKernelCallable): :meth:`CallableKernel.with_descrs` should be called in order to match the arguments' shapes/strides across the caller and the callee kernel. - .. attribute:: subkernel - - :class:`~loopy.LoopKernel` which is being called. - + .. autoattribute:: subkernel .. automethod:: with_descrs .. automethod:: with_types """ - fields = {"subkernel", "arg_id_to_dtype", "arg_id_to_descr"} - hash_fields = ("subkernel", "arg_id_to_dtype", "arg_id_to_descr") + subkernel: LoopKernel + + def __init__(self, + subkernel: LoopKernel, + arg_id_to_dtype: Mapping[int | str, LoopyType] | None = None, + arg_id_to_descr: Mapping[int | str, ArgDescriptor] | None = None, + ) -> None: - def __init__(self, subkernel, arg_id_to_dtype=None, - arg_id_to_descr=None): - assert isinstance(subkernel, LoopKernel) - super().__init__(name=subkernel.name, + super().__init__( arg_id_to_dtype=arg_id_to_dtype, arg_id_to_descr=arg_id_to_descr) - self.subkernel = subkernel - - def copy(self, subkernel=None, arg_id_to_dtype=None, - arg_id_to_descr=None): - if subkernel is None: - subkernel = self.subkernel - if arg_id_to_descr is None: - arg_id_to_descr = self.arg_id_to_descr - if arg_id_to_dtype is None: - arg_id_to_dtype = self.arg_id_to_dtype + object.__setattr__(self, "subkernel", subkernel) - return CallableKernel(subkernel, arg_id_to_dtype, arg_id_to_descr) + @property + def name(self) -> str: + return self.subkernel.name def with_types(self, arg_id_to_dtype, callables_table): kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) @@ -769,7 +772,7 @@ def with_types(self, arg_id_to_dtype, callables_table): # Return the kernel call with specialized subkernel and the corresponding # new arg_id_to_dtype return self.copy(subkernel=specialized_kernel, - arg_id_to_dtype=new_arg_id_to_dtype), callables_table + arg_id_to_dtype=immutabledict(new_arg_id_to_dtype)), callables_table def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): @@ -844,7 +847,7 @@ def with_descrs(self, arg_id_to_descr, clbl_inf_ctx): # }}} return (self.copy(subkernel=subkernel, - arg_id_to_descr=arg_id_to_descr), + arg_id_to_descr=immutabledict(arg_id_to_descr)), clbl_inf_ctx) def with_added_arg(self, arg_dtype, arg_descr): @@ -852,7 +855,8 @@ def with_added_arg(self, arg_dtype, arg_descr): if isinstance(arg_descr, ValueArgDescriptor): subknl = self.subkernel.copy( - args=self.subkernel.args+[ + args=[ + *self.subkernel.args, ValueArg(var_name, arg_dtype, self.subkernel.target)]) kw_to_pos, pos_to_kw = get_kw_pos_association(subknl) @@ -860,11 +864,11 @@ def with_added_arg(self, arg_dtype, arg_descr): if self.arg_id_to_dtype is None: arg_id_to_dtype = {} else: - arg_id_to_dtype = self.arg_id_to_dtype.copy() + arg_id_to_dtype = dict(self.arg_id_to_dtype) if self.arg_id_to_descr is None: arg_id_to_descr = {} else: - arg_id_to_descr = self.arg_id_to_descr.copy() + arg_id_to_descr = dict(self.arg_id_to_descr) arg_id_to_dtype[var_name] = arg_dtype arg_id_to_descr[var_name] = arg_descr @@ -931,6 +935,10 @@ def generate_preambles(self, target): return yield + def emit_call(self, expression_to_code_mapper, expression, target): + raise LoopyError("Kernel '{self.name}' cannot be called " + "from within an expression, use a call statement") + def emit_call_insn(self, insn, target, expression_to_code_mapper): from loopy.target.c import CFamilyTarget if not isinstance(target, CFamilyTarget): diff --git a/loopy/kernel/instruction.py b/loopy/kernel/instruction.py index d6517adc..a5cefd80 100644 --- a/loopy/kernel/instruction.py +++ b/loopy/kernel/instruction.py @@ -27,7 +27,17 @@ from dataclasses import dataclass from functools import cached_property from sys import intern -from typing import Any, FrozenSet, Mapping, Optional, Sequence, Tuple, Type, Union +from typing import ( + Any, + ClassVar, + FrozenSet, + Mapping, + Optional, + Sequence, + Tuple, + Type, + Union, +) from warnings import warn import islpy as isl @@ -257,7 +267,7 @@ class InstructionBase(ImmutableRecord, Taggable): # within_inames_is_final is deprecated and will be removed in version 2017.x. - fields = set("id depends_on_is_final " + fields: ClassVar[set[str]] = set("id depends_on_is_final " "groups conflicts_with_groups " "no_sync_with " "predicates " @@ -634,7 +644,7 @@ def _get_assignee_subscript_deps(expr): # {{{ atomic ops -class MemoryOrdering: # noqa +class MemoryOrdering: """Ordering of atomic operations, defined as in C11 and OpenCL. .. attribute:: RELAXED @@ -662,7 +672,7 @@ def to_string(v): raise ValueError("Unknown value of MemoryOrdering") -class MemoryScope: # noqa +class MemoryScope: """Scope of atomicity, defined as in OpenCL. .. attribute:: auto @@ -925,8 +935,8 @@ def __init__(self, predicates: Optional[FrozenSet[str]] = None, tags: Optional[FrozenSet[Tag]] = None, temp_var_type: Union[ - Type[_not_provided], None, LoopyOptional, - LoopyType] = _not_provided, + Type[_not_provided], LoopyOptional, + LoopyType, None] = _not_provided, atomicity: Tuple[VarAtomicity, ...] = (), *, depends_on: Union[FrozenSet[str], str, None] = None, diff --git a/loopy/library/random123.py b/loopy/library/random123.py index 0afb0abb..329770e0 100644 --- a/loopy/library/random123.py +++ b/loopy/library/random123.py @@ -24,20 +24,31 @@ """ +from dataclasses import dataclass, replace + import numpy as np from mako.template import Template -from pytools import ImmutableRecord +from pymbolic.typing import not_none from loopy.kernel.function_interface import ScalarCallable +from loopy.target import TargetBase # {{{ rng metadata -class RNGInfo(ImmutableRecord): +@dataclass(frozen=True) +class RNGInfo: + name: str + pyopencl_header: str + generic_header: str + key_width: int + width: int | None = None + bits: int | None = None + @property - def full_name(self): - return "%s%dx%d" % (self.name, self.width, self.bits) + def full_name(self) -> str: + return "%s%dx%d" % (self.name, not_none(self.width), not_none(self.bits)) _philox_base_info = RNGInfo( @@ -53,15 +64,15 @@ def full_name(self): key_width=4) RNG_VARIANTS = [ - _philox_base_info.copy(width=2, bits=32), - _philox_base_info.copy(width=2, bits=64), - _philox_base_info.copy(width=4, bits=32), - _philox_base_info.copy(width=4, bits=64), - - _threefry_base_info.copy(width=2, bits=32), - _threefry_base_info.copy(width=2, bits=64), - _threefry_base_info.copy(width=4, bits=32), - _threefry_base_info.copy(width=4, bits=64), + replace(_philox_base_info, width=2, bits=32), + replace(_philox_base_info, width=2, bits=64), + replace(_philox_base_info, width=4, bits=32), + replace(_philox_base_info, width=4, bits=64), + + replace(_threefry_base_info, width=2, bits=32), + replace(_threefry_base_info, width=2, bits=64), + replace(_threefry_base_info, width=4, bits=32), + replace(_threefry_base_info, width=4, bits=64), ] FUNC_NAMES_TO_RNG = { @@ -165,12 +176,12 @@ def full_name(self): # }}} +@dataclass(frozen=True, init=False) class Random123Callable(ScalarCallable): """ Records information about for the random123 functions. """ - fields = ScalarCallable.fields | {"target"} - hash_fields = ScalarCallable.hash_fields + ("target",) + target: TargetBase def __init__(self, name, arg_id_to_dtype=None, arg_id_to_descr=None, name_in_target=None, target=None): @@ -179,7 +190,7 @@ def __init__(self, name, arg_id_to_dtype=None, arg_id_to_descr=arg_id_to_descr, name_in_target=name_in_target) - self.target = target + object.__setattr__(self, "target", target) def with_types(self, arg_id_to_dtype, callables_table): diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 2d357d3b..445a0b86 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -347,8 +347,7 @@ def neutral_element(self, scalar_dtype, segment_flag_dtype, segment_flag_dtype.numpy_dtype.type(0)), callables_table def result_dtypes(self, scalar_dtype, segment_flag_dtype): - return (self.inner_reduction.result_dtypes(scalar_dtype) - + (segment_flag_dtype,)) + return ((*self.inner_reduction.result_dtypes(scalar_dtype), segment_flag_dtype)) def __str__(self): return "segmented(%s)" % self.which @@ -571,12 +570,12 @@ class ReductionCallable(ScalarCallable): def with_types(self, arg_id_to_dtype, callables_table): scalar_dtype = arg_id_to_dtype[0] index_dtype = arg_id_to_dtype[1] - result_dtypes = self.name.reduction_op.result_dtypes(scalar_dtype, + result_dtypes = self.name.reduction_op.result_dtypes(scalar_dtype, # pylint: disable=no-member index_dtype) new_arg_id_to_dtype = arg_id_to_dtype.copy() new_arg_id_to_dtype[-1] = result_dtypes[0] new_arg_id_to_dtype[-2] = result_dtypes[1] - name_in_target = self.name.reduction_op.prefix(scalar_dtype, + name_in_target = self.name.reduction_op.prefix(scalar_dtype, # pylint: disable=no-member index_dtype) + "_op" return self.copy(arg_id_to_dtype=new_arg_id_to_dtype, @@ -594,7 +593,7 @@ def with_descrs(self, arg_id_to_descr, callables_table): class ArgExtOpCallable(ReductionCallable): def generate_preambles(self, target): - op = self.name.reduction_op + op = self.name.reduction_op # pylint: disable=no-member scalar_dtype = self.arg_id_to_dtype[-1] index_dtype = self.arg_id_to_dtype[-2] @@ -630,7 +629,7 @@ def generate_preambles(self, target): class SegmentOpCallable(ReductionCallable): def generate_preambles(self, target): - op = self.name.reduction_op + op = self.name.reduction_op # pylint: disable=no-member scalar_dtype = self.arg_id_to_dtype[-1] segment_flag_dtype = self.arg_id_to_dtype[-2] prefix = op.prefix(scalar_dtype, segment_flag_dtype) diff --git a/loopy/options.py b/loopy/options.py index 29367077..1c798f7a 100644 --- a/loopy/options.py +++ b/loopy/options.py @@ -23,7 +23,8 @@ import os import re -from typing import Any +from collections.abc import Mapping +from typing import Any, ClassVar from warnings import warn from pytools import ImmutableRecord @@ -198,7 +199,7 @@ class Options(ImmutableRecord): RAW, WAR and WAW races. """ - _legacy_options_map = { + _legacy_options_map: ClassVar[Mapping[str, tuple[str, None] | None]] = { "cl_build_options": ("build_options", None), "write_cl": ("write_code", None), "highlight_cl": None, @@ -332,7 +333,7 @@ def _style(self): return _ColoramaStub() -KEY_VAL_RE = re.compile("^([a-zA-Z0-9]+)=(.*)$") +KEY_VAL_RE = re.compile(r"^([a-zA-Z0-9]+)=(.*)$") def make_options(options_arg): diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 7eeae715..98026fda 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -266,7 +266,7 @@ def make_args_for_offsets_and_strides(kernel: LoopKernel) -> LoopKernel: else: raise LoopyError(f"invalid value of {what}") - new_dim_tags = new_dim_tags + (dim_tag,) + new_dim_tags = (*new_dim_tags, dim_tag) arg = arg.copy(dim_tags=new_dim_tags) @@ -499,7 +499,7 @@ def check_atomic_loads(kernel): for x in missed: if {x} & atomicity_candidates: insn = insn.copy( - atomicity=insn.atomicity + (AtomicLoad(x),)) + atomicity=(*insn.atomicity, AtomicLoad(x))) new_insns.append(insn) diff --git a/loopy/schedule/__init__.py b/loopy/schedule/__init__.py index 2460f5ed..73a23a98 100644 --- a/loopy/schedule/__init__.py +++ b/loopy/schedule/__init__.py @@ -1,5 +1,7 @@ from __future__ import annotations +from loopy.kernel.function_interface import InKernelCallable +from loopy.translation_unit import FunctionIdT from loopy.typing import not_none @@ -1020,7 +1022,7 @@ def _generate_loop_schedules_v2(kernel: LoopKernel) -> Sequence[ScheduleItem]: def iname_key(iname: str) -> str: all_ancestors = sorted(loop_tree.ancestors(iname), key=lambda x: loop_tree.depth(x)) - return ",".join(all_ancestors+[iname]) + return ",".join([*all_ancestors, iname]) def key(x: ScheduleItem) -> tuple[str, ...]: if isinstance(x, RunInstruction): @@ -1097,7 +1099,7 @@ def _generate_loop_schedules_internal( assert sched_state.within_subkernel is False yield from _generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + schedule=(*sched_state.schedule, next_preschedule_item), preschedule=sched_state.preschedule[1:], within_subkernel=True, may_schedule_global_barriers=False, @@ -1110,7 +1112,7 @@ def _generate_loop_schedules_internal( if sched_state.active_inames == sched_state.enclosing_subkernel_inames: yield from _generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + schedule=(*sched_state.schedule, next_preschedule_item), preschedule=sched_state.preschedule[1:], within_subkernel=False, may_schedule_global_barriers=True), @@ -1129,7 +1131,7 @@ def _generate_loop_schedules_internal( and next_preschedule_item.originating_insn_id is None): yield from _generate_loop_schedules_internal( sched_state.copy( - schedule=sched_state.schedule + (next_preschedule_item,), + schedule=(*sched_state.schedule, next_preschedule_item), preschedule=sched_state.preschedule[1:]), debug=debug) @@ -1289,7 +1291,7 @@ def insn_sort_key(insn_id): unscheduled_insn_ids=sched_state.unscheduled_insn_ids - iid_set, insn_ids_to_try=new_insn_ids_to_try, schedule=( - sched_state.schedule + (RunInstruction(insn_id=insn.id),)), + (*sched_state.schedule, RunInstruction(insn_id=insn.id))), preschedule=( sched_state.preschedule if insn_id not in sched_state.prescheduled_insn_ids @@ -1403,8 +1405,8 @@ def insn_sort_key(insn_id): for sub_sched in _generate_loop_schedules_internal( sched_state.copy( schedule=( - sched_state.schedule - + (LeaveLoop(iname=last_entered_loop),)), + (*sched_state.schedule, + LeaveLoop(iname=last_entered_loop))), active_inames=sched_state.active_inames[:-1], insn_ids_to_try=insn_ids_to_try, preschedule=( @@ -1613,10 +1615,9 @@ def insn_sort_key(insn_id): for sub_sched in _generate_loop_schedules_internal( sched_state.copy( schedule=( - sched_state.schedule - + (EnterLoop(iname=iname),)), + (*sched_state.schedule, EnterLoop(iname=iname))), active_inames=( - sched_state.active_inames + (iname,)), + (*sched_state.active_inames, iname)), entered_inames=( sched_state.entered_inames | frozenset((iname,))), @@ -2446,7 +2447,7 @@ def get_one_linearized_kernel( callables_table) if CACHING_ENABLED and not from_cache: - schedule_cache.store_if_not_present(sched_cache_key, result) # pylint: disable=possibly-used-before-assignment # noqa: E501 + schedule_cache.store_if_not_present(sched_cache_key, result) # pylint: disable=possibly-used-before-assignment return result @@ -2466,7 +2467,7 @@ def linearize(t_unit: TranslationUnit) -> TranslationUnit: pre_schedule_checks(t_unit) - new_callables = {} + new_callables: dict[FunctionIdT, InKernelCallable] = {} for name, clbl in t_unit.callables_table.items(): if isinstance(clbl, CallableKernel): diff --git a/loopy/schedule/device_mapping.py b/loopy/schedule/device_mapping.py index a0345049..c5c65385 100644 --- a/loopy/schedule/device_mapping.py +++ b/loopy/schedule/device_mapping.py @@ -45,9 +45,9 @@ def map_schedule_onto_host_or_device(kernel): if not kernel.target.split_kernel_at_global_barriers(): new_schedule = ( - [CallKernel(kernel_name=device_prog_name_gen())] + - list(kernel.linearization) + - [ReturnFromKernel(kernel_name=kernel.name)]) + [CallKernel(kernel_name=device_prog_name_gen()), + *kernel.linearization, + ReturnFromKernel(kernel_name=kernel.name)]) kernel = kernel.copy(linearization=new_schedule) else: kernel = map_schedule_onto_host_or_device_impl( @@ -92,19 +92,13 @@ def inner_mapper(start_idx, end_idx, new_schedule): schedule_required_splitting = True if current_chunk: new_schedule.extend( - [dummy_call.copy()] + - current_chunk + - [dummy_return.copy()]) + [dummy_call.copy(), *current_chunk, dummy_return.copy()]) new_schedule.extend( - [start_item] + - inner_schedule + - [end_item]) + [start_item, *inner_schedule, end_item]) current_chunk = [] else: current_chunk.extend( - [start_item] + - inner_schedule + - [end_item]) + [start_item, *inner_schedule, end_item]) elif isinstance(sched_item, Barrier): if sched_item.synchronization_kind == "global": @@ -112,9 +106,7 @@ def inner_mapper(start_idx, end_idx, new_schedule): schedule_required_splitting = True if current_chunk: new_schedule.extend( - [dummy_call.copy()] + - current_chunk + - [dummy_return.copy()]) + [dummy_call.copy(), *current_chunk, dummy_return.copy()]) new_schedule.append(sched_item) current_chunk = [] else: @@ -127,9 +119,7 @@ def inner_mapper(start_idx, end_idx, new_schedule): if current_chunk and schedule_required_splitting: # Wrap remainder of schedule into a kernel call. new_schedule.extend( - [dummy_call.copy()] + - current_chunk + - [dummy_return.copy()]) + [dummy_call.copy(), *current_chunk, dummy_return.copy()]) else: new_schedule.extend(current_chunk) @@ -142,9 +132,7 @@ def inner_mapper(start_idx, end_idx, new_schedule): if not split_kernel: # Wrap everything into a kernel call. new_schedule = ( - [dummy_call.copy()] + - new_schedule + - [dummy_return.copy()]) + [dummy_call.copy(), *new_schedule, dummy_return.copy()]) # Assign names to CallKernel / ReturnFromKernel instructions diff --git a/loopy/schedule/tree.py b/loopy/schedule/tree.py index e98724f8..327fb65c 100644 --- a/loopy/schedule/tree.py +++ b/loopy/schedule/tree.py @@ -34,9 +34,10 @@ THE SOFTWARE. """ +import operator from collections.abc import Hashable, Iterator, Sequence from dataclasses import dataclass -from functools import cached_property +from functools import cached_property, reduce from typing import Generic, TypeVar from immutables import Map @@ -103,7 +104,7 @@ def ancestors(self, node: NodeT) -> tuple[NodeT, ...]: parent = self._child_to_parent[node] assert parent is not None - return (parent,) + self.ancestors(parent) + return (parent, *self.ancestors(parent)) def parent(self, node: NodeT) -> NodeT | None: """ @@ -162,7 +163,7 @@ def add_node(self, node: NodeT, parent: NodeT) -> Tree[NodeT]: siblings = self._parent_to_children[parent] return Tree((self._parent_to_children - .set(parent, siblings + (node,)) + .set(parent, (*siblings, node)) .set(node, ())), self._child_to_parent.set(node, parent)) @@ -231,7 +232,7 @@ def move_node(self, node: NodeT, new_parent: NodeT | None) -> Tree[NodeT]: assert parent is not None # parent=root handled as a special case siblings = self.children(parent) parents_new_children = tuple(frozenset(siblings) - frozenset([node])) - new_parents_children = self.children(new_parent) + (node,) + new_parents_children = (*self.children(new_parent), node) new_child_to_parent = self._child_to_parent.set(node, new_parent) new_parent_to_children = (self._parent_to_children @@ -276,7 +277,7 @@ def post_process_last_child(children: Sequence[str]) -> list[str]: for c in children_result[:-1]] + [post_process_last_child(c) for c in children_result[-1:]]) - return [str(node)] + sum(children_result, start=[]) + return [str(node), *reduce(operator.iadd, children_result, [])] return "\n".join(rec(self.root)) diff --git a/loopy/statistics.py b/loopy/statistics.py index fd697bc4..657ee9e2 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -25,7 +25,9 @@ THE SOFTWARE. """ +from collections.abc import Sequence from functools import cached_property, partial +from typing import ClassVar import islpy as isl from islpy import dim_type @@ -391,7 +393,7 @@ def group_by(self, *args): # make sure all item keys have same type if self.count_map: - key_type = type(list(self.keys())[0]) + key_type = type(next(iter(self.keys()))) if not all(isinstance(x, key_type) for x in self.keys()): raise ValueError("ToCountMap: group_by() function may only " "be used on ToCountMaps with uniform keys") @@ -598,7 +600,7 @@ class CountGranularity: WORKITEM = "workitem" SUBGROUP = "subgroup" WORKGROUP = "workgroup" - ALL = [WORKITEM, SUBGROUP, WORKGROUP] + ALL: ClassVar[Sequence[str]] = [WORKITEM, SUBGROUP, WORKGROUP] # }}} @@ -639,10 +641,10 @@ class Op(ImmutableRecord): def __init__(self, dtype=None, name=None, count_granularity=None, kernel_name=None): - if count_granularity not in CountGranularity.ALL+[None]: + if count_granularity not in [*CountGranularity.ALL, None]: raise ValueError("Op.__init__: count_granularity '%s' is " "not allowed. count_granularity options: %s" - % (count_granularity, CountGranularity.ALL+[None])) + % (count_granularity, [*CountGranularity.ALL, None])) if dtype is not None: from loopy.types import to_loopy_type @@ -735,10 +737,10 @@ def __init__(self, mtype=None, dtype=None, lid_strides=None, gid_strides=None, *, variable_tags=None, count_granularity=None, kernel_name=None): - if count_granularity not in CountGranularity.ALL+[None]: + if count_granularity not in [*CountGranularity.ALL, None]: raise ValueError("Op.__init__: count_granularity '%s' is " "not allowed. count_granularity options: %s" - % (count_granularity, CountGranularity.ALL+[None])) + % (count_granularity, [*CountGranularity.ALL, None])) if variable_tags is None: variable_tags = frozenset() @@ -1652,7 +1654,7 @@ def _get_insn_count(knl, callables_table, insn_id, subgroup_size, # this should not happen since this is enforced in Op/MemAccess raise ValueError("get_insn_count: count_granularity '%s' is" "not allowed. count_granularity options: %s" - % (count_granularity, CountGranularity.ALL+[None])) + % (count_granularity, [*CountGranularity.ALL, None])) # }}} @@ -1768,7 +1770,7 @@ def get_op_map(program, count_redundant_work=False, if len(program.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in program.entrypoints @@ -1995,7 +1997,7 @@ def get_mem_access_map(program, count_redundant_work=False, if len(program.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in program.entrypoints @@ -2116,7 +2118,7 @@ def get_synchronization_map(program, subgroup_size=None, entrypoint=None): if len(program.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in program.entrypoints from loopy.preprocess import infer_unknown_types, preprocess_program @@ -2175,7 +2177,7 @@ def gather_access_footprints(program, ignore_uncountable=False, entrypoint=None) if len(program.entrypoints) > 1: raise LoopyError("Must provide entrypoint") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) assert entrypoint in program.entrypoints @@ -2205,10 +2207,10 @@ def gather_access_footprints(program, ignore_uncountable=False, entrypoint=None) result = {} for vname, footprint in write_footprints.items(): - result[(vname, "write")] = footprint + result[vname, "write"] = footprint for vname, footprint in read_footprints.items(): - result[(vname, "read")] = footprint + result[vname, "read"] = footprint return result diff --git a/loopy/symbolic.py b/loopy/symbolic.py index d30581db..964bd4d0 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -33,8 +33,10 @@ TYPE_CHECKING, AbstractSet, Any, + ClassVar, Mapping, Sequence, + TypeAlias, cast, ) from warnings import warn @@ -1283,7 +1285,7 @@ def map_substitution(self, name, tags, arguments, expn_state, rec_arguments = self.rec(arguments, expn_state, *args, **kwargs) new_expn_state = expn_state.copy( - stack=expn_state.stack + ((name, tags),), + stack=(*expn_state.stack, (name, tags)), arg_context=self.make_new_arg_context( name, rule.arguments, rec_arguments, expn_state.arg_context)) @@ -1428,7 +1430,7 @@ def __init__(self, rule_mapping_context, rules, within): self.within = within def map_substitution(self, name, tags, arguments, expn_state): - new_stack = expn_state.stack + ((name, tags),) + new_stack = (*expn_state.stack, (name, tags)) if self.within(expn_state.kernel, expn_state.instruction, new_stack): # expand @@ -1573,11 +1575,15 @@ def map_call(self, expr): _open_dbl_bracket = intern("open_dbl_bracket") -TRAILING_FLOAT_TAG_RE = re.compile("^(.*?)([a-zA-Z]*)$") +TRAILING_FLOAT_TAG_RE = re.compile(r"^(.*?)([a-zA-Z]*)$") + + +LexTable: TypeAlias = Sequence[ + tuple[str, pytools.lex.RE | tuple[str | pytools.lex.RE, ...]]] class LoopyParser(ParserBase): - lex_table = [ + lex_table: ClassVar[LexTable] = [ (_open_dbl_bracket, pytools.lex.RE(r"\[\[")), *ParserBase.lex_table ] diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index a2961eee..98dbe35c 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -26,7 +26,7 @@ import re from typing import Any, Optional, Sequence, Tuple, cast -import numpy as np # noqa +import numpy as np import pymbolic.primitives as p from cgen import ( @@ -259,7 +259,7 @@ def _preamble_generator(preamble_info, func_qualifier="inline"): inline {res_ctype} {func.c_name}({base_ctype} x, {exp_ctype} n) {{ if (n == 0) return 1; - {re.sub("^", 14*" ", signed_exponent_preamble, flags=re.M)} + {re.sub(r"^", 14*" ", signed_exponent_preamble, flags=re.M)} {res_ctype} y = 1; @@ -414,8 +414,8 @@ class CFamilyTarget(TargetBase): usable as a common base for C99, C++, OpenCL, CUDA, and the like. """ - hash_fields = TargetBase.hash_fields + ("fortran_abi",) - comparison_fields = TargetBase.comparison_fields + ("fortran_abi",) + hash_fields = (*TargetBase.hash_fields, "fortran_abi") + comparison_fields = (*TargetBase.comparison_fields, "fortran_abi") def __init__(self, fortran_abi=False): self.fortran_abi = fortran_abi @@ -772,16 +772,13 @@ class CFamilyASTBuilder(ASTBuilderBase[Generable]): def symbol_manglers(self): return ( - super().symbol_manglers() + [ - c_symbol_mangler - ]) + [*super().symbol_manglers(), c_symbol_mangler]) def preamble_generators(self): return ( - super().preamble_generators() + [ - lambda preamble_info: _preamble_generator(preamble_info, - self.preamble_function_qualifier), - ]) + [*super().preamble_generators(), + lambda preamble_info: _preamble_generator( + preamble_info, self.preamble_function_qualifier)]) @property def known_callables(self): @@ -837,7 +834,7 @@ def get_function_definition( if not result: return fbody else: - return Collection(result+[Line(), fbody]) + return Collection([*result, Line(), fbody]) def get_function_declaration( self, codegen_state: CodeGenerationState, @@ -1281,7 +1278,7 @@ def emit_sequential_loop(self, codegen_state, iname, iname_dtype, inner) if hints: - return Collection(list(hints) + [loop]) + return Collection([*list(hints), loop]) else: return loop @@ -1397,9 +1394,7 @@ def get_dtype_registry(self): class CASTBuilder(CFamilyASTBuilder): def preamble_generators(self): return ( - super().preamble_generators() + [ - c99_preamble_generator, - ]) + [*super().preamble_generators(), c99_preamble_generator]) # }}} diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 6bc496f5..87343090 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -25,7 +25,7 @@ import os import tempfile from dataclasses import dataclass -from typing import Any, Callable, Optional, Sequence, Tuple, Union +from typing import Any, Callable, ClassVar, Optional, Sequence, Tuple, Union import numpy as np from codepy.jit import compile_from_string @@ -365,15 +365,15 @@ def __init__(self, toolchain=None, # {{{ placeholder till ctypes fixes: https://github.com/python/cpython/issues/61103 class Complex64(ctypes.Structure): - _fields_ = [("real", ctypes.c_float), ("imag", ctypes.c_float)] + _fields_: ClassVar = [("real", ctypes.c_float), ("imag", ctypes.c_float)] class Complex128(ctypes.Structure): - _fields_ = [("real", ctypes.c_double), ("imag", ctypes.c_double)] + _fields_: ClassVar = [("real", ctypes.c_double), ("imag", ctypes.c_double)] class Complex256(ctypes.Structure): - _fields_ = [("real", ctypes.c_longdouble), ("imag", ctypes.c_longdouble)] + _fields_: ClassVar = [("real", ctypes.c_longdouble), ("imag", ctypes.c_longdouble)] _NUMPY_COMPLEX_TYPE_TO_CTYPE = { diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index e201326a..82e0bb19 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -641,7 +641,7 @@ def map_constant(self, expr, prec): # FIXME: Add type suffixes? return repr(int(expr)) elif isinstance(expr, np.float32): - return f"{repr(float(expr))}f" + return f"{float(expr)!r}f" elif isinstance(expr, np.float64): return repr(float(expr)) else: diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index afeb5cee..22d663b5 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -369,8 +369,7 @@ def get_function_declaration( def preamble_generators(self): return ( - super().preamble_generators() + [ - cuda_preamble_generator]) + [*super().preamble_generators(), cuda_preamble_generator]) # }}} diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 4200a4b2..d88b99bb 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -26,7 +26,7 @@ from typing import Sequence, Tuple, cast -import numpy as np # noqa +import numpy as np import pymbolic.primitives as p from cgen import Collection, Const, Declarator, Generable @@ -499,7 +499,7 @@ def emit_sequential_loop(self, codegen_state, iname, iname_dtype, inner) if hints: - return Collection(list(hints) + [loop]) + return Collection([*list(hints), loop]) else: return loop diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 14383e54..8250436f 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -618,15 +618,12 @@ def known_callables(self): def symbol_manglers(self): return ( - super().symbol_manglers() + [ - opencl_symbol_mangler - ]) + [*super().symbol_manglers(), opencl_symbol_mangler]) def preamble_generators(self): return ( - super().preamble_generators() + [ - opencl_preamble_generator]) + [*super().preamble_generators(), opencl_preamble_generator]) # }}} diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index fa7fd20e..18336056 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -506,7 +506,7 @@ class PyOpenCLTarget(OpenCLTarget): """ # FIXME make prefixes conform to naming rules - # (see Reference: Loopy’s Model of a Kernel) + # (see Reference: Loopy's Model of a Kernel) host_program_name_prefix = "_lpy_host_" host_program_name_suffix = "" @@ -522,7 +522,7 @@ def __init__( pointer_size_nbytes: Optional[int] = None ) -> None: # This ensures the dtype registry is populated. - import pyopencl.tools # noqa + import pyopencl.tools super().__init__( atomics_flavor=atomics_flavor, @@ -553,10 +553,8 @@ def device(self): return None # NB: Not including 'device', as that is handled specially here. - hash_fields = OpenCLTarget.hash_fields + ( - "pyopencl_module_name",) - comparison_fields = OpenCLTarget.comparison_fields + ( - "pyopencl_module_name",) + hash_fields = (*OpenCLTarget.hash_fields, "pyopencl_module_name") + comparison_fields = (*OpenCLTarget.comparison_fields, "pyopencl_module_name") def get_host_ast_builder(self): return PyOpenCLPythonASTBuilder(self) @@ -774,9 +772,8 @@ def get_function_definition( kai = get_kernel_arg_info(codegen_state.kernel) args = ( - ["_lpy_cl_kernels", "queue"] - + list(kai.passed_arg_names) - + ["wait_for=None", "allocator=None"]) + ["_lpy_cl_kernels", "queue", *kai.passed_arg_names, + "wait_for=None", "allocator=None"]) from genpy import For, Function, Line, Return, Statement as S, Suite return Function( @@ -920,7 +917,7 @@ def get_kernel_call( "_lpy_cl.mem_flags.READ_ONLY " "| _lpy_cl.mem_flags.COPY_HOST_PTR, " "hostbuf=" - f"_lpy_pack({repr(''.join(struct_pack_types))}, " + f"_lpy_pack({''.join(struct_pack_types)!r}, " f"{', '.join(struct_pack_args)}))"), Line(f"_lpy_knl.set_arg({cl_arg_count}, _lpy_overflow_args_buf)") ]) @@ -1096,7 +1093,7 @@ def get_function_definition( if not result: return fbody else: - return Collection(result+[Line(), fbody]) + return Collection([*result, Line(), fbody]) def get_function_declaration( self, codegen_state: CodeGenerationState, @@ -1195,9 +1192,7 @@ def known_callables(self): return callables def preamble_generators(self): - return ([ - pyopencl_preamble_generator, - ] + super().preamble_generators()) + return ([pyopencl_preamble_generator, *super().preamble_generators()]) # }}} diff --git a/loopy/target/pyopencl_execution.py b/loopy/target/pyopencl_execution.py index 248f5f2e..02781a8d 100644 --- a/loopy/target/pyopencl_execution.py +++ b/loopy/target/pyopencl_execution.py @@ -201,9 +201,8 @@ def generate_invocation(self, gen: CodeGenerator, kernel: LoopKernel, gen("") - arg_list = (["_lpy_cl_kernels", "queue"] - + list(args) - + ["wait_for=wait_for", "allocator=allocator"]) + arg_list = (["_lpy_cl_kernels", "queue", *args, + "wait_for=wait_for", "allocator=allocator"]) gen(f"_lpy_evt = {host_program_name}({', '.join(arg_list)})") if kernel.options.cl_exec_manage_array_events: diff --git a/loopy/target/python.py b/loopy/target/python.py index 3a8747f3..a419e6e3 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -33,7 +33,7 @@ from loopy.codegen import CodeGenerationState from loopy.codegen.result import CodeGenerationResult -from loopy.diagnostic import LoopyError # noqa +from loopy.diagnostic import LoopyError from loopy.kernel.data import ValueArg from loopy.target import ASTBuilderBase from loopy.type_inference import TypeReader @@ -161,9 +161,7 @@ def known_callables(self): def preamble_generators(self): return ( - super().preamble_generators() + [ - _base_python_preamble_generator - ]) + [*super().preamble_generators(), _base_python_preamble_generator]) # {{{ code generation guts diff --git a/loopy/tools.py b/loopy/tools.py index bb4904bf..ff66e36b 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -136,8 +136,8 @@ def hash_key(self): kb = LoopyKeyBuilder() # Build the key. For faster hashing, avoid hashing field names. key = ( - (self.class_.__name__.encode("utf-8"),) + - tuple(self.field_dict[k] for k in sorted(self.field_dict.keys()))) + (self.class_.__name__.encode("utf-8"), + *(self.field_dict[k] for k in sorted(self.field_dict.keys())))) return kb(key) @@ -242,25 +242,14 @@ def build_ispc_shared_lib( from subprocess import check_call - ispc_cmd = ([ispc_bin, - "--pic", - "-o", "ispc.o"] - + ispc_options - + list(ispc_source_names)) + ispc_cmd = ([ispc_bin, "--pic", "-o", "ispc.o", *ispc_options, *ispc_source_names]) if not quiet: print(" ".join(ispc_cmd)) check_call(ispc_cmd, cwd=cwd) - cxx_cmd = ([ - cxx_bin, - "-shared", "-Wl,--export-dynamic", - "-fPIC", - "-oshared.so", - "ispc.o", - ] - + cxx_options - + list(cxx_source_names)) + cxx_cmd = ([cxx_bin, "-shared", "-Wl,--export-dynamic", "-fPIC", "-oshared.so", + "ispc.o", *cxx_options, *cxx_source_names]) check_call(cxx_cmd, cwd=cwd) @@ -535,7 +524,7 @@ class Optional: The value, if present. """ - __slots__ = ("has_value", "_value") + __slots__ = ("_value", "has_value") def __init__(self, value=_no_value): self.has_value = value is not _no_value @@ -828,7 +817,7 @@ def t_unit_to_python(t_unit, var_name="t_unit", "from pymbolic.primitives import *", "import immutables", ]) - body_str = "\n".join(knl_python_code_srcs + ["\n", merge_stmt]) + body_str = "\n".join([*knl_python_code_srcs, "\n", merge_stmt]) python_code = "\n".join([preamble_str, "\n", body_str]) assert _is_generated_t_unit_the_same(python_code, var_name, t_unit) diff --git a/loopy/transform/add_barrier.py b/loopy/transform/add_barrier.py index 7ab5e376..e4112065 100644 --- a/loopy/transform/add_barrier.py +++ b/loopy/transform/add_barrier.py @@ -89,7 +89,7 @@ def add_barrier(kernel, insn_before="", insn_after="", id_based_on=None, synchronization_kind=synchronization_kind, mem_kind=mem_kind) - new_kernel = kernel.copy(instructions=kernel.instructions + [barrier_to_add]) + new_kernel = kernel.copy(instructions=[*kernel.instructions, barrier_to_add]) if insn_after is not None: new_kernel = add_dependency(new_kernel, insn_match=insn_after, diff --git a/loopy/transform/batch.py b/loopy/transform/batch.py index 04c5ea38..857f7d48 100644 --- a/loopy/transform/batch.py +++ b/loopy/transform/batch.py @@ -83,7 +83,7 @@ def map_subscript(self, expr, expn_state): if not isinstance(idx, tuple): idx = (idx,) - return type(expr)(expr.aggregate, (self.batch_iname_expr,) + idx) + return type(expr)(expr.aggregate, (self.batch_iname_expr, *idx)) def map_variable(self, expr, expn_state): if not self.needs_batch_subscript(expr.name): @@ -98,7 +98,7 @@ def _add_unique_dim_name(name, dim_names): from pytools import UniqueNameGenerator ng = UniqueNameGenerator(set(dim_names)) - return (ng(name),) + tuple(dim_names) + return (ng(name), *tuple(dim_names)) @for_each_kernel @@ -143,7 +143,7 @@ def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch" nbatches_expr = nbatches batch_domain = isl.BasicSet(batch_dom_str) - new_domains = [batch_domain] + kernel.domains + new_domains = [batch_domain, *kernel.domains] for arg in kernel.args: if arg.name in batch_varying_args: @@ -152,7 +152,7 @@ def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch" dim_tags="c") else: arg = arg.copy( - shape=(nbatches_expr,) + arg.shape, + shape=(nbatches_expr, *arg.shape), dim_tags=("c",) * (len(arg.shape) + 1), dim_names=_add_unique_dim_name("ibatch", arg.dim_names)) @@ -168,7 +168,7 @@ def to_batched(kernel, nbatches, batch_varying_args, batch_iname_prefix="ibatch" for temp in kernel.temporary_variables.values(): if temp_needs_batching_if_not_sequential(temp, batch_varying_args): new_temps[temp.name] = temp.copy( - shape=(nbatches_expr,) + temp.shape, + shape=(nbatches_expr, *temp.shape), dim_tags=("c",) * (len(temp.shape) + 1), dim_names=_add_unique_dim_name("ibatch", temp.dim_names)) else: diff --git a/loopy/transform/data.py b/loopy/transform/data.py index 2e19eea7..b4fc190d 100644 --- a/loopy/transform/data.py +++ b/loopy/transform/data.py @@ -124,7 +124,7 @@ def _process_footprint_subscripts(kernel, rule_name, sweep_inames, kernel = _add_kernel_axis(kernel, axis_name, 0, arg.shape[axis_nr], frozenset(sweep_inames) | fsub_dependencies) - sweep_inames = sweep_inames + [axis_name] + sweep_inames = [*sweep_inames, axis_name] inames_to_be_removed.append(axis_name) new_fsub.append(Variable(axis_name)) diff --git a/loopy/transform/iname.py b/loopy/transform/iname.py index 79515409..2dbba93e 100644 --- a/loopy/transform/iname.py +++ b/loopy/transform/iname.py @@ -296,16 +296,16 @@ def _split_iname_backend(kernel, iname_to_split, new_prio = () for prio_iname in prio: if prio_iname == iname_to_split: - new_prio = new_prio + (outer_iname, inner_iname) + new_prio = (*new_prio, outer_iname, inner_iname) else: - new_prio = new_prio + (prio_iname,) + new_prio = (*new_prio, prio_iname) new_priorities.append(new_prio) kernel = kernel.copy( domains=new_domains, iname_slab_increments=iname_slab_increments, instructions=new_insns, - applied_iname_rewrites=kernel.applied_iname_rewrites+(subst_map,), + applied_iname_rewrites=(*kernel.applied_iname_rewrites, subst_map), loop_priority=frozenset(new_priorities)) rule_mapping_context = SubstitutionRuleMappingContext( @@ -630,7 +630,7 @@ def subst_within_inames(fid): .copy( instructions=new_insns, domains=domch.get_domains_with(new_domain), - applied_iname_rewrites=kernel.applied_iname_rewrites + (subst_dict,) + applied_iname_rewrites=(*kernel.applied_iname_rewrites, subst_dict) )) from loopy.match import parse_stack_match @@ -1051,7 +1051,7 @@ def get_iname_duplication_options(kernel): if isinstance(kernel, TranslationUnit): if len([clbl for clbl in kernel.callables_table.values() if isinstance(clbl, CallableKernel)]) == 1: - kernel = kernel[list(kernel.entrypoints)[0]] + kernel = kernel[next(iter(kernel.entrypoints))] assert isinstance(kernel, LoopKernel) @@ -1096,7 +1096,7 @@ def has_schedulable_iname_nesting(kernel): if isinstance(kernel, TranslationUnit): if len([clbl for clbl in kernel.callables_table.values() if isinstance(clbl, CallableKernel)]) == 1: - kernel = kernel[list(kernel.entrypoints)[0]] + kernel = kernel[next(iter(kernel.entrypoints))] return not bool(next(get_iname_duplication_options(kernel), False)) # }}} @@ -1398,7 +1398,7 @@ def parse_equation(eqn): rule_mapping_context.finish_kernel( old_to_new.map_kernel(kernel)) .copy( - applied_iname_rewrites=kernel.applied_iname_rewrites + (subst_dict,) + applied_iname_rewrites=(*kernel.applied_iname_rewrites, subst_dict) )) # }}} @@ -2082,7 +2082,7 @@ def map_domain(kernel, transform_map): substitutions[iname] = subst_from_map var_substitutions[var(iname)] = subst_from_map - applied_iname_rewrites = applied_iname_rewrites + (var_substitutions,) + applied_iname_rewrites = (*applied_iname_rewrites, var_substitutions) del var_substitutions # }}} diff --git a/loopy/transform/pack_and_unpack_args.py b/loopy/transform/pack_and_unpack_args.py index ae5339b5..ca897e00 100644 --- a/loopy/transform/pack_and_unpack_args.py +++ b/loopy/transform/pack_and_unpack_args.py @@ -266,7 +266,7 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, in_knl_callable.arg_id_to_descr[arg_id].shape): iname_set = iname_set & make_slab(space, iname.name, 0, axis_length) - new_domains = new_domains + [iname_set] + new_domains = [*new_domains, iname_set] # }}} @@ -290,8 +290,8 @@ def pack_and_unpack_args_for_call_for_single_kernel(kernel, new_ilp_inames), expression=new_call_insn.expression.function(*new_params), assignees=new_assignees) - old_insn_to_new_insns[insn.id] = (packing_insns + [new_call_insn] + - unpacking_insns) + old_insn_to_new_insns[insn.id] = ([ + *packing_insns, new_call_insn, *unpacking_insns]) if old_insn_to_new_insns: new_instructions = [] diff --git a/loopy/transform/precompute.py b/loopy/transform/precompute.py index 147b6265..831d0c36 100644 --- a/loopy/transform/precompute.py +++ b/loopy/transform/precompute.py @@ -388,11 +388,11 @@ def precompute_for_single_kernel( precompute_outer_inames: Optional[FrozenSet[str]] = None, storage_axis_to_tag=None, - default_tag: Union[None, Tag, str] = None, + default_tag: Union[Tag, str, None] = None, dtype: Optional[ToLoopyTypeConvertible] = None, fetch_bounding_box: bool = False, - temporary_address_space: Union[AddressSpace, None, Type[auto]] = None, + temporary_address_space: Union[AddressSpace, Type[auto], None] = None, compute_insn_id: Optional[str] = None, _enable_mirgecom_workaround: bool = False, ) -> LoopKernel: @@ -1028,7 +1028,7 @@ def add_assumptions(d): and insn.within_inames & prior_storage_axis_names): insn = (insn .with_transformed_expressions( - lambda expr: expr_subst_map(expr, kernel, insn)) # noqa: B023,E501 + lambda expr: expr_subst_map(expr, kernel, insn)) # noqa: B023 .copy(within_inames=frozenset( new_iname for iname in insn.within_inames diff --git a/loopy/transform/realize_reduction.py b/loopy/transform/realize_reduction.py index e981ad4b..f42a8ce1 100644 --- a/loopy/transform/realize_reduction.py +++ b/loopy/transform/realize_reduction.py @@ -1124,7 +1124,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx=red_realize_ctx, name_based_on="acc_"+red_iname, nvars=nresults, - shape=outer_local_iname_sizes + (size,), + shape=(*outer_local_iname_sizes, size), dtypes=reduction_dtypes, address_space=AddressSpace.LOCAL) @@ -1151,7 +1151,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, init_insn = make_assignment( id=init_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(base_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(base_exec_iname))] for acc_var in acc_vars), expression=neutral, within_inames=( @@ -1234,7 +1234,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, transfer_insn = make_assignment( id=transfer_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(red_iname),)] + acc_var[(*outer_local_iname_vars, var(red_iname))] for acc_var in acc_vars), expression=expression, **transfer_red_realize_ctx.get_insn_kwargs()) @@ -1269,12 +1269,11 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, arg_dtypes, _strip_if_scalar(acc_vars, tuple( acc_var[ - outer_local_iname_vars + (var(stage_exec_iname),)] + (*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars)), _strip_if_scalar(acc_vars, tuple( acc_var[ - outer_local_iname_vars + ( - var(stage_exec_iname) + new_size,)] + (*outer_local_iname_vars, var(stage_exec_iname) + new_size)] for acc_var in acc_vars)), red_realize_ctx.boxed_callables_table[0], orig_kernel.target) @@ -1282,7 +1281,7 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, stage_insn = make_assignment( id=stage_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars), expression=expression, within_inames=( @@ -1307,9 +1306,9 @@ def map_reduction_local(red_realize_ctx, expr, nresults, arg_dtypes, if nresults == 1: assert len(acc_vars) == 1 - return acc_vars[0][outer_local_iname_vars + (0,)] + return acc_vars[0][(*outer_local_iname_vars, 0)] else: - return [acc_var[outer_local_iname_vars + (0,)] for acc_var in + return [acc_var[(*outer_local_iname_vars, 0)] for acc_var in acc_vars] # }}} @@ -1419,7 +1418,7 @@ def map_scan_seq(red_realize_ctx, expr, nresults, arg_dtypes, assignees=acc_vars, within_inames=( red_realize_ctx.surrounding_within_inames - - frozenset((scan_param.sweep_iname,) + expr.inames)), + - frozenset((scan_param.sweep_iname, *expr.inames))), within_inames_is_final=True, depends_on=init_insn_depends_on, expression=expression, @@ -1558,7 +1557,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, red_realize_ctx=red_realize_ctx, name_based_on="acc_"+scan_param.scan_iname, nvars=nresults, - shape=outer_local_iname_sizes + (scan_size,), + shape=(*outer_local_iname_sizes, scan_size), dtypes=reduction_dtypes, address_space=AddressSpace.LOCAL) @@ -1579,7 +1578,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, init_insn = make_assignment( id=init_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(base_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(base_exec_iname))] for acc_var in acc_vars), expression=neutral, within_inames=base_iname_deps | frozenset([base_exec_iname]), @@ -1640,8 +1639,10 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, f"{red_realize_ctx.id_prefix}_{scan_param.scan_iname}_transfer") transfer_insn = make_assignment( id=transfer_id, - assignees=(acc_var[outer_local_iname_vars - + (var(scan_param.sweep_iname) - sweep_lower_bound_expr,)],), + assignees=(acc_var[( + *outer_local_iname_vars, + var(scan_param.sweep_iname) - sweep_lower_bound_expr) + ],), expression=pre_scan_result_i, within_inames=( red_realize_ctx.surrounding_within_inames @@ -1684,8 +1685,8 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, assignees=(read_var,), expression=( acc_var[ - outer_local_iname_vars - + (var(stage_exec_iname) - cur_size,)]), + (*outer_local_iname_vars, + var(stage_exec_iname) - cur_size)]), within_inames=( base_iname_deps | frozenset([stage_exec_iname])), within_inames_is_final=True, @@ -1713,7 +1714,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, _strip_if_scalar(acc_vars, read_vars), _strip_if_scalar(acc_vars, tuple( acc_var[ - outer_local_iname_vars + (var(stage_exec_iname),)] + (*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars)), red_realize_ctx.boxed_callables_table[0], orig_kernel.target) @@ -1721,7 +1722,7 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, write_stage_insn = make_assignment( id=write_stage_id, assignees=tuple( - acc_var[outer_local_iname_vars + (var(stage_exec_iname),)] + acc_var[(*outer_local_iname_vars, var(stage_exec_iname))] for acc_var in acc_vars), expression=expression, within_inames=( @@ -1744,9 +1745,9 @@ def map_scan_local(red_realize_ctx, expr, nresults, arg_dtypes, if nresults == 1: assert len(acc_vars) == 1 - return acc_vars[0][outer_local_iname_vars + (output_idx,)] + return acc_vars[0][(*outer_local_iname_vars, output_idx)] else: - return [acc_var[outer_local_iname_vars + (output_idx,)] + return [acc_var[(*outer_local_iname_vars, output_idx)] for acc_var in acc_vars] # }}} diff --git a/loopy/transform/save.py b/loopy/transform/save.py index bd25dec3..2b874f67 100644 --- a/loopy/transform/save.py +++ b/loopy/transform/save.py @@ -77,7 +77,7 @@ def get_successor_relation(self): for idx, (item, next_item) in enumerate(zip( reversed(self.schedule), - reversed(self.schedule + [None]))): + reversed([*self.schedule, None]))): sched_idx = len(self.schedule) - idx - 1 # Look at next_item @@ -760,7 +760,7 @@ def save_and_reload_temporaries(program, entrypoint=None): if entrypoint is None: if len(program.entrypoints) != 1: raise LoopyError("Missing argument 'entrypoint'.") - entrypoint = list(program.entrypoints)[0] + entrypoint = next(iter(program.entrypoints)) knl = program[entrypoint] diff --git a/loopy/transform/subst.py b/loopy/transform/subst.py index 422d2256..9c3bafda 100644 --- a/loopy/transform/subst.py +++ b/loopy/transform/subst.py @@ -36,7 +36,7 @@ class ExprDescriptor(ImmutableRecord): - __slots__ = ["insn", "expr", "unif_var_dict"] + __slots__ = ["expr", "insn", "unif_var_dict"] # {{{ extract_subst diff --git a/loopy/translation_unit.py b/loopy/translation_unit.py index 4afdfcef..0f740dac 100644 --- a/loopy/translation_unit.py +++ b/loopy/translation_unit.py @@ -45,7 +45,6 @@ from pymbolic.primitives import Call, Variable from loopy.diagnostic import DirectCallUncachedWarning, LoopyError -from loopy.kernel import LoopKernel from loopy.kernel.function_interface import ( CallableKernel, InKernelCallable, @@ -61,6 +60,7 @@ if TYPE_CHECKING: + from loopy.kernel import LoopKernel from loopy.target.execution import ExecutorBase @@ -336,6 +336,7 @@ def default_entrypoint(self) -> LoopKernel: ep_name, = self.entrypoints entrypoint = self[ep_name] + from loopy import LoopKernel if not isinstance(entrypoint, LoopKernel): raise ValueError("default entrypoint is not a kernel") @@ -749,7 +750,7 @@ def __getitem__(self, name): # }}} -TUnitOrKernelT = TypeVar("TUnitOrKernelT", LoopKernel, TranslationUnit) +TUnitOrKernelT = TypeVar("TUnitOrKernelT", "LoopKernel", TranslationUnit) # {{{ helper functions @@ -778,6 +779,7 @@ def _collective_check( *args: P.args, **kwargs: P.kwargs ) -> None: + from loopy import LoopKernel if isinstance(t_unit_or_kernel, TranslationUnit): for clbl in t_unit_or_kernel.callables_table.values(): if isinstance(clbl, CallableKernel): @@ -807,6 +809,7 @@ def _collective_transform( *args: P.args, **kwargs: P.kwargs ) -> TUnitOrKernelT: + from loopy import LoopKernel if isinstance(t_unit_or_kernel, TranslationUnit): t_unit = t_unit_or_kernel new_callables = {} @@ -886,7 +889,7 @@ def resolve_callables(t_unit: TranslationUnit) -> TranslationUnit: # get loopy specific callables known_callables.update(get_loopy_callables()) - callables_table = {} + callables_table: dict[FunctionIdT, InKernelCallable] = {} # callables: name of the calls seen in the program callables = {name for name, clbl in t_unit.callables_table.items() diff --git a/loopy/types.py b/loopy/types.py index b43026bd..f784799e 100644 --- a/loopy/types.py +++ b/loopy/types.py @@ -202,13 +202,13 @@ def __eq__(self, other: object) -> bool: # }}} -ToLoopyTypeConvertible: TypeAlias = Union[Type[auto], None, np.dtype, LoopyType] +ToLoopyTypeConvertible: TypeAlias = Union[Type[auto], np.dtype, LoopyType, None] def to_loopy_type(dtype: ToLoopyTypeConvertible, allow_auto: bool = False, allow_none: bool = False, for_atomic: bool = False - ) -> Union[Type[auto], None, LoopyType]: + ) -> Union[Type[auto], LoopyType, None]: if dtype is None: if allow_none: return None diff --git a/proto-tests/test_fem_assembly.py b/proto-tests/test_fem_assembly.py index 9103c42c..b52ec460 100644 --- a/proto-tests/test_fem_assembly.py +++ b/proto-tests/test_fem_assembly.py @@ -28,7 +28,7 @@ def test_laplacian_stiffness(ctx_factory): % dict(Nb=Nb, Nq=Nq, dim=dim), [ "dPsi(ij, dxi) := sum_float32(@ax_b," - " jacInv[ax_b,dxi,K,q] * DPsi[ax_b,ij,q])", # noqa + " jacInv[ax_b,dxi,K,q] * DPsi[ax_b,ij,q])", "A[K, i, j] = sum_float32(q, w[q] * jacDet[K,q] * (" "sum_float32(dx_axis, dPsi$one(i,dx_axis)*dPsi$two(j,dx_axis))))" ], @@ -77,7 +77,7 @@ def variant_fig33(knl): Ncloc = 16 # noqa knl = lp.split_iname(knl, "K", Ncloc, outer_iname="Ko", inner_iname="Kloc") - knl = lp.precompute(knl, "dPsi$one", np.float32, ["dx_axis"], default_tag=None) # noqa + knl = lp.precompute(knl, "dPsi$one", np.float32, ["dx_axis"], default_tag=None) knl = lp.tag_inames(knl, {"j": "ilp.seq"}) return knl, ["Ko", "Kloc"] diff --git a/proto-tests/test_sem.py b/proto-tests/test_sem.py index d87126cf..acb7d342 100644 --- a/proto-tests/test_sem.py +++ b/proto-tests/test_sem.py @@ -53,7 +53,7 @@ def test_laplacian(ctx_factory): [ lp.GlobalArg("u", dtype, shape=field_shape, order=order), lp.GlobalArg("lap", dtype, shape=field_shape, order=order), - lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(6, *field_shape), order=order), lp.GlobalArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -139,7 +139,7 @@ def test_laplacian_lmem(ctx_factory): [ lp.GlobalArg("u", dtype, shape=field_shape, order=order), lp.GlobalArg("lap", dtype, shape=field_shape, order=order), - lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(6, *field_shape), order=order), lp.GlobalArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -216,7 +216,7 @@ def test_laplacian_lmem_ilp(ctx_factory): [ lp.GlobalArg("u", dtype, shape=field_shape, order=order), lp.GlobalArg("lap", dtype, shape=field_shape, order=order), - lp.GlobalArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(6, *field_shape), order=order), lp.GlobalArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -320,7 +320,7 @@ def test_advect(ctx_factory): lp.GlobalArg("Nu", dtype, shape=field_shape, order=order), lp.GlobalArg("Nv", dtype, shape=field_shape, order=order), lp.GlobalArg("Nw", dtype, shape=field_shape, order=order), - lp.GlobalArg("G", dtype, shape=(9,)+field_shape, order=order), + lp.GlobalArg("G", dtype, shape=(9, *field_shape), order=order), lp.GlobalArg("D", dtype, shape=(N, N), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -359,7 +359,7 @@ def test_advect_dealias(ctx_factory): K_sym = var("K") # noqa field_shape = (N, N, N, K_sym) - interim_field_shape = (M, M, M, K_sym) # noqa + interim_field_shape = (M, M, M, K_sym) # 1. direction-by-direction similarity transform on u # 2. invert diagonal diff --git a/proto-tests/test_sem_tim.py b/proto-tests/test_sem_tim.py index 2949b39d..2d0c2fe7 100644 --- a/proto-tests/test_sem_tim.py +++ b/proto-tests/test_sem_tim.py @@ -53,7 +53,7 @@ def test_laplacian(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(6, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -140,7 +140,7 @@ def test_laplacian_lmem(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(6, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -230,7 +230,7 @@ def test_laplacian_lmem_ilp(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(6, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -328,7 +328,7 @@ def test_advect(ctx_factory): lp.ArrayArg("Nu", dtype, shape=field_shape, order=order), lp.ArrayArg("Nv", dtype, shape=field_shape, order=order), lp.ArrayArg("Nw", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(9,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(9, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(N, N), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], @@ -367,7 +367,7 @@ def test_advect_dealias(ctx_factory): K_sym = var("K") # noqa field_shape = (N, N, N, K_sym) - interim_field_shape = (M, M, M, K_sym) # noqa + interim_field_shape = (M, M, M, K_sym) # 1. direction-by-direction similarity transform on u # 2. invert diagonal diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py index eb8125cd..7a519d80 100644 --- a/proto-tests/test_tim.py +++ b/proto-tests/test_tim.py @@ -38,7 +38,7 @@ def test_tim2d(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(3,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(3, *field_shape), order=order), # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), # lp.ImageArg("D", dtype, shape=(n, n)), @@ -46,7 +46,7 @@ def test_tim2d(ctx_factory): ], name="semlap2D", assumptions="K>=1") - unroll = 32 # noqa + unroll = 32 seq_knl = knl knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "o"], default_tag="l.auto") @@ -101,13 +101,13 @@ def test_red2d(ctx_factory): [ lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(3,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(3, *field_shape), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), lp.ValueArg("K", np.int32, approximately=1000), ], name="semlap2D", assumptions="K>=1") - unroll = 32 # noqa + unroll = 32 seq_knl = knl knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "o"], default_tag="l.auto") @@ -168,7 +168,7 @@ def test_tim3d(ctx_factory): lp.ArrayArg("u", dtype, shape=field_shape, order=order), lp.ArrayArg("lap", dtype, shape=field_shape, order=order), - lp.ArrayArg("G", dtype, shape=(6,)+field_shape, order=order), + lp.ArrayArg("G", dtype, shape=(6, *field_shape), order=order), # lp.ConstantArrayArg("D", dtype, shape=(n, n), order=order), lp.ArrayArg("D", dtype, shape=(n, n), order=order), # lp.ImageArg("D", dtype, shape=(n, n)), diff --git a/pyproject.toml b/pyproject.toml index c23c2973..e07302dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -99,9 +99,7 @@ extend-select = [ # TODO # "UP", # pyupgrade - # "RUF", # ruff - - "RUF022", # __all__ isn't sorted + "RUF", # ruff ] extend-ignore = [ "C90", # McCabe complexity diff --git a/test/test_apps.py b/test/test_apps.py index c4cffaee..ce8b9701 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -27,7 +27,7 @@ import pytest import pyopencl as cl -import pyopencl.clmath # noqa +import pyopencl.clmath import pyopencl.clrandom # noqa import loopy as lp diff --git a/test/test_callables.py b/test/test_callables.py index 44a94e43..d7771d20 100644 --- a/test/test_callables.py +++ b/test/test_callables.py @@ -26,7 +26,7 @@ import pytest import pyopencl as cl -import pyopencl.clrandom # noqa: F401 +import pyopencl.clrandom from pyopencl.tools import ( # noqa: F401 pytest_generate_tests_for_pyopencl as pytest_generate_tests, ) diff --git a/test/test_dg.py b/test/test_dg.py index bc134d9c..04104620 100644 --- a/test/test_dg.py +++ b/test/test_dg.py @@ -25,7 +25,7 @@ import numpy as np import pyopencl as cl -import pyopencl.array # noqa +import pyopencl.array from pyopencl.tools import ( # noqa pytest_generate_tests_for_pyopencl as pytest_generate_tests, ) diff --git a/test/test_diff.py b/test/test_diff.py index 626ddb70..5b7d0bbc 100644 --- a/test/test_diff.py +++ b/test/test_diff.py @@ -23,11 +23,11 @@ import logging import sys -import numpy as np # noqa +import numpy as np import numpy.linalg as la import pyopencl as cl -import pyopencl.clrandom # noqa +import pyopencl.clrandom import loopy as lp diff --git a/test/test_domain.py b/test/test_domain.py index c422e131..843bcf31 100644 --- a/test/test_domain.py +++ b/test/test_domain.py @@ -24,11 +24,11 @@ import sys import numpy as np -import pytest # noqa +import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom import loopy as lp diff --git a/test/test_expression.py b/test/test_expression.py index 1b973e9a..b4b856e2 100644 --- a/test/test_expression.py +++ b/test/test_expression.py @@ -27,8 +27,8 @@ import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom from pymbolic.mapper.evaluator import EvaluationMapper import loopy as lp diff --git a/test/test_fortran.py b/test/test_fortran.py index 8f1291bb..aa7c241e 100644 --- a/test/test_fortran.py +++ b/test/test_fortran.py @@ -28,7 +28,7 @@ import pytest import pyopencl as cl -import pyopencl.clrandom # noqa +import pyopencl.clrandom import loopy as lp @@ -136,7 +136,7 @@ def test_assign_single_precision_scalar(ctx_factory): t_unit = lp.parse_fortran(fortran_src) import re - assert re.search("1.1000000[0-9]*f", lp.generate_code_v2(t_unit).device_code()) + assert re.search(r"1.1000000[0-9]*f", lp.generate_code_v2(t_unit).device_code()) a_dev = cl.array.empty(queue, 1, dtype=np.float64, order="F") t_unit(queue, a=a_dev) diff --git a/test/test_isl.py b/test/test_isl.py index fc1312f7..d61031df 100644 --- a/test/test_isl.py +++ b/test/test_isl.py @@ -26,7 +26,7 @@ def test_aff_to_expr(): s = isl.Space.create_from_names(isl.Context(), ["a", "b"]) zero = isl.Aff.zero_on_domain(isl.LocalSpace.from_space(s)) - one = zero.set_constant_val(1) # noqa + one = zero.set_constant_val(1) a = zero.set_coefficient_val(isl.dim_type.in_, 0, 1) b = zero.set_coefficient_val(isl.dim_type.in_, 1, 1) diff --git a/test/test_loopy.py b/test/test_loopy.py index bfa60732..319dd5d0 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -27,9 +27,9 @@ import pytest import pyopencl as cl -import pyopencl.array # noqa -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.array +import pyopencl.clmath +import pyopencl.clrandom import loopy as lp @@ -3271,7 +3271,7 @@ def test_sep_array_ordering(ctx_factory): """ x[k, i] = k """, - [lp.GlobalArg("x", shape=("noutputs", "m"), dim_tags="sep,C")] + [...], + [lp.GlobalArg("x", shape=("noutputs", "m"), dim_tags="sep,C"), ...], fixed_parameters=dict(noutputs=n), ) knl = lp.tag_inames(knl, "k:unr") diff --git a/test/test_reduction.py b/test/test_reduction.py index 0ca1a265..b8b32fb0 100644 --- a/test/test_reduction.py +++ b/test/test_reduction.py @@ -27,8 +27,8 @@ import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom import pyopencl.version import loopy as lp diff --git a/test/test_scan.py b/test/test_scan.py index 986a30da..5cb7573e 100644 --- a/test/test_scan.py +++ b/test/test_scan.py @@ -30,8 +30,8 @@ import pytest import pyopencl as cl -import pyopencl.clmath # noqa -import pyopencl.clrandom # noqa +import pyopencl.clmath +import pyopencl.clrandom import loopy as lp @@ -402,7 +402,7 @@ def test_segmented_scan(ctx_factory, n, segment_boundaries_indices, iname_tag): arr = np.ones(n, dtype=np.float32) segment_boundaries = np.zeros(n, dtype=np.int32) - segment_boundaries[(segment_boundaries_indices,)] = 1 + segment_boundaries[segment_boundaries_indices,] = 1 knl = lp.make_kernel( "{[i,j]: 0<=i Date: Sun, 24 Nov 2024 15:34:11 -0600 Subject: [PATCH 16/24] Hack dataclass'd function_interface to avoid breaking Firedrake --- loopy/kernel/function_interface.py | 56 +++++++++++++++++++++++++++--- loopy/library/random123.py | 3 +- 2 files changed, 53 insertions(+), 6 deletions(-) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 33dfd73f..40d9969b 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -23,8 +23,8 @@ THE SOFTWARE. """ from abc import ABC, abstractmethod -from collections.abc import Mapping, Sequence -from dataclasses import dataclass, replace +from collections.abc import Collection, Mapping, Sequence +from dataclasses import dataclass, fields, replace from typing import TYPE_CHECKING, Any, Callable, FrozenSet, TypeVar from warnings import warn @@ -304,7 +304,9 @@ def get_kw_pos_association(kernel): # {{{ template class -@dataclass(frozen=True, init=False) +# not frozen for Firedrake compatibility +# not eq to avoid having __hash__ set to None in subclasses +@dataclass(init=False, eq=False) class InKernelCallable(ABC): """ An abstract interface to define a callable encountered in a kernel. @@ -368,9 +370,51 @@ def __init__(self, def name(self) -> str: raise NotImplementedError() + # {{{ hackery to avoid breaking Firedrake + + def _all_attrs(self) -> Collection[str]: + dc_attrs = { + fld.name for fld in fields(self) + } + legacy_fields: Collection[str] = getattr(self, "fields", []) + return dc_attrs | set(legacy_fields) + def copy(self, **kwargs: Any) -> Self: + present_kwargs = { + name: getattr(self, name) + for name in self._all_attrs() + } + kwargs = { + **present_kwargs, + **kwargs, + } + return replace(self, **kwargs) + def update_persistent_hash(self, key_hash, key_builder) -> None: + for field_name in self._all_attrs(): + key_builder.rec(key_hash, getattr(self, field_name)) + + def __eq__(self, other: object): + if type(self) is not type(other): + return False + + for f in self._all_attrs(): + if getattr(self, f) != getattr(other, f): + return False + + return True + + def __hash__(self): + import hashlib + + from loopy.tools import LoopyKeyBuilder + key_hash = hashlib.sha256() + self.update_persistent_hash(key_hash, LoopyKeyBuilder()) + return hash(key_hash.digest()) + + # }}} + def with_types(self, arg_id_to_dtype, clbl_inf_ctx): """ :arg arg_id_to_type: a mapping from argument identifiers (integers for @@ -521,7 +565,8 @@ def is_type_specialized(self): # {{{ scalar callable -@dataclass(frozen=True, init=False) +# not frozen, not eq for Firedrake compatibility +@dataclass(init=False, eq=False) class ScalarCallable(InKernelCallable): """ An abstract interface to a scalar callable encountered in a kernel. @@ -699,7 +744,8 @@ def is_type_specialized(self): # {{{ callable kernel -@dataclass(frozen=True, init=False) +# not frozen, not eq for Firedrake compatibility +@dataclass(init=False, eq=False) class CallableKernel(InKernelCallable): """ Records information about a callee kernel. Also provides interface through diff --git a/loopy/library/random123.py b/loopy/library/random123.py index 329770e0..cde0b093 100644 --- a/loopy/library/random123.py +++ b/loopy/library/random123.py @@ -176,7 +176,8 @@ def full_name(self) -> str: # }}} -@dataclass(frozen=True, init=False) +# not frozen, not eq for Firedrake compatibility +@dataclass(init=False, eq=False) class Random123Callable(ScalarCallable): """ Records information about for the random123 functions. From d71475ec03abdc395beb0e8edcd253620b4b161e Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 27 Nov 2024 21:01:07 -0600 Subject: [PATCH 17/24] Annotation for mildly typed cgen --- loopy/target/c/__init__.py | 22 +++++++++++----------- loopy/target/cuda.py | 10 +++++----- loopy/target/ispc.py | 4 ++-- loopy/target/pyopencl.py | 15 ++++++++------- pyproject.toml | 1 - 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 98dbe35c..e4a9bef9 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -821,7 +821,7 @@ def get_function_definition( tv.initializer is not None): assert tv.read_only - decl = self.wrap_global_constant( + decl: Generable = self.wrap_global_constant( self.get_temporary_var_declarator(codegen_state, tv)) if tv.initializer is not None: @@ -850,12 +850,12 @@ def get_function_declaration( from cgen import FunctionDeclaration, Value - name = codegen_result.current_program(codegen_state).name + name_str = codegen_result.current_program(codegen_state).name if self.target.fortran_abi: - name += "_" + name_str += "_" if codegen_state.is_entrypoint: - name = Value("void", name) + name: Declarator = Value("void", name_str) # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info @@ -863,7 +863,7 @@ def get_function_declaration( passed_names = skai.passed_names written_names = skai.written_names else: - name = Value("static void", name) + name = Value("static void", name_str) passed_names = [arg.name for arg in kernel.args] written_names = kernel.get_written_variables() @@ -892,11 +892,11 @@ def emit_temp_var_decl_for_tv_with_base_storage(self, assert isinstance(tv.address_space, AddressSpace) ecm = codegen_state.expression_to_code_mapper - cast_decl = POD(self, tv.dtype, "") - temp_var_decl = POD(self, tv.dtype, tv.name) + cast_decl: Declarator = POD(self, tv.dtype, "") + temp_var_decl: Declarator = POD(self, tv.dtype, tv.name) if tv._base_storage_access_may_be_aliasing: - ptrtype = _ConstPointer + ptrtype: type[Pointer] = _ConstPointer else: # The 'restrict' part of this is a complete lie--of course # all these temporaries are aliased. But we're promising to @@ -1018,7 +1018,7 @@ def wrap_global_constant(self, decl: Declarator) -> Declarator: def get_value_arg_declaraotor( self, name: str, dtype: LoopyType, is_written: bool) -> Declarator: - result = POD(self, dtype, name) + result: Declarator = POD(self, dtype, name) if not is_written: from cgen import Const @@ -1048,7 +1048,7 @@ def get_array_base_declarator(self, ary: ArrayBase) -> Declarator: def get_array_arg_declarator( self, arg: ArrayArg, is_written: bool) -> Declarator: from cgen import RestrictPointer - arg_decl = RestrictPointer( + arg_decl: Declarator = RestrictPointer( self.wrap_decl_for_address_space( self.get_array_base_declarator(arg), arg.address_space)) @@ -1070,7 +1070,7 @@ def get_temporary_arg_decl( from cgen import RestrictPointer assert temp_var.address_space is not auto - arg_decl = RestrictPointer( + arg_decl: Declarator = RestrictPointer( self.wrap_decl_for_address_space( self.get_array_base_declarator(temp_var), cast(AddressSpace, temp_var.address_space))) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 22d663b5..339010ee 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -27,7 +27,7 @@ import numpy as np -from cgen import Const, Declarator, Generable +from cgen import Const, Declarator, Generable, Pointer from pymbolic import var from pytools import memoize_method @@ -448,7 +448,7 @@ def get_array_base_declarator(self, ary: ArrayBase) -> Declarator: def get_array_arg_declarator( self, arg: ArrayArg, is_written: bool) -> Declarator: from cgen.cuda import CudaRestrictPointer - arg_decl = CudaRestrictPointer( + arg_decl: Declarator = CudaRestrictPointer( self.get_array_base_declarator(arg)) if not is_written: @@ -477,11 +477,11 @@ def emit_temp_var_decl_for_tv_with_base_storage(self, assert tv.base_storage is not None ecm = codegen_state.expression_to_code_mapper - cast_decl = POD(self, tv.dtype, "") - temp_var_decl = POD(self, tv.dtype, tv.name) + cast_decl: Declarator = POD(self, tv.dtype, "") + temp_var_decl: Declarator = POD(self, tv.dtype, tv.name) if tv._base_storage_access_may_be_aliasing: - ptrtype = _ConstPointer + ptrtype: type[Pointer] = _ConstPointer else: # The 'restrict' part of this is a complete lie--of course # all these temporaries are aliased. But we're promising to diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index d88b99bb..cbc8b417 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -237,7 +237,7 @@ def get_function_declaration( for arg_name in passed_names] if codegen_state.is_generating_device_code: - result = ISPCTask( + result: Declarator = ISPCTask( FunctionDeclaration( Value("void", name), arg_decls)) @@ -323,7 +323,7 @@ def get_array_arg_declarator( self, arg: ArrayArg, is_written: bool) -> Declarator: # FIXME restrict? from cgen.ispc import ISPCUniform, ISPCUniformPointer - decl = ISPCUniform( + decl: Declarator = ISPCUniform( ISPCUniformPointer(self.get_array_base_declarator(arg))) if not is_written: diff --git a/loopy/target/pyopencl.py b/loopy/target/pyopencl.py index 18336056..a8dd15cc 100644 --- a/loopy/target/pyopencl.py +++ b/loopy/target/pyopencl.py @@ -37,6 +37,7 @@ Block, Collection, Const, + Declarator, FunctionBody, Generable, Initializer, @@ -1027,7 +1028,7 @@ def get_function_definition( self, codegen_state: CodeGenerationState, codegen_result: CodeGenerationResult, schedule_index: int, function_decl: Generable, function_body: Generable, - ) -> Tuple[Sequence[Tuple[str, str]], Generable]: + ) -> Generable: assert isinstance(function_body, Block) kernel = codegen_state.kernel assert kernel.linearization is not None @@ -1055,7 +1056,7 @@ def get_function_definition( tv.initializer is not None): assert tv.read_only - decl = self.wrap_global_constant( + decl: Generable = self.wrap_global_constant( self.get_temporary_var_declarator(codegen_state, tv)) if tv.initializer is not None: @@ -1109,14 +1110,14 @@ def get_function_declaration( from cgen import FunctionDeclaration, Struct, Value - name = codegen_result.current_program(codegen_state).name + name_str = codegen_result.current_program(codegen_state).name if self.target.fortran_abi: - name += "_" + name_str += "_" from loopy.target.c import FunctionDeclarationWrapper if codegen_state.is_entrypoint: - name = Value("void", name) + name = Value("void", name_str) # subkernel launches occur only as part of entrypoint kernels for now from loopy.schedule.tools import get_subkernel_arg_info @@ -1146,7 +1147,7 @@ def get_function_declaration( (f"declare-{arg_overflow_struct_name}", str(arg_overflow_struct)) ] if struct_overflow_arg_names else [] - arg_struct_args = [CLGlobal(Const(Pointer(Value( + arg_struct_args: list[Declarator] = [CLGlobal(Const(Pointer(Value( f"struct {arg_overflow_struct_name}", "_lpy_overflow_args"))))] else: @@ -1165,7 +1166,7 @@ def get_function_declaration( + arg_struct_args ))) else: - name = Value("static void", name) + name = Value("static void", name_str) passed_names = [arg.name for arg in kernel.args] written_names = kernel.get_written_variables() diff --git a/pyproject.toml b/pyproject.toml index e07302dc..b5fecda1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -157,7 +157,6 @@ exclude = [ module = [ "islpy.*", "pymbolic.*", - "cgen.*", "genpy.*", "pyopencl.*", "colorama.*", From 64b661d377e39636678fbf7fe88aa51af1b270f9 Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 27 Nov 2024 21:09:15 -0600 Subject: [PATCH 18/24] Fix a typo --- doc/ref_kernel.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/ref_kernel.rst b/doc/ref_kernel.rst index 2962c23b..02f8b148 100644 --- a/doc/ref_kernel.rst +++ b/doc/ref_kernel.rst @@ -25,7 +25,7 @@ consist of arithmetic operations and calls to functions. If the outermost operation of the RHS expression is a function call, the RHS value may be a tuple, and multiple (still scalar) arrays appear as LHS values. (This is the only sense in which tuple types are supported.) -Each statement is parametrized by zero or more loop variables ("inames"). +Each statement is parameterized by zero or more loop variables ("inames"). A statement is executed once for each integer point defined by the domain forest for the iname tuple given for that statement (:attr:`loopy.InstructionBase.within_inames`). Each execution of a From f113be03007d8b11b5ecf30473f04d4173b27c7c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 1 Dec 2024 14:23:43 -0600 Subject: [PATCH 19/24] Fix Firedrake tests --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 23498828..a208ce54 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -194,9 +194,9 @@ jobs: cd /home/firedrake/firedrake/src/firedrake # patch so exception messages get shown - curl -L https://gist.githubusercontent.com/inducer/17d7134ace215f0df1f3627eac4195c7/raw/63edfaf2ec8bf06987896569a4f24264df490e9e/firedrake-debug-patch.diff | patch -p1 + curl -L https://gist.githubusercontent.com/inducer/17d7134ace215f0df1f3627eac4195c7/raw/ec5470a7d8587b6e1f336f3ef1d0ece5e26f236a/firedrake-debug-patch.diff | patch -p1 - pytest --tb=native -rsxw --durations=10 -m 'not parallel' tests/multigrid/ + pytest --tb=native -rsxw --durations=10 -m 'not parallel' tests/firedrake/multigrid/ validate_cff: name: Validate CITATION.cff From 2b41e84469985918d7a84ac6e46da7f9cf82fe5a Mon Sep 17 00:00:00 2001 From: Matthias Diener Date: Mon, 2 Dec 2024 16:42:52 -0600 Subject: [PATCH 20/24] Tree: various speedups (#887) * Tree: various speedups - make dataclass non-frozen - use mutate() for cases where a Map is modified multiple times - remove asserts for cases that would fail immediately anyway * make frozen depend on __debug__, restore an assert * Improve depth() Co-authored-by: Alexandru Fikl * opt ancestors --------- Co-authored-by: Alexandru Fikl --- loopy/schedule/tree.py | 50 ++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/loopy/schedule/tree.py b/loopy/schedule/tree.py index 327fb65c..cc580713 100644 --- a/loopy/schedule/tree.py +++ b/loopy/schedule/tree.py @@ -50,7 +50,9 @@ NodeT = TypeVar("NodeT", bound=Hashable) -@dataclass(frozen=True) +# Not frozen when optimizations are enabled because it is slower. +# Tree objects are immutable, and offer no way to mutate the tree. +@dataclass(frozen=__debug__) # type: ignore[literal-required] class Tree(Generic[NodeT]): """ An immutable tree containing nodes of type :class:`NodeT`. @@ -95,31 +97,23 @@ def ancestors(self, node: NodeT) -> tuple[NodeT, ...]: """ Returns a :class:`tuple` of nodes that are ancestors of *node*. """ - assert node in self - - if self.is_root(node): + parent = self.parent(node) + if parent is None: # => root return () - parent = self._child_to_parent[node] - assert parent is not None - return (parent, *self.ancestors(parent)) def parent(self, node: NodeT) -> NodeT | None: """ Returns the parent of *node*. """ - assert node in self - return self._child_to_parent[node] def children(self, node: NodeT) -> tuple[NodeT, ...]: """ Returns the children of *node*. """ - assert node in self - return self._parent_to_children[node] @memoize_method @@ -127,25 +121,18 @@ def depth(self, node: NodeT) -> int: """ Returns the depth of *node*, with the root having depth 0. """ - assert node in self - - if self.is_root(node): - # => None - return 0 - parent_of_node = self.parent(node) - assert parent_of_node is not None + if parent_of_node is None: + return 0 return 1 + self.depth(parent_of_node) def is_root(self, node: NodeT) -> bool: - assert node in self - + """Return *True* if *node* is the root of the tree.""" return self.parent(node) is None def is_leaf(self, node: NodeT) -> bool: - assert node in self - + """Return *True* if *node* has no children.""" return len(self.children(node)) == 0 def __contains__(self, node: NodeT) -> bool: @@ -162,9 +149,11 @@ def add_node(self, node: NodeT, parent: NodeT) -> Tree[NodeT]: siblings = self._parent_to_children[parent] - return Tree((self._parent_to_children - .set(parent, (*siblings, node)) - .set(node, ())), + _parent_to_children_mut = self._parent_to_children.mutate() + _parent_to_children_mut[parent] = (*siblings, node) + _parent_to_children_mut[node] = () + + return Tree(_parent_to_children_mut.finish(), self._child_to_parent.set(node, parent)) def replace_node(self, node: NodeT, new_node: NodeT) -> Tree[NodeT]: @@ -234,13 +223,12 @@ def move_node(self, node: NodeT, new_parent: NodeT | None) -> Tree[NodeT]: parents_new_children = tuple(frozenset(siblings) - frozenset([node])) new_parents_children = (*self.children(new_parent), node) - new_child_to_parent = self._child_to_parent.set(node, new_parent) - new_parent_to_children = (self._parent_to_children - .set(parent, parents_new_children) - .set(new_parent, new_parents_children)) + _parent_to_children_mut = self._parent_to_children.mutate() + _parent_to_children_mut[parent] = parents_new_children + _parent_to_children_mut[new_parent] = new_parents_children - return Tree(new_parent_to_children, - new_child_to_parent) + return Tree(_parent_to_children_mut.finish(), + self._child_to_parent.set(node, new_parent)) def __str__(self) -> str: """ From 76f4afc27f343bc7e95a11b9b04d30c574402bcd Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Sun, 24 Nov 2024 23:24:08 -0600 Subject: [PATCH 21/24] Revert "Hack dataclass'd function_interface to avoid breaking Firedrake" This reverts commit 1af452302afdc8558773b503f621c08859565539. --- loopy/kernel/function_interface.py | 56 +++--------------------------- loopy/library/random123.py | 3 +- 2 files changed, 6 insertions(+), 53 deletions(-) diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 40d9969b..33dfd73f 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -23,8 +23,8 @@ THE SOFTWARE. """ from abc import ABC, abstractmethod -from collections.abc import Collection, Mapping, Sequence -from dataclasses import dataclass, fields, replace +from collections.abc import Mapping, Sequence +from dataclasses import dataclass, replace from typing import TYPE_CHECKING, Any, Callable, FrozenSet, TypeVar from warnings import warn @@ -304,9 +304,7 @@ def get_kw_pos_association(kernel): # {{{ template class -# not frozen for Firedrake compatibility -# not eq to avoid having __hash__ set to None in subclasses -@dataclass(init=False, eq=False) +@dataclass(frozen=True, init=False) class InKernelCallable(ABC): """ An abstract interface to define a callable encountered in a kernel. @@ -370,51 +368,9 @@ def __init__(self, def name(self) -> str: raise NotImplementedError() - # {{{ hackery to avoid breaking Firedrake - - def _all_attrs(self) -> Collection[str]: - dc_attrs = { - fld.name for fld in fields(self) - } - legacy_fields: Collection[str] = getattr(self, "fields", []) - return dc_attrs | set(legacy_fields) - def copy(self, **kwargs: Any) -> Self: - present_kwargs = { - name: getattr(self, name) - for name in self._all_attrs() - } - kwargs = { - **present_kwargs, - **kwargs, - } - return replace(self, **kwargs) - def update_persistent_hash(self, key_hash, key_builder) -> None: - for field_name in self._all_attrs(): - key_builder.rec(key_hash, getattr(self, field_name)) - - def __eq__(self, other: object): - if type(self) is not type(other): - return False - - for f in self._all_attrs(): - if getattr(self, f) != getattr(other, f): - return False - - return True - - def __hash__(self): - import hashlib - - from loopy.tools import LoopyKeyBuilder - key_hash = hashlib.sha256() - self.update_persistent_hash(key_hash, LoopyKeyBuilder()) - return hash(key_hash.digest()) - - # }}} - def with_types(self, arg_id_to_dtype, clbl_inf_ctx): """ :arg arg_id_to_type: a mapping from argument identifiers (integers for @@ -565,8 +521,7 @@ def is_type_specialized(self): # {{{ scalar callable -# not frozen, not eq for Firedrake compatibility -@dataclass(init=False, eq=False) +@dataclass(frozen=True, init=False) class ScalarCallable(InKernelCallable): """ An abstract interface to a scalar callable encountered in a kernel. @@ -744,8 +699,7 @@ def is_type_specialized(self): # {{{ callable kernel -# not frozen, not eq for Firedrake compatibility -@dataclass(init=False, eq=False) +@dataclass(frozen=True, init=False) class CallableKernel(InKernelCallable): """ Records information about a callee kernel. Also provides interface through diff --git a/loopy/library/random123.py b/loopy/library/random123.py index cde0b093..329770e0 100644 --- a/loopy/library/random123.py +++ b/loopy/library/random123.py @@ -176,8 +176,7 @@ def full_name(self) -> str: # }}} -# not frozen, not eq for Firedrake compatibility -@dataclass(init=False, eq=False) +@dataclass(frozen=True, init=False) class Random123Callable(ScalarCallable): """ Records information about for the random123 functions. From 9144a1d2373a0e80e68550c3fd904757b57f589c Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 4 Dec 2024 21:02:46 +0530 Subject: [PATCH 22/24] Test loop prioties with v2 scheduler Co-authored-by: Connor Ward --- test/test_loopy.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/test/test_loopy.py b/test/test_loopy.py index 319dd5d0..d58fd270 100644 --- a/test/test_loopy.py +++ b/test/test_loopy.py @@ -3704,6 +3704,21 @@ def test_long_kernel(): lp.get_one_linearized_kernel(t_unit.default_entrypoint, t_unit.callables_table) +@pytest.mark.filterwarnings("error:.*:loopy.LoopyWarning") +def test_loop_imperfect_nest_priorities_in_v2_scheduler(): + # Reported by Connor Ward. See . + knl = lp.make_kernel( + "{ [i,j,k]: 0 <= i,j,k < 5}", + """ + x[i, j] = i + j + y[i, k] = i + k + """, + loop_priority=frozenset({("i", "j"), ("i", "k")}), + ) + + code = lp.generate_code_v2(knl) + + if __name__ == "__main__": if len(sys.argv) > 1: exec(sys.argv[1]) From 355ba532f8dd448303e2ede81279a5fdb862e4d6 Mon Sep 17 00:00:00 2001 From: Kaushik Kulkarni Date: Wed, 4 Dec 2024 21:06:23 +0530 Subject: [PATCH 23/24] Correct the check to see if loop priority is redundant --- loopy/schedule/tools.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/loopy/schedule/tools.py b/loopy/schedule/tools.py index 3858462b..f75e2c00 100644 --- a/loopy/schedule/tools.py +++ b/loopy/schedule/tools.py @@ -862,10 +862,12 @@ def _update_nesting_constraints( .ancestors(inner_iname_nest)) ancestors_of_outer_iname = (loop_nest_tree .ancestors(outer_iname_nest)) - if outer_iname in ancestors_of_inner_iname: + if any(outer_iname in ancestor + for ancestor in ancestors_of_inner_iname): # nesting constraint already satisfied => do nothing pass - elif inner_iname in ancestors_of_outer_iname: + elif any(inner_iname in ancestor + for ancestor in ancestors_of_outer_iname): cannot_satisfy_callback("Cannot satisfy constraint that" f" iname '{inner_iname}' must be" f" nested within '{outer_iname}''.") From 56936b1ea8ece80ab5d1f3483b861d9bc0bd072c Mon Sep 17 00:00:00 2001 From: Andreas Kloeckner Date: Wed, 4 Dec 2024 10:56:55 -0600 Subject: [PATCH 24/24] Enable F841 unused-variable lint, fix --- contrib/c-integer-semantics.py | 1 - loopy/kernel/creation.py | 2 +- loopy/kernel/function_interface.py | 6 ++-- loopy/kernel/tools.py | 2 +- loopy/library/reduction.py | 2 +- loopy/preprocess.py | 2 +- loopy/statistics.py | 2 +- loopy/symbolic.py | 6 ++-- loopy/target/c/__init__.py | 2 -- loopy/target/c/c_execution.py | 2 +- loopy/target/cuda.py | 2 +- loopy/target/ispc.py | 4 +-- loopy/target/opencl.py | 2 +- loopy/tools.py | 2 +- loopy/transform/callable.py | 2 +- loopy/type_inference.py | 2 +- proto-tests/test_sem.py | 1 - proto-tests/test_sem_tim.py | 1 - proto-tests/test_tim.py | 4 --- pyproject.toml | 1 - test/test_apps.py | 2 +- test/test_c_execution.py | 2 +- test/test_callables.py | 44 +++++++++++++++--------------- test/test_diff.py | 10 +++---- test/test_domain.py | 2 +- test/test_einsum.py | 10 +++---- test/test_expression.py | 22 +++++++-------- test/test_fusion.py | 4 +-- test/test_isl.py | 1 - test/test_loopy.py | 42 ++++++++++++++-------------- test/test_reduction.py | 16 +++++------ test/test_scan.py | 22 +++++++-------- test/test_target.py | 28 +++++++++---------- test/test_transform.py | 14 +++++----- 34 files changed, 128 insertions(+), 139 deletions(-) diff --git a/contrib/c-integer-semantics.py b/contrib/c-integer-semantics.py index 62a980f0..8b30415c 100644 --- a/contrib/c-integer-semantics.py +++ b/contrib/c-integer-semantics.py @@ -95,7 +95,6 @@ def main(): func.argtypes = [ctypes.c_longlong, ctypes.c_longlong] func.restype = ctypes.c_longlong - cdiv = int_exp.cdiv cmod = int_exp.cmod int_floor_div = int_exp.loopy_floor_div_int64 int_floor_div_pos_b = int_exp.loopy_floor_div_pos_b_int64 diff --git a/loopy/kernel/creation.py b/loopy/kernel/creation.py index 43c4a4ee..f8f85447 100644 --- a/loopy/kernel/creation.py +++ b/loopy/kernel/creation.py @@ -679,7 +679,7 @@ def _count_open_paren_symbols(s): for c in s: val = _PAREN_PAIRS.get(c) if val is not None: - increment, cls = val + increment, _cls = val result += increment return result diff --git a/loopy/kernel/function_interface.py b/loopy/kernel/function_interface.py index 33dfd73f..fa8a73c2 100644 --- a/loopy/kernel/function_interface.py +++ b/loopy/kernel/function_interface.py @@ -859,7 +859,7 @@ def with_added_arg(self, arg_dtype, arg_descr): *self.subkernel.args, ValueArg(var_name, arg_dtype, self.subkernel.target)]) - kw_to_pos, pos_to_kw = get_kw_pos_association(subknl) + kw_to_pos, _pos_to_kw = get_kw_pos_association(subknl) if self.arg_id_to_dtype is None: arg_id_to_dtype = {} @@ -887,7 +887,7 @@ def with_added_arg(self, arg_dtype, arg_descr): def with_packing_for_args(self): from loopy.kernel.data import AddressSpace - kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) + _kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) arg_id_to_descr = {} @@ -955,7 +955,7 @@ def emit_call_insn(self, insn, target, expression_to_code_mapper): parameters = list(parameters) par_dtypes = [self.arg_id_to_dtype[i] for i, _ in enumerate(parameters)] - kw_to_pos, pos_to_kw = get_kw_pos_association(self.subkernel) + _kw_to_pos, _pos_to_kw = get_kw_pos_association(self.subkernel) # insert the assignees at the required positions assignee_write_count = -1 diff --git a/loopy/kernel/tools.py b/loopy/kernel/tools.py index 9a14aedd..66740efc 100644 --- a/loopy/kernel/tools.py +++ b/loopy/kernel/tools.py @@ -100,7 +100,7 @@ def add_dtypes( def _add_dtypes_overdetermined(kernel, dtype_dict): - dtype_dict_remainder, new_args, new_temp_vars = _add_dtypes(kernel, dtype_dict) + _dtype_dict_remainder, new_args, new_temp_vars = _add_dtypes(kernel, dtype_dict) # do not throw error for unused args return kernel.copy(args=new_args, temporary_variables=new_temp_vars) diff --git a/loopy/library/reduction.py b/loopy/library/reduction.py index 445a0b86..7ec4f6f8 100644 --- a/loopy/library/reduction.py +++ b/loopy/library/reduction.py @@ -329,7 +329,7 @@ def neutral_element(self, scalar_dtype, segment_flag_dtype, from loopy.library.function import MakeTupleCallable from loopy.translation_unit import add_callable_to_table - scalar_neutral_element, calables_table = ( + scalar_neutral_element, _calables_table = ( self.inner_reduction.neutral_element( scalar_dtype, callables_table, target)) diff --git a/loopy/preprocess.py b/loopy/preprocess.py index 98026fda..254baefe 100644 --- a/loopy/preprocess.py +++ b/loopy/preprocess.py @@ -697,7 +697,7 @@ def _tuple_or_none(s): raise NotImplementedError() new_callable, clbl_inf_ctx = t_unit.callables_table[e].with_descrs( arg_id_to_descr, clbl_inf_ctx) - clbl_inf_ctx, new_name = clbl_inf_ctx.with_callable(e, new_callable, + clbl_inf_ctx, _new_name = clbl_inf_ctx.with_callable(e, new_callable, is_entrypoint=True) return clbl_inf_ctx.finish_program(t_unit) diff --git a/loopy/statistics.py b/loopy/statistics.py index 657ee9e2..63e59eb8 100755 --- a/loopy/statistics.py +++ b/loopy/statistics.py @@ -2238,7 +2238,7 @@ def gather_access_footprint_bytes(program, ignore_uncountable=False): ignore_uncountable=ignore_uncountable) for key, var_fp in fp.items(): - vname, direction = key + vname, _direction = key var_descr = kernel.get_var_descriptor(vname) bytes_transferred = ( diff --git a/loopy/symbolic.py b/loopy/symbolic.py index 964bd4d0..cf2d0142 100644 --- a/loopy/symbolic.py +++ b/loopy/symbolic.py @@ -1839,7 +1839,7 @@ def map_remainder(self, expr): raise TypeError("modulo non-constant in '%s' not supported " "for as-pwaff evaluation" % expr) - (s, denom_aff), = denom.get_pieces() + (_s, denom_aff), = denom.get_pieces() denom = denom_aff.get_constant_val() return num.mod_val(denom) @@ -1866,7 +1866,7 @@ def aff_from_expr(space: isl.Space, expr: Expression, vars_to_zero=None) -> isl. pieces = pwaff.get_pieces() if len(pieces) == 1: - (s, aff), = pieces + (_s, aff), = pieces return aff else: from loopy.diagnostic import ExpressionNotAffineError @@ -1970,7 +1970,7 @@ def qpolynomial_from_expr(space, expr): pieces = pw_qpoly.get_pieces() if len(pieces) == 1: - (s, qpoly), = pieces + (_s, qpoly), = pieces return qpoly else: raise RuntimeError("expression '%s' could not be converted to a " diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index e4a9bef9..9a35207e 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -945,8 +945,6 @@ def get_temporary_decls(self, codegen_state, schedule_index): sub_knl_temps | supporting_temporary_names(kernel, sub_knl_temps)) - ecm = self.get_expression_to_code_mapper(codegen_state) - for tv_name in sorted(sub_knl_temps): tv = kernel.temporary_variables[tv_name] if not tv.base_storage: diff --git a/loopy/target/c/c_execution.py b/loopy/target/c/c_execution.py index 87343090..8e946648 100644 --- a/loopy/target/c/c_execution.py +++ b/loopy/target/c/c_execution.py @@ -324,7 +324,7 @@ def build(self, name, code, debug=False, wait_on_error=None, c_fname = self._tempname("code." + self.source_suffix) # build object - _, mod_name, ext_file, recompiled = \ + _, _mod_name, ext_file, recompiled = \ compile_from_string( self.toolchain.copy( cflags=self.toolchain.cflags+list(extra_build_options)), diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 339010ee..e7eb7863 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -186,7 +186,7 @@ def cuda_with_types(self, arg_id_to_dtype, callables_table): input_dtype = arg_id_to_dtype[0] - scalar_dtype, offset, field_name = input_dtype.fields["x"] + scalar_dtype, _offset, _field_name = input_dtype.fields["x"] return_dtype = scalar_dtype return self.copy(arg_id_to_dtype={0: input_dtype, 1: input_dtype, -1: return_dtype}) diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index cbc8b417..0d171d3e 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -114,7 +114,7 @@ def map_subscript(self, expr, type_context): and ary.address_space == AddressSpace.PRIVATE): # generate access code for access to private-index temporaries - gsize, lsize = self.kernel.get_grid_size_upper_bounds_as_exprs() + _gsize, lsize = self.kernel.get_grid_size_upper_bounds_as_exprs() if lsize: lsize, = lsize from pymbolic import evaluate @@ -174,7 +174,7 @@ class ISPCTarget(CFamilyTarget): device_program_name_suffix = "_inner" def pre_codegen_entrypoint_check(self, kernel, callables_table): - gsize, lsize = kernel.get_grid_size_upper_bounds_as_exprs( + _gsize, lsize = kernel.get_grid_size_upper_bounds_as_exprs( callables_table) if len(lsize) > 1: for ls_i in lsize[1:]: diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 8250436f..8c8f7903 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -321,7 +321,7 @@ def with_types(self, arg_id_to_dtype, callables_table): callables_table) dtype = arg_id_to_dtype[0] - scalar_dtype, offset, field_name = dtype.numpy_dtype.fields["s0"] + scalar_dtype, _offset, _field_name = dtype.numpy_dtype.fields["s0"] return ( self.copy(name_in_target=name, arg_id_to_dtype={-1: NumpyType(scalar_dtype), 0: dtype, 1: dtype}), diff --git a/loopy/tools.py b/loopy/tools.py index ff66e36b..20904a3e 100644 --- a/loopy/tools.py +++ b/loopy/tools.py @@ -268,7 +268,7 @@ def address_from_numpy(obj): if ary_intf is None: raise RuntimeError("no array interface") - buf_base, is_read_only = ary_intf["data"] + buf_base, _is_read_only = ary_intf["data"] return buf_base + ary_intf.get("offset", 0) diff --git a/loopy/transform/callable.py b/loopy/transform/callable.py index 0210eaee..0c0b22a2 100644 --- a/loopy/transform/callable.py +++ b/loopy/transform/callable.py @@ -314,7 +314,7 @@ def _inline_call_instruction(caller_knl, callee_knl, call_insn): parameters = call_insn.expression.parameters # reads from loopy.kernel.function_interface import get_kw_pos_association - kw_to_pos, pos_to_kw = get_kw_pos_association(callee_knl) + _kw_to_pos, pos_to_kw = get_kw_pos_association(callee_knl) for i, par in enumerate(parameters): arg_map[pos_to_kw[i]] = par diff --git a/loopy/type_inference.py b/loopy/type_inference.py index 57548ab6..bae6e36c 100644 --- a/loopy/type_inference.py +++ b/loopy/type_inference.py @@ -1060,7 +1060,7 @@ def infer_unknown_types( t_unit[e].args if arg.dtype not in (None, auto)} new_callable, clbl_inf_ctx = t_unit.callables_table[e].with_types( arg_id_to_dtype, clbl_inf_ctx) - clbl_inf_ctx, new_name = clbl_inf_ctx.with_callable(e, new_callable, + clbl_inf_ctx, _new_name = clbl_inf_ctx.with_callable(e, new_callable, is_entrypoint=True) if expect_completion: from loopy.types import LoopyType diff --git a/proto-tests/test_sem.py b/proto-tests/test_sem.py index acb7d342..b4d85411 100644 --- a/proto-tests/test_sem.py +++ b/proto-tests/test_sem.py @@ -359,7 +359,6 @@ def test_advect_dealias(ctx_factory): K_sym = var("K") # noqa field_shape = (N, N, N, K_sym) - interim_field_shape = (M, M, M, K_sym) # 1. direction-by-direction similarity transform on u # 2. invert diagonal diff --git a/proto-tests/test_sem_tim.py b/proto-tests/test_sem_tim.py index 2d0c2fe7..a42f1aa8 100644 --- a/proto-tests/test_sem_tim.py +++ b/proto-tests/test_sem_tim.py @@ -367,7 +367,6 @@ def test_advect_dealias(ctx_factory): K_sym = var("K") # noqa field_shape = (N, N, N, K_sym) - interim_field_shape = (M, M, M, K_sym) # 1. direction-by-direction similarity transform on u # 2. invert diagonal diff --git a/proto-tests/test_tim.py b/proto-tests/test_tim.py index 7a519d80..39261014 100644 --- a/proto-tests/test_tim.py +++ b/proto-tests/test_tim.py @@ -46,8 +46,6 @@ def test_tim2d(ctx_factory): ], name="semlap2D", assumptions="K>=1") - unroll = 32 - seq_knl = knl knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "o"], default_tag="l.auto") knl = lp.add_prefetch(knl, "u", ["i", "j", "o"], default_tag="l.auto") @@ -107,8 +105,6 @@ def test_red2d(ctx_factory): ], name="semlap2D", assumptions="K>=1") - unroll = 32 - seq_knl = knl knl = lp.add_prefetch(knl, "D", ["m", "j", "i", "o"], default_tag="l.auto") knl = lp.add_prefetch(knl, "u", ["i", "j", "o"], default_tag="l.auto") diff --git a/pyproject.toml b/pyproject.toml index b5fecda1..708f02fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,6 @@ extend-ignore = [ # FIXME "NPY002", # numpy rng "C408", # unnecssary dict() -> literal - "F841", # local variable unused ] [tool.ruff.lint.per-file-ignores] diff --git a/test/test_apps.py b/test/test_apps.py index ce8b9701..11422971 100644 --- a/test/test_apps.py +++ b/test/test_apps.py @@ -532,7 +532,7 @@ def test_fd_demo(): knl = lp.set_options(knl, write_code=True) knl = lp.add_and_infer_dtypes(knl, dict(u=np.float32)) - code, inf = lp.generate_code(knl) + code, _inf = lp.generate_code(knl) print(code) assert "double" not in code diff --git a/test/test_c_execution.py b/test/test_c_execution.py index 6208b9ae..9943d41d 100644 --- a/test/test_c_execution.py +++ b/test/test_c_execution.py @@ -365,7 +365,7 @@ def test_one_length_loop(): def test_scalar_global_args(): n = np.random.default_rng().integers(30, 100) - evt, (out,) = lp.make_kernel( + _evt, (out,) = lp.make_kernel( "{[i]: 0<=i