diff --git a/kernel_tuner/utils/directives.py b/kernel_tuner/utils/directives.py index 3d7fca06..0dad09ca 100644 --- a/kernel_tuner/utils/directives.py +++ b/kernel_tuner/utils/directives.py @@ -48,6 +48,48 @@ def __init__(self, directive: Directive, lang: Language): self.language = lang +class ArraySize(object): + """Size of an array""" + + def __init__(self): + self.size = list() + + def __iter__(self): + for i in self.size: + yield i + + def __len__(self): + return len(self.size) + + def clear(self): + self.size.clear() + + def get(self) -> int: + length = len(self.size) + if length == 0: + return 0 + elif length == 1: + return self.size[0] + else: + product = 1 + for i in self.size: + product *= i + return product + + def add(self, dim: int) -> None: + # Only allow adding valid dimensions + if dim >= 1: + self.size.append(dim) + + +def fortran_md_size(size: ArraySize) -> list: + """Format a multidimensional size into a list of Fortran array-section strings, one per dimension""" + md_size = list() + for dim in size: + md_size.append(f":{dim}") + return md_size + + def is_openacc(directive: Directive) -> bool: """Check if a directive is OpenACC""" return isinstance(directive, OpenACC) @@ -120,7 +162,7 @@ def openacc_directive_contains_data_clause(line: str) -> bool: return openacc_directive_contains_clause(line, data_clauses) -def create_data_directive_openacc(name: str, size: int, lang: Language) -> str: +def create_data_directive_openacc(name: str, size: ArraySize, lang: Language) -> str: """Create a data directive for a given language""" if is_cxx(lang): return create_data_directive_openacc_cxx(name, size) @@ -129,17 +171,23 @@ def create_data_directive_openacc(name: str, size: int, lang: Language) -> str: return "" -def create_data_directive_openacc_cxx(name: str, size: int) -> str: +def create_data_directive_openacc_cxx(name: str, size: ArraySize) -> str: """Create C++ OpenACC code to allocate and copy data""" - return f"#pragma acc enter data create({name}[:{size}])\n#pragma acc update 
device({name}[:{size}])\n" + return f"#pragma acc enter data create({name}[:{size.get()}])\n#pragma acc update device({name}[:{size.get()}])\n" -def create_data_directive_openacc_fortran(name: str, size: int) -> str: +def create_data_directive_openacc_fortran(name: str, size: ArraySize) -> str: """Create Fortran OpenACC code to allocate and copy data""" - return f"!$acc enter data create({name}(:{size}))\n!$acc update device({name}(:{size}))\n" + if len(size) == 1: + return f"!$acc enter data create({name}(:{size.get()}))\n!$acc update device({name}(:{size.get()}))\n" + else: + md_size = fortran_md_size(size) + return ( + f"!$acc enter data create({name}({','.join(md_size)}))\n!$acc update device({name}({','.join(md_size)}))\n" + ) -def exit_data_directive_openacc(name: str, size: int, lang: Language) -> str: +def exit_data_directive_openacc(name: str, size: ArraySize, lang: Language) -> str: """Create code to copy data back for a given language""" if is_cxx(lang): return exit_data_directive_openacc_cxx(name, size) @@ -148,14 +196,18 @@ def exit_data_directive_openacc(name: str, size: int, lang: Language) -> str: return "" -def exit_data_directive_openacc_cxx(name: str, size: int) -> str: +def exit_data_directive_openacc_cxx(name: str, size: ArraySize) -> str: """Create C++ OpenACC code to copy back data""" - return f"#pragma acc exit data copyout({name}[:{size}])\n" + return f"#pragma acc exit data copyout({name}[:{size.get()}])\n" -def exit_data_directive_openacc_fortran(name: str, size: int) -> str: +def exit_data_directive_openacc_fortran(name: str, size: ArraySize) -> str: """Create Fortran OpenACC code to copy back data""" - return f"!$acc exit data copyout({name}(:{size}))\n" + if len(size) == 1: + return f"!$acc exit data copyout({name}(:{size.get()}))\n" + else: + md_size = fortran_md_size(size) + return f"!$acc exit data copyout({name}({','.join(md_size)}))\n" def correct_kernel(kernel_name: str, line: str) -> bool: @@ -165,7 +217,7 @@ def 
correct_kernel(kernel_name: str, line: str) -> bool: def find_size_in_preprocessor(dimension: str, preprocessor: list) -> int: """Find the dimension of a directive defined value in the preprocessor""" - ret_size = None + ret_size = 0 for line in preprocessor: if f"#define {dimension}" in line: try: @@ -209,45 +261,43 @@ def extract_code(start: str, stop: str, code: str, langs: Code, kernel_name: str return sections -def parse_size(size: Any, preprocessor: list = None, dimensions: dict = None) -> int: +def parse_size(size: Any, preprocessor: list = None, dimensions: dict = None) -> ArraySize: """Converts an arbitrary object into an integer representing memory size""" - ret_size = None + ret_size = ArraySize() if type(size) is not int: try: # Try to convert the size to an integer - ret_size = int(size) + ret_size.add(int(size)) except ValueError: # If size cannot be natively converted to an int, we try to derive it from the preprocessor - if preprocessor is not None: - try: + try: + if preprocessor is not None: if "," in size: - ret_size = 1 for dimension in size.split(","): - ret_size *= find_size_in_preprocessor(dimension, preprocessor) + ret_size.add(find_size_in_preprocessor(dimension, preprocessor)) else: - ret_size = find_size_in_preprocessor(size, preprocessor) - except TypeError: - # preprocessor is available but does not contain the dimensions - pass + ret_size.add(find_size_in_preprocessor(size, preprocessor)) + except TypeError: + # At least one of the dimensions cannot be derived from the preprocessor + pass # If size cannot be natively converted, nor retrieved from the preprocessor, we check user provided values if dimensions is not None: if size in dimensions.keys(): try: - ret_size = int(dimensions[size]) + ret_size.add(int(dimensions[size])) except ValueError: # User error, no mitigation return ret_size elif "," in size: - ret_size = 1 for dimension in size.split(","): try: - ret_size *= int(dimensions[dimension]) + 
ret_size.add(int(dimensions[dimension])) except ValueError: # User error, no mitigation - return None + return ret_size else: # size is already an int. no need for conversion - ret_size = size + ret_size.add(size) return ret_size @@ -297,8 +347,13 @@ def wrap_data(code: str, langs: Code, data: dict, preprocessor: list = None, use intro += create_data_directive_openacc_cxx(name, size) outro += exit_data_directive_openacc_cxx(name, size) elif is_openacc(langs.directive) and is_fortran(langs.language): - intro += create_data_directive_openacc_fortran(name, size) - outro += exit_data_directive_openacc_fortran(name, size) + if "," in data[name][1]: + # Multi dimensional + pass + else: + # One dimensional + intro += create_data_directive_openacc_fortran(name, size) + outro += exit_data_directive_openacc_fortran(name, size) return intro + code + outro @@ -537,9 +592,9 @@ def allocate_signature_memory(data: dict, preprocessor: list = None, user_dimens p_type = data[parameter][0] size = parse_size(data[parameter][1], preprocessor, user_dimensions) if "*" in p_type: - args.append(allocate_array(p_type, size)) + args.append(allocate_array(p_type, size.get())) else: - args.append(allocate_scalar(p_type, size)) + args.append(allocate_scalar(p_type, size.get())) return args @@ -579,11 +634,15 @@ def add_present_openacc( return new_body -def add_present_openacc_cxx(name: str, size: int) -> str: +def add_present_openacc_cxx(name: str, size: ArraySize) -> str: """Create present clause for C++ OpenACC directive""" - return f" present({name}[:{size}]) " + return f" present({name}[:{size.get()}]) " -def add_present_openacc_fortran(name: str, size: int) -> str: +def add_present_openacc_fortran(name: str, size: ArraySize) -> str: """Create present clause for Fortran OpenACC directive""" - return f" present({name}(:{size})) " + if len(size) == 1: + return f" present({name}(:{size.get()})) " + else: + md_size = fortran_md_size(size) + return f" present({name}({','.join(md_size)})) " diff 
--git a/test/utils/test_directives.py b/test/utils/test_directives.py index 530faa68..23cd3b39 100644 --- a/test/utils/test_directives.py +++ b/test/utils/test_directives.py @@ -1,5 +1,3 @@ -from pytest import raises - from kernel_tuner.utils.directives import * @@ -44,21 +42,31 @@ def test_openacc_directive_contains_data_clause(): def test_create_data_directive(): + size = ArraySize() + size.add(1024) assert ( - create_data_directive_openacc("array", 1024, Cxx()) + create_data_directive_openacc("array", size, Cxx()) == "#pragma acc enter data create(array[:1024])\n#pragma acc update device(array[:1024])\n" ) + size.clear() + size.add(35) + size.add(16) assert ( - create_data_directive_openacc("matrix", 35, Fortran()) - == "!$acc enter data create(matrix(:35))\n!$acc update device(matrix(:35))\n" + create_data_directive_openacc("matrix", size, Fortran()) + == "!$acc enter data create(matrix(:35,:16))\n!$acc update device(matrix(:35,:16))\n" ) - assert create_data_directive_openacc("array", 1024, None) == "" + assert create_data_directive_openacc("array", size, None) == "" def test_exit_data_directive(): - assert exit_data_directive_openacc("array", 1024, Cxx()) == "#pragma acc exit data copyout(array[:1024])\n" - assert exit_data_directive_openacc("matrix", 35, Fortran()) == "!$acc exit data copyout(matrix(:35))\n" - assert exit_data_directive_openacc("matrix", 1024, None) == "" + size = ArraySize() + size.add(1024) + assert exit_data_directive_openacc("array", size, Cxx()) == "#pragma acc exit data copyout(array[:1024])\n" + size.clear() + size.add(35) + size.add(16) + assert exit_data_directive_openacc("matrix", size, Fortran()) == "!$acc exit data copyout(matrix(:35,:16))\n" + assert exit_data_directive_openacc("matrix", size, None) == "" def test_correct_kernel(): @@ -69,16 +77,16 @@ def test_correct_kernel(): def test_parse_size(): - assert parse_size(128) == 128 - assert parse_size("16") == 16 - assert parse_size("test") is None - assert parse_size("n", 
["#define n 1024\n"]) == 1024 - assert parse_size("n,m", ["#define n 16\n", "#define m 32\n"]) == 512 - assert parse_size("n", ["#define size 512\n"], {"n": 32}) == 32 - assert parse_size("m", ["#define size 512\n"], {"n": 32}) is None - assert parse_size("rows,cols", dimensions={"rows": 16, "cols": 8}) == 128 - assert parse_size("n_rows,n_cols", ["#define n_cols 16\n", "#define n_rows 32\n"]) == 512 - assert parse_size("rows,cols", [], dimensions={"rows": 16, "cols": 8}) == 128 + assert parse_size(128).get() == 128 + assert parse_size("16").get() == 16 + assert parse_size("test").get() == 0 + assert parse_size("n", ["#define n 1024\n"]).get() == 1024 + assert parse_size("n,m", ["#define n 16\n", "#define m 32\n"]).get() == 512 + assert parse_size("n", ["#define size 512\n"], {"n": 32}).get() == 32 + assert parse_size("m", ["#define size 512\n"], {"n": 32}).get() == 0 + assert parse_size("rows,cols", dimensions={"rows": 16, "cols": 8}).get() == 128 + assert parse_size("n_rows,n_cols", ["#define n_cols 16\n", "#define n_rows 32\n"]).get() == 512 + assert parse_size("rows,cols", [], dimensions={"rows": 16, "cols": 8}).get() == 128 def test_wrap_timing(): @@ -272,8 +280,8 @@ def test_extract_directive_data(): def test_allocate_signature_memory(): code = "#pragma tuner start vector_add a(float*:VECTOR_SIZE) b(float*:VECTOR_SIZE) c(float*:VECTOR_SIZE) size(int:VECTOR_SIZE)\n#pragma acc" data = extract_directive_data(code, Code(OpenACC(), Cxx())) - with raises(TypeError): - _ = allocate_signature_memory(data["vector_add"]) + args = allocate_signature_memory(data["vector_add"]) + assert args[3] == 0 preprocessor = ["#define VECTOR_SIZE 1024\n"] args = allocate_signature_memory(data["vector_add"], preprocessor) assert type(args[0]) is np.ndarray @@ -327,3 +335,8 @@ def test_add_present_openacc(): code_cxx = "#pragma acc parallel num_gangs(32)\n\t#pragma acc loop\n\t//for loop\n" expected_cxx = "#pragma acc parallel num_gangs(32) present(array[:42])\n\t#pragma acc 
loop\n\t//for loop\n" assert add_present_openacc(code_cxx, acc_cxx, data, preprocessor, None) == expected_cxx + code_f90 = "!$acc parallel async num_workers(16)\n" + data = {"matrix": ["float*", "rows,cols"]} + preprocessor = ["#define cols 18\n", "#define rows 14\n"] + expected_f90 = "!$acc parallel async num_workers(16) present(matrix(:14,:18))\n" + assert add_present_openacc(code_f90, acc_f90, data, preprocessor, None) == expected_f90