diff --git a/licence-checker.hjson b/licence-checker.hjson index e477707..f9802d1 100644 --- a/licence-checker.hjson +++ b/licence-checker.hjson @@ -4,14 +4,224 @@ // // Example Licence Checker File { + // The licence text. Newlines within the licence are preserved. + // + // All lines in this licence must be present and match exactly the contents + // here, using the per-file comment style. The licence must be in the first + // comment block at the top of the file. licence: ''' Copyright lowRISC contributors. Licensed under the Apache License, Version 2.0, see LICENSE for details. SPDX-License-Identifier: Apache-2.0 ''', + // Whether the `licence` above is a regular expression or not. match_regex: "false", + // Files that are never checked for a licence header exclude_paths: [ '.style.yapf', ], + // These are added to every suffix in `comment_styles_by_suffix` to support + // templating any of the filetypes specified there, using the same comment + // style as expected in the file itself. + // + // This means any suffix in this list should come last in + // `comment_styles_by_suffix`, so `foo.tpl` is not confused with `foo.c.tpl` + // (the latter of which should use the style that `foo.c` would use). + template_suffixes: [ + ".tpl", + ], + // (Prioritised) Mapping of file name suffixes to comment style. If the suffix + // of your file does not match one of these, it will not be checked. + // + // Each entry is a pair (suffixes, styles). suffixes is a list of file + // suffixes: if a filename matches one of these suffixes, we'll use the styles + // in styles. styles is either a string or a list of strings. If there is one + // or more strings, these strings must all be keys of COMMENT_STYLES and they + // give the different comment styles that are acceptable for the file type. + // + // These rules are given in priority order. Tuples higher in the list are + // matched before those later in the list, on purpose. 
+ // + // Files that either do not match any extension or that have an empty list of + // styles are not checked for a licence. + // + // Available Comment Styles: + // - "//" - Line Comments using `//` + // - "#" - Line Comments using `#` + // - "/*" - Block Comments using `/* */`. Each line in the licence is wrapped + // in a single block comment. + // - "corefile" - The FuseSoC Core file style. This is `#` line comments, but + // the first line has a different format. + comment_styles_by_suffix: [ + // Hardware Files + { + // SystemVerilog + suffixes: [".svh", ".sv"], + styles: ["//"], + }, + // Hardware Build Systems + { + // TCL Files + suffixes: [".tcl", ".sdc"], + styles: ["#"], + }, + { + // FuseSoC Core Files + suffixes: [".core"], + styles: ["corefile"], + }, + { + // Makefiles + suffixes: ["Makefile", ".mk"], + styles: ["#"] + }, + { + // Yosys script + suffixes: [".ys"], + styles: ["#"], + }, + { + // AscentLint waiver Files + suffixes: [".waiver"], + styles: ["#"], + }, + { + // Verilator configuration (waiver) files + suffixes: [".vlt"], + styles: ["//"], + }, + { + // Verible configuration files + suffixes: [".vbl"], + styles: ["#"], + }, + { + // Exclusion lists + suffixes: [".el"], + styles: ["//"], + }, + { + // General configuration files + suffixes: [".cfg"], + styles: ["//", "#"], + }, + { + // File lists (not checked) + suffixes: [".f"], + styles: [] + }, + // The following two rules will inevitably bite us + { + // Riviera dofile + suffixes: ["riviera_run.do"], + styles: ["#"], + }, + { + // Cadence LEC dofile + suffixes: [".do"], + styles: ["//"], + }, + // Software Files + { + // C, C++ Sources and Headers + suffixes: [".c", ".h", ".cc", ".cpp"], + styles: ["//"], + }, + { + // C, C++ X-Macro List Declaration Files + suffixes: [".def"], + styles: ["//"], + }, + { + // Assembly (for Preprocessing) + suffixes: [".S"], + styles: ["//", "/*"], + }, + { + // Assembly (without Preprocessing) + suffixes: [".s"], + styles: ["/*"], + }, + { + // 
Linker Scripts + suffixes: [".ld"], + styles: ["/*"], + }, + { + // Rust + suffixes: [".rs"], + styles: ["//"], + }, + // Software Build Systems + { + suffixes: ["meson.build", "toolchain.txt", "meson_options.txt"], + styles: ["#"], + }, + // General tooling + { + // Python + suffixes: [".py"], + styles: ["#"], + }, + { + // Shell Scripts + suffixes: [".sh"], + styles: ["#"], + }, + { + // Dockerfiles + suffixes: ["Dockerfile"], + styles: ["#"] + }, + // Configuration + { + // HJSON + suffixes: [".hjson"], + styles: ["//"], + }, + { + // YAML + suffixes: [".yml", ".yaml"], + styles: ["#"], + }, + { + // TOML + suffixes: [".toml"], + styles: ["#"] + }, + { + // Apt and Python Requirements files + suffixes: ["requirements.txt"], + styles: ["#"], + }, + { + // nginx config + suffixes: ["redirector.conf"], + styles: ["#"], + }, + // Documentation + { + // Markdown and HTML (not checked) + suffixes: [".md", ".html"], + styles: [], + }, + { + // CSS + suffixes: [".css"], + styles: ["/*"], + }, + { + // SCSS + suffixes: [".scss"], + styles: ["//"], + }, + // Templates (last because these may overlap with entries above, due to + // `template_suffixes` earlier in this file). + { + suffixes: [".tpl"], + styles: ["#"], + }, + ], + } diff --git a/licence-checker/licence-checker.py b/licence-checker/licence-checker.py index ebd2dfd..be22678 100755 --- a/licence-checker/licence-checker.py +++ b/licence-checker/licence-checker.py @@ -6,6 +6,7 @@ import argparse import fnmatch +import itertools import logging import re import subprocess @@ -111,57 +112,97 @@ def full_line_parts(self, licence_line): # styles are not checked for a licence. 
COMMENT_CHARS = [
     # Hardware Files
-    ([".svh", ".sv", ".sv.tpl"], SLASH_SLASH),  # SystemVerilog
+    ([".svh", ".sv", ".sv.tpl"], [SLASH_SLASH]),  # SystemVerilog
 
     # Hardware Build Systems
-    ([".tcl", ".sdc"], HASH),  # tcl
-    ([".core", ".core.tpl"], 'corefile'),  # FuseSoC Core Files
-    (["Makefile", ".mk"], HASH),  # Makefiles
-    ([".ys"], HASH),  # Yosys script
-    ([".waiver"], HASH),  # AscentLint waiver files
-    ([".vlt"], SLASH_SLASH),  # Verilator configuration (waiver) files
-    ([".vbl"], HASH),  # Verible configuration files
-    ([".el", ".el.tpl"], SLASH_SLASH),  # Exclusion list
+    ([".tcl", ".sdc"], [HASH]),  # tcl
+    ([".core", ".core.tpl"], ['corefile']),  # FuseSoC Core Files
+    (["Makefile", ".mk"], [HASH]),  # Makefiles
+    ([".ys"], [HASH]),  # Yosys script
+    ([".waiver"], [HASH]),  # AscentLint waiver files
+    ([".vlt"], [SLASH_SLASH]),  # Verilator configuration (waiver) files
+    ([".vbl"], [HASH]),  # Verible configuration files
+    ([".el", ".el.tpl"], [SLASH_SLASH]),  # Exclusion list
     ([".cfg", ".cfg.tpl"], [SLASH_SLASH, HASH]),  # Kinds of configuration files
     ([".f"], []),  # File lists (not checked)
 
     # The following two rules will inevitably bite us.
-    (["riviera_run.do"], HASH),  # Riviera dofile
-    ([".do"], SLASH_SLASH),  # Cadence LEC dofile
+    (["riviera_run.do"], [HASH]),  # Riviera dofile
+    ([".do"], [SLASH_SLASH]),  # Cadence LEC dofile
 
     # Software Files
-    ([".c", ".c.tpl", ".h", ".h.tpl", ".cc", ".cpp"], SLASH_SLASH),  # C, C++
-    ([".def"], SLASH_SLASH),  # C, C++ X-Include List Declaration Files
+    ([".c", ".c.tpl", ".h", ".h.tpl", ".cc", ".cpp"], [SLASH_SLASH]),  # C, C++
+    ([".def"], [SLASH_SLASH]),  # C, C++ X-Include List Declaration Files
     ([".S"], [SLASH_SLASH, SLASH_STAR]),  # Assembly (With Preprocessing)
-    ([".s"], SLASH_STAR),  # Assembly (Without Preprocessing)
-    ([".ld", ".ld.tpl"], SLASH_STAR),  # Linker Scripts
-    ([".rs"], SLASH_SLASH),  # Rust
+    ([".s"], [SLASH_STAR]),  # Assembly (Without Preprocessing)
+    ([".ld", ".ld.tpl"], [SLASH_STAR]),  # Linker Scripts
+    ([".rs"], [SLASH_SLASH]),  # Rust
 
     # Software Build Systems
-    (["meson.build", "toolchain.txt", "meson_options.txt"], HASH),  # Meson
+    (["meson.build", "toolchain.txt", "meson_options.txt"], [HASH]),  # Meson
 
     # General Tooling
-    ([".py"], HASH),  # Python
-    ([".sh"], HASH),  # Shell Scripts
-    (["Dockerfile"], HASH),  # Dockerfiles
+    ([".py"], [HASH]),  # Python
+    ([".sh"], [HASH]),  # Shell Scripts
+    (["Dockerfile"], [HASH]),  # Dockerfiles
 
     # Configuration
-    ([".hjson", ".hjson.tpl"], SLASH_SLASH),  # hjson
-    ([".yml", ".yaml"], HASH),  # YAML
-    ([".toml"], HASH),  # TOML
-    (["-requirements.txt"], HASH),  # Apt and Python requirements files
-    (["redirector.conf"], HASH),  # nginx config
+    ([".hjson", ".hjson.tpl"], [SLASH_SLASH]),  # hjson
+    ([".yml", ".yaml"], [HASH]),  # YAML
+    ([".toml"], [HASH]),  # TOML
+    (["-requirements.txt"], [HASH]),  # Apt and Python requirements files
+    (["redirector.conf"], [HASH]),  # nginx config
 
     # Documentation
     ([".md", ".md.tpl", ".html"], []),  # Markdown and HTML (not checked)
-    ([".css"], SLASH_STAR),  # CSS
-    ([".scss"], SLASH_SLASH),  # SCSS
+    ([".css"], [SLASH_STAR]),  # CSS
+    ([".scss"], [SLASH_SLASH]),  # SCSS
 
     # Templates (Last because there are overlaps with extensions above)
-    ([".tpl"], HASH),  # Mako templates
+    ([".tpl"], [HASH]),  # Mako templates
 ]
 
 
+def parse_comment_style_config(styles_by_suffix, template_suffixes=None):
+    '''Build the prioritised list of comment style rules
+
+    styles_by_suffix is the `comment_styles_by_suffix` config list (dicts with
+    `suffixes` and `styles` keys). If it is empty or None, the built-in
+    COMMENT_CHARS table is used instead. Each suffix is also accepted with any
+    one of template_suffixes appended.
+
+    Returns objects with `suffixes` and `styles` list attributes, in priority
+    order. Unknown styles are dropped with a warning; a rule without both keys
+    raises ValueError.
+    '''
+    if styles_by_suffix:
+        rules = []
+        for rule in styles_by_suffix:
+            if 'suffixes' not in rule or 'styles' not in rule:
+                raise ValueError('comment style rule needs `suffixes` and '
+                                 '`styles` keys: {!r}'.format(rule))
+            rules.append((rule['suffixes'], rule['styles']))
+    else:
+        # No comment styles given: fall back to the built-in table.
+        rules = COMMENT_CHARS
+
+    all_styles = []
+    for suffixes, styles in rules:
+        # `styles` may be a single style name rather than a list of them.
+        styles = [styles] if isinstance(styles, str) else list(styles)
+        for unknown in (s for s in styles if s not in COMMENT_STYLES):
+            logging.warning('Ignoring unknown comment style %r', unknown)
+
+        this_style = SimpleNamespace()
+        this_style.suffixes = list(suffixes) + [
+            suffix + template_suffix for suffix, template_suffix
+            in itertools.product(suffixes, template_suffixes or [])]
+        this_style.styles = [s for s in styles if s in COMMENT_STYLES]
+        all_styles.append(this_style)
+
+    return all_styles
+
+
 class LicenceMatcher:
     '''An object to match a given licence at the start of a file'''
 
@@ -233,30 +274,30 @@ def take_line(self, line):
         return (True, not self.lines_left)
 
 
-def detect_comment_char(all_matchers, filename):
+def detect_comment_char(all_matchers, all_styles, filename):
     '''Find zero or more LicenceMatcher objects for filename
 
     all_matchers should be a dict like COMMENT_STYLES, but where the values are
     the corresponding LicenceMatcher objects.
 
+    all_styles is a list of style configuration objects. Each one has a `styles`
+    attribute and a `suffixes` attribute, both lists of strings. The strings in
+    the `styles` attribute identify styles in the `all_matchers` dict.
+
     '''
     found = None
-    for (suffixes, keys) in COMMENT_CHARS:
+    for style_config in all_styles:
         if found is not None:
             break
-        for suffix in suffixes:
+        for suffix in style_config.suffixes:
             if filename.endswith(suffix):
-                found = keys
+                found = style_config.styles
                 break
 
     if found is None:
         return []
 
-    if not isinstance(found, list):
-        assert isinstance(found, str)
-        found = [found]
-
-    return [all_matchers[key] for key in found]
+    return [all_matchers[style_name] for style_name in found]
 
 
 def git_find_repo_toplevel():
@@ -358,13 +399,13 @@ def check_paths(config, git_paths):
             results.excluded(filepath, "Path matches exclude pattern")
             continue
 
-        check_file_for_licence(all_matchers, results, filepath)
+        check_file_for_licence(all_matchers, config.comment_styles_by_suffix, results, filepath)
 
     return results
 
 
-def check_file_for_licence(all_matchers, results, filepath):
-    matchers = detect_comment_char(all_matchers, filepath.name)
+def check_file_for_licence(all_matchers, all_styles, results, filepath):
+    matchers = detect_comment_char(all_matchers, all_styles, filepath.name)
 
     if not matchers:
         results.skipped(filepath, "Unknown comment style")
@@ -499,6 +540,8 @@ def main():
         exit(1)
     config.match_regex = match_regex == 'true'
 
+    config.comment_styles_by_suffix = parse_comment_style_config(parsed_config.get('comment_styles_by_suffix', []), parsed_config.get('template_suffixes', []))
+
     results = check_paths(config, options.paths)
 
     print(results.display_nicely())