Skip to content

Commit 1e07f43

Browse files
committed
ci: add check_copyright pre-commit hook
1 parent c550ec6 commit 1e07f43

File tree

5 files changed

+152
-1
lines changed

5 files changed

+152
-1
lines changed

.copyright-template

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# © Crown Copyright GCHQ
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

.cspell/library_terms.txt

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ jaxlib
5353
jaxopt
5454
jaxtyping
5555
jumanjihouse
56+
keepends
5657
linalg
5758
linkcheck
5859
literalinclude

.pre-commit-config.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -112,3 +112,9 @@ repos:
112112
- "-sn" # Don't display the score
113113
- "--rcfile=.pylintrc" # pylint configuration file
114114
exclude: "documentation/source/snippets"
115+
- id: check-copyright
116+
name: Check for copyright notice
117+
description: Ensure a copyright notice is present at the top of each Python file
118+
entry: python pre_commit_hooks/check_copyright.py
119+
types: [ python ]
120+
language: python

pre_commit_hooks/check_copyright.py

+132
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# © Crown Copyright GCHQ
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
Pre-commit hook to check for a copyright notice at the start of each file.
17+
18+
The copyright notice must start on the first line of the file (or the second line,
19+
if the first line is a shebang), and must exactly match the template file specified.
20+
21+
Exits with code 1 if any checked file does not have the copyright notice, printing
22+
the names of offending files to standard error. Exits silently with code 0 if all
23+
checked files have the notice.
24+
25+
Optionally, with the `--unsafe-fix` argument, this script can attempt to
26+
automatically insert the copyright notice at the top of files that are missing it.
27+
The method that this script tries is naive, though, and may cause unexpected issues,
28+
so be sure to check on any changes it makes! Using this argument when using this
29+
script as a pre-commit hook is not recommended.
30+
31+
If you want to change the copyright notice template, a multi-file find-and-replace
32+
tool is likely your best bet, rather than trying to use `--unsafe-fix`.
33+
"""
34+
35+
import argparse
36+
import sys
37+
38+
SHEBANG = "#!"
39+
40+
41+
def parse_args(argv=None) -> argparse.Namespace:
    """
    Parse command line arguments.

    :param argv: Optional list of argument strings to parse. When left as
        :data:`None`, ``argparse`` falls back to the process's own command line
        (``sys.argv[1:]``), which is what the pre-commit entry point relies on.
        Passing an explicit list allows the parser to be driven programmatically.
    :return: Namespace with the attributes ``filenames`` (list of paths to check),
        ``template`` (path of the file holding the copyright notice) and
        ``unsafe_fix`` (:data:`True` if missing notices should be inserted)
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("filenames", nargs="*", help="Filenames to check.")
    parser.add_argument(
        "--template",
        default=".copyright-template",
        help=(
            "File containing the copyright notice that must be present at the top of "
            "each file checked."
        ),
    )
    parser.add_argument(
        "--unsafe-fix",
        action="store_true",
        help=(
            "If set, attempt to insert the copyright notice at the top of files that "
            "are missing one. Respects shebangs, but is otherwise naive and may cause "
            "issues such as duplicated copyright notices. Use at your own peril."
        ),
    )
    return parser.parse_args(argv)
63+
64+
65+
def guess_newline_type(filename: str) -> str:
    r"""
    Guess the newline type (\r\n or \n) for a file.

    Only the first line of the file is inspected, so a file with mixed line
    endings is classified by whatever terminates its first line.

    :param filename: Path of the file to inspect; it is decoded as UTF-8
    :return: ``"\r\n"`` if the first line ends with a carriage return followed by
        a line feed, otherwise ``"\n"`` (including for an empty file)
    """
    # newline="" disables newline translation, so the terminator is seen verbatim.
    with open(filename, newline="", encoding="utf8") as f:
        # The first line is all that is needed; avoid loading the whole file.
        first_line = f.readline()

    return "\r\n" if first_line.endswith("\r\n") else "\n"
75+
76+
77+
def main() -> None:
    """
    Run the script.

    Reads the copyright template, then checks that every file named on the command
    line starts with it. A leading shebang line, and any blank lines directly after
    it, are skipped before the comparison. Empty files are ignored. Each offending
    file is reported on standard error and, if ``--unsafe-fix`` was given, rewritten
    with the notice inserted (after the shebang, if there is one).

    Always terminates the process via :func:`sys.exit`: code 1 if any checked file
    was missing the notice (even if it was then fixed), code 0 otherwise.

    For more information, see the module docstring and `parse_args()` above,
    or run `python check_copyright.py --help`.
    """
    failed = False

    args = parse_args()
    with open(args.template, encoding="utf8") as f:
        copyright_template = f.read()

    for filename in args.filenames:
        with open(filename, encoding="utf8") as f:
            content = f.read()
        if not content:
            # empty files don't need a copyright notice
            continue

        lines = content.splitlines(keepends=True)
        has_shebang = lines[0].startswith(SHEBANG)
        if has_shebang:
            # Skip the shebang line and any blank lines that follow it. Because the
            # lines keep their terminators, a blank line is "\n" rather than "", so
            # it has to be stripped before testing for emptiness.
            start_index = 1
            while start_index < len(lines) and not lines[start_index].strip():
                start_index += 1
            test_content = "".join(lines[start_index:])
        else:
            test_content = content

        if test_content.startswith(copyright_template):
            continue

        print(f"{filename}:0 - no matching copyright notice", file=sys.stderr)
        failed = True

        if not args.unsafe_fix:
            continue

        # try and add the copyright notice in
        if has_shebang:
            # Preserve the shebang as the first line. When that line carries its
            # own terminator, the extra "\n" leaves one blank line before the
            # notice, which the check above tolerates.
            new_content = (
                lines[0] + "\n" + copyright_template + "\n" + "".join(lines[1:])
            )
        else:
            new_content = copyright_template + "\n" + content

        # try and keep the same newline type as the file had before
        newline_type = guess_newline_type(filename)
        with open(filename, encoding="utf8", mode="w", newline=newline_type) as f:
            f.write(new_content)

    sys.exit(1 if failed else 0)
129+
130+
131+
if __name__ == "__main__":
132+
main()

pre_commit_hooks/require_ascii.py

-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
#!/usr/bin/env python3
2-
32
# © Crown Copyright GCHQ
43
#
54
# Licensed under the Apache License, Version 2.0 (the "License");

0 commit comments

Comments
 (0)