-
Notifications
You must be signed in to change notification settings - Fork 0
/
code-stat.py
executable file
·345 lines (289 loc) · 13.3 KB
/
code-stat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
#!/usr/bin/env python3
################################################################################
# #
# This file is part of Code Stat, a Python script to compute line of code #
# metrics on a set of source code files, for several programming languages. #
# Copyright (C) 2023-2024 Yoann Le Montagner <yo35 -at- melix.net> #
# #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <https://www.gnu.org/licenses/>. #
# #
################################################################################
"""
Compute line of code metrics on source code files, for several programming languages.
Usage:
python3 code-stat.py <file-or-folder-1> [<file-or-folder-2> [<file-or-folder-3> ...]]
Count the number of source code files passed as arguments to the script, and the number of code lines
and comment lines they contain. If a folder is passed as an argument to the script, all its content is
analyzed, recursively including the content of its child folders, grandchild folders, etc.
Disclaimer:
The parsing strategy implemented in this script is a (significantly) simplified version of what would be
necessary to implement a real programming language parser. As such, it may not distinguish accurately
between code lines and comment lines in some weird cases (e.g. if a string literal contains something
that looks like a comment token). Still, when it comes to analyzing realistic code bases, these situations
are unlikely to happen frequently.
"""
import os
import re
import sys
from typing import Callable, Optional
class LOCCounter:
    """
    Accumulator for line-of-code statistics: number of files processed so far, together with
    the total number of code lines and comment lines found in those files.
    """

    def __init__(self, title: str) -> None:
        # The title is printed (and underlined) at the top of the statistics report.
        self.title = title
        self.fileCount = 0
        self.codeLineCount = 0
        self.commentLineCount = 0

    def increment(self, fileCount: int, codeLineCount: int, commentLineCount: int) -> None:
        """
        Add the given amounts to the three running totals.
        """
        self.commentLineCount += commentLineCount
        self.codeLineCount += codeLineCount
        self.fileCount += fileCount

    def printStats(self) -> None:
        """
        Print the report on the standard output: underlined title, the three totals,
        and the comment/code ratio as a percentage.
        """
        underline = '-' * len(self.title)
        print(self.title)
        print(underline)
        print('Source files: {:8d}'.format(self.fileCount))
        print('Code lines: {:8d}'.format(self.codeLineCount))
        print('Comment lines: {:8d}'.format(self.commentLineCount))
        if self.codeLineCount != 0:
            ratio = self.commentLineCount * 100 / self.codeLineCount
            print('Comment/code ratio: {:8.0f} %'.format(ratio))
        else:
            # The ratio is undefined when there is no code line: print a dash instead.
            print('Comment/code ratio: - %')

    def isEmpty(self) -> bool:
        """
        Tell whether no file has been recorded in this counter yet.
        """
        return not self.fileCount
def noSuchToken() -> Callable[[str], Optional[int]]:
    """
    Build a token finder that never finds anything: the returned function yields None for any line.
    """
    return lambda line: None
def findToken(token: str) -> Callable[[str], Optional[int]]:
    """
    Build a token finder for a fixed string: the returned function yields the 0-based index of
    the first occurrence of the token in the line, or None if the line does not contain it.
    """
    def locate(line: str) -> Optional[int]:
        index = line.find(token)
        if index < 0:
            return None
        return index

    return locate
def findRegex(pattern: str) -> Callable[[str], Optional[int]]:
    """
    Build a token finder for a regular expression: the returned function yields the 0-based index
    at which the first match of the pattern starts in the line, or None if nothing matches.
    """
    # Compile once, when the finder is built, rather than on every line.
    regex = re.compile(pattern)

    def locate(line: str) -> Optional[int]:
        match = regex.search(line)
        if match is None:
            return None
        return match.start()

    return locate
def isFalse() -> Callable[[str], bool]:
    """
    Build a line predicate that is never satisfied: the returned function yields False for any line.
    """
    return lambda line: False
def isStartingWithToken(token: str) -> Callable[[str], bool]:
    """
    Build a line predicate that is satisfied when the line begins with the given token.
    """
    return lambda line: line.startswith(token)
def doProcessFile(
    locCounter: LOCCounter,
    file: str,
    findBeginCommentToken: Callable[[str], Optional[int]] = noSuchToken(),
    findEndCommentToken: Callable[[str], Optional[int]] = noSuchToken(),
    findSingleLineCommentToken: Callable[[str], Optional[int]] = noSuchToken(),
    isMandatoryFirstInstruction: Callable[[str], bool] = isFalse(),
) -> None:
    """
    Core processing function: read a source file line by line, classify each non-blank line as
    either code or comment, and add the result (one file plus its line counts) to the counter.

    A line is counted as a comment when it lies within a block comment, or when it starts with a
    block comment or single line comment token. Any other non-blank line is counted as code.
    Blank lines are not counted.

    The comment lines located at the very beginning of the file form the "header". They are
    counted as comments only if the header is immediately followed by a code line; if it is
    followed by a blank line (or if the file ends first), they are not counted at all.

    Parameters:
      locCounter                  -- counter to increment once the whole file has been read.
      file                        -- path to the file to analyze.
      findBeginCommentToken       -- return the index in a line of the token opening a block comment, or None.
      findEndCommentToken         -- return the index in a line of the token closing a block comment, or None.
      findSingleLineCommentToken  -- return the index in a line of the token opening a single line comment, or None.
      isMandatoryFirstInstruction -- tell whether a line is an instruction allowed on the first line
                                     of the file without terminating the header.

    Any exception raised while opening or reading the file is propagated to the caller, in which
    case the counter is left untouched.
    """

    codeLineCount = 0
    commentLineCount = 0

    # Decode as UTF-8 regardless of the platform locale, so that results do not depend on the machine.
    # - 'utf-8-sig' drops a leading byte-order mark, which str.strip() does not remove and which would
    #   otherwise prevent a comment token (or the mandatory first instruction) from being detected at
    #   the beginning of the first line.
    # - errors='replace' keeps files in another 8-bit encoding countable instead of failing on them.
    with open(file, 'r', encoding='utf-8-sig', errors='replace') as f:

        withinBlockComment = False
        withinHeader = True
        headerLineCount = 0

        for lineIndex, line in enumerate(f, start=1):  # 1-based index
            line = line.strip()

            # Blank line
            if not line:
                if not withinBlockComment:
                    # Header lines are ignored if immediately followed by a blank line.
                    withinHeader = False
                continue

            # Within a block comment
            if withinBlockComment:
                if withinHeader:
                    headerLineCount += 1
                else:
                    commentLineCount += 1

            # Regular code
            else:
                beginCommentToken = findBeginCommentToken(line)
                singleLineCommentToken = findSingleLineCommentToken(line)
                if beginCommentToken == 0 or singleLineCommentToken == 0:
                    if withinHeader:
                        headerLineCount += 1
                    else:
                        commentLineCount += 1
                else:
                    codeLineCount += 1
                    if withinHeader and not (lineIndex == 1 and isMandatoryFirstInstruction(line)):
                        withinHeader = False
                        # Header lines are counted as comment if immediately followed by code.
                        commentLineCount += headerLineCount

                # A block comment opens on this line, unless its token lies inside a single line comment.
                if beginCommentToken is not None and (singleLineCommentToken is None or beginCommentToken < singleLineCommentToken):
                    withinBlockComment = True
                    # Skip the first character of the token, so that this occurrence is not found again.
                    line = line[(beginCommentToken + 1):]

            # Look for the end of the current block comment (and potential following
            # block comments that both begin and end on the current line)
            if withinBlockComment:
                while True:
                    endCommentToken = findEndCommentToken(line)
                    if endCommentToken is None:
                        break
                    line = line[(endCommentToken + 1):]
                    beginCommentToken = findBeginCommentToken(line)
                    singleLineCommentToken = findSingleLineCommentToken(line)
                    if beginCommentToken is None or (singleLineCommentToken is not None and singleLineCommentToken < beginCommentToken):
                        withinBlockComment = False
                        break
                    line = line[(beginCommentToken + 1):]

    # Increment the counter (only at the end, in case of exceptions).
    locCounter.increment(1, codeLineCount, commentLineCount)
def processCFamilyFile(locCounter: LOCCounter, file: str) -> None:
    """
    Analyze a file using C/C++-style comments: // opens a single line comment,
    and /* ... */ delimits a block comment.
    """
    blockCommentOpening = findToken('/*')
    blockCommentClosing = findToken('*/')
    lineCommentOpening = findToken('//')
    doProcessFile(
        locCounter,
        file,
        findBeginCommentToken = blockCommentOpening,
        findEndCommentToken = blockCommentClosing,
        findSingleLineCommentToken = lineCommentOpening,
    )
def processPHPFile(locCounter: LOCCounter, file: str) -> None:
    """
    Analyze a PHP file: // opens a single line comment, /* ... */ delimits a block comment,
    and the first line may hold a <?php instruction.
    """
    blockCommentOpening = findToken('/*')
    blockCommentClosing = findToken('*/')
    lineCommentOpening = findToken('//')
    openingTag = isStartingWithToken('<?php')
    doProcessFile(
        locCounter,
        file,
        findBeginCommentToken = blockCommentOpening,
        findEndCommentToken = blockCommentClosing,
        findSingleLineCommentToken = lineCommentOpening,
        isMandatoryFirstInstruction = openingTag,
    )
def processCSSFile(locCounter: LOCCounter, file: str) -> None:
    """
    Analyze a CSS file: /* ... */ delimits a block comment, and there is no single line comment.
    """
    blockCommentOpening = findToken('/*')
    blockCommentClosing = findToken('*/')
    doProcessFile(
        locCounter,
        file,
        findBeginCommentToken = blockCommentOpening,
        findEndCommentToken = blockCommentClosing,
    )
def processScriptFamilyFile(locCounter: LOCCounter, file: str) -> None:
    """
    Analyze a file in which a hash character (#) opens a single line comment.
    """
    hashComment = findToken('#')
    doProcessFile(locCounter, file, findSingleLineCommentToken = hashComment)
def processFortranFile(locCounter: LOCCounter, file: str) -> None:
    """
    Analyze a Fortran 90 file, in which an exclamation mark opens a single line comment.
    Compiler directives (!DIR$, !$OMP, etc...) are not treated as comments: they are counted as code.
    """
    # The negative lookahead rejects an exclamation mark followed by either
    # "<word>$" or "$<word>", which is the shape of a compiler directive.
    nonDirectiveComment = findRegex('!(?!\\w+\\$|\\$\\w+)')
    doProcessFile(locCounter, file, findSingleLineCommentToken = nonDirectiveComment)
def processSQLFile(locCounter: LOCCounter, file: str) -> None:
    """
    Process a SQL file (-- for single line comments, /* ... */ for block comments).
    """
    # Block comments are part of the SQL syntax as well: without these tokens, every line
    # of a /* ... */ comment would be counted as code.
    doProcessFile(
        locCounter,
        file,
        findBeginCommentToken = findToken('/*'),
        findEndCommentToken = findToken('*/'),
        findSingleLineCommentToken = findToken('--'),
    )
def processPascalFile(locCounter: LOCCounter, file: str) -> None:
    """
    Analyze a Pascal file: (* ... *) and { ... } delimit block comments, // opens a single line comment.
    Compiler directives (block comments whose content starts with a $ character) are counted as code.
    """
    # The negative lookahead rejects an opening token immediately followed by a $ character.
    blockCommentOpening = findRegex('(?:\\(\\*|{)(?!\\$)')
    blockCommentClosing = findRegex('(?:\\*\\)|})')
    lineCommentOpening = findToken('//')
    doProcessFile(
        locCounter,
        file,
        findBeginCommentToken = blockCommentOpening,
        findEndCommentToken = blockCommentClosing,
        findSingleLineCommentToken = lineCommentOpening,
    )
def run(filesOrDirectories: list[str]) -> None:
    """
    Script entry point: analyze the given files and folders (folders are visited recursively),
    then print the statistics of each language for which at least one file has been found.

    Files are dispatched according to their extension (case-insensitive); those with an
    unregistered extension are skipped. A path on which an exception occurs is reported on the
    standard error output and counted as an error, without interrupting the analysis.

    The paths are taken from the end of the list first.

    Raises ValueError if two registered languages claim the same extension.
    """

    counters: list[LOCCounter] = []
    extensionToAction: dict[str, Callable[[str], None]] = {}

    def registerLanguage(title: str, processFun: Callable[[LOCCounter, str], None], extensions: list[str]) -> None:
        # One counter per language, shared by all the extensions of this language.
        counter = LOCCounter(title)
        action = lambda file: processFun(counter, file)
        counters.append(counter)
        for extension in extensions:
            if extension in extensionToAction:
                raise ValueError('Extension conflict: ' + extension)
            extensionToAction[extension] = action

    # Register the supported languages.
    registerLanguage('C/C++', processCFamilyFile, ['.c', '.cpp', '.cxx', '.cc', '.h', '.hpp', '.hxx', '.hh'])
    registerLanguage('C#', processCFamilyFile, ['.cs'])
    registerLanguage('CSS', processCSSFile, ['.css'])
    registerLanguage('CUDA', processCFamilyFile, ['.cu', '.cuh'])
    registerLanguage('Fortran 90', processFortranFile, ['.f90'])
    registerLanguage('Java', processCFamilyFile, ['.java'])
    registerLanguage('JavaScript', processCFamilyFile, ['.js', '.jsx', '.mjs'])
    registerLanguage('Kotlin', processCFamilyFile, ['.kt'])
    registerLanguage('Pascal', processPascalFile, ['.pas'])
    registerLanguage('PHP', processPHPFile, ['.php'])
    registerLanguage('Python', processScriptFamilyFile, ['.py'])
    registerLanguage('SQL', processSQLFile, ['.sql'])
    registerLanguage('TypeScript', processCFamilyFile, ['.ts', '.tsx', '.mts'])
    registerLanguage('Unix shell script', processScriptFamilyFile, ['.sh', '.bash', '.csh', '.ksh', '.zsh'])

    # Visit recursively all the files and folders passed on the command line.
    toProcess = [os.path.abspath(f) for f in filesOrDirectories]
    # Resolved paths of the folders already expanded and of the files already analyzed. This avoids
    # counting the same content several times when it is reachable through several paths (overlapping
    # arguments, symbolic links), and prevents symbolic link cycles from being followed over and over.
    visited: set[str] = set()
    errorCount = 0
    while toProcess:
        path = toProcess.pop()
        try:
            if os.path.isdir(path):
                realPath = os.path.realpath(path)
                if realPath not in visited:
                    visited.add(realPath)
                    toProcess.extend(os.path.join(path, f) for f in os.listdir(path))
            elif os.path.isfile(path):
                _, extension = os.path.splitext(path)
                action = extensionToAction.get(extension.lower())
                if action is not None:
                    # Only the files actually analyzed are recorded, so that a link with an
                    # unregistered extension does not hide the file it points to.
                    realPath = os.path.realpath(path)
                    if realPath not in visited:
                        visited.add(realPath)
                        action(path)
        except Exception:
            errorCount += 1
            print('Error with {:s}'.format(path), file = sys.stderr)
    if errorCount > 0:
        print('{:d} error(s) encountered'.format(errorCount), file = sys.stderr)

    # Print the result.
    print()
    allCountersAreEmpty = True
    for counter in counters:
        if not counter.isEmpty():
            allCountersAreEmpty = False
            counter.printStats()
            print()
    if allCountersAreEmpty:
        print('No source code file found')
        print()
# Invoke the script entry point, with the command line arguments (script name excluded)
# passed in reverse order.
if __name__ == '__main__':
    commandLineArguments = sys.argv[1:]
    commandLineArguments.reverse()
    run(commandLineArguments)