# Copyright (C) 2012 by the Massachusetts Institute of Technology.
# All rights reserved.
#
# Export of this software from the United States of America may
# require a specific license from the United States Government.
# It is the responsibility of any person or organization contemplating
# export to obtain such a license before exporting.
#
# WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
# distribute this software and its documentation for any purpose and
# without fee is hereby granted, provided that the above copyright
# notice appear in all copies and that both that copyright notice and
# this permission notice appear in supporting documentation, and that
# the name of M.I.T. not be used in advertising or publicity pertaining
# to distribution of the software without specific, written prior
# permission. Furthermore if you modify this software you must label
# your software as modified software and not distribute it in such a
# fashion that it might be confused with the original M.I.T. software.
# M.I.T. makes no representations about the suitability of
# this software for any purpose. It is provided "as is" without express
# or implied warranty.
# This program checks for some kinds of MIT krb5 coding style
# violations in a single file. Checked violations include:
#
# Line is too long
# Tabs violations
# Trailing whitespace and final blank lines
# Comment formatting errors
# Preprocessor statements in function bodies
# Misplaced braces
# Space before paren in function call, or no space after if/for/while
# Parenthesized return expression
# Space after cast operator, or no space before * in cast operator
# Line broken before binary operator
# Lack of spaces around binary operator (sometimes)
# Assignment at the beginning of an if conditional
# Use of prohibited string functions
# Lack of braces around 2+ line flow control body
# Incorrect indentation as determined by emacs c-mode (if possible)
#
# This program does not check for the following:
#
# Anything outside of a function body except line length/whitespace
# Anything non-syntactic (proper cleanup flow control, naming, etc.)
# UTF-8 violations
# Implicit tests against NULL or '\0'
# Inner-scope variable declarations
# Over- or under-parenthesization
# Long or deeply nested function bodies
# Syntax of function calls through pointers
import os
import re
import sys
from subprocess import call
from tempfile import NamedTemporaryFile
# Report one style violation on standard output.  ln is the line
# number the violation refers to (printed right-aligned in a column
# five characters wide) and msg is the description printed after it.
def warn(ln, msg):
    report = '%5d %s' % (ln, msg)
    print(report)
# If lines[0] indicates the krb5 C style, try to use emacs to reindent
# a copy of lines and return the reindented lines.  Return None if
# lines is empty, if the file does not use the krb5 C style, or if the
# emacs batch reindent is unsuccessful (emacs cannot be run or exits
# with a nonzero status).
def emacs_reindent(lines):
    style_marker = 'c-basic-offset: 4; indent-tabs-mode: nil'
    if not lines or style_marker not in lines[0]:
        return None

    # The emacs lisp helpers are looked up next to this script.
    util_dir = os.path.dirname(sys.argv[0])
    cstyle_el = os.path.join(util_dir, 'krb5-c-style.el')
    reindent_el = os.path.join(util_dir, 'krb5-batch-reindent.el')

    with NamedTemporaryFile(suffix='.c', mode='w+') as f:
        # Flush so that emacs sees the full contents when it opens the
        # file by name.
        f.write(''.join(lines))
        f.flush()
        args = ['emacs', '-q', '-batch', '-l', cstyle_el, '-l', reindent_el,
                f.name]
        with open(os.devnull, 'w') as devnull:
            try:
                st = call(args, stdin=devnull, stdout=devnull,
                          stderr=devnull)
            except OSError:
                # Fail gracefully if emacs isn't installed.
                return None
        if st != 0:
            return None
        # Read back what emacs left in the file; the with statement
        # closes (and thereby removes) the temporary file.
        f.seek(0)
        return f.readlines()
# Warn if line is longer than 79 characters.  Lines beginning with
# ' * Copyright' are exempt from the limit.
def check_length(line, ln):
    if line.startswith(' * Copyright'):
        return
    if len(line) > 79:
        warn(ln, 'Length exceeds 79 characters')
# Check tab usage on one line.  allow_tabs says whether the file
# permits tab characters at all; seen_tab says whether a tab has been
# seen in the file so far (including on this line).
#
# In a file which forbids tabs, any tab is reported.  In a file which
# allows them, a tab directly after a space is reported, and so is a
# run of eight or more spaces once the file is known to use tabs.
def check_tabs(line, ln, allow_tabs, seen_tab):
    if not allow_tabs:
        if '\t' in line:
            warn(ln, 'Tab character in file which does not allow tabs')
        return

    if ' \t' in line:
        warn(ln, 'Tab character immediately following space')
    # Look for eight consecutive spaces, not just any space; a single
    # space appears on almost every line of code.
    if seen_tab and (' ' * 8) in line:
        warn(ln, '8+ spaces in file which uses tabs')
# Warn if the last character of line is a space or a tab.  An empty
# line never triggers the warning.
def check_trailing_whitespace(line, ln):
    last_char = line[-1:]
    if last_char != '' and last_char in ' \t':
        warn(ln, 'Trailing whitespace')
# Check the formatting of a multi-line comment.  lines holds the
# source lines of the comment, starting with the one containing '/*',
# and ln is the line number of that first line.
def check_comment(lines, ln):
    first, last = lines[0], lines[-1]
    # Column where the '*' of the opening '/*' sits; continuation
    # lines are expected to have their '*' in this same column.
    star_col = first.index('/*') + 1

    if not first.lstrip().startswith('/*'):
        warn(ln, 'Multi-line comment begins after code')

    # end_ln tracks the line number of the last comment line so that
    # the closing checks below are reported there.
    end_ln = ln
    for end_ln, text in enumerate(lines[1:], ln + 1):
        if len(text) <= star_col or text[star_col] != '*':
            warn(end_ln, 'Comment line does not have * aligned with top')
        elif text[:star_col].lstrip() != '':
            warn(end_ln, 'Garbage before * in comment line')

    if not last.rstrip().endswith('*/'):
        warn(end_ln, 'Code after end of multi-line comment')

    # Comments of three or more lines must open with a bare '/*' or
    # '/**' line and close with a bare '*/' line.
    if len(lines) > 2:
        opens_bare = first.strip() in ('/*', '/**')
        closes_bare = last.strip() == '*/'
        if not (opens_bare and closes_bare):
            warn(end_ln,
                 'Comment is 3+ lines but is not formatted as block comment')
# Warn if line begins with '#' in its first column.  The caller only
# invokes this for lines inside a function body.
def check_preprocessor(line, ln):
    is_directive = line[:1] == '#'
    if is_directive:
        warn(ln, 'Preprocessor statement in function body')
# Warn about misplaced braces: an open brace alone at the start of an
# indented line, code following an open brace, or code preceding a
# close brace on the same line.
def check_braces(line, ln):
    # One-line initializer expressions ("= { ... }") are legitimate,
    # so remove them before looking at the braces.
    code = re.sub(r'=\s*{.*}', '', line)

    starts_with_brace = code[:1] == '{'
    if not starts_with_brace and code.lstrip().startswith('{'):
        warn(ln, 'Un-cuddled open brace')

    code_after_open = re.search(r'{\s*\S', code)
    if code_after_open:
        warn(ln, 'Code on line after open brace')

    code_before_close = re.search(r'\S.*}', code)
    if code_before_close:
        warn(ln, 'Code on line before close brace')
# Check the spacing between a word and a following open paren: flow
# control keywords need a space, function calls must not have one.
# Also warn about a space before a close paren.
#
# This test gives false positives on some function pointer type
# declarations or casts.  Avoid this by using typedefs.
def check_space_before_paren(line, ln):
    type_names = ('void', 'char', 'int', 'long', 'unsigned')
    flow_keywords = ('if', 'for', 'while', 'switch')

    for match in re.finditer(r'([\w]+)(\s*)\(', line):
        word = match.group(1)
        has_space = bool(match.group(2))
        # Type names and "return" may be followed by a paren with or
        # without a space.
        if word in type_names or word == 'return':
            continue
        if word in flow_keywords:
            if not has_space:
                warn(ln, 'No space after flow control keyword')
        elif has_space:
            warn(ln, 'Space before parenthesis in function call')

    if re.search(r' \)', line):
        warn(ln, 'Space before close parenthesis')
# Warn if line contains a return statement whose expression starts
# with an open paren and ends with ");".
def check_parenthesized_return(line, ln):
    found = re.search(r'return\s*\(.*\);', line)
    if found is not None:
        warn(ln, 'Parenthesized return expression')
# Warn about a space after a cast operator and about casts such as
# (char*) which lack a space before the '*'.
def check_cast(line, ln):
    # Without a real C parser, cast operators cannot be reliably told
    # apart from parenthesized expressions or function call
    # parameters, so heuristics are used.  A cast operator is followed
    # by an expression, which usually begins with an identifier or an
    # open paren.  A function call or parenthesized expression is
    # never followed by an identifier and only rarely by an open
    # paren.  A cast followed by an expression beginning with '*' is
    # not detected, since that is hard to distinguish from a
    # multiplication operator.  "(*fp) (args)" and
    # "if (condition) statement" produce false positives, but both of
    # those are erroneous anyway.
    for match in re.finditer(r'\(([^(]+)\)(\s*)[a-zA-Z_(]', line):
        inner, gap = match.groups()
        if gap:
            warn(ln, 'Space after cast operator (or inline if/while body)')
        # Check for casts like (char*) which should have a space.
        if re.search(r'[^\s\*]\*+$', inner):
            warn(ln, 'No space before * in cast operator')
# Warn if line starts with a binary operator (the previous line was
# broken before it) or if a binary operator lacks surrounding spaces.
def check_binary_operator(line, ln):
    binop = r'(\+|-|\*|/|%|\^|==|=|!=|<=|<|>=|>|&&|&|\|\||\|)'

    # The break happened at the end of the previous line, so report
    # the problem there.
    if re.match(r'\s*' + binop + r'\s', line):
        warn(ln - 1, 'Line broken before binary operator')

    for match in re.finditer(r'(\s|\w)' + binop + r'(\s|\w)', line):
        before, op, after = match.groups()
        tight_before = not before.isspace()
        tight_after = not after.isspace()
        if tight_before and tight_after:
            msg = 'No space before or after binary operator'
        elif tight_before:
            msg = 'No space before binary operator'
        elif tight_after and op not in ('-', '*', '&'):
            # '-', '*' and '&' can also be unary operators, which take
            # no space after them.
            msg = 'No space after binary operator'
        else:
            continue
        warn(ln, msg)
# Warn if an if conditional begins with an assignment to a variable.
def check_assignment_in_conditional(line, ln):
    # Only if statements are checked; assignments in loop expressions
    # are allowed.
    assignment = re.search(r'if\s*\(+\w+\s*=[^=]', line)
    if assignment is not None:
        warn(ln, 'Assignment in if conditional')
# Return the width of the leading whitespace of line, with tabs
# expanded to eight-column tab stops.
def indent(line):
    # The pattern is a raw string so that \s reaches the re module
    # intact instead of being an invalid string escape sequence.
    leading = re.match(r'\s*', line).group(0)
    return len(leading.expandtabs())
# Check brace usage around flow control bodies.  line is the current
# line, ln is its 1-based line number, and lines is the list of all
# lines in the file (used to look at the two lines which follow).
#
# Warns about a bare "do" line, about an else whose braces do not
# match on both sides, and about an unbraced if/else if/for/while/else
# whose next two lines are both indented more deeply than it.
def check_unbraced_flow_body(line, ln, lines):
    if re.match(r'\s*do$', line):
        warn(ln, 'do statement without braces')
        return

    # Group 1 is a close brace before the else; group 3 is an open
    # brace after it.  Exactly one of them being present means only
    # one arm is braced.
    m = re.match(r'\s*(})?\s*else(\s*if\s*\(.*\))?\s*({)?\s*$', line)
    if m and (m.group(1) is None) != (m.group(3) is None):
        warn(ln, 'One arm of if/else statement braced but not the other')

    if not (re.match(r'\s*(if|else if|for|while)\s*\(.*\)$', line) or
            re.match(r'\s*else$', line)):
        return

    # ln is 1-based, so lines[ln] and lines[ln + 1] are the two lines
    # after this one.  If the file ends before then, there is no room
    # for a body of two or more lines.
    if ln + 1 >= len(lines):
        return

    base = indent(line)
    if indent(lines[ln]) > base and indent(lines[ln + 1]) > base:
        warn(ln, 'Body is 2+ lines but has no braces')
# Warn if line appears to use strcpy, strcat, sprintf, or a function
# whose name ends in scanf.
def check_bad_string_fn(line, ln):
    # The pattern is intentionally pretty fuzzy so that it catches the
    # whole scanf family.
    hit = re.search(r'\W(strcpy|strcat|sprintf|\w*scanf)\W', line)
    if hit:
        warn(ln, 'Prohibited string function')
# Compare line (number ln, newline already removed) with the same
# line of the reindented copy in indented_lines, and warn if they
# differ.  Does nothing if indented_lines is None or empty.
def check_indentation(line, indented_lines, ln):
    if not indented_lines:
        return

    idx = ln - 1
    if idx < len(indented_lines):
        expected = indented_lines[idx].rstrip('\r\n')
        if line != expected:
            warn(ln, 'Indentation may be incorrect')
    elif line.strip() == '':
        # The reindented copy is shorter than the input.  This should
        # only happen when the emacs reindent removed blank lines from
        # the input file, but check.
        warn(ln, 'Trailing blank line')
# Run all style checks over lines, the list of lines of one source
# file (as returned by readlines(), with or without line endings),
# printing a warning for each violation found.
def check_file(lines):
    if not lines:
        return

    # Check if this file allows tabs.
    allow_tabs = 'indent-tabs-mode: nil' not in lines[0]
    seen_tab = False
    indented_lines = emacs_reindent(lines)

    in_function = False
    # Lines of the multi-line comment currently being collected, or
    # empty when not inside one.
    comment = []
    for ln, raw_line in enumerate(lines, 1):
        line = raw_line.rstrip('\r\n')
        seen_tab = seen_tab or ('\t' in line)

        # Check line structure issues before altering the line.
        check_indentation(line, indented_lines, ln)
        check_length(line, ln)
        check_tabs(line, ln, allow_tabs, seen_tab)
        check_trailing_whitespace(line, ln)

        # Strip out single-line comments and the contents of string
        # literals.
        if not comment:
            line = re.sub(r'/\*.*?\*/', '', line)
            line = re.sub(r'"(\\.|[^"])*"', '""', line)

        # Parse out and check multi-line comments.  (Ignore code on
        # the first or last line; check_comment will warn about it.)
        if comment or '/*' in line:
            comment.append(line)
            if '*/' in line:
                check_comment(comment, ln - len(comment) + 1)
                comment = []
            continue

        # Warn if we see a // comment and ignore anything following.
        if '//' in line:
            warn(ln, '// comment')
            line = re.sub(r'//.*', '', line)

        # A brace in the first column opens or closes a function body.
        if line.startswith('{'):
            in_function = True
        elif line.startswith('}'):
            in_function = False

        if in_function:
            check_preprocessor(line, ln)
            check_braces(line, ln)
            check_space_before_paren(line, ln)
            check_parenthesized_return(line, ln)
            check_cast(line, ln)
            check_binary_operator(line, ln)
            check_assignment_in_conditional(line, ln)
            check_unbraced_flow_body(line, ln, lines)
            check_bad_string_fn(line, ln)

    # Lines normally still carry their line ending here, so remove it
    # before testing whether the final line is blank.
    if lines[-1].rstrip('\r\n') == '':
        warn(len(lines), 'Blank line at end of file')
# Read the file to check from standard input when no argument is
# given, or from the named file when one argument is given; any other
# usage is an error.
if len(sys.argv) == 1:
    lines = sys.stdin.readlines()
elif len(sys.argv) == 2:
    # The with statement closes the file even if reading fails.
    with open(sys.argv[1]) as f:
        lines = f.readlines()
else:
    sys.stderr.write('Usage: cstyle-file [filename]\n')
    sys.exit(1)

check_file(lines)