Files
weaseldb/tools/check_snake_case.py

174 lines
6.1 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Pre-commit hook to enforce snake_case naming conventions in C++ code.
With --check-new-code-only, only staged (modified) lines are checked, to avoid breaking existing camelCase code.
"""
import argparse
import re
import subprocess
import sys
from pathlib import Path
def get_modified_lines(filepath):
    """Return the line numbers touched by the staged changes to *filepath*.

    Runs ``git diff --cached --unified=0`` and reads the "new file" side of
    every hunk header (``@@ -10,3 +15,4 @@`` contributes lines 15..18).

    Args:
        filepath: Path of the file to inspect, as passed to ``git diff``.

    Returns:
        A set of 1-based line numbers in the staged version of the file, or
        ``None`` when git could not be run successfully. Callers treat
        ``None`` as "check every line".
    """
    try:
        result = subprocess.run(
            # "--" ends option parsing so a path that starts with "-" is
            # never mistaken for a git option.
            ["git", "diff", "--cached", "--unified=0", "--", filepath],
            capture_output=True,
            text=True,
            # Only the ASCII hunk headers matter; never fail on odd bytes
            # in the diff body.
            errors="replace",
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # git exited non-zero, or is missing / not executable: fall back to
        # checking all lines rather than crashing the hook.
        return None

    modified_lines = set()
    for line in result.stdout.split("\n"):
        # Hunk headers look like "@@ -10,3 +15,4 @@ optional context".
        if not line.startswith("@@"):
            continue
        match = re.search(r"\+(\d+)(?:,(\d+))?", line)
        if match is None:
            continue
        start = int(match.group(1))
        # An omitted count means exactly one line; a count of 0 (pure
        # deletion) contributes no lines.
        count = int(match.group(2)) if match.group(2) else 1
        modified_lines.update(range(start, start + count))
    return modified_lines
def check_snake_case_violations(filepath, check_new_only=True):
"""Check for camelCase violations in C++ code."""
violations = []
seen_violations = {} # key: (file, line, col), value: camelCase name
# Get modified lines if checking new code only
modified_lines = get_modified_lines(filepath) if check_new_only else None
# Patterns that should use snake_case
patterns = [
# Function definitions and declarations
(r"\b([a-z]+[A-Z][a-zA-Z0-9_]*)\s*\(", "function"),
# Variable declarations (including member variables)
(
r"\b(?:auto|int|int64_t|int32_t|size_t|bool|char|float|double|std::\w+)\s+([a-z]+[A-Z][a-zA-Z0-9_]*)\b",
"variable",
),
# Member variable access (obj.camelCase)
(r"\.([a-z]+[A-Z][a-zA-Z0-9_]*)\b", "member_access"),
# Assignment to camelCase variables
(r"\b([a-z]+[A-Z][a-zA-Z0-9_]*)\s*=", "assignment"),
]
# Exclusions - things that should NOT be flagged
exclusions = [
# C++ standard library and common libraries
r"\b(std::|weaseljson|simdutf|doctest)",
# Template parameters and concepts
r"\b[A-Z][a-zA-Z0-9_]*\b",
# Class/struct names (PascalCase is correct)
r"\bstruct\s+[A-Z][a-zA-Z0-9_]*",
r"\bclass\s+[A-Z][a-zA-Z0-9_]*",
# Enum values (PascalCase is correct per style guide)
r"\b[A-Z][a-zA-Z0-9_]*::[A-Z][a-zA-Z0-9_]*",
# Common HTTP parser callback names (external API)
r"\b(onUrl|onHeaderField|onHeaderFieldComplete|onHeaderValue|onHeaderValueComplete|onHeadersComplete|onBody|onMessageComplete)\b",
# Known legacy APIs we can't easily change
r"\b(user_data|get_arena|append_message)\b",
]
try:
with open(filepath, "r", encoding="utf-8") as f:
lines = f.readlines()
except (IOError, UnicodeDecodeError):
return violations
for line_num, line in enumerate(lines, 1):
# Skip if we're only checking modified lines and this isn't one
if modified_lines is not None and line_num not in modified_lines:
continue
# Skip comments and strings
if re.search(r'^\s*(?://|/\*|\*|")', line.strip()):
continue
# Check if line should be excluded
if any(re.search(exclusion, line) for exclusion in exclusions):
continue
# Check for violations
for pattern, violation_type in patterns:
matches = re.finditer(pattern, line)
for match in matches:
camel_case_name = match.group(1)
# Convert to snake_case suggestion
snake_case = re.sub(
"([a-z0-9])([A-Z])", r"\1_\2", camel_case_name
).lower()
key = (filepath, line_num, match.start(1) + 1)
# Only add if we haven't seen this exact violation before
if (
key not in seen_violations
or seen_violations[key] != camel_case_name
):
seen_violations[key] = camel_case_name
violations.append(
{
"file": filepath,
"line": line_num,
"column": match.start(1) + 1,
"type": violation_type,
"camelCase": camel_case_name,
"snake_case": snake_case,
"context": line.strip(),
}
)
return violations
def main():
    """Command-line entry point: scan the given C++ files and report violations.

    Only paths that exist and end in ``.hpp`` or ``.cpp`` are scanned; all
    other arguments are ignored. Each violation is printed with its location,
    suggested name and source context.

    Returns:
        1 if at least one violation was printed, otherwise 0 (including when
        no files were given).
    """
    cli = argparse.ArgumentParser(description="Check snake_case naming in C++ files")
    cli.add_argument("files", nargs="*", help="Files to check")
    cli.add_argument(
        "--check-new-code-only",
        action="store_true",
        help="Only check modified lines (git diff)",
    )
    options = cli.parse_args()

    # With no file arguments the loop body never runs and 0 is returned.
    found = 0
    for filepath in options.files:
        candidate = Path(filepath)
        if not (candidate.exists() and candidate.suffix in [".hpp", ".cpp"]):
            continue
        for v in check_snake_case_violations(filepath, options.check_new_code_only):
            found += 1
            print(f"\n{filepath}:{v['line']}:{v['column']}:")
            print(f" {v['type']} '{v['camelCase']}' should be '{v['snake_case']}'")
            print(f" Context: {v['context']}")

    if found <= 0:
        return 0

    print(f"\n💡 Found {found} naming violations.")
    print(" New code should use snake_case per style.md")
    print(" To convert: s/([a-z0-9])([A-Z])/\\1_\\2/g and lowercase")
    return 1
if __name__ == "__main__":
    # Use main()'s return value as the process exit status, so a run that
    # found violations (return value 1) exits non-zero.
    raise SystemExit(main())