#!/usr/bin/env python3
"""
Pre-commit hook to enforce snake_case naming conventions in C++ code.
Only checks modified lines to avoid breaking existing camelCase code.
"""

import argparse
import re
import subprocess
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Set

# File suffixes that main() treats as C++ sources; everything else is skipped.
_CPP_SUFFIXES = (".hpp", ".cpp")

# First "+start[,count]" range in a unified-diff hunk header,
# e.g. "@@ -10,3 +15,4 @@" -> start=15, count=4.
_HUNK_NEW_RANGE_RE = re.compile(r"\+(\d+)(?:,(\d+))?")

# Lines (after stripping) that start with a comment marker or a string literal.
_COMMENT_OR_STRING_RE = re.compile(r'^\s*(?://|/\*|\*|")')

# (pattern, violation type) pairs; group 1 of each pattern is the camelCase name.
# Order matters: when several patterns hit the same identifier, the first
# one listed here decides the reported violation type.
_VIOLATION_PATTERNS = [
    # Function definitions and declarations
    (re.compile(r"\b([a-z]+[A-Z][a-zA-Z0-9_]*)\s*\("), "function"),
    # Variable declarations (including member variables)
    (
        re.compile(
            r"\b(?:auto|int|int64_t|int32_t|size_t|bool|char|float|double|std::\w+)\s+([a-z]+[A-Z][a-zA-Z0-9_]*)\b"
        ),
        "variable",
    ),
    # Member variable access (obj.camelCase)
    (re.compile(r"\.([a-z]+[A-Z][a-zA-Z0-9_]*)\b"), "member_access"),
    # Assignment to camelCase variables
    (re.compile(r"\b([a-z]+[A-Z][a-zA-Z0-9_]*)\s*="), "assignment"),
]

# Exclusions - things that should NOT be flagged.
# NOTE: these are applied per line. If any of them matches anywhere on a
# line, the whole line is skipped, not just the matching identifier.
_EXCLUSION_PATTERNS = [
    # C++ standard library and common libraries
    re.compile(r"\b(std::|weaseljson|simdutf|doctest)"),
    # Template parameters and concepts
    re.compile(r"\b[A-Z][a-zA-Z0-9_]*\b"),
    # Class/struct names (PascalCase is correct)
    re.compile(r"\bstruct\s+[A-Z][a-zA-Z0-9_]*"),
    re.compile(r"\bclass\s+[A-Z][a-zA-Z0-9_]*"),
    # Enum values (PascalCase is correct per style guide)
    re.compile(r"\b[A-Z][a-zA-Z0-9_]*::[A-Z][a-zA-Z0-9_]*"),
    # Common HTTP parser callback names (external API)
    re.compile(
        r"\b(onUrl|onHeaderField|onHeaderFieldComplete|onHeaderValue|onHeaderValueComplete|onHeadersComplete|onBody|onMessageComplete)\b"
    ),
    # Known legacy APIs we can't easily change
    re.compile(r"\b(user_data|get_arena|append_message)\b"),
]

# Word-boundary rules used by _to_snake_case().
_ACRONYM_BOUNDARY_RE = re.compile(r"([A-Z]+)([A-Z][a-z])")
_LOWER_UPPER_BOUNDARY_RE = re.compile(r"([a-z0-9])([A-Z])")


def _parse_hunk_lines(diff_text: str) -> Set[int]:
    """Return the new-file line numbers covered by the hunks in *diff_text*."""
    modified_lines: Set[int] = set()
    for line in diff_text.split("\n"):
        # Only hunk headers like "@@ -10,3 +15,4 @@" carry line numbers.
        if not line.startswith("@@"):
            continue
        match = _HUNK_NEW_RANGE_RE.search(line)
        if match is None:
            continue
        start = int(match.group(1))
        # An omitted count means the hunk covers exactly one line.
        count = int(match.group(2)) if match.group(2) else 1
        modified_lines.update(range(start, start + count))
    return modified_lines


def _to_snake_case(name: str) -> str:
    """Convert a camelCase identifier to a snake_case suggestion.

    Acronym runs are kept together, so ``parseHTTPRequest`` becomes
    ``parse_http_request`` rather than ``parse_httprequest``.
    """
    # Split "HTTPRequest" into "HTTP_Request" before the generic rule runs.
    separated = _ACRONYM_BOUNDARY_RE.sub(r"\1_\2", name)
    separated = _LOWER_UPPER_BOUNDARY_RE.sub(r"\1_\2", separated)
    return separated.lower()


def get_modified_lines(filepath) -> Optional[Set[int]]:
    """Get line numbers of modified lines using git diff.

    Args:
        filepath: Path of the file whose staged changes are inspected.

    Returns:
        The set of 1-based line numbers (in the staged version) touched by
        the staged diff, or ``None`` when git cannot be run or exits with an
        error, in which case callers fall back to checking every line.
    """
    try:
        # Get diff for staged changes (what's being committed). The "--"
        # makes git treat filepath strictly as a path, never as an option
        # or a revision name.
        result = subprocess.run(
            ["git", "diff", "--cached", "--unified=0", "--", filepath],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # git failed or is not installed (FileNotFoundError is an OSError):
        # check all lines (fallback).
        return None
    return _parse_hunk_lines(result.stdout)


def check_snake_case_violations(filepath, check_new_only=True) -> List[Dict[str, Any]]:
    """Check for camelCase violations in C++ code.

    Args:
        filepath: Path of the C++ file to scan (read as UTF-8).
        check_new_only: When true, only lines reported by
            ``get_modified_lines`` are checked; if that returns ``None``
            every line is checked.

    Returns:
        A list of dicts with the keys ``file``, ``line``, ``column``,
        ``type``, ``camelCase``, ``snake_case`` and ``context``. Each
        identifier position is reported once, labelled with the first
        matching pattern type. The list is empty when the file cannot be
        read or decoded.
    """
    violations: List[Dict[str, Any]] = []

    # Get modified lines if checking new code only
    modified_lines = get_modified_lines(filepath) if check_new_only else None

    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError):
        return violations

    for line_num, line in enumerate(lines, 1):
        # Skip if we're only checking modified lines and this isn't one
        if modified_lines is not None and line_num not in modified_lines:
            continue

        stripped = line.strip()

        # Skip comments and strings
        if _COMMENT_OR_STRING_RE.search(stripped):
            continue

        # Check if line should be excluded
        if any(exclusion.search(line) for exclusion in _EXCLUSION_PATTERNS):
            continue

        # Start columns already reported on this line. Overlapping patterns
        # (e.g. "variable" and "assignment" on `int fooBar = 3;`) would
        # otherwise report the same identifier twice.
        reported_columns: Set[int] = set()

        for pattern, violation_type in _VIOLATION_PATTERNS:
            for match in pattern.finditer(line):
                column = match.start(1) + 1
                if column in reported_columns:
                    continue
                reported_columns.add(column)

                camel_case_name = match.group(1)
                violations.append(
                    {
                        "file": filepath,
                        "line": line_num,
                        "column": column,
                        "type": violation_type,
                        "camelCase": camel_case_name,
                        "snake_case": _to_snake_case(camel_case_name),
                        "context": stripped,
                    }
                )

    return violations


def main() -> int:
    """Run the checker over the files given on the command line.

    Returns:
        ``1`` when at least one naming violation was found, otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description="Check snake_case naming in C++ files")
    parser.add_argument("files", nargs="*", help="Files to check")
    parser.add_argument(
        "--check-new-code-only",
        action="store_true",
        help="Only check modified lines (git diff)",
    )

    args = parser.parse_args()

    if not args.files:
        return 0

    total_violations = 0

    for filepath in args.files:
        path = Path(filepath)
        # Silently ignore missing files and anything that is not .hpp/.cpp.
        if not path.exists() or path.suffix not in _CPP_SUFFIXES:
            continue

        violations = check_snake_case_violations(filepath, args.check_new_code_only)

        if violations:
            print(f"\n❌ {filepath}:")
            for v in violations:
                print(
                    f"  Line {v['line']}:{v['column']} - {v['type']} '{v['camelCase']}' should be '{v['snake_case']}'"
                )
                print(f"    Context: {v['context']}")
            total_violations += len(violations)

    if total_violations > 0:
        print(f"\n💡 Found {total_violations} naming violations.")
        print("   New code should use snake_case per style.md")
        print("   To convert: s/([a-z0-9])([A-Z])/\\1_\\2/g and lowercase")
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())