#!/usr/bin/env python3
r"""Pre-commit hook to enforce snake_case naming conventions in C++ code.

The project style guide (style.md) asks for snake_case for all variables,
functions, and member functions.  Legacy camelCase still exists in the
codebase, so new code should use snake_case and existing camelCase should be
converted during natural refactoring rather than by mass renaming.  For that
reason the hook can restrict itself to staged (modified) lines.

Wiring in ``.pre-commit-config.yaml`` (alongside the ``black`` hook from
https://github.com/psf/black)::

    - repo: local
      hooks:
        - id: snake-case-enforcement
          name: Enforce snake_case naming
          entry: ./tools/check_snake_case.py
          language: script
          files: '\.(hpp|cpp)$'
          exclude: '^build/'
          args: ['--check-new-code-only']

The check is a regex heuristic, not a C++ parser.  Known limitations:
multi-line ``/* ... */`` comments are only recognised on lines that start
with ``/*`` or ``*``, and raw string literals are not understood.
"""

import argparse
import re
import subprocess
import sys
from pathlib import Path

# File suffixes this hook inspects; anything else passed on the command line
# is ignored.
CPP_SUFFIXES = (".hpp", ".cpp")

# A camelCase identifier: lowercase run, then an uppercase letter, then the
# rest of the identifier.  PascalCase names (types, enum values, template
# parameters) start with an uppercase letter and therefore never match.
_CAMEL = r"[a-z]+[A-Z][a-zA-Z0-9_]*"

# (compiled pattern, violation type).  Group 1 is always the identifier.
# Order matters: when several patterns hit the same identifier, the first
# one listed here determines the reported type.
_VIOLATION_PATTERNS = (
    # Function definitions, declarations and calls.
    (re.compile(r"\b(" + _CAMEL + r")\s*\("), "function"),
    # Variable declarations (including member variables).
    (
        re.compile(
            r"\b(?:auto|int|int64_t|int32_t|size_t|bool|char|float|double|std::\w+)\s+("
            + _CAMEL
            + r")\b"
        ),
        "variable",
    ),
    # Member access (obj.camelCase).
    (re.compile(r"\.(" + _CAMEL + r")\b"), "member_access"),
    # Assignment to a camelCase name; the lookahead keeps `==` out.
    (re.compile(r"\b(" + _CAMEL + r")\s*=(?!=)"), "assignment"),
)

# Identifiers that must never be flagged.
_ALLOWED_NAMES = frozenset(
    {
        # Common HTTP parser callback names (external API).
        "onUrl",
        "onHeaderField",
        "onHeaderFieldComplete",
        "onHeaderValue",
        "onHeaderValueComplete",
        "onHeadersComplete",
        "onBody",
        "onMessageComplete",
        # Known legacy APIs.  These are already snake_case, so the patterns
        # above cannot match them; kept so the list mirrors the project's
        # documented exclusions.
        "user_data",
        "get_arena",
        "append_message",
    }
)

# Matches when the text immediately before an identifier is a namespace
# qualifier rooted in the standard library or a vendored library, e.g.
# `simdutf::` or `std::chrono::`.  Such names belong to an external API.
_EXTERNAL_QUALIFIER = re.compile(
    r"\b(?:std|(?:weaseljson|simdutf|doctest)\w*)::(?:\w+::)*$"
)

# Lines that begin with a comment marker or a string literal are skipped
# entirely (covers `//`, `/*`, ` * ` continuation lines and wrapped strings).
_COMMENT_OR_STRING_START = re.compile(r'\s*(?://|/\*|\*|")')

# String literals, character literals and trailing `//` comments embedded in
# an otherwise ordinary code line.
_LITERAL_OR_COMMENT = re.compile(
    r'"(?:\\.|[^"\\])*"'  # string literal
    r"|'(?:\\.|[^'\\])*'"  # character literal
    r"|//.*"  # line comment
)

# Unified-diff hunk header, e.g. "@@ -10,3 +15,4 @@".  Groups: new-file start
# line and optional new-file line count.
_HUNK_HEADER = re.compile(r"@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@")


def _new_line_numbers(diff_text):
    """Return the set of new-file line numbers covered by hunks in *diff_text*."""
    numbers = set()
    for diff_line in diff_text.splitlines():
        header = _HUNK_HEADER.match(diff_line)
        if header is None:
            continue
        start = int(header.group(1))
        # An omitted count means exactly one line; an explicit 0 (pure
        # deletion) contributes nothing.
        count = 1 if header.group(2) is None else int(header.group(2))
        numbers.update(range(start, start + count))
    return numbers


def get_modified_lines(filepath):
    """Get line numbers of staged, modified lines using ``git diff``.

    Args:
        filepath: Path of the file to inspect, as git should see it.

    Returns:
        A set of 1-based line numbers that are added or changed in the index,
        or ``None`` when git is unavailable or the diff fails.  Callers treat
        ``None`` as "check every line".
    """
    try:
        result = subprocess.run(
            # `--` stops git from interpreting the path as a revision.
            ["git", "diff", "--cached", "--unified=0", "--", filepath],
            capture_output=True,
            text=True,
            # Only hunk headers are parsed, so undecodable bytes in the diff
            # body must not abort the hook.
            errors="replace",
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # git failed or is not installed: fall back to checking all lines.
        return None
    return _new_line_numbers(result.stdout)


def _mask_literals_and_comments(line):
    """Blank out literals and `//` comments, preserving column positions."""
    return _LITERAL_OR_COMMENT.sub(lambda m: " " * len(m.group(0)), line)


def _to_snake_case(name):
    """Return the snake_case suggestion for a camelCase identifier."""
    return re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower()


def _scan_line(code):
    """Map 0-based column -> (violation type, identifier) for one code line.

    Each identifier occurrence is reported once, even if several patterns
    match it, and allow-listed or externally-qualified names are dropped.
    """
    hits = {}
    for regex, violation_type in _VIOLATION_PATTERNS:
        for match in regex.finditer(code):
            start = match.start(1)
            name = match.group(1)
            if start in hits or name in _ALLOWED_NAMES:
                continue
            if _EXTERNAL_QUALIFIER.search(code[:start]):
                continue
            hits[start] = (violation_type, name)
    return hits


def check_snake_case_violations(filepath, check_new_only=True):
    """Check a C++ file for camelCase identifiers.

    Args:
        filepath: Path of the file to scan.
        check_new_only: When true, only lines reported by
            :func:`get_modified_lines` are scanned; if that lookup fails,
            every line is scanned.

    Returns:
        A list of dicts, ordered by line then column, each with the keys
        ``file``, ``line``, ``column`` (1-based), ``type``, ``camelCase``,
        ``snake_case`` and ``context`` (the stripped source line).  An
        unreadable file yields an empty list and a warning on stderr.
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError) as err:
        # Best effort: an unreadable file must not block the commit, but the
        # user should know it was not checked.
        print(f"warning: could not read {filepath}: {err}", file=sys.stderr)
        return []

    modified_lines = get_modified_lines(filepath) if check_new_only else None

    violations = []
    for line_num, line in enumerate(lines, 1):
        if modified_lines is not None and line_num not in modified_lines:
            continue
        if _COMMENT_OR_STRING_START.match(line):
            continue

        hits = _scan_line(_mask_literals_and_comments(line))
        for start in sorted(hits):
            violation_type, name = hits[start]
            violations.append(
                {
                    "file": filepath,
                    "line": line_num,
                    "column": start + 1,
                    "type": violation_type,
                    "camelCase": name,
                    "snake_case": _to_snake_case(name),
                    "context": line.strip(),
                }
            )

    return violations


def main(argv=None):
    """Run the hook.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit status: 1 if any violation was found, otherwise 0.
    """
    parser = argparse.ArgumentParser(description="Check snake_case naming in C++ files")
    parser.add_argument("files", nargs="*", help="Files to check")
    parser.add_argument(
        "--check-new-code-only",
        action="store_true",
        help="Only check modified lines (git diff)",
    )
    args = parser.parse_args(argv)

    if not args.files:
        return 0

    total_violations = 0

    for filepath in args.files:
        path = Path(filepath)
        if not path.exists() or path.suffix not in CPP_SUFFIXES:
            continue

        violations = check_snake_case_violations(filepath, args.check_new_code_only)
        if not violations:
            continue

        print(f"\n❌ {filepath}:")
        for v in violations:
            print(
                f"  Line {v['line']}:{v['column']} - {v['type']} '{v['camelCase']}' should be '{v['snake_case']}'"
            )
            print(f"    Context: {v['context']}")
        total_violations += len(violations)

    if total_violations > 0:
        print(f"\n💡 Found {total_violations} naming violations.")
        print("   New code should use snake_case per style.md")
        print("   To convert: s/([a-z0-9])([A-Z])/\\1_\\2/g and lowercase")
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())