Try enforcing snake_case

This commit is contained in:
2025-08-25 13:47:35 -04:00
parent 50d873e8eb
commit 612497f733
3 changed files with 183 additions and 0 deletions

View File

@@ -16,3 +16,19 @@ repos:
rev: e2c2116d86a80e72e7146a06e68b7c228afc6319 # frozen: v0.6.13
hooks:
- id: cmake-format
- repo: https://github.com/psf/black
rev: 8a737e727ac5ab2f1d4cf5876720ed276dc8dc4b # frozen: 25.1.0
hooks:
- id: black
language_version: python3
- repo: local
hooks:
- id: snake-case-enforcement
name: Enforce snake_case naming
entry: ./tools/check_snake_case.py
language: script
files: '\.(hpp|cpp)$'
exclude: '^build/'
args: ['--check-new-code-only']

View File

@@ -119,6 +119,7 @@ auto addr = reinterpret_cast<uintptr_t>(ptr); // Pointer to integer conv
### Variables and Functions
- **snake_case** for all variables, functions, and member functions
- **Legacy camelCase exists** - the codebase currently contains mixed naming due to historical development. New code should use snake_case. Existing camelCase should be converted to snake_case during natural refactoring (not mass renaming).
```cpp
int64_t used_bytes() const;
void add_block(int64_t size);

166
tools/check_snake_case.py Executable file
View File

@@ -0,0 +1,166 @@
#!/usr/bin/env python3
"""
Pre-commit hook to enforce snake_case naming conventions in C++ code.
Only checks modified lines to avoid breaking existing camelCase code.
"""
import argparse
import re
import subprocess
import sys
from pathlib import Path
def get_modified_lines(filepath):
    """Return the line numbers that staged changes add or modify in *filepath*.

    Runs ``git diff --cached --unified=0`` for the file and reads the new-file
    side of every hunk header (``@@ -a,b +c,d @@``).

    Args:
        filepath: Path of the file to inspect, passed to git as a pathspec.

    Returns:
        A set of 1-based line numbers covered by the staged hunks (empty when
        the diff has no hunks), or ``None`` when git cannot be started or exits
        with an error. Callers treat ``None`` as "check every line".
    """
    try:
        # Diff of the index against HEAD, i.e. what is about to be committed.
        result = subprocess.run(
            # "--" stops git from reading the path as a revision or an option.
            ["git", "diff", "--cached", "--unified=0", "--", filepath],
            capture_output=True,
            text=True,
            # Only the ASCII hunk headers matter, so undecodable bytes in the
            # diff body must not abort the hook.
            errors="replace",
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # git failed or is not installed: fall back to checking all lines.
        return None

    modified_lines = set()
    for line in result.stdout.splitlines():
        # Hunk headers look like "@@ -10,3 +15,4 @@".
        if not line.startswith("@@"):
            continue
        match = re.search(r"\+(\d+)(?:,(\d+))?", line)
        if match is None:
            continue
        start = int(match.group(1))
        # An omitted count means a single line; a count of 0 (pure deletion)
        # yields an empty range.
        count = int(match.group(2)) if match.group(2) else 1
        modified_lines.update(range(start, start + count))
    return modified_lines
def check_snake_case_violations(filepath, check_new_only=True):
    """Scan a C++ source file for camelCase identifiers.

    Each line is matched against a few regex heuristics (call/declaration,
    typed variable declaration, member access, assignment). Lines that start
    with a comment marker or a double quote, and lines matching any exclusion
    pattern, are skipped entirely.

    Args:
        filepath: Path of the file to scan.
        check_new_only: When true, only lines reported by
            ``get_modified_lines`` are examined; if that returns ``None`` every
            line is examined.

    Returns:
        A list of dicts with the keys ``file``, ``line``, ``column``, ``type``,
        ``camelCase``, ``snake_case`` and ``context``. An identifier occurrence
        is reported once per position, labelled with the first pattern that
        matched it. The list is empty when the file cannot be read or decoded
        as UTF-8.
    """
    violations = []

    # Restrict the scan to staged lines when requested (None means "all").
    modified_lines = get_modified_lines(filepath) if check_new_only else None

    # Patterns that should use snake_case; group 1 is the offending name.
    patterns = [
        # Function definitions, declarations and calls
        (re.compile(r"\b([a-z]+[A-Z][a-zA-Z0-9_]*)\s*\("), "function"),
        # Variable declarations (including member variables)
        (
            re.compile(
                r"\b(?:auto|int|int64_t|int32_t|size_t|bool|char|float|double|std::\w+)\s+([a-z]+[A-Z][a-zA-Z0-9_]*)\b"
            ),
            "variable",
        ),
        # Member variable access (obj.camelCase)
        (re.compile(r"\.([a-z]+[A-Z][a-zA-Z0-9_]*)\b"), "member_access"),
        # Assignment to camelCase variables
        (re.compile(r"\b([a-z]+[A-Z][a-zA-Z0-9_]*)\s*="), "assignment"),
    ]

    # Exclusions - a line matching any of these is not checked at all.
    exclusions = [
        re.compile(source)
        for source in (
            # C++ standard library and common libraries
            r"\b(std::|weaseljson|simdutf|doctest)",
            # Template parameters and concepts.
            # NOTE: this matches any identifier starting with an uppercase
            # letter, so a line mentioning a PascalCase name is skipped whole.
            r"\b[A-Z][a-zA-Z0-9_]*\b",
            # Class/struct names (PascalCase is correct)
            r"\bstruct\s+[A-Z][a-zA-Z0-9_]*",
            r"\bclass\s+[A-Z][a-zA-Z0-9_]*",
            # Enum values (PascalCase is correct per style guide)
            r"\b[A-Z][a-zA-Z0-9_]*::[A-Z][a-zA-Z0-9_]*",
            # Common HTTP parser callback names (external API)
            r"\b(onUrl|onHeaderField|onHeaderFieldComplete|onHeaderValue|onHeaderValueComplete|onHeadersComplete|onBody|onMessageComplete)\b",
            # Known legacy APIs we can't easily change
            r"\b(user_data|get_arena|append_message)\b",
        )
    ]

    try:
        with open(filepath, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError):
        # Best effort: an unreadable file yields no violations.
        return violations

    for line_num, line in enumerate(lines, 1):
        # Skip lines outside the staged diff when limited to new code.
        if modified_lines is not None and line_num not in modified_lines:
            continue

        stripped = line.strip()

        # Skip lines that begin with a comment marker or a string literal.
        if stripped.startswith(("//", "/*", "*", '"')):
            continue

        if any(exclusion.search(line) for exclusion in exclusions):
            continue

        # Several patterns can hit the same identifier (e.g. a declaration
        # with an initializer is both "variable" and "assignment"); report
        # each occurrence once so counts are not inflated.
        seen = set()
        for pattern, violation_type in patterns:
            for match in pattern.finditer(line):
                camel_case_name = match.group(1)
                column = match.start(1) + 1
                key = (column, camel_case_name)
                if key in seen:
                    continue
                seen.add(key)

                # Suggested replacement: underscore before each inner capital.
                snake_case = re.sub(
                    "([a-z0-9])([A-Z])", r"\1_\2", camel_case_name
                ).lower()
                violations.append(
                    {
                        "file": filepath,
                        "line": line_num,
                        "column": column,
                        "type": violation_type,
                        "camelCase": camel_case_name,
                        "snake_case": snake_case,
                        "context": stripped,
                    }
                )
    return violations
def main():
    """Command-line entry point for the snake_case check.

    Parses the file list and the ``--check-new-code-only`` flag, scans every
    existing ``.hpp``/``.cpp`` path, and prints each violation found.

    Returns:
        1 if at least one violation was reported, otherwise 0.
    """
    cli = argparse.ArgumentParser(description="Check snake_case naming in C++ files")
    cli.add_argument("files", nargs="*", help="Files to check")
    cli.add_argument(
        "--check-new-code-only",
        action="store_true",
        help="Only check modified lines (git diff)",
    )
    args = cli.parse_args()

    violation_count = 0
    for filepath in args.files:
        candidate = Path(filepath)
        # Only existing C++ headers and sources are scanned.
        if not (candidate.exists() and candidate.suffix in (".hpp", ".cpp")):
            continue

        found = check_snake_case_violations(filepath, args.check_new_code_only)
        if not found:
            continue

        print(f"\n{filepath}:")
        for v in found:
            print(
                f" Line {v['line']}:{v['column']} - {v['type']} '{v['camelCase']}' should be '{v['snake_case']}'"
            )
            print(f" Context: {v['context']}")
        violation_count += len(found)

    if violation_count == 0:
        return 0

    print(f"\n💡 Found {violation_count} naming violations.")
    print(" New code should use snake_case per style.md")
    print(" To convert: s/([a-z0-9])([A-Z])/\\1_\\2/g and lowercase")
    return 1
# Run the checker when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())