Add latency sim. Not reviewed

2025-08-24 22:32:47 -04:00
parent 506bbbb528
commit 1a4e8d5761
3 changed files with 2179 additions and 0 deletions

@@ -0,0 +1,461 @@
#!/usr/bin/env python3
"""
Persistence Thread Parameter Optimization
Uses Bayesian Optimization to automatically find the optimal configuration
parameters that minimize commit latency. This is much more efficient than
grid search since it uses a probabilistic model to guide parameter exploration.
Key advantages:
- Efficiently explores high-dimensional parameter spaces
- Uses previous simulation results to guide future parameter choices
- Handles expensive objective function evaluations (our simulation)
- Provides uncertainty estimates for parameter importance
"""
import numpy as np
from typing import Dict, List, Tuple, Optional
import time
from persistence_simulation import PersistenceSimulation, print_results
# Try to import scikit-optimize for Bayesian optimization
try:
from skopt import gp_minimize, forest_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt.plots import plot_convergence, plot_objective
import matplotlib.pyplot as plt
OPTIMIZE_AVAILABLE = True
except ImportError:
print("scikit-optimize not available. Install with: pip install scikit-optimize")
print("Falling back to grid search...")
OPTIMIZE_AVAILABLE = False
class PersistenceOptimizer:
"""
Automated parameter optimization for the persistence thread using Bayesian optimization.
This class finds the optimal configuration parameters to minimize commit latency
by intelligently exploring the parameter space using Gaussian Process models.
"""
def __init__(self,
optimization_budget: int = 50,
simulation_duration: float = 20.0,
arrival_rate: float = 1000.0,
objective_metric: str = "p95_latency",
random_seed: int = 42):
self.optimization_budget = optimization_budget
self.simulation_duration = simulation_duration
self.arrival_rate = arrival_rate
self.objective_metric = objective_metric
self.random_seed = random_seed
# Track optimization history
self.optimization_history = []
self.best_params = None
self.best_score = float('inf')
# Define parameter search space
self.parameter_space = self._define_search_space()
self.parameter_names = [dim.name for dim in self.parameter_space]
def _define_search_space(self) -> List:
"""
Define the parameter search space for optimization.
Focus on the 3 core parameters that matter for persistence thread performance
with 100% reliable S3. Retry parameters removed since S3 never fails.
"""
return [
# Core batching parameters
Real(1.0, 50.0, name='batch_timeout_ms',
prior='log-uniform'), # Log scale since small changes matter
Integer(64 * 1024, 4 * 1024 * 1024, name='batch_size_threshold', # 64KB - 4MB
prior='log-uniform'),
# Flow control parameters - likely the most impactful
Integer(1, 50, name='max_in_flight_requests'),
]
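    # A sampled point from this space is an ordered list matching the
    # dimensions above, e.g. (illustrative values only):
    #   [4.7, 262144, 12]  ->  {'batch_timeout_ms': 4.7,
    #                           'batch_size_threshold': 262144,
    #                           'max_in_flight_requests': 12}
    # The @use_named_args decorator used below converts such lists into keyword
    # arguments for the objective function.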
def _run_simulation_with_params(self, params: Dict[str, float]) -> Dict:
"""Run simulation with given parameters and return results"""
try:
sim = PersistenceSimulation(
batch_timeout_ms=params['batch_timeout_ms'],
batch_size_threshold=int(params['batch_size_threshold']),
max_in_flight_requests=int(params['max_in_flight_requests']),
# Retry parameters fixed since S3 is 100% reliable
max_retry_attempts=0, # No retries needed
retry_base_delay_ms=100.0, # Irrelevant but needs a value
# S3 parameters kept fixed - 100% reliable for optimization focus
s3_latency_shape=2.0, # Fixed Gamma shape
s3_latency_scale=15.0, # Fixed Gamma scale (30ms RTT + ~30ms variable = ~60ms mean)
s3_failure_rate=0.0, # 100% reliable S3
arrival_rate_per_sec=self.arrival_rate,
simulation_duration_sec=self.simulation_duration
)
return sim.run_simulation()
except Exception as e:
print(f"Simulation failed with params {params}: {e}")
# Return a high penalty score for failed simulations
return {
'commit_metrics': {
'latency_ms': {
'mean': 10000,
'p95': 10000,
'p99': 10000
}
},
'error': str(e)
}
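    # Note on the fixed S3 latency model above: a Gamma distribution with
    # shape=2.0 and scale=15.0 has mean shape * scale = 2.0 * 15.0 = 30ms for
    # the variable component, which combined with the ~30ms base RTT noted in
    # the comment gives the ~60ms mean request latency.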
def _extract_objective_value(self, results: Dict) -> float:
"""Extract the objective value to minimize from simulation results"""
try:
commit_metrics = results['commit_metrics']['latency_ms']
if self.objective_metric == "mean_latency":
return commit_metrics['mean']
elif self.objective_metric == "p95_latency":
return commit_metrics['p95']
elif self.objective_metric == "p99_latency":
return commit_metrics['p99']
elif self.objective_metric == "weighted_latency":
# Weighted combination emphasizing tail latencies
return (0.3 * commit_metrics['mean'] +
0.5 * commit_metrics['p95'] +
0.2 * commit_metrics['p99'])
else:
return commit_metrics['p95'] # Default to P95
except KeyError as e:
print(f"Failed to extract objective from results: {e}")
return 10000 # High penalty for invalid results
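    # Worked example of the weighted objective with hypothetical latencies
    # mean=40ms, p95=90ms, p99=140ms:
    #   0.3 * 40 + 0.5 * 90 + 0.2 * 140 = 12 + 45 + 28 = 85ms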
def optimize_with_bayesian(self) -> Tuple[Dict, float]:
"""
Use Bayesian Optimization to find optimal parameters.
This uses Gaussian Process models to build a probabilistic model
of the objective function and intelligently choose where to sample next.
"""
if not OPTIMIZE_AVAILABLE:
return self.optimize_with_grid_search()
print(f"Starting Bayesian Optimization with {self.optimization_budget} evaluations")
print(f"Objective: Minimize {self.objective_metric}")
print(f"Parameter space: {len(self.parameter_space)} dimensions")
print()
@use_named_args(self.parameter_space)
def objective(**params):
"""Objective function for Bayesian optimization"""
print(f"Evaluating: {params}")
start_time = time.time()
results = self._run_simulation_with_params(params)
eval_time = time.time() - start_time
objective_value = self._extract_objective_value(results)
# Track optimization history
history_entry = {
'params': params.copy(),
'objective_value': objective_value,
'results': results,
'eval_time': eval_time,
'iteration': len(self.optimization_history) + 1
}
self.optimization_history.append(history_entry)
# Update best if improved
if objective_value < self.best_score:
self.best_score = objective_value
self.best_params = params.copy()
print(f"✓ NEW BEST: {objective_value:.2f}ms (evaluation {history_entry['iteration']})")
else:
print(f" Score: {objective_value:.2f}ms")
print(f" Time: {eval_time:.1f}s")
print()
return objective_value
# Run Bayesian optimization
result = gp_minimize(
func=objective,
dimensions=self.parameter_space,
n_calls=self.optimization_budget,
n_initial_points=10, # Random exploration first
acq_func='EI', # Expected Improvement acquisition
random_state=self.random_seed
)
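        # The returned OptimizeResult also carries result.x_iters and
        # result.func_vals, which mirror self.optimization_history and could
        # feed skopt.plots.plot_convergence(result) if desired.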
# Extract best parameters
best_params_list = result.x
best_params_dict = dict(zip(self.parameter_names, best_params_list))
best_objective = result.fun
return best_params_dict, best_objective
def optimize_with_grid_search(self) -> Tuple[Dict, float]:
"""Fallback grid search optimization if scikit-optimize not available"""
print("Using grid search optimization (install scikit-optimize for better results)")
print()
# Define a smaller grid for key parameters
grid_configs = [
# Vary max_in_flight and batch_timeout
{'max_in_flight_requests': 5, 'batch_timeout_ms': 5.0},
{'max_in_flight_requests': 10, 'batch_timeout_ms': 5.0},
{'max_in_flight_requests': 20, 'batch_timeout_ms': 5.0},
{'max_in_flight_requests': 10, 'batch_timeout_ms': 2.0},
{'max_in_flight_requests': 10, 'batch_timeout_ms': 10.0},
{'max_in_flight_requests': 15, 'batch_timeout_ms': 3.0},
{'max_in_flight_requests': 25, 'batch_timeout_ms': 7.0},
]
best_params = None
best_score = float('inf')
for i, config in enumerate(grid_configs):
print(f"Evaluating config {i+1}/{len(grid_configs)}: {config}")
# Use default values for unspecified parameters
full_params = {
'batch_timeout_ms': 5.0,
'batch_size_threshold': 1024 * 1024,
'max_in_flight_requests': 5
}
full_params.update(config)
            results = self._run_simulation_with_params(full_params)
            objective_value = self._extract_objective_value(results)
            # Record the evaluation so analyze_parameter_importance() and the
            # summary's evaluation count also work in the grid-search fallback
            self.optimization_history.append({
                'params': full_params.copy(),
                'objective_value': objective_value,
                'results': results,
                'eval_time': 0.0,  # per-evaluation timing not tracked here
                'iteration': len(self.optimization_history) + 1
            })
            if objective_value < best_score:
                best_score = objective_value
                best_params = full_params.copy()
                print(f"✓ NEW BEST: {objective_value:.2f}ms")
            else:
                print(f" Score: {objective_value:.2f}ms")
            print()
return best_params, best_score
def analyze_parameter_importance(self):
"""Analyze which parameters have the most impact on performance"""
if not self.optimization_history:
print("No optimization history available")
return
print("Parameter Importance Analysis")
print("=" * 50)
# Extract parameter values and objectives
param_data = {}
objectives = []
for entry in self.optimization_history:
objectives.append(entry['objective_value'])
for param_name, param_value in entry['params'].items():
if param_name not in param_data:
param_data[param_name] = []
param_data[param_name].append(param_value)
objectives = np.array(objectives)
# Simple correlation analysis
print("Parameter correlations with objective (lower is better):")
correlations = []
for param_name, values in param_data.items():
correlation = np.corrcoef(values, objectives)[0, 1]
correlations.append((param_name, correlation))
print(f" {param_name:<25}: {correlation:+.3f}")
print("\nMost impactful parameters (by absolute correlation):")
correlations.sort(key=lambda x: abs(x[1]), reverse=True)
for param_name, correlation in correlations[:5]:
            impact = "higher values reduce latency" if correlation < 0 else "higher values increase latency"
            print(f" {param_name:<25}: {impact} (r={correlation:+.3f})")
def plot_optimization_progress(self, save_path: Optional[str] = None):
"""Plot optimization convergence"""
if not OPTIMIZE_AVAILABLE or not self.optimization_history:
return
iterations = [entry['iteration'] for entry in self.optimization_history]
objectives = [entry['objective_value'] for entry in self.optimization_history]
# Calculate running minimum (best so far)
running_min = []
current_min = float('inf')
for obj in objectives:
current_min = min(current_min, obj)
running_min.append(current_min)
plt.figure(figsize=(12, 8))
# Plot 1: Objective value over iterations
plt.subplot(2, 2, 1)
plt.scatter(iterations, objectives, alpha=0.6, s=30)
plt.plot(iterations, running_min, 'r-', linewidth=2, label='Best so far')
plt.xlabel('Iteration')
plt.ylabel(f'{self.objective_metric} (ms)')
plt.title('Optimization Progress')
plt.legend()
plt.grid(True, alpha=0.3)
# Plot 2: Parameter evolution for key parameters
plt.subplot(2, 2, 2)
key_params = ['max_in_flight_requests', 'batch_timeout_ms']
for param in key_params:
if param in self.optimization_history[0]['params']:
values = [entry['params'][param] for entry in self.optimization_history]
plt.scatter(iterations, values, alpha=0.6, label=param, s=30)
plt.xlabel('Iteration')
plt.ylabel('Parameter Value')
plt.title('Key Parameter Evolution')
plt.legend()
plt.grid(True, alpha=0.3)
# Plot 3: Objective distribution
plt.subplot(2, 2, 3)
plt.hist(objectives, bins=20, alpha=0.7, edgecolor='black')
plt.axvline(self.best_score, color='red', linestyle='--',
label=f'Best: {self.best_score:.1f}ms')
plt.xlabel(f'{self.objective_metric} (ms)')
plt.ylabel('Count')
plt.title('Objective Value Distribution')
plt.legend()
plt.grid(True, alpha=0.3)
# Plot 4: Convergence rate
plt.subplot(2, 2, 4)
improvements = []
for i, entry in enumerate(self.optimization_history):
if i == 0:
improvements.append(0)
else:
prev_best = running_min[i-1]
curr_best = running_min[i]
improvement = prev_best - curr_best
improvements.append(improvement)
plt.plot(iterations, improvements, 'g-', marker='o', markersize=3)
plt.xlabel('Iteration')
plt.ylabel('Improvement (ms)')
plt.title('Per-Iteration Improvement')
plt.grid(True, alpha=0.3)
plt.tight_layout()
if save_path:
plt.savefig(save_path, dpi=300, bbox_inches='tight')
print(f"Optimization plots saved to {save_path}")
else:
plt.show()
def run_optimization(self) -> Dict:
"""Run the full optimization process and return results"""
start_time = time.time()
# Run optimization
if OPTIMIZE_AVAILABLE:
best_params, best_score = self.optimize_with_bayesian()
else:
best_params, best_score = self.optimize_with_grid_search()
total_time = time.time() - start_time
# Run final simulation with best parameters for detailed results
print("Running final simulation with optimal parameters...")
final_results = self._run_simulation_with_params(best_params)
# Prepare optimization summary
optimization_summary = {
'best_parameters': best_params,
'best_objective_value': best_score,
'optimization_time': total_time,
'evaluations_performed': len(self.optimization_history),
'final_simulation_results': final_results,
'optimization_history': self.optimization_history
}
return optimization_summary
def print_optimization_summary(self, summary: Dict):
"""Print a comprehensive summary of optimization results"""
print("=" * 80)
print("BAYESIAN OPTIMIZATION RESULTS")
print("=" * 80)
print(f"Optimization completed in {summary['optimization_time']:.1f} seconds")
print(f"Performed {summary['evaluations_performed']} parameter evaluations")
print(f"Best {self.objective_metric}: {summary['best_objective_value']:.2f}ms")
print()
print("OPTIMAL PARAMETERS:")
print("-" * 40)
for param, value in summary['best_parameters'].items():
if isinstance(value, float):
if param.endswith('_rate'):
print(f" {param:<25}: {value:.4f}")
else:
print(f" {param:<25}: {value:.2f}")
else:
print(f" {param:<25}: {value}")
print("\nDETAILED PERFORMANCE WITH OPTIMAL PARAMETERS:")
print("-" * 50)
final_results = summary['final_simulation_results']
print_results(final_results)
print("\nPARAMETER IMPACT ANALYSIS:")
print("-" * 30)
self.analyze_parameter_importance()
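# Minimal programmatic usage sketch for a single objective (parameter values
# are illustrative; main() below runs a fuller demo):
#
#     optimizer = PersistenceOptimizer(optimization_budget=20,
#                                      simulation_duration=10.0,
#                                      objective_metric="p95_latency")
#     summary = optimizer.run_optimization()
#     optimizer.print_optimization_summary(summary)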
def main():
"""Main optimization workflow"""
print("Persistence Thread Parameter Optimization")
print("Using Bayesian Optimization for intelligent parameter search")
print()
# Create optimizer with different objective functions to test
objectives_to_test = ["p95_latency", "weighted_latency"]
for objective in objectives_to_test:
print(f"\n{'='*80}")
print(f"OPTIMIZING FOR: {objective.upper()}")
print(f"{'='*80}")
optimizer = PersistenceOptimizer(
optimization_budget=30, # Reasonable for demo
simulation_duration=15.0, # Shorter sims for faster optimization
arrival_rate=1000.0,
objective_metric=objective,
random_seed=42
)
# Run optimization
summary = optimizer.run_optimization()
optimizer.print_optimization_summary(summary)
# Generate plots
try:
optimizer.plot_optimization_progress(f'optimization_{objective}.png')
except Exception as e:
print(f"Could not generate plots: {e}")
print(f"\nOptimization for {objective} completed!")
print("="*80)
if __name__ == "__main__":
main()