Move latency_sim to tools

tools/latency_sim/persistence_optimizer.py (new file, 476 lines)

@@ -0,0 +1,476 @@
#!/usr/bin/env python3
"""
Persistence Thread Parameter Optimization

Uses Bayesian optimization to automatically find the configuration parameters
that minimize commit latency. This is much more sample-efficient than grid
search because a probabilistic model guides the parameter exploration.

Key advantages:
- Efficiently explores high-dimensional parameter spaces
- Uses previous simulation results to guide future parameter choices
- Handles expensive objective function evaluations (our simulation)
- Provides uncertainty estimates for parameter importance
"""

import time
from typing import Dict, List, Optional, Tuple

import numpy as np

from persistence_simulation import PersistenceSimulation, print_results

# Try to import scikit-optimize for Bayesian optimization
try:
    from skopt import gp_minimize
    from skopt.space import Integer, Real
    from skopt.utils import use_named_args
    import matplotlib.pyplot as plt

    OPTIMIZE_AVAILABLE = True
except ImportError:
    print("scikit-optimize not available. Install with: pip install scikit-optimize")
    print("Falling back to grid search...")
    OPTIMIZE_AVAILABLE = False
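
# Quick-start sketch (assumes persistence_simulation.py sits alongside this
# file, as in tools/latency_sim):
#
#   optimizer = PersistenceOptimizer(optimization_budget=20)
#   summary = optimizer.run_optimization()
#   optimizer.print_optimization_summary(summary)
#
# With scikit-optimize installed this runs Bayesian optimization; otherwise it
# falls back to the small grid search defined below.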


class PersistenceOptimizer:
    """
    Automated parameter optimization for the persistence thread using
    Bayesian optimization.

    This class finds the configuration parameters that minimize commit latency
    by intelligently exploring the parameter space with Gaussian process models.
    """

    def __init__(
        self,
        optimization_budget: int = 50,
        simulation_duration: float = 20.0,
        arrival_rate: float = 1000.0,
        objective_metric: str = "p95_latency",
        random_seed: int = 42,
    ):
        self.optimization_budget = optimization_budget
        self.simulation_duration = simulation_duration
        self.arrival_rate = arrival_rate
        self.objective_metric = objective_metric
        self.random_seed = random_seed

        # Track optimization history
        self.optimization_history = []
        self.best_params = None
        self.best_score = float("inf")

        # Define parameter search space
        self.parameter_space = self._define_search_space()
        self.parameter_names = [dim.name for dim in self.parameter_space]

    def _define_search_space(self) -> List:
        """
        Define the parameter search space for optimization.

        Focus on the three core parameters that matter for persistence thread
        performance with 100% reliable S3. Retry parameters are omitted since
        S3 never fails in this model.
        """
        return [
            # Core batching parameters
            Real(
                1.0, 50.0, name="batch_timeout_ms", prior="log-uniform"
            ),  # Log scale since small changes matter most at the low end
            Integer(
                64 * 1024,
                4 * 1024 * 1024,
                name="batch_size_threshold",  # 64 KB - 4 MB
                prior="log-uniform",
            ),
            # Flow control parameter - likely the most impactful
            Integer(1, 50, name="max_in_flight_requests"),
        ]
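
    # Sanity-check sketch (assumes scikit-optimize is installed): draw a few
    # random samples to verify the bounds and priors, e.g.
    #
    #   from skopt.space import Space
    #   Space(optimizer.parameter_space).rvs(n_samples=3, random_state=42)
    #
    # Each sample is a [batch_timeout_ms, batch_size_threshold,
    # max_in_flight_requests] triple.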

    def _run_simulation_with_params(self, params: Dict[str, float]) -> Dict:
        """Run the simulation with the given parameters and return results."""
        try:
            sim = PersistenceSimulation(
                batch_timeout_ms=params["batch_timeout_ms"],
                batch_size_threshold=int(params["batch_size_threshold"]),
                max_in_flight_requests=int(params["max_in_flight_requests"]),
                # Retry parameters fixed since S3 is 100% reliable
                max_retry_attempts=0,  # No retries needed
                retry_base_delay_ms=100.0,  # Irrelevant but needs a value
                # S3 parameters kept fixed - 100% reliable for optimization focus
                s3_latency_shape=2.0,  # Fixed Gamma shape
                s3_latency_scale=15.0,  # Fixed Gamma scale (30ms RTT + ~30ms variable = ~60ms mean)
                s3_failure_rate=0.0,  # 100% reliable S3
                arrival_rate_per_sec=self.arrival_rate,
                simulation_duration_sec=self.simulation_duration,
            )

            return sim.run_simulation()

        except Exception as e:
            print(f"Simulation failed with params {params}: {e}")
            # Return a high penalty score for failed simulations
            return {
                "commit_metrics": {
                    "latency_ms": {"mean": 10000, "p95": 10000, "p99": 10000}
                },
                "error": str(e),
            }
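
    # The 10,000ms penalty above is an arbitrary sentinel chosen to be far
    # worse than any plausible commit latency, so the optimizer steers away
    # from failing configurations without feeding infinities to the surrogate.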

    def _extract_objective_value(self, results: Dict) -> float:
        """Extract the objective value to minimize from simulation results."""
        try:
            commit_metrics = results["commit_metrics"]["latency_ms"]

            if self.objective_metric == "mean_latency":
                return commit_metrics["mean"]
            elif self.objective_metric == "p95_latency":
                return commit_metrics["p95"]
            elif self.objective_metric == "p99_latency":
                return commit_metrics["p99"]
            elif self.objective_metric == "weighted_latency":
                # Weighted combination emphasizing tail latencies
                return (
                    0.3 * commit_metrics["mean"]
                    + 0.5 * commit_metrics["p95"]
                    + 0.2 * commit_metrics["p99"]
                )
            else:
                return commit_metrics["p95"]  # Default to P95

        except KeyError as e:
            print(f"Failed to extract objective from results: {e}")
            return 10000  # High penalty for invalid results
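
    # Worked example for "weighted_latency" (illustrative numbers, not from a
    # real run): with mean=40ms, p95=80ms and p99=120ms the objective is
    # 0.3*40 + 0.5*80 + 0.2*120 = 12 + 40 + 24 = 76ms.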

    def optimize_with_bayesian(self) -> Tuple[Dict, float]:
        """
        Use Bayesian optimization to find optimal parameters.

        A Gaussian process surrogate models the objective function and is used
        to intelligently choose where to sample next.
        """
        if not OPTIMIZE_AVAILABLE:
            return self.optimize_with_grid_search()

        print(
            f"Starting Bayesian optimization with {self.optimization_budget} evaluations"
        )
        print(f"Objective: Minimize {self.objective_metric}")
        print(f"Parameter space: {len(self.parameter_space)} dimensions")
        print()

        @use_named_args(self.parameter_space)
        def objective(**params):
            """Objective function for Bayesian optimization"""
            print(f"Evaluating: {params}")

            start_time = time.time()
            results = self._run_simulation_with_params(params)
            eval_time = time.time() - start_time

            objective_value = self._extract_objective_value(results)

            # Track optimization history
            history_entry = {
                "params": params.copy(),
                "objective_value": objective_value,
                "results": results,
                "eval_time": eval_time,
                "iteration": len(self.optimization_history) + 1,
            }
            self.optimization_history.append(history_entry)

            # Update best if improved
            if objective_value < self.best_score:
                self.best_score = objective_value
                self.best_params = params.copy()
                print(
                    f"✓ NEW BEST: {objective_value:.2f}ms (evaluation {history_entry['iteration']})"
                )
            else:
                print(f"  Score: {objective_value:.2f}ms")

            print(f"  Time: {eval_time:.1f}s")
            print()

            return objective_value

        # Run Bayesian optimization
        result = gp_minimize(
            func=objective,
            dimensions=self.parameter_space,
            n_calls=self.optimization_budget,
            n_initial_points=10,  # Random exploration first
            acq_func="EI",  # Expected Improvement acquisition
            random_state=self.random_seed,
        )

        # Extract best parameters
        best_params_list = result.x
        best_params_dict = dict(zip(self.parameter_names, best_params_list))
        best_objective = result.fun

        return best_params_dict, best_objective
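
    # Note: gp_minimize's result also carries the full evaluation record
    # (result.x_iters, result.func_vals); a later run could be warm-started by
    # passing those back via gp_minimize's x0/y0 arguments.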

    def optimize_with_grid_search(self) -> Tuple[Dict, float]:
        """Fallback grid search optimization if scikit-optimize is not available."""
        print(
            "Using grid search optimization (install scikit-optimize for better results)"
        )
        print()

        # Define a small grid over the key parameters
        grid_configs = [
            # Vary max_in_flight and batch_timeout
            {"max_in_flight_requests": 5, "batch_timeout_ms": 5.0},
            {"max_in_flight_requests": 10, "batch_timeout_ms": 5.0},
            {"max_in_flight_requests": 20, "batch_timeout_ms": 5.0},
            {"max_in_flight_requests": 10, "batch_timeout_ms": 2.0},
            {"max_in_flight_requests": 10, "batch_timeout_ms": 10.0},
            {"max_in_flight_requests": 15, "batch_timeout_ms": 3.0},
            {"max_in_flight_requests": 25, "batch_timeout_ms": 7.0},
        ]

        best_params = None
        best_score = float("inf")

        for i, config in enumerate(grid_configs):
            print(f"Evaluating config {i + 1}/{len(grid_configs)}: {config}")

            # Use default values for unspecified parameters
            full_params = {
                "batch_timeout_ms": 5.0,
                "batch_size_threshold": 1024 * 1024,
                "max_in_flight_requests": 5,
            }
            full_params.update(config)

            start_time = time.time()
            results = self._run_simulation_with_params(full_params)
            eval_time = time.time() - start_time
            objective_value = self._extract_objective_value(results)

            # Record history so the summary and importance analysis also work
            # on the grid-search path (the Bayesian path does this inside its
            # objective closure).
            self.optimization_history.append(
                {
                    "params": full_params.copy(),
                    "objective_value": objective_value,
                    "results": results,
                    "eval_time": eval_time,
                    "iteration": len(self.optimization_history) + 1,
                }
            )

            if objective_value < best_score:
                best_score = objective_value
                best_params = full_params.copy()
                self.best_score = best_score
                self.best_params = best_params
                print(f"✓ NEW BEST: {objective_value:.2f}ms")
            else:
                print(f"  Score: {objective_value:.2f}ms")
            print()

        return best_params, best_score

    def analyze_parameter_importance(self):
        """Analyze which parameters have the most impact on performance."""
        if not self.optimization_history:
            print("No optimization history available")
            return

        print("Parameter Importance Analysis")
        print("=" * 50)

        # Extract parameter values and objectives
        param_data = {}
        objectives = []

        for entry in self.optimization_history:
            objectives.append(entry["objective_value"])
            for param_name, param_value in entry["params"].items():
                if param_name not in param_data:
                    param_data[param_name] = []
                param_data[param_name].append(param_value)

        objectives = np.array(objectives)

        # Simple correlation analysis
        print("Parameter correlations with objective (lower is better):")
        correlations = []

        for param_name, values in param_data.items():
            correlation = np.corrcoef(values, objectives)[0, 1]
            correlations.append((param_name, correlation))
            print(f"  {param_name:<25}: {correlation:+.3f}")

        print("\nMost impactful parameters (by absolute correlation):")
        correlations.sort(key=lambda x: abs(x[1]), reverse=True)
        for param_name, correlation in correlations[:5]:
            impact = "reduces latency" if correlation < 0 else "increases latency"
            print(f"  {param_name:<25}: {impact} (r={correlation:+.3f})")

    def plot_optimization_progress(self, save_path: Optional[str] = None):
        """Plot optimization convergence."""
        if not OPTIMIZE_AVAILABLE or not self.optimization_history:
            return

        iterations = [entry["iteration"] for entry in self.optimization_history]
        objectives = [entry["objective_value"] for entry in self.optimization_history]

        # Calculate running minimum (best so far)
        running_min = []
        current_min = float("inf")
        for obj in objectives:
            current_min = min(current_min, obj)
            running_min.append(current_min)

        plt.figure(figsize=(12, 8))

        # Plot 1: Objective value over iterations
        plt.subplot(2, 2, 1)
        plt.scatter(iterations, objectives, alpha=0.6, s=30)
        plt.plot(iterations, running_min, "r-", linewidth=2, label="Best so far")
        plt.xlabel("Iteration")
        plt.ylabel(f"{self.objective_metric} (ms)")
        plt.title("Optimization Progress")
        plt.legend()
        plt.grid(True, alpha=0.3)

        # Plot 2: Parameter evolution for key parameters
        plt.subplot(2, 2, 2)
        key_params = ["max_in_flight_requests", "batch_timeout_ms"]
        for param in key_params:
            if param in self.optimization_history[0]["params"]:
                values = [entry["params"][param] for entry in self.optimization_history]
                plt.scatter(iterations, values, alpha=0.6, label=param, s=30)
        plt.xlabel("Iteration")
        plt.ylabel("Parameter Value")
        plt.title("Key Parameter Evolution")
        plt.legend()
        plt.grid(True, alpha=0.3)

        # Plot 3: Objective distribution
        plt.subplot(2, 2, 3)
        plt.hist(objectives, bins=20, alpha=0.7, edgecolor="black")
        plt.axvline(
            self.best_score,
            color="red",
            linestyle="--",
            label=f"Best: {self.best_score:.1f}ms",
        )
        plt.xlabel(f"{self.objective_metric} (ms)")
        plt.ylabel("Count")
        plt.title("Objective Value Distribution")
        plt.legend()
        plt.grid(True, alpha=0.3)

        # Plot 4: Convergence rate
        plt.subplot(2, 2, 4)
        improvements = []
        for i in range(len(running_min)):
            if i == 0:
                improvements.append(0)
            else:
                improvements.append(running_min[i - 1] - running_min[i])

        plt.plot(iterations, improvements, "g-", marker="o", markersize=3)
        plt.xlabel("Iteration")
        plt.ylabel("Improvement (ms)")
        plt.title("Per-Iteration Improvement")
        plt.grid(True, alpha=0.3)

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300, bbox_inches="tight")
            print(f"Optimization plots saved to {save_path}")
        else:
            plt.show()

    def run_optimization(self) -> Dict:
        """Run the full optimization process and return results."""
        start_time = time.time()

        # Run optimization
        if OPTIMIZE_AVAILABLE:
            best_params, best_score = self.optimize_with_bayesian()
        else:
            best_params, best_score = self.optimize_with_grid_search()

        total_time = time.time() - start_time

        # Run a final simulation with the best parameters for detailed results
        print("Running final simulation with optimal parameters...")
        final_results = self._run_simulation_with_params(best_params)

        # Prepare optimization summary
        optimization_summary = {
            "best_parameters": best_params,
            "best_objective_value": best_score,
            "optimization_time": total_time,
            "evaluations_performed": len(self.optimization_history),
            "final_simulation_results": final_results,
            "optimization_history": self.optimization_history,
        }

        return optimization_summary
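
    # Note: the final simulation re-runs the stochastic model, so its measured
    # latency may differ slightly from the best objective value recorded
    # during the search.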

    def print_optimization_summary(self, summary: Dict):
        """Print a comprehensive summary of optimization results."""
        print("=" * 80)
        print("BAYESIAN OPTIMIZATION RESULTS")
        print("=" * 80)

        print(f"Optimization completed in {summary['optimization_time']:.1f} seconds")
        print(f"Performed {summary['evaluations_performed']} parameter evaluations")
        print(f"Best {self.objective_metric}: {summary['best_objective_value']:.2f}ms")
        print()

        print("OPTIMAL PARAMETERS:")
        print("-" * 40)
        for param, value in summary["best_parameters"].items():
            if isinstance(value, float):
                if param.endswith("_rate"):
                    print(f"  {param:<25}: {value:.4f}")
                else:
                    print(f"  {param:<25}: {value:.2f}")
            else:
                print(f"  {param:<25}: {value}")

        print("\nDETAILED PERFORMANCE WITH OPTIMAL PARAMETERS:")
        print("-" * 50)
        final_results = summary["final_simulation_results"]
        print_results(final_results)

        print("\nPARAMETER IMPACT ANALYSIS:")
        print("-" * 30)
        self.analyze_parameter_importance()


def main():
    """Main optimization workflow."""
    print("Persistence Thread Parameter Optimization")
    print("Using Bayesian optimization for intelligent parameter search")
    print()

    # Optimize against a couple of different objective functions
    objectives_to_test = ["p95_latency", "weighted_latency"]

    for objective in objectives_to_test:
        print(f"\n{'=' * 80}")
        print(f"OPTIMIZING FOR: {objective.upper()}")
        print(f"{'=' * 80}")

        optimizer = PersistenceOptimizer(
            optimization_budget=30,  # Reasonable for a demo
            simulation_duration=15.0,  # Shorter sims for faster optimization
            arrival_rate=1000.0,
            objective_metric=objective,
            random_seed=42,
        )

        # Run optimization
        summary = optimizer.run_optimization()
        optimizer.print_optimization_summary(summary)

        # Generate plots
        try:
            optimizer.plot_optimization_progress(f"optimization_{objective}.png")
        except Exception as e:
            print(f"Could not generate plots: {e}")

        print(f"\nOptimization for {objective} completed!")
        print("=" * 80)


if __name__ == "__main__":
    main()
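
# Usage sketch: run from the tools/latency_sim directory so that the sibling
# persistence_simulation module is importable:
#
#   python persistence_optimizer.py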