apr-serve
Status: Verified | Idempotent: Yes | Coverage: 95%+
Serve APR model via HTTP API.
Run Command
cargo run --example cli_apr_serve -- --demo
Code
//! # Recipe: APR Model Server CLI
//!
//! **Category**: CLI Tools
//! **CLI Equivalent**: `apr serve`
//! **Contract**: contracts/recipe-iiur-v1.yaml, contracts/cli-parity-v1.yaml
//! **Isolation Level**: Full
//! **Idempotency**: Guaranteed
//! **Dependencies**: None (default features)
//!
//! ## QA Checklist
//! 1. [x] `cargo run` succeeds (Exit Code 0)
//! 2. [x] `cargo test` passes
//! 3. [x] Deterministic output (Verified)
//! 4. [x] No temp files leaked
//! 5. [x] Memory usage stable
//! 6. [x] WASM compatible (N/A)
//! 7. [x] Clippy clean
//! 8. [x] Rustfmt standard
//! 9. [x] No `unwrap()` in logic
//! 10. [x] Proptests pass (100+ cases)
//!
//! ## Learning Objective
//! Serve APR model via HTTP API (simulated).
//!
//! ## Run Command
//! ```bash
//! cargo run --example cli_apr_serve
//! cargo run --example cli_apr_serve -- --demo
//! ```
//!
//! ## Format Variants
//! ```bash
//! apr inspect model.apr # APR native format
//! apr inspect model.gguf # GGUF (llama.cpp compatible)
//! apr inspect model.safetensors # SafeTensors (HuggingFace)
//! ```
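//!
//! ## Example Request (illustrative)
//! The server in this recipe is simulated, so the commands below are only a sketch of how
//! the listed endpoints might be exercised against a real deployment. The `/v1/infer`
//! payload mirrors the request bodies used in the simulation; the response schema is not
//! defined by this recipe.
//! ```bash
//! curl http://127.0.0.1:8080/v1/health
//! curl -X POST http://127.0.0.1:8080/v1/infer \
//!      -H 'Content-Type: application/json' \
//!      -d '{"inputs": [0.5, 0.3]}'
//! ```
//!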
//! ## References
//! - Amershi, S. et al. (2019). *Software Engineering for Machine Learning: A Case Study*. ICSE. DOI: 10.1109/ICSE-SEIP.2019.00042
use apr_cookbook::prelude::*;
use clap::Parser;
use serde::{Deserialize, Serialize};

fn main() -> Result<()> {
    let config = ServerConfig::parse();
    run_server(&config)
}

#[derive(Debug, Clone, Parser)]
#[command(name = "apr-serve", about = "Serve APR model via HTTP API")]
struct ServerConfig {
    /// Model file path
    model_path: Option<String>,
    /// Host address
    #[arg(long, default_value = "127.0.0.1")]
    host: String,
    /// Port number
    #[arg(short = 'p', long, default_value_t = 8080)]
    port: u16,
    /// Number of workers
    #[arg(short = 'w', long, default_value_t = 4)]
    workers: usize,
    /// Demo mode
    #[arg(long, short = 'd')]
    demo: bool,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct ServerStatus {
    status: String,
    model: String,
    host: String,
    port: u16,
    workers: usize,
    endpoints: Vec<EndpointInfo>,
    metrics: ServerMetrics,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct EndpointInfo {
    path: String,
    method: String,
    description: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
struct ServerMetrics {
    requests_total: u64,
    requests_per_sec: f64,
    avg_latency_ms: f64,
    uptime_seconds: u64,
}

fn run_server(config: &ServerConfig) -> Result<()> {
    let mut ctx = RecipeContext::new("cli_apr_serve")?;

    // Get model name
    let model_name = if config.demo {
        "demo-model".to_string()
    } else if let Some(path) = &config.model_path {
        std::path::Path::new(path)
            .file_stem()
            .map_or_else(|| "model".to_string(), |s| s.to_string_lossy().to_string())
    } else {
        eprintln!("Error: provide a model path or use --demo");
        return Ok(());
    };

    ctx.record_metric("port", i64::from(config.port));
    ctx.record_metric("workers", config.workers as i64);

    // Print startup banner
    println!("╔══════════════════════════════════════════════════════╗");
    println!("║                    APR Model Server                   ║");
    println!("╚══════════════════════════════════════════════════════╝");
    println!();

    // Simulated server startup
    let status = simulate_server_startup(config, &model_name)?;
    println!("Model: {}", status.model);
    println!("Server: http://{}:{}", status.host, status.port);
    println!("Workers: {}", status.workers);
    println!();

    println!("Endpoints:");
    println!("{:-<50}", "");
    for endpoint in &status.endpoints {
        println!(
            " {} {:<20} {}",
            endpoint.method, endpoint.path, endpoint.description
        );
    }
    println!("{:-<50}", "");
    println!();

    // Simulate some requests
    println!("Simulating requests...");
    println!();
    let requests = vec![
        ("POST", "/v1/infer", r#"{"inputs": [0.5, 0.3]}"#),
        ("GET", "/v1/health", ""),
        ("GET", "/v1/metrics", ""),
        ("POST", "/v1/infer", r#"{"inputs": [0.1, 0.9]}"#),
        ("POST", "/v1/infer", r#"{"inputs": [0.7, 0.2]}"#),
    ];
    for (method, path, body) in &requests {
        let response = simulate_request(method, path, body)?;
        println!(
            " {} {} -> {} ({:.1}ms)",
            method, path, response.status, response.latency_ms
        );
    }
    println!();

    // Final metrics
    let metrics = simulate_metrics(requests.len())?;
    ctx.record_float_metric("requests_per_sec", metrics.requests_per_sec);
    ctx.record_float_metric("avg_latency_ms", metrics.avg_latency_ms);
    println!("Metrics:");
    println!(" Total requests: {}", metrics.requests_total);
    println!(" Requests/sec: {:.1}", metrics.requests_per_sec);
    println!(" Avg latency: {:.2}ms", metrics.avg_latency_ms);
    println!();
    println!("Server simulation complete.");
    println!("(In production, use: apr-serve model.apr --port 8080)");
    Ok(())
}

fn simulate_server_startup(config: &ServerConfig, model_name: &str) -> Result<ServerStatus> {
    let endpoints = vec![
        EndpointInfo {
            path: "/v1/infer".to_string(),
            method: "POST".to_string(),
            description: "Run inference".to_string(),
        },
        EndpointInfo {
            path: "/v1/health".to_string(),
            method: "GET".to_string(),
            description: "Health check".to_string(),
        },
        EndpointInfo {
            path: "/v1/metrics".to_string(),
            method: "GET".to_string(),
            description: "Server metrics".to_string(),
        },
        EndpointInfo {
            path: "/v1/model".to_string(),
            method: "GET".to_string(),
            description: "Model info".to_string(),
        },
    ];

    Ok(ServerStatus {
        status: "running".to_string(),
        model: model_name.to_string(),
        host: config.host.clone(),
        port: config.port,
        workers: config.workers,
        endpoints,
        metrics: ServerMetrics {
            requests_total: 0,
            requests_per_sec: 0.0,
            avg_latency_ms: 0.0,
            uptime_seconds: 0,
        },
    })
}

#[derive(Debug)]
struct SimulatedResponse {
    status: u16,
    latency_ms: f64,
}

fn simulate_request(method: &str, path: &str, _body: &str) -> Result<SimulatedResponse> {
    // Deterministic response based on path
    let seed = hash_name_to_seed(path);
    let latency = 1.0 + (seed % 10) as f64 * 0.5;
    let status = match (method, path) {
        ("GET", "/v1/health") => 200,
        ("GET", "/v1/metrics") => 200,
        ("POST", "/v1/infer") => 200,
        ("GET", "/v1/model") => 200,
        _ => 404,
    };
    Ok(SimulatedResponse {
        status,
        latency_ms: latency,
    })
}

fn simulate_metrics(request_count: usize) -> Result<ServerMetrics> {
    Ok(ServerMetrics {
        requests_total: request_count as u64,
        requests_per_sec: request_count as f64 * 100.0, // Simulated high throughput
        avg_latency_ms: 2.5,
        uptime_seconds: 10,
    })
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_clap_demo() {
        let config = ServerConfig::try_parse_from(["apr-serve", "--demo"]).unwrap();
        assert!(config.demo);
    }

    #[test]
    fn test_clap_port() {
        let config = ServerConfig::try_parse_from(["apr-serve", "-p", "9000"]).unwrap();
        assert_eq!(config.port, 9000);
    }

    #[test]
    fn test_clap_workers() {
        let config = ServerConfig::try_parse_from(["apr-serve", "-w", "8"]).unwrap();
        assert_eq!(config.workers, 8);
    }

    #[test]
    fn test_server_startup() {
        let config = ServerConfig {
            model_path: None,
            host: "127.0.0.1".to_string(),
            port: 8080,
            workers: 4,
            demo: true,
        };
        let status = simulate_server_startup(&config, "test-model").unwrap();
        assert_eq!(status.status, "running");
        assert_eq!(status.port, 8080);
        assert!(!status.endpoints.is_empty());
    }

    #[test]
    fn test_simulate_request_infer() {
        let response = simulate_request("POST", "/v1/infer", "{}").unwrap();
        assert_eq!(response.status, 200);
        assert!(response.latency_ms > 0.0);
    }

    #[test]
    fn test_simulate_request_health() {
        let response = simulate_request("GET", "/v1/health", "").unwrap();
        assert_eq!(response.status, 200);
    }

    #[test]
    fn test_simulate_request_404() {
        let response = simulate_request("GET", "/v1/unknown", "").unwrap();
        assert_eq!(response.status, 404);
    }

    #[test]
    fn test_deterministic_latency() {
        let r1 = simulate_request("POST", "/v1/infer", "{}").unwrap();
        let r2 = simulate_request("POST", "/v1/infer", "{}").unwrap();
        assert_eq!(r1.latency_ms, r2.latency_ms);
    }
}

#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(100))]

        #[test]
        fn prop_port_in_range(port in 1u16..=65535) {
            let config = ServerConfig::try_parse_from([
                "apr-serve",
                "-p",
                &port.to_string(),
            ]).unwrap();
            prop_assert!(config.port > 0);
        }

        #[test]
        fn prop_workers_positive(workers in 1usize..32) {
            let config = ServerConfig::try_parse_from([
                "apr-serve",
                "-w",
                &workers.to_string(),
            ]).unwrap();
            prop_assert!(config.workers > 0);
        }

        #[test]
        fn prop_latency_positive(path in "/v1/[a-z]{1,10}") {
            let response = simulate_request("GET", &path, "").unwrap();
            prop_assert!(response.latency_ms > 0.0);
        }
    }
}
Usage
apr-serve model.apr # Serve on :8080
apr-serve --port 9000 model.apr # Custom port
apr-serve --workers 8 model.apr # 8 worker threads
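Real serving loop (sketch)
The example above only simulates request handling. As a rough illustration of what a non-simulated loop could look like using nothing but the Rust standard library, the standalone sketch below answers the /v1/health endpoint over raw TCP. It is not part of apr-serve: the health-check JSON body, the 1 KiB read buffer, and the single-threaded accept loop are placeholder choices for illustration only.
use std::io::{Read, Write};
use std::net::TcpListener;

fn main() -> std::io::Result<()> {
    // Bind to the recipe's default host and port.
    let listener = TcpListener::bind("127.0.0.1:8080")?;
    for stream in listener.incoming() {
        let mut stream = stream?;
        // Read enough of the request to see the request line.
        let mut buf = [0u8; 1024];
        let n = stream.read(&mut buf)?;
        let head = String::from_utf8_lossy(&buf[..n]);
        // Route on the request line; anything unrecognised gets a 404.
        let (status, body) = if head.starts_with("GET /v1/health") {
            ("200 OK", r#"{"status":"ok"}"#)
        } else {
            ("404 Not Found", r#"{"error":"not found"}"#)
        };
        let response = format!(
            "HTTP/1.1 {status}\r\nContent-Type: application/json\r\nContent-Length: {}\r\n\r\n{body}",
            body.len()
        );
        stream.write_all(response.as_bytes())?;
    }
    Ok(())
}
A production server would additionally parse the full request, dispatch /v1/infer calls to the loaded model, and spread connections across the configured worker threads.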