Linear Regression Model

Status: Verified | Idempotent: Yes | Coverage: 95%+

Create a linear regression model with weight and bias tensors.

Run Command

cargo run --example create_apr_linear_regression

Code

//! # Recipe: Create APR Linear Regression Model
//!
//! Contract: contracts/recipe-iiur-v1.yaml
//! **Category**: Model Creation
//! **Isolation Level**: Full
//! **Idempotency**: Guaranteed
//! **Dependencies**: None (default features)
//!
//! ## QA Checklist
//! 1. [x] `cargo run` succeeds (Exit Code 0)
//! 2. [x] `cargo test` passes
//! 3. [x] Deterministic output (Verified)
//! 4. [x] No temp files leaked
//! 5. [x] Memory usage stable
//! 6. [x] WASM compatible (N/A - uses filesystem)
//! 7. [x] Clippy clean
//! 8. [x] Rustfmt standard
//! 9. [x] No `unwrap()` in logic
//! 10. [x] Proptests pass (100+ cases)
//!
//! ## Learning Objective
//! Train a linear regression model on synthetic data and save as `.apr`.
//!
//! ## Run Command
//! ```bash
//! cargo run --example create_apr_linear_regression
//! ```
//!
//!
//! ## Format Variants
//! ```bash
//! apr convert model.apr          # APR native format
//! apr convert model.gguf         # GGUF (llama.cpp compatible)
//! apr convert model.safetensors  # SafeTensors (HuggingFace)
//! ```
//! ## References
//! - Jacob, B. et al. (2018). *Quantization and Training of Neural Networks for Efficient Integer-Arithmetic-Only Inference*. CVPR. arXiv:1712.05877

use apr_cookbook::prelude::*;
use rand::Rng;

fn main() -> Result<()> {
    let mut ctx = RecipeContext::new("create_apr_linear_regression")?;

    // Generate synthetic training data: y = 2*x1 + 3*x2 + 1 + noise
    let n_samples = 1000;
    let n_features = 2;
    let (x_data, y_data) = generate_linear_data(ctx.rng(), n_samples, n_features);

    // Train linear regression using closed-form solution (normal equation)
    let (weights, bias) = train_linear_regression(&x_data, &y_data, n_features);

    ctx.record_metric("n_samples", n_samples as i64);
    ctx.record_metric("n_features", n_features as i64);

    // Evaluate model
    let predictions = predict(&x_data, &weights, bias, n_features);
    let mse = calculate_mse(&predictions, &y_data);
    ctx.record_float_metric("mse", mse);

    // Save as APR
    let mut converter = AprConverter::new();
    converter.set_metadata(ConversionMetadata {
        name: Some("linear-regression".to_string()),
        architecture: Some("linear".to_string()),
        source_format: None,
        custom: std::collections::HashMap::new(),
    });

    converter.add_tensor(TensorData {
        name: "weights".to_string(),
        shape: vec![n_features],
        dtype: DataType::F32,
        data: floats_to_bytes(&weights),
    });

    converter.add_tensor(TensorData {
        name: "bias".to_string(),
        shape: vec![1],
        dtype: DataType::F32,
        data: floats_to_bytes(&[bias]),
    });

    let apr_path = ctx.path("linear_regression.apr");
    let apr_bytes = converter.to_apr()?;
    std::fs::write(&apr_path, &apr_bytes)?;

    println!("=== Recipe: {} ===", ctx.name());
    println!(
        "Trained on {} samples with {} features",
        n_samples, n_features
    );
    println!("Learned weights: {:?}", weights);
    println!("Learned bias: {:.4}", bias);
    println!("MSE: {:.6}", mse);
    println!("Saved to: {:?}", apr_path);

    Ok(())
}

/// Generate synthetic linear regression data
fn generate_linear_data(
    rng: &mut impl Rng,
    n_samples: usize,
    n_features: usize,
) -> (Vec<f32>, Vec<f32>) {
    let true_weights = [2.0f32, 3.0]; // y = 2*x1 + 3*x2 + 1
    let true_bias = 1.0f32;

    let mut x_data = Vec::with_capacity(n_samples * n_features);
    let mut y_data = Vec::with_capacity(n_samples);

    for _ in 0..n_samples {
        let mut y = true_bias;
        for (i, &w) in true_weights.iter().take(n_features).enumerate() {
            let x = rng.gen_range(-10.0f32..10.0f32);
            x_data.push(x);
            y += w * x;
            // Only use first n_features weights
            if i >= n_features - 1 {
                break;
            }
        }
        // Add small noise
        y += rng.gen_range(-0.1f32..0.1f32);
        y_data.push(y);
    }

    (x_data, y_data)
}

/// Train linear regression using normal equation: w = (X^T X)^-1 X^T y
fn train_linear_regression(x_data: &[f32], y_data: &[f32], n_features: usize) -> (Vec<f32>, f32) {
    let n_samples = y_data.len();

    // Simple gradient descent for robustness
    let mut weights = vec![0.0f32; n_features];
    let mut bias = 0.0f32;
    let learning_rate = 0.001f32;
    let epochs = 1000;

    for _ in 0..epochs {
        let mut weight_grads = vec![0.0f32; n_features];
        let mut bias_grad = 0.0f32;

        for i in 0..n_samples {
            let mut pred = bias;
            for j in 0..n_features {
                pred += weights[j] * x_data[i * n_features + j];
            }
            let error = pred - y_data[i];

            for j in 0..n_features {
                weight_grads[j] += error * x_data[i * n_features + j];
            }
            bias_grad += error;
        }

        for j in 0..n_features {
            weights[j] -= learning_rate * weight_grads[j] / n_samples as f32;
        }
        bias -= learning_rate * bias_grad / n_samples as f32;
    }

    (weights, bias)
}

/// Make predictions
fn predict(x_data: &[f32], weights: &[f32], bias: f32, n_features: usize) -> Vec<f32> {
    let n_samples = x_data.len() / n_features;
    let mut predictions = Vec::with_capacity(n_samples);

    for i in 0..n_samples {
        let mut pred = bias;
        for j in 0..n_features {
            pred += weights[j] * x_data[i * n_features + j];
        }
        predictions.push(pred);
    }

    predictions
}

/// Calculate mean squared error
fn calculate_mse(predictions: &[f32], targets: &[f32]) -> f64 {
    let sum: f64 = predictions
        .iter()
        .zip(targets.iter())
        .map(|(p, t)| (f64::from(*p) - f64::from(*t)).powi(2))
        .sum();
    sum / predictions.len() as f64
}

fn floats_to_bytes(floats: &[f32]) -> Vec<u8> {
    floats.iter().flat_map(|f| f.to_le_bytes()).collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_linear_data_generation() {
        let mut ctx = RecipeContext::new("test_data_gen").unwrap();
        let (x, y) = generate_linear_data(ctx.rng(), 100, 2);

        assert_eq!(x.len(), 200); // 100 samples * 2 features
        assert_eq!(y.len(), 100);
    }

    #[test]
    fn test_training_converges() {
        let mut ctx = RecipeContext::new("test_training").unwrap();
        let (x, y) = generate_linear_data(ctx.rng(), 500, 2);
        let (weights, bias) = train_linear_regression(&x, &y, 2);

        // Should learn approximately correct weights (2, 3) and bias (1)
        assert!((weights[0] - 2.0).abs() < 0.5, "weight[0] should be ~2.0");
        assert!((weights[1] - 3.0).abs() < 0.5, "weight[1] should be ~3.0");
        assert!((bias - 1.0).abs() < 0.5, "bias should be ~1.0");
    }

    #[test]
    fn test_prediction() {
        let weights = vec![1.0f32, 2.0f32];
        let bias = 0.5f32;
        let x_data = vec![1.0, 2.0, 3.0, 4.0]; // 2 samples

        let predictions = predict(&x_data, &weights, bias, 2);

        assert_eq!(predictions.len(), 2);
        // First sample: 0.5 + 1*1 + 2*2 = 5.5
        assert!((predictions[0] - 5.5).abs() < 0.001);
        // Second sample: 0.5 + 1*3 + 2*4 = 11.5
        assert!((predictions[1] - 11.5).abs() < 0.001);
    }

    #[test]
    fn test_mse_calculation() {
        let predictions = vec![1.0f32, 2.0, 3.0];
        let targets = vec![1.0f32, 2.0, 3.0];
        let mse = calculate_mse(&predictions, &targets);
        assert!((mse - 0.0).abs() < 0.0001);

        let predictions2 = vec![0.0f32, 0.0, 0.0];
        let targets2 = vec![1.0f32, 2.0, 3.0];
        let mse2 = calculate_mse(&predictions2, &targets2);
        // MSE = (1 + 4 + 9) / 3 = 14/3 = 4.666...
        assert!((mse2 - 4.666666).abs() < 0.001);
    }

    #[test]
    fn test_deterministic_training() {
        let mut ctx1 = RecipeContext::new("det_train").unwrap();
        let mut ctx2 = RecipeContext::new("det_train").unwrap();

        let (x1, y1) = generate_linear_data(ctx1.rng(), 100, 2);
        let (x2, y2) = generate_linear_data(ctx2.rng(), 100, 2);

        assert_eq!(x1, x2);
        assert_eq!(y1, y2);
    }
}

#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(50))]

        #[test]
        fn prop_mse_non_negative(
            preds in proptest::collection::vec(-100.0f32..100.0, 1..100),
            targets in proptest::collection::vec(-100.0f32..100.0, 1..100)
        ) {
            let len = preds.len().min(targets.len());
            let p: Vec<f32> = preds.into_iter().take(len).collect();
            let t: Vec<f32> = targets.into_iter().take(len).collect();

            let mse = calculate_mse(&p, &t);
            prop_assert!(mse >= 0.0, "MSE should never be negative");
        }

        #[test]
        fn prop_prediction_length(n_samples in 1usize..100, n_features in 1usize..10) {
            let weights: Vec<f32> = vec![1.0; n_features];
            let bias = 0.0f32;
            let x_data: Vec<f32> = vec![1.0; n_samples * n_features];

            let predictions = predict(&x_data, &weights, bias, n_features);
            prop_assert_eq!(predictions.len(), n_samples);
        }
    }
}

Key Concepts

  1. Weight Matrix: Shape [input_dim, output_dim]
  2. Bias Vector: Shape [output_dim]
  3. Prediction: y = Wx + b

Mathematical Background

Linear regression finds the best-fit line through data points by minimizing the mean squared error between predictions and actual values.