Incremental Training

Status: Verified | Idempotent: Yes | Coverage: 95%+

Add new training data to an existing model without full retraining.

Run Command

cargo run --example continuous_train_incremental

Code

//! # Recipe: Continuous Incremental Training
//!
//! Contract: contracts/recipe-iiur-v1.yaml
//! **Category**: Continuous Training
//! **Isolation Level**: Full
//! **Idempotency**: Guaranteed
//! **Dependencies**: None (default features)
//!
//! ## QA Checklist
//! 1. [x] `cargo run` succeeds (Exit Code 0)
//! 2. [x] `cargo test` passes
//! 3. [x] Deterministic output (Verified)
//! 4. [x] No temp files leaked
//! 5. [x] Memory usage stable
//! 6. [x] WASM compatible (N/A)
//! 7. [x] Clippy clean
//! 8. [x] Rustfmt standard
//! 9. [x] No `unwrap()` in logic
//! 10. [x] Proptests pass (100+ cases)
//!
//! ## Learning Objective
//! Update existing `.apr` model with new training data incrementally.
//!
//! ## Run Command
//! ```bash
//! cargo run --example continuous_train_incremental
//! ```
//!
//!
//! ## Format Variants
//! ```bash
//! apr finetune model.apr          # APR native format
//! apr finetune model.gguf         # GGUF (llama.cpp compatible)
//! apr finetune model.safetensors  # SafeTensors (HuggingFace)
//! ```
//! ## References
//! - Hu, E. et al. (2021). *LoRA: Low-Rank Adaptation of Large Language Models*. arXiv:2106.09685

use apr_cookbook::prelude::*;
use rand::Rng;

fn main() -> Result<()> {
    let mut ctx = RecipeContext::new("continuous_train_incremental")?;

    let n_features = 4;
    let n_batches = 5;
    let batch_size = 100;

    // Initialize model weights
    let mut weights = vec![0.0f32; n_features];
    let mut bias = 0.0f32;
    let learning_rate = 0.01f32;

    ctx.record_metric("n_features", n_features as i64);
    ctx.record_metric("n_batches", i64::from(n_batches));
    ctx.record_metric("batch_size", batch_size as i64);

    println!("=== Recipe: {} ===", ctx.name());
    println!("Starting incremental training...");
    println!();

    let mut total_samples = 0;

    // Simulate streaming data batches
    for batch_id in 0..n_batches {
        // Generate batch with deterministic seed per batch
        let batch_seed = hash_name_to_seed(&format!("batch_{}", batch_id));
        let (x_batch, y_batch) = generate_batch(batch_seed, batch_size, n_features);

        // Incremental SGD update
        let batch_loss = train_batch(
            &x_batch,
            &y_batch,
            &mut weights,
            &mut bias,
            learning_rate,
            n_features,
        );

        total_samples += batch_size;

        // Save checkpoint
        let checkpoint_path = ctx.path(&format!("checkpoint_{}.apr", batch_id));
        save_checkpoint(&checkpoint_path, &weights, bias)?;

        println!(
            "Batch {}: loss={:.4}, samples_seen={}",
            batch_id, batch_loss, total_samples
        );

        ctx.record_float_metric(&format!("batch_{}_loss", batch_id), batch_loss);
    }

    // Final evaluation
    let eval_seed = hash_name_to_seed("eval_data");
    let (x_eval, y_eval) = generate_batch(eval_seed, 200, n_features);
    let eval_loss = evaluate(&x_eval, &y_eval, &weights, bias, n_features);

    ctx.record_float_metric("final_eval_loss", eval_loss);
    ctx.record_metric("total_samples", total_samples as i64);

    // Save final model
    let final_path = ctx.path("final_model.apr");
    save_checkpoint(&final_path, &weights, bias)?;

    println!();
    println!("Training complete:");
    println!("  Total batches: {}", n_batches);
    println!("  Total samples: {}", total_samples);
    println!("  Final weights: {:?}", weights);
    println!("  Final bias: {:.4}", bias);
    println!("  Evaluation loss: {:.4}", eval_loss);
    println!("  Model saved to: {:?}", final_path);

    Ok(())
}

/// Generate a training batch
fn generate_batch(seed: u64, batch_size: usize, n_features: usize) -> (Vec<f32>, Vec<f32>) {
    use rand::SeedableRng;
    let mut rng = rand::rngs::StdRng::seed_from_u64(seed);

    // True weights for synthetic data
    let true_weights: Vec<f32> = (0..n_features).map(|i| (i + 1) as f32).collect();
    let true_bias = 0.5f32;

    let mut x_data = Vec::with_capacity(batch_size * n_features);
    let mut y_data = Vec::with_capacity(batch_size);

    for _ in 0..batch_size {
        let mut y = true_bias;
        for (j, &w) in true_weights.iter().enumerate() {
            let x = rng.gen_range(-1.0f32..1.0f32);
            x_data.push(x);
            y += w * x;
            if j >= n_features - 1 {
                break;
            }
        }
        y += rng.gen_range(-0.1f32..0.1f32); // Noise
        y_data.push(y);
    }

    (x_data, y_data)
}

/// Train on a single batch using SGD
fn train_batch(
    x_data: &[f32],
    y_data: &[f32],
    weights: &mut [f32],
    bias: &mut f32,
    learning_rate: f32,
    n_features: usize,
) -> f64 {
    let batch_size = y_data.len();
    let mut total_loss = 0.0f64;

    for i in 0..batch_size {
        // Forward pass
        let mut pred = *bias;
        for j in 0..n_features {
            pred += weights[j] * x_data[i * n_features + j];
        }

        let error = pred - y_data[i];
        total_loss += f64::from(error).powi(2);

        // Backward pass (SGD update)
        for j in 0..n_features {
            weights[j] -= learning_rate * error * x_data[i * n_features + j];
        }
        *bias -= learning_rate * error;
    }

    total_loss / batch_size as f64
}

/// Evaluate model on data
fn evaluate(x_data: &[f32], y_data: &[f32], weights: &[f32], bias: f32, n_features: usize) -> f64 {
    let n_samples = y_data.len();
    let mut total_loss = 0.0f64;

    for i in 0..n_samples {
        let mut pred = bias;
        for j in 0..n_features {
            pred += weights[j] * x_data[i * n_features + j];
        }
        let error = pred - y_data[i];
        total_loss += f64::from(error).powi(2);
    }

    total_loss / n_samples as f64
}

/// Save model checkpoint
fn save_checkpoint(path: &std::path::Path, weights: &[f32], bias: f32) -> Result<()> {
    let mut converter = AprConverter::new();
    converter.set_metadata(ConversionMetadata {
        name: Some("incremental-model".to_string()),
        architecture: Some("linear".to_string()),
        source_format: None,
        custom: std::collections::HashMap::new(),
    });

    converter.add_tensor(TensorData {
        name: "weights".to_string(),
        shape: vec![weights.len()],
        dtype: DataType::F32,
        data: weights.iter().flat_map(|f| f.to_le_bytes()).collect(),
    });

    converter.add_tensor(TensorData {
        name: "bias".to_string(),
        shape: vec![1],
        dtype: DataType::F32,
        data: bias.to_le_bytes().to_vec(),
    });

    let apr_bytes = converter.to_apr()?;
    std::fs::write(path, apr_bytes)?;

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_batch_generation() {
        let (x, y) = generate_batch(42, 50, 4);
        assert_eq!(x.len(), 200); // 50 * 4
        assert_eq!(y.len(), 50);
    }

    #[test]
    fn test_batch_deterministic() {
        let (x1, y1) = generate_batch(42, 50, 4);
        let (x2, y2) = generate_batch(42, 50, 4);
        assert_eq!(x1, x2);
        assert_eq!(y1, y2);
    }

    #[test]
    fn test_training_reduces_loss() {
        let (x, y) = generate_batch(42, 100, 4);
        let mut weights = vec![0.0f32; 4];
        let mut bias = 0.0f32;

        let loss1 = train_batch(&x, &y, &mut weights, &mut bias, 0.01, 4);

        // Train more
        let loss2 = train_batch(&x, &y, &mut weights, &mut bias, 0.01, 4);

        assert!(loss2 <= loss1, "Loss should decrease or stay same");
    }

    #[test]
    fn test_checkpoint_save() {
        let ctx = RecipeContext::new("test_checkpoint").unwrap();
        let weights = vec![1.0f32, 2.0, 3.0];
        let bias = 0.5f32;

        let path = ctx.path("test.apr");
        save_checkpoint(&path, &weights, bias).unwrap();

        assert!(path.exists());
    }

    #[test]
    fn test_evaluation() {
        let weights = vec![1.0f32, 2.0f32];
        let bias = 0.0f32;

        // Perfect data for y = 1*x1 + 2*x2
        let x = vec![1.0f32, 0.0, 0.0, 1.0]; // Two samples
        let y = vec![1.0f32, 2.0f32]; // Expected outputs

        let loss = evaluate(&x, &y, &weights, bias, 2);
        assert!(loss < 0.001, "Loss should be near zero for perfect data");
    }
}

#[cfg(test)]
mod proptests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        #![proptest_config(ProptestConfig::with_cases(50))]

        #[test]
        fn prop_batch_sizes(batch_size in 1usize..100, n_features in 1usize..10) {
            let (x, y) = generate_batch(42, batch_size, n_features);
            prop_assert_eq!(x.len(), batch_size * n_features);
            prop_assert_eq!(y.len(), batch_size);
        }

        #[test]
        fn prop_loss_non_negative(batch_size in 10usize..50) {
            let (x, y) = generate_batch(42, batch_size, 4);
            let mut weights = vec![0.0f32; 4];
            let mut bias = 0.0f32;

            let loss = train_batch(&x, &y, &mut weights, &mut bias, 0.01, 4);
            prop_assert!(loss >= 0.0);
        }
    }
}