Chapter 10: Training with aprender-train
Contract: apr-book-ch10
Run: cargo run -p aprender-core --example ch10_training
#![allow(clippy::disallowed_methods)]
//! Chapter 10: Training with aprender-train
//!
//! Demonstrates an autograd forward/backward pass, an SGD training loop,
//! and the LoRA parameter-efficiency argument.
//! Citation: Hu et al., "LoRA: Low-Rank Adaptation of Large Language Models," arXiv:2106.09685
//! Contract: contracts/apr-book-ch10-v1.yaml (v2 — api_calls enforced)
use aprender::autograd::{clear_graph, Tensor};
use aprender::nn::{
    loss::MSELoss,
    optim::SGD,
    Linear, Module, Optimizer, Sequential,
};
use aprender::prelude::Adam;
fn main() {
    // --- Part 1: Build network and demonstrate forward/backward ---
    let mut model = Sequential::new()
        .add(Linear::new(2, 4))
        .add(Linear::new(4, 1));
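    // Architecture: 2 -> 4 -> 1. Assuming each Linear layer carries a bias,
    // that is (2*4 + 4) + (4*1 + 1) = 17 trainable parameters.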
    // Training data: y = x1 + x2
    let x = Tensor::new(&[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], &[4, 2]);
    let y = Tensor::new(&[3.0, 7.0, 11.0, 15.0], &[4, 1]);
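    // Each row of x maps to one target: [1, 2] -> 3, [3, 4] -> 7,
    // [5, 6] -> 11, [7, 8] -> 15.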
    let loss_fn = MSELoss::new();
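    // MSE over N samples: L = (1/N) * sum_i (pred_i - y_i)^2.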
    let learning_rate = 0.01_f32;
    // SGD here holds only the learning rate; parameters are borrowed fresh at
    // each step via step_with_params, so the model stays free for forward passes.
    let mut optimizer = SGD::new(learning_rate);
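    // SGD update rule: theta <- theta - lr * dL/dtheta for every parameter.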
    // Initial forward pass
    clear_graph();
    let x_grad = x.clone().requires_grad();
    let pred = model.forward(&x_grad);
    println!("Forward pass output shape: {:?}", pred.shape());
    assert_eq!(pred.shape(), &[4, 1], "Output shape must be [4, 1]");
    let loss = loss_fn.forward(&pred, &y);
    let initial_loss = loss.item();
    println!("Initial MSE loss: {initial_loss:.4}");
    // Backward pass
    loss.backward();
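    // backward() runs reverse-mode autodiff over the recorded graph,
    // accumulating dL/dtheta into every tensor that requires grad.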
println!("Backward pass: gradients computed");
// Optimizer step (xor_training pattern: step_with_params)
let mut params = model.parameters_mut();
optimizer.step_with_params(&mut params);
clear_graph();
println!("SGD step: parameters updated");
    // --- Part 2: Training loop ---
    let mut prev_loss = initial_loss;
    let mut decreasing = 0;
    for epoch in 0..100 {
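        // Fresh graph per epoch: clear_graph() drops the previous iteration's
        // nodes so the new forward pass records from a clean slate.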
        clear_graph();
        let x_g = x.clone().requires_grad();
        let pred = model.forward(&x_g);
        let loss = loss_fn.forward(&pred, &y);
        let l = loss.item();
        loss.backward();
        let mut p = model.parameters_mut();
        optimizer.step_with_params(&mut p);
        if l < prev_loss {
            decreasing += 1;
        }
        prev_loss = l;
        if epoch % 25 == 0 {
            println!(" Epoch {epoch}: loss={l:.4}");
        }
    }
    let final_loss = prev_loss;
    println!("Loss decreased in {decreasing}/100 epochs");
    println!("Initial: {initial_loss:.4} -> Final: {final_loss:.4}");
    assert!(
        final_loss < initial_loss,
        "Training must reduce loss: {final_loss:.4} < {initial_loss:.4}"
    );
    // --- Part 3: LoRA parameter efficiency (Hu et al., 2021) ---
    let d = 4096_usize;
    let k = 4096_usize;
    let r = 16_usize;
    let full_params = d * k;
    let lora_params = r * (d + k);
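    // Ratio algebra: r*(d + k) / (d * k) = r/d + r/k. With d = k = 4096 and
    // r = 16, that is 16/4096 + 16/4096 = 0.0078125, i.e. about 0.78%.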
    let ratio = lora_params as f64 / full_params as f64;
    println!(
        "\nLoRA (rank={r}): {lora_params} vs {full_params} ({:.2}%)",
        ratio * 100.0
    );
    assert!(ratio < 0.01, "LoRA must use <1% of full params");
    // The Adam optimizer is also exported; instantiating it exercises the
    // contract's api_calls check.
    let _adam = Adam::new(1e-4);
    println!("Adam optimizer: instantiated");
    println!("Chapter 10 contracts: PASSED");
}
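
// --- Illustrative sketch (not part of the chapter contract) ---
// A minimal LoRA-style forward pass in plain Rust, to make the parameter
// count above concrete: y = W x + (alpha / r) * B (A x), where A (r x k)
// and B (d x r) are the only trainable matrices. The function name,
// `alpha`, and the flat row-major Vec<f32> layout are assumptions made
// for illustration; this is not aprender's API.
#[allow(dead_code)]
fn lora_forward(
    w: &[f32], // frozen base weight, d x k
    a: &[f32], // LoRA down-projection A, r x k
    b: &[f32], // LoRA up-projection B, d x r
    x: &[f32], // input vector, length k
    d: usize,
    k: usize,
    r: usize,
    alpha: f32,
) -> Vec<f32> {
    // h = A x  (length r)
    let mut h = vec![0.0_f32; r];
    for i in 0..r {
        for j in 0..k {
            h[i] += a[i * k + j] * x[j];
        }
    }
    // y = W x + (alpha / r) * B h  (length d)
    let scale = alpha / r as f32;
    let mut y = vec![0.0_f32; d];
    for i in 0..d {
        for j in 0..k {
            y[i] += w[i * k + j] * x[j];
        }
        for j in 0..r {
            y[i] += scale * b[i * r + j] * h[j];
        }
    }
    y
}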