use alimentar::ArrowDataset;
let result = ArrowDataset::from_parquet("empty.parquet");
// Jidoka: a load failure halts the line; an empty-but-valid load only warns.
match result {
    Err(err) => {
        // Loading failed: report on stderr, then hand the error back to the caller.
        eprintln!("Jidoka: {}", err);
        return Err(err);
    }
    Ok(dataset) => {
        if dataset.len() == 0 {
            // Loaded successfully but reports a length of zero: warn and skip processing.
            println!("Warning: Empty dataset");
        } else {
            // Non-empty dataset: continue with normal processing.
            process(dataset);
        }
    }
}
use alimentar::ArrowDataset;
let result = ArrowDataset::from_parquet("corrupt.parquet");
// Jidoka: a corrupt file must surface as an error, never as loaded data.
assert!(result.is_err(), "Corrupt file should return error");
if let Err(err) = result {
    eprintln!("Jidoka stop: Corrupt file detected");
    eprintln!("Error: {}", err);
    // Nothing is retried here; a human is expected to step in.
} else {
    // The assertion above has already ruled out the success case.
    unreachable!()
}
use alimentar::backend::{BackendConfig, S3Config};
// Configure the S3 backend: build an S3Config with bucket, region and
// endpoint set explicitly.
// NOTE(review): no credentials are passed to the builder here; presumably
// they are taken from the environment — confirm.
let config = S3Config::builder()
.bucket("my-bucket")
.region("us-west-2")
.endpoint("https://s3.amazonaws.com")
.build();
// Wrap the config in the S3 variant and create the backend; `?` propagates
// a creation failure, so this must run inside a function returning Result.
let backend = BackendConfig::S3(config).create()?;
// List what the backend reports under "datasets/" (awaited, so an async
// context is required; errors propagate via `?`).
let datasets = backend.list("datasets/").await?;
// Fetch the object stored at "datasets/train.parquet" from S3.
// NOTE(review): presumably this yields the raw object contents rather than
// a parsed dataset — confirm against the backend API.
let data = backend.get("datasets/train.parquet").await?;
# S3 via CLI
# The two AWS_* assignments prefix the command, so they are set in the
# environment of this one invocation only.
# (xxx / yyy look like placeholders — substitute real credentials.)
AWS_ACCESS_KEY_ID=xxx AWS_SECRET_ACCESS_KEY=yyy \
alimentar info s3://my-bucket/data.parquet