Examples Guide
Practical examples demonstrating TinyRL for machine learning tasks.
Quick Reference
| Example | Location | Description |
|---|---|---|
| Basic Operations | examples/cpp/minimal_operations.cpp | Tensors, autograd, broadcasting |
| MLP | examples/cpp/minimal_network_mlp.cpp | Multi-layer perceptron |
| CNN | examples/cpp/minimal_network_cnn.cpp | Convolutional network |
| MNIST | examples/python/full_training_mnist.py | Digit classification |
| CIFAR-10 | examples/python/full_training_cifar10.py | Image classification |
| StreamAC | examples/stream_x/ | Streaming Actor-Critic |
| ESP32 | examples/stream_x_esp32/ | Embedded RL |
Table of Contents
- Basic Operations
- Neural Networks
- Training Loops
- Reinforcement Learning
- Advanced Features
- Performance Tips
Basic Operations
Tensor Creation and Operations
#include "autograd.h"
#include <iostream>

int main() {
    // Seed the RNG so the example prints the same numbers on every run.
    ag::manual_seed(42);

    // Tensors of several shapes; the trailing string is a debug label.
    ag::Tensor x(ag::Matrix::Random(2, 3), true, "x");
    ag::Tensor y(ag::Matrix::Random(3, 2), true, "y");
    ag::Tensor z(ag::Matrix::Zeros(2, 2), true, "z");

    std::cout << "x shape: " << x.shape()[0] << "x" << x.shape()[1] << std::endl;
    std::cout << "y shape: " << y.shape()[0] << "x" << y.shape()[1] << std::endl;

    // Scalar multiply and addition of identically-shaped tensors.
    auto doubled = x * 2.0;
    auto z_plus_z = z + z;

    // (2,3) @ (3,2) -> (2,2)
    auto product = x.matmul(y);

    // Element-wise nonlinearities applied to the matmul result.
    auto activated = ag::relu(product);
    auto squashed = ag::tanh(product);

    // Reduce the (2,2) result to a single scalar.
    auto total = ag::sum(product);

    std::cout << "Matrix multiplication result:\n" << product.value() << std::endl;
    std::cout << "Total sum: " << total.item() << std::endl;
    return 0;
}
Automatic Differentiation
#include "autograd.h"
#include <iostream>

int main() {
    ag::manual_seed(42);

    // Leaf tensors created with requires_grad = true.
    ag::Tensor a(ag::Matrix::Random(2, 2), true, "a");
    ag::Tensor b(ag::Matrix::Random(2, 2), true, "b");

    std::cout << "Initial values:\n";
    std::cout << "a:\n" << a.value() << std::endl;
    std::cout << "b:\n" << b.value() << std::endl;

    // Build the graph: loss = sum(relu(a @ b)).
    auto product = a.matmul(b);
    auto activated = ag::relu(product);
    auto loss = ag::sum(activated);
    std::cout << "Loss: " << loss.item() << std::endl;

    // Backprop populates .grad() on every leaf that requires gradients.
    loss.backward();
    std::cout << "Gradients:\n";
    std::cout << "da/dloss:\n" << a.grad() << std::endl;
    std::cout << "db/dloss:\n" << b.grad() << std::endl;
    return 0;
}
Broadcasting Operations
#include "autograd.h"
#include <iostream>

int main() {
    ag::manual_seed(42);

    // A full matrix, a row vector, and a 1x1 scalar tensor.
    ag::Tensor matrix(ag::Matrix::Random(3, 4), true, "matrix");
    ag::Tensor vector(ag::Matrix::Random(1, 4), true, "vector");
    ag::Tensor scalar(ag::Matrix::Constant(1, 1, 2.0), true, "scalar");

    std::cout << "Matrix shape: " << matrix.shape()[0] << "x" << matrix.shape()[1] << std::endl;
    std::cout << "Vector shape: " << vector.shape()[0] << "x" << vector.shape()[1] << std::endl;

    // The (1,4) row vector is broadcast down the 3 rows; the 1x1 scalar
    // is broadcast to every element.
    auto row_broadcast = matrix + vector;
    auto scalar_broadcast = matrix * scalar;

    std::cout << "Matrix + Vector:\n" << row_broadcast.value() << std::endl;
    std::cout << "Matrix * Scalar:\n" << scalar_broadcast.value() << std::endl;
    return 0;
}
Neural Networks
Simple Linear Regression
#include "autograd.h"
#include "layers.h"
#include "optimizer.h"
#include <iostream>

int main() {
    ag::manual_seed(42);

    // Synthetic data: y = 2x + 1 plus a little uniform noise.
    const int n_samples = 100;
    ag::Matrix X_data = ag::Matrix::Random(n_samples, 1);
    ag::Matrix y_data = 2.0 * X_data + 1.0 + ag::Matrix::Random(n_samples, 1) * 0.1;
    ag::Tensor X(X_data, false, "X");
    ag::Tensor y(y_data, false, "y");

    // A single linear unit: one weight, one bias.
    nn::Sequential model;
    model.add(nn::Linear(1, 1));

    ag::SGD optimizer(0.01f);
    optimizer.add_parameters(model.layers());

    // Plain full-batch gradient descent on the mean squared error.
    const int epochs = 100;
    for (int epoch = 0; epoch < epochs; ++epoch) {
        auto prediction = model.forward(X);
        auto loss = ag::sum(ag::pow(prediction - y, 2.0)) / n_samples;

        optimizer.zero_grad();
        loss.backward();
        optimizer.step();

        if (epoch % 20 == 0) {
            std::cout << "Epoch " << epoch << ", Loss: " << loss.item() << std::endl;
        }
    }

    // Recover the fitted parameters from the first (and only) layer.
    auto layers = model.layers();
    auto* linear = dynamic_cast<nn::Linear*>(layers[0].get());
    if (linear) {
        std::cout << "Learned weights: " << linear->weights.value()(0, 0) << std::endl;
        std::cout << "Learned bias: " << linear->bias.value()(0, 0) << std::endl;
    }
    return 0;
}
Multi-Layer Perceptron
#include "autograd.h"
#include "layers.h"
#include "optimizer.h"
#include <iostream>

int main() {
    ag::manual_seed(42);

    // 784 -> 128 -> 64 -> 10 fully connected stack with ReLU activations.
    nn::Sequential model;
    model.add(nn::Linear(784, 128));
    model.add(nn::ReLU());
    model.add(nn::Linear(128, 64));
    model.add(nn::ReLU());
    model.add(nn::Linear(64, 10));

    ag::SGD optimizer(0.01f);
    optimizer.add_parameters(model.layers());

    // Random stand-in data: a batch of 32 flattened 784-feature rows.
    ag::Tensor input(ag::Matrix::Random(32, 784), false, "input");
    ag::Tensor target(ag::Matrix::Random(32, 10), false, "target");

    const int epochs = 50;
    for (int epoch = 0; epoch < epochs; ++epoch) {
        auto output = model.forward(input);
        // Mean squared error over the batch of 32.
        auto loss = ag::sum(ag::pow(output - target, 2.0)) / 32;

        optimizer.zero_grad();
        loss.backward();
        optimizer.step();

        if (epoch % 10 == 0) {
            std::cout << "Epoch " << epoch << ", Loss: " << loss.item() << std::endl;
        }
    }
    return 0;
}
Convolutional Neural Network
#include "autograd.h"
#include "layers.h"
#include "optimizer.h"
#include <iostream>

int main() {
    ag::manual_seed(42);

    // Small CNN: two 3x3 conv layers followed by a two-layer classifier head.
    nn::Sequential model;
    model.add(nn::Conv2D(3, 16, 3, 1, 1));  // 3->16 channels, 3x3 kernel
    model.add(nn::ReLU());
    model.add(nn::Conv2D(16, 32, 3, 1, 1)); // 16->32 channels, 3x3 kernel
    model.add(nn::ReLU());
    model.add(nn::Flatten());
    model.add(nn::Linear(32 * 32 * 32, 128)); // 32 channels * 32x32 spatial
    model.add(nn::ReLU());
    model.add(nn::Linear(128, 10));

    // FIX: use the 0.01f float literal like every other example in this
    // guide (was the double literal 0.01).
    ag::SGD optimizer(0.01f);
    optimizer.add_parameters(model.layers());

    // Dummy image batch (batch_size, channels, height, width).
    // FIX: 4-D matrices are constructed with a brace list elsewhere in this
    // guide (Matrix::Random({...})), not a 4-argument overload.
    ag::Tensor input(ag::Matrix::Random({4, 3, 32, 32}), false, "input");
    ag::Tensor target(ag::Matrix::Random(4, 10), false, "target");

    const int epochs = 30;
    for (int epoch = 0; epoch < epochs; ++epoch) {
        auto output = model.forward(input);
        // Mean squared error over the batch of 4.
        auto loss = ag::sum(ag::pow(output - target, 2.0)) / 4;

        optimizer.zero_grad();
        loss.backward();
        optimizer.step();

        if (epoch % 5 == 0) {
            std::cout << "Epoch " << epoch << ", Loss: " << loss.item() << std::endl;
        }
    }
    return 0;
}
Training Loops
Complete Training Example
#include "autograd.h"
#include "layers.h"
#include "optimizer.h"
#include <iostream>
#include <vector>
// Tiny in-memory regression dataset: random features X (n x 10) and
// random targets y (n x 1).
class SimpleDataset {
public:
    ag::Matrix X, y;

    SimpleDataset(int n_samples) {
        // Re-seed so the generated data is identical regardless of any
        // earlier RNG use by the caller.
        ag::manual_seed(42);
        X = ag::Matrix::Random(n_samples, 10);
        y = ag::Matrix::Random(n_samples, 1);
    }
};
int main() {
    ag::manual_seed(42);

    SimpleDataset dataset(1000);

    // 10 -> 64 -> 32 -> 1 regression network.
    nn::Sequential model;
    model.add(nn::Linear(10, 64));
    model.add(nn::ReLU());
    model.add(nn::Linear(64, 32));
    model.add(nn::ReLU());
    model.add(nn::Linear(32, 1));

    ag::SGD optimizer(0.01f);
    optimizer.add_parameters(model.layers());

    const int epochs = 100;
    const int batch_size = 32;
    const int n_batches = dataset.X.rows() / batch_size;
    std::vector<double> losses;

    for (int epoch = 0; epoch < epochs; ++epoch) {
        double epoch_loss = 0.0;

        // One pass over the dataset in fixed-size mini-batches (any
        // remainder rows beyond n_batches * batch_size are skipped).
        for (int batch = 0; batch < n_batches; ++batch) {
            const int offset = batch * batch_size;
            ag::Matrix X_batch = dataset.X.block(offset, 0, batch_size, 10);
            ag::Matrix y_batch = dataset.y.block(offset, 0, batch_size, 1);
            ag::Tensor X_tensor(X_batch, false, "X_batch");
            ag::Tensor y_tensor(y_batch, false, "y_batch");

            auto prediction = model.forward(X_tensor);
            auto loss = ag::sum(ag::pow(prediction - y_tensor, 2.0)) / batch_size;

            optimizer.zero_grad();
            loss.backward();
            optimizer.step();

            epoch_loss += loss.item();
        }

        epoch_loss /= n_batches;
        losses.push_back(epoch_loss);
        if (epoch % 10 == 0) {
            std::cout << "Epoch " << epoch << ", Loss: " << epoch_loss << std::endl;
        }
    }

    std::cout << "Training completed!" << std::endl;
    std::cout << "Final loss: " << losses.back() << std::endl;
    return 0;
}
Learning Rate Scheduling
#include "autograd.h"
#include "layers.h"
#include "optimizer.h"
#include <iostream>
// Step-decay learning-rate schedule:
//   lr(epoch) = initial_lr * decay_factor^(epoch / decay_epochs)
//
// FIX: the original mutated an internal `lr` on every call, so querying the
// same epoch twice (or calling with out-of-order epochs) decayed the rate
// incorrectly. The schedule is now computed statelessly from the epoch
// number: identical values for the usual call-once-per-epoch pattern, and
// correct values for repeated or out-of-order queries. Also guards against
// decay_epochs == 0 (the original would take a modulo by zero).
class LearningRateScheduler {
public:
    LearningRateScheduler(double initial_lr, double decay_factor, int decay_epochs)
        : initial_lr_(initial_lr), decay_factor_(decay_factor), decay_epochs_(decay_epochs) {}

    // Learning rate for the given epoch (epoch >= 0). Pure — no side effects.
    double get_lr(int epoch) const {
        double lr = initial_lr_;
        // Apply one decay step per completed decay interval.
        const int steps = (decay_epochs_ > 0) ? epoch / decay_epochs_ : 0;
        for (int i = 0; i < steps; ++i) {
            lr *= decay_factor_;
        }
        return lr;
    }

private:
    double initial_lr_, decay_factor_;
    int decay_epochs_;
};
int main() {
ag::manual_seed(42);
// Build a small 10 -> 50 -> 1 regression model and random stand-in data.
nn::Sequential model;
model.add(nn::Linear(10, 50));
model.add(nn::ReLU());
model.add(nn::Linear(50, 1));
ag::Tensor X(ag::Matrix::Random(100, 10), false);
ag::Tensor y(ag::Matrix::Random(100, 1), false);
// Decay the learning rate by a factor of 0.9 every 20 epochs.
LearningRateScheduler scheduler(0.01, 0.9, 20);
// Training loop with learning rate scheduling.
int epochs = 100;
for (int epoch = 0; epoch < epochs; ++epoch) {
// Query the schedule once per epoch.
double current_lr = scheduler.get_lr(epoch);
// NOTE(review): a fresh SGD is constructed every epoch so the updated
// learning rate takes effect. That is fine for a stateless optimizer,
// but an optimizer carrying state (e.g. momentum buffers) would lose it
// here — confirm ag::SGD is stateless before copying this pattern.
ag::SGD optimizer(current_lr);
optimizer.add_parameters(model.layers());
// Forward pass over the full 100-sample batch.
auto y_pred = model.forward(X);
// Mean squared error over the 100 samples.
auto loss = ag::sum(ag::pow(y_pred - y, 2.0)) / 100;
// Backward pass and parameter update.
optimizer.zero_grad();
loss.backward();
optimizer.step();
if (epoch % 20 == 0) {
std::cout << "Epoch " << epoch << ", LR: " << current_lr
<< ", Loss: " << loss.item() << std::endl;
}
}
return 0;
}
Reinforcement Learning
The Stream-X module (containing the StreamAC, StreamQ, and StreamSARSA algorithms) lives under examples/stream_x. Build it by enabling the Stream-X module in your build configuration.
Quick pointers:
- StreamAC (continuous): examples/stream_x/minimal_ac_cartpole_continuous.cpp
- StreamAC (discrete): examples/stream_x/minimal_ac_cartpole_discrete.cpp
- StreamQ: examples/stream_x/minimal_q_cartpole.cpp
Basic Actor-Critic
#include "autograd.h"
#include "layers.h"
#include "optimizer.h"
#include <iostream>
#include <random>
// Toy 1-D environment: action 0 nudges the state up (+0.1, reward +1),
// action 1 nudges it down (-0.1, reward -1). An episode terminates after
// 100 steps or once the state leaves [-2, 2].
class SimpleEnvironment {
public:
    SimpleEnvironment() : state(0.0), steps(0) {}

    // Return to the initial state and report it.
    double reset() {
        state = 0.0;
        steps = 0;
        return state;
    }

    // Apply the action; returns {reward, episode_done}.
    std::pair<double, bool> step(int action) {
        ++steps;
        const bool push_up = (action == 0);
        const double reward = push_up ? 1.0 : -1.0;
        state += push_up ? 0.1 : -0.1;
        const bool done = (steps >= 100) || (std::abs(state) > 2.0);
        return {reward, done};
    }

    double get_state() const { return state; }

private:
    double state;
    int steps;
};
int main() {
    ag::manual_seed(42);

    // Policy network: state (1 value) -> logits over the 2 discrete actions.
    nn::Sequential actor;
    actor.add(nn::Linear(1, 64));
    actor.add(nn::ReLU());
    actor.add(nn::Linear(64, 2));

    // Value network: state -> scalar state-value estimate.
    nn::Sequential critic;
    critic.add(nn::Linear(1, 64));
    critic.add(nn::ReLU());
    critic.add(nn::Linear(64, 1));

    ag::SGD actor_optimizer(0.01f);
    ag::SGD critic_optimizer(0.01f);
    actor_optimizer.add_parameters(actor.layers());
    critic_optimizer.add_parameters(critic.layers());

    SimpleEnvironment env;
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<> dis(0.0, 1.0);

    const int episodes = 100;
    for (int episode = 0; episode < episodes; ++episode) {
        double state = env.reset();
        double total_reward = 0.0;

        while (true) {
            ag::Tensor state_tensor(ag::Matrix::Constant(1, 1, state), false);

            // Policy distribution over the two actions.
            auto action_logits = actor.forward(state_tensor);
            auto action_probs = ag::softmax(action_logits);

            // Sample: pick action 0 with probability action_probs[0].
            double rand_val = dis(gen);
            int action = (rand_val < action_probs.value()(0, 0)) ? 0 : 1;

            auto [reward, done] = env.step(action);
            total_reward += reward;
            double next_state = env.get_state();

            // One-step TD target and error; .item() detaches the bootstrap
            // value so the critic is not trained through the next-state estimate.
            ag::Tensor next_state_tensor(ag::Matrix::Constant(1, 1, next_state), false);
            auto current_value = critic.forward(state_tensor);
            auto next_value = critic.forward(next_state_tensor);
            double td_target = reward + (done ? 0.0 : 0.99 * next_value.item());
            double td_error = td_target - current_value.item();

            actor_optimizer.zero_grad();
            critic_optimizer.zero_grad();

            // Critic update: squared TD error.
            auto critic_loss = ag::pow(current_value - td_target, 2.0);
            critic_loss.backward();
            critic_optimizer.step();

            // Actor update: policy-gradient loss -log(pi(a|s)) * td_error.
            // FIX: build the action mask with the same (1, 2) layout as
            // action_probs — the original used a {1, 2, 1, 1} brace shape
            // that does not line up with the 2-D convention used throughout
            // this example — and use a double literal (the original had a
            // stray 1.0f in this double-based example).
            ag::Matrix one_hot = ag::Matrix::Zeros(1, 2);
            one_hot(0, action) = 1.0;
            ag::Tensor picked_prob = ag::sum(action_probs * ag::Tensor(one_hot, false));
            auto actor_loss = -ag::log(picked_prob) * td_error;
            actor_loss.backward();
            actor_optimizer.step();

            state = next_state;
            if (done) break;
        }

        if (episode % 10 == 0) {
            std::cout << "Episode " << episode << ", Total Reward: " << total_reward << std::endl;
        }
    }
    return 0;
}
Advanced Features
Computational Graph Visualization
#include "autograd.h"
#include "layers.h"
#include <iostream>

int main() {
    ag::manual_seed(42);

    // Leaf tensors feeding a small expression graph.
    ag::Tensor x(ag::Matrix::Random(2, 3), true, "input_x");
    ag::Tensor y(ag::Matrix::Random(3, 2), true, "input_y");
    ag::Tensor z(ag::Matrix::Random(2, 2), true, "input_z");

    // loss = sum(tanh(relu(x @ y) + z))
    auto matmul_node = x.matmul(y);
    auto relu_node = ag::relu(matmul_node);
    auto add_node = relu_node + z;
    auto tanh_node = ag::tanh(add_node);
    auto loss = ag::sum(tanh_node);

    // Dump the graph in Graphviz DOT format.
    ag::draw_graph(loss, "computation_graph.dot");
    std::cout << "Computational graph saved to 'computation_graph.dot'" << std::endl;
    std::cout << "Convert to image with: dot -Tpng computation_graph.dot -o graph.png" << std::endl;

    // Gradients flow through the same graph that was just drawn.
    loss.backward();
    std::cout << "Gradients computed successfully!" << std::endl;
    return 0;
}
Custom Layer Implementation
#include "autograd.h"
#include "layers.h"
#include <iostream>
// Example of a user-defined layer: a linear transform followed by ReLU.
class CustomLayer : public nn::Layer {
public:
    // FIX: the weight matrix must carry (input_dim, output_dim) in its first
    // two dimensions so a (batch, input_dim) input can be matmul'd with it —
    // the original laid the dims out as {1, input_dim, output_dim, 1}, which
    // does not line up with the 2-D input this layer is used with below (the
    // guide's other matmul examples, e.g. the 100x100 memory-management one,
    // place rows/cols in the first two dims).
    CustomLayer(int input_dim, int output_dim)
        : weights(ag::Matrix::Random({input_dim, output_dim, 1, 1}), true, "custom_weights"),
          bias(ag::Matrix::Zeros({1, output_dim, 1, 1}), true, "custom_bias") {}

    // input: (batch, input_dim); returns relu(input @ weights + bias),
    // shape (batch, output_dim). The (1, output_dim) bias row broadcasts
    // across the batch.
    ag::Tensor forward(const ag::Tensor& input) override {
        auto linear_output = input.matmul(weights) + bias;
        return ag::relu(linear_output);
    }

    // Expose the trainable tensors so optimizers can update them.
    std::vector<ag::Tensor*> get_parameters() override {
        return {&weights, &bias};
    }

    bool has_parameters() const override { return true; }

private:
    ag::Tensor weights, bias;
};
int main() {
    ag::manual_seed(42);

    // A 10-input / 5-output instance of the custom layer.
    auto custom_layer = std::make_shared<CustomLayer>(10, 5);

    // Batch of 3 random input rows.
    ag::Tensor input(ag::Matrix::Random(3, 10), false, "input");

    auto output = custom_layer->forward(input);

    std::cout << "Input shape: " << input.shape()[0] << "x" << input.shape()[1] << std::endl;
    std::cout << "Output shape: " << output.shape()[0] << "x" << output.shape()[1] << std::endl;
    std::cout << "Output:\n" << output.value() << std::endl;
    return 0;
}
Performance Tips
Memory Management
#include "autograd.h"
#include "layers.h"
#include <iostream>
int main() {
    ag::manual_seed(42);

    // Two 100x100 operands for a matmul -> relu -> sum chain.
    // FIX: the debug labels now match the variable names — the originals were
    // the stale strings "tensor_a"/"tensor_b", which made graph dumps and
    // error messages misleading.
    ag::Tensor x(ag::Matrix::Random({100, 100, 1, 1}), true, "x");
    ag::Tensor w(ag::Matrix::Random({100, 100, 1, 1}), true, "w");

    auto y = x.matmul(w);
    auto z = ag::relu(y);
    auto loss = ag::sum(z);

    loss.backward();

    // Release the autograd graph once gradients have been consumed so the
    // intermediate node storage can be reclaimed.
    x.clear_graph();
    y.clear_graph();
    z.clear_graph();
    loss.clear_graph();
    std::cout << "Memory freed successfully!" << std::endl;
    return 0;
}
Batch Processing
#include "autograd.h"
#include "layers.h"
#include "optimizer.h"
#include <iostream>
int main() {
    ag::manual_seed(42);

    // 100 -> 50 -> 10 network trained on random mini-batches.
    nn::Sequential model;
    model.add(nn::Linear(100, 50));
    model.add(nn::ReLU());
    model.add(nn::Linear(50, 10));

    ag::SGD optimizer(0.01f);
    optimizer.add_parameters(model.layers());

    const int batch_size = 32;
    const int num_batches = 10;

    for (int batch = 0; batch < num_batches; ++batch) {
        // Fresh random batch each iteration.
        ag::Tensor batch_input(ag::Matrix::Random(batch_size, 100), false);
        ag::Tensor batch_target(ag::Matrix::Random(batch_size, 10), false);

        auto output = model.forward(batch_input);
        // Mean squared error over the batch.
        auto loss = ag::sum(ag::pow(output - batch_target, 2.0)) / batch_size;

        optimizer.zero_grad();
        loss.backward();
        optimizer.step();

        std::cout << "Batch " << batch << ", Loss: " << loss.item() << std::endl;

        // Release the computation graph after each batch to free memory.
        loss.clear_graph();
    }
    return 0;
}
See Also
- Tensor Operations — Core tensor manipulation
- Neural Networks — Layer documentation
- Reinforcement Learning — Stream-X examples
- API Reference — Complete API documentation
- ESP32 Guide — Embedded examples