From 9924c2d54693db09a334240f2a4863b4023c0cc9 Mon Sep 17 00:00:00 2001
From: Matthew Sotoudeh
Date: Tue, 22 Aug 2023 23:37:01 -0700
Subject: neural net labs

---
 README                |   9 ++++-
 neural-net/.gitignore |   1 +
 neural-net/Makefile   |   9 +++++
 neural-net/README     |  75 +++++++++++++++++++++++++++++++++++++
 neural-net/main.c     | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++
 neural-net/nn.c       |  88 +++++++++++++++++++++++++++++++++++++++++++
 neural-net/nn.h       |  29 ++++++++++++++
 neural-net/xor_input  |  16 ++++++++
 8 files changed, 328 insertions(+), 1 deletion(-)
 create mode 100644 neural-net/.gitignore
 create mode 100644 neural-net/Makefile
 create mode 100644 neural-net/README
 create mode 100644 neural-net/main.c
 create mode 100644 neural-net/nn.c
 create mode 100644 neural-net/nn.h
 create mode 100644 neural-net/xor_input

diff --git a/README b/README
index 9708900..eeaaf78 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-Labs I've written for classes
+Labs I've written for classes:
 
  - sat-chaff: implement a basic DPLL SAT solver, profile it, then optimize it
    with the watched literals trick from Chaff
@@ -8,3 +8,10 @@ Labs I've written for classes
 
  - static-analysis: implement a mini static analysis pass that finds some
    bugs in the Linux kernel
+
+There are some "work-in-progress" labs:
+
+ - neural-net: implement a simple feed-forward neural net training &
+   inference program. The code is kind of a mess right now, but it's the
+   smallest/simplest backprop implementation I know of. One of our 240lx
+   students (Luca Pistor) even got it running on the pi.
diff --git a/neural-net/.gitignore b/neural-net/.gitignore
new file mode 100644
index 0000000..ba2906d
--- /dev/null
+++ b/neural-net/.gitignore
@@ -0,0 +1 @@
+main
diff --git a/neural-net/Makefile b/neural-net/Makefile
new file mode 100644
index 0000000..95c7828
--- /dev/null
+++ b/neural-net/Makefile
@@ -0,0 +1,9 @@
+CFLAGS += -O3
+CFLAGS += -g
+
+main: main.c nn.c
+
+clean:
+	rm -f main
+
+.PHONY: clean
diff --git a/neural-net/README b/neural-net/README
new file mode 100644
index 0000000..c9da1f3
--- /dev/null
+++ b/neural-net/README
@@ -0,0 +1,75 @@
+Note: the code is a bit of a mess; I wouldn't teach this as-is. But at least
+it's tiny, and from scratch!
+
+==== USAGE ====
+Quickstart:
+
+    $ make
+    $ cat xor_input | ./main
+    Read 4 training points.
+    [1.000000, ]
+    [0.999998, ]
+    [0.000002, ]
+    [0.000001, ]
+    [0.146817, ]
+    [0.154475, ]
+    [0.914473, ]
+
+The `main` program trains a feed-forward neural net (here, one with 2 inputs
+and 1 output). You provide the network shape, training set, hyperparameters,
+and test set on stdin. The input format is as follows:
+
+    $ cat xor_input
+    in size 2
+    out size 1
+    hidden size 10
+    n layers 3
+    0 0 0
+    0 1 1
+    1 0 1
+    1 1 0
+    train 150 0.05
+    0 1
+    1 0
+    0 0
+    1 1
+    0.8 0.99
+    0.95 0.86
+    0.95 0.1
+
+The first few lines describe the network. Any lines before the "train"
+statement give training points: each line is the input values followed by the
+desired output values for a single point (so "0 1 1" means input (0, 1) with
+desired output 1). Then `train [iters] [lr]` trains the neural net on those
+points for the given number of iterations at the given learning rate. Finally,
+the lines after that are treated as test inputs: the trained neural net is run
+on each one and its output is printed on stdout.
+
+==== HOW? ====
+The wrapper code is in main.c, the core operations are in nn.c.
+
+The naming scheme is:
+ - v_...: vector operation
+ - vv_...: vector-vector operation
+ - mv_...: matrix-vector operation
+ - ..._bp...: backwards operation (compute derivative)
+
+Note that when computing the derivative of, say, a mat-vec multiplication, we
+can ask for the derivative with respect to either the matrix (weights) or the
+vector (input). These are called mv_bp_m and mv_bp_v, respectively.
+
+The forward operations are exactly what you would expect.
+
+For the backwards operations, the intuitive thing to remember is that
+"everything is linear, so derivatives add up." I've tried to decompose all of
+the backprop operations in terms of smaller operations: mat-vec multiplication
+can be defined as sums of vec-vec multiplications, so the backprop for mat-vec
+multiplication can be defined as sums of backprops of vec-vec multiplications.
+Meanwhile, vec-vec backprop can be understood intuitively:
+
+    v.w = v1*w1 + v2*w2 + ... + vn*wn
+    the derivative of v.w wrt v1 is w1 (and likewise, wrt vi it is wi)
+    and then we multiply by 'how much we want the output to change', i.e.,
+    the backprop'd value from the previous layer.
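+
+As a concrete sketch (not in the code, it just restates what nn.c computes):
+take a weight matrix M with rows r1..rm and an input vector v, so row i of
+M*v is ri.v. If the backprop'd deltas for the outputs are d1..dm, then
+
+    deltas for v  = r1*d1 + r2*d2 + ... + rm*dm
+    deltas for ri = v*di
+
+which is exactly what mv_bp_v does (one vv_bp per row, summed into the input
+deltas) and what mv_bp_m does (one vv_bp per row, stacked into the matrix of
+weight deltas).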
diff --git a/neural-net/main.c b/neural-net/main.c
new file mode 100644
index 0000000..3001bde
--- /dev/null
+++ b/neural-net/main.c
@@ -0,0 +1,102 @@
+#include "nn.h"
+
+static size_t IN_DIMS, OUT_DIMS, HIDDEN_DIMS, N_LAYERS;
+
+void append_bias(struct vec *vec) {
+    vec->data[vec->n++] = 1.;
+}
+
+int read_vec(size_t n, struct vec *out) {
+    out->n = n;
+    for (size_t i = 0; i < n; i++)
+        if (scanf(" %f ", &out->data[i]) != 1)
+            return 0;
+    return 1;
+}
+
+// neural net described by @layers. the input vector is @intermediates[0].
+// intermediate layer values are placed in @intermediates.
+void forward(struct mat layers[], struct vec intermediates[], size_t n_layers) {
+    for (size_t i = 0; i < n_layers; i++) {
+        intermediates[i + 1] = mv(layers[i], intermediates[i]);
+        if (i + 1 != n_layers)
+            intermediates[i + 1] = v_relu(intermediates[i + 1]);
+    }
+}
+
+// computes the derivative for the output nodes. @desired is the true labels;
+// @out is the current output of the model. the derivative is computed
+// in-place, by overwriting @out.
+// note: 'derivative' here is a bit of a rough description ...
+void loss_bp(struct vec *out, struct vec desired, float learn_rate) {
+    for (size_t i = 0; i < out->n; i++)
+        out->data[i] = (desired.data[i] - out->data[i]) * learn_rate;
+}
+
+// backpropagates in-place. assumes @intermediates[n_layers] contains the
+// derivative for the last layer. during @ith loop iteration, we compute the
+// derivative for the @i-1 layer from the derivative of the @ith layer.
+// derivative is computed in-place.
+void backward(struct mat layers[], struct vec intermediates[], size_t n_layers) {
+    for (size_t i = n_layers; i > 0; i--) {
+        // need to recompute the forward pass because we've already overwritten
+        // @intermediates[i].
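+        // at this point @intermediates[i] holds the *deltas* for layer @i's
+        // output, not its values: loss_bp put them there for the top layer,
+        // and the previous iteration put them there for lower layers.
+        // pre_relu_deltas pushes those deltas back through the relu (the
+        // output layer has no relu, hence the special case below),
+        // layer_delta is the update for this layer's weights, and in_delta
+        // becomes the deltas for layer @i-1 on the next iteration.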
+        struct vec pre_relu = mv(layers[i - 1], intermediates[i - 1]);
+        struct vec pre_relu_deltas = v_relu_bp(pre_relu, intermediates[i]);
+        if (i == n_layers)
+            pre_relu_deltas = intermediates[i];
+        struct mat layer_delta = mv_bp_m(layers[i - 1], intermediates[i - 1], pre_relu_deltas);
+        struct vec in_delta = mv_bp_v(layers[i - 1], intermediates[i - 1], pre_relu_deltas);
+        intermediates[i - 1] = in_delta;
+        add_mat(&layers[i - 1], layer_delta);
+    }
+}
+
+int main() {
+    assert(scanf(" in size %lu out size %lu hidden size %lu n layers %lu ",
+                 &IN_DIMS, &OUT_DIMS, &HIDDEN_DIMS, &N_LAYERS) == 4);
+    assert(IN_DIMS < MAX_DIMS && OUT_DIMS < MAX_DIMS && HIDDEN_DIMS < MAX_DIMS
+           && N_LAYERS < MAX_LAYERS);
+    struct mat layers[MAX_LAYERS];
+    layers[0] = m_random(HIDDEN_DIMS, IN_DIMS + 1);
+    for (size_t i = 1; i < N_LAYERS; i++)
+        layers[i] = m_random(HIDDEN_DIMS, HIDDEN_DIMS);
+    layers[N_LAYERS - 1] = m_random(OUT_DIMS, HIDDEN_DIMS);
+
+    // Read in the training points.
+    struct vec train_inputs[128];
+    struct vec train_outputs[128];
+    size_t n_train_points = 0;
+    for (; read_vec(IN_DIMS, &train_inputs[n_train_points]); n_train_points++) {
+        append_bias(&train_inputs[n_train_points]);
+        read_vec(OUT_DIMS, &train_outputs[n_train_points]);
+    }
+    printf("Read %lu training points.\n", n_train_points);
+
+    // Do the training.
+    size_t n_iters;
+    float learn_rate;
+    assert(scanf(" train %lu %f ", &n_iters, &learn_rate) == 2);
+    for (size_t _iter = 0; _iter < n_iters; _iter++) {
+        for (size_t i = 0; i < n_train_points; i++) {
+            struct vec intermediates[N_LAYERS + 1];
+            intermediates[0] = train_inputs[i];
+            forward(layers, intermediates, N_LAYERS);
+            loss_bp(&intermediates[N_LAYERS], train_outputs[i], learn_rate);
+            backward(layers, intermediates, N_LAYERS);
+        }
+    }
+
+    // Do the testing.
+    struct vec input;
+    while (!feof(stdin) && read_vec(IN_DIMS, &input)) {
+        append_bias(&input);
+
+        struct vec intermediates[N_LAYERS + 1];
+        intermediates[0] = input;
+        forward(layers, intermediates, N_LAYERS);
+        print_vec(intermediates[N_LAYERS]);
+    }
+
+    return 0;
+}
diff --git a/neural-net/nn.c b/neural-net/nn.c
new file mode 100644
index 0000000..9ad3346
--- /dev/null
+++ b/neural-net/nn.c
@@ -0,0 +1,88 @@
+#include "nn.h"
+
+static struct vec get_row(struct mat mat, int i) {
+    struct vec row = {mat.cols};
+    for (int k = 0; k < row.n; k++) row.data[k] = mat.data[i][k];
+    return row;
+}
+static void add_vec(struct vec *out, struct vec in) {
+    for (int k = 0; k < in.n; k++) out->data[k] += in.data[k];
+}
+static void add_row(struct mat *mat, int i, struct vec row) {
+    for (int k = 0; k < row.n; k++) mat->data[i][k] += row.data[k];
+}
+void add_mat(struct mat *out, struct mat in) {
+    for (int i = 0; i < out->rows; i++)
+        add_row(out, i, get_row(in, i));
+}
+
+// FORWARD PROPAGATION OPERATIONS
+float vv(struct vec A, struct vec B) {
+    float result = 0.;
+    for (size_t i = 0; i < A.n; i++)
+        result += A.data[i] * B.data[i];
+    return result;
+}
+
+struct vec mv(struct mat mat, struct vec vec) {
+    struct vec out = {mat.rows, {0}};
+    for (size_t i = 0; i < mat.rows; i++)
+        out.data[i] = vv(get_row(mat, i), vec);
+    return out;
+}
+
+struct vec v_relu(struct vec vec) {
+    for (size_t i = 0; i < vec.n; i++)
+        vec.data[i] = (vec.data[i] > 0.) ? vec.data[i] : 0.;
+    return vec;
+}
+
+// BACKWARD PROPAGATION OPERATIONS
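+// convention for the _bp functions: @out_deltas (or @out_delta) is how much
+// we want the operation's *output* to change; the return value is the
+// matching deltas for whichever argument is named "variable", treating the
+// other argument ("constant") as fixed. note vv_bp never reads @variable:
+// the derivative of constant.variable with respect to variable is just
+// constant, scaled by the out delta.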
+static struct vec vv_bp(struct vec constant, struct vec variable, float out_delta) {
+    struct vec out = {constant.n, {0}};
+    for (size_t i = 0; i < out.n; i++)
+        out.data[i] = constant.data[i] * out_delta;
+    return out;
+}
+
+struct vec mv_bp_v(struct mat constant, struct vec variable, struct vec out_deltas) {
+    struct vec in_deltas = {variable.n, {0}};
+    for (size_t i = 0; i < out_deltas.n; i++)
+        add_vec(&in_deltas,
+                vv_bp(get_row(constant, i), variable, out_deltas.data[i]));
+    return in_deltas;
+}
+
+struct mat mv_bp_m(struct mat variable, struct vec constant, struct vec out_deltas) {
+    struct mat weight_deltas = {variable.rows, variable.cols, {0}};
+    for (size_t i = 0; i < out_deltas.n; i++)
+        add_row(&weight_deltas, i,
+                vv_bp(constant, get_row(variable, i), out_deltas.data[i]));
+    return weight_deltas;
+}
+
+struct vec v_relu_bp(struct vec vec_in, struct vec deltas) {
+    for (size_t i = 0; i < deltas.n; i++)
+        deltas.data[i] = (vec_in.data[i] > 0.) ? deltas.data[i] : 0.;
+    return deltas;
+}
+
+static float randf() {
+    int centered = rand() - (RAND_MAX / 2);
+    return ((float)centered) / (RAND_MAX / 2);
+}
+
+struct mat m_random(size_t rows, size_t cols) {
+    struct mat mat = {rows, cols, {0}};
+    for (size_t i = 0; i < rows; i++)
+        for (size_t j = 0; j < cols; j++)
+            mat.data[i][j] = randf();
+    return mat;
+}
+
+void print_vec(struct vec vec) {
+    printf("[");
+    for (size_t i = 0; i < vec.n; i++)
+        printf("%f, ", vec.data[i]);
+    printf("]\n");
+}
diff --git a/neural-net/nn.h b/neural-net/nn.h
new file mode 100644
index 0000000..1b34c42
--- /dev/null
+++ b/neural-net/nn.h
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <assert.h>
+
+#define MAX_DIMS 256
+#define MAX_LAYERS 8
+
+struct vec {
+    size_t n;
+    float data[256];
+};
+
+struct mat {
+    size_t rows, cols;
+    float data[256][256];
+};
+
+struct vec mv(struct mat mat, struct vec vec);
+struct mat mm(struct mat mat1, struct mat mat2);
+struct mat m_random(size_t rows, size_t cols);
+struct vec mv_bp_v(struct mat constant, struct vec variable, struct vec out_deltas);
+struct mat mv_bp_m(struct mat variable, struct vec constant, struct vec out_deltas);
+struct vec v_relu(struct vec vec);
+struct vec v_relu_bp(struct vec vec_in, struct vec deltas);
+struct mat m_random(size_t rows, size_t cols);
+void add_mat(struct mat *out, struct mat in);
+void print_vec(struct vec vec);
diff --git a/neural-net/xor_input b/neural-net/xor_input
new file mode 100644
index 0000000..0cfc3eb
--- /dev/null
+++ b/neural-net/xor_input
@@ -0,0 +1,16 @@
+in size 2
+out size 1
+hidden size 10
+n layers 3
+0 0 0
+0 1 1
+1 0 1
+1 1 0
+train 150 0.05
+0 1
+1 0
+0 0
+1 1
+0.8 0.99
+0.95 0.86
+0.95 0.1
-- 
cgit v1.2.3