author     Matthew Sotoudeh <matthew@masot.net>    2023-08-22 23:37:01 -0700
committer  Matthew Sotoudeh <matthew@masot.net>    2023-08-22 23:37:01 -0700
commit     9924c2d54693db09a334240f2a4863b4023c0cc9 (patch)
tree       6174625d8c93bdac36d03994dd73a0ae8f406912
parent     65ceac5d8c2a82f3dd69980971d6bf3b6101b67c (diff)

neural net labs (HEAD, master)
-rw-r--r--  README                    9
-rw-r--r--  neural-net/.gitignore     1
-rw-r--r--  neural-net/Makefile       9
-rw-r--r--  neural-net/README        75
-rw-r--r--  neural-net/main.c       102
-rw-r--r--  neural-net/nn.c          88
-rw-r--r--  neural-net/nn.h          29
-rw-r--r--  neural-net/xor_input     16
8 files changed, 328 insertions, 1 deletion
diff --git a/README b/README
index 9708900..eeaaf78 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-Labs I've written for classes
+Labs I've written for classes:
- sat-chaff: implement a basic DPLL SAT solver, profile it, then optimize
it with the watched literals trick from Chaff
@@ -8,3 +8,10 @@ Labs I've written for classes
- static-analysis: implement a mini static analysis pass that finds some
bugs in the Linux kernel
+
+There are some "work-in-progress" labs:
+
+ - neural-net: implement a simple feed-forward neural net training &
+   inference program. The code is kind of a mess right now, but it's the
+   smallest/simplest backprop implementation I know of. One of our 240lx
+   students (Luca Pistor) even got it running on the Pi.
diff --git a/neural-net/.gitignore b/neural-net/.gitignore
new file mode 100644
index 0000000..ba2906d
--- /dev/null
+++ b/neural-net/.gitignore
@@ -0,0 +1 @@
+main
diff --git a/neural-net/Makefile b/neural-net/Makefile
new file mode 100644
index 0000000..95c7828
--- /dev/null
+++ b/neural-net/Makefile
@@ -0,0 +1,9 @@
+CFLAGS += -O3
+CFLAGS += -g
+
+main: main.c nn.c
+
+clean:
+ rm -f main
+
+.PHONY: clean
diff --git a/neural-net/README b/neural-net/README
new file mode 100644
index 0000000..c9da1f3
--- /dev/null
+++ b/neural-net/README
@@ -0,0 +1,75 @@
+Note: the code is a bit of a mess; I wouldn't teach this as-is. But at least
+it's tiny, and from scratch!
+
+==== USAGE ====
+Quickstart:
+
+ $ make
+ $ cat xor_input | ./main
+ Read 4 training points.
+ [1.000000, ]
+ [0.999998, ]
+ [0.000002, ]
+ [0.000001, ]
+ [0.146817, ]
+ [0.154475, ]
+ [0.914473, ]
+
+The `main` program trains a neural net; in this example, 2 inputs and 1
+output. You provide the network shape, training set, hyperparameters, and
+test set on stdin. The input format is as follows:
+
+ $ cat xor_input
+ in size 2
+ out size 1
+ hidden size 10
+ n layers 3
+ 0 0 0
+ 0 1 1
+ 1 0 1
+ 1 1 0
+ train 150 0.05
+ 0 1
+ 1 0
+ 0 0
+ 1 1
+ 0.8 0.99
+ 0.95 0.86
+ 0.95 0.1
+
+The first few lines describe the network. The lines before the "train"
+statement give training points; each training point line is the input values
+followed by the output values for a single point. Then `train [iters] [lr]`
+trains the neural net on those points for the given number of iterations at
+the given learning rate. Finally, each line after that is treated as a test
+input: the trained neural net is run on it and its output is printed to
+stdout.
+
+==== HOW? ====
+The wrapper code is in main.c; the core operations are in nn.c.
+
+The naming scheme is:
+ - v_...: vector operation
+ - vv_...: vector-vector operation
+ - mv_...: matrix-vector operation
+ - ..._bp...: backwards operation (compute derivative)
+
+Note that when computing the derivative of, say, a mat-vec multiplication, we
+can ask for the derivative with respect to either the matrix (weights) or the
+vector (input). These are called mv_bp_m and mv_bp_v, respectively.
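+
+For example (a made-up shape, just for intuition): if a layer is a 2x3 matrix
+W and its input v has 3 entries, the forward pass computes out = W*v (2
+entries). Given 2 out-deltas, mv_bp_m(W, v, out_deltas) returns a 2x3 matrix
+of weight updates, and mv_bp_v(W, v, out_deltas) returns the 3 deltas to hand
+to the previous layer.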
+
+The forward operations are exactly what you would expect.
+
+For the backwards operations, the intuitive thing to remember is just
+"everything is linear, so derivatives add up." I've tried to decompose all of
+the backprop operations into smaller ones: a mat-vec multiplication is just
+one vec-vec dot product per output entry, so the backprop for a mat-vec
+multiplication is just sums of the backprops of those dot products.
+Meanwhile, vec-vec backprop can be understood intuitively:
+
+ v.w = v1*w1 + v2*w2 + ... + vn*wn
+ well, the derivative of v.w wrt v1 is w1
+ etc. for vi
+ and then we multiply by 'how much we want it to change', i.e., the
+ backprop'd value from the previous layer.
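+
+For a tiny worked example (made-up numbers, just to check the rule): take
+v = [2, 3], w = [5, 7], and an incoming backprop'd value d = 0.1. Then
+
+    d(v.w)/dv = w, so the delta for v is [5*0.1, 7*0.1] = [0.5, 0.7]
+    d(v.w)/dw = v, so the delta for w is [2*0.1, 3*0.1] = [0.2, 0.3]
+
+which is exactly what vv_bp in nn.c does: scale the "constant" argument by
+the incoming delta.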
diff --git a/neural-net/main.c b/neural-net/main.c
new file mode 100644
index 0000000..3001bde
--- /dev/null
+++ b/neural-net/main.c
@@ -0,0 +1,102 @@
+#include "nn.h"
+
+static size_t IN_DIMS, OUT_DIMS, HIDDEN_DIMS, N_LAYERS;
+
+void append_bias(struct vec *vec) {
+ vec->data[vec->n++] = 1.;
+}
+
+int read_vec(size_t n, struct vec *out) {
+ out->n = n;
+ for (size_t i = 0; i < n; i++)
+ if (scanf(" %f ", &out->data[i]) != 1)
+ return 0;
+ return 1;
+}
+
+// neural net described by @layers. the input vector is @intermediates[0].
+// intermediate layer values are placed in @intermediates.
+void forward(struct mat layers[], struct vec intermediates[], size_t n_layers) {
+ for (size_t i = 0; i < n_layers; i++) {
+ intermediates[i + 1] = mv(layers[i], intermediates[i]);
+ if (i + 1 != n_layers)
+ intermediates[i + 1] = v_relu(intermediates[i + 1]);
+ }
+}
+
+// computes the derivative for the output nodes. @desired is the true labels;
+// @out is the current output of the model. the derivative is computed
+// in-place, by overwriting @out.
+// note: 'derivative' here is a bit of a rough description ...
+void loss_bp(struct vec *out, struct vec desired, float learn_rate) {
+ for (size_t i = 0; i < out->n; i++)
+ out->data[i] = (desired.data[i] - out->data[i]) * learn_rate;
+}
+
+// backpropagates in-place. assumes @intermediates[n_layers] contains the
+// derivative for the last layer. during @ith loop iteration, we compute the
+// derivative for the @i-1 layer from the derivative of the @ith layer.
+// derivative is computed in-place.
+void backward(struct mat layers[], struct vec intermediates[], size_t n_layers) {
+ for (size_t i = n_layers; i > 0; i--) {
+ // need to recompute the forward pass because we've already overwritten
+ // @intermediates[i].
+ struct vec pre_relu = mv(layers[i - 1], intermediates[i - 1]);
+ struct vec pre_relu_deltas = v_relu_bp(pre_relu, intermediates[i]);
+ if (i == n_layers)
+ pre_relu_deltas = intermediates[i];
+ struct mat layer_delta = mv_bp_m(layers[i - 1], intermediates[i - 1], pre_relu_deltas);
+ struct vec in_delta = mv_bp_v(layers[i - 1], intermediates[i - 1], pre_relu_deltas);
+ intermediates[i - 1] = in_delta;
+ add_mat(&layers[i - 1], layer_delta);
+ }
+}
+
+int main() {
+ assert(scanf(" in size %lu out size %lu hidden size %lu n layers %lu ",
+ &IN_DIMS, &OUT_DIMS, &HIDDEN_DIMS, &N_LAYERS) == 4);
+ assert(IN_DIMS < MAX_DIMS && OUT_DIMS < MAX_DIMS && HIDDEN_DIMS < MAX_DIMS
+ && N_LAYERS < MAX_LAYERS);
+ struct mat layers[MAX_LAYERS];
+ layers[0] = m_random(HIDDEN_DIMS, IN_DIMS + 1);
+ for (size_t i = 1; i < N_LAYERS; i++)
+ layers[i] = m_random(HIDDEN_DIMS, HIDDEN_DIMS);
+ layers[N_LAYERS - 1] = m_random(OUT_DIMS, HIDDEN_DIMS);
+
+ // Read in the training points.
+ struct vec train_inputs[128];
+ struct vec train_outputs[128];
+ size_t n_train_points = 0;
+ for (; read_vec(IN_DIMS, &train_inputs[n_train_points]); n_train_points++) {
+ append_bias(&train_inputs[n_train_points]);
+ read_vec(OUT_DIMS, &train_outputs[n_train_points]);
+ }
+ printf("Read %lu training points.\n", n_train_points);
+
+ // Do the training.
+ size_t n_iters;
+ float learn_rate;
+ assert(scanf(" train %lu %f ", &n_iters, &learn_rate) == 2);
+ for (size_t _iter = 0; _iter < n_iters; _iter++) {
+ for (size_t i = 0; i < n_train_points; i++) {
+ struct vec intermediates[N_LAYERS + 1];
+ intermediates[0] = train_inputs[i];
+ forward(layers, intermediates, N_LAYERS);
+ loss_bp(&intermediates[N_LAYERS], train_outputs[i], learn_rate);
+ backward(layers, intermediates, N_LAYERS);
+ }
+ }
+
+ // Do the testing.
+ struct vec input;
+ while (!feof(stdin) && read_vec(IN_DIMS, &input)) {
+ append_bias(&input);
+
+ struct vec intermediates[N_LAYERS + 1];
+ intermediates[0] = input;
+ forward(layers, intermediates, N_LAYERS);
+ print_vec(intermediates[N_LAYERS]);
+ }
+
+ return 0;
+}
diff --git a/neural-net/nn.c b/neural-net/nn.c
new file mode 100644
index 0000000..9ad3346
--- /dev/null
+++ b/neural-net/nn.c
@@ -0,0 +1,88 @@
+#include "nn.h"
+
+static struct vec get_row(struct mat mat, int i) {
+ struct vec row = {mat.cols};
+ for (int k = 0; k < row.n; k++) row.data[k] = mat.data[i][k];
+ return row;
+}
+static void add_vec(struct vec *out, struct vec in) {
+ for (int k = 0; k < in.n; k++) out->data[k] += in.data[k];
+}
+static void add_row(struct mat *mat, int i, struct vec row) {
+ for (int k = 0; k < row.n; k++) mat->data[i][k] += row.data[k];
+}
+void add_mat(struct mat *out, struct mat in) {
+ for (int i = 0; i < out->rows; i++)
+ add_row(out, i, get_row(in, i));
+}
+
+// FORWARD PROPAGATION OPERATIONS
+float vv(struct vec A, struct vec B) {
+ float result = 0.;
+ for (size_t i = 0; i < A.n; i++)
+ result += A.data[i] * B.data[i];
+ return result;
+}
+
+struct vec mv(struct mat mat, struct vec vec) {
+ struct vec out = {mat.rows, {0}};
+ for (size_t i = 0; i < mat.rows; i++)
+ out.data[i] = vv(get_row(mat, i), vec);
+ return out;
+}
+
+struct vec v_relu(struct vec vec) {
+ for (size_t i = 0; i < vec.n; i++)
+ vec.data[i] = (vec.data[i] > 0.) ? vec.data[i] : 0.;
+ return vec;
+}
+
+// BACKWARD PROPAGATION OPERATIONS
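+// delta for @variable in the dot product vv(constant, variable): since
+// d(c.v)/dv_i = c_i, this is just @constant scaled by @out_delta.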
+static struct vec vv_bp(struct vec constant, struct vec variable, float out_delta) {
+ struct vec out = {constant.n, {0}};
+ for (size_t i = 0; i < out.n; i++)
+ out.data[i] = constant.data[i] * out_delta;
+ return out;
+}
+
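+// delta for the input vector @variable in mv(constant, variable): the sum of
+// each row's vv_bp contribution.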
+struct vec mv_bp_v(struct mat constant, struct vec variable, struct vec out_deltas) {
+ struct vec in_deltas = {variable.n, {0}};
+ for (size_t i = 0; i < out_deltas.n; i++)
+ add_vec(&in_deltas,
+ vv_bp(get_row(constant, i), variable, out_deltas.data[i]));
+ return in_deltas;
+}
+
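+// deltas for the weight matrix @variable in mv(variable, constant): row i of
+// the update is the input @constant scaled by out_deltas[i].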
+struct mat mv_bp_m(struct mat variable, struct vec constant, struct vec out_deltas) {
+ struct mat weight_deltas = {variable.rows, variable.cols, {0}};
+ for (size_t i = 0; i < out_deltas.n; i++)
+ add_row(&weight_deltas, i,
+ vv_bp(constant, get_row(variable, i), out_deltas.data[i]));
+ return weight_deltas;
+}
+
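+// backprop through relu: keep each delta only where the pre-activation
+// @vec_in was positive, zero it elsewhere.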
+struct vec v_relu_bp(struct vec vec_in, struct vec deltas) {
+ for (size_t i = 0; i < deltas.n; i++)
+ deltas.data[i] = (vec_in.data[i] > 0.) ? deltas.data[i] : 0.;
+ return deltas;
+}
+
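+// roughly uniform random float in [-1, 1]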
+static float randf() {
+ int centered = rand() - (RAND_MAX / 2);
+ return ((float)centered) / (RAND_MAX / 2);
+}
+
+struct mat m_random(size_t rows, size_t cols) {
+ struct mat mat = {rows, cols, {0}};
+ for (size_t i = 0; i < rows; i++)
+ for (size_t j = 0; j < cols; j++)
+ mat.data[i][j] = randf();
+ return mat;
+}
+
+void print_vec(struct vec vec) {
+ printf("[");
+ for (size_t i = 0; i < vec.n; i++)
+ printf("%f, ", vec.data[i]);
+ printf("]\n");
+}
diff --git a/neural-net/nn.h b/neural-net/nn.h
new file mode 100644
index 0000000..1b34c42
--- /dev/null
+++ b/neural-net/nn.h
@@ -0,0 +1,29 @@
+#include <stddef.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_DIMS 256
+#define MAX_LAYERS 8
+
+struct vec {
+    size_t n;
+    float data[MAX_DIMS];
+};
+
+struct mat {
+    size_t rows, cols;
+    float data[MAX_DIMS][MAX_DIMS];
+};
+
+struct vec mv(struct mat mat, struct vec vec);
+struct mat mm(struct mat mat1, struct mat mat2);
+struct mat m_random(size_t rows, size_t cols);
+struct vec mv_bp_v(struct mat constant, struct vec variable, struct vec out_deltas);
+struct mat mv_bp_m(struct mat variable, struct vec constant, struct vec out_deltas);
+struct vec v_relu(struct vec vec);
+struct vec v_relu_bp(struct vec vec_in, struct vec deltas);
+void add_mat(struct mat *out, struct mat in);
+void print_vec(struct vec vec);
diff --git a/neural-net/xor_input b/neural-net/xor_input
new file mode 100644
index 0000000..0cfc3eb
--- /dev/null
+++ b/neural-net/xor_input
@@ -0,0 +1,16 @@
+in size 2
+out size 1
+hidden size 10
+n layers 3
+0 0 0
+0 1 1
+1 0 1
+1 1 0
+train 150 0.05
+0 1
+1 0
+0 0
+1 1
+0.8 0.99
+0.95 0.86
+0.95 0.1