From 9924c2d54693db09a334240f2a4863b4023c0cc9 Mon Sep 17 00:00:00 2001
From: Matthew Sotoudeh
Date: Tue, 22 Aug 2023 23:37:01 -0700
Subject: neural net labs

---
 README                |   9 ++++-
 neural-net/.gitignore |   1 +
 neural-net/Makefile   |   9 +++++
 neural-net/README     |  75 +++++++++++++++++++++++++++++++++++++
 neural-net/main.c     | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++
 neural-net/nn.c       |  88 +++++++++++++++++++++++++++++++++++++++++++
 neural-net/nn.h       |  29 ++++++++++++++
 neural-net/xor_input  |  16 ++++++++
 8 files changed, 328 insertions(+), 1 deletion(-)
 create mode 100644 neural-net/.gitignore
 create mode 100644 neural-net/Makefile
 create mode 100644 neural-net/README
 create mode 100644 neural-net/main.c
 create mode 100644 neural-net/nn.c
 create mode 100644 neural-net/nn.h
 create mode 100644 neural-net/xor_input

diff --git a/README b/README
index 9708900..eeaaf78 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-Labs I've written for classes
+Labs I've written for classes:
 
  - sat-chaff: implement a basic DPLL SAT solver, profile it, then optimize it
    with the watched literals trick from Chaff
@@ -8,3 +8,10 @@ Labs I've written for classes
 
  - static-analysis: implement a mini static analysis pass that finds some
    bugs in the Linux kernel
+
+There are some "work-in-progress" labs:
+
+ - neural-net: implement a simple feed-forward neural net training &
+   inference program. The code is kind of a mess right now, but it's the
+   smallest/simplest backprop implementation I know of. One of our 240lx
+   students (Luca Pistor) even got it running on the pi.
diff --git a/neural-net/.gitignore b/neural-net/.gitignore
new file mode 100644
index 0000000..ba2906d
--- /dev/null
+++ b/neural-net/.gitignore
@@ -0,0 +1 @@
+main
diff --git a/neural-net/Makefile b/neural-net/Makefile
new file mode 100644
index 0000000..95c7828
--- /dev/null
+++ b/neural-net/Makefile
@@ -0,0 +1,9 @@
+CFLAGS += -O3
+CFLAGS += -g
+
+main: main.c nn.c
+
+clean:
+	rm -f main
+
+.PHONY: clean
diff --git a/neural-net/README b/neural-net/README
new file mode 100644
index 0000000..c9da1f3
--- /dev/null
+++ b/neural-net/README
@@ -0,0 +1,75 @@
+Note: the code is a bit of a mess; I wouldn't teach this as-is. But at least
+it's tiny, and from scratch!
+
+==== USAGE ====
+Quickstart:
+
+    $ make
+    $ cat xor_input | ./main
+    Read 4 training points.
+    [1.000000, ]
+    [0.999998, ]
+    [0.000002, ]
+    [0.000001, ]
+    [0.146817, ]
+    [0.154475, ]
+    [0.914473, ]
+
+The `main` program trains a feed-forward neural net (here, one with 2 inputs
+and 1 output). You provide the network shape, training set, hyperparameters,
+and test set on stdin. The input format is as follows:
+
+    $ cat xor_input
+    in size 2
+    out size 1
+    hidden size 10
+    n layers 3
+    0 0 0
+    0 1 1
+    1 0 1
+    1 1 0
+    train 150 0.05
+    0 1
+    1 0
+    0 0
+    1 1
+    0.8 0.99
+    0.95 0.86
+    0.95 0.1
+
+The first few lines describe the network. Any lines before the "train"
+statement give training points: each line is the input values followed by the
+desired output values for a single point (so "0 1 1" means input (0, 1) with
+desired output 1). Then `train [iters] [lr]` trains the neural net on those
+points for the given number of iterations at the given learning rate. Finally,
+the lines after that are treated as test inputs: the trained neural net is run
+on each one and its output is printed on stdout.
+
+==== HOW? ====
+The wrapper code is in main.c, the core operations are in nn.c.
+
+The naming scheme is:
+ - v_...: vector operation
+ - vv_...: vector-vector operation
+ - mv_...: matrix-vector operation
+ - ..._bp...: backwards operation (compute derivative)
+
+Note that when computing the derivative of, say, a mat-vec multiplication, we
+can ask for the derivative with respect to either the matrix (weights) or the
+vector (input). These are called mv_bp_m and mv_bp_v, respectively.
+
+The forward operations are exactly what you would expect.
+
+For the backwards operations, the intuitive thing to remember is that
+"everything is linear, so derivatives add up." I've tried to decompose all of
+the backprop operations in terms of smaller operations: mat-vec multiplication
+can be defined as sums of vec-vec multiplications, so the backprop for mat-vec
+multiplication can be defined as sums of backprops of vec-vec multiplications.
+Meanwhile, vec-vec backprop can be understood intuitively:
+
+    v.w = v1*w1 + v2*w2 + ... + vn*wn
+    the derivative of v.w wrt v1 is w1 (and likewise, wrt vi it is wi)
+    and then we multiply by 'how much we want the output to change', i.e.,
+    the backprop'd value from the previous layer.
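+
+As a concrete sketch (not in the code, it just restates what nn.c computes):
+take a weight matrix M with rows r1..rm and an input vector v, so row i of
+M*v is ri.v. If the backprop'd deltas for the outputs are d1..dm, then
+
+    deltas for v  = r1*d1 + r2*d2 + ... + rm*dm
+    deltas for ri = v*di
+
+which is exactly what mv_bp_v does (one vv_bp per row, summed into the input
+deltas) and what mv_bp_m does (one vv_bp per row, stacked into the matrix of
+weight deltas).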
diff --git a/neural-net/main.c b/neural-net/main.c
new file mode 100644
index 0000000..3001bde
--- /dev/null
+++ b/neural-net/main.c
@@ -0,0 +1,102 @@
+#include "nn.h"
+
+static size_t IN_DIMS, OUT_DIMS, HIDDEN_DIMS, N_LAYERS;
+
+void append_bias(struct vec *vec) {
+    vec->data[vec->n++] = 1.;
+}
+
+int read_vec(size_t n, struct vec *out) {
+    out->n = n;
+    for (size_t i = 0; i < n; i++)
+        if (scanf(" %f ", &out->data[i]) != 1)
+            return 0;
+    return 1;
+}
+
+// neural net described by @layers. the input vector is @intermediates[0].
+// intermediate layer values are placed in @intermediates.
+void forward(struct mat layers[], struct vec intermediates[], size_t n_layers) {
+    for (size_t i = 0; i < n_layers; i++) {
+        intermediates[i + 1] = mv(layers[i], intermediates[i]);
+        if (i + 1 != n_layers)
+            intermediates[i + 1] = v_relu(intermediates[i + 1]);
+    }
+}
+
+// computes the derivative for the output nodes. @desired is the true labels;
+// @out is the current output of the model. the derivative is computed
+// in-place, by overwriting @out.
+// note: 'derivative' here is a bit of a rough description ...
+void loss_bp(struct vec *out, struct vec desired, float learn_rate) {
+    for (size_t i = 0; i < out->n; i++)
+        out->data[i] = (desired.data[i] - out->data[i]) * learn_rate;
+}
+
+// backpropagates in-place. assumes @intermediates[n_layers] contains the
+// derivative for the last layer. during @ith loop iteration, we compute the
+// derivative for the @i-1 layer from the derivative of the @ith layer.
+// derivative is computed in-place.
+void backward(struct mat layers[], struct vec intermediates[], size_t n_layers) {
+    for (size_t i = n_layers; i > 0; i--) {
+        // need to recompute the forward pass because we've already overwritten
+        // @intermediates[i].
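+        // at this point @intermediates[i] holds the *deltas* for layer @i's
+        // output, not its values: loss_bp put them there for the top layer,
+        // and the previous iteration put them there for lower layers.
+        // pre_relu_deltas pushes those deltas back through the relu (the
+        // output layer has no relu, hence the special case below),
+        // layer_delta is the update for this layer's weights, and in_delta
+        // becomes the deltas for layer @i-1 on the next iteration.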
+        struct vec pre_relu = mv(layers[i - 1], intermediates[i - 1]);
+        struct vec pre_relu_deltas = v_relu_bp(pre_relu, intermediates[i]);
+        if (i == n_layers)
+            pre_relu_deltas = intermediates[i];
+        struct mat layer_delta = mv_bp_m(layers[i - 1], intermediates[i - 1], pre_relu_deltas);
+        struct vec in_delta = mv_bp_v(layers[i - 1], intermediates[i - 1], pre_relu_deltas);
+        intermediates[i - 1] = in_delta;
+        add_mat(&layers[i - 1], layer_delta);
+    }
+}
+
+int main() {
+    assert(scanf(" in size %lu out size %lu hidden size %lu n layers %lu ",
+                 &IN_DIMS, &OUT_DIMS, &HIDDEN_DIMS, &N_LAYERS) == 4);
+    assert(IN_DIMS < MAX_DIMS && OUT_DIMS < MAX_DIMS && HIDDEN_DIMS < MAX_DIMS
+           && N_LAYERS < MAX_LAYERS);
+    struct mat layers[MAX_LAYERS];
+    layers[0] = m_random(HIDDEN_DIMS, IN_DIMS + 1);
+    for (size_t i = 1; i < N_LAYERS; i++)
+        layers[i] = m_random(HIDDEN_DIMS, HIDDEN_DIMS);
+    layers[N_LAYERS - 1] = m_random(OUT_DIMS, HIDDEN_DIMS);
+
+    // Read in the training points.
+    struct vec train_inputs[128];
+    struct vec train_outputs[128];
+    size_t n_train_points = 0;
+    for (; read_vec(IN_DIMS, &train_inputs[n_train_points]); n_train_points++) {
+        append_bias(&train_inputs[n_train_points]);
+        read_vec(OUT_DIMS, &train_outputs[n_train_points]);
+    }
+    printf("Read %lu training points.\n", n_train_points);
+
+    // Do the training.
+    size_t n_iters;
+    float learn_rate;
+    assert(scanf(" train %lu %f ", &n_iters, &learn_rate) == 2);
+    for (size_t _iter = 0; _iter < n_iters; _iter++) {
+        for (size_t i = 0; i < n_train_points; i++) {
+            struct vec intermediates[N_LAYERS + 1];
+            intermediates[0] = train_inputs[i];
+            forward(layers, intermediates, N_LAYERS);
+            loss_bp(&intermediates[N_LAYERS], train_outputs[i], learn_rate);
+            backward(layers, intermediates, N_LAYERS);
+        }
+    }
+
+    // Do the testing.
+    struct vec input;
+    while (!feof(stdin) && read_vec(IN_DIMS, &input)) {
+        append_bias(&input);
+
+        struct vec intermediates[N_LAYERS + 1];
+        intermediates[0] = input;
+        forward(layers, intermediates, N_LAYERS);
+        print_vec(intermediates[N_LAYERS]);
+    }
+
+    return 0;
+}
diff --git a/neural-net/nn.c b/neural-net/nn.c
new file mode 100644
index 0000000..9ad3346
--- /dev/null
+++ b/neural-net/nn.c
@@ -0,0 +1,88 @@
+#include "nn.h"
+
+static struct vec get_row(struct mat mat, int i) {
+    struct vec row = {mat.cols};
+    for (int k = 0; k < row.n; k++) row.data[k] = mat.data[i][k];
+    return row;
+}
+static void add_vec(struct vec *out, struct vec in) {
+    for (int k = 0; k < in.n; k++) out->data[k] += in.data[k];
+}
+static void add_row(struct mat *mat, int i, struct vec row) {
+    for (int k = 0; k < row.n; k++) mat->data[i][k] += row.data[k];
+}
+void add_mat(struct mat *out, struct mat in) {
+    for (int i = 0; i < out->rows; i++)
+        add_row(out, i, get_row(in, i));
+}
+
+// FORWARD PROPAGATION OPERATIONS
+float vv(struct vec A, struct vec B) {
+    float result = 0.;
+    for (size_t i = 0; i < A.n; i++)
+        result += A.data[i] * B.data[i];
+    return result;
+}
+
+struct vec mv(struct mat mat, struct vec vec) {
+    struct vec out = {mat.rows, {0}};
+    for (size_t i = 0; i < mat.rows; i++)
+        out.data[i] = vv(get_row(mat, i), vec);
+    return out;
+}
+
+struct vec v_relu(struct vec vec) {
+    for (size_t i = 0; i < vec.n; i++)
+        vec.data[i] = (vec.data[i] > 0.) ? vec.data[i] : 0.;
+    return vec;
+}
+
+// BACKWARD PROPAGATION OPERATIONS
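+// convention for the _bp functions: @out_deltas (or @out_delta) is how much
+// we want the operation's *output* to change; the return value is the
+// matching deltas for whichever argument is named "variable", treating the
+// other argument ("constant") as fixed. note vv_bp never reads @variable:
+// the derivative of constant.variable with respect to variable is just
+// constant, scaled by the out delta.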
+static struct vec vv_bp(struct vec constant, struct vec variable, float out_delta) {
+    struct vec out = {constant.n, {0}};
+    for (size_t i = 0; i < out.n; i++)
+        out.data[i] = constant.data[i] * out_delta;
+    return out;
+}
+
+struct vec mv_bp_v(struct mat constant, struct vec variable, struct vec out_deltas) {
+    struct vec in_deltas = {variable.n, {0}};
+    for (size_t i = 0; i < out_deltas.n; i++)
+        add_vec(&in_deltas,
+                vv_bp(get_row(constant, i), variable, out_deltas.data[i]));
+    return in_deltas;
+}
+
+struct mat mv_bp_m(struct mat variable, struct vec constant, struct vec out_deltas) {
+    struct mat weight_deltas = {variable.rows, variable.cols, {0}};
+    for (size_t i = 0; i < out_deltas.n; i++)
+        add_row(&weight_deltas, i,
+                vv_bp(constant, get_row(variable, i), out_deltas.data[i]));
+    return weight_deltas;
+}
+
+struct vec v_relu_bp(struct vec vec_in, struct vec deltas) {
+    for (size_t i = 0; i < deltas.n; i++)
+        deltas.data[i] = (vec_in.data[i] > 0.) ? deltas.data[i] : 0.;
+    return deltas;
+}
+
+static float randf() {
+    int centered = rand() - (RAND_MAX / 2);
+    return ((float)centered) / (RAND_MAX / 2);
+}
+
+struct mat m_random(size_t rows, size_t cols) {
+    struct mat mat = {rows, cols, {0}};
+    for (size_t i = 0; i < rows; i++)
+        for (size_t j = 0; j < cols; j++)
+            mat.data[i][j] = randf();
+    return mat;
+}
+
+void print_vec(struct vec vec) {
+    printf("[");
+    for (size_t i = 0; i < vec.n; i++)
+        printf("%f, ", vec.data[i]);
+    printf("]\n");
+}
diff --git a/neural-net/nn.h b/neural-net/nn.h
new file mode 100644
index 0000000..1b34c42
--- /dev/null
+++ b/neural-net/nn.h
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <assert.h>
+
+#define MAX_DIMS 256
+#define MAX_LAYERS 8
+
+struct vec {
+    size_t n;
+    float data[256];
+};
+
+struct mat {
+    size_t rows, cols;
+    float data[256][256];
+};
+
+struct vec mv(struct mat mat, struct vec vec);
+struct mat mm(struct mat mat1, struct mat mat2);
+struct mat m_random(size_t rows, size_t cols);
+struct vec mv_bp_v(struct mat constant, struct vec variable, struct vec out_deltas);
+struct mat mv_bp_m(struct mat variable, struct vec constant, struct vec out_deltas);
+struct vec v_relu(struct vec vec);
+struct vec v_relu_bp(struct vec vec_in, struct vec deltas);
+struct mat m_random(size_t rows, size_t cols);
+void add_mat(struct mat *out, struct mat in);
+void print_vec(struct vec vec);
diff --git a/neural-net/xor_input b/neural-net/xor_input
new file mode 100644
index 0000000..0cfc3eb
--- /dev/null
+++ b/neural-net/xor_input
@@ -0,0 +1,16 @@
+in size 2
+out size 1
+hidden size 10
+n layers 3
+0 0 0
+0 1 1
+1 0 1
+1 1 0
+train 150 0.05
+0 1
+1 0
+0 0
+1 1
+0.8 0.99
+0.95 0.86
+0.95 0.1
-- 
cgit v1.2.3