Diffstat (limited to 'neural-net/main.c')
 -rw-r--r--  neural-net/main.c  102
 1 file changed, 102 insertions, 0 deletions
diff --git a/neural-net/main.c b/neural-net/main.c
new file mode 100644
index 0000000..3001bde
--- /dev/null
+++ b/neural-net/main.c

#include <assert.h>
#include <stdio.h>

#include "nn.h"

static size_t IN_DIMS, OUT_DIMS, HIDDEN_DIMS, N_LAYERS;

#define MAX_TRAIN_POINTS 128

// appends a constant 1 to @vec so the first layer's weights include a bias term.
void append_bias(struct vec *vec) {
    vec->data[vec->n++] = 1.;
}

// reads @n whitespace-separated floats into @out. returns 1 on success, 0 if
// the input ends or fails to parse.
int read_vec(size_t n, struct vec *out) {
    out->n = n;
    for (size_t i = 0; i < n; i++)
        if (scanf(" %f ", &out->data[i]) != 1)
            return 0;
    return 1;
}

// runs the neural net described by @layers forward. the input vector is
// @intermediates[0]; the activations of layer @i are placed in
// @intermediates[i + 1]. ReLU is applied everywhere except the output layer.
void forward(struct mat layers[], struct vec intermediates[], size_t n_layers) {
    for (size_t i = 0; i < n_layers; i++) {
        intermediates[i + 1] = mv(layers[i], intermediates[i]);
        if (i + 1 != n_layers)
            intermediates[i + 1] = v_relu(intermediates[i + 1]);
    }
}

// computes the error signal for the output nodes. @desired is the true labels;
// @out is the current output of the model. the result is computed in-place,
// by overwriting @out.
// note: 'derivative' here is a rough description: this is the negative
// gradient of the squared loss, already scaled by the learning rate, so
// backward() can apply it directly as a weight update.
void loss_bp(struct vec *out, struct vec desired, float learn_rate) {
    for (size_t i = 0; i < out->n; i++)
        out->data[i] = (desired.data[i] - out->data[i]) * learn_rate;
}

// backpropagates in-place. assumes @intermediates[n_layers] contains the
// derivative for the last layer. during the @ith loop iteration, we compute
// the derivative for layer @i-1 from the derivative of layer @i, overwriting
// @intermediates[i - 1]. weight updates are applied as we go.
void backward(struct mat layers[], struct vec intermediates[], size_t n_layers) {
    for (size_t i = n_layers; i > 0; i--) {
        struct vec pre_relu_deltas;
        if (i == n_layers) {
            // the output layer has no ReLU; the loss signal passes straight through.
            pre_relu_deltas = intermediates[i];
        } else {
            // need to recompute this layer's pre-ReLU values because we've
            // already overwritten @intermediates[i] with a derivative.
            struct vec pre_relu = mv(layers[i - 1], intermediates[i - 1]);
            pre_relu_deltas = v_relu_bp(pre_relu, intermediates[i]);
        }
        struct mat layer_delta = mv_bp_m(layers[i - 1], intermediates[i - 1], pre_relu_deltas);
        struct vec in_delta = mv_bp_v(layers[i - 1], intermediates[i - 1], pre_relu_deltas);
        intermediates[i - 1] = in_delta;
        add_mat(&layers[i - 1], layer_delta);
    }
}

int main() {
    assert(scanf(" in size %zu out size %zu hidden size %zu n layers %zu ",
                 &IN_DIMS, &OUT_DIMS, &HIDDEN_DIMS, &N_LAYERS) == 4);
    // the layer setup below assumes a distinct first and last layer.
    assert(IN_DIMS < MAX_DIMS && OUT_DIMS < MAX_DIMS && HIDDEN_DIMS < MAX_DIMS
           && N_LAYERS >= 2 && N_LAYERS < MAX_LAYERS);
    struct mat layers[MAX_LAYERS];
    // the extra input column holds the bias weights (see append_bias()).
    layers[0] = m_random(HIDDEN_DIMS, IN_DIMS + 1);
    for (size_t i = 1; i + 1 < N_LAYERS; i++)
        layers[i] = m_random(HIDDEN_DIMS, HIDDEN_DIMS);
    layers[N_LAYERS - 1] = m_random(OUT_DIMS, HIDDEN_DIMS);

    // Read in the training points.
    struct vec train_inputs[MAX_TRAIN_POINTS];
    struct vec train_outputs[MAX_TRAIN_POINTS];
    size_t n_train_points = 0;
    for (; n_train_points < MAX_TRAIN_POINTS
           && read_vec(IN_DIMS, &train_inputs[n_train_points]); n_train_points++) {
        append_bias(&train_inputs[n_train_points]);
        // every input vector must be followed by its desired output vector.
        assert(read_vec(OUT_DIMS, &train_outputs[n_train_points]));
    }
    printf("Read %zu training points.\n", n_train_points);

    // Do the training.
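    // This is plain per-example stochastic gradient descent: each training
    // point gets its own forward pass, loss_bp() turns the output into an
    // error signal already scaled by the learning rate, and backward() both
    // propagates that signal and applies the weight updates in place. There
    // is no batching; every example updates the weights immediately.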
    size_t n_iters;
    float learn_rate;
    assert(scanf(" train %zu %f ", &n_iters, &learn_rate) == 2);
    for (size_t _iter = 0; _iter < n_iters; _iter++) {
        for (size_t i = 0; i < n_train_points; i++) {
            struct vec intermediates[N_LAYERS + 1];
            intermediates[0] = train_inputs[i];
            forward(layers, intermediates, N_LAYERS);
            loss_bp(&intermediates[N_LAYERS], train_outputs[i], learn_rate);
            backward(layers, intermediates, N_LAYERS);
        }
    }

    // Do the testing.
    struct vec input;
    while (!feof(stdin) && read_vec(IN_DIMS, &input)) {
        append_bias(&input);

        struct vec intermediates[N_LAYERS + 1];
        intermediates[0] = input;
        forward(layers, intermediates, N_LAYERS);
        print_vec(intermediates[N_LAYERS]);
    }

    return 0;
}
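For reference, nn.h is not part of this commit. Below is a hypothetical sketch of the interface main.c appears to rely on; the struct layouts, the MAX_DIMS/MAX_LAYERS values, and the helper signatures are all inferred from the call sites above, not taken from the real header.

// hypothetical nn.h, reconstructed from usage in main.c; the actual header
// in this repository may differ.
#ifndef NN_H
#define NN_H

#include <stddef.h>

#define MAX_DIMS 64     // assumed capacities; main.c only requires these to exist
#define MAX_LAYERS 16

struct vec {
    size_t n;
    float data[MAX_DIMS];
};

struct mat {
    size_t rows, cols;
    float data[MAX_DIMS][MAX_DIMS];
};

// returns a rows x cols matrix with random weights.
struct mat m_random(size_t rows, size_t cols);
// matrix-vector product.
struct vec mv(struct mat m, struct vec v);
// elementwise max(0, x).
struct vec v_relu(struct vec v);
// gradient helpers: given the forward inputs and the gradient flowing back,
// return the gradient w.r.t. the pre-ReLU values, the matrix, and the input
// vector, respectively.
struct vec v_relu_bp(struct vec pre_relu, struct vec out_grad);
struct mat mv_bp_m(struct mat m, struct vec v, struct vec out_grad);
struct vec mv_bp_v(struct mat m, struct vec v, struct vec out_grad);
// m += delta; since loss_bp() already folds in the sign and learning rate,
// this applies the weight update directly.
void add_mat(struct mat *m, struct mat delta);
void print_vec(struct vec v);

#endif

Under that interface, a stdin session for XOR might look like the following (the sizes, iteration count, and learning rate are illustrative):

in size 2 out size 1 hidden size 4 n layers 2
0 0  0
0 1  1
1 0  1
1 1  0
train 10000 0.1
0 0
1 1

read_vec() fails to parse the word "train" as a float, which is what ends the training-data loop; the vectors after the train line are then run through the trained net and printed by print_vec().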