/* * Source code to go with "Introduction to implementing neural networks" * Copyright 2015, Arjan van de Ven * Provided under the terms of the Apache-2.0 license (see LICENSE file) */ #include #include #include #include #include #include #define SYNAPSES 3 #define INPUTS 2 #define OUTPUTS 1 #define HIDDEN 2 #define BIAS 1 #define NETSIZE (BIAS + INPUTS + HIDDEN + OUTPUTS) /* convenince constants */ #define HIDDEN_START (BIAS + INPUTS) #define HIDDEN_END (BIAS + INPUTS + HIDDEN) #define OUTPUT_START (BIAS + INPUTS + HIDDEN) #define OUTPUT_END (BIAS + INPUTS + HIDDEN + OUTPUTS) #define MAXINPUT 4 /* The training set definition, a set of inputs with expected outputs */ struct input { double values[INPUTS]; int output[OUTPUTS]; }; struct input inputs[MAXINPUT]; /* The basic neuron */ struct neuron { double value; /* the (output) value of the neuron */ int inputs[SYNAPSES]; /* which neurons are the input */ double weights[SYNAPSES]; /* and their weights */ }; /* * A network is a set of neurons * * For convenience this is represented as a flat array, where * the neurons are, in order, * * count type * ========================================================================= * BIAS The bias neuron(s) for getting fixed values into the network * INPUTS The inputs to the network * HIDDEN The hidden layer neurons * OUTPUTS The outputs of the network */ struct network { struct neuron neurons[NETSIZE]; }; static double learning_rate = 0.5; /* * Filling in the training set. * For simple networks one can hardcode the inputs like this, * but for more complex problems it's usually more convenient * to load these from a file on disk. */ static void create_training_set(void) { inputs[0].values[0] = 0; inputs[0].values[1] = 0; inputs[0].output[0] = 0; inputs[1].values[0] = 1; inputs[1].values[1] = 0; inputs[1].output[0] = 1; inputs[2].values[0] = 0; inputs[2].values[1] = 1; inputs[2].output[0] = 1; inputs[3].values[0] = 1; inputs[3].values[1] = 1; inputs[3].output[0] = 0; } /* * Helper function * Return a random number between -1.0 and 1.0 */ static double urand(void) { double d; int64_t j; j = rand(); d = j / (1.0 * RAND_MAX); if (rand() & 1) d = - d; return 1.0 * d; } /* * The "activation function" of a neuron */ static double activation(double v) { return 1.0 / ( 1 + exp(-v)); } static void calculate_neuron(struct network *network, int index) { int i; double input_sum; struct neuron *neuron; neuron = &network->neurons[index]; /* add up the inputs to the neuron, multiplied by their weight */ input_sum = 0.0; for (i = 0; i < SYNAPSES; i++) { input_sum += neuron->weights[i] * network->neurons[ neuron->inputs[i] ].value; } /* and transform it through the activation function */ network->neurons[index].value = activation(input_sum); } static void calculate_network(struct network *network) { int i; for (i = INPUTS + 1; i < NETSIZE; i++) calculate_neuron(network, i); } static void update_weight(struct neuron *neuron, int index, double delta) { neuron->weights[index] += delta; } static double compute_one_input(struct network *network, struct input *input) { int i; double error_sum = 0.0; /* set the BIAS neuron to 1 */ network->neurons[0].value = 1.0; /* set the input neurons to the values from the training data */ for (i = 0; i < INPUTS; i++) network->neurons[BIAS+i].value = input->values[i]; calculate_network(network); /* now calculate the error for all outputs */ for (i = 0; i < OUTPUTS; i++) { double error; double gradient; struct neuron *neuron; int h; neuron = &network->neurons[OUTPUT_START + i]; error = input->output[i] - neuron->value; error_sum += error * error; /* Back propagation: the error propagates back via the inputs of the output neuron */ gradient = neuron->value * (1.0 - neuron->value) * error; for (h = 0; h < BIAS + HIDDEN; h++) { double delta; double hidden_gradient; struct neuron *hidden_neuron; int inp; hidden_neuron = &network->neurons[ neuron->inputs[h] ]; /* calculate the delta */ delta = learning_rate * gradient * hidden_neuron->value; /* calcuate the (recursive) gradient */ hidden_gradient = hidden_neuron->value * (1 - hidden_neuron->value) * gradient * neuron->weights[h]; /* adjust the weight */ update_weight(neuron, h, delta); /* further propagate the error to the input layer */ for (inp = 0; inp < BIAS + INPUTS; inp++) { struct neuron *input_neuron; input_neuron = &network->neurons[ hidden_neuron->inputs[inp] ]; delta = learning_rate * input_neuron->value * hidden_gradient; update_weight(hidden_neuron, inp, delta); } } } return error_sum; } static double compute_for_all_inputs(struct network *network) { int i; double error_sum = 0.0; for (i = 0; i < MAXINPUT; i++) error_sum += compute_one_input(network, &inputs[i]); return error_sum; } static struct network *allocate_network(void) { struct network *n; int i,k; n = calloc(sizeof(struct network), 1); /* create the network structure */ /* the hidden layer neurons connect from all input and bias neurons, with random weight */ for (i = HIDDEN_START; i < HIDDEN_END; i++) { for (k = 0; k < BIAS + INPUTS; k++) { n->neurons[i].inputs[k] = k; n->neurons[i].weights[k] = urand(); } } /* the output neurons connect from all hidden neurons, with random weight */ for (i = OUTPUT_START; i < OUTPUT_END; i++) { /* set up the link to the bias neuron */ n->neurons[i].inputs[0] = 0; n->neurons[i].weights[0] = urand(); /* and then connect up each of the neurons in the hidden layer */ for (k = 0; k < HIDDEN; k++) { n->neurons[i].inputs[k + BIAS] = HIDDEN_START + k; n->neurons[i].weights[k + BIAS] = urand(); } } return n; } static void free_network(struct network *n) { free(n); } /* * Output the structure of the network to a ".dot" file that * can be turned into a graphic with the dot program from * the "graphviz" project: * * dot filename.dot -O -Tpng */ static void output_network(char *filename, struct network *n) { FILE *file; int i, j; file = fopen(filename, "w"); if (!file) return; fprintf(file, "digraph G {\n"); fprintf(file, "\t { \n"); fprintf(file, "\t\t rank=source;\n"); for (i = 0; i < INPUTS + BIAS; i++) { fprintf(file,"\t\t%i;\n", i); } fprintf(file, "\t}\n"); fprintf(file, "\t { \n"); fprintf(file, "\t\t rank=sink;\n"); for (i = OUTPUT_START; i < OUTPUT_END; i++) { fprintf(file,"\t\t%i;\n", i); } fprintf(file, "\t}\n"); for (i = INPUTS + BIAS; i < NETSIZE; i++) { for (j = 0; j < SYNAPSES; j++) { if (fabs(n->neurons[i].weights[j])>0.001 ) { fprintf(file, " %i -> %i [label = %4.1f] ;\n", n->neurons[i].inputs[j], i, n->neurons[i].weights[j]); } } } fprintf(file, "\tsubgraph inputcluster { \n"); for (i = 0; i < INPUTS + BIAS; i++) { fprintf(file,"\t\t%i;\n", i); } fprintf(file, "\t}\n"); fprintf(file, "\tsubgraph outputcluster { \n"); for (i = NETSIZE-OUTPUTS; i < NETSIZE; i++) { fprintf(file,"\t\t%i;\n", i); } fprintf(file, "\t}\n"); fprintf(file, "}\n"); fclose(file); } int main(int argc, char **argv) { struct network *net; double error; int i; /* initialize the random number generator */ srand(time(NULL) + argc + strlen(argv[0])); create_training_set(); net = allocate_network(); error = compute_for_all_inputs(net); printf("Error is %5.4f\n", error); for (i = 0; i <= 5000; i++) { error = compute_for_all_inputs(net); /* every 10 rounds, print a status update */ if (i % 10 == 0) printf("Round %4i: Error is %5.4f\n", i, error); } output_network("final.dot", net); free_network(net); return EXIT_SUCCESS; }