// This file is a part of Framsticks SDK. http://www.framsticks.com/ // Copyright (C) 1999-2023 Maciej Komosinski and Szymon Ulatowski. // See LICENSE.txt for details. // Copyright (C) 1999,2000 Adam Rotaru-Varga (adam_rotaru@yahoo.com), GNU LGPL // 2018, Grzegorz Latosinski, added development checkpoints and support for new API for neuron types #include "f4_general.h" #include "../genooperators.h" // for GENOPER_ constants #include #include #include // for min and max attributes #include #ifdef DMALLOC #include #endif #define BREAK_WHEN_REP_COUNTER_NULL //see comments where it is used #define TREAT_BAD_CONNECTIONS_AS_INVALID_GENO //see comments where it is used void rolling_dec(double *v) { *v -= 0.7853; // 0.7853981 45 degrees = pi/4 like in f1 } void rolling_inc(double *v) { *v += 0.7853; // 0.7853981 45 degrees } f4_Cell::f4_Cell(int nnr, f4_Cell *ndad, int nangle, GeneProps newP) { nr = nnr; type = CELL_UNDIFF; dadlink = ndad; org = NULL; genot = NULL; gcur = old_gcur = NULL; repeat.clear(); //genoRange.clear(); -- implicit anglepos = nangle; commacount = 0; childcount = 0; P = newP; rolling = 0; xrot = 0; zrot = 0; //OM = Orient_1; inertia = 0.8; force = 0.04; sigmo = 2; conns_count = 0; // adjust firstend and OM if there is a stick dad if (ndad != NULL) { // make sure it is a stick (and not a stick f4_Cell!) if (ndad->type == CELL_STICK) { //firstend = ndad->lastend; //OM = ndad->OM; ndad->childcount++; } if (ndad->type == CELL_NEURON) { inertia = ndad->inertia; force = ndad->force; sigmo = ndad->sigmo; } } // adjust lastend //lastend = firstend + ((Orient)OM * (Pt3D(1,0,0) * P.len)); mz = 1; } f4_Cell::f4_Cell(f4_Cells *nO, int nnr, f4_Node *ngeno, f4_Node *ngcur, f4_Cell *ndad, int nangle, GeneProps newP) { nr = nnr; type = CELL_UNDIFF; dadlink = ndad; org = nO; genot = ngeno; gcur = old_gcur = ngcur; repeat.clear(); //genoRange.clear(); -- implicit // preserve geno range of parent cell if (NULL != ndad) genoRange.add(ndad->genoRange); anglepos = nangle; commacount = 0; childcount = 0; P = newP; rolling = 0; xrot = 0; zrot = 0; //OM = Orient_1; inertia = 0.8; force = 0.04; sigmo = 2; conns_count = 0; // adjust firstend and OM if there is a stick dad if (ndad != NULL) { // make sure it is a stick (and not a stick f4_Cell!) if (ndad->type == CELL_STICK) { //firstend = ndad->lastend; //OM = ndad->OM; ndad->childcount++; } if (ndad->type == CELL_NEURON) { inertia = ndad->inertia; force = ndad->force; sigmo = ndad->sigmo; } } // adjust lastend //lastend = firstend + ((Orient)OM * (Pt3D(1,0,0) * P.len)); mz = 1; } f4_Cell::~f4_Cell() { // remove connections if (conns_count) { int i; for (i = conns_count - 1; i >= 0; i--) delete conns[i]; conns_count = 0; } } void f4_Cell::oneStep() { while (gcur != NULL) { //DB( printf(" %d (%d) executing '%c' %d\n", name, type, gcur->name, gcur->pos); ) // currently this is the last one processed // the current genotype code is processed //genoRange.add(gcur->pos,gcur->pos+gcur->name.length()-1); // To detect what genes are valid neuroclass names, but do NOT have is_neuroclass==true // (just as a curiosity to ensure we properly distinguish between, for example, the "G" neuron and the "G" modifier): //char *TMP = (char*)gcur->name.c_str(); //if (gcur->is_neuroclass==false && GenoOperators::parseNeuroClass(TMP, ModelEnum::SHAPETYPE_BALL_AND_STICK)) // printf("Could be a valid neuroclass, but is_neuroclass==false: %s\n", gcur->name.c_str()); if (gcur->neuclass == NULL) //not a neuron { if (gcur->name.length() > 1) logPrintf("f4_Cell", "oneStep", LOG_WARN, "Multiple-character code that is not a neuron class name: '%s'", gcur->name.c_str()); //let's see an example of such a code... genoRange.add(gcur->pos, gcur->pos); char name = gcur->name[0]; switch (name) { case '<': { // cell division! //DB( printf(" div! %d\n", name); ) // error: sticks cannot divide if (type == CELL_STICK) { // cannot fix org->setError(gcur->pos); return; // error code set -> stop further cells development } // undiff divides if (type == CELL_UNDIFF) { // commacount is set only when daughter turns into X // daughter cell // adjust new len GeneProps newP = P; newP.propagateAlong(false); f4_Cell *tmp = new f4_Cell(org, org->cell_count, genot, gcur->child2, this, commacount, newP); tmp->repeat = repeat; repeat.clear(); org->addCell(tmp); } // a neuron divides: create a new, duplicate connections if (type == CELL_NEURON) { // daughter cell f4_Cell *tmp = new f4_Cell(org, org->cell_count, genot, gcur->child2, // has the same dadlink this->dadlink, commacount, P); tmp->repeat = repeat; repeat.clear(); // it is a neuron from start tmp->type = CELL_NEURON; // it has the same type as the parent neuron tmp->neuclass = neuclass; // duplicate connections f4_CellConn *conn; for (int i = 0; i < conns_count; i++) { conn = conns[i]; tmp->addConnection(conn->from, conn->weight); } org->addCell(tmp); } // adjustments for this cell gcur = gcur->child; return; // error code not set -> halt this development and yield to other cells to develop } case '>': { // finish // see if there is a repeat count if (repeat.top > 0) { // there is a repeat counter if (!repeat.first()->isNull()) { // repeat counter is not null repeat.first()->dec(); if (repeat.first()->count > 0) { // return to repeat gcur = repeat.first()->node->child; } else { // continue gcur = repeat.first()->node->child2; repeat.pop(); } break; } else { repeat.pop(); // MacKo 2023-04: originally, there was no "break" nor "return" here (hence [[fallthrough]]; needed below for modern compilers) - not sure if this was intentional or overlooking. // This case can be tested with "#0" in the genotype. Anyway, there seems to be no difference in outcomes with and without "break". // However, falling through [[fallthrough]] below for count==0 causes performing repeat.push(repeat_ptr(gcur, 0)) while the very reason // we are here is that repeat count==0 (one of the conditions for isNull()), so I opted to add "break", but marked this tentative decision using #define. // The ultimate informed decision would require understanding all the logic and testing all the edge cases. #ifdef BREAK_WHEN_REP_COUNTER_NULL break; #endif } } else { // error: still undiff if (type == CELL_UNDIFF) { // fix it: insert an 'X' f4_Node *insertnode = new f4_Node("X", NULL, gcur->pos); if (org->setRepairInsert(gcur->pos, gcur, insertnode)) // not in repair mode, release delete insertnode; return; // error code set -> stop further cells development } repeat.clear(); // eat up rest int remaining_nodes = gcur->count() - 1; if (remaining_nodes > 0) logPrintf("f4_Cell", "oneStep", LOG_WARN, "Ignoring junk genetic code: %d node(s) at position %d", remaining_nodes, gcur->child->pos); //let's see an example of such a genotype... gcur = NULL; return; // done development } } #ifndef BREAK_WHEN_REP_COUNTER_NULL [[fallthrough]]; #endif case '#': { // repetition marker if (repeat.top >= repeat_stack::stackSize) { // repeat pointer stack is full, cannot remember this one. // fix: delete it org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } repeat.push(repeat_ptr(gcur, gcur->reps)); gcur = gcur->child; break; } case ',': { commacount++; gcur = gcur->child; break; } case 'r': case 'R': { // error: if neuron if (type == CELL_NEURON) { // fix: delete it org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } switch (name) { case 'r': rolling_dec(&rolling); break; case 'R': rolling_inc(&rolling); break; } gcur = gcur->child; break; } case 'l': case 'L': case 'c': case 'C': case 'q': case 'Q': case 'a': case 'A': case 'i': case 'I': case 's': case 'S': case 'm': case 'M': case 'f': case 'F': case 'w': case 'W': case 'e': case 'E': case 'd': case 'D': case 'g': case 'G': case 'b': case 'B': case 'h': case 'H': { // error: if neuron if (type == CELL_NEURON) //some neurons have the same single-letter names as modifiers (for example G,S,D), but they are supposed to have is_neuroclass==true so they should indeed not be handled here {//however, what we see here is actually modifiers such as IdqEbWL (so not valid neuroclasses) that occurred within an already differentiated cell of type==CELL_NEURON. //printf("Handled as a modifier, but type==CELL_NEURON: '%c'\n", name); // fix: delete it org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } P.executeModifier(name); gcur = gcur->child; break; } case 'X': { // turn undiff. cell into a stick // error: already differentiated if (type != CELL_UNDIFF) { // fix: delete this node org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } type = CELL_STICK; // fix dad commacount and own anglepos if (dadlink != NULL) { dadlink->commacount++; anglepos = dadlink->commacount; } // change of type halts developments, see comment at 'neuclasshandler' below gcur = gcur->child; return; // error code not set -> halt this development and yield to other cells to develop } case '[': { // connection to neuron // error: not a neuron if (type != CELL_NEURON) { // fix: delete it org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } // input [%d:%g] int relfrom = gcur->conn_from; double weight = gcur->conn_weight; f4_Cell *neu_from = NULL; // input from other neuron // find neuron at relative i // find own index int this_index = 0, neu_counter = 0; for (int i = 0; i < org->cell_count; i++) { if (org->C[i]->type == CELL_NEURON) neu_counter++; if (org->C[i] == this) { this_index = neu_counter - 1; break; } } // find index of incoming int from_index = this_index + relfrom; if (from_index < 0) goto wait_conn; if (from_index >= org->cell_count) goto wait_conn; // find that neuron neu_counter = 0; int from; for (from = 0; from < org->cell_count; from++) { if (org->C[from]->type == CELL_NEURON) neu_counter++; if (from_index == (neu_counter - 1)) break; } if (from >= org->cell_count) goto wait_conn; neu_from = org->C[from]; // add connection // error: could not add connection (too many?) if (addConnection(neu_from, weight)) { // cannot fix org->setError(gcur->pos); return; // error code set -> stop further cells development } gcur = gcur->child; break; } wait_conn: { // wait for other neurons to develop if (!org->development_stagnation) // other cells are developing, the situation is changing, we may continue waiting... return; // error code not set -> halt this development and yield to other cells to develop //no cells are developing and we are waiting, but there is no chance other cells will create neurons we are waiting for, so we are forced to move on. #ifdef TREAT_BAD_CONNECTIONS_AS_INVALID_GENO // MacKo 2023-04: there were so many invalid connections accumulating in the genotype (and stopping processing of the chain of gcur->child) that it looks like treating them as errors is better... in 2000's, Framsticks neurons were flexible when it comes to inputs and outputs (for example, when asked, muscles would provide an output too, and neurons that ignored inputs would still accept them when connected) so f4 could create connections pretty randomly, but after 2000's we attempt to respect neurons' getPreferredInputs() and getPreferredOutput() so the network of connections has more constraints. if (gcur->parent->name == "#") { // MacKo 2023-04: Unfortunately the logic of multiplicating connections is not ideal... //TREAT_BAD_CONNECTIONS_AS_INVALID_GENO without this "#" exception would break /*4*/N:N#5<[1:1]> // because every neuron wants to get an input from the neuron that will be created next // and all is fine until the last created neuron, which wants to get an input from another one which will not be created // (3 gets from 4, 4 gets from 5, 5 wants to get from 6 (relative connection offset for each of them is 1), // but 6 will not get created and if we want to TREAT_BAD_CONNECTIONS_AS_INVALID_GENO, we produce an error... // We would like to have this multiplication working, but OTAH we don't want to accept bad connections because then they tend to multiply as junk genes and bloat the genotype also causing more and more neutral mutations... //so this condition and checking for "#" is a simple way to be kind to some, but not all, bad connections, and not raise errors. Perhaps too kind and we open the door for too many cases with invalid connections. //Maybe it would be better to perform this check before addConnection(), seeing that for example we process the last iteration of the repetition counter? But how would we know that the (needed here) input neuron will not be developed later by other dividing cells... gcur = gcur->child; org->development_stagnation = false; //do not force other potentially waiting cells to hurry and act in this development cycle (which would be the last cycle if development_stagnation stayed true); we just acted and because of this the situation may change, so they can wait until another development_stagnation is detected return; // error code not set -> halt this development and yield to other cells to develop } else { //org->setError(gcur->pos); //in case setRepairRemove() would not always produce reasonable results org->setRepairRemove(gcur->pos, gcur); //produces unexpected results? or NOT? TODO verify, some genotypes earlier produced strange outcomes of this repair (produced a valid genotype, but some neurons were multiplied/copied after repair - maybe because when a branch of '<' (or something else) is missing, the other branch is copied?) return; // error code set -> stop further cells development } #else // no more actives, cannot add connection, ignore, but treat not as an error - before 2023-04 gcur = gcur->child; #endif } break; case ':': { // neuron parameter // error: not a neuron if (type != CELL_NEURON) { // fix: delete it org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } switch (gcur->prop_symbol) { case '!': if (gcur->prop_increase) force += (1.0 - force) * 0.2; else force -= force * 0.2; break; case '=': if (gcur->prop_increase) inertia += (1.0 - inertia) * 0.2; else inertia -= inertia * 0.2; break; case '/': if (gcur->prop_increase) sigmo *= 1.4; else sigmo /= 1.4; break; default: org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } gcur = gcur->child; break; } case ' ': case '\t': case '\n': case '\r': { // whitespace has no effect, should not occur // fix: delete it org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } default: { // error: unknown code string buf = "Unknown code '" + gcur->name + "'"; logMessage("f4_Cell", "oneStep", LOG_ERROR, buf.c_str()); org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } } } else { genoRange.add(gcur->pos, gcur->pos + int(gcur->name.length()) + 2 - 1); // +2 for N: if (type != CELL_UNDIFF) { // fix: delete this node org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } // error: if no previous if (dadlink == NULL) { // fix: delete it org->setRepairRemove(gcur->pos, gcur); return; // error code set -> stop further cells development } neuclass = gcur->neuclass; type = CELL_NEURON; // change of type also halts development, to give other // cells a chance for adjustment. Namely, it is important // to wait for other cells to turn to neurons before adding connections gcur = gcur->child; return; // error code not set -> halt this development and yield to other cells to develop } } } int f4_Cell::addConnection(f4_Cell *nfrom, double nweight) { if (nfrom->neuclass->getPreferredOutput() == 0) return -1; // if incoming neuron does not produce output, return error if (neuclass->getPreferredInputs() != -1 && conns_count >= neuclass->getPreferredInputs()) return -1; //cannot add more inputs to this neuron if (conns_count >= F4_MAX_CELL_INPUTS - 1) return -1; // over hardcoded limit conns[conns_count] = new f4_CellConn(nfrom, nweight); conns_count++; return 0; } void f4_Cell::adjustRec() { //f4_OrientMat rot; int i; if (recProcessedFlag) // already processed return; // mark it processed recProcessedFlag = 1; // make sure its parent is processed first if (dadlink != NULL) dadlink->adjustRec(); // count children childcount = 0; for (i = 0; i < org->cell_count; i++) { if (org->C[i]->dadlink == this) if (org->C[i]->type == CELL_STICK) childcount++; } if (type == CELL_STICK) { if (dadlink == NULL) { //firstend = Pt3D_0; // rotation due to rolling xrot = rolling; mz = 1; } else { //firstend = dadlink->lastend; GeneProps Pdad = dadlink->P; GeneProps Padj = Pdad; Padj.propagateAlong(false); //rot = Orient_1; // rotation due to rolling xrot = rolling + // rotation due to twist Pdad.twist; if (dadlink->commacount <= 1) { // rotation due to curvedness zrot = Padj.curvedness; } else { zrot = Padj.curvedness + (anglepos * 1.0 / (dadlink->commacount + 1) - 0.5) * M_PI * 2.0; } //rot = rot * f4_OrientMat(yOz, xrot); //rot = rot * f4_OrientMat(xOy, zrot); // rotation relative to parent stick //OM = rot * OM; // rotation in world coordinates //OM = ((f4_OrientMat)dadlink->OM) * OM; mz = dadlink->mz / dadlink->childcount; } //Pt3D lastoffset = (Orient)OM * (Pt3D(1,0,0)*P.len); //lastend = firstend + lastoffset; } } f4_CellConn::f4_CellConn(f4_Cell *nfrom, double nweight) { from = nfrom; weight = nweight; } f4_Cells::f4_Cells(f4_Node *genome, bool nrepair) { repair = nrepair; errorcode = GENOPER_OK; errorpos = -1; repair_remove = NULL; repair_parent = NULL; repair_insert = NULL; tmpcel = NULL; // create ancestor cell C[0] = new f4_Cell(this, 0, genome, genome, NULL, 0, GeneProps::standard_values); cell_count = 1; development_stagnation = false; } f4_Cells::~f4_Cells() { // release cells if (cell_count) { for (int i = cell_count - 1; i >= 0; i--) delete C[i]; cell_count = 0; } } bool f4_Cells::oneStep() { int old_cell_count = cell_count; //cell_count may change in the loop as new cells may be appended because cells may be dividing for (int i = 0; i < old_cell_count; i++) C[i]->old_gcur = C[i]->gcur; for (int i = 0; i < old_cell_count; i++) { C[i]->oneStep(); if (errorcode != GENOPER_OK) return false; // error -> end development } if (cell_count != old_cell_count) //the number of cells changed - something is going on! return true; //so continue development! for (int i = 0; i < old_cell_count; i++) if (C[i]->old_gcur != C[i]->gcur) // genotype execution pointer changed - something is going on! return true; //so continue development! //the same number of cells, no progress in development in any cell -> stagnation! if (development_stagnation) // stagnation was already detected in the previous step, so end development! { for (int i = 0; i < cell_count; i++) if (C[i]->gcur != NULL) // genotype execution pointer did not reach the end logPrintf("f4_Cells", "oneStep", LOG_WARN, "Finishing the development of cells due to stagnation, but cell %d did not reach the end of development", i); //let's see an example of such a genotype and investigate... return false; //end development } else { development_stagnation = true; //signal (force) f4_Cell's that wait for neural connection development to make a step, because all cells stagnated and waiting cells cannot hope for new neurons to be created return true; //one grace step. If there are some waiting cells, they must move on in the next step and set development_stagnation=false or set error. If development_stagnation is not set to false, we will finish development in the next step. This grace step may be unnecessary if there are no waiting cells, but we have no easy way to check this from here (although we could check if all cells' gcur==NULL... would this be always equivalent? Maybe some cells may stagnate with gcur!=NULL and they are not waiting for neural connections to develop and this does not mean an error? Added LOG_WARN above to detect such cases. Anyway, for gcur==NULL, f4_Cell.oneStep() exits immediately, so one grace step is not a big overhead.) } } int f4_Cells::simulate() { const bool PRINT_CELLS_DEVELOPMENT = false; //print the state of cells errorcode = GENOPER_OK; development_stagnation = false; //will be detected by oneStep() if (PRINT_CELLS_DEVELOPMENT) f4_Node::print_tree(C[0]->genot, 0); if (PRINT_CELLS_DEVELOPMENT) print_cells("Initialization"); // execute oneStep() in a cycle while (oneStep()) if (PRINT_CELLS_DEVELOPMENT) print_cells("Development step"); if (PRINT_CELLS_DEVELOPMENT) print_cells("After last development step"); if (errorcode != GENOPER_OK) return errorcode; // fix neuron attachements for (int i = 0; i < cell_count; i++) { if (C[i]->type == CELL_NEURON) { while (C[i]->dadlink->type == CELL_NEURON) { C[i]->dadlink = C[i]->dadlink->dadlink; } } } // there should be no undiff. cells // make undifferentiated cells sticks for (int i = 0; i < cell_count; i++) { if (C[i]->type == CELL_UNDIFF) { C[i]->type = CELL_STICK; //setError(); } } // recursive adjust // reset recursive traverse flags for (int i = 0; i < cell_count; i++) C[i]->recProcessedFlag = 0; // process every cell for (int i = 0; i < cell_count; i++) C[i]->adjustRec(); //DB( printf("Cell simulation done, %d cells. \n", nc); ) if (PRINT_CELLS_DEVELOPMENT) print_cells("Final"); return errorcode; } void f4_Cells::print_cells(const char* description) { printf("------ %-55s ------ errorcode=%d, errorpos=%d\n", description, getErrorCode(), getErrorPos()); for (int i = 0; i < cell_count; i++) { f4_Cell *c = C[i]; string type; switch (c->type) { case CELL_UNDIFF: type = "undiff"; break; case CELL_STICK: type = "STICK"; break; case CELL_NEURON: type = string("NEURON:") + c->neuclass->name.c_str(); break; default: type = std::to_string(c->type); } const char *status = c->gcur == c->old_gcur ? (c->gcur != NULL ? "no progress" : "") : (c->gcur != NULL ? "progress" : "finished"); //progress or no progress means the cell is yielding = not finished but decided to halt development and wait for other cells. New cells may be created in case of "no progress" status. printf("%2d(%-8s) nr=%d \t type=%-15s \t genot=%s \t gcurrent=%s", i, status, c->nr, type.c_str(), c->genot->name.c_str(), c->gcur ? c->gcur->name.c_str() : "null"); if (c->gcur && c->gcur->name == "[") printf("\tfrom=%d weight=%g", c->gcur->conn_from, c->gcur->conn_weight); printf("\n"); for (int l = 0; l < c->conns_count; l++) printf("\tconn:%d from=%d weight=%g\n", l, c->conns[l]->from->nr, c->conns[l]->weight); } printf("\n"); } void f4_Cells::addCell(f4_Cell *newcell) { if (cell_count >= F4_MAX_CELLS - 1) { delete newcell; return; } C[cell_count] = newcell; cell_count++; } void f4_Cells::setError(int nerrpos) { errorcode = GENOPER_OPFAIL; errorpos = nerrpos; } void f4_Cells::setRepairRemove(int nerrpos, f4_Node *to_remove) { errorcode = GENOPER_REPAIR; errorpos = nerrpos; if (!repair) { // not in repair mode, treat as repairable error } else { repair_remove = to_remove; } } int f4_Cells::setRepairInsert(int nerrpos, f4_Node *parent, f4_Node *to_insert) { errorcode = GENOPER_REPAIR; errorpos = nerrpos; if (!repair) { // not in repair mode, treat as repairable error return -1; } else { repair_parent = parent; repair_insert = to_insert; return 0; } } void f4_Cells::repairGeno(f4_Node *geno, int whichchild) { // assemble repaired geno, if the case if (!repair) return; if ((repair_remove == NULL) && (repair_insert == NULL)) return; // traverse genotype tree, remove / insert node f4_Node *g2; if (whichchild == 1) g2 = geno->child; else g2 = geno->child2; if (g2 == NULL) return; if (g2 == repair_remove) { f4_Node *oldgeno; geno->removeChild(g2); if (g2->child) { // add g2->child as child to geno if (whichchild == 1) geno->child = g2->child; else geno->child2 = g2->child; g2->child->parent = geno; } oldgeno = g2; oldgeno->child = NULL; delete oldgeno; if (geno->child == NULL) return; // check this new repairGeno(geno, whichchild); return; } if (g2 == repair_parent) { geno->removeChild(g2); geno->addChild(repair_insert); repair_insert->parent = geno; repair_insert->child = g2; repair_insert->child2 = NULL; g2->parent = repair_insert; } // recurse if (g2->child) repairGeno(g2, 1); if (g2->child2) repairGeno(g2, 2); } void f4_Cells::toF1Geno(SString &out) { if (tmpcel) delete tmpcel; tmpcel = new f4_Cell(-1, NULL, 0, GeneProps::standard_values); out = ""; toF1GenoRec(0, out); delete tmpcel; } void f4_Cells::toF1GenoRec(int curc, SString &out) { if (curc >= cell_count) return; if (C[curc]->type != CELL_STICK) return; f4_Cell *thisti = C[curc]; if (thisti->dadlink != NULL) *tmpcel = *(thisti->dadlink); // adjust length, curvedness, etc. tmpcel->P.propagateAlong(false); while (tmpcel->P.length > thisti->P.length) { tmpcel->P.executeModifier('l'); out += "l"; } while (tmpcel->P.length < thisti->P.length) { tmpcel->P.executeModifier('L'); out += "L"; } while (tmpcel->P.curvedness > thisti->P.curvedness) { tmpcel->P.executeModifier('c'); out += "c"; } while (tmpcel->P.curvedness < thisti->P.curvedness) { tmpcel->P.executeModifier('C'); out += "C"; } while (thisti->rolling > 0.0f) { rolling_dec(&(thisti->rolling)); out += "R"; } while (thisti->rolling < 0.0f) { rolling_inc(&(thisti->rolling)); out += "r"; } // output X for this stick out += "X"; // neurons attached to it for (int i = 0; i < cell_count; i++) { if (C[i]->type == CELL_NEURON) { if (C[i]->dadlink == thisti) { f4_Cell *thneu = C[i]; out += "["; out += thneu->neuclass->name.c_str(); if (thneu->conns_count > 0) out += ", "; // connections for (int j = 0; j < thneu->conns_count; j++) { if (j > 0) out += ", "; char buf[100]; sprintf(buf, "%d", thneu->conns[j]->from->nr - thneu->nr); out += buf; out += ":"; // connection weight sprintf(buf, "%g", thneu->conns[j]->weight); out += buf; } out += "]"; } } } // sticks connected to it if (thisti->commacount >= 2) out += "("; int ccount = 1; for (int i = 0; i < cell_count; i++) { if (C[i]->type == CELL_STICK) { if (C[i]->dadlink == thisti) { while (ccount < (C[i])->anglepos) { ccount++; out += ","; } toF1GenoRec(i, out); } } } while (ccount < thisti->commacount) { ccount++; out += ","; } if (thisti->commacount >= 2) out += ")"; } // to organize an f4 genotype in a tree structure f4_Node::f4_Node() { name = "?"; parent = NULL; child = NULL; child2 = NULL; pos = -1; reps = 0; prop_symbol = '\0'; prop_increase = false; conn_from = 0; conn_weight = 0.0; neuclass = NULL; } f4_Node::f4_Node(string nname, f4_Node *nparent, int npos) { name = nname; parent = nparent; child = NULL; child2 = NULL; pos = npos; if (parent) parent->addChild(this); reps = 0; prop_symbol = '\0'; prop_increase = false; conn_from = 0; conn_weight = 0.0; neuclass = NULL; } f4_Node::f4_Node(char nname, f4_Node *nparent, int npos) { name = nname; parent = nparent; child = NULL; child2 = NULL; pos = npos; if (parent) parent->addChild(this); reps = 0; prop_symbol = '\0'; prop_increase = false; conn_from = 0; conn_weight = 0.0; neuclass = NULL; } f4_Node::~f4_Node() { destroy(); } void f4_Node::print_tree(const f4_Node *root, int indent) { for (int i = 0; i < indent; i++) printf(" "); printf("%s%s%s (%d)", root->neuclass != NULL ? "N:" : "", root->name.c_str(), root->name == "#" ? std::to_string(root->reps).c_str() : "", root->count() - 1); if (root->name == "[") printf(" from=%-3d weight=%g", root->conn_from, root->conn_weight); printf("\n"); if (root->child) print_tree(root->child, indent + 1); if (root->child2) print_tree(root->child2, indent + 1); } int f4_Node::addChild(f4_Node *nchi) { if (child == NULL) { child = nchi; return 0; } if (child2 == NULL) { child2 = nchi; return 0; } return -1; } int f4_Node::removeChild(f4_Node *nchi) { if (nchi == child2) { child2 = NULL; return 0; } if (nchi == child) { child = NULL; return 0; } return -1; } int f4_Node::childCount() { return int(child != NULL) + int(child2 != NULL); //0, 1 or 2 } int f4_Node::count() const { int c = 1; if (child != NULL) c += child->count(); if (child2 != NULL) c += child2->count(); return c; } f4_Node* f4_Node::ordNode(int n) { int n1; if (n == 0) return this; n--; if (child != NULL) { n1 = child->count(); if (n < n1) return child->ordNode(n); n -= n1; } if (child2 != NULL) { n1 = child2->count(); if (n < n1) return child2->ordNode(n); n -= n1; } return NULL; } f4_Node* f4_Node::randomNode() { int n = count(); // pick a random node between 0 and n-1 return ordNode(rndUint(n)); } f4_Node* f4_Node::randomNodeWithSize(int mn, int mx) { // try random nodes, and accept if size in range // limit to maxlim tries int i, n, maxlim; f4_Node *nod = NULL; maxlim = count(); for (i = 0; i < maxlim; i++) { nod = randomNode(); n = nod->count(); if ((n >= mn) && (n <= mx)) return nod; } // failed, doesn't matter return nod; } void f4_Node::sprint(SString& out) { char buf2[20]; // special case: repetition code if (name == "#") { out += "#"; sprintf(buf2, "%d", reps); out += buf2; } else { // special case: neuron connection if (name == "[") { out += "["; sprintf(buf2, "%d", conn_from); out += buf2; sprintf(buf2, ":%g]", conn_weight); out += buf2; } else if (name == ":") { sprintf(buf2, ":%c%c:", prop_increase ? '+' : '-', prop_symbol); out += buf2; } else if (neuclass != NULL) { out += "N:"; out += neuclass->name.c_str(); } else { out += name.c_str(); } } if (child != NULL) child->sprint(out); // if two children, make sure last char is a '>' if (childCount() == 2) if (out[0] == 0) out += ">"; else if (out[out.length() - 1] != '>') out += ">"; if (child2 != NULL) child2->sprint(out); // make sure last char is a '>' if (out[0] == 0) out += ">"; else if (out[out.length() - 1] != '>') out += ">"; } void f4_Node::sprintAdj(char *& buf) { unsigned int len; // build in a SString, with initial size SString out; out.reserve(int(strlen(buf)) + 2000); sprint(out); len = out.length(); // very last '>' can be omitted // MacKo 2023-05: after tightening parsing and removing a silent repair for missing '>' after '#', this is no longer always the case. // For genotypes using '#', removing trailing >'s makes them invalid: /*4*/X#1>> or /*4*/X#1#2>>> or /*4*/X#1#2#3>>>> etc. // Such invalid genotypes with missing >'s would then require silently adding >'s, but now stricter parsing and clear information about invalid syntax is preferred. // See also comments in f4_processRecur() case '#'. //if (len > 1) // if (out[len - 1] == '>') { (out.directWrite())[len - 1] = 0; out.endWrite(); }; //Macko 2023-04 "can be omitted" => was always removed in generated genotypes. // copy back to string // if new is longer, reallocate buf if (len + 1 > strlen(buf)) { buf = (char*)realloc(buf, len + 1); } strcpy(buf, out.c_str()); } f4_Node* f4_Node::duplicate() { f4_Node *copy; copy = new f4_Node(*this); copy->parent = NULL; // set later copy->child = NULL; copy->child2 = NULL; if (child != NULL) { copy->child = child->duplicate(); copy->child->parent = copy; } if (child2 != NULL) { copy->child2 = child2->duplicate(); copy->child2->parent = copy; } return copy; } void f4_Node::destroy() { // children are destroyed (recursively) through the destructor if (child2 != NULL) delete child2; if (child != NULL) delete child; } // scan genotype string and build a tree // return >1 for error (errorpos) int f4_processRecur(const char* genot, const int genot_len, int &pos_inout, f4_Node *parent) { static const char *all_modifiers_no_comma = F14_MODIFIERS; //I did experiments with added comma (see all_modifiers_for_simplify below) which had the advantage of commas not breaking sequences of modifiers, thus longer sequences of modifiers (including commas) could be simplified and genetic bloat was further reduced. But since we impose a limit on the number of modifier chars in GenoOperators::simplifiedModifiers(), it would also influence commas (e.g. no more than 8 commas per sequence), so in order to leave commas entirely unlimited let's exclude them from simplification. Note that currently 'X' or any other non-F14_MODIFIERS char also separates the sequence to be simplified, so if we wanted a really intensive simplification, it should occur during development, when we know precisely which genes influence each f4_Cell. //const char *Geno_f4::all_modifiers_for_simplify = F14_MODIFIERS ",\1"; //'\1' added to keep the number of chars even, avoid exceptions in logic and save the simple rule that the sequence is made of pairs (gene,contradictory gene), where a comma has no contradictory gene and \1 is unlikely to occur in the f4 genotype (and not allowed), so no risk it will cancel out a comma during simplification. f4_Node *par = parent; if (pos_inout >= genot_len) return genot_len + 1; while (pos_inout < genot_len) { const bool PRINT_PARSING_LOCATION = false; if (PRINT_PARSING_LOCATION) { printf("%s\n", genot); for (int i = 0; i < pos_inout; i++) printf(" "); printf("^\n"); } switch (genot[pos_inout]) { case '<': { f4_Node *node = new f4_Node("<", par, pos_inout); par = node; pos_inout++; //move after '<' int res = f4_processRecur(genot, genot_len, pos_inout, par); if (res) return res; if (pos_inout < genot_len) { res = f4_processRecur(genot, genot_len, pos_inout, par); if (res) return res; } else // ran out { //MacKo 2023-04, more strict behavior: instead of silent repair (no visible effect to the user, genotype stays invalid but is interpreted and reported as valid), we now point out where the error is. For example or return genot_len + 1; //old silent repair: //node = new f4_Node(">", par, genot_len - 1); } return 0; // OK } case '>': { new f4_Node(">", par, pos_inout); pos_inout++; //move after '>' return 0; // OK } case '#': { // repetition marker ExtValue reps; const char* end = reps.parseNumber(genot + pos_inout + 1, ExtPType::TInt); if (end == NULL) return pos_inout + 1; //error f4_Node *node = new f4_Node("#", par, pos_inout); //TODO here or elsewhere: gene mapping seems to map '#' but not the following number node->reps = reps.getInt(); // skip number pos_inout += end - (genot + pos_inout); int res = f4_processRecur(genot, genot_len, pos_inout, node); if (res) return res; if (pos_inout < genot_len) { res = f4_processRecur(genot, genot_len, pos_inout, node); if (res) return res; } else // ran out { return genot_len + 1; //MacKo 2023-04: report an error, better to be more strict instead of a silent repair (genotype stays invalid but is interpreted and reported as valid) with non-obvious consequences? //earlier approach - silently treating this problem (we don't ever see where the error is because it gets corrected in some way here, while parsing the genotype, and error location in the genotype is never reported): //node = new f4_Node(">", par, genot_len - 1); // Maybe TODO: check if this was needed and if this was really the best repair operation; could happen many times in succession for some genotypes even though they were only a result of f4 operators, not manually created... and the operators should not generate invalid genotypes, right? Or maybe crossover does? Seemed like too many #n's for closing >'s; removing #n or adding > helped. Examples (remove trailing >'s to make invalid): /*4*/X#1>> or /*4*/X#1#2>>> or /*4*/X#1#2#3>>>> etc. // So operators somehow don't do it properly sometimes? But F4_ADD_REP adds '>'... Maybe the rule to always remove final trailing '>' was responsible? (now commented out). Since the proper syntax for # is #n ...repcode... > ...endcode..., perhaps endcode also needs '>' as the final delimiter. If we have many #'s in the genotype and the final >'s are missing, in the earlier approach we would keep adding them here as needed to ensure the syntax is valid. If we don't add '>' here silently, they must be explicitly added or else the genotype is invalid. BUT this earlier approach here only handled the situation where the genotype ended prematurely; what about cases where '>' may be needed as delimiters for # in the middle of the genotype? Or does # always concern all genes until the end, unless explicitly delimited earlier? Perhaps, if the '>' endcode delimiters are not present in the middle of the genotype, we don't know where they should be so the earlier approach would always add them only at the end of the genotype? } return 0; // OK } case ' ': case '\n': case '\r': case '\t': { // whitespace: ignore pos_inout++; break; } case 'N': { int forgenorange = pos_inout; if (genot[pos_inout + 1] != ':') return pos_inout + 1; //error pos_inout += 2; //skipping "N:" unsigned int neuroclass_begin = pos_inout; char* neuroclass_end = (char*)genot + neuroclass_begin; NeuroClass *neuclass = GenoOperators::parseNeuroClass(neuroclass_end, ModelEnum::SHAPETYPE_BALL_AND_STICK); //advances neuroclass_end if (neuclass == NULL) return pos_inout + 1; //error pos_inout += neuroclass_end - genot - neuroclass_begin; string neutype = string(genot + neuroclass_begin, genot + pos_inout); f4_Node *node = new f4_Node(neutype, par, forgenorange); node->neuclass = neuclass; par = node; // if it continues with a colon that determines a neuron parameter (e.g. N:N:+=: ), then let the switch case for colon handle this break; } case ':': { // neuron parameter +! -! += -= +/ or -/ // in the future this could be generalized to all neuron properties, for example N:|:power:0.6:range:1.4, or can even use '=' or ',' instead of ':' if no ambiguity char prop_dir, prop_symbol, prop_end[2]; // prop_end is only to ensure that neuron parameter definition is completed if (sscanf(genot + pos_inout, ":%c%c%1[:]", &prop_dir, &prop_symbol, prop_end) != 3) // error: incorrect format return pos_inout + 1 + 1; if (prop_dir != '-' && prop_dir != '+') return pos_inout + 1 + 1; //error switch (prop_symbol) { case '!': case '=': case '/': break; default: return pos_inout + 1 + 1; //error } f4_Node *node = new f4_Node(":", par, pos_inout); node->prop_symbol = prop_symbol; node->prop_increase = prop_dir == '+' ? true : false; // + or - par = node; pos_inout += 4; //skipping :ds: break; } case '[': { double weight = 0; int relfrom; const char *end = parseConnection(genot + pos_inout, relfrom, weight); if (end == NULL) return pos_inout + 1; //error f4_Node *node = new f4_Node("[", par, pos_inout); node->conn_from = relfrom; node->conn_weight = weight; par = node; pos_inout += end - (genot + pos_inout); break; } default: // 'X' and ',' and all modifiers and also invalid symbols - add a node. For symbols that are not valid in f4, the cell development process will give the error or repair { //printf("any regular character '%c'\n", genot[pos_inout]); #define F4_SIMPLIFY_MODIFIERS //avoid long, redundant sequences like ...llmlIilImmimiimmimifmfl 0) //found modifiers { string simplified = GenoOperators::simplifiedModifiers(original); // add a node for each char in "simplified" for (size_t i = 0; i < simplified.length(); i++) { int pos = GenoOperators::strchrn0(genot + pos_inout, simplified[i]) - genot; //unnecessarily finding the same char, if it occurrs multiple times in simplified f4_Node *node = new f4_Node(simplified[i], par, pos); //location is approximate. In the simplification process we don't trace where the origin(s) of the simplified[i] gene were. We provide 'pos' as the first occurrence of simplified[i] (for example, all 'L' will have the same location assigned, but at least this is where 'L' occurred in the genotype, so in case of any modification of a node (repair, removal, whatever... even mapping of genes) the indicated gene will be one of the responsible ones) par = node; } pos_inout += advanced; } else // genot[pos_inout] is a character not present in all_modifiers_no_comma, so treat it as a regular individual char just as it would be without simplification { f4_Node *node = new f4_Node(genot[pos_inout], par, pos_inout); par = node; pos_inout++; } #else f4_Node *node = new f4_Node(genot[pos_inout], par, pos_inout); par = node; pos_inout++; #endif // F4_SIMPLIFY_MODIFIERS break; } } } // should end with a '>' if (par && par->name != ">") { //happens when pos_inout == genot_len //return pos_inout; //MacKo 2023-04: could report an error instead of silent repair, but repair operators only work in Cells (i.e., after the f4_Node tree has been parsed without errors and Cells can start developing) so we don't want to make a fatal error because of missing '>' here. Also after conversions from Cells to text, trailing '>' is deliberately removed... and also the simplest genotype is officially X, not X>. new f4_Node('>', par, genot_len - 1); } return 0; // OK } int f4_process(const char *genot, f4_Node *root) { int pos = 0; int res = f4_processRecur(genot, (int)strlen(genot), pos, root); if (res > 0) return res; //parsing error else if (genot[pos] != 0) return pos + 1; //parsing OK but junk, unparsed genes left, for example /*4*/N:N>whatever or /*4*/X>>> else return 0; //parsing OK and parsed until the end } const char* parseConnection(const char *fragm, int& relfrom, double &weight) { const char *parser = fragm; if (*parser != '[') return NULL; parser++; ExtValue val; parser = val.parseNumber(parser, ExtPType::TInt); if (parser == NULL) return NULL; relfrom = val.getInt(); if (*parser != ':') return NULL; parser++; parser = val.parseNumber(parser, ExtPType::TDouble); if (parser == NULL) return NULL; weight = val.getDouble(); if (*parser != ']') return NULL; parser++; return parser; } /* f4_Node* f4_processTree(const char* geno) { f4_Node *root = new f4_Node(); int res = f4_processRecur(geno, 0, root); if (res) return NULL; //DB( printf("test f4 "); ) DB( if (root->child) { char* buf = (char*)malloc(300); DB(printf("(%d) ", root->child->count());) buf[0] = 0; root->child->sprintAdj(buf); DB(printf("%s\n", buf);) free(buf); } ) return root->child; } */