// This file is a part of Framsticks SDK. http://www.framsticks.com/ // Copyright (C) 1999-2023 Maciej Komosinski and Szymon Ulatowski. // See LICENSE.txt for details. //TODO parsing quotes/neurons seems too relaxed, for example the genotype aag"S""acalaaaafbc is considered valid //TODO Same with numbers: "----1.23" is valid //TODO reconsider: Horizontal gene transfer - copying a single random gene from each parent to the beginning of the other parent: should the gene be copied (seems to cause bloat!) or rather moved? //Neurons ("N") can grow even without using quotes and providing neuron classname in the genotype, for example aaaaabbccvaaapdgfddaalaandwddbaajt (this works likely as designed, but investigate and reconsider); also valid neuron definitions inside the genotype are sometimes not expressed #include #include #include #include #include "fB_conv.h" #include "fB_general.h" #include "fB_oper.h" #include "../fH/fH_oper.h" #define FIELDSTRUCT Geno_fB static ParamEntry geno_fB_paramtab[] = { { "Genetics: fB", 3, FB_MUT_COUNT + FB_XOVER_COUNT, }, { "Genetics: fB: Mutation", }, { "Genetics: fB: Crossover", }, { "fB_mut_substitute", 1, 0, "Substitution", "f 0 100 1", FIELD(mutationprobs[FB_SUBSTITUTION]), "Relative probability of changing a single random character (or a neuron) in the genotype", }, { "fB_mut_insert", 1, 0, "Insertion", "f 0 100 3", FIELD(mutationprobs[FB_INSERTION]), "Relative probability of inserting a random character in a random place of the genotype", }, { "fB_mut_insert_neuron", 1, 0, "Insertion of a neuron", "f 0 100 3", FIELD(mutationprobs[FB_INSERTION_NEURON]), "Relative probability of inserting a neuron in a random place of genotype", }, { "fB_mut_delete", 1, 0, "Deletion", "f 0 100 4", FIELD(mutationprobs[FB_DELETION]), "Relative probability of deleting a random character (or a neuron) in the genotype", }, { "fB_mut_duplicate", 1, 0, "Duplication", "f 0 100 0", FIELD(mutationprobs[FB_DUPLICATION]), "Relative probability of copying a single *gene* of the genotype and appending it to the beginning of this genotype", }, { "fB_mut_translocate", 1, 0, "Translocation", "f 0 100 4", FIELD(mutationprobs[FB_TRANSLOCATION]), "Relative probability of swapping two substrings in the genotype", }, { "fB_cross_gene_transfer", 2, 0, "Horizontal gene transfer", "f 0 100 0", FIELD(crossoverprobs[FB_GENE_TRANSFER]), "Relative probability of crossing over by copying a single random gene from each parent to the beginning of the other parent", }, { "fB_cross_crossover", 2, 0, "Crossing over", "f 0 100 100", FIELD(crossoverprobs[FB_CROSSING_OVER]), "Relative probability of crossing over by a random distribution of genes from both parents to both children", }, { 0, }, }; #undef FIELDSTRUCT Geno_fB::Geno_fB() { par.setParamTab(geno_fB_paramtab); par.select(this); par.setDefault(); supported_format = 'B'; } bool Geno_fB::hasStick(const SString &genotype) { for (int i = 0; i < fB_GenoHelpers::geneCount(genotype); i++) { int start, end; SString gene = fB_GenoHelpers::getGene(i, genotype, start, end); int endoffset = 0; if (gene.indexOf("zz", 0) != -1) endoffset = 2; if (gene.length() - endoffset < 3) { return true; // genes with length < 3 are always sticks } else if (gene[2] >= 'a' && gene[2] <= 'i') { return true; // gene within this range is stick } } return false; } int Geno_fB::checkValidity(const char *geno, const char *genoname) { // load genotype SString genotype(geno); SString line; int pos = 0; // if there is no genotype to load, then return error if (!genotype.getNextToken(pos, line, '\n')) { return pos + 1; } // extract dimensions int dims = 0; if (!ExtValue::parseInt(line.c_str(), dims, true, false)) { return 1; } // extract next token in order to check if next line starts with "aa" int genstart = genotype.indexOf("aa", 0); if (genstart != pos) { return pos + 1; } // check if rest of characters are lowercase for (int i = genstart; i < genotype.length(); i++) { if (!islower(genotype[i])) { if (genotype[i] == '"') { SString neuclassdef; int nextid = i + 1; if (!genotype.getNextToken(nextid, neuclassdef, '"')) { return i + 1; } Neuro *neu = new Neuro(); neu->setDetails(neuclassdef); bool isclass = neu->getClass() ? true : false; delete neu; if (!isclass) { return i + 1; } i = nextid; } else { return i + 1; } } } if (!hasStick(genotype)) { return 1; } return GENOPER_OK; } int Geno_fB::validate(char *&geno, const char *genoname) { // load genotype SString genotype(geno); SString strdims; int pos = 0; if (!genotype.getNextToken(pos, strdims, '\n')) { return GENOPER_OK; } // parse dimension int dims = 0; if (!ExtValue::parseInt(strdims.c_str(), dims, true, false)) { return GENOPER_OK; } SString line; bool fix = false; int genstart = genotype.indexOf("aa", 0); // if there is no "aa" codon in the beginning of a genotype, then add it if (genstart != pos) { genotype = strdims + "\naa" + genotype.substr(pos); fix = true; } for (int i = pos; i < genotype.length(); i++) { // if character is not alphabetic - error if (!isalpha(genotype[i])) { if (genotype[i] == '"') { SString neuclassdef; int nextid = i + 1; if (!genotype.getNextToken(nextid, neuclassdef, '"')) { return i + 1; } Neuro *neu = new Neuro(); neu->setDetails(neuclassdef); bool isclass = neu->getClass() ? true : false; delete neu; if (!isclass) { return i + 1; } i = nextid; } else { return GENOPER_OK; } } // if character is uppercase, then convert it to lowercase else if (isupper(genotype[i])) { genotype.directWrite()[i] = tolower(genotype[i]); fix = true; } } // if the genotype does not contain any stick - add it if (!hasStick(genotype)) { genotype = SString("aaazz") + genotype; } // if there were any changes - save them if (fix) { free(geno); geno = strdup(genotype.c_str()); } return GENOPER_OK; } SString Geno_fB::detokenizeSequence(std::list *tokenlist) { SString res = ""; for (std::list::iterator it = tokenlist->begin(); it != tokenlist->end(); it++) { res += (*it); } return res; } std::list Geno_fB::tokenizeSequence(const SString &genotype) { std::list res; int i = 0; while (i < genotype.length()) { // if character is not alphabetic - error if (isalpha(genotype[i])) { SString el = ""; el += genotype[i]; res.push_back(el); i++; } else { SString neuclassdef; i++; genotype.getNextToken(i, neuclassdef, '"'); SString ndef = "\""; ndef += neuclassdef; ndef += "\""; res.push_back(ndef); } } return res; } int Geno_fB::mutate(char *&geno, float &chg, int &method) { SString genotype(geno); SString strdims; int pos = 0; genotype.getNextToken(pos, strdims, '\n'); SString line; genotype.getNextToken(pos, line, '\n'); method = roulette(mutationprobs, FB_MUT_COUNT); switch (method) { case FB_SUBSTITUTION: { std::list tokenized = tokenizeSequence(line); int rndid = rndUint(tokenized.size()); // select random letter from genotype // increment/decrement character - when overflow happens, this method // uses the "reflect" approach std::list::iterator it = tokenized.begin(); std::advance(it, rndid); SString t = (*it); if ((*it).length() == 1) { if (rndUint(2) == 0) { if ((*it)[0] == 'a') (*it).directWrite()[0] = 'b'; else (*it).directWrite()[0] = (*it)[0] - 1; } else { if ((*it)[0] == 'z') (*it).directWrite()[0] = 'y'; else (*it).directWrite()[0] = (*it)[0] + 1; } chg = 1.0 / line.length(); } else { // first method needs to extract quotes SString def = (*it); def = def.substr(1, def.length() - 2); Geno_fH::mutateNeuronProperties(def); SString res = "\""; res += def; res += "\""; (*it) = res; chg = (double)def.length() / line.length(); } line = detokenizeSequence(&tokenized); break; } case FB_INSERTION_NEURON: { std::list tokenized = tokenizeSequence(line); std::list::iterator it = tokenized.begin(); int rndid = rndUint(tokenized.size()); // select random insertion point std::advance(it, rndid); NeuroClass *cls = getRandomNeuroClass(Model::SHAPETYPE_BALL_AND_STICK); if (cls) { SString classdef = cls->getName(); Geno_fH::mutateNeuronProperties(classdef); SString res = "\""; res += classdef; res += "\""; tokenized.insert(it, res); chg = (double)classdef.length() / line.length(); line = detokenizeSequence(&tokenized); break; } } [[fallthrough]]; case FB_INSERTION: { chg = 1.0 / line.length(); std::list tokenized = tokenizeSequence(line); int rndid = rndUint(tokenized.size()); // select random insertion point std::list::iterator it = tokenized.begin(); std::advance(it, rndid); SString letter = "a"; letter.directWrite()[0] = 'a' + rndUint(26); tokenized.insert(it, letter); line = detokenizeSequence(&tokenized); break; } case FB_DELETION: { chg = 1.0 / line.length(); std::list tokenized = tokenizeSequence(line); std::list::iterator it = tokenized.begin(); int rndid = rndUint(tokenized.size()); // select random deletion point std::advance(it, rndid); tokenized.erase(it); line = detokenizeSequence(&tokenized); break; } case FB_DUPLICATION: { int rndgene = rndUint(fB_GenoHelpers::geneCount(line)); int start, end; SString gene = fB_GenoHelpers::getGene(rndgene, line, start, end); if (gene.indexOf("zz", 0) == -1) gene += "zz"; chg = (float)gene.length() / line.length(); line = gene + line; break; } case FB_TRANSLOCATION: { std::list tokenized = tokenizeSequence(line); std::vector cuts(4); for (int i = 0; i < 4; i++) { cuts[i] = rndUint(tokenized.size()); } std::sort(cuts.begin(), cuts.end()); std::vector::iterator> iters(4); for (int i = 0; i < 4; i++) { iters[i] = tokenized.begin(); std::advance(iters[i], cuts[i]); } std::list res; res.insert(res.end(), tokenized.begin(), iters[0]); res.insert(res.end(), iters[2], iters[3]); res.insert(res.end(), iters[1], iters[2]); res.insert(res.end(), iters[0], iters[1]); res.insert(res.end(), iters[3], tokenized.end()); // SString first = line.substr(cuts[0], cuts[1] - cuts[0]); // SString second = line.substr(cuts[2], cuts[3] - cuts[2]); // SString result = line.substr(0, cuts[0]) + second + // line.substr(cuts[1], cuts[2] - cuts[1]) + first + line.substr(cuts[3]); line = detokenizeSequence(&res); chg = (float)(cuts[3] - cuts[2] + cuts[1] - cuts[0]) / line.length(); break; } } SString result = strdims + "\n" + line; free(geno); geno = strdup(result.c_str()); return GENOPER_OK; } int Geno_fB::crossOver(char *&g1, char *&g2, float& chg1, float& chg2) { SString p1(g1); SString p2(g2); int dims1 = 0, dims2 = 0; int pos = 0; SString strdims; p1.getNextToken(pos, strdims, '\n'); ExtValue::parseInt(strdims.c_str(), dims1, true, false); SString parent1; p1.getNextToken(pos, parent1, '\n'); pos = 0; p2.getNextToken(pos, strdims, '\n'); ExtValue::parseInt(strdims.c_str(), dims2, true, false); if (dims1 != dims2) { return GENOPER_OPFAIL; } SString parent2; p2.getNextToken(pos, parent2, '\n'); SString child1 = ""; SString child2 = ""; switch (roulette(crossoverprobs, FB_XOVER_COUNT)) { case FB_GENE_TRANSFER: { // get a random gene from the first parent int choice = rndUint(fB_GenoHelpers::geneCount(parent1)); int start, end; SString gene = fB_GenoHelpers::getGene(choice, parent1, start, end); // add this gene to the beginning of the second parent genotype child2 = gene + parent2; chg2 = (float)parent2.length() / (float)child2.length(); // do the same for the second parent choice = rndUint(fB_GenoHelpers::geneCount(parent2)); gene = fB_GenoHelpers::getGene(choice, parent2, start, end); child1 = gene + parent1; chg1 = (float)parent1.length() / (float)child1.length(); break; } // case FB_CROSSING_OVER: // { // // iterate through all genes of the first parent and assign them // // randomly to children // for (int i = 0; i < fB_GenoHelpers::geneCount(parent1); i++) // { // int start, end; // SString gene = fB_GenoHelpers::getGene(i, parent1, start, end); // if (rndUint(2) == 0) // { // child1 += gene; // chg1 += 1.0f; // } // else // { // child2 += gene; // } // } // chg1 /= fB_GenoHelpers::geneCount(parent1); // // // do the same with second parent // for (int i = 0; i < fB_GenoHelpers::geneCount(parent2); i++) // { // int start, end; // SString gene = fB_GenoHelpers::getGene(i, parent2, start, end); // if (rndUint(2) == 0) // { // child1 += gene; // } // else // { // child2 += gene; // chg2 += 1.0f; // } // } // chg2 /= fB_GenoHelpers::geneCount(parent2); // break; // } case FB_CROSSING_OVER: { // get maximal count of genes from both parents int maxgenecount = std::max(fB_GenoHelpers::geneCountNoNested(parent1), fB_GenoHelpers::geneCountNoNested(parent2)); // while there are genes in at least one genotype for (int i = 0; i < maxgenecount; i++) { SString to1 = "", to2 = ""; int start = 0, end = 0; // if both parents have genes available, then distribute them if (i < fB_GenoHelpers::geneCountNoNested(parent1) && i < fB_GenoHelpers::geneCountNoNested(parent2)) { if (rndUint(2) == 0) { to1 = fB_GenoHelpers::getNonNestedGene(i, parent1, start, end); to2 = fB_GenoHelpers::getNonNestedGene(i, parent2, start, end); chg1 += 1.0f; chg2 += 1.0f; } else { to1 = fB_GenoHelpers::getNonNestedGene(i, parent2, start, end); to2 = fB_GenoHelpers::getNonNestedGene(i, parent1, start, end); } } else if (i < fB_GenoHelpers::geneCountNoNested(parent1)) { if (rndUint(2) == 0) { to1 = fB_GenoHelpers::getNonNestedGene(i, parent1, start, end); chg1 += 1.0f; } else { to2 = fB_GenoHelpers::getNonNestedGene(i, parent1, start, end); } } else // if (i < fB_GenoHelpers::geneCountNoNested(parent2)) { if (rndUint(2) == 0) { to1 = fB_GenoHelpers::getNonNestedGene(i, parent2, start, end); } else { to2 = fB_GenoHelpers::getNonNestedGene(i, parent2, start, end); chg2 += 1.0f; } } child1 += to1; child2 += to2; } chg1 /= fB_GenoHelpers::geneCountNoNested(parent1); chg2 /= fB_GenoHelpers::geneCountNoNested(parent2); break; } } free(g1); free(g2); if (child1.length() > 0 && child2.length() == 0) { child1 = strdims + "\n" + child1; g1 = strdup(child1.c_str()); g2 = strdup(""); } else if (child2.length() > 0 && child1.length() == 0) { child2 = strdims + "\n" + child2; g1 = strdup(child2.c_str()); g2 = strdup(""); } else { child1 = strdims + "\n" + child1; child2 = strdims + "\n" + child2; g1 = strdup(child1.c_str()); g2 = strdup(child2.c_str()); } return GENOPER_OK; } uint32_t Geno_fB::style(const char *geno, int pos) { char ch = geno[pos]; if (isdigit(ch)) { while (pos > 0) { pos--; if (isdigit(geno[pos]) == 0) //going left we encountered some non-digit character { return GENSTYLE_CS(GENCOLOR_NUMBER, GENSTYLE_NONE); //so 'ch' is any digit in the genotype (neural property value etc.); for simplicity, digits as parts of neuroclass name or property name also get included here } } return GENSTYLE_RGBS(0, 0, 200, GENSTYLE_BOLD); //only digits up to the beginning, so this is the dimensionality value } if (ch == '-' || ch == '.') return GENSTYLE_CS(GENCOLOR_NUMBER, GENSTYLE_NONE); if (ch == '"') return GENSTYLE_RGBS(150, 0, 150, GENSTYLE_BOLD); //quotes encompass neuron definitions. To further distinguish the text inside quotes from the text outside quotes, we would need to determine the number of '"' from the beginning, i.e. linear search through the entire genotype. We don't want to do it - it would mean the complexity of len(geno)^2 if performed for each symbol in the genotype independently, like this function does. Below we perform an approximate partial scan. if (isupper(ch) || strchr("@|*", ch)) return GENSTYLE_RGBS(150, 0, 150, GENSTYLE_BOLD); //neuroclass if (strchr(":,=", ch)) return GENSTYLE_RGBS(150, 0, 150, GENSTYLE_NONE); //these symbols occur exclusively inside "...neuron...", so let's make the entire neuron section "...neuron..." more visually uniform by using the same violet color as the neuroclass name and quotes have if (islower(ch)) //how to color the current lower-case letter? { static const int SCAN_RANGE = 8; //how many characters before the current one to scan to discover some context and find out if we are likely in the neuroclass name or the property name. Reduces computational complexity. Example genotype fragments: abcabc"T:r=0.9, ry=4.088, rz=1.213"abcabc or abc"N:in=0.0, fo=0.17, si=999.0"abc int i = pos; while (i > 0 && pos - i < SCAN_RANGE) { i--; //go back one char if (isupper(geno[i])) return GENSTYLE_RGBS(150, 0, 150, GENSTYLE_BOLD); //neuroclass if (geno[i] == ',' || geno[i] == ':') //this is what must occur before property name starts return GENSTYLE_RGBS(255, 140, 0, GENSTYLE_BOLD); //property if (!(isalpha(geno[i]) || isspace(geno[i]))) //going left we encountered any char that is not a letter or space break; } } uint32_t style = GENSTYLE_CS(GENCOLOR_TEXT, GENSTYLE_NONE); //if the current character did not fall into any of the above cases, assume default black style if (ch == 'a' && (geno[pos + 1] == 'a' || (pos > 0 && geno[pos - 1] == 'a'))) //start codon, "aa" { style = GENSTYLE_RGBS(0, 200, 0, GENSTYLE_BOLD); } else if (ch == 'z' && (geno[pos + 1] == 'z' || (pos > 0 && geno[pos - 1] == 'z'))) //stop codon, "zz" { style = GENSTYLE_RGBS(200, 0, 0, GENSTYLE_BOLD); } return style; }