[780] | 1 | #include <frams/util/sstring.h> |
---|
| 2 | #include <vector> |
---|
| 3 | #include <frams/param/param.h> |
---|
| 4 | #include "fB_conv.h" |
---|
| 5 | #include "fB_general.h" |
---|
| 6 | #include "fB_oper.h" |
---|
| 7 | |
---|
| 8 | #define FIELDSTRUCT Geno_fB |
---|
| 9 | |
---|
| 10 | static ParamEntry GENOfBparam_tab[] = |
---|
| 11 | { |
---|
| 12 | { "Genetics: fB", 3, FB_MUT_COUNT + FB_XOVER_COUNT, }, // ask about it |
---|
| 13 | { "Genetics: fB: Mutation", }, |
---|
| 14 | { "Genetics: fB: Crossover", }, |
---|
| 15 | { "fB_mut_substitution", 1, 0, "Substitution", "f 0 1 0.6", FIELD(mutationprobs[FB_SUBSTITUTION]), "Probability of mutation by changing single random letter in genotype", }, |
---|
| 16 | { "fB_mut_insertion", 1, 0, "Insertion", "f 0 1 0.1", FIELD(mutationprobs[FB_INSERTION]), "Probability of mutation by inserting characters in random place of genotype", }, |
---|
| 17 | { "fB_mut_deletion", 1, 0, "Deletion", "f 0 1 0.1", FIELD(mutationprobs[FB_DELETION]), "Probability of mutation by deleting random characters in genotype", }, |
---|
| 18 | { "fB_mut_duplication", 1, 0, "Duplication", "f 0 1 0.05", FIELD(mutationprobs[FB_DUPLICATION]), "Probability of mutation by copying single *gene* of genotype and appending it to the beginning of this genotype", }, |
---|
| 19 | { "fB_mut_translocation", 1, 0, "Translocation", "f 0 1 0.15", FIELD(mutationprobs[FB_TRANSLOCATION]), "Probability of mutation by replacing two substrings in genotype", }, |
---|
| 20 | { "fB_cross_gene_transfer", 2, 0, "Horizontal gene transfer", "f 0 1 0.8", FIELD(crossoverprobs[FB_GENE_TRANSFER]), "Probability of crossing over by transferring single genes from both parents to beginning of each other", }, |
---|
| 21 | { "fB_cross_crossover", 2, 0, "Crossing over", "f 0 1 0.2", FIELD(crossoverprobs[FB_CROSSING_OVER]), "Probability of crossing over by random distribution of genes from both parents to both children", }, |
---|
| 22 | { 0, }, |
---|
| 23 | }; |
---|
| 24 | |
---|
| 25 | #undef FIELDSTRUCT |
---|
| 26 | |
---|
| 27 | Geno_fB::Geno_fB() |
---|
| 28 | { |
---|
| 29 | par.setParamTab(GENOfBparam_tab); |
---|
| 30 | par.select(this); |
---|
| 31 | par.setDefault(); |
---|
| 32 | supported_format = 'B'; |
---|
| 33 | } |
---|
| 34 | |
---|
| 35 | bool Geno_fB::hasStick(SString genotype) |
---|
| 36 | { |
---|
| 37 | for (int i = 0; i < fB_GenoHelpers::geneCount(genotype); i++) |
---|
| 38 | { |
---|
| 39 | int start, end; |
---|
| 40 | SString gene = fB_GenoHelpers::getGene(i, genotype, start, end); |
---|
| 41 | int endoffset = 0; |
---|
| 42 | if (gene.indexOf("zz", 0) != -1) endoffset = 2; |
---|
| 43 | if (gene.len() - endoffset < 3) |
---|
| 44 | { |
---|
| 45 | return true; // genes with length < 3 are always sticks |
---|
| 46 | } |
---|
| 47 | else if (gene[2] >= 'a' && gene[2] <= 'i') |
---|
| 48 | { |
---|
| 49 | return true; // gene within this range is stick |
---|
| 50 | } |
---|
| 51 | } |
---|
| 52 | return false; |
---|
| 53 | } |
---|
| 54 | |
---|
| 55 | int Geno_fB::checkValidity(const char *geno, const char *genoname) |
---|
| 56 | { |
---|
| 57 | // load genotype |
---|
| 58 | SString genotype(geno); |
---|
| 59 | SString line; |
---|
| 60 | int pos = 0; |
---|
| 61 | // if there is no genotype to load, then return error |
---|
| 62 | if (!genotype.getNextToken(pos, line, '\n')) |
---|
| 63 | { |
---|
| 64 | return pos + 1; |
---|
| 65 | } |
---|
| 66 | // extract dimensions |
---|
| 67 | int dims = 0; |
---|
| 68 | if (!ExtValue::parseInt(line.c_str(), dims, true, false)) |
---|
| 69 | { |
---|
| 70 | return 1; |
---|
| 71 | } |
---|
| 72 | // extract next token in order to check if next line starts with "aa" |
---|
| 73 | int genstart = genotype.indexOf("aa", 0); |
---|
| 74 | if (genstart != pos) |
---|
| 75 | { |
---|
| 76 | return pos + 1; |
---|
| 77 | } |
---|
| 78 | // check if rest of characters are lowercase |
---|
| 79 | for (int i = genstart; i < genotype.len(); i++) |
---|
| 80 | { |
---|
| 81 | if (!islower(genotype[i])) |
---|
| 82 | { |
---|
| 83 | return i + 1; |
---|
| 84 | } |
---|
| 85 | } |
---|
| 86 | if (!hasStick(genotype)) |
---|
| 87 | { |
---|
| 88 | return 1; |
---|
| 89 | } |
---|
| 90 | return GENOPER_OK; |
---|
| 91 | } |
---|
| 92 | |
---|
| 93 | int Geno_fB::validate(char *&geno, const char *genoname) |
---|
| 94 | { |
---|
| 95 | // load genotype |
---|
| 96 | SString genotype(geno); |
---|
| 97 | SString strdims; |
---|
| 98 | int pos = 0; |
---|
| 99 | if (!genotype.getNextToken(pos, strdims, '\n')) |
---|
| 100 | { |
---|
| 101 | return GENOPER_OPFAIL; |
---|
| 102 | } |
---|
| 103 | // parse dimension |
---|
| 104 | int dims = 0; |
---|
| 105 | if (!ExtValue::parseInt(strdims.c_str(), dims, true, false)) |
---|
| 106 | { |
---|
| 107 | return GENOPER_OPFAIL; |
---|
| 108 | } |
---|
| 109 | SString line; |
---|
| 110 | bool fix = false; |
---|
| 111 | int genstart = genotype.indexOf("aa", 0); |
---|
| 112 | // if there is no "aa" codon in the beginning of a genotype, then add it |
---|
| 113 | if (genstart != pos) |
---|
| 114 | { |
---|
| 115 | genotype = strdims + "\naa" + genotype.substr(pos); |
---|
| 116 | fix = true; |
---|
| 117 | } |
---|
| 118 | for (int i = pos; i < genotype.len(); i++) |
---|
| 119 | { |
---|
| 120 | // if character is not alphabetic - error |
---|
| 121 | if (!isalpha(genotype[i])) |
---|
| 122 | { |
---|
| 123 | return GENOPER_OPFAIL; |
---|
| 124 | } |
---|
| 125 | // if character is uppercase, then convert it to lowercase |
---|
| 126 | if (isupper(genotype[i])) |
---|
| 127 | { |
---|
| 128 | genotype.directWrite()[i] = tolower(genotype[i]); |
---|
| 129 | fix = true; |
---|
| 130 | } |
---|
| 131 | } |
---|
| 132 | // if the genotype does not contain any stick - add it |
---|
| 133 | if (!hasStick(genotype)) |
---|
| 134 | { |
---|
| 135 | genotype = SString("aaazz") + genotype; |
---|
| 136 | } |
---|
| 137 | // if there were any changes - save them |
---|
| 138 | if (fix) |
---|
| 139 | { |
---|
| 140 | free(geno); |
---|
| 141 | geno = strdup(genotype.c_str()); |
---|
| 142 | } |
---|
| 143 | return GENOPER_OK; |
---|
| 144 | } |
---|
| 145 | |
---|
| 146 | int Geno_fB::mutate(char *&geno, float &chg, int &method) |
---|
| 147 | { |
---|
| 148 | SString genotype(geno); |
---|
| 149 | SString strdims; |
---|
| 150 | int pos = 0; |
---|
| 151 | genotype.getNextToken(pos, strdims, '\n'); |
---|
| 152 | SString line; |
---|
| 153 | genotype.getNextToken(pos, line, '\n'); |
---|
| 154 | method = roulette(mutationprobs, FB_MUT_COUNT); |
---|
| 155 | switch (method) |
---|
| 156 | { |
---|
| 157 | case FB_SUBSTITUTION: |
---|
| 158 | { |
---|
| 159 | int rndid = randomN(line.len()); // select random letter from genotype |
---|
| 160 | // increment/decrement character - when overflow happens, this method |
---|
| 161 | // uses reflect method |
---|
| 162 | if (randomN(2) == 0) |
---|
| 163 | { |
---|
| 164 | if (line[rndid] == 'a') line.directWrite()[rndid] = 'b'; |
---|
| 165 | else line.directWrite()[rndid] = line[rndid] - 1; |
---|
| 166 | } |
---|
| 167 | else |
---|
| 168 | { |
---|
| 169 | if (line[rndid] == 'z') line.directWrite()[rndid] = 'y'; |
---|
| 170 | else line.directWrite()[rndid] = line[rndid] + 1; |
---|
| 171 | } |
---|
| 172 | chg = 1.0 / line.len(); |
---|
| 173 | break; |
---|
| 174 | } |
---|
| 175 | case FB_INSERTION: |
---|
| 176 | { |
---|
| 177 | chg = 1.0 / line.len(); |
---|
| 178 | int rndid = randomN(genotype.len()); // select random insertion point |
---|
| 179 | char letter = 'a' + randomN(26); |
---|
| 180 | SString result = line.substr(0, rndid); |
---|
| 181 | result += letter; |
---|
| 182 | result += line.substr(rndid); |
---|
| 183 | line = result; |
---|
| 184 | break; |
---|
| 185 | } |
---|
| 186 | case FB_DELETION: |
---|
| 187 | { |
---|
| 188 | chg = 1.0 / line.len(); |
---|
| 189 | int rndid = randomN(line.len()); // select random insertion point |
---|
| 190 | if (rndid == line.len() - 1) |
---|
| 191 | { |
---|
| 192 | line = line.substr(0, line.len() - 1); |
---|
| 193 | } |
---|
| 194 | else |
---|
| 195 | { |
---|
| 196 | line = line.substr(0, rndid) + line.substr(rndid + 1); |
---|
| 197 | } |
---|
| 198 | break; |
---|
| 199 | } |
---|
| 200 | case FB_DUPLICATION: |
---|
| 201 | { |
---|
| 202 | int rndgene = randomN(fB_GenoHelpers::geneCount(line)); |
---|
| 203 | int start, end; |
---|
| 204 | SString gene = fB_GenoHelpers::getGene(rndgene, line, start, end); |
---|
| 205 | if (gene.indexOf("zz", 0) == -1) gene += "zz"; |
---|
| 206 | chg = (float)gene.len() / line.len(); |
---|
| 207 | line = gene + line; |
---|
| 208 | break; |
---|
| 209 | } |
---|
| 210 | case FB_TRANSLOCATION: |
---|
| 211 | { |
---|
| 212 | std::vector<int> cuts(4); |
---|
| 213 | for (int i = 0; i < 4; i++) |
---|
| 214 | { |
---|
| 215 | cuts[i] = randomN(line.len()); |
---|
| 216 | } |
---|
| 217 | std::sort(cuts.begin(), cuts.end()); |
---|
| 218 | SString first = line.substr(cuts[0], cuts[1] - cuts[0]); |
---|
| 219 | SString second = line.substr(cuts[2], cuts[3] - cuts[2]); |
---|
| 220 | SString result = line.substr(0, cuts[0]) + second + |
---|
| 221 | line.substr(cuts[1], cuts[2] - cuts[1]) + first + line.substr(cuts[3]); |
---|
| 222 | line = result; |
---|
| 223 | chg = (float)(cuts[3] - cuts[2] + cuts[1] - cuts[0]) / line.len(); |
---|
| 224 | break; |
---|
| 225 | } |
---|
| 226 | } |
---|
| 227 | SString result = strdims + "\n" + line; |
---|
| 228 | free(geno); |
---|
| 229 | geno = strdup(result.c_str()); |
---|
| 230 | return GENOPER_OK; |
---|
| 231 | } |
---|
| 232 | |
---|
| 233 | int Geno_fB::crossOver(char *&g1, char *&g2, float& chg1, float& chg2) |
---|
| 234 | { |
---|
| 235 | SString p1(g1); |
---|
| 236 | SString p2(g2); |
---|
| 237 | |
---|
| 238 | int dims1 = 0, dims2 = 0; |
---|
| 239 | int pos = 0; |
---|
| 240 | SString strdims; |
---|
| 241 | p1.getNextToken(pos, strdims, '\n'); |
---|
| 242 | ExtValue::parseInt(strdims.c_str(), dims1, true, false); |
---|
| 243 | SString parent1; |
---|
| 244 | p1.getNextToken(pos, parent1, '\n'); |
---|
| 245 | |
---|
| 246 | pos = 0; |
---|
| 247 | p2.getNextToken(pos, strdims, '\n'); |
---|
| 248 | ExtValue::parseInt(strdims.c_str(), dims2, true, false); |
---|
| 249 | |
---|
| 250 | if (dims1 != dims2) |
---|
| 251 | { |
---|
| 252 | return GENOPER_OPFAIL; |
---|
| 253 | } |
---|
| 254 | |
---|
| 255 | SString parent2; |
---|
| 256 | p2.getNextToken(pos, parent2, '\n'); |
---|
| 257 | |
---|
| 258 | SString child1 = ""; |
---|
| 259 | SString child2 = ""; |
---|
| 260 | |
---|
| 261 | switch (roulette(crossoverprobs, FB_XOVER_COUNT)) |
---|
| 262 | { |
---|
| 263 | case FB_GENE_TRANSFER: |
---|
| 264 | { |
---|
| 265 | // get random gene from first parent |
---|
| 266 | int choice = randomN(fB_GenoHelpers::geneCount(parent1)); |
---|
| 267 | int start, end; |
---|
| 268 | SString gene = fB_GenoHelpers::getGene(choice, parent1, start, end); |
---|
| 269 | // add this gene to the beginning of the second parent genotype |
---|
| 270 | child2 = gene + parent2; |
---|
| 271 | chg2 = (float)parent2.len() / (float)child2.len(); |
---|
| 272 | // do the same for second parent |
---|
| 273 | choice = randomN(fB_GenoHelpers::geneCount(parent2)); |
---|
| 274 | gene = fB_GenoHelpers::getGene(choice, parent2, start, end); |
---|
| 275 | child1 = gene + parent1; |
---|
| 276 | chg1 = (float)parent1.len() / (float)child1.len(); |
---|
| 277 | break; |
---|
| 278 | } |
---|
| 279 | case FB_CROSSING_OVER: |
---|
| 280 | { |
---|
| 281 | // iterate through all genes of the first parent and assign them |
---|
| 282 | // randomly to children |
---|
| 283 | for (int i = 0; i < fB_GenoHelpers::geneCount(parent1); i++) |
---|
| 284 | { |
---|
| 285 | int start, end; |
---|
| 286 | SString gene = fB_GenoHelpers::getGene(i, parent1, start, end); |
---|
| 287 | if (randomN(2) == 0) |
---|
| 288 | { |
---|
| 289 | child1 += gene; |
---|
| 290 | chg1 += 1.0f; |
---|
| 291 | } |
---|
| 292 | else |
---|
| 293 | { |
---|
| 294 | child2 += gene; |
---|
| 295 | } |
---|
| 296 | } |
---|
| 297 | chg1 /= fB_GenoHelpers::geneCount(parent1); |
---|
| 298 | |
---|
| 299 | // do the same with second parent |
---|
| 300 | for (int i = 0; i < fB_GenoHelpers::geneCount(parent2); i++) |
---|
| 301 | { |
---|
| 302 | int start, end; |
---|
| 303 | SString gene = fB_GenoHelpers::getGene(i, parent2, start, end); |
---|
| 304 | if (randomN(2) == 0) |
---|
| 305 | { |
---|
| 306 | child1 += gene; |
---|
| 307 | } |
---|
| 308 | else |
---|
| 309 | { |
---|
| 310 | child2 += gene; |
---|
| 311 | chg2 += 1.0f; |
---|
| 312 | } |
---|
| 313 | } |
---|
| 314 | chg2 /= fB_GenoHelpers::geneCount(parent2); |
---|
| 315 | break; |
---|
| 316 | } |
---|
| 317 | } |
---|
| 318 | |
---|
| 319 | free(g1); |
---|
| 320 | free(g2); |
---|
| 321 | if (child1.len() > 0 && child2.len() == 0) |
---|
| 322 | { |
---|
| 323 | child1 = strdims + "\n" + child1; |
---|
| 324 | g1 = strdup(child1.c_str()); |
---|
| 325 | g2 = strdup(""); |
---|
| 326 | } |
---|
| 327 | else if (child2.len() > 0 && child1.len() == 0) |
---|
| 328 | { |
---|
| 329 | child2 = strdims + "\n" + child2; |
---|
| 330 | g1 = strdup(child2.c_str()); |
---|
| 331 | g2 = strdup(""); |
---|
| 332 | } |
---|
| 333 | else |
---|
| 334 | { |
---|
| 335 | child1 = strdims + "\n" + child1; |
---|
| 336 | child2 = strdims + "\n" + child2; |
---|
| 337 | g1 = strdup(child1.c_str()); |
---|
| 338 | g2 = strdup(child2.c_str()); |
---|
| 339 | } |
---|
| 340 | return GENOPER_OK; |
---|
| 341 | } |
---|
| 342 | |
---|
| 343 | uint32_t Geno_fB::style(const char *geno, int pos) |
---|
| 344 | { |
---|
| 345 | char ch = geno[pos]; |
---|
| 346 | if (isdigit(ch)) |
---|
| 347 | { |
---|
| 348 | while (pos > 0) |
---|
| 349 | { |
---|
| 350 | pos--; |
---|
| 351 | if (isdigit(geno[pos]) == 0) |
---|
| 352 | { |
---|
| 353 | return GENSTYLE_CS(0, GENSTYLE_INVALID); |
---|
| 354 | } |
---|
| 355 | } |
---|
| 356 | return GENSTYLE_RGBS(0, 0, 200, GENSTYLE_BOLD); |
---|
| 357 | } |
---|
| 358 | if (islower(ch) == 0) |
---|
| 359 | { |
---|
| 360 | return GENSTYLE_CS(0, GENSTYLE_INVALID); |
---|
| 361 | } |
---|
| 362 | uint32_t style = GENSTYLE_CS(GENCOLOR_TEXT, GENSTYLE_NONE); |
---|
| 363 | if (ch == 'a' && pos > 0 && (geno[pos - 1] == 'a' || geno[pos - 1] == '\n')) |
---|
| 364 | { |
---|
| 365 | style = GENSTYLE_RGBS(0, 200, 0, GENSTYLE_BOLD); |
---|
| 366 | } |
---|
| 367 | else if (ch == 'z' && pos > 0 && geno[pos - 1] == 'z') |
---|
| 368 | { |
---|
| 369 | style = GENSTYLE_RGBS(200, 0, 0, GENSTYLE_BOLD); |
---|
| 370 | } |
---|
| 371 | return style; |
---|
| 372 | } |
---|