Changeset 1230 for cpp/frams/genetics/f4/f4_general.cpp
- Timestamp: 05/01/23 02:14:27 (2 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
cpp/frams/genetics/f4/f4_general.cpp
r1229 r1230 31 31 { 32 32 *v += 0.7853; // 0.7853981 45 degrees 33 }34 35 int scanRecur(const char* s, int slen, char stopchar)36 {37 int i = 0;38 //DB( printf(" scan('%s', '%c')\n", s, stopchar); )39 while (1)40 {41 if (i >= slen) // ran out the string, should never happen with a correct string42 return 1; //TODO MacKo 2023-04: interesting: why was this situation made undistinguishable from s[1]==stopchar ? does this have any bad consequences or is "1" just used to tell "advance as little as possible"? Anyway, this function can be eliminated when parsing is simplified.43 if (stopchar == s[i]) // bumped into stopchar44 return int(i);45 if (i < slen - 1) // s[i] is not the last char46 {47 if (s[i] == '(') //not an allowed char in f4, perhaps a remnant of old experiments with code48 {49 i += 2 + scanRecur(s + i + 1, slen - i - 1, ')');50 continue;51 }52 if (s[i] == '<')53 {54 i += 2 + scanRecur(s + i + 1, slen - i - 1, '>');55 continue;56 }57 if (s[i] == '#')58 {59 i += 2 + scanRecur(s + i + 1, slen - i - 1, '>');60 continue;61 }62 }63 // s[i] is a non-special character64 i++;65 }66 return i;67 33 } 68 34 … … 703 669 // transform geno from string to nodes 704 670 f4rootnode = new f4_Node(); 705 int res = f4_processRecur(genome.c_str(), 0, f4rootnode); 671 int _ = 0; 672 int res = f4_processRecur(genome.c_str(), _, f4rootnode); 706 673 if (res || (f4rootnode->childCount() != 1)) 707 674 { … … 1329 1296 // scan genotype string and build tree 1330 1297 // return >1 for error (errorpos) 1331 int f4_processRecur(const char* genot, unsigned int pos0, f4_Node *parent) 1332 { 1333 unsigned int gpos = pos0; //MacKo 2023-04 (TODO): these two variables are often updated before return which has no effect since they are local. Seems like a half step towards making them (or just gpos) in/out parameter which would solve many issues and simplify parsing (getting rid of scanRecur()) while making it more strict. 
1298 int f4_processRecur(const char* genot, int &pos_inout, f4_Node *parent) 1299 { 1334 1300 f4_Node *par = parent; 1335 1301 1336 if ( gpos >=strlen(genot))1302 if (pos_inout >= (int)strlen(genot)) 1337 1303 return (int)strlen(genot) + 1; 1338 1304 1339 while (gpos < strlen(genot)) 1340 { 1341 // first switch across cell dividers and old semantics 1342 switch (genot[gpos]) 1305 while (pos_inout < (int)strlen(genot)) 1306 { 1307 //#define PRINT_PARSING_LOCATION 1308 #ifdef PRINT_PARSING_LOCATION 1309 printf("%s\n", genot); 1310 for (int i = 0; i < pos_inout; i++) printf(" "); 1311 printf("^\n"); 1312 #endif 1313 switch (genot[pos_inout]) 1343 1314 { 1344 1315 case '<': 1345 1316 { 1346 // find out genotype start for child 1347 int stopchar_offset = scanRecur(genot + gpos + 1, (int)strlen(genot + gpos + 1), '>'); 1348 1349 f4_Node *node = new f4_Node("<", par, gpos); 1317 f4_Node *node = new f4_Node("<", par, pos_inout); 1350 1318 par = node; 1351 int res = f4_processRecur(genot, gpos + 1, par); 1319 pos_inout++; //move after '<' 1320 int res = f4_processRecur(genot, pos_inout, par); 1352 1321 if (res) return res; 1353 if ( gpos + stopchar_offset + 2 <strlen(genot))1354 { 1355 res = f4_processRecur(genot, gpos + stopchar_offset + 2, par);1322 if (pos_inout < (int)strlen(genot)) 1323 { 1324 res = f4_processRecur(genot, pos_inout, par); 1356 1325 if (res) return res; 1357 1326 } … … 1359 1328 { 1360 1329 //MacKo 2023-04, more strict behavior: instead of silent repair (no visible effect to the user, genotype stays invalid but is interpreted and reported as valid), we now point out where the error is. 
For example <X> or <X><X or <X><N:N> 1361 return gpos + 1; //the problem starts here, occurs because second child (branch) <1..>2..> is not completed1330 return (int)strlen(genot) + 1; 1362 1331 //old silent repair: 1363 1332 //node = new f4_Node(">", par, int(strlen(genot)) - 1); 1364 //par = node; 1365 } 1366 gpos++; 1333 } 1367 1334 return 0; // OK 1368 1335 } 1369 1336 case '>': 1370 1337 { 1371 f4_Node *node = new f4_Node(">", par, gpos); 1372 par = node; 1373 //gpos = (unsigned int)strlen(genot); //MacKo 2023-04: first of all, 'gpos' is a local variable so no effect; second, '>' may be internal (i.e., not the last one in the genotype), so it is a bad hint to assign strlen(). 'par' above is also local... 1338 new f4_Node(">", par, pos_inout); 1339 pos_inout++; //move after '>' 1374 1340 return 0; // OK 1375 1341 } … … 1378 1344 // repetition marker, 1 by default 1379 1345 ExtValue val; 1380 const char* end = val.parseNumber(genot + gpos + 1, ExtPType::TInt); 1346 const char* end = val.parseNumber(genot + pos_inout + 1, ExtPType::TInt); 1347 //TODO end==NULL? -> error! 1381 1348 int reps = (end == NULL) ? 
1 : val.getInt(); 1382 // find out genotype start for continuation1383 int stopchar_offset = scanRecur(genot + gpos + 1, (int)strlen(genot + gpos + 1), '>');1349 f4_Node *node = new f4_Node("#", par, pos_inout); 1350 node->reps = reps; 1384 1351 // skip number 1385 unsigned int oldpos = gpos; 1386 gpos += end - (genot + gpos); 1387 //gpos++; 1388 //while ((genot[gpos] >= '0') && (genot[gpos] <= '9')) gpos++; node1 = new f4_Node("#", par, oldpos); 1389 f4_Node *node = new f4_Node("#", par, oldpos); 1390 node->reps = reps; 1391 par = node; 1392 int res = f4_processRecur(genot, gpos, node); 1352 pos_inout += end - (genot + pos_inout); 1353 int res = f4_processRecur(genot, pos_inout, node); 1393 1354 if (res) return res; 1394 if ( oldpos + stopchar_offset + 2 <strlen(genot))1395 { 1396 res = f4_processRecur(genot, oldpos + stopchar_offset + 2, node);1355 if (pos_inout < (int)strlen(genot)) 1356 { 1357 res = f4_processRecur(genot, pos_inout, node); 1397 1358 if (res) return res; 1398 1359 } 1399 1360 else // ran out 1400 1361 { 1401 return gpos; //MacKo 2023-04: report an error, better to be more strict instead of a silent repair (genotype stays invalid but is interpreted and reported as valid) with non-obvious consequences?1362 return (int)strlen(genot) + 1; //MacKo 2023-04: report an error, better to be more strict instead of a silent repair (genotype stays invalid but is interpreted and reported as valid) with non-obvious consequences? 1402 1363 //earlier apporach - silently treating this problem (we don't ever see where the error is because it gets corrected in some way here, while parsing the genotype, and error location in the genotype is never reported): 1403 1364 //node = new f4_Node(">", par, int(strlen(genot)) - 1); // check if needed and if this is really the best repair operation; seemed to happen too many times in succession for some genotypes even though they were only a result of f4 operators, not manually created... 
and the operators should not generate invalid genotypes, right? Or maybe crossover does? Seems like too many #N's for closing >'s; removing #N or adding > helped. Operators somehow don't do it properly sometimes? But F4_ADD_REP adds '>'... (TODO) … … 1411 1372 { 1412 1373 // whitespace: ignore 1413 gpos++;1374 pos_inout++; 1414 1375 break; 1415 1376 } 1416 1377 case 'N': 1417 1378 { 1418 int forgenorange = gpos;1419 if (genot[ gpos+ 1] != ':')1420 return gpos+ 1; //error1421 gpos+= 2; //skipping "N:"1422 unsigned int begin_index = gpos;1423 char* end = (char*)genot + begin_index;1424 NeuroClass *neuclass = GenoOperators::parseNeuroClass( end, ModelEnum::SHAPETYPE_BALL_AND_STICK);1379 int forgenorange = pos_inout; 1380 if (genot[pos_inout + 1] != ':') 1381 return pos_inout + 1; //error 1382 pos_inout += 2; //skipping "N:" 1383 unsigned int neuroclass_begin = pos_inout; 1384 char* neuroclass_end = (char*)genot + neuroclass_begin; 1385 NeuroClass *neuclass = GenoOperators::parseNeuroClass(neuroclass_end, ModelEnum::SHAPETYPE_BALL_AND_STICK); //advances neuroclass_end 1425 1386 if (neuclass == NULL) 1426 return gpos+ 1; //error1427 gpos += end - genot - begin_index;1428 string neutype = string(genot + begin_index, genot + gpos);1387 return pos_inout + 1; //error 1388 pos_inout += neuroclass_end - genot - neuroclass_begin; 1389 string neutype = string(genot + neuroclass_begin, genot + pos_inout); 1429 1390 f4_Node *node = new f4_Node(neutype, par, forgenorange); 1430 1391 node->neuclass = neuclass; … … 1438 1399 // in the future this could be generalized to all neuron properties, for example N:|:power:0.6:range:1.4, or can even use '=' or ',' instead of ':' if no ambiguity 1439 1400 char prop_dir, prop_symbol, prop_end[2]; // prop_end is only to ensure that neuron parameter definition is completed 1440 if (sscanf(genot + gpos, ":%c%c%1[:]", &prop_dir, &prop_symbol, &prop_end) != 3)1401 if (sscanf(genot + pos_inout, ":%c%c%1[:]", &prop_dir, &prop_symbol, &prop_end) != 3) 
1441 1402 // error: incorrect format 1442 return gpos+ 1 + 1;1403 return pos_inout + 1 + 1; 1443 1404 if (prop_dir != '-' && prop_dir != '+') 1444 return gpos+ 1 + 1; //error1405 return pos_inout + 1 + 1; //error 1445 1406 switch (prop_symbol) 1446 1407 { 1447 1408 case '!': case '=': case '/': break; 1448 1409 default: 1449 return gpos+ 1 + 1; //error1450 } 1451 f4_Node *node = new f4_Node(":", par, gpos);1410 return pos_inout + 1 + 1; //error 1411 } 1412 f4_Node *node = new f4_Node(":", par, pos_inout); 1452 1413 node->prop_symbol = prop_symbol; 1453 1414 node->prop_increase = prop_dir == '+' ? true : false; // + or - 1454 1415 par = node; 1455 int stopchar_offset = scanRecur(genot + gpos + 1, (int)strlen(genot + gpos + 1), ':'); 1456 gpos += stopchar_offset + 2; 1416 pos_inout += 4; //skipping :ds: 1457 1417 break; 1458 1418 } … … 1461 1421 double weight = 0; 1462 1422 int relfrom; 1463 const char *end = parseConnection(genot + gpos, relfrom, weight);1423 const char *end = parseConnection(genot + pos_inout, relfrom, weight); 1464 1424 if (end == NULL) 1465 return gpos+ 1; //error1466 1467 f4_Node *node = new f4_Node("[", par, gpos);1425 return pos_inout + 1; //error 1426 1427 f4_Node *node = new f4_Node("[", par, pos_inout); 1468 1428 node->conn_from = relfrom; 1469 1429 node->conn_weight = weight; 1470 1430 par = node; 1471 int stopchar_offset = scanRecur(genot + gpos + 1, (int)strlen(genot + gpos + 1), ']'); 1472 gpos += stopchar_offset + 2; 1431 pos_inout += end - (genot + pos_inout); 1473 1432 break; 1474 1433 } 1475 1434 default: // 'X' and ',' and all modifiers and also invalid symbols - add a node, for invalid symbols build will give the error or repair 1476 1435 { 1477 //printf("any regular character '%c'\n", genot[gpos]); 1478 f4_Node *node = new f4_Node(genot[gpos], par, gpos); 1436 //printf("any regular character '%c'\n", genot[pos_inout]); 1437 //TODO here: read a continuous sequence of modifiers, sort and optimize ("collapse") it like in f1, then 
add to tree 1438 f4_Node *node = new f4_Node(genot[pos_inout], par, pos_inout); 1479 1439 par = node; 1480 gpos++;1440 pos_inout++; 1481 1441 break; 1482 1442 } … … 1487 1447 if (par && par->name != ">") 1488 1448 { 1489 //happens when gpos == strlen(genot) 1490 //return gpos; //MacKo 2023-04: could report an error instead of silent repair, but repair operators only work in Cells (i.e., after the f4_Node tree has been parsed without errors and Cells can start developing) so we don't want to make a fatal error because of missing '>' here. Also after conversions from Cells to text, trailing '>' is deliberately removed... and also the simplest genotype is officially X, not X>. 1491 f4_Node *node = new f4_Node('>', par, int(strlen(genot)) - 1); 1492 par = node; 1493 } 1494 1495 return 0; 1449 //happens when pos_inout == strlen(genot) 1450 //return pos_inout; //MacKo 2023-04: could report an error instead of silent repair, but repair operators only work in Cells (i.e., after the f4_Node tree has been parsed without errors and Cells can start developing) so we don't want to make a fatal error because of missing '>' here. Also after conversions from Cells to text, trailing '>' is deliberately removed... and also the simplest genotype is officially X, not X>. 1451 new f4_Node('>', par, int(strlen(genot)) - 1); 1452 } 1453 1454 return 0; // OK 1496 1455 } 1497 1456
Note: See TracChangeset for help on using the changeset viewer.