Ignore:
Timestamp:
04/30/23 02:11:46 (20 months ago)
Author:
Maciej Komosinski
Message:
  • More strict parsing (reporting errors instead of implicit fixes)
  • Simplified and optimized parsing of neuron class names
  • Added a number of comments on parsing peculiarities
File:
1 edited

Legend:

Unmodified
Added
Removed
  • cpp/frams/genetics/f4/f4_general.cpp

    r1228 r1229  
    2525void rolling_dec(double *v)
    2626{
    27         *v -= 0.7853;  // 0.7853981  45 degrees
     27        *v -= 0.7853;  // 0.7853981  45 degrees = pi/4 like in f1
    2828}
    2929
     
    4040        {
    4141                if (i >= slen)  // ran out the string, should never happen with a correct string
    42                         return 1;
     42                        return 1; //TODO MacKo 2023-04: interesting: why was this situation made indistinguishable from s[1]==stopchar? does this have any bad consequences or is "1" just used to tell "advance as little as possible"? Anyway, this function can be eliminated when parsing is simplified.
    4343                if (stopchar == s[i])  // bumped into stopchar
    4444                        return int(i);
    4545                if (i < slen - 1) // s[i] is not the last char
    4646                {
    47                         if (s[i] == '(')
     47                        if (s[i] == '(') //not an allowed char in f4, perhaps a remnant of old experiments with code
    4848                        {
    4949                                i += 2 + scanRecur(s + i + 1, slen - i - 1, ')');
     
    192192                // the current genotype code is processed
    193193                //genoRange.add(gcur->pos,gcur->pos+gcur->name.length()-1);
    194                 bool neuclasshandler = false; // if set to true, then a separate neuron handler below will identify the neuroclass and assign the cell to the neuron type
    195194
    196195                // To detect what genes are valid neuroclass names, but do NOT have is_neuroclass==true
    197                 // (just as a curiosity to ensure we properly distinguish between, for example, the "G" neuron and "G" modifier):
     196                // (just as a curiosity to ensure we properly distinguish between, for example, the "G" neuron and the "G" modifier):
    198197                //char *TMP = (char*)gcur->name.c_str();
    199                 //if (gcur->is_neuroclass==false && GenoOperators::parseNeuroClass(TMP,ModelEnum::SHAPETYPE_BALL_AND_STICK))
     198                //if (gcur->is_neuroclass==false && GenoOperators::parseNeuroClass(TMP, ModelEnum::SHAPETYPE_BALL_AND_STICK))
    200199                //      printf("Could be a valid neuroclass, but is_neuroclass==false: %s\n", gcur->name.c_str());
    201200
    202                 if (gcur->name.length() == 1 && gcur->neuclass == NULL) //one-character genes and not neuroclass names
    203                 {
     201                if (gcur->neuclass == NULL) //not a neuron
     202                {
     203                        if (gcur->name.length() > 1)
     204                                logPrintf("f4_Cell", "oneStep", LOG_WARN, "Multiple-character code that is not a neuron class name: '%s'", gcur->name.c_str()); //let's see an example of such a code...
     205
    204206                        genoRange.add(gcur->pos, gcur->pos);
    205207                        char name = gcur->name[0];
     
    542544                        default:
    543545                        {
    544                                 // because there are one-character neuron classes, default is move control to neuclasshandler
    545                                 neuclasshandler = true;
    546                         }
    547                         }
    548                 }
    549                 else
    550                 {
    551                         // if many characters or single character but is_neuroclass, then it will be handled below
    552                         neuclasshandler = true;
    553                 }
    554 
    555                 if (neuclasshandler)
    556                 {
    557                         genoRange.add(gcur->pos, gcur->pos + int(gcur->name.length()) + 2 - 1); // +2 for N:
    558                         if (type != CELL_UNDIFF)
    559                         {
    560                                 // fix: delete this node
    561                                 org->setRepairRemove(gcur->pos, gcur);
    562                                 return 1;  // stop
    563                         }
    564                         // error: if no previous
    565                         if (dadlink == NULL)
    566                         {
    567                                 // fix: delete it
    568                                 org->setRepairRemove(gcur->pos, gcur);
    569                                 return 1;  // stop
    570                         }
    571                         // multiple characters are neuron types. Let's check if exists in the current configuration of Framsticks
    572                         char *temp = (char*)gcur->name.c_str();
    573                         neuclass = GenoOperators::parseNeuroClass(temp, ModelEnum::SHAPETYPE_BALL_AND_STICK);
    574                         if (neuclass == NULL)
    575                         {
    576546                                // error: unknown code
    577547                                string buf = "Unknown code '" + gcur->name + "'";
     
    580550                                return 1;
    581551                        }
     552                        }
     553                }
     554                else
     555                {
     556                        genoRange.add(gcur->pos, gcur->pos + int(gcur->name.length()) + 2 - 1); // +2 for N:
     557                        if (type != CELL_UNDIFF)
     558                        {
     559                                // fix: delete this node
     560                                org->setRepairRemove(gcur->pos, gcur);
     561                                return 1;  // stop
     562                        }
     563                        // error: if no previous
     564                        if (dadlink == NULL)
     565                        {
     566                                // fix: delete it
     567                                org->setRepairRemove(gcur->pos, gcur);
     568                                return 1;  // stop
     569                        }
     570                        neuclass = gcur->neuclass;
    582571                        type = CELL_NEURON;
    583572                        // change of type also halts development, to give other
     
    13001289        len = out.length();
    13011290        if (len > 1)
    1302                 if (out[len - 1] == '>') { (out.directWrite())[len - 1] = 0; out.endWrite(); };
     1291                if (out[len - 1] == '>') { (out.directWrite())[len - 1] = 0; out.endWrite(); }; //Macko 2023-04 TODO "can be omitted", but should we remove it as a rule even in generated genotypes? see if I can somehow detect junk characters after top-level '>' ends properly: /*4*/<X>N:N>whatever
    13031292        // copy back to string
    13041293        // if new is longer, reallocate buf
     
    13401329// scan genotype string and build tree
    13411330// return >1 for error (errorpos)
    1342 int f4_processRecur(const char* genot, unsigned pos0, f4_Node *parent)
    1343 {
    1344         unsigned int gpos;
    1345         f4_Node *par;
    1346 
    1347         gpos = pos0;
    1348         par = parent;
    1349         if (gpos >= strlen(genot)) return 1;
     1331int f4_processRecur(const char* genot, unsigned int pos0, f4_Node *parent)
     1332{
     1333        unsigned int gpos = pos0; //MacKo 2023-04 (TODO): these two variables are often updated before return which has no effect since they are local. Seems like a half step towards making them (or just gpos) in/out parameter which would solve many issues and simplify parsing (getting rid of scanRecur()) while making it more strict.
     1334        f4_Node *par = parent;
     1335
     1336        if (gpos >= strlen(genot))
     1337                return (int)strlen(genot) + 1;
     1338
    13501339        while (gpos < strlen(genot))
    13511340        {
     
    13691358                        else // ran out
    13701359                        {
    1371                                 node = new f4_Node(">", par, int(strlen(genot)) - 1);
    1372                                 par = node;
     1360                                //MacKo 2023-04, more strict behavior: instead of silent repair (no visible effect to the user, genotype stays invalid but is interpreted and reported as valid), we now point out where the error is. For example <X> or <X><X or <X><N:N>
     1361                                return gpos + 1; //the problem starts here, occurs because second child (branch) <1..>2..> is not completed
     1362                                //old silent repair:
     1363                                //node = new f4_Node(">", par, int(strlen(genot)) - 1);
     1364                                //par = node;
    13731365                        }
    13741366                        gpos++;
     
    13791371                        f4_Node *node = new f4_Node(">", par, gpos);
    13801372                        par = node;
    1381                         gpos = (unsigned int)strlen(genot);
     1373                        //gpos = (unsigned int)strlen(genot); //MacKo 2023-04: first of all, 'gpos' is a local variable so no effect; second, '>' may be internal (i.e., not the last one in the genotype), so it is a bad hint to assign strlen(). 'par' above is also local...
    13821374                        return 0;  // OK
    13831375                }
     
    13941386                        gpos += end - (genot + gpos);
    13951387                        //gpos++;
    1396                         //while ((genot[gpos] >= '0') && (genot[gpos] <= '9')) gpos++;node1 = new f4_Node("#", par, oldpos);
     1388                        //while ((genot[gpos] >= '0') && (genot[gpos] <= '9')) gpos++; node1 = new f4_Node("#", par, oldpos);
    13971389                        f4_Node *node = new f4_Node("#", par, oldpos);
    13981390                        node->reps = reps;
     
    14071399                        else // ran out
    14081400                        {
    1409                                 node = new f4_Node(">", par, int(strlen(genot)) - 1);
     1401                                return gpos; //MacKo 2023-04: report an error, better to be more strict instead of a silent repair (genotype stays invalid but is interpreted and reported as valid) with non-obvious consequences?
     1402                                //earlier approach - silently treating this problem (we don't ever see where the error is because it gets corrected in some way here, while parsing the genotype, and error location in the genotype is never reported):
     1403                                //node = new f4_Node(">", par, int(strlen(genot)) - 1); // check if needed and if this is really the best repair operation; seemed to happen too many times in succession for some genotypes even though they were only a result of f4 operators, not manually created... and the operators should not generate invalid genotypes, right? Or maybe crossover does? Seems like too many #N's for closing >'s; removing #N or adding > helped. Operators somehow don't do it properly sometimes? But F4_ADD_REP adds '>'... (TODO)
    14101404                        }
    14111405                        return 0;  // OK
     
    14911485
    14921486        // should end with a '>'
    1493         if (par)
    1494         {
    1495                 if (par->name != ">")
    1496                 {
    1497                         f4_Node *node = new f4_Node('>', par, int(strlen(genot)) - 1);
    1498                         par = node;
    1499                 }
     1487        if (par && par->name != ">")
     1488        {
     1489                //happens when gpos == strlen(genot)
     1490                //return gpos; //MacKo 2023-04: could report an error instead of silent repair, but repair operators only work in Cells (i.e., after the f4_Node tree has been parsed without errors and Cells can start developing) so we don't want to make a fatal error because of missing '>' here. Also after conversions from Cells to text, trailing '>' is deliberately removed... and also the simplest genotype is officially X, not X>.
     1491                f4_Node *node = new f4_Node('>', par, int(strlen(genot)) - 1);
     1492                par = node;
    15001493        }
    15011494
Note: See TracChangeset for help on using the changeset viewer.