Changeset 1241 for cpp/frams/genetics/genooperators.cpp
- Timestamp: 05/18/23 03:43:42 (2 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
cpp/frams/genetics/genooperators.cpp
r1233 r1241 473 473 474 474 //#include <cassert> 475 string GenoOperators::simplifiedModifiers (const char *str_of_char_pairs, vector<int> &char_counts)475 string GenoOperators::simplifiedModifiersFixedOrder(const char *str_of_char_pairs, vector<int> &char_counts) 476 476 { 477 477 // assert(strlen(str_of_char_pairs) == char_counts.size()); 478 478 // assert(char_counts.size() % 2 == 0); 479 const int MAX_NUMBER_SAME_TYPE = 8; // max. number of modifiers of each type = 8 (mainly for Rr)479 const int MAX_NUMBER_SAME_TYPE = 8; // max. number of modifiers of each type (case-sensitive) - mainly for rR, even though for rR, 4 would be sufficient if we assume lower or upper can be chosen as required for minimal length, e.g. rrrrr==RRR, RRRRRR==rr 480 480 string simplified; 481 //#define CLUMP_IDENTICAL_MODIFIERS //not good because properties are calculated incrementally, non-linearly, and their values are updated after each modifier character, so these values may for example saturate after a large number of identical modifier symbols. The order of modifiers is in generalrelevant and extreme values of properties increase this relevance, so better keep the modifiers dispersed.481 //#define CLUMP_IDENTICAL_MODIFIERS //not good because with the exception of rR properties are calculated incrementally, non-linearly, and their values are updated after each modifier character, so these values may for example saturate after a large number of identical modifier symbols. The order of modifiers is (with the exception of rR) relevant and extreme values of properties increase this relevance, so better keep the modifiers dispersed. 482 482 #ifdef CLUMP_IDENTICAL_MODIFIERS 483 483 for (size_t i = 0; i < strlen(str_of_char_pairs); i++) … … 507 507 return simplified; 508 508 } 509 510 string GenoOperators::simplifiedModifiers(const string & original) 511 { 512 const int MAX_NUMBER_SAME_TYPE = 6; // max. number of modifiers of each type (case-insensitive). 
rR could be treated separately in simplification because their influence follows different (i.e., simple additive) logic - so the simplifiedModifiersFixedOrder() logic with cancelling out is appropriate for rR. However in this function, making no exception to rR does not cause any harm to these modifiers either - the only consequence is that we will not remove antagonistic letters and will not simplify sequences of rR longer than 4, while they could be simplified (e.g. rrrrr==RRR, RRRRRR==rr). 513 int counter[256] = {}; //initialize with zeros; 256 is unnecessarily too big and redundant, but enables very fast access (indexed directly by the ascii code) 514 string simplified = ""; 515 for (int i = original.size() - 1; i >= 0; i--) //iterate from end to begin - easier to remove "oldest" = first modifiers 516 { 517 unsigned char c = original[i]; 518 if (!std::isalpha(c)) 519 continue; 520 unsigned char lower = std::tolower(c); 521 counter[lower]++; 522 if (counter[lower] <= MAX_NUMBER_SAME_TYPE) //get rid of modifiers that are too numerous, but get rid of the first ones in the string (="oldest", the last ones looking from the end), because their influence on the parameter value is the smallest 523 simplified += c; 524 } 525 std::reverse(simplified.begin(), simplified.end()); //"simplified" was built in reverse order, so need to restore the order that corresponds to "original" 526 return simplified; 527 }
Note: See TracChangeset for help on using the changeset viewer.