source: cpp/frams/genetics/f4/f4_general.h @ 1229

Last change on this file since 1229 was 1229, checked in by Maciej Komosinski, 20 months ago
  • More strict parsing (reporting errors instead of implicit fixes)
  • Simplified and optimized parsing of neuron class names
  • Added a number of comments on parsing peculiarities
  • Property svn:eol-style set to native
File size: 18.6 KB
Line 
1// This file is a part of Framsticks SDK.  http://www.framsticks.com/
2// Copyright (C) 1999-2023  Maciej Komosinski and Szymon Ulatowski.
3// See LICENSE.txt for details.
4
5// Copyright (C) 1999,2000  Adam Rotaru-Varga (adam_rotaru@yahoo.com), GNU LGPL
6
7#ifndef _F4_GENERAL_H_
8#define _F4_GENERAL_H_
9
10#include <frams/util/3d.h>
11#include <frams/util/sstring.h>
12#include <frams/util/multirange.h>
13#include <frams/genetics/geneprops.h>
14
15#ifdef DMALLOC
16#include <dmalloc.h>
17#endif
18
19/**
20 * Performs a single decrementation step of a rotation ("rolling") angle. The value is modified through the given pointer.
21 * @param v pointer to the value to be decremented
22 */
23void rolling_dec(double *v);
24
25/**
26 * Performs a single incrementation step of a rotation ("rolling") angle. The value is modified through the given pointer.
27 * @param v pointer to the value to be incremented
28 */
29void rolling_inc(double *v);
30
31class f4_Node;   // forward declaration; the class is defined later in this file
32class f4_Cell;   // forward declaration; the class is defined later in this file
33class f4_Cells;  // forward declaration; the class is defined later in this file
34
35
36/** @name Types of f4_Cell (differentiation states of a cell) */
37//@{
38#define CELL_UNDIFF 40 ///<undifferentiated cell (every f4_Cell starts in this state)
39#define CELL_STICK  41 ///<differentiated to stick, cannot divide anymore
40#define CELL_NEURON 42 ///<differentiated to neuron, can still divide
41//@}
42
43/**
44 * TODO MacKo 2023-04: not sure if this function is needed and if f4_processRecur() would not suffice
45 * if it advanced the string pointer (in/out parameter) while processing. Its returned value is always used after
46 * f4_processRecur() anyway, and in two cases likely incorrectly (for [...] to detect closing ']'
47 * and for :...: to detect closing ':') - we don't need recursion in these cases, a simple linear
48 * scan would suffice, but even this would not be needed - since we are parsing the actual characters in these cases,
49 * we do scanning anyway. So it looks like this function doubles the work already done more thoroughly by f4_processRecur().
50 *
51 * Scans an f4 genotype string for a stopping character and returns the position of
52 * this stopping character, or 1 if the end of the string was reached. This function is used
53 * for closing braces, like ), >, ]. It runs recursively when opening braces
54 * like (, <, # are found.
55 * @param s string with the f4 genotype
56 * @param slen length of the given string
57 * @param stopchar character to be found
58 * @return position of the found character in the sequence, or 1 if the end of the string was reached (NOTE(review): 1 cannot be told apart from a match at position 1 - confirm that callers cope with this)
59 */
60int scanRecur(const char* s, int slen, char stopchar);
61
62
63class f4_CellConn; // forward declaration; the class is defined later in this file
64
65/** @name Constraints of f4 genotype structures */
66//@{
67#define F4_MAX_CELL_INPUTS  10 ///<maximum number of neuron inputs in a developing organism (size of the f4_Cell::conns array)
68#define F4_MAX_CELLS 100 ///<maximum number of f4 organism cells (size of the f4_Cells::C array)
69//@}
70
71/**
72 * Abstract cell type - the representation of a single component in the developmental
73 * encoding. In the beginning, each f4_Cell is undifferentiated. During the process
74 * of development it can divide or differentiate into a stick or a neuron. If it
75 * differentiates to a neuron, then it preserves the ability to divide, but divided
76 * cells will be the same type as the parent cell. If it is a stick, then it cannot
77 * be divided anymore.
78 *
79 * The final Model of a creature is created from an array of f4_Cell objects.
80 */
81class f4_Cell
82{
83public:
84        /**
85         * Represents the repetition marker. It holds the pointer
86         * to the repetition node and the count of repetitions.
87         */
88        class repeat_ptr
89        {
90        public:
91                repeat_ptr() : node(NULL), count(-1) { }; ///<creates a null marker (no node, count -1)
92
93                /**
94                 * A constructor that takes the pointer to the repetition node and the count of repetitions.
95                 * @param a pointer to f4_Node for repetition character
96                 * @param b the number of repetitions
97                 */
98                repeat_ptr(f4_Node *a, int b) : node(a), count(b) { };
99
100                inline void makeNull() { node = NULL; count = -1; }; ///<resets to the same state as the default constructor
101
102                inline bool isNull() const { return ((node == NULL) || (count <= 0)); }; ///<true when node is NULL or count is not positive
103
104                inline void dec() { count--; }; ///<decrements the repetition counter (no lower bound is enforced here)
105                f4_Node    *node; ///<pointer to the repetition code
106                int       count; ///<repetition counter
107        };
108
109        /**
110         * Represents the stack of repeat_ptr objects. The objects are
111         * pushed to the stack when '#' repetition symbol appears, and are popped when
112         * the end of the current cell definition, i.e. the '>' character, appears. After the
113         * '>' character, the cell is duplicated as many times as it is defined after the
114         * repetition marker.
115         */
116        class repeat_stack
117        {
118        public:
119                repeat_stack() { top = 0; }
120
121                inline void clear() { top = 0; } ///<empties the stack (elements stored in ptr are not reset)
122
123                /**
124                 * Pushes a repeat_ptr object onto the stack. If the stack is already full (stackSize elements),
125                 * the push is silently ignored - no error information is provided.
126                 * @param rn repetition node info
127                 */
128                inline void push(repeat_ptr rn) { if (top >= stackSize) return; ptr[top] = rn; top++; }
129
130                inline void pop() { if (top > 0) top--; } ///<removes the top element; does nothing when the stack is empty
131
132                /**
133                 * Gets the current top element. For an empty stack, the address of ptr[0] is returned (never NULL).
134                 * @return pointer to the element on top of the repeat_stack object
135                 */
136                inline repeat_ptr* first() { return &(ptr[top - (top > 0)]); };
137                static const int stackSize = 4;  ///<max 4 nested levels
138                repeat_ptr ptr[stackSize]; ///<array holding the stacked repeat_ptr objects
139                int top;  ///<number of elements on the stack (index of the first free slot in ptr)
140        };
141
142        /**
143         * Creates a new f4_Cell object.
144         * @param nnr number of the cell
145         * @param ndad pointer to the parent of the created cell
146         * @param nangle the number of commas affecting branch angles
147         * @param newP genotype properties of a given cell
148         */
149        f4_Cell(int nnr, f4_Cell *ndad, int nangle, GeneProps newP);
150        /**
151         * Creates a new f4_Cell object.
152         * @param nO pointer to an organism containing the cell
153         * @param nnr number of the cell
154         * @param ngeno pointer to the root of the genotype tree
155         * @param ngcur pointer to the f4_Node representing the current cell in the genotype tree
156         * @param ndad pointer to the parent of the created cell
157         * @param nangle the number of commas affecting branch angles
158         * @param newP genotype properties of a given cell
159         */
160        f4_Cell(f4_Cells *nO, int nnr, f4_Node *ngeno, f4_Node *ngcur, f4_Cell *ndad, int nangle, GeneProps newP);
161
162        ~f4_Cell();
163
164        /**
165         * Performs a single step of cell development. This method requires a pointer to
166         * the f4_Cells object in the org attribute. If the current node in the genotype tree
167         * is the branching character '<', the cell divides into two cells, unless the
168         * cell was already differentiated into the stick cell. Otherwise, the current
169         * differentiation or modification is performed on the cell. If the current node is
170         * creating a connection between two neuron nodes and the input node is not
171         * yet developed, the simulation of the development of the current cell waits until
172         * the input node is created. The oneStep method is deployed for every cell
173         * at least once. If one cell requires another one to develop, oneStep
174         * should be deployed again on this cell. This method, unlike genotype tree
175         * creation, checks semantics. This means that this function will fail if:
176         *  - the cell differentiated as a stick will have branching node '<',
177         *  - the undifferentiated cell will have termination node '>' (end of cell development without differentiation),
178         *  - the stack of repetition marker '#' will exceed maximum allowed value of repetition,
179         *  - the stick modifiers, like rotation, will be applied on neuron cell,
180         *  - the differentiated cell will be differentiated again,
181         *  - the connection between neurons cannot be established,
182         *  - the neuron class is not valid.
183         *
184         * @return 0 if development was successful, 1 if there was an error in genotype tree
185         */
186        int oneStep();
187
188        /**
189         * Adds a connection between this neuron cell and a given neuron cell in nfrom.
190         * @param nfrom input neuron cell
191         * @param nweight weight of connection
192         * @return 0 if connection is established, -1 otherwise
193         */
194        int   addConnection(f4_Cell *nfrom, double nweight);
195
196        /**
197         * Adjusts properties of stick objects.
198         */
199        void  adjustRec();
200
201        int        nr;                 ///<number of cell (seems to be used only in old f1 converter for neuron connections)
202        int        type;               ///<type of the cell: CELL_UNDIFF, CELL_STICK or CELL_NEURON
203        f4_Cell *dadlink;              ///<pointer to cell parent
204        f4_Cells  *org;                ///<uplink to organism
205
206        f4_Node *genot;                    ///<genotype tree
207        f4_Node *gcur;                 ///<current genotype execution pointer
208        bool active;                   ///<determines whether development is still active; even if false, the cell may "yield" - may be halted (but still having its oneStep() called) due to neural connections waiting for other cells to potentially develop neurons
209        repeat_stack repeat;           ///<stack holding repetition nodes and counters
210        int recProcessedFlag;          ///<used during recursive traverse
211        MultiRange genoRange;          ///<remember the genotype codes affecting this cell so far
212
213        GeneProps    P;                ///<properties
214        int          anglepos;         ///<number of position within dad's children (,)
215        int          childcount;       ///<number of children
216        int          commacount;       ///<number of positions at lastend (>=childcount)
217        double       rolling;          ///<rolling angle ('R') (around x)
218        double       xrot;                         ///<rotation angle around x
219        double       zrot;             ///<horizontal rotation angle due to branching (around z)
220
221        double       mz;               ///<freedom in z
222        int          p2_refno;         ///<the number of the last end part object, used in f0
223        int          joint_refno;      ///<the number of the joint object, used in f0
224        int          neuro_refno;      ///<the number of the neuro object, used in f0
225
226        double       inertia;          ///<inertia of neuron
227        double       force;            ///<force of neuron
228        double       sigmo;            ///<sigmoid of neuron
229        f4_CellConn *conns[F4_MAX_CELL_INPUTS]; ///<array of neuron connections (at most F4_MAX_CELL_INPUTS entries)
230        int          conns_count;      ///<number of connections
231        NeuroClass *neuclass;          ///<pointer to neuron class
232};
233
234/**
235 * Class representing a connection between neuron cells.
236 */
237class f4_CellConn
238{
239public:
240        /**
241         * Constructor for the f4_CellConn class. Parameter nfrom represents the input
242         * neuron cell.
243         * @param nfrom pointer to input neuron cell
244         * @param nweight weight of connection
245         */
246        f4_CellConn(f4_Cell *nfrom, double nweight);
247
248        f4_Cell *from;  ///<pointer to input neuron cell
249        double weight;  ///<weight of connection
250};
251
252
253/**
254 * A class representing a collection of cells. It is equivalent to an organism.
255 */
256class f4_Cells
257{
258public:
259
260        /**
261         * Constructor taking genotype in a form of a tree.
262         * @param genome genotype tree
263         * @param nrepair 0 if nothing to repair
264         */
265        f4_Cells(f4_Node *genome, int nrepair);
266
267        /**
268         * Constructor taking genotype in a form of a string.
269         * @param genome genotype string
270         * @param nrepair 0 if nothing to repair
271         */
272        f4_Cells(SString &genome, int nrepair);
273
274        /**
275         * Destructor removing cells from memory.
276         */
277        ~f4_Cells();
278
279        /**
280         * Adds a new cell to the organism.
281         * @param newcell cell to be added
282         */
283        void addCell(f4_Cell *newcell);
284
285        /**
286         * Creates an approximate genotype in the f1 encoding and stores it in a given parameter.
287         * @param out the string in which the approximate f1 genotype will be stored
288         */
289        void toF1Geno(SString &out);
290
291        /**
292         * Performs a single step of organism development. It runs each active cell in the organism.
293         * @return false if all cells are developed or there is an error, true otherwise
294         */
295        bool oneStep();
296
297        /**
298         * Performs the full development of the organism and returns an error code if something
299         * went wrong.
300         * @return 0 if organism developed successfully, error code if something went wrong
301         */
302        int simulate();
303
304        /**
305         * Prints the current state of the organism (for debugging purposes).
306         * @param description printout header
307         */
308        void print_cells(const char* description);
309
310        /**
311         * Returns error code of the last simulation.
312         * @return error code
313         */
314        int getErrorCode() { return errorcode; };
315
316        /**
317         * Returns position of an error in genotype.
318         * @return position of an error
319         */
320        int getErrorPos() { return errorpos; };
321
322        /**
323         * Sets error code GENOPER_OPFAIL for a simulation on a given position.
324         * @param nerrpos position of an error
325         */
326        void setError(int nerrpos);
327
328        /**
329         * Sets the element of genotype to be repaired by removal.
330         * @param nerrpos position of an error in genotype
331         * @param rem the f4_Node to be removed from the genotype tree in order to repair
332         */
333        void setRepairRemove(int nerrpos, f4_Node *rem);
334
335        /**
336         * Sets repairing of a genotype by inserting a new node to the current genotype.
337         * @param nerrpos position of an error in genotype
338         * @param parent the parent of a new element
339         * @param insert the element to be inserted
340         * @return 0 if repair can be performed, or -1 otherwise because the repair flag wasn't set in the constructor
341         */
342        int setRepairInsert(int nerrpos, f4_Node *parent, f4_Node *insert);
343
344        /**
345         * Repairs the genotype according to setRepairRemove or setRepairInsert methods.
346         * @param geno pointer to the genotype tree
347         * @param whichchild 1 if first child, 2 otherwise
348         */
349        void repairGeno(f4_Node *geno, int whichchild);
350
351        // the cells
352        f4_Cell *C[F4_MAX_CELLS];  ///<Array of all cells of an organism (capacity: F4_MAX_CELLS)
353        int     cell_count;        ///<Number of cells in an organism
354
355private:
356        // for error reporting / genotype fixing
357        int repair; // repair flag (presumably set from the nrepair constructor argument - TODO confirm)
358        int errorcode; // value returned by getErrorCode()
359        int errorpos; // value returned by getErrorPos()
360        f4_Node *repair_remove; // presumably the node registered by setRepairRemove() - TODO confirm
361        f4_Node *repair_parent; // presumably the parent registered by setRepairInsert() - TODO confirm
362        f4_Node *repair_insert; // presumably the node registered by setRepairInsert() - TODO confirm
363        void toF1GenoRec(int curc, SString &out);
364        f4_Cell *tmpcel;                // needed by toF1Geno
365        f4_Node *f4rootnode;          // used by constructor
366};
367
368
369/**
370 * A class to organize an f4 genotype in a tree structure.
371 */
372class f4_Node
373{
374public:
375        string name; ///<one-letter gene code or multiple characters for neuron classes (then neuclass != NULL)
376        f4_Node *parent; ///<parent link or NULL
377        f4_Node *child; ///<child or NULL
378        f4_Node *child2; ///<second child or NULL
379        int pos; ///<original position in the string
380
381        int reps; ///<repetition counter for the '#' gene
382        char prop_symbol; ///<old-style properties (force,inertia,sigmoid) of the N neuron: !=/
383        bool prop_increase; ///<false=decrease neuron property (force,inertia,sigmoid), true=increase it
384        int conn_from; ///<relative number of the neuron this neuron gets an input from
385        double conn_weight; ///<neuron connection weight
386        NeuroClass *neuclass; ///< NULL, or non-NULL when "name" is a neuroclass name in a proper genotype context ("N:neuroclassname"). New in 2023-04 - to fix fatal flaw with fundamental assumptions: it was impossible to distinguish between single-character neuron names such as S, D, G and single-character modifiers. They were all stored in the "name" field. Before 2018 this was never a problem because the only supported neuroclasses had distinctive symbols such as @|*GTS, and the set of supported modifiers was small and different from neuroclass letters (no G,D,S clash).
387
388        f4_Node();
389
390        /**
391         * Multiple-character name constructor.
392         * @param nname string from genotype representing node
393         * @param nparent pointer to parent of the node
394         * @param npos position of node substring in the genotype string
395         */
396        f4_Node(string nname, f4_Node *nparent, int npos);
397
398        /**
399         * Single-character name constructor.
400         * @param nname character from genotype representing node
401         * @param nparent pointer to parent of the node
402         * @param npos position of node character in the genotype string
403         */
404        f4_Node(char nname, f4_Node *nparent, int npos);
405
406        ~f4_Node();
407
408        /**
409         * Recursively prints a subtree (for debugging).
410         * @param root starting node
411         * @param indent initial indentation
412         */
413        static void print_tree(const f4_Node *root, int indent);
414
415        /**
416         * Adds the child to the node.
417         * @param nchi the child to be added to the node
418         * @return 0 if the child could be added, -1 otherwise
419         */
420        int addChild(f4_Node *nchi);
421
422        /**
423         * Removes the child from the node.
424         * @param nchi the child to be removed from the node
425         * @return 0 if child could be removed, -1 otherwise
426         */
427        int removeChild(f4_Node *nchi);
428
429        /**
430         * Returns the number of children.
431         * @return 0, 1 or 2
432         */
433        int childCount();
434
435        /**
436         * Returns the number of nodes coming from this node in a recursive way.
437         * @return the number of nodes from this node
438         */
439        int count() const;
440
441        /**
442         * Returns the nth subnode (presumably numbered from 0 - TODO confirm against the implementation)
443         * @param n index of the subnode to be found
444         * @return pointer to the nth subnode or NULL if not found
445         */
446        f4_Node* ordNode(int n);
447
448        /**
449         * Returns a random subnode.
450         * @return random subnode
451         */
452        f4_Node* randomNode();
453
454        /**
455         * Returns a random subnode with a given size.
456         * @param min minimum size
457         * @param max maximum size
458         * @return a random subnode with a given size or NULL
459         */
460        f4_Node* randomNodeWithSize(int min, int max);
461
462        /**
463         * Recursively prints the tree starting from this node.
464         * @param buf variable to store printing result
465         */
466        void      sprintAdj(char *&buf);
467
468        /**
469         * Recursively copies the genotype tree from this node.
470         * @return pointer to a tree copy
471         */
472        f4_Node* duplicate();
473
474        /**
475         * Recursively releases memory from all node children.
476         */
477        void      destroy();
478private:
479        void     sprint(SString &out);  // prints recursively into the given string
480};
481
482/**
483 * The main function for converting a string of f4 encoding to a tree structure. Prepares
484 * f4_Node root of tree and runs f4_processRecur function for it.
485 * @param geno the string representing an f4 genotype
486 * @return a pointer to the f4_Node object representing the f4 tree root
487 */
488//f4_Node* f4_processTree(const char *geno);
489
490/**
491 * Scans a genotype string starting from a given position. This recursive function creates
492 * a tree of f4_Node objects. It extracts each potentially functional element
493 * of a genotype string into a separate f4_Node. When the branching character '<' occurs,
494 * f4_processRecur is deployed for the latest f4_Node element. This function does not
495 * analyse the genotype semantically, it only checks if the syntax is proper. The only
496 * semantic aspect is neuron class name extraction, where the GenoOperators
497 * class is used to parse the potential neuron class name.
498 * @param genot the string holding the whole genotype
499 * @param pos0 the current position of processing in the string
500 * @param parent current parent of the analysed branch of the genotype
501 * @return 0 if processing was successful, otherwise returns the position of an error in the genotype
502 */
503int f4_processRecur(const char *genot, unsigned int pos0, f4_Node *parent);
504
505/**
506 * Parses notation of the neuron connection - takes the beginning of the connection
507 * definition, extracts the relative position of the input neuron and the weight of the connection.
508 * After successful parsing, returns the pointer to the first character after the connection
509 * definition, or NULL if the connection definition was not valid due to the lack of [, :, ]
510 * characters or an invalid value of relfrom or weight.
511 * @param fragm the beginning of connection definition, should be the '[' character
512 * @param relfrom the reference to an int variable in which the relative position of the input neuron will be stored
513 * @param weight the reference to a double variable in which the weight of the connection will be stored
514 * @return the pointer to the first character in the string after the connection definition, or NULL if the definition was not valid
515 */
516const char *parseConnection(const char *fragm, int &relfrom, double &weight);
517
518#endif
Note: See TracBrowser for help on using the repository browser.