source: framspy/FramsticksCLI.py @ 1060

Last change on this file since 1060 was 1060, checked in by Maciej Komosinski, 23 months ago

"Vectorized" mutation for better performance due to decreased time of communication py<->frams (mutate many genotypes in one call)

File size: 16.6 KB
Line 
1from subprocess import Popen, PIPE, check_output
2from enum import Enum
3from typing import List  # to be able to specify a type hint of list(something)
4from itertools import count  # for tracking multiple instances
5import json
6import sys, os
7import argparse
8import numpy as np
9import framsreader  # only needed for mutation: https://pypi.org/project/framsreader
10
11
class FramsticksCLI:
        """Runs Framsticks CLI (command-line) executable and communicates with it using standard input and output.
        You can perform basic operations like mutation, crossover, and evaluation of genotypes.
        This way you can perform evolution controlled by python as well as access and manipulate genotypes.
        You can even design and use in evolution your own genetic representation implemented entirely in python.

        You need to provide one or two parameters when you run this class: the path to Framsticks CLI
        and the name of the Framsticks CLI executable (if it is non-standard). See::
                FramsticksCLI.py -h"""

        PRINT_FRAMSTICKS_OUTPUT: bool = False  # set to True for debugging
        DETERMINISTIC: bool = False  # set to True to have the same results on each run

        GENO_SAVE_FILE_FORMAT = Enum('GENO_SAVE_FILE_FORMAT', 'NATIVEFRAMS RAWGENO')  # how to save genotypes
        OUTPUT_DIR = "scripts_output"
        GENOTYPE_INVALID = "/*invalid*/"  # this is how genotype invalidity is represented in Framsticks
        STDOUT_ENDOPER_MARKER = "FileObject.write"  # we look for this message on Framsticks CLI stdout to detect when Framsticks created a file with the result we expect

        FILE_PREFIX = 'framspy_'

        RANDOMIZE_CMD = "Math.randomize();"
        SETEXPEDEF_CMD = "Simulator.expdef=\"standard-eval\";"
        GETSIMPLEST_CMD = "getsimplest"
        GETSIMPLEST_FILE = "simplest.gen"
        EVALUATE_CMD = "evaluate eval-allcriteria.sim"
        EVALUATE_FILE = "genos_eval.json"
        CROSSOVER_CMD = "crossover"
        CROSSOVER_FILE = "crossover_child.gen"
        DISSIMIL_CMD = "dissimil"
        DISSIMIL_FILE = "dissimilarity_matrix.tsv"  # tab-separated values
        ISVALID_CMD = "isvalid_many"
        ISVALID_FILE = "validity.txt"
        MUTATE_CMD = "mutate_many"
        MUTATE_FILE = "mutation_results.gen"

        CLI_INPUT_FILE = "genotypes.gen"

        _next_instance_id = count(0)  # "static" counter incremented when a new instance is created. Used to ensure unique filenames for each instance.


        def __init__(self, framspath, framsexe, pid=""):
                """Detects the writable working directory of Framsticks CLI and spawns the CLI process.

                Args:
                        framspath: directory that contains the Framsticks CLI executable.
                        framsexe: executable name; None selects 'frams.exe' on Windows and 'frams.linux' elsewhere.
                        pid: optional text inserted into the names of created files (None is treated as "").

                The program is terminated with exit code 1 when the executable cannot be found or
                when the writable directory cannot be detected in the output of the executable.
                """
                self.pid = pid if pid is not None else ""
                self.id = next(FramsticksCLI._next_instance_id)
                self.frams_path = framspath
                self.frams_exe = framsexe if framsexe is not None else 'frams.exe' if os.name == "nt" else 'frams.linux'
                self.writing_path = None
                mainpath = os.path.join(self.frams_path, self.frams_exe)
                exe_call = [mainpath, '-Q', '-s', '-c', '-icliutils.ini']  # -c will be ignored in Windows Framsticks (this option is meaningless because the Windows version does not support color console, so no need to deactivate this feature using -c)
                exe_call_to_get_version = [mainpath, '-V']
                exe_call_to_get_path = [mainpath, '-?']
                try:
                        print("\n".join(self.__readAllOutput(exe_call_to_get_version)))
                        help_lines = self.__readAllOutput(exe_call_to_get_path)
                except FileNotFoundError:
                        print("Could not find Framsticks executable ('%s') in the given location ('%s')." % (self.frams_exe, self.frams_path))
                        sys.exit(1)
                for helpline in help_lines:
                        if 'dDIRECTORY' in helpline:
                                # the directory is the text between the first pair of apostrophes in that line
                                self.writing_path = helpline.split("'")[1]
                if self.writing_path is None:
                        # every operation joins file names with writing_path, so continuing would only fail later with an obscure TypeError
                        print("Could not detect the writable working directory in the output of '%s'." % " ".join(exe_call_to_get_path))
                        sys.exit(1)
                print("Temporary files with results will be saved in detected writable working directory '%s'" % self.writing_path)
                self.__spawnFramsticksCLI(exe_call)


        def __readAllOutput(self, command):
                """Runs `command` (a list: executable and its arguments) until it ends.

                Returns:
                        The list of lines the command printed to stdout, decoded as UTF-8 and with trailing whitespace removed.
                """
                # communicate() drains both pipes and waits for the process, and the "with" block closes the pipes,
                # so nothing is leaked and a talkative stderr cannot block the child process
                with Popen(command, stdout=PIPE, stderr=PIPE, stdin=PIPE) as frams_process:
                        stdout_data, _ = frams_process.communicate()
                lines = stdout_data.decode('utf-8').split('\n')
                if lines and lines[-1] == '':  # text ending with a newline (or no text at all) produces a spurious empty last item
                        lines.pop()
                return [line.rstrip() for line in lines]


        def __spawnFramsticksCLI(self, args):
                """Spawns Framsticks CLI for interactive use, runs two sanity tests and initializes the simulator.

                Args:
                        args: a list with the executable path followed by its command-line arguments.
                """
                # the child app (Framsticks CLI) should not buffer outputs and we need to immediately read its stdout, hence we use pexpect/wexpect
                print('Spawning Framsticks CLI for continuous stdin/stdout communication... ', end='')
                # The executable and its arguments are passed separately (not joined into one string)
                # so that a path containing spaces is not split into multiple arguments.
                if os.name == "nt":  # Windows:
                        import wexpect  # https://pypi.org/project/wexpect/
                        # https://github.com/raczben/wexpect/tree/master/examples
                        self.child = wexpect.spawn(args[0], list(args[1:]))
                else:
                        import pexpect  # https://pexpect.readthedocs.io/en/stable/
                        self.child = pexpect.spawn(args[0], list(args[1:]))
                self.child.setecho(False)  # ask the communication to not copy to stdout what we write to stdin
                print('OK.')

                self.__readFromFramsCLIUntil("UserScripts.autoload")
                print('Performing a basic test 1/2... ', end='')
                assert self.getSimplest("1") == "X"
                print('OK.')
                print('Performing a basic test 2/2... ', end='')
                assert self.isValid(["X[0:0],", "X[0:0]", "X[1:0]"]) == [False, True, False]
                print('OK.')
                if not self.DETERMINISTIC:
                        self.sendDirectCommand(self.RANDOMIZE_CMD)
                self.sendDirectCommand(self.SETEXPEDEF_CMD)


        def closeFramsticksCLI(self):
                """Asks Framsticks CLI to terminate by sending the end-of-file character."""
                # End gracefully by sending end-of-file character: ^Z or ^D
                # Without the -Q argument ("quiet mode"), Framsticks CLI would print "Shell closed." for goodbye.
                self.child.sendline(chr(26 if os.name == "nt" else 4))


        def __getPrefixedFilename(self, filename: str) -> str:
                """Returns `filename` preceded by FILE_PREFIX, the pid text, a letter derived from the instance id ('A' for 0, 'B' for 1, ...) and '_'."""
                # The unique instance id ensures there is no clash when many instances of this class use the same Framsticks CLI executable
                return FramsticksCLI.FILE_PREFIX + self.pid + chr(ord('A') + self.id) + '_' + filename


        def __saveGenotypeToFile(self, genotype, name, mode, saveformat):
                """Writes (or deletes) a genotype file in the writable working directory.

                Args:
                        genotype: genotype text; not used when mode is 'd'.
                        name: file name without the instance prefix.
                        mode: a mode for open() (such as "w" or "a"), or the special 'd' which deletes the file if it exists.
                        saveformat: a GENO_SAVE_FILE_FORMAT value; RAWGENO writes the bare genotype, anything else writes an "org:" object.

                Returns:
                        A tuple (relative file name, absolute file name).
                """
                relname = self.__getPrefixedFilename(name)
                absname = os.path.join(self.writing_path, relname)
                if mode == 'd':  # special mode, 'delete'
                        if os.path.exists(absname):
                                os.remove(absname)
                else:
                        with open(absname, mode) as outfile:  # "with" closes the file also when writing fails
                                if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                                        outfile.write(genotype)
                                else:
                                        outfile.write("org:\n")
                                        outfile.write("genotype:~\n")
                                        outfile.write(genotype + "~\n\n")  # TODO proper quoting of special characters in genotype...
                return relname, absname


        def __readFromFramsCLIUntil(self, until_marker: str) -> str:
                """Reads Framsticks CLI output line by line until a line that contains `until_marker`.

                Lines starting with "[ERROR]" or "[CRITICAL]" are always printed; all lines are printed
                when PRINT_FRAMSTICKS_OUTPUT is set.

                Returns:
                        All lines read before the line with the marker, each followed by '\\n'. The marker line itself is not included.
                """
                line_end = '\r\n' if os.name == "nt" else '\n'
                collected = []
                while True:
                        self.child.expect(line_end)
                        raw = self.child.before
                        if isinstance(raw, bytes):
                                # pexpect works with bytes unless an encoding is given; str(bytes) would yield "b'...'"
                                # which pollutes the output and defeats the startswith() checks below
                                msg = raw.decode('utf-8', errors='replace')
                        else:
                                msg = str(raw)
                        msg = msg.rstrip('\r')  # presumably a leftover of "\r\n" line endings when only "\n" is the expected separator
                        if self.PRINT_FRAMSTICKS_OUTPUT or msg.startswith(("[ERROR]", "[CRITICAL]")):
                                print(msg)
                        if until_marker in msg:
                                break
                        collected.append(msg + '\n')
                return "".join(collected)


        def __runCommand(self, command, genotypes, result_file_name, saveformat) -> List[str]:
                """Saves `genotypes` to input file(s), sends `command` to Framsticks CLI and waits until it reports writing a file.

                Args:
                        command: the command text that precedes the file names.
                        genotypes: the list of genotypes to be stored as input for the command.
                        result_file_name: name (without the instance prefix) of the file in which the command stores results.
                        saveformat: RAWGENO stores each genotype in a separate file, NATIVEFRAMS stores all genotypes in one file.

                Returns:
                        Absolute paths of the input files followed by (as the last element) the path to the file with results.
                """
                filenames_rel = []  # list of file names with input data for the command
                filenames_abs = []  # same list but absolute paths actually used
                if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                        for i, genotype in enumerate(genotypes):
                                # plain text format = must have a separate file for each genotype
                                rel_name, abs_name = self.__saveGenotypeToFile(genotype, "genotype" + str(i) + ".gen", "w", self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                                filenames_rel.append(rel_name)
                                filenames_abs.append(abs_name)
                elif saveformat == self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"]:
                        # 'd'elete: ensure there is nothing left from the last run of the program because we "a"ppend to file in the loop below.
                        # The names returned here are the same for every genotype, so they are known even if the list of genotypes is empty.
                        rel_name, abs_name = self.__saveGenotypeToFile(None, self.CLI_INPUT_FILE, 'd', None)
                        for genotype in genotypes:
                                self.__saveGenotypeToFile(genotype, self.CLI_INPUT_FILE, "a", self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                        #  since we use the same file in the loop above, add this file only once (i.e., outside of the loop)
                        filenames_rel.append(rel_name)
                        filenames_abs.append(abs_name)

                result_file_name = self.__getPrefixedFilename(result_file_name)
                cmd = command + " " + " ".join(filenames_rel) + " " + result_file_name
                self.child.sendline(cmd)
                self.__readFromFramsCLIUntil(self.STDOUT_ENDOPER_MARKER)
                filenames_abs.append(os.path.join(self.writing_path, self.OUTPUT_DIR, result_file_name))
                return filenames_abs  # last element is a path to the file containing results


        def __cleanUpCommandResults(self, filenames):
                """Deletes files with results just created by the command."""
                for name in filenames:
                        os.remove(name)


        sendDirectCommand_counter = count(0)  # an internal counter for the sendDirectCommand() method; should be static within that method but python does not allow


        def sendDirectCommand(self, command: str) -> str:
                """Sends any command to Framsticks CLI. Use when you know Framsticks and its scripting language, Framscript.

                Returns:
                        The output of the command, likely with extra \\n because for each entered command, Framsticks CLI responds with a (muted in Quiet mode) prompt and a \\n.
                """
                self.child.sendline(command.strip())
                # use the number produced by the counter (not the text representation of the counter object) to build the marker
                end_marker = "uniqe-marker-" + str(next(FramsticksCLI.sendDirectCommand_counter))
                self.child.sendline("Simulator.print(\"%s\");" % end_marker)
                return self.__readFromFramsCLIUntil(end_marker)


        def getSimplest(self, genetic_format) -> str:
                """
                Args:
                        genetic_format: the genetic format name passed to the GETSIMPLEST_CMD command, for example "1".

                Returns:
                        The content of the result file created by the command (the simplest genotype in the given format).
                """
                files = self.__runCommand(self.GETSIMPLEST_CMD + " " + genetic_format + " ", [], self.GETSIMPLEST_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        genotype = f.read()
                self.__cleanUpCommandResults(files)
                return genotype


        def evaluate(self, genotype_list: List[str]):
                """
                Returns:
                        List of dictionaries containing the performance of genotypes evaluated with self.EVALUATE_CMD.
                        Note that for whatever reason (e.g. incorrect genotype), the dictionaries you will get may be empty or
                        partially empty and may not have the fields you expected, so handle such cases properly.
                        None when Framsticks returned no performance data at all (the files are not deleted then).
                        An empty list when genotype_list is empty (Framsticks is not called in this case).
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                if not genotype_list:
                        return []  # nothing to evaluate
                files = self.__runCommand(self.EVALUATE_CMD, genotype_list, self.EVALUATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        data = json.load(f)
                if not data:
                        print("Evaluating genotype: no performance data was returned in", self.EVALUATE_FILE)  # we do not delete files here
                        return None
                self.__cleanUpCommandResults(files)
                assert len(genotype_list) == len(data), f"After evaluating {len(genotype_list)} genotype(s) got {len(data)} result(s)."
                return data


        def mutate(self, genotype_list: List[str]) -> List[str]:
                """
                Returns:
                        The genotype(s) of the mutated source genotype(s). self.GENOTYPE_INVALID for genotypes whose mutation failed (for example because the source genotype was invalid).
                        An empty list when genotype_list is empty (Framsticks is not called in this case).
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                if not genotype_list:
                        return []  # nothing to mutate
                files = self.__runCommand(self.MUTATE_CMD, genotype_list, self.MUTATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                genos = framsreader.load(files[-1], "gen file")
                self.__cleanUpCommandResults(files)
                return [g["genotype"] for g in genos]


        def crossOver(self, genotype_parent1: str, genotype_parent2: str) -> str:
                """
                Returns:
                        The genotype of the offspring. Empty string if the crossing over failed.
                """
                files = self.__runCommand(self.CROSSOVER_CMD, [genotype_parent1, genotype_parent2], self.CROSSOVER_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        child_genotype = f.read()
                self.__cleanUpCommandResults(files)
                return child_genotype


        def dissimilarity(self, genotype_list: List[str]) -> np.ndarray:
                """
                Returns:
                        A square array with dissimilarities of each pair of genotypes.
                        An array of shape (0, 0) when genotype_list is empty (Framsticks is not called in this case).
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                if not genotype_list:
                        return np.zeros((0, 0), dtype=np.float64)  # nothing to compare
                files = self.__runCommand(self.DISSIMIL_CMD, genotype_list, self.DISSIMIL_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        dissimilarity_matrix = np.genfromtxt(f, dtype=np.float64, comments='#', encoding=None, delimiter='\t')
                # genfromtxt() returns a one-dimensional array when the file has just one row (a single genotype), and the 2D slicing below would fail
                dissimilarity_matrix = np.atleast_2d(dissimilarity_matrix)
                # We would like to skip column #1 while reading and read everything else, but... https://stackoverflow.com/questions/36091686/exclude-columns-from-genfromtxt-with-numpy
                # This would be too complicated, so strings (names) in column #1 become NaN as floats (unless they accidentally are valid numbers) - not great, not terrible
                square_matrix = dissimilarity_matrix[:, 2:]  # get rid of two first columns (fitness and name)
                EXPECTED_SHAPE = (len(genotype_list), len(genotype_list))
                # print(square_matrix)
                assert square_matrix.shape == EXPECTED_SHAPE, f"Not a correct dissimilarity matrix, expected {EXPECTED_SHAPE}"
                assert (np.diagonal(square_matrix) == 0).all(), "Not a correct dissimilarity matrix, diagonal expected to be 0"
                assert (square_matrix == square_matrix.T).all(), "Probably not a correct dissimilarity matrix, expecting symmetry, verify this"  # could introduce tolerance in comparison (e.g. class field DISSIMIL_DIFF_TOLERANCE=10^-5) so that miniscule differences do not fail here
                self.__cleanUpCommandResults(files)
                return square_matrix


        def isValid(self, genotype_list: List[str]) -> List[bool]:
                """
                Returns:
                        A list with one boolean per submitted genotype; True where the corresponding line
                        of the result file is "1". An empty list when genotype_list is empty (Framsticks is not called in this case).
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                if not genotype_list:
                        return []  # nothing to check
                files = self.__runCommand(self.ISVALID_CMD, genotype_list, self.ISVALID_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        valid = [line.strip() == "1" for line in f]
                self.__cleanUpCommandResults(files)
                assert len(genotype_list) == len(valid), "Submitted %d genotypes, received %d validity values" % (len(genotype_list), len(valid))
                return valid
279
280
def parseArguments():
        """Defines the command-line options of this program (-path, -exe, -genformat, -pid) and returns the parsed arguments."""
        description = 'Run this program with "python -u %s" if you want to disable buffering of its output.' % sys.argv[0]
        # each entry: (option name, keyword arguments for add_argument); the order determines the order in the help text
        option_specs = (
                ('-path', dict(type=ensureDir, required=True, help='Path to Framsticks CLI without trailing slash.')),
                ('-exe', dict(required=False, help='Executable name. If not given, "frams.exe" or "frams.linux" is assumed depending on the platform.')),
                ('-genformat', dict(required=False, help='Genetic format for the demo run, for example 4, 9, or S. If not given, f1 is assumed.')),
                ('-pid', dict(required=False, help='Unique ID of this process. Only relevant when you run multiple instances of this class simultaneously but as separate processes, and they use the same Framsticks CLI executable. This value will be appended to the names of created files to avoid conflicts.')),
        )
        arg_parser = argparse.ArgumentParser(description=description)
        for option_name, option_kwargs in option_specs:
                arg_parser.add_argument(option_name, **option_kwargs)
        return arg_parser.parse_args()
288
289
def ensureDir(string):
        """Returns `string` unchanged if it is a path to an existing directory; raises NotADirectoryError otherwise."""
        if not os.path.isdir(string):
                raise NotADirectoryError(string)
        return string
295
296
if __name__ == "__main__":
        # A demo run: obtains the simplest genotype, mutates it, crosses over two genotypes,
        # and reports dissimilarity, performance and validity of the results.

        # TODO ideas:
        # - check_validity with three levels (invalid, corrected, valid)
        # - "vectorize" crossover so that many genotypes are handled in one call. Even better, use .so/.dll direct communication to CLI
        # - use threads for non-blocking reading from frams' stdout and thus not relying on specific strings printed by frams
        # - a pool of binaries run at the same time, balance load - in particular evaluation
        # - if we read genotypes in "org:" format anywhere: import https://pypi.org/project/framsreader and use it if successful,
        #    if not then print a message "framsreader not available, using simple internal method to save a genotype" and proceed as it is now.
        #    We should use the proper writer to handle all special cases like quoting special characters etc.

        parsed_args = parseArguments()
        framsCLI = FramsticksCLI(parsed_args.path, parsed_args.exe, parsed_args.pid)

        try:
                print("Sending a direct command to Framsticks CLI that calculates \"4\"+2 yields", repr(framsCLI.sendDirectCommand("Simulator.print(\"4\"+2);")))

                simplest = framsCLI.getSimplest('1' if parsed_args.genformat is None else parsed_args.genformat)
                print("\tSimplest genotype:", simplest)
                parent1 = framsCLI.mutate([simplest])[0]
                parent2 = parent1
                MUTATE_COUNT = 10
                for _ in range(MUTATE_COUNT):  # example of a chain of MUTATE_COUNT mutations
                        parent2 = framsCLI.mutate([parent2])[0]
                print("\tParent1 (mutated simplest):", parent1)
                print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)
                offspring = framsCLI.crossOver(parent1, parent2)
                print("\tCrossover (Offspring):", offspring)
                print('\tDissimilarity of Parent1 and Offspring:', framsCLI.dissimilarity([parent1, offspring])[0, 1])
                print('\tPerformance of Offspring:', framsCLI.evaluate([offspring]))
                print('\tValidity of Parent1, Parent 2, and Offspring:', framsCLI.isValid([parent1, parent2, offspring]))
        finally:
                # ask the spawned Framsticks CLI to terminate even if one of the demo steps failed
                framsCLI.closeFramsticksCLI()
Note: See TracBrowser for help on using the repository browser.