source: framspy/FramsticksCLI.py @ 1087

Last change on this file since 1087 was 1087, checked in by Maciej Komosinski, 22 months ago

Cosmetic

File size: 16.7 KB
Line 
1from subprocess import Popen, PIPE, check_output
2from enum import Enum
3from typing import List  # to be able to specify a type hint of list(something)
4from itertools import count  # for tracking multiple instances
5import json
6import sys, os
7import argparse
8import numpy as np
9import framsreader  # only needed for mutation: https://pypi.org/project/framsreader
10
11
class FramsticksCLI:
        """Note: instead of this class, you should use the simpler, faster, and more reliable FramsticksLib.py.

        This class runs Framsticks CLI (command-line) executable and communicates with it using standard input and output.
        You can perform basic operations like mutation, crossover, and evaluation of genotypes.
        This way you can perform evolution controlled by python as well as access and manipulate genotypes.
        You can even design and use in evolution your own genetic representation implemented entirely in python.

        You need to provide one or two parameters when you run this class: the path to Framsticks CLI
        and, optionally, the name of the Framsticks CLI executable (if it is non-standard). See::
                FramsticksCLI.py -h"""

        PRINT_FRAMSTICKS_OUTPUT: bool = False  # set to True for debugging
        DETERMINISTIC: bool = False  # set to True to have the same results in each run

        GENO_SAVE_FILE_FORMAT = Enum('GENO_SAVE_FILE_FORMAT', 'NATIVEFRAMS RAWGENO')  # how to save genotypes
        OUTPUT_DIR = "scripts_output"
        GENOTYPE_INVALID = "/*invalid*/"  # this is how genotype invalidity is represented in Framsticks
        STDOUT_ENDOPER_MARKER = "FileObject.write:"  # we look for this message on Framsticks CLI stdout to detect when Framsticks created a file with the result we expect

        FILE_PREFIX = 'framspy_'

        RANDOMIZE_CMD = "Math.randomize();"
        SETEXPEDEF_CMD = "Simulator.expdef=\"standard-eval\";"
        GETSIMPLEST_CMD = "getsimplest"
        GETSIMPLEST_FILE = "simplest.gen"
        EVALUATE_CMD = "evaluate eval-allcriteria.sim"
        EVALUATE_FILE = "genos_eval.json"
        CROSSOVER_CMD = "crossover"
        CROSSOVER_FILE = "crossover_child.gen"
        DISSIMIL_CMD = "dissimil"
        DISSIMIL_FILE = "dissimilarity_matrix.tsv"  # tab-separated values
        ISVALID_CMD = "isvalid_many"
        ISVALID_FILE = "validity.txt"
        MUTATE_CMD = "mutate_many"
        MUTATE_FILE = "mutation_results.gen"

        CLI_INPUT_FILE = "genotypes.gen"

        _next_instance_id = count(0)  # "static" counter incremented when a new instance is created. Used to ensure unique filenames for each instance.


        def __init__(self, framspath, framsexe, pid=""):
                """Detects the writable directory of Framsticks CLI and spawns the CLI for interactive use.

                Args:
                        framspath: Directory that contains the Framsticks CLI executable.
                        framsexe: Name of the executable; None selects 'frams.exe' on Windows and 'frams.linux' elsewhere.
                        pid: Optional text inserted into the names of created files; None is treated as "".

                If the executable cannot be found, a message is printed and the program exits with status 1.
                """
                self.pid = pid if pid is not None else ""
                self.id = next(FramsticksCLI._next_instance_id)
                self.frams_path = framspath
                self.frams_exe = framsexe if framsexe is not None else 'frams.exe' if os.name == "nt" else 'frams.linux'
                self.writing_path = None  # stays None when the help output has no 'dDIRECTORY' line
                mainpath = os.path.join(self.frams_path, self.frams_exe)
                exe_call = [mainpath, '-Q', '-s', '-c', '-icliutils.ini']  # -c will be ignored in Windows Framsticks (this option is meaningless because the Windows version does not support color console, so no need to deactivate this feature using -c)
                exe_call_to_get_version = [mainpath, '-V']
                exe_call_to_get_path = [mainpath, '-?']
                try:
                        print("\n".join(self.__readAllOutput(exe_call_to_get_version)))
                        help_lines = self.__readAllOutput(exe_call_to_get_path)  # not named "help" to avoid shadowing the builtin
                except FileNotFoundError:
                        print("Could not find Framsticks executable ('%s') in the given location ('%s')." % (self.frams_exe, self.frams_path))
                        sys.exit(1)
                for helpline in help_lines:
                        if 'dDIRECTORY' in helpline:
                                # the directory is the text between the first pair of apostrophes in that help line
                                self.writing_path = helpline.split("'")[1]
                print("Temporary files with results will be saved in detected writable working directory '%s'" % self.writing_path)
                self.__spawnFramsticksCLI(exe_call)


        def __readAllOutput(self, command):
                """Runs `command` to completion and returns its stdout as a list of decoded, right-stripped lines.

                Raises:
                        FileNotFoundError: when the executable named in `command` does not exist.
                """
                # The context manager together with communicate() drains both pipes, waits for the process
                # and closes all handles; reading only stdout could block forever once the stderr pipe fills up
                # and would leave an unreaped child process behind.
                with Popen(command, stdout=PIPE, stderr=PIPE, stdin=PIPE) as frams_process:
                        stdout_data, _ = frams_process.communicate()
                raw_lines = stdout_data.split(b'\n')
                if raw_lines[-1] == b'':  # no partial line after the final newline (or no output at all)
                        raw_lines.pop()
                return [line.decode('utf-8').rstrip() for line in raw_lines]


        def __spawnFramsticksCLI(self, args):
                """Starts Framsticks CLI as an interactive child process, runs two sanity checks and configures it.

                Args:
                        args: Executable path followed by its command-line arguments.
                """
                # the child app (Framsticks CLI) should not buffer outputs and we need to immediately read its stdout, hence we use pexpect/wexpect
                print('Spawning Framsticks CLI for continuous stdin/stdout communication... ', end='')
                if os.name == "nt":  # Windows:
                        import wexpect  # https://pypi.org/project/wexpect/
                        # https://github.com/raczben/wexpect/tree/master/examples
                        # NOTE(review): a path containing spaces probably still breaks here - confirm how wexpect parses the command line
                        self.child = wexpect.spawn(' '.join(args))
                else:
                        import pexpect  # https://pexpect.readthedocs.io/en/stable/
                        # Pass the executable and its arguments separately: a single joined string would be
                        # re-split by pexpect on whitespace, which breaks paths that contain spaces.
                        self.child = pexpect.spawn(args[0], args[1:])
                self.child.setecho(False)  # ask the communication to not copy to stdout what we write to stdin
                print('OK.')

                self.__readFromFramsCLIUntil("UserScripts.autoload")
                print('Performing a basic test 1/2... ', end='')
                assert self.getSimplest("1") == "X"
                print('OK.')
                print('Performing a basic test 2/2... ', end='')
                assert self.isValid(["X[0:0],", "X[0:0]", "X[1:0]"]) == [False, True, False]
                print('OK.')
                if not self.DETERMINISTIC:
                        self.sendDirectCommand(self.RANDOMIZE_CMD)
                self.sendDirectCommand(self.SETEXPEDEF_CMD)


        def closeFramsticksCLI(self):
                """Asks Framsticks CLI to terminate by sending the end-of-file character."""
                # End gracefully by sending end-of-file character: ^Z or ^D
                # Without the -Q argument ("quiet mode"), Framsticks CLI would print "Shell closed." for goodbye.
                self.child.sendline(chr(26 if os.name == "nt" else 4))


        def __getPrefixedFilename(self, filename: str) -> str:
                """Returns `filename` prefixed with FILE_PREFIX, the pid text and a character derived from the instance id."""
                # Returns filename with unique instance id appended so there is no clash when many instances of this class use the same Framsticks CLI executable
                # The id is encoded as a single character: 'A' for 0, 'B' for 1, and so on (characters past 'Z' follow from id 26).
                return FramsticksCLI.FILE_PREFIX + self.pid + str(chr(ord('A') + self.id)) + '_' + filename


        def __saveGenotypeToFile(self, genotype, name, mode, saveformat):
                """Writes (or deletes) a genotype file in the writable directory.

                Args:
                        genotype: Genotype text; ignored in the 'd' mode.
                        name: File name before prefixing.
                        mode: A mode accepted by open() (e.g. "w", "a"), or the special value 'd' to delete the file if it exists.
                        saveformat: A GENO_SAVE_FILE_FORMAT member; RAWGENO writes the genotype as is, anything else writes an "org:" record.

                Returns:
                        A tuple (prefixed file name, absolute path).
                """
                relname = self.__getPrefixedFilename(name)
                absname = os.path.join(self.writing_path, relname)
                if mode == 'd':  # special mode, 'delete'
                        if os.path.exists(absname):
                                os.remove(absname)
                        return relname, absname
                with open(absname, mode) as outfile:  # closed even when a write fails
                        if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                                outfile.write(genotype)
                        else:
                                outfile.write("org:\n")
                                outfile.write("genotype:~\n")
                                outfile.write(genotype + "~\n\n")  # TODO proper quoting of special characters in genotype...
                return relname, absname


        def __readFromFramsCLIUntil(self, until_marker: str) -> str:
                """Reads lines from the child's output until one of them contains `until_marker`.

                Lines starting with "[ERROR]" or "[CRITICAL]" are printed; all lines are printed when
                PRINT_FRAMSTICKS_OUTPUT is set.

                Returns:
                        All lines read before the marker line, each followed by '\\n'. The marker line itself is not included.
                """
                line_terminator = '\r\n' if os.name == "nt" else '\n'
                collected = []
                while True:
                        self.child.expect(line_terminator)
                        raw = self.child.before
                        if isinstance(raw, bytes):
                                # pexpect without an encoding hands over bytes; str() on bytes would produce "b'...'",
                                # so error prefixes would never match and the returned text would be a repr.
                                # The trailing '\r' comes from the terminal's newline translation.
                                msg = raw.decode('utf-8', errors='replace').rstrip('\r')
                        else:
                                msg = str(raw)
                        if self.PRINT_FRAMSTICKS_OUTPUT or msg.startswith(("[ERROR]", "[CRITICAL]")):
                                print(msg)
                        if until_marker in msg:
                                break
                        collected.append(msg + '\n')
                return "".join(collected)


        def __runCommand(self, command, genotypes, result_file_name, saveformat) -> List[str]:
                """Saves genotypes to input file(s), sends `command` to Framsticks CLI and waits for the result file.

                Args:
                        command: Command text sent before the file names.
                        genotypes: Genotypes to store; may be empty only for RAWGENO.
                        result_file_name: Result file name before prefixing.
                        saveformat: RAWGENO stores one file per genotype, NATIVEFRAMS stores all genotypes in one file.

                Returns:
                        Absolute paths of the input files followed by the path of the result file (last element).

                Raises:
                        ValueError: for an empty `genotypes` list in the NATIVEFRAMS format.
                """
                filenames_rel = []  # list of file names with input data for the command
                filenames_abs = []  # same list but absolute paths actually used
                if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                        for index, genotype in enumerate(genotypes):
                                # plain text format = must have a separate file for each genotype
                                rel_name, abs_name = self.__saveGenotypeToFile(genotype, "genotype" + str(index) + ".gen", "w", self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                                filenames_rel.append(rel_name)
                                filenames_abs.append(abs_name)
                elif saveformat == self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"]:
                        if not genotypes:
                                # without any genotype no input file would be created and there would be no file name to pass on
                                raise ValueError("At least one genotype is required for command '%s'." % command)
                        # 'd'elete: ensure there is nothing left from the last run of the program because we "a"ppend to file in the loop below
                        rel_name, abs_name = self.__saveGenotypeToFile(None, self.CLI_INPUT_FILE, 'd', None)
                        for genotype in genotypes:
                                self.__saveGenotypeToFile(genotype, self.CLI_INPUT_FILE, "a", self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                        #  since we use the same file in the loop above, add this file only once (i.e., outside of the loop)
                        filenames_rel.append(rel_name)
                        filenames_abs.append(abs_name)

                result_file_name = self.__getPrefixedFilename(result_file_name)
                cmd = command + " " + " ".join(filenames_rel) + " " + result_file_name
                self.child.sendline(cmd)
                self.__readFromFramsCLIUntil(self.STDOUT_ENDOPER_MARKER)
                filenames_abs.append(os.path.join(self.writing_path, self.OUTPUT_DIR, result_file_name))
                return filenames_abs  # last element is a path to the file containing results


        def __cleanUpCommandResults(self, filenames):
                """Deletes files with results just created by the command."""
                for name in filenames:
                        os.remove(name)


        sendDirectCommand_counter = count(0)  # an internal counter for the sendDirectCommand() method; should be static within that method but python does not allow


        def sendDirectCommand(self, command: str) -> str:
                """Sends any command to Framsticks CLI. Use when you know Framsticks and its scripting language, Framscript.

                Returns:
                        The output of the command, likely with extra \\n because for each entered command, Framsticks CLI responds with a (muted in Quiet mode) prompt and a \\n.
                """
                self.child.sendline(command.strip())
                # Use the number returned by next(); str() of the counter object itself would give "count(N)".
                end_marker = "uniqe-marker-" + str(next(FramsticksCLI.sendDirectCommand_counter))
                self.child.sendline("Simulator.print(\"%s\");" % end_marker)
                return self.__readFromFramsCLIUntil(end_marker)


        def getSimplest(self, genetic_format) -> str:
                """Returns the content of the file produced by the "getsimplest" command for `genetic_format`."""
                files = self.__runCommand(self.GETSIMPLEST_CMD + " " + genetic_format + " ", [], self.GETSIMPLEST_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        genotype = f.read()
                self.__cleanUpCommandResults(files)
                return genotype


        def evaluate(self, genotype_list: List[str]):
                """
                Returns:
                        List of dictionaries containing the performance of genotypes evaluated with self.EVALUATE_CMD.
                        Note that for whatever reason (e.g. incorrect genotype), the dictionaries you will get may be empty or
                        partially empty and may not have the fields you expected, so handle such cases properly.
                        An empty list is returned for an empty `genotype_list`; None is returned when Framsticks
                        produced no performance data for a non-empty list.
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                if not genotype_list:
                        return []  # nothing to evaluate, no need to involve Framsticks
                files = self.__runCommand(self.EVALUATE_CMD, genotype_list, self.EVALUATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        data = json.load(f)
                if len(data) > 0:
                        self.__cleanUpCommandResults(files)
                        assert len(genotype_list) == len(data), f"After evaluating {len(genotype_list)} genotype(s) got {len(data)} result(s)."
                        return data
                else:
                        print("Evaluating genotype: no performance data was returned in", self.EVALUATE_FILE)  # we do not delete files here
                        return None


        def mutate(self, genotype_list: List[str]) -> List[str]:
                """
                Returns:
                        The genotype(s) of the mutated source genotype(s). self.GENOTYPE_INVALID for genotypes whose mutation failed (for example because the source genotype was invalid).
                        An empty list for an empty `genotype_list`.
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                if not genotype_list:
                        return []  # nothing to mutate, no need to involve Framsticks
                files = self.__runCommand(self.MUTATE_CMD, genotype_list, self.MUTATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                genos = framsreader.load(files[-1], "gen file")
                self.__cleanUpCommandResults(files)
                return [g["genotype"] for g in genos]


        def crossOver(self, genotype_parent1: str, genotype_parent2: str) -> str:
                """
                Returns:
                        The genotype of the offspring. self.GENOTYPE_INVALID if the crossing over failed.
                """
                files = self.__runCommand(self.CROSSOVER_CMD, [genotype_parent1, genotype_parent2], self.CROSSOVER_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        child_genotype = f.read()
                self.__cleanUpCommandResults(files)
                return child_genotype


        def dissimilarity(self, genotype_list: List[str]) -> np.ndarray:
                """
                Returns:
                        A square array with dissimilarities of each pair of genotypes
                        (an array of shape (0, 0) for an empty `genotype_list`).
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                if not genotype_list:
                        return np.zeros((0, 0), dtype=np.float64)  # nothing to compare, no need to involve Framsticks
                files = self.__runCommand(self.DISSIMIL_CMD, genotype_list, self.DISSIMIL_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        dissimilarity_matrix = np.genfromtxt(f, dtype=np.float64, comments='#', encoding=None, delimiter='\t')
                # genfromtxt() returns a 1-D array for a file with a single row (i.e., a single genotype); restore the row dimension so that column slicing works
                dissimilarity_matrix = np.atleast_2d(dissimilarity_matrix)
                # We would like to skip column #1 while reading and read everything else, but... https://stackoverflow.com/questions/36091686/exclude-columns-from-genfromtxt-with-numpy
                # This would be too complicated, so strings (names) in column #1 become NaN as floats (unless they accidentally are valid numbers) - not great, not terrible
                square_matrix = dissimilarity_matrix[:, 2:]  # get rid of two first columns (fitness and name)
                EXPECTED_SHAPE = (len(genotype_list), len(genotype_list))
                assert square_matrix.shape == EXPECTED_SHAPE, f"Not a correct dissimilarity matrix, expected {EXPECTED_SHAPE}"
                assert (np.diagonal(square_matrix) == 0).all(), "Not a correct dissimilarity matrix, diagonal expected to be 0"
                assert (square_matrix == square_matrix.T).all(), "Probably not a correct dissimilarity matrix, expecting symmetry, verify this"  # could introduce tolerance in comparison (e.g. class field DISSIMIL_DIFF_TOLERANCE=10^-5) so that miniscule differences do not fail here
                self.__cleanUpCommandResults(files)
                return square_matrix


        def isValid(self, genotype_list: List[str]) -> List[bool]:
                """
                Returns:
                        One boolean per submitted genotype: True when the corresponding line of the result file is "1".
                        An empty list for an empty `genotype_list`.
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                if not genotype_list:
                        return []  # nothing to check, no need to involve Framsticks
                files = self.__runCommand(self.ISVALID_CMD, genotype_list, self.ISVALID_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        valid = [line.strip() == "1" for line in f]
                self.__cleanUpCommandResults(files)
                assert len(genotype_list) == len(valid), "Submitted %d genotypes, received %d validity values" % (len(genotype_list), len(valid))
                return valid
281
282
def parseArguments():
        """Builds the command-line parser of the demo and returns the parsed arguments.

        '-path' is mandatory and validated by ensureDir; '-exe', '-genformat' and '-pid' are optional.
        """
        description = 'Run this program with "python -u %s" if you want to disable buffering of its output.' % sys.argv[0]
        optional_arguments = (
                ('-exe', 'Executable name. If not given, "frams.exe" or "frams.linux" is assumed depending on the platform.'),
                ('-genformat', 'Genetic format for the demo run, for example 4, 9, or S. If not given, f1 is assumed.'),
                ('-pid', 'Unique ID of this process. Only relevant when you run multiple instances of this class simultaneously but as separate processes, and they use the same Framsticks CLI executable. This value will be appended to the names of created files to avoid conflicts.'),
        )

        arg_parser = argparse.ArgumentParser(description=description)
        arg_parser.add_argument('-path', type=ensureDir, required=True, help='Path to Framsticks CLI without trailing slash.')
        for flag, help_text in optional_arguments:
                arg_parser.add_argument(flag, required=False, help=help_text)
        return arg_parser.parse_args()
290
291
def ensureDir(string):
        """Returns `string` unchanged when it names an existing directory.

        Raises:
                NotADirectoryError: when `string` is not an existing directory.
        """
        if not os.path.isdir(string):
                raise NotADirectoryError(string)
        return string
297
298
if __name__ == "__main__":
        # A demo run: mutates the simplest genotype, crosses two parents over and reports
        # dissimilarity, performance and validity of the results.

        # TODO ideas:
        # - check_validity with three levels (invalid, corrected, valid)
        # - "vectorize" crossover so that many genotypes are handled in one call. Even better, use .so/.dll direct communication to CLI
        # - use threads for non-blocking reading from frams' stdout and thus not relying on specific strings printed by frams
        # - a pool of binaries running simultaneously, balance load - in particular evaluation
        # - if we read genotypes in "org:" format anywhere: import https://pypi.org/project/framsreader and use it if successful,
        #    if not then print a message "framsreader not available, using simple internal method to save a genotype" and proceed as it is now.
        #    We should use the proper writer to handle all special cases like quoting special characters etc.

        parsed_args = parseArguments()
        framsCLI = FramsticksCLI(parsed_args.path, parsed_args.exe, parsed_args.pid)

        try:
                print("Sending a direct command to Framsticks CLI that calculates \"4\"+2 yields", repr(framsCLI.sendDirectCommand("Simulator.print(\"4\"+2);")))

                simplest = framsCLI.getSimplest('1' if parsed_args.genformat is None else parsed_args.genformat)
                print("\tSimplest genotype:", simplest)
                parent1 = framsCLI.mutate([simplest])[0]
                parent2 = parent1
                MUTATE_COUNT = 10
                for _ in range(MUTATE_COUNT):  # example of a chain of 10 mutations
                        parent2 = framsCLI.mutate([parent2])[0]
                print("\tParent1 (mutated simplest):", parent1)
                print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)
                offspring = framsCLI.crossOver(parent1, parent2)
                print("\tCrossover (Offspring):", offspring)
                print('\tDissimilarity of Parent1 and Offspring:', framsCLI.dissimilarity([parent1, offspring])[0, 1])
                print('\tPerformance of Offspring:', framsCLI.evaluate([offspring]))
                print('\tValidity of Parent1, Parent 2, and Offspring:', framsCLI.isValid([parent1, parent2, offspring]))
        finally:
                # ask the CLI to terminate even when one of the demo steps above fails
                framsCLI.closeFramsticksCLI()
Note: See TracBrowser for help on using the repository browser.