source: framspy/FramsticksCLI.py @ 1059

Last change on this file since 1059 was 1059, checked in by Maciej Komosinski, 3 years ago

Warn when the number of performance evaluations is different from the number of genotypes evaluated

File size: 16.1 KB
Line 
1from subprocess import Popen, PIPE, check_output
2from enum import Enum
3from typing import List  # to be able to specify a type hint of list(something)
4from itertools import count  # for tracking multiple instances
5import json
6import sys, os
7import argparse
8import numpy as np
9
10
class FramsticksCLI:
        """Runs Framsticks CLI (command-line) executable and communicates with it using standard input and output.
        You can perform basic operations like mutation, crossover, and evaluation of genotypes.
        This way you can perform evolution controlled by python as well as access and manipulate genotypes.
        You can even design and use in evolution your own genetic representation implemented entirely in python.

        You need to provide one or two parameters when you run this class: the path to Framsticks CLI
        and the name of the Framsticks CLI executable (if it is non-standard). See::
                FramsticksCLI.py -h"""

        PRINT_FRAMSTICKS_OUTPUT: bool = False  # set to True for debugging
        DETERMINISTIC: bool = False  # set to True to have the same results on each run

        GENO_SAVE_FILE_FORMAT = Enum('GENO_SAVE_FILE_FORMAT', 'NATIVEFRAMS RAWGENO')  # how to save genotypes
        OUTPUT_DIR = "scripts_output"
        STDOUT_ENDOPER_MARKER = "FileObject.write"  # we look for this message on Framsticks CLI stdout to detect when Framsticks created a file with the result we expect

        FILE_PREFIX = 'framspy_'

        RANDOMIZE_CMD = "Math.randomize();"
        SETEXPEDEF_CMD = "Simulator.expdef=\"standard-eval\";"
        GETSIMPLEST_CMD = "getsimplest"
        GETSIMPLEST_FILE = "simplest.gen"
        EVALUATE_CMD = "evaluate eval-allcriteria.sim"
        EVALUATE_FILE = "genos_eval.json"
        CROSSOVER_CMD = "crossover"
        CROSSOVER_FILE = "child.gen"
        DISSIMIL_CMD = "dissimil"
        DISSIMIL_FILE = "dissimilarity_matrix.tsv"  # tab-separated values
        ISVALID_CMD = "arevalid"
        ISVALID_FILE = "validity.txt"
        MUTATE_CMD = "mutate"
        MUTATE_FILE = "mutant.gen"

        CLI_INPUT_FILE = "genotypes.gen"

        _next_instance_id = count(0)  # "static" counter incremented when a new instance is created. Used to ensure unique filenames for each instance.


        def __init__(self, framspath, framsexe, pid=""):
                """Detects the Framsticks working directory and spawns the Framsticks CLI process.

                Args:
                        framspath: Directory that contains the Framsticks CLI executable.
                        framsexe: Name of the executable; when None, 'frams.exe' or 'frams.linux' is used depending on os.name.
                        pid: Optional string included in the names of created files (None is treated as "").

                Raises:
                        SystemExit: When the executable cannot be found in the given location.
                        RuntimeError: When the writable working directory cannot be found in the help output of the executable.
                """
                self.pid = pid if pid is not None else ""
                self.id = next(FramsticksCLI._next_instance_id)
                self.frams_path = framspath
                if framsexe is not None:
                        self.frams_exe = framsexe
                else:
                        self.frams_exe = 'frams.exe' if os.name == "nt" else 'frams.linux'
                self.writing_path = None
                mainpath = os.path.join(self.frams_path, self.frams_exe)
                exe_call = [mainpath, '-Q', '-s', '-c', '-icliutils.ini']  # -c will be ignored in Windows Framsticks (this option is meaningless because the Windows version does not support color console, so no need to deactivate this feature using -c)
                exe_call_to_get_version = [mainpath, '-V']
                exe_call_to_get_path = [mainpath, '-?']
                try:
                        print("\n".join(self.__readAllOutput(exe_call_to_get_version)))
                        help_lines = self.__readAllOutput(exe_call_to_get_path)
                except FileNotFoundError:
                        print("Could not find Framsticks executable ('%s') in the given location ('%s')." % (self.frams_exe, self.frams_path))
                        sys.exit(1)
                for helpline in help_lines:
                        if 'dDIRECTORY' in helpline:
                                quoted_parts = helpline.split("'")  # the directory is the first single-quoted fragment of this help line
                                if len(quoted_parts) > 1:
                                        self.writing_path = quoted_parts[1]
                if self.writing_path is None:
                        # Without this path every later command would fail in os.path.join() with an obscure TypeError, so fail early and clearly.
                        raise RuntimeError("Could not detect the writable working directory in the output of '%s'." % " ".join(exe_call_to_get_path))
                print("Temporary files with results will be saved in detected writable working directory '%s'" % self.writing_path)
                self.__spawnFramsticksCLI(exe_call)


        def __readAllOutput(self, command):
                """Runs the command, waits until it ends, and returns its standard output as a list of right-stripped lines.

                Raises:
                        FileNotFoundError: When the executable named in the command does not exist.
                """
                # communicate() reads both pipes until the process ends (so a full stderr buffer cannot block the child),
                # and the context manager closes the pipes and reaps the process.
                with Popen(command, stdout=PIPE, stderr=PIPE, stdin=PIPE) as frams_process:
                        stdout_data, _ = frams_process.communicate()
                raw_lines = stdout_data.split(b'\n')
                if raw_lines[-1] == b'':  # no partial line after the last newline (or no output at all)
                        raw_lines.pop()
                return [line.decode('utf-8').rstrip() for line in raw_lines]


        def __spawnFramsticksCLI(self, args):
                """Spawns Framsticks CLI for interactive communication, runs two self-tests, and sets up the simulator."""
                # the child app (Framsticks CLI) should not buffer outputs and we need to immediately read its stdout, hence we use pexpect/wexpect
                # NOTE(review): the arguments are joined with spaces, so a path that contains spaces is presumably split into several arguments - verify.
                print('Spawning Framsticks CLI for continuous stdin/stdout communication... ', end='')
                if os.name == "nt":  # Windows:
                        import wexpect  # https://pypi.org/project/wexpect/
                        # https://github.com/raczben/wexpect/tree/master/examples
                        self.child = wexpect.spawn(' '.join(args))
                else:
                        import pexpect  # https://pexpect.readthedocs.io/en/stable/
                        self.child = pexpect.spawn(' '.join(args))
                self.child.setecho(False)  # ask the communication to not copy to stdout what we write to stdin
                print('OK.')

                self.__readFromFramsCLIUntil("UserScripts.autoload")
                print('Performing a basic test 1/2... ', end='')
                assert self.getSimplest("1") == "X"
                print('OK.')
                print('Performing a basic test 2/2... ', end='')
                assert self.isValid(["X[0:0],", "X[0:0]", "X[1:0]"]) == [False, True, False]
                print('OK.')
                if not self.DETERMINISTIC:
                        self.sendDirectCommand(self.RANDOMIZE_CMD)
                self.sendDirectCommand(self.SETEXPEDEF_CMD)


        def closeFramsticksCLI(self):
                """Asks Framsticks CLI to finish by sending the end-of-file character."""
                # End gracefully by sending end-of-file character: ^Z or ^D
                # Without the -Q argument ("quiet mode"), Framsticks CLI would print "Shell closed." for goodbye.
                self.child.sendline(chr(26 if os.name == "nt" else 4))


        def __getPrefixedFilename(self, filename: str) -> str:
                """Returns filename preceded by the common prefix, the pid, and a letter label unique for this instance.

                The label is A..Z for the first 26 instances and then continues as AA, AB, ... so that it always
                consists of letters only (a single character computed as chr(ord('A') + id) would become
                a punctuation character, including a backslash, for later instances).
                """
                # Returns filename with unique instance id appended so there is no clash when many instances of this class use the same Framsticks CLI executable
                label = ""
                remaining = self.id
                while True:
                        remaining, letter_index = divmod(remaining, 26)
                        label = chr(ord('A') + letter_index) + label
                        if remaining == 0:
                                break
                        remaining -= 1  # bijective base 26: after 'Z' comes 'AA', not 'BA'
                return FramsticksCLI.FILE_PREFIX + self.pid + label + '_' + filename


        def __saveGenotypeToFile(self, genotype, name, mode, saveformat):
                """Writes (mode 'w'), appends (mode 'a') or deletes (mode 'd') the instance-specific file for the given name.

                Returns:
                        A pair: the prefixed file name and the same name joined with the writing path.
                """
                relname = self.__getPrefixedFilename(name)
                absname = os.path.join(self.writing_path, relname)
                if mode == 'd':  # special mode, 'delete'
                        if os.path.exists(absname):
                                os.remove(absname)
                else:
                        with open(absname, mode) as outfile:  # closed even when writing fails
                                if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                                        outfile.write(genotype)
                                else:
                                        outfile.write("org:\n")
                                        outfile.write("genotype:~\n")
                                        outfile.write(genotype + "~\n\n")  # TODO proper quoting of special characters in genotype...
                return relname, absname


        def __readFromFramsCLIUntil(self, until_marker: str) -> str:
                """Reads lines printed by Framsticks CLI until a line containing until_marker appears.

                Returns:
                        All the lines read before the line with the marker, each followed by '\\n'.
                """
                line_end = '\r\n' if os.name == "nt" else '\n'
                collected = []
                while True:
                        self.child.expect(line_end)
                        raw = self.child.before
                        # pexpect returns bytes unless an encoding was requested; str(bytes) would yield "b'...'" and
                        # then the "[ERROR]"/"[CRITICAL]" prefixes below would never be recognized.
                        msg = raw.decode('utf-8', errors='replace') if isinstance(raw, bytes) else str(raw)
                        if self.PRINT_FRAMSTICKS_OUTPUT or msg.startswith(("[ERROR]", "[CRITICAL]")):
                                print(msg)
                        if until_marker in msg:
                                break
                        collected.append(msg + '\n')
                return "".join(collected)


        def __runCommand(self, command, genotypes, result_file_name, saveformat) -> List[str]:
                """Saves genotypes to file(s), sends the command to Framsticks CLI, and waits for the end-of-operation marker.

                Returns:
                        Paths of the input files followed by the path of the file with results (the last element).

                Raises:
                        ValueError: When the NATIVEFRAMS format is requested for an empty list of genotypes.
                """
                filenames_rel = []  # list of file names with input data for the command
                filenames_abs = []  # same list but absolute paths actually used
                if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                        for i, genotype in enumerate(genotypes):
                                # plain text format = must have a separate file for each genotype
                                rel, absolute = self.__saveGenotypeToFile(genotype, "genotype" + str(i) + ".gen", "w", self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                                filenames_rel.append(rel)
                                filenames_abs.append(absolute)
                elif saveformat == self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"]:
                        rel, absolute = self.__saveGenotypeToFile(None, self.CLI_INPUT_FILE, 'd', None)  # 'd'elete: ensure there is nothing left from the last run of the program because we "a"ppend to file in the loop below
                        if len(genotypes) == 0:
                                # there would be no input file to pass to the command
                                raise ValueError("No genotypes provided for the '%s' command." % command)
                        for genotype in genotypes:
                                self.__saveGenotypeToFile(genotype, self.CLI_INPUT_FILE, "a", self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                        #  since we use the same file in the loop above, add this file only once (i.e., outside of the loop)
                        filenames_rel.append(rel)
                        filenames_abs.append(absolute)

                result_file_name = self.__getPrefixedFilename(result_file_name)
                cmd = command + " " + " ".join(filenames_rel) + " " + result_file_name
                self.child.sendline(cmd)
                self.__readFromFramsCLIUntil(self.STDOUT_ENDOPER_MARKER)
                filenames_abs.append(os.path.join(self.writing_path, self.OUTPUT_DIR, result_file_name))
                return filenames_abs  # last element is a path to the file containing results


        def __cleanUpCommandResults(self, filenames):
                """Deletes files with results just created by the command."""
                for name in filenames:
                        os.remove(name)


        sendDirectCommand_counter = count(0)  # an internal counter for the sendDirectCommand() method; should be static within that method but python does not allow


        def sendDirectCommand(self, command: str) -> str:
                """Sends any command to Framsticks CLI. Use when you know Framsticks and its scripting language, Framscript.

                Returns:
                        The output of the command, likely with extra \\n because for each entered command, Framsticks CLI responds with a (muted in Quiet mode) prompt and a \\n.
                """
                self.child.sendline(command.strip())
                # use the number produced by the counter (not the text representation of the counter object) to build the marker
                marker_number = next(FramsticksCLI.sendDirectCommand_counter)
                STDOUT_ENDOPER_MARKER = "uniqe-marker-" + str(marker_number)
                self.child.sendline("Simulator.print(\"%s\");" % STDOUT_ENDOPER_MARKER)
                return self.__readFromFramsCLIUntil(STDOUT_ENDOPER_MARKER)


        def getSimplest(self, genetic_format) -> str:
                """
                Returns:
                        The content of the file created by the self.GETSIMPLEST_CMD command for the given genetic format.
                """
                files = self.__runCommand(self.GETSIMPLEST_CMD + " " + genetic_format + " ", [], self.GETSIMPLEST_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        genotype = f.read()
                self.__cleanUpCommandResults(files)
                return genotype


        def evaluate(self, genotype_list: List[str]):
                """
                Returns:
                        List of dictionaries containing the performance of genotypes evaluated with self.EVALUATE_CMD,
                        or None when no performance data was returned (the files are not deleted in such a case).
                        Note that for whatever reason (e.g. incorrect genotype), the dictionaries you will get may be empty or
                        partially empty and may not have the fields you expected, so handle such cases properly.
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                files = self.__runCommand(self.EVALUATE_CMD, genotype_list, self.EVALUATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        data = json.load(f)
                if len(data) > 0:
                        self.__cleanUpCommandResults(files)
                        assert len(genotype_list) == len(data), f"After evaluating {len(genotype_list)} genotype(s) got {len(data)} result(s)."
                        return data
                else:
                        print("Evaluating genotype: no performance data was returned in", self.EVALUATE_FILE)  # we do not delete files here
                        return None


        def mutate(self, genotype: str) -> str:
                """
                Returns:
                        The genotype of the mutated individual. Empty string if the mutation failed.
                """
                files = self.__runCommand(self.MUTATE_CMD, [genotype], self.MUTATE_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        newgenotype = f.read()
                self.__cleanUpCommandResults(files)
                return newgenotype


        def crossOver(self, genotype_parent1: str, genotype_parent2: str) -> str:
                """
                Returns:
                        The genotype of the offspring. Empty string if the crossing over failed.
                """
                files = self.__runCommand(self.CROSSOVER_CMD, [genotype_parent1, genotype_parent2], self.CROSSOVER_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        child_genotype = f.read()
                self.__cleanUpCommandResults(files)
                return child_genotype


        def dissimilarity(self, genotype_list: List[str]) -> np.ndarray:
                """
                Returns:
                        A square array with dissimilarities of each pair of genotypes.
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                files = self.__runCommand(self.DISSIMIL_CMD, genotype_list, self.DISSIMIL_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        dissimilarity_matrix = np.genfromtxt(f, dtype=np.float64, comments='#', encoding=None, delimiter='\t')
                # We would like to skip column #1 while reading and read everything else, but... https://stackoverflow.com/questions/36091686/exclude-columns-from-genfromtxt-with-numpy
                # This would be too complicated, so strings (names) in column #1 become NaN as floats (unless they accidentally are valid numbers) - not great, not terrible
                dissimilarity_matrix = np.atleast_2d(dissimilarity_matrix)  # a file with a single row (one genotype) is read as a 1-D array
                square_matrix = dissimilarity_matrix[:, 2:]  # get rid of two first columns (fitness and name)
                EXPECTED_SHAPE = (len(genotype_list), len(genotype_list))
                # print(square_matrix)
                assert square_matrix.shape == EXPECTED_SHAPE, f"Not a correct dissimilarity matrix, expected {EXPECTED_SHAPE}"
                assert (np.diagonal(square_matrix) == 0).all(), "Not a correct dissimilarity matrix, diagonal expected to be 0"
                assert (square_matrix == square_matrix.T).all(), "Probably not a correct dissimilarity matrix, expecting symmetry, verify this"  # could introduce tolerance in comparison (e.g. class field DISSIMIL_DIFF_TOLERANCE=10^-5) so that miniscule differences do not fail here
                self.__cleanUpCommandResults(files)
                return square_matrix


        def isValid(self, genotype_list: List[str]) -> List[bool]:
                """
                Returns:
                        A list with one boolean per submitted genotype: True when the corresponding line of the result file is "1".
                """
                assert isinstance(genotype_list, list)  # because in python str has similar capabilities as list and here it would pretend to work too, so to avoid any ambiguity
                files = self.__runCommand(self.ISVALID_CMD, genotype_list, self.ISVALID_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        valid = [line.strip() == "1" for line in f]
                self.__cleanUpCommandResults(files)
                assert len(genotype_list) == len(valid), "Submitted %d genotypes, received %d validity values" % (len(genotype_list), len(valid))
                return valid
277
278
def parseArguments():
        """Defines the command-line options of this script (-path, -exe, -genformat, -pid) and returns the parsed arguments."""
        description = f'Run this program with "python -u {sys.argv[0]}" if you want to disable buffering of its output.'
        # Options are registered in this order, which is also the order used in the generated help.
        options = (
                ('-path', dict(type=ensureDir, required=True, help='Path to Framsticks CLI without trailing slash.')),
                ('-exe', dict(required=False, help='Executable name. If not given, "frams.exe" or "frams.linux" is assumed depending on the platform.')),
                ('-genformat', dict(required=False, help='Genetic format for the demo run, for example 4, 9, or S. If not given, f1 is assumed.')),
                ('-pid', dict(required=False, help='Unique ID of this process. Only relevant when you run multiple instances of this class simultaneously but as separate processes, and they use the same Framsticks CLI executable. This value will be appended to the names of created files to avoid conflicts.')),
        )
        arg_parser = argparse.ArgumentParser(description=description)
        for flag, settings in options:
                arg_parser.add_argument(flag, **settings)
        return arg_parser.parse_args()
286
287
def ensureDir(string):
        """Returns the argument unchanged when it is a path of an existing directory.

        Raises:
                NotADirectoryError: When the argument is not an existing directory.
        """
        if not os.path.isdir(string):
                raise NotADirectoryError(string)
        return string
293
294
if __name__ == "__main__":
        # A demo run: creates the simplest genotype, mutates it, crosses over two mutants,
        # and prints the dissimilarity, performance and validity of the results.

        # TODO ideas:
        # - check_validity with three levels (invalid, corrected, valid)
        # - "vectorize" some operations (isvalid, evaluate) so that a number of genotypes is handled in one call
        # - use threads for non-blocking reading from frams' stdout and thus not relying on specific strings printed by frams
        # - a pool of binaries run at the same time, balance load - in particular evaluation
        # - if we read genotypes in "org:" format anywhere: import https://pypi.org/project/framsreader/0.1.2/ and use it if successful,
        #    if not then print a message "framsreader not available, using simple internal method to save a genotype" and proceed as it is now.
        #    So far we don't read, but we should use the proper writer to handle all special cases like quoting etc.

        parsed_args = parseArguments()
        framsCLI = FramsticksCLI(parsed_args.path, parsed_args.exe, parsed_args.pid)

        try:
                print("Sending a direct command to Framsticks CLI that calculates \"4\"+2 yields", repr(framsCLI.sendDirectCommand("Simulator.print(\"4\"+2);")))

                simplest = framsCLI.getSimplest('1' if parsed_args.genformat is None else parsed_args.genformat)
                print("\tSimplest genotype:", simplest)
                parent1 = framsCLI.mutate(simplest)
                parent2 = parent1
                MUTATE_COUNT = 10
                for _ in range(MUTATE_COUNT):  # example of a chain of MUTATE_COUNT mutations
                        parent2 = framsCLI.mutate(parent2)
                print("\tParent1 (mutated simplest):", parent1)
                print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)
                offspring = framsCLI.crossOver(parent1, parent2)
                print("\tCrossover (Offspring):", offspring)
                print('\tDissimilarity of Parent1 and Offspring:', framsCLI.dissimilarity([parent1, offspring])[0, 1])
                print('\tPerformance of Offspring:', framsCLI.evaluate([offspring]))
                print('\tValidity of Parent1, Parent 2, and Offspring:', framsCLI.isValid([parent1, parent2, offspring]))
        finally:
                # ask the spawned Framsticks CLI to finish even when the demo fails midway
                framsCLI.closeFramsticksCLI()
Note: See TracBrowser for help on using the repository browser.