source: framspy/FramsticksCLI.py @ 962

Last change on this file since 962 was 962, checked in by Maciej Komosinski, 4 years ago

Added an optional parameter: unique process ID. Only relevant when you run multiple instances of this class simultaneously but as separate processes, and they use the same Framsticks CLI executable.

File size: 13.5 KB
Line 
1from subprocess import Popen, PIPE, check_output
2from enum import Enum
3from typing import List
4from itertools import count  # for tracking multiple instances
5import json
6import sys, os
7import argparse
8import numpy as np
9
10
class FramsticksCLI:
        """Runs Framsticks CLI (command-line) executable and communicates with it using standard input and output.
        You can perform basic operations like mutation, crossover, and evaluation of genotypes.
        This way you can perform evolution controlled by python as well as access and manipulate genotypes.
        You can even design and use in evolution your own genetic representation implemented entirely in python.

        You need to provide one or two parameters when you run this class: the path to Framsticks CLI
        and the name of the Framsticks CLI executable (if it is non-standard). See::
                FramsticksCLI.py -h"""

        PRINT_FRAMSTICKS_OUTPUT: bool = False  # set to True for debugging
        DETERMINISTIC: bool = False  # set to True to have the same results on each run

        GENO_SAVE_FILE_FORMAT = Enum('GENO_SAVE_FILE_FORMAT', 'NATIVEFRAMS RAWGENO')  # how to save genotypes
        OUTPUT_DIR = "scripts_output"
        STDOUT_ENDOPER_MARKER = "FileObject.write"  # we look for this message on Framsticks CLI stdout to detect when Framsticks created a file with the result we expect

        FILE_PREFIX = 'framspy_'

        RANDOMIZE_CMD = "rnd" + "\n"
        SETEXPEDEF_CMD = "expdef standard-eval" + "\n"
        GETSIMPLEST_CMD = "getsimplest"
        GETSIMPLEST_FILE = "simplest.gen"
        EVALUATE_CMD = "evaluate eval-allcriteria.sim"
        EVALUATE_FILE = "genos_eval.json"
        CROSSOVER_CMD = "crossover"
        CROSSOVER_FILE = "child.gen"
        DISSIMIL_CMD = "dissimil"
        DISSIMIL_FILE = "dissimilarity_matrix.gen"
        ISVALID_CMD = "isvalid"
        ISVALID_FILE = "validity.gen"
        MUTATE_CMD = "mutate"
        MUTATE_FILE = "mutant.gen"

        CLI_INPUT_FILE = "genotypes.gen"

        _next_instance_id = count(0)  # "static" counter incremented when a new instance is created. Used for unique filenames


        def __init__(self, framspath, framsexe, pid=""):
                """Finds the Framsticks CLI executable, detects its writable directory and spawns the CLI.

                Args:
                        framspath: directory that contains the Framsticks CLI executable.
                        framsexe: name of the executable; None selects 'frams.exe' on Windows and 'frams.linux' otherwise.
                        pid: optional unique process ID that becomes a part of the names of created files; None is treated as "".

                Raises:
                        RuntimeError: when the writable working directory could not be detected in the output of the executable.

                The interpreter is terminated with sys.exit(1) when the executable cannot be found.
                """
                self.pid = pid if pid is not None else ""
                self.id = next(FramsticksCLI._next_instance_id)
                self.frams_path = framspath
                if framsexe is not None:
                        self.frams_exe = framsexe
                else:
                        self.frams_exe = 'frams.exe' if os.name == "nt" else 'frams.linux'
                self.writing_path = None
                mainpath = os.path.join(self.frams_path, self.frams_exe)
                exe_call = [mainpath, '-Q', '-s', '-c', '-icliutils.ini']  # -c will be ignored in Windows Framsticks (this option is meaningless because the Windows version does not support color console, so no need to deactivate this feature using -c)
                exe_call_to_get_version = [mainpath, '-V']
                exe_call_to_get_path = [mainpath, '-?']
                try:
                        print("\n".join(self.__readAllOutput(exe_call_to_get_version)))
                        help_lines = self.__readAllOutput(exe_call_to_get_path)
                        for helpline in help_lines:
                                if 'dDIRECTORY' in helpline:
                                        self.writing_path = helpline.split("'")[1]
                except FileNotFoundError:
                        print("Could not find Framsticks executable ('%s') in the given location ('%s')." % (self.frams_exe, self.frams_path))
                        sys.exit(1)
                if self.writing_path is None:
                        # without this path every later file operation would fail with an obscure TypeError in os.path.join()
                        raise RuntimeError("Could not detect the writable working directory in the output of '%s'." % " ".join(exe_call_to_get_path))
                print("Temporary files with results will be saved in detected writable working directory '%s'" % self.writing_path)
                self.__spawnFramsticksCLI(exe_call)


        def __readAllOutput(self, command):
                """Runs 'command' until it finishes and returns its stdout as a list of right-stripped text lines.

                FileNotFoundError raised by Popen for a missing executable is propagated to the caller.
                """
                # communicate() drains both pipes, waits for the process and the "with" block closes all pipes,
                # so no process or pipe handles are left behind
                with Popen(command, stdout=PIPE, stderr=PIPE, stdin=PIPE) as frams_process:
                        stdout_data, _ = frams_process.communicate()
                raw_lines = stdout_data.split(b'\n')
                if raw_lines[-1] == b'':  # no dangling empty "line" after the final newline (or for empty output)
                        raw_lines.pop()
                return [line.decode('utf-8').rstrip() for line in raw_lines]


        def __spawnFramsticksCLI(self, args):
                """Spawns Framsticks CLI given as a list of arguments (executable first), runs three basic sanity tests, then sends initial commands."""
                # the child app (Framsticks CLI) should not buffer outputs and we need to immediately read its stdout, hence we use pexpect/wexpect
                print('Spawning Framsticks CLI for continuous stdin/stdout communication... ', end='')
                if os.name == "nt":  # Windows:
                        import wexpect  # https://pypi.org/project/wexpect/
                        # https://github.com/raczben/wexpect/tree/master/examples
                        self.child = wexpect.spawn(' '.join(args))
                else:
                        import pexpect  # https://pexpect.readthedocs.io/en/stable/
                        # the executable and its arguments are passed separately so that a path with spaces is not split into many arguments
                        self.child = pexpect.spawn(args[0], args[1:])
                        self.child.setecho(False)  # linux only
                print('OK.')

                self.__readFromFramsCLIUntil("UserScripts.autoload")
                print('Performing a basic test 1/3... ', end='')
                assert self.getSimplest("1") == "X"
                print('OK.')
                print('Performing a basic test 2/3... ', end='')
                assert self.isValid("X[0:0]") is True
                print('OK.')
                print('Performing a basic test 3/3... ', end='')
                assert self.isValid("X[0:0],") is False
                print('OK.')
                if not self.DETERMINISTIC:
                        self.child.sendline(self.RANDOMIZE_CMD)
                self.child.sendline(self.SETEXPEDEF_CMD)


        def closeFramsticksCLI(self):
                """Asks the spawned Framsticks CLI to finish by sending the end-of-file character."""
                # End gracefully by sending end-of-file character: ^Z or ^D
                # Without -Q argument ("quiet mode"), Framsticks CLI would print "Shell closed." for goodbye.
                self.child.sendline(chr(26 if os.name == "nt" else 4))


        @staticmethod
        def __instanceLabel(index: int) -> str:
                """Converts a non-negative instance number to letters: 0->A, ..., 25->Z, 26->AA, 27->AB, ...

                Only letters are produced, so the label is always safe to use in a file name
                (a plain chr(ord('A')+index) yields characters like '[' or '\\' for index > 25).
                """
                label = ""
                remaining = index + 1
                while remaining > 0:
                        remaining, letter = divmod(remaining - 1, 26)
                        label = chr(ord('A') + letter) + label
                return label


        def __getPrefixedFilename(self, filename: str) -> str:
                """Returns 'filename' prefixed with FILE_PREFIX, the process ID and the unique label of this instance."""
                # Unique instance id is included so there is no clash when many instances of this class use the same Framsticks CLI executable
                return FramsticksCLI.FILE_PREFIX + self.pid + self.__instanceLabel(self.id) + '_' + filename


        def __saveGenotypeToFile(self, genotype, name, mode, saveformat):
                """Writes (or deletes) a genotype file in the writing directory.

                Args:
                        genotype: genotype text to be saved; not used in the 'd' mode.
                        name: file name without the unique prefix.
                        mode: mode passed to open() (e.g. "w" or "a"), or the special value 'd' that deletes the file if it exists.
                        saveformat: GENO_SAVE_FILE_FORMAT member; RAWGENO saves the genotype text alone, anything else saves an "org:" object.

                Returns:
                        A tuple (prefixed file name, absolute path of that file).
                """
                relname = self.__getPrefixedFilename(name)
                absname = os.path.join(self.writing_path, relname)
                if mode == 'd':  # special mode, 'delete'
                        try:
                                os.remove(absname)
                        except FileNotFoundError:
                                pass  # nothing to delete
                else:
                        with open(absname, mode) as outfile:  # the file gets closed even when writing fails
                                if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                                        outfile.write(genotype)
                                else:
                                        outfile.write("org:\n")
                                        outfile.write("genotype:~\n")
                                        outfile.write(genotype + "~\n\n")  # TODO proper quoting of special characters in genotype...
                return relname, absname


        def __readFromFramsCLIUntil(self, until_marker: str):
                """Consumes lines of Framsticks CLI output until a line that contains 'until_marker' is read.

                Lines that start with "[ERROR]" are always printed; all lines are printed when PRINT_FRAMSTICKS_OUTPUT is set.
                """
                while True:
                        self.child.expect('\n')
                        raw = self.child.before
                        # pexpect spawned without an encoding delivers bytes; str(bytes) would give "b'...'" and the "[ERROR]" test below would never succeed
                        msg = raw.decode('utf-8', errors='replace') if isinstance(raw, bytes) else str(raw)
                        msg = msg.rstrip('\r\n')  # the carriage return of a "\r\n" line ending is not a part of the message
                        if self.PRINT_FRAMSTICKS_OUTPUT or msg.startswith("[ERROR]"):
                                print(msg)
                        if until_marker in msg:
                                break


        def __runCommand(self, command, genotypes, result_file_name, saveformat) -> List[str]:
                """Saves 'genotypes' to input file(s), sends 'command' to Framsticks CLI and waits until the result file is reported.

                Args:
                        command: the command (with its leading arguments, if any) understood by Framsticks CLI.
                        genotypes: list of genotypes that are the input for the command.
                        result_file_name: name (without the unique prefix) of the file where the result is expected.
                        saveformat: GENO_SAVE_FILE_FORMAT member; RAWGENO uses one file per genotype, NATIVEFRAMS uses one file for all genotypes.

                Returns:
                        Absolute paths of all input files followed by the absolute path of the result file (the last element).

                Raises:
                        ValueError: when 'genotypes' is empty and 'saveformat' is NATIVEFRAMS.
                """
                filenames_rel = []  # list of file names with input data for the command
                filenames_abs = []  # same list but absolute paths actually used
                if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                        for i, genotype in enumerate(genotypes):
                                # plain text format = must have a separate file for each genotype
                                rel_name, abs_name = self.__saveGenotypeToFile(genotype, "genotype" + str(i) + ".gen", "w", self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                                filenames_rel.append(rel_name)
                                filenames_abs.append(abs_name)
                elif saveformat == self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"]:
                        if not genotypes:
                                raise ValueError("At least one genotype is required for the '%s' command." % command)
                        # 'd'elete: ensure there is nothing left from the last run of the program because we "a"ppend to file in the loop below
                        rel_name, abs_name = self.__saveGenotypeToFile(None, self.CLI_INPUT_FILE, 'd', None)
                        for genotype in genotypes:
                                self.__saveGenotypeToFile(genotype, self.CLI_INPUT_FILE, "a", self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                        #  since we use the same file in the loop above, add this file only once (i.e., outside of the loop)
                        filenames_rel.append(rel_name)
                        filenames_abs.append(abs_name)

                result_file_name = self.__getPrefixedFilename(result_file_name)
                cmd = command + " " + " ".join(filenames_rel) + " " + result_file_name
                self.child.sendline(cmd + '\n')
                self.__readFromFramsCLIUntil(self.STDOUT_ENDOPER_MARKER)
                filenames_abs.append(os.path.join(self.writing_path, self.OUTPUT_DIR, result_file_name))
                return filenames_abs  # last element is a path to the file containing results


        def __cleanUpCommandResults(self, filenames):
                """Deletes files with results just created by the command."""
                for name in filenames:
                        os.remove(name)


        def getSimplest(self, genetic_format) -> str:
                """Returns the content of the result file of the "getsimplest" command run for the given single-character genetic format."""
                assert len(genetic_format) == 1, "Genetic format should be a single character"
                files = self.__runCommand(self.GETSIMPLEST_CMD + " " + genetic_format + " ", [], self.GETSIMPLEST_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        genotype = f.read()
                self.__cleanUpCommandResults(files)
                return genotype


        def evaluate(self, genotype: str):
                """
                Returns:
                        Data parsed from the JSON file produced by self.EVALUATE_CMD for the genotype, or None when the parsed data is empty
                        (in this case a message is printed and the created files are not deleted).
                        Note that for whatever reason (e.g. incorrect genotype), the data you get may be partially empty
                        and may not have the fields you expected, so handle such cases properly.
                """
                files = self.__runCommand(self.EVALUATE_CMD, [genotype], self.EVALUATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        data = json.load(f)
                if len(data) > 0:
                        self.__cleanUpCommandResults(files)
                        return data
                else:
                        print("Evaluating genotype: no performance data was returned in", self.EVALUATE_FILE)  # we do not delete files here
                        return None


        def mutate(self, genotype: str) -> str:
                """Returns the content of the result file of the "mutate" command run for the genotype."""
                files = self.__runCommand(self.MUTATE_CMD, [genotype], self.MUTATE_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        newgenotype = f.read()
                self.__cleanUpCommandResults(files)
                return newgenotype


        def crossOver(self, genotype1: str, genotype2: str) -> str:
                """Returns the content of the result file of the "crossover" command run for the two genotypes."""
                files = self.__runCommand(self.CROSSOVER_CMD, [genotype1, genotype2], self.CROSSOVER_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        child_genotype = f.read()
                self.__cleanUpCommandResults(files)
                return child_genotype


        def dissimilarity(self, genotype1: str, genotype2: str) -> float:
                """Returns the dissimilarity of the two genotypes read from the matrix produced by the "dissimil" command.

                The matrix is expected to have 2 rows and 4 columns, a zero diagonal in the last two columns and to be symmetrical;
                an AssertionError is raised otherwise (and the created files are not deleted).
                """
                files = self.__runCommand(self.DISSIMIL_CMD, [genotype1, genotype2], self.DISSIMIL_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
                with open(files[-1]) as f:
                        dissimilarity_matrix = np.genfromtxt(f, dtype=np.float64, comments='#', encoding=None, delimiter='\t')
                # We would like to skip column #1 while reading and read everything else, but... https://stackoverflow.com/questions/36091686/exclude-columns-from-genfromtxt-with-numpy
                # This would be too complicated, so strings (names) in column #1 become NaN as floats (unless they accidentally are valid numbers) - not great, not terrible
                EXPECTED_SHAPE = (2, 4)
                assert dissimilarity_matrix.shape == EXPECTED_SHAPE, f"Not a correct dissimilarity matrix, expected {EXPECTED_SHAPE} "
                for i in range(len(dissimilarity_matrix)):
                        assert dissimilarity_matrix[i][i + 2] == 0, "Not a correct dissimilarity matrix, diagonal expected to be 0"
                assert dissimilarity_matrix[0][3] == dissimilarity_matrix[1][2], "Probably not a correct dissimilarity matrix, expecting symmetry, verify this"
                self.__cleanUpCommandResults(files)
                return dissimilarity_matrix[0][3]


        def isValid(self, genotype: str) -> bool:
                """Returns True when the first line of the result file of the "isvalid" command is "1" (surrounding whitespace is ignored)."""
                files = self.__runCommand(self.ISVALID_CMD, [genotype], self.ISVALID_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                with open(files[-1]) as f:
                        valid = f.readline().strip() == "1"  # strip() so that a trailing end-of-line does not turn a valid genotype into an invalid one
                self.__cleanUpCommandResults(files)
                return valid
239
240
def parseArguments():
        """Defines the command-line options of the demo run and returns the parsed arguments.

        '-path' is required and is validated with ensureDir(); '-exe', '-genformat' and '-pid' are optional.
        """
        description = 'Run this program with "python -u %s" if you want to disable buffering of its output.' % sys.argv[0]
        optional_arguments = (
                ('-exe', 'Executable name. If not given, "frams.exe" or "frams.linux" is assumed.'),
                ('-genformat', 'Genetic format for the demo run, for example 4, 9, or S. If not given, f1 is assumed.'),
                ('-pid', 'Unique ID of this process. Only relevant when you run multiple instances of this class simultaneously but as separate processes, and they use the same Framsticks CLI executable. This value will be appended to the names of created files to avoid conflicts.'),
        )

        arg_parser = argparse.ArgumentParser(description=description)
        arg_parser.add_argument('-path', type=ensureDir, required=True, help='Path to Framsticks CLI without trailing slash.')
        for option, help_text in optional_arguments:
                arg_parser.add_argument(option, required=False, help=help_text)
        return arg_parser.parse_args()
248
249
def ensureDir(string):
        """Returns 'string' unchanged when it is a path of an existing directory; raises NotADirectoryError otherwise."""
        if not os.path.isdir(string):
                raise NotADirectoryError(string)
        return string
255
256
if __name__ == "__main__":
        # A demo run: gets the simplest genotype, mutates it, crosses over two parents,
        # and prints the dissimilarity, performance and validity of the offspring.

        # TODO ideas:
        # - check_validity with three levels (invalid, corrected, valid)
        # - "vectorize" some operations (isvalid, evaluate) so that a number of genotypes is handled in one call
        # - use threads for non-blocking reading from frams' stdout and thus not relying on specific strings printed by frams
        # - a pool of binaries run at the same time, balance load - in particular evaluation
        # - if we read genotypes in "org:" format anywhere: import https://pypi.org/project/framsreader/0.1.2/ and use it if successful,
        #    if not then print a message "framsreader not available, using simple internal method to save a genotype" and proceed as it is now.
        #    So far we don't read, but we should use the proper writer to handle all special cases like quoting etc.

        parsed_args = parseArguments()
        framsCLI = FramsticksCLI(parsed_args.path, parsed_args.exe, parsed_args.pid)

        try:
                simplest = framsCLI.getSimplest('1' if parsed_args.genformat is None else parsed_args.genformat)
                print("\tSimplest genotype:", simplest)
                parent1 = framsCLI.mutate(simplest)
                parent2 = parent1
                MUTATE_COUNT = 10
                for _ in range(MUTATE_COUNT):  # example of a chain of MUTATE_COUNT mutations
                        parent2 = framsCLI.mutate(parent2)
                print("\tParent1 (mutated simplest):", parent1)
                print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)
                offspring = framsCLI.crossOver(parent1, parent2)
                print("\tCrossover (Offspring):", offspring)
                print('\tDissimilarity of Parent1 and Offspring:', framsCLI.dissimilarity(offspring, parent1))
                print('\tPerformance of Offspring:', framsCLI.evaluate(offspring))
                print('\tValidity of Offspring:', framsCLI.isValid(offspring))
        finally:
                # ask the spawned Framsticks CLI to finish even when one of the demo steps above fails
                framsCLI.closeFramsticksCLI()
Note: See TracBrowser for help on using the repository browser.