source: framspy/FramsticksCLI.py @ 956

Last change on this file since 956 was 956, checked in by Maciej Komosinski, 4 years ago

Allowed multiple instances of FramsticksCLI class to safely use a single Framsticks CLI executable (no danger to use the same file names)

File size: 13.1 KB
RevLine 
[939]1from subprocess import Popen, PIPE, check_output
2from enum import Enum
3from typing import List
[956]4from itertools import count  # for tracking multiple instances
[939]5import json
6import sys, os
7import argparse
8import numpy as np
9
10
class FramsticksCLI:
    """Runs Framsticks CLI (command-line) executable and communicates with it using standard input and output.
    You can perform basic operations like mutation, crossover, and evaluation of genotypes.
    This way you can perform evolution controlled by python as well as access and manipulate genotypes.
    You can even design and use in evolution your own genetic representation implemented entirely in python.

    You need to provide one or two parameters when you run this class: the path to Framsticks CLI
    and the name of the Framsticks CLI executable (if it is non-standard). See::
        FramsticksCLI.py -h

    Data is exchanged with the executable through files: input genotypes are written to the
    directory reported by the executable, a command is sent to its stdin, and the result is
    read back from a file in OUTPUT_DIR. All file names carry a per-instance prefix so that
    several instances of this class can share one executable."""

    PRINT_FRAMSTICKS_OUTPUT: bool = False  # set to True for debugging
    DETERMINISTIC: bool = False  # set to True to have the same results on each run

    GENO_SAVE_FILE_FORMAT = Enum('GENO_SAVE_FILE_FORMAT', 'NATIVEFRAMS RAWGENO')  # how to save genotypes
    OUTPUT_DIR = "scripts_output"
    STDOUT_ENDOPER_MARKER = "FileObject.write"  # we look for this message on Framsticks CLI stdout to detect when Framsticks created a file with the result we expect

    FILE_PREFIX = 'framspy_'

    RANDOMIZE_CMD = "rnd" + "\n"
    SETEXPEDEF_CMD = "expdef standard-eval" + "\n"
    GETSIMPLEST_CMD = "getsimplest"
    GETSIMPLEST_FILE = "simplest.gen"
    EVALUATE_CMD = "evaluate eval-allcriteria.sim"
    EVALUATE_FILE = "genos_eval.json"
    CROSSOVER_CMD = "crossover"
    CROSSOVER_FILE = "child.gen"
    DISSIMIL_CMD = "dissimil"
    DISSIMIL_FILE = "dissimilarity_matrix.gen"
    ISVALID_CMD = "isvalid"
    ISVALID_FILE = "validity.gen"
    MUTATE_CMD = "mutate"
    MUTATE_FILE = "mutant.gen"

    CLI_INPUT_FILE = "genotypes.gen"

    _last_instance_id = count(0)  # "static" counter incremented when a new instance is created. Used for unique filenames

    def __init__(self, framspath, framsexe):
        """Locates the executable, detects its writable directory and spawns the interactive CLI.

        Args:
            framspath: directory that contains the Framsticks CLI executable.
            framsexe: executable name, or None to use 'frams.exe' (Windows) / 'frams.linux' (elsewhere).

        Raises:
            SystemExit: when the executable cannot be found (a message is printed first).
            RuntimeError: when the executable's help output does not reveal a writable directory.
        """
        self.id = next(FramsticksCLI._last_instance_id)
        self.frams_path = framspath
        self.frams_exe = framsexe if framsexe is not None else 'frams.exe' if os.name == "nt" else 'frams.linux'
        self.writing_path = None
        mainpath = os.path.join(self.frams_path, self.frams_exe)
        exe_call = [mainpath, '-Q', '-s', '-c', '-icliutils.ini']  # -c will be ignored in Windows Framsticks (this option is meaningless because the Windows version does not support color console, so no need to deactivate this feature using -c)
        exe_call_to_get_version = [mainpath, '-V']
        exe_call_to_get_path = [mainpath, '-?']
        try:
            print("\n".join(self.__readAllOutput(exe_call_to_get_version)))
            help_lines = self.__readAllOutput(exe_call_to_get_path)
            for helpline in help_lines:
                if 'dDIRECTORY' in helpline:
                    # the directory is the first single-quoted token on that help line
                    self.writing_path = helpline.split("'")[1]
        except FileNotFoundError:
            print("Could not find Framsticks executable ('%s') in the given location ('%s')." % (self.frams_exe, self.frams_path))
            sys.exit(1)
        if self.writing_path is None:
            # Without this check the failure would only surface later as a TypeError in os.path.join().
            raise RuntimeError("Could not detect the writable working directory in the help output of '%s'." % mainpath)
        print("Temporary files with results will be saved in detected writable working directory '%s'" % self.writing_path)
        self.__spawnFramsticksCLI(exe_call)

    def __readAllOutput(self, command):
        """Runs `command` to completion and returns its stdout as a list of right-stripped text lines.

        Raises:
            FileNotFoundError: propagated from Popen when the executable does not exist.
        """
        # communicate() drains both pipes (so a full stderr pipe cannot block the child), waits for
        # the process to finish, and the context manager closes all pipe handles afterwards.
        with Popen(command, stdout=PIPE, stderr=PIPE, stdin=PIPE) as frams_process:
            stdout_data, _ = frams_process.communicate()
        return [line.decode('utf-8').rstrip() for line in stdout_data.splitlines()]

    def __spawnFramsticksCLI(self, args):
        """Spawns the interactive CLI process, runs three basic self-tests and sets up the experiment definition."""
        # the child app (Framsticks CLI) should not buffer outputs and we need to immediately read its stdout, hence we use pexpect/wexpect
        print('Spawning Framsticks CLI for continuous stdin/stdout communication... ', end='')
        if os.name == "nt":  # Windows:
            import wexpect  # https://pypi.org/project/wexpect/
            # https://github.com/raczben/wexpect/tree/master/examples
            self.child = wexpect.spawn(' '.join(args))
        else:
            import pexpect  # https://pexpect.readthedocs.io/en/stable/
            self.child = pexpect.spawn(' '.join(args))
            self.child.setecho(False)  # linux only
        print('OK.')

        self.__readFromFramsCLIUntil("UserScripts.autoload")
        print('Performing a basic test 1/3... ', end='')
        assert self.getSimplest("1") == "X"
        print('OK.')
        print('Performing a basic test 2/3... ', end='')
        assert self.isValid("X[0:0]") is True
        print('OK.')
        print('Performing a basic test 3/3... ', end='')
        assert self.isValid("X[0:0],") is False
        print('OK.')
        if not self.DETERMINISTIC:
            self.child.sendline(self.RANDOMIZE_CMD)
        self.child.sendline(self.SETEXPEDEF_CMD)

    def closeFramsticksCLI(self):
        """Asks the CLI process to terminate by sending it an end-of-file character."""
        # End gracefully by sending end-of-file character: ^Z or ^D
        # Without -Q argument ("quiet mode"), Framsticks CLI would print "Shell closed." for goodbye.
        self.child.sendline(chr(26 if os.name == "nt" else 4))

    def __getPrefixedFilename(self, filename: str) -> str:
        """Returns `filename` prefixed with FILE_PREFIX and a label unique to this instance.

        The label uses only the letters A-Z: ids 0..25 map to 'A'..'Z', then 'AA', 'AB', ...
        (spreadsheet-column style). A plain chr(ord('A') + id) would produce characters such as
        '[' or a backslash for ids of 26 and above, and lowercase letters that clash with the
        uppercase ones on case-insensitive file systems.
        """
        # Returns filename with unique instance id appended so there is no clash when many instances of this class use the same Framsticks CLI executable
        label = ""
        remaining = self.id + 1
        while remaining > 0:
            remaining, letter_index = divmod(remaining - 1, 26)
            label = chr(ord('A') + letter_index) + label
        return FramsticksCLI.FILE_PREFIX + label + '_' + filename

    def __saveGenotypeToFile(self, genotype, name, mode, saveformat):
        """Writes (or deletes) a genotype file in the writable directory.

        Args:
            genotype: genotype text to store; ignored in delete mode.
            name: base file name; the per-instance prefix is added here.
            mode: a mode accepted by open() (e.g. "w", "a"), or the special value 'd' which
                deletes the file if it exists.
            saveformat: a GENO_SAVE_FILE_FORMAT member; RAWGENO writes the bare genotype, anything
                else writes a minimal "org:" record.

        Returns:
            Tuple (relative file name, absolute file path).
        """
        relname = self.__getPrefixedFilename(name)
        absname = os.path.join(self.writing_path, relname)
        if mode == 'd':  # special mode, 'delete'
            if os.path.exists(absname):
                os.remove(absname)
        else:
            # the context manager closes the file even when a write fails
            with open(absname, mode) as outfile:
                if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
                    outfile.write(genotype)
                else:
                    outfile.write("org:\n")
                    outfile.write("genotype:~\n")
                    outfile.write(genotype + "~\n\n")  # TODO proper quoting of special characters in genotype...
        return relname, absname

    def __readFromFramsCLIUntil(self, until_marker: str):
        """Consumes CLI output line by line until a line containing `until_marker` has been read.

        Every line is printed when PRINT_FRAMSTICKS_OUTPUT is set; lines starting with "[ERROR]"
        are always printed.
        """
        while True:
            self.child.expect('\n')
            raw = self.child.before
            # The spawned child may hand back bytes (pexpect does by default). str(bytes) would give
            # "b'...'", which can never start with "[ERROR]", so decode instead.
            msg = raw.decode('utf-8', errors='replace') if isinstance(raw, bytes) else str(raw)
            if self.PRINT_FRAMSTICKS_OUTPUT or msg.startswith("[ERROR]"):
                print(msg)
            if until_marker in msg:
                break

    def __runCommand(self, command, genotypes, result_file_name, saveformat) -> List[str]:
        """Saves `genotypes` to input file(s), sends `command` to the CLI and waits for the result file.

        Args:
            command: CLI command (possibly with leading arguments) to run.
            genotypes: list of genotype strings passed to the command; may be empty.
            result_file_name: base name of the file the command writes its result to.
            saveformat: GENO_SAVE_FILE_FORMAT member selecting how the input genotypes are stored.

        Returns:
            Absolute paths of all files involved; the last element is the result file.
        """
        filenames_rel = []  # list of file names with input data for the command
        filenames_abs = []  # same list but absolute paths actually used
        if saveformat == self.GENO_SAVE_FILE_FORMAT["RAWGENO"]:
            for i, genotype in enumerate(genotypes):
                # plain text format = must have a separate file for each genotype
                rel_name, abs_name = self.__saveGenotypeToFile(genotype, "genotype" + str(i) + ".gen", "w", self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
                filenames_rel.append(rel_name)
                filenames_abs.append(abs_name)
        elif saveformat == self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"]:
            self.__saveGenotypeToFile(None, self.CLI_INPUT_FILE, 'd', None)  # 'd'elete: ensure there is nothing left from the last run of the program because we "a"ppend to file in the loop below
            for genotype in genotypes:
                self.__saveGenotypeToFile(genotype, self.CLI_INPUT_FILE, "a", self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
            if genotypes:
                #  since we use the same file in the loop above, add this file only once (i.e., outside of the loop);
                #  with no genotypes no file was created, so there is nothing to pass or to clean up later
                filenames_rel.append(self.__getPrefixedFilename(self.CLI_INPUT_FILE))
                filenames_abs.append(os.path.join(self.writing_path, filenames_rel[-1]))

        result_file_name = self.__getPrefixedFilename(result_file_name)
        cmd = command + " " + " ".join(filenames_rel) + " " + result_file_name
        self.child.sendline(cmd + '\n')
        self.__readFromFramsCLIUntil(self.STDOUT_ENDOPER_MARKER)
        filenames_abs.append(os.path.join(self.writing_path, self.OUTPUT_DIR, result_file_name))
        return filenames_abs  # last element is a path to the file containing results

    def __cleanUpCommandResults(self, filenames):
        """Deletes files with results just created by the command."""
        for name in filenames:
            os.remove(name)

    def getSimplest(self, genetic_format) -> str:
        """Returns the simplest genotype of the given single-character genetic format."""
        assert len(genetic_format) == 1, "Genetic format should be a single character"
        files = self.__runCommand(self.GETSIMPLEST_CMD + " " + genetic_format + " ", [], self.GETSIMPLEST_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
        with open(files[-1]) as f:
            genotype = f.read()
        self.__cleanUpCommandResults(files)
        return genotype

    def evaluate(self, genotype: str):
        """
        Returns:
            The non-empty data parsed from the JSON result of self.EVALUATE_CMD, or None when the
            result is empty (a message is printed and the files are left in place for inspection).
            Note that for whatever reason (e.g. incorrect genotype), the data you will get may be
            partially empty and may not have the fields you expected, so handle such cases properly.
        """
        files = self.__runCommand(self.EVALUATE_CMD, [genotype], self.EVALUATE_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
        with open(files[-1]) as f:
            data = json.load(f)
        if len(data) > 0:
            self.__cleanUpCommandResults(files)
            return data
        else:
            print("Evaluating genotype: no performance data was returned in", self.EVALUATE_FILE)  # we do not delete files here
            return None

    def mutate(self, genotype: str) -> str:
        """Returns a mutated version of `genotype` as produced by the CLI."""
        files = self.__runCommand(self.MUTATE_CMD, [genotype], self.MUTATE_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
        with open(files[-1]) as f:
            newgenotype = f.read()
        self.__cleanUpCommandResults(files)
        return newgenotype

    def crossOver(self, genotype1: str, genotype2: str) -> str:
        """Returns the genotype produced by the CLI crossover of the two parents."""
        files = self.__runCommand(self.CROSSOVER_CMD, [genotype1, genotype2], self.CROSSOVER_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
        with open(files[-1]) as f:
            child_genotype = f.read()
        self.__cleanUpCommandResults(files)
        return child_genotype

    def dissimilarity(self, genotype1: str, genotype2: str) -> float:
        """Returns the dissimilarity of two genotypes, read from the matrix written by the CLI.

        Raises:
            AssertionError: when the matrix does not have the expected shape, a zero diagonal,
                or symmetric off-diagonal values.
        """
        files = self.__runCommand(self.DISSIMIL_CMD, [genotype1, genotype2], self.DISSIMIL_FILE, self.GENO_SAVE_FILE_FORMAT["NATIVEFRAMS"])
        with open(files[-1]) as f:
            dissimilarity_matrix = np.genfromtxt(f, dtype=np.float64, comments='#', encoding=None, delimiter='\t')
        # We would like to skip column #1 while reading and read everything else, but... https://stackoverflow.com/questions/36091686/exclude-columns-from-genfromtxt-with-numpy
        # This would be too complicated, so strings (names) in column #1 become NaN as floats (unless they accidentally are valid numbers) - not great, not terrible
        EXPECTED_SHAPE = (2, 4)
        assert dissimilarity_matrix.shape == EXPECTED_SHAPE, f"Not a correct dissimilarity matrix, expected {EXPECTED_SHAPE} "
        # the actual 2x2 matrix lives in the last two columns, hence the column offset of 2
        for i in range(len(dissimilarity_matrix)):
            assert dissimilarity_matrix[i][i + 2] == 0, "Not a correct dissimilarity matrix, diagonal expected to be 0"
        assert dissimilarity_matrix[0][3] == dissimilarity_matrix[1][2], "Probably not a correct dissimilarity matrix, expecting symmetry, verify this"
        self.__cleanUpCommandResults(files)
        return dissimilarity_matrix[0][3]

    def isValid(self, genotype: str) -> bool:
        """Returns True when the CLI reports `genotype` as valid (result file starts with the line "1")."""
        files = self.__runCommand(self.ISVALID_CMD, [genotype], self.ISVALID_FILE, self.GENO_SAVE_FILE_FORMAT["RAWGENO"])
        with open(files[-1]) as f:
            # tolerate a trailing line terminator after the flag
            valid = f.readline().rstrip("\r\n") == "1"
        self.__cleanUpCommandResults(files)
        return valid
238
239
def parseArguments():
    """Builds the command-line parser of the demo run and returns the parsed arguments.

    Recognized options: -path (required, must be an existing directory), -exe and -genformat.
    """
    description = 'Run this program with "python -u %s" if you want to disable buffering of its output.' % sys.argv[0]
    cli_parser = argparse.ArgumentParser(description=description)
    # (flag, keyword arguments) pairs, registered in this order
    option_specs = (
        ('-path', dict(type=ensureDir, required=True, help='Path to Framsticks CLI without trailing slash.')),
        ('-exe', dict(required=False, help='Executable name. If not given, "frams.exe" or "frams.linux" is assumed.')),
        ('-genformat', dict(required=False, help='Genetic format for the demo run, for example 4, 9, or S. If not given, f1 is assumed.')),
    )
    for flag, options in option_specs:
        cli_parser.add_argument(flag, **options)
    return cli_parser.parse_args()
246
247
def ensureDir(string):
    """Returns `string` unchanged when it names an existing directory.

    Raises:
        NotADirectoryError: when `string` is not an existing directory.
    """
    if not os.path.isdir(string):
        raise NotADirectoryError(string)
    return string
253
254
if __name__ == "__main__":
    # A demo run: get the simplest genotype, mutate it, cross over, and report
    # dissimilarity, performance and validity of the offspring.

    # TODO ideas:
    # - check_validity with three levels (invalid, corrected, valid)
    # - "vectorize" some operations (isvalid, evaluate) so that a number of genotypes is handled in one call
    # - use threads for non-blocking reading from frams' stdout and thus not relying on specific strings printed by frams
    # - a pool of binaries run at the same time, balance load - in particular evaluation
    # - if we read genotypes in "org:" format anywhere: import https://pypi.org/project/framsreader/0.1.2/ and use it if successful,
    #    if not then print a message "framsreader not available, using simple internal method to save a genotype" and proceed as it is now.
    #    So far we don't read, but we should use the proper writer to handle all special cases like quoting etc.

    parsed_args = parseArguments()
    framsCLI = FramsticksCLI(parsed_args.path, parsed_args.exe)

    try:
        simplest = framsCLI.getSimplest('1' if parsed_args.genformat is None else parsed_args.genformat)
        print("\tSimplest genotype:", simplest)
        parent1 = framsCLI.mutate(simplest)
        parent2 = parent1
        MUTATE_COUNT = 10
        for _ in range(MUTATE_COUNT):  # example of a chain of MUTATE_COUNT mutations
            parent2 = framsCLI.mutate(parent2)
        print("\tParent1 (mutated simplest):", parent1)
        print("\tParent2 (Parent1 mutated %d times):" % MUTATE_COUNT, parent2)
        offspring = framsCLI.crossOver(parent1, parent2)
        print("\tCrossover (Offspring):", offspring)
        print('\tDissimilarity of Parent1 and Offspring:', framsCLI.dissimilarity(offspring, parent1))
        print('\tPerformance of Offspring:', framsCLI.evaluate(offspring))
        print('\tValidity of Offspring:', framsCLI.isValid(offspring))
    finally:
        # always ask the spawned CLI process to terminate, even when a demo step fails
        framsCLI.closeFramsticksCLI()
Note: See TracBrowser for help on using the repository browser.