source: mds-and-trees/tree-genealogy.py @ 623

Last change on this file since 623 was 623, checked in by konrad, 8 years ago

Some minor changes

File size: 21.0 KB
Line 
1# Draws a genealogical tree (generates a SVG file) based on parent-child relationship information.
2# Supports files generated by Framsticks experiments.
3
4import json
5import random
6import math
7import argparse
8import time as ttime
9
# Run-time options. main() overwrites every one of them from the command
# line; the empty strings below are only placeholders so that the names
# always exist at module level.
TIME = ""  # BIRTHS / GENERATIONAL / REAL
BALANCE = ""  # RANDOM / MIN / DENSITY
COLORING = ""  # NONE / IMPORTANCE / TYPE (anything else is treated as IMPORTANCE)

DOT_STYLE = ""  # NONE / NORMAL / TYPE

JITTER = ""  # replaced by the boolean --jitter flag in main()

# ------SVG---------
svg_file = 0  # replaced in main() by the open output file object

# Attribute strings pasted verbatim into the generated SVG elements.
svg_line_style = 'stroke="rgb(90%,10%,16%)" stroke-width="1" stroke-opacity="0.7"'
# main() appends the stroke color/opacity chosen on the command line to these two.
svg_mutation_line_style = 'stroke-width="1"'
svg_crossover_line_style = 'stroke-width="1"'
svg_spine_line_style = 'stroke="rgb(0%,90%,40%)" stroke-width="2" stroke-opacity="1"'
svg_scale_line_style = 'stroke="black" stroke-width="0.5" stroke-opacity="1" stroke-dasharray="5, 5"'

svg_dot_style = 'r="2" stroke="black" stroke-width="0.2" fill="red"'
svg_clear_dot_style = 'r="2" stroke="black" stroke-width="0.4" fill="none"'
svg_spine_dot_style = 'r="1" stroke="black" stroke-width="0.2" fill="rgb(50%,50%,100%)"'

svg_scale_text_style = 'style="font-family: Arial; font-size: 12; fill: #000000;"'
31
def hex_to_style(hex):
    """Convert an RRGGBB or RRGGBBAA hex color into SVG stroke attributes.

    A single leading "#" is optional. Color channels are emitted as
    percentages; the optional alpha byte becomes the stroke opacity
    (0.5 when it is absent).

    Returns the attribute string (with surrounding spaces). For malformed
    input a message is printed and a half-transparent black style is
    returned instead.
    """
    default_style = ' stroke="black" stroke-opacity="0.5" '

    # startswith() is safe for an empty string, unlike hex[0].
    if hex.startswith("#"):
        hex = hex[1:]

    if len(hex) not in (6, 8):
        print("Invalid number of digits in the color's hex #" + hex + "! Assuming black.")
        return default_style

    # int(text, 16) on its own would also accept signs, whitespace and
    # underscores, so every character is checked explicitly.
    if any(c not in "0123456789abcdefABCDEF" for c in hex):
        print("Invalid characters in the color's hex #" + hex + "! Assuming black.")
        return default_style

    red = 100*int(hex[0:2], 16)/255
    green = 100*int(hex[2:4], 16)/255
    blue = 100*int(hex[4:6], 16)/255
    opacity = 0.5
    if len(hex) == 8:
        opacity = int(hex[6:8], 16)/255
    return ' stroke="rgb(' +str(red)+ '%,' +str(green)+ '%,' +str(blue)+ '%)" stroke-opacity="' +str(opacity)+ '" '
54
def svg_add_line(from_pos, to_pos, style=svg_line_style):
    """Write one <line> element from from_pos to to_pos to the output SVG.

    Both positions are indexable (x, y) pairs; style is a ready-made string
    of SVG attributes.
    """
    pieces = [
        '<line ', style,
        ' x1="', str(from_pos[0]),
        '" x2="', str(to_pos[0]),
        '" y1="', str(from_pos[1]),
        '" y2="', str(to_pos[1]),
        '"  fill="none"/>',
    ]
    svg_file.write("".join(pieces))
58
def svg_add_text(text, pos, anchor, style=svg_scale_text_style):
    """Write one <text> element to the output SVG.

    text is the visible label, pos an indexable (x, y) pair, anchor the
    value of the text-anchor attribute and style a ready-made string of SVG
    attributes.
    """
    from xml.sax.saxutils import escape

    # Labels may contain file names; "&", "<" and ">" must be escaped or the
    # resulting document is not well-formed XML.
    svg_file.write('<text ' + style + ' text-anchor="' + anchor + '" x="' + str(pos[0]) + '" y="' + str(pos[1]) + '" >'
                   + escape(text) + '</text>')
61
def svg_add_dot(pos, style=svg_dot_style):
    """Write one <circle> element centered at pos (an indexable (x, y) pair)."""
    cx = str(pos[0])
    cy = str(pos[1])
    svg_file.write("".join(['<circle ', style, ' cx="', cx, '" cy="', cy, '" />']))
64
def svg_generate_line_style(percent):
    """Build the SVG stroke attributes for a line of the given importance.

    percent is expected in [0, 1]. Opacity (0.2 -> 1.0) and stroke width
    (1 -> 3) grow linearly with it, while the color moves from the start to
    the end of the palette along a steep 1-(1-p)^20 curve.
    """
    # "hotdog" palette; alternatives that were tried:
    #   lava: [100, 80, 0] -> [100, 0, 0]
    #   neon: [30, 200, 255] -> [240, 0, 220]
    start_rgb = [100, 70, 0]
    end_rgb = [60, 0, 0]

    def blend(low, high, t):
        # plain linear interpolation between low and high
        return low*(1-t) + high*t

    opacity = blend(0.2, 1.0, percent)
    width = blend(1, 3, percent)

    # the color saturates much faster than width and opacity
    eased = 1 - ((1-percent)**20)
    channels = [str(blend(low, high, eased)) for low, high in zip(start_rgb, end_rgb)]

    return 'stroke="rgb(' + '%,'.join(channels) + '%)" stroke-width="' + str(width) \
           + '" stroke-opacity="' + str(opacity) + '"'
89
def svg_generate_dot_style(kind):
    """Build the SVG attributes of a dot for an individual of the given kind.

    kind is an integer index into a fixed palette of eight colors; indices
    beyond the palette wrap around instead of raising IndexError. The dot
    radius shrinks as the number of loaded nodes grows and is capped at 10.
    """
    kinds = ["red", "lawngreen", "royalblue", "magenta", "yellow", "cyan", "white", "black"]

    # max(..., 1) avoids a division by zero when no nodes have been loaded
    r = min(2500/max(len(nodes), 1), 10)

    return 'fill="' + kinds[kind % len(kinds)] + '" r="' + str(r) + '" stroke="black" stroke-width="' + str(r/10) + '" fill-opacity="1.0" ' \
           'stroke-opacity="1.0"'
97
98# -------------------
99
def load_data(dir):
    """Load parent-child relations from a log file with [OFFSPRING] records.

    Every line of the form "[OFFSPRING] <json>" is parsed. For a record that
    has "FromIDs", nodes[ID] becomes a dict mapping each parent ID to its
    contribution (currently always 1). "Time" and "Kind" values, when
    present, are stored in the time and kind dictionaries. The first record
    whose first parent is not a known node defines the root (firstnode).
    Reading stops after max_nodes records unless max_nodes is 0.

    Finally inv_nodes (parent -> list of children) is built and the number
    of loaded nodes is printed. A duplicated ID terminates the program.
    """
    global firstnode, nodes, inv_nodes, time
    loaded = 0

    # the context manager closes the file even on the early exits below
    with open(dir) as f:
        for line in f:
            sline = line.split(' ', 1)
            if len(sline) == 2 and sline[0] == "[OFFSPRING]":
                creature = json.loads(sline[1])
                if "FromIDs" in creature:
                    parents = creature["FromIDs"]
                    if creature["ID"] in nodes:
                        print("Duplicated entry for " + creature["ID"])
                        # same effect as quit(), which is only guaranteed
                        # to exist in interactive sessions
                        raise SystemExit

                    # we assign to each parent its contribution to the genotype of the child
                    nodes[creature["ID"]] = {}
                    for parent in parents:
                        inherited = 1 #(creature["Inherited"][i] if 'Inherited' in creature else 1) #ONLY FOR NOW
                        nodes[creature["ID"]][parent] = inherited

                    # an empty FromIDs list cannot name a root
                    if parents and parents[0] not in nodes and firstnode is None:
                        firstnode = parents[0]

                if "Time" in creature:
                    time[creature["ID"]] = creature["Time"]

                if "Kind" in creature:
                    kind[creature["ID"]] = creature["Kind"]

                loaded += 1
            if loaded == max_nodes and max_nodes != 0:
                break

    for k, v in sorted(nodes.items()):
        for val in sorted(v):
            inv_nodes.setdefault(val, []).append(k)

    print(len(nodes))
141
142
def load_simple_data(dir):
    """Load parent-child relations from a plain "child parent" text file.

    A line with a single token names the root (firstnode); a line with two
    or more tokens declares that the first token is a child of the second.
    Parent numbers lower than the root's are replaced by the root. Blank
    lines are skipped. Reading stops after max_nodes lines unless max_nodes
    is 0.

    nodes[child] is stored as {parent: 1}, the same shape load_data()
    produces, because the layout and drawing code calls len() and .items()
    on these values. inv_nodes (parent -> list of children) is built at the
    end.

    Raises ValueError when a relation line appears before the root line.
    """
    global firstnode, nodes, inv_nodes
    loaded = 0

    with open(dir) as f:
        for line in f:
            sline = line.split()
            if not sline:
                continue  # blank line: nothing to load
            if len(sline) > 1:
                if sline[0] == firstnode:
                    continue
                if firstnode is None:
                    raise ValueError("The root node must be given before the first child-parent pair.")
                parent = str(max(int(sline[1]), int(firstnode)))
                nodes[sline[0]] = {parent: 1}
            else:
                firstnode = sline[0]

            loaded += 1
            if loaded == max_nodes and max_nodes != 0:
                break

    for k, v in sorted(nodes.items()):
        for parent in sorted(v):
            inv_nodes.setdefault(parent, []).append(k)
169
def compute_depth(node):
    """Compute the height of every node reachable from node.

    The height of a node is 0 when it has no children in inv_nodes and
    otherwise one more than the largest height among its children. Results
    are stored in the global depth dictionary; the height of node itself is
    returned.

    The traversal uses an explicit stack, so long lineages do not hit the
    interpreter's recursion limit, and each node is evaluated only once even
    when it is reachable through several parents.

    Raises ValueError if the parent-child relation contains a cycle.
    """
    computed = {}
    expanded = set()  # nodes whose children have already been pushed
    stack = [node]

    while stack:
        current = stack[-1]
        if current in computed:
            # duplicate stack entry of a node finished through another parent
            stack.pop()
            continue

        children = inv_nodes.get(current, ())
        expanded.add(current)
        pending = [c for c in children if c not in computed]

        if pending:
            # an expanded but unfinished node is an ancestor of current
            if any(c in expanded for c in pending):
                raise ValueError("Cycle detected in the parent-child data near node " + str(current))
            stack.extend(pending)
        else:
            my_depth = max((computed[c] + 1 for c in children), default=0)
            computed[current] = my_depth
            depth[current] = my_depth
            stack.pop()

    return computed[node]
177
178# ------------------------------------
179
180
def xmin_crowd_random(x1, x2, y):
    """Pick x1 or x2 with equal probability; y is ignored."""
    if random.randrange(2) == 0:
        return x1
    return x2
183
def xmin_crowd_min(x1, x2, y):
    """Pick the candidate whose nearest neighbor in row y is farther away.

    Only already placed nodes with exactly the same y are considered. x2 is
    returned on a tie, including the case of an empty row.
    """
    row_xs = [placed[0] for placed in positions.values() if placed[1] == y]
    # 999999 stands for "no neighbor found"
    gap_at_x1 = min([999999] + [abs(x1-px) for px in row_xs])
    gap_at_x2 = min([999999] + [abs(x2-px) for px in row_xs])
    if gap_at_x1 > gap_at_x2:
        return x1
    return x2
def xmin_crowd_density(x1, x2, y):
    """Pick the candidate that lies farther from nearby placed nodes.

    For every placed node whose y is strictly within 10 units of y, the
    Euclidean distances to (x1, y) and (x2, y) are summed. The candidate
    with the larger total is returned; x2 wins ties, including the case of
    no nearby nodes.
    """
    x1_dist = 0
    x2_dist = 0
    ymin = y-10
    ymax = y+10
    for pos in positions.values():
        # both bounds must hold; with "or" the test was always true and the
        # window had no effect
        if ymin < pos[1] < ymax:
            dysq = (pos[1]-y)**2
            dx1 = pos[0]-x1
            dx2 = pos[0]-x2

            x1_dist += math.sqrt(dysq + dx1**2)
            x2_dist += math.sqrt(dysq + dx2**2)
    return (x1 if x1_dist > x2_dist else x2)
209
210# ------------------------------------
211
def prepos_children():
    """Assign an [x, y] position to every node reachable from firstnode.

    The root is placed at [0, 0] and the graph is walked breadth-first
    through inv_nodes. The y coordinate of a child depends on TIME: its
    birth number parsed from the ID (BIRTHS), the parent's y plus one
    (GENERATIONAL) or its recorded time (REAL). A child with one parent is
    put one unit (or a Gaussian offset when JITTER is on) to the left or
    right of the parent, the side being chosen by the BALANCE strategy. A
    child with several parents is put at the contribution-weighted mean x of
    its parents.

    When REAL time is requested but no time data were loaded, TIME falls
    back to BIRTHS. max_height, max_width and min_width are updated from the
    final positions. Progress is printed every 1000 visited nodes.

    Raises ValueError for an unknown BALANCE value.
    """
    global max_height, max_width, min_width, TIME
    from collections import deque

    print("firstnode " + firstnode)

    # fall back only when REAL was actually requested; other modes do not
    # need time data and must not be overridden
    if TIME == "REAL" and not time:
        print("REAL time requested, but no real time data provided. Assuming BIRTHS time instead.")
        TIME = "BIRTHS"

    positions[firstnode] = [0, 0]

    if BALANCE == "RANDOM":
        xmin_crowd = xmin_crowd_random
    elif BALANCE == "MIN":
        xmin_crowd = xmin_crowd_min
    elif BALANCE == "DENSITY":
        xmin_crowd = xmin_crowd_density
    else:
        raise ValueError("Error, the value of BALANCE does not match any expected value.")

    # queued mirrors the content of the queue so that the membership test
    # and the removal of the head are O(1)
    nodes_to_visit = deque([firstnode])
    queued = {firstnode}

    node_counter = 0
    start_time = ttime.time()

    while nodes_to_visit:

        node_counter += 1
        if node_counter % 1000 == 0:
            print(str(node_counter) + " " + str(ttime.time()-start_time))
            start_time = ttime.time()

        current_node = nodes_to_visit[0]
        parent_x, parent_y = positions[current_node][0], positions[current_node][1]

        for c in inv_nodes.get(current_node, ()):
            # a child already waiting in the queue is not positioned again
            if c in queued:
                continue
            nodes_to_visit.append(c)
            queued.add(c)

            cy = 0
            if TIME == "BIRTHS":
                # IDs are either plain numbers or numbers prefixed with "c"
                cy = int(c[1:]) if c[0] == "c" else int(c)
            elif TIME == "GENERATIONAL":
                cy = parent_y+1
            elif TIME == "REAL":
                cy = time[c]

            if len(nodes[c]) == 1:
                dissimilarity = random.gauss(0, 1) if JITTER else 1
                positions[c] = [xmin_crowd(parent_x-dissimilarity, parent_x+dissimilarity, cy), cy]
            else:
                # Parents that have not been positioned yet are left out;
                # the current parent is always positioned, so the list is
                # never empty. The child is re-positioned if it is reached
                # again through another parent after leaving the queue.
                known = [(positions[k][0], v) for k, v in nodes[c].items() if k in positions]
                vsum = sum([v for _, v in known])
                cx = sum([px*v/vsum for px, v in known])

                if JITTER:
                    positions[c] = [cx + random.gauss(0, 0.1), cy]
                else:
                    positions[c] = [cx, cy]

        nodes_to_visit.popleft()
        queued.discard(current_node)

    for pos in positions.values():
        max_height = max(max_height, pos[1])
        max_width = max(max_width, pos[0])
        min_width = min(min_width, pos[0])
292
293# ------------------------------------
294
def all_parents_visited(node):
    """Return True when every parent of node is present in visited."""
    return all(parent in visited for parent in sorted(nodes[node]))
302# ------------------------------------
303
def draw_children():
    """Draw every parent-child line and every node dot of the tree.

    The graph is walked breadth-first from firstnode through inv_nodes and
    each reachable node is processed exactly once. Lines are styled
    according to COLORING (NONE: fixed style, TYPE: mutation vs. crossover
    style depending on the number of parents, anything else: importance
    derived from the child's depth). Dots follow DOT_STYLE (NONE: no dots,
    TYPE: colored by kind, anything else: the clear dot style).
    """
    from collections import deque

    max_depth = max(depth.values(), default=0)

    # degenerate extents (e.g. a single node) must not divide by zero
    x_span = (max_width - min_width) or 1
    y_span = max_height or 1
    depth_span = max_depth or 1

    def canvas_point(node):
        # map a layout position to SVG canvas coordinates
        return (w_margin + w_no_margs*(positions[node][0]-min_width)/x_span,
                h_margin + h_no_margs*positions[node][1]/y_span)

    nodes_to_visit = deque([firstnode])
    enqueued = {firstnode}  # never shrinks: each node is drawn only once

    while nodes_to_visit:
        current_node = nodes_to_visit.popleft()

        for c in inv_nodes.get(current_node, ()):  # inv_node => p->c
            if c not in enqueued:
                enqueued.add(c)
                nodes_to_visit.append(c)

            if COLORING == "NONE":
                line_style = svg_line_style
            elif COLORING == "TYPE":
                line_style = (svg_mutation_line_style if len(nodes[c]) == 1 else svg_crossover_line_style)
            else:  # IMPORTANCE, default
                line_style = svg_generate_line_style(depth[c]/depth_span)

            svg_add_line(canvas_point(current_node), canvas_point(c), line_style)

        # Skipping the dot must not skip the rest of the iteration: the old
        # "continue" here left the node in the queue and looped forever.
        if DOT_STYLE != "NONE":
            if DOT_STYLE == "TYPE":
                dot_style = svg_generate_dot_style(kind.get(current_node, 0))
            else:  # NORMAL, default
                dot_style = svg_clear_dot_style
            svg_add_dot(canvas_point(current_node), dot_style)
346
def draw_spine():
    """Draw the spine of the tree in the spine line style.

    Starting at firstnode, only children whose depth is exactly one less
    than their parent's depth are followed and connected with a line.
    """
    def canvas_xy(node):
        # layout position -> SVG canvas coordinates
        px, py = positions[node][0], positions[node][1]
        return (w_margin+w_no_margs*(px-min_width)/(max_width-min_width), h_margin+h_no_margs*py/max_height)

    queue = [firstnode]
    while queue:
        parent = queue[0]

        if parent in inv_nodes:
            for child in inv_nodes[parent]:  # inv_node => p->c
                if depth[child] != depth[parent] - 1:
                    continue
                if child not in queue:
                    queue.append(child)
                svg_add_line(canvas_xy(parent), canvas_xy(child), svg_spine_line_style)

        # the processed node leaves the queue; the loop ends when it is empty
        queue.pop(0)
366
def draw_skeleton():
    """Draw the skeleton of the tree in the spine line style.

    Starting at firstnode, only children whose depth is at least
    min_skeleton_depth are followed and connected with a line.
    """
    def to_canvas(node):
        # layout position -> SVG canvas coordinates
        x = w_margin+w_no_margs*(positions[node][0]-min_width)/(max_width-min_width)
        y = h_margin+h_no_margs*positions[node][1]/max_height
        return (x, y)

    waiting = [firstnode]
    while True:
        node = waiting[0]

        for child in (inv_nodes[node] if node in inv_nodes else []):  # inv_node => p->c
            if depth[child] >= min_skeleton_depth:
                if child not in waiting:
                    waiting.append(child)
                start = to_canvas(node)
                end = to_canvas(child)
                svg_add_line(start, end, svg_spine_line_style)

        # drop the processed node and stop once nothing is left
        del waiting[0]
        if not waiting:
            break
386
387# ------------------------------------
388
def draw_scale(filename ,type):
    """Draw the time scale: a caption plus labeled top and bottom marks.

    filename is the input path; only its last component is shown in the
    caption. type is the requested scale kind: "NONE" draws nothing, any
    other value draws the simple scale. The labels show the smallest and
    the largest value of the quantity selected by TIME (birth number, real
    time or depth); they are left empty when no such values exist.
    """
    if type == "NONE":
        return

    def birth_number(node_id):
        # IDs are either plain numbers or numbers prefixed with "c",
        # the same convention prepos_children() uses
        return int(node_id[1:]) if node_id[0] == "c" else int(node_id)

    # accept both Windows and POSIX path separators
    shown_name = filename.replace("\\", "/").split("/")[-1]
    svg_add_text("Generated from " + shown_name, (5, 15), "start")

    if TIME == "BIRTHS":
        label, values = "Birth #", [birth_number(k) for k in nodes]
    elif TIME == "REAL":
        label, values = "Time ", list(time.values())
    elif TIME == "GENERATIONAL":
        label, values = "Depth ", list(depth.values())
    else:
        label, values = "", []

    start_text = label + str(min(values)) if values else ""
    end_text = label + str(max(values)) if values else ""

    svg_add_line( (w*0.7, h_margin), (w, h_margin), svg_scale_line_style)
    svg_add_text( start_text, (w, h_margin + 15), "end")

    svg_add_line( (w*0.7, h-h_margin), (w, h-h_margin), svg_scale_line_style)
    svg_add_text( end_text, (w, h-h_margin + 15), "end")
412
413
414##################################################### main #####################################################
415
args = 0  # replaced by the parsed command-line namespace in main()

# canvas size and margins, in SVG user units
h = 800
w = 600
h_margin = 20
w_margin = 10
# drawable area left once the margins are taken off both sides
h_no_margs = h - 2 * h_margin
w_no_margs = w - 2 * w_margin

# extents of the computed layout, filled in by prepos_children()
max_height = 0
max_width = 0
min_width = 9999999999

# overwritten from the command line in main()
min_skeleton_depth = 0
max_nodes = 0

firstnode = None  # ID of the root node
nodes = dict()  # child ID -> its parents
inv_nodes = dict()  # parent ID -> list of child IDs
positions = dict()  # node ID -> [x, y]
visited = dict()
depth = dict()  # node ID -> value computed by compute_depth()
time = dict()  # node ID -> "Time" value from the input
kind = dict()  # node ID -> "Kind" value from the input
440
def main():
    """Parse the command line, load the data and write the SVG tree.

    The parsed options are stored in the module-level settings used by the
    loading, layout and drawing functions. The input is read with
    load_simple_data() or load_data(), node depths and positions are
    computed, and the requested layers (tree, skeleton, spine, scale) are
    written to the output file. Invalid option values or input without a
    root node are reported and end the run without producing output.
    """
    global svg_file, min_skeleton_depth, max_nodes, args, \
        TIME, BALANCE, DOT_STYLE, COLORING, JITTER, \
        svg_mutation_line_style, svg_crossover_line_style

    def str_to_bool(text):
        # type=bool would turn every non-empty string, "False" included, into True
        lowered = text.strip().lower()
        if lowered in ('1', 'true', 't', 'yes', 'y'):
            return True
        if lowered in ('0', 'false', 'f', 'no', 'n', ''):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '" + text + "'")

    parser = argparse.ArgumentParser(description='Draws a genealogical tree (generates a SVG file) based on parent-child relationship information from a text file. Supports files generated by Framsticks experiments.')
    parser.add_argument('-i', '--in', dest='input', required=True, help='input file name with structured evolutionary data')
    parser.add_argument('-o', '--out', dest='output', required=True, help='output file name for the evolutionary tree (SVG format)')
    draw_tree_parser = parser.add_mutually_exclusive_group(required=False)
    draw_tree_parser.add_argument('--draw-tree', dest='draw_tree', action='store_true', help='whether the full tree should be drawn')
    draw_tree_parser.add_argument('--no-draw-tree', dest='draw_tree', action='store_false')

    draw_skeleton_parser = parser.add_mutually_exclusive_group(required=False)
    draw_skeleton_parser.add_argument('--draw-skeleton', dest='draw_skeleton', action='store_true', help='whether the skeleton of the tree should be drawn')
    draw_skeleton_parser.add_argument('--no-draw-skeleton', dest='draw_skeleton', action='store_false')

    draw_spine_parser = parser.add_mutually_exclusive_group(required=False)
    draw_spine_parser.add_argument('--draw-spine', dest='draw_spine', action='store_true', help='whether the spine of the tree should be drawn')
    draw_spine_parser.add_argument('--no-draw-spine', dest='draw_spine', action='store_false')

    #TODO: better names for those parameters
    parser.add_argument('-t', '--time', default='GENERATIONAL', dest='time', help='values on vertical axis (BIRTHS/GENERATIONAL(d)/REAL); '
                                                                      'BIRTHS: time measured as the number of births since the beginning; '
                                                                      'GENERATIONAL: time measured as number of ancestors; '
                                                                      'REAL: real time of the simulation')
    parser.add_argument('-b', '--balance', default='DENSITY', dest='balance', help='method of placing nodes in the tree (RANDOM/MIN/DENSITY(d))')
    parser.add_argument('-s', '--scale', default='NONE', dest='scale', help='type of timescale added to the tree (NONE(d)/SIMPLE)')
    parser.add_argument('-c', '--coloring', default='IMPORTANCE', dest="coloring", help='method of coloring the tree (NONE/IMPORTANCE(d)/TYPE)')
    parser.add_argument('-d', '--dots', default='TYPE', dest='dots', help='method of drawing dots (individuals) (NONE/NORMAL/TYPE(d))')
    parser.add_argument('-j', '--jitter', dest="jitter", action='store_true', help='draw horizontal positions of children from the normal distribution')

    parser.add_argument('--color-mut', default="#000000", dest="color_mut", help='color of clone/mutation lines in rgba (e.g. #FF60B240) for TYPE coloring')
    parser.add_argument('--color-cross', default="#660198", dest="color_cross", help='color of crossover lines in rgba (e.g. #FF60B240) for TYPE coloring')

    parser.add_argument('--min-skeleton-depth', type=int, default=2, dest='min_skeleton_depth', help='minimal distance from the leafs for the nodes in the skeleton')
    parser.add_argument('--seed', type=int, dest='seed', help='seed for the random number generator (-1 for random)')

    # works both as a bare flag and with an explicit value (--simple-data True)
    parser.add_argument('--simple-data', nargs='?', const=True, default=False, type=str_to_bool, dest='simple_data',
                        help='input data are given in a simple format (#child #parent)')

    parser.add_argument('-x', '--max-nodes', type=int, default=0, dest='max_nodes', help='maximum number of nodes drawn (starting from the first one)')

    parser.set_defaults(draw_tree=True)
    parser.set_defaults(draw_skeleton=False)
    parser.set_defaults(draw_spine=False)

    parser.set_defaults(seed=-1)

    args = parser.parse_args()

    TIME = args.time.upper()
    BALANCE = args.balance.upper()
    DOT_STYLE = args.dots.upper()
    COLORING = args.coloring.upper()
    SCALE = args.scale.upper()
    JITTER = args.jitter

    checked_options = [
        ('--time', TIME, ['BIRTHS', 'GENERATIONAL', 'REAL']),
        ('--balance', BALANCE, ['RANDOM', 'MIN', 'DENSITY']),
        ('--dots', DOT_STYLE, ['NONE', 'NORMAL', 'TYPE']),
        ('--coloring', COLORING, ['NONE', 'IMPORTANCE', 'TYPE']),
        ('--scale', SCALE, ['NONE', 'SIMPLE']),
    ]
    invalid = [entry for entry in checked_options if entry[1] not in entry[2]]
    if invalid:
        for option, value, allowed in invalid:
            print("Incorrect value '" + value + "' of the " + option + " parameter (expected one of: " + "/".join(allowed) + ").")
        print("Closing the program.")
        return

    svg_mutation_line_style += hex_to_style(args.color_mut)
    svg_crossover_line_style += hex_to_style(args.color_cross)

    dir = args.input
    min_skeleton_depth = args.min_skeleton_depth
    max_nodes = args.max_nodes
    seed = args.seed
    if seed == -1:
        seed = random.randint(0, 10000)
    random.seed(seed)
    print("seed:", seed)

    if args.simple_data:
        load_simple_data(dir)
    else:
        load_data(dir)

    # without a root nothing can be laid out or drawn
    if firstnode is None:
        print("No root node found in the input data! Closing the program.")
        return

    compute_depth(firstnode)

    # svg_file is global, so the "as" target below rebinds the module-level
    # name that the svg_add_* helpers write to; the context manager closes
    # the file even when drawing fails
    with open(args.output, "w") as svg_file:
        svg_file.write('<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" '
                       'width="' + str(w) + '" height="' + str(h) + '">')

        prepos_children()

        if args.draw_tree:
            draw_children()
        if args.draw_skeleton:
            draw_skeleton()
        if args.draw_spine:
            draw_spine()

        draw_scale(dir, SCALE)

        svg_file.write("</svg>")


# run only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.