source: mds-and-trees/tree-genealogy.py @ 621

Last change on this file since 621 was 621, checked in by konrad, 9 years ago

Rewritten prepos_children to the iterative form + Turned on some debugging

File size: 21.7 KB
RevLine 
[562]1# Draws a genealogical tree (generates a SVG file) based on parent-child relationship information.
[615]2# Supports files generated by Framsticks experiments.
[562]3
4import json
5import random
6import math
7import argparse
[621]8import time as ttime
[562]9
# Run-time options. They start out empty and are filled in by main() from the
# command-line arguments (upper-cased) before any layout or drawing happens.
TIME = "" # vertical axis mode: BIRTHS / GENERATIONAL / REAL
BALANCE = "" # horizontal placement of single-parent children: RANDOM / MIN / DENSITY

DOT_STYLE = "" # how individuals are drawn: NONE / NORMAL / TYPE

JITTER = "" # set to the boolean --jitter flag; when true, horizontal offsets are drawn from a normal distribution

# ------SVG---------
# Placeholder only; main() replaces it with the opened output file object
# that the svg_add_* helpers write to.
svg_file = 0

# Attribute strings pasted verbatim into the generated <line> elements.
svg_line_style = 'stroke="rgb(90%,10%,16%)" stroke-width="1" stroke-opacity="0.7"'
# The two styles below are completed in main() with the stroke colour/opacity
# derived from --color-mut / --color-cross (see hex_to_style).
svg_mutation_line_style = 'stroke-width="1"'
svg_crossover_line_style = 'stroke-width="1"'
svg_spine_line_style = 'stroke="rgb(0%,90%,40%)" stroke-width="2" stroke-opacity="1"'
svg_scale_line_style = 'stroke="black" stroke-width="0.5" stroke-opacity="1" stroke-dasharray="5, 5"'

# Attribute strings pasted verbatim into the generated <circle> elements.
svg_dot_style = 'r="2" stroke="black" stroke-width="0.2" fill="red"'
svg_clear_dot_style = 'r="2" stroke="black" stroke-width="0.4" fill="none"'
svg_spine_dot_style = 'r="1" stroke="black" stroke-width="0.2" fill="rgb(50%,50%,100%)"'

# Attribute string for the <text> elements (caption and scale labels).
svg_scale_text_style = 'style="font-family: Arial; font-size: 12; fill: #000000;"'
31
def hex_to_style(hex):
    """Translate a hex colour into SVG stroke attributes.

    Args:
        hex: colour as "RRGGBB" or "RRGGBBAA", optionally prefixed with "#".

    Returns:
        A string of the form ' stroke="rgb(R%,G%,B%)" stroke-opacity="A" '
        where the channels are percentages and the opacity is in 0..1
        (0.5 when no alpha byte is given). If the input is malformed, a
        warning is printed and a half-transparent black style is returned.
    """
    default_style = ' stroke="black" stroke-opacity="0.5" '

    # startswith() is safe for the empty string, unlike hex[0].
    if hex.startswith("#"):
        hex = hex[1:]

    if len(hex) not in (6, 8):
        print("Invalid number of digits in the color's hex #" + hex + "! Assuming black.")
        return default_style

    # Check every character explicitly: int(text, 16) also accepts prefixes,
    # signs, underscores and whitespace (e.g. "0x1234"), which would pass a
    # whole-string conversion and then fail when parsed two digits at a time.
    hex_digits = "0123456789abcdefABCDEF"
    if any(ch not in hex_digits for ch in hex):
        print("Invalid characters in the color's hex #" + hex + "! Assuming black.")
        return default_style

    red = 100*int(hex[0:2], 16)/255
    green = 100*int(hex[2:4], 16)/255
    blue = 100*int(hex[4:6], 16)/255
    opacity = 0.5
    if len(hex) == 8:
        opacity = int(hex[6:8], 16)/255
    return ' stroke="rgb(' + str(red) + '%,' + str(green) + '%,' + str(blue) + '%)" stroke-opacity="' + str(opacity) + '" '
[585]54
def svg_add_line(from_pos, to_pos, style=svg_line_style):
    """Write one SVG <line> element to the global output file.

    Args:
        from_pos: indexable whose items 0 and 1 are the start x and y.
        to_pos: indexable whose items 0 and 1 are the end x and y.
        style: attribute string inserted verbatim into the element.
    """
    start_x, start_y = from_pos[0], from_pos[1]
    end_x, end_y = to_pos[0], to_pos[1]
    element = '<line {} x1="{}" x2="{}" y1="{}" y2="{}"  fill="none"/>'.format(
        style, start_x, end_x, start_y, end_y)
    svg_file.write(element)
[562]58
def svg_add_text(text, pos, anchor, style=svg_scale_text_style):
    """Write one SVG <text> element to the global output file.

    Args:
        text: label to display; XML special characters are escaped so that
            labels such as file names containing "&" or "<" cannot break
            the generated document.
        pos: indexable whose items 0 and 1 are the x and y of the anchor point.
        anchor: value of the text-anchor attribute (e.g. "start", "end").
        style: attribute string inserted verbatim into the element.
    """
    from xml.sax.saxutils import escape

    svg_file.write('<text ' + style + ' text-anchor="' + anchor + '" x="' + str(pos[0]) + '" y="' + str(pos[1]) + '" >'
                   + escape(text) + '</text>')
61
def svg_add_dot(pos, style=svg_dot_style):
    """Write one SVG <circle> element to the global output file.

    Args:
        pos: indexable whose items 0 and 1 are the centre x and y.
        style: attribute string inserted verbatim into the element.
    """
    centre_x, centre_y = pos[0], pos[1]
    svg_file.write('<circle {} cx="{}" cy="{}" />'.format(style, centre_x, centre_y))
64
def svg_generate_line_style(percent):
    """Build the SVG stroke attributes for a line of given importance.

    Args:
        percent: importance in 0..1. Opacity and stroke width are
            interpolated linearly; the colour uses an eased version of the
            same value, 1 - (1 - percent)**20.

    Returns:
        Attribute string with stroke colour, stroke-width and stroke-opacity.
    """
    def mix(start, end, t):
        # Linear interpolation between start (t == 0) and end (t == 1).
        return start*(1-t) + end*t

    # Active palette: "hotdog".
    # Alternatives kept for reference:
    #   lava: (100, 80, 0) -> (100, 0, 0)
    #   neon: (30, 200, 255) -> (240, 0, 220)
    start_rgb = (100, 70, 0)
    end_rgb = (60, 0, 0)

    opacity = mix(0.2, 1.0, percent)
    width = mix(1, 3, percent)

    eased = 1 - ((1-percent)**20)
    channels = [str(mix(lo, hi, eased)) for lo, hi in zip(start_rgb, end_rgb)]

    return 'stroke="rgb(' + '%,'.join(channels) + '%)" stroke-width="' + str(width) \
           + '" stroke-opacity="' + str(opacity) + '"'
[562]89
def svg_generate_dot_style(kind):
    """Build the SVG circle attributes for an individual of the given kind.

    Args:
        kind: integer index into the colour palette. Indices beyond the
            palette wrap around instead of raising IndexError.

    Returns:
        Attribute string with fill colour, radius and stroke settings. The
        radius shrinks as the number of entries in the global `nodes` grows
        and is capped at 10.
    """
    kinds = ["red", "lawngreen", "royalblue", "magenta", "yellow", "cyan", "white", "black"]

    # max(..., 1) avoids a division by zero when no nodes were loaded.
    r = min(2500/max(len(nodes), 1), 10)

    return 'fill="' + kinds[kind % len(kinds)] + '" r="' + str(r) + '" stroke="black" stroke-width="' + str(r/10) \
           + '" fill-opacity="1.0" stroke-opacity="1.0"'
[564]97
[562]98# -------------------
99
def load_data(dir):
    """Load parent-child relations from a log with "[OFFSPRING] {json}" lines.

    Only lines whose first space-separated token is "[OFFSPRING]" are used;
    the remainder of such a line is parsed as a JSON object. Fills the
    module-level dictionaries:
      nodes[child]      -> {parent: weight} (every weight is currently 1)
      inv_nodes[parent] -> list of children
      time[id]          -> the "Time" field, when present
      kind[id]          -> the "Kind" field, when present
    and sets `firstnode` to the first encountered first-parent that has no
    entry of its own yet.

    Args:
        dir: path of the input file.

    Raises:
        SystemExit: when the same "ID" is listed twice with "FromIDs".
    """
    global firstnode, nodes, inv_nodes, time

    # The context manager closes the file even if parsing fails.
    with open(dir) as f:
        for line in f:
            sline = line.split(' ', 1)
            if len(sline) != 2 or sline[0] != "[OFFSPRING]":
                continue

            creature = json.loads(sline[1])
            creature_id = creature["ID"]

            if "FromIDs" in creature:
                if creature_id in nodes:
                    print("Duplicated entry for " + str(creature_id))
                    # quit() is meant for interactive sessions; raise directly
                    # and report the failure through the exit status.
                    raise SystemExit(1)

                parents = creature["FromIDs"]
                # we assign to each parent its contribution to the genotype of the child
                # (fixed at 1 for now; the "Inherited" field is not used)
                nodes[creature_id] = {parent: 1 for parent in parents}

                # An empty parent list is tolerated instead of raising IndexError.
                if parents and firstnode is None and parents[0] not in nodes:
                    firstnode = parents[0]

            if "Time" in creature:
                time[creature_id] = creature["Time"]

            if "Kind" in creature:
                kind[creature_id] = creature["Kind"]

    # Invert the relation (parent -> children) in a deterministic order.
    for child in sorted(nodes):
        for parent in sorted(nodes[child]):
            inv_nodes.setdefault(parent, []).append(child)

    print(len(nodes))
[562]135
[621]136
def load_simple_data(dir):
    """Load parent-child relations from a whitespace-separated text file.

    A line with a single token names the root (`firstnode`); a line with two
    or more tokens is read as "<child> <parent>". Parents numerically smaller
    than the root are clamped to the root. Blank lines are skipped.

    Fills the module-level dictionaries in the same shape load_data() uses:
      nodes[child]      -> {parent: 1}
      inv_nodes[parent] -> list of children
    so that the layout and drawing code, which calls len() and .items() on
    nodes[child], works for this format as well.

    Args:
        dir: path of the input file.
    """
    global firstnode, nodes, inv_nodes

    # The context manager closes the file even if parsing fails.
    with open(dir) as f:
        for line in f:
            sline = line.split()
            if not sline:
                continue  # blank line
            if len(sline) == 1:
                firstnode = sline[0]
                continue
            if sline[0] == firstnode:
                continue  # the root has no parent of its own
            parent = int(sline[1])
            if firstnode is not None:
                parent = max(parent, int(firstnode))
            nodes[sline[0]] = {str(parent): 1}

    # Invert the relation (parent -> children) in a deterministic order.
    for child in sorted(nodes):
        for parent in sorted(nodes[child]):
            inv_nodes.setdefault(parent, []).append(child)
154
155    #print(str(inv_nodes))
156    #quit()
157
def compute_depth(node):
    """Compute the height of `node` above its deepest descendant.

    A node without children has depth 0; otherwise its depth is one more
    than the largest depth among its children (taken from `inv_nodes`).
    The result for `node` and for every descendant is stored in the global
    `depth` dictionary.

    The traversal is iterative, so long lineages do not hit the interpreter's
    recursion limit, and every node is evaluated once even when it is
    reachable through several parents.

    Args:
        node: identifier of the node to start from.

    Returns:
        The depth of `node`.

    Raises:
        ValueError: if the parent-child relation contains a cycle.
    """
    stack = [node]
    expanded = set()  # nodes whose children have already been pushed
    done = set()      # nodes whose depth is final

    while stack:
        current = stack[-1]
        if current in done:
            # Duplicate stack entry of a node reached through another parent.
            stack.pop()
            continue

        children = inv_nodes.get(current, [])
        if current not in expanded:
            expanded.add(current)
            for child in children:
                if child in done:
                    continue
                if child in expanded:
                    # Expanded but unfinished means it is one of our ancestors.
                    raise ValueError("Cycle detected in the genealogy at node " + str(child))
                stack.append(child)
            continue

        # Second visit: every child has been finalised by now.
        depth[current] = max([depth[child] + 1 for child in children] or [0])
        done.add(current)
        stack.pop()

    return depth[node]
165
166# ------------------------------------
167
def xmin_crowd(x1, x2, y):
    """Pick the less crowded of two candidate x positions at height y.

    The strategy is selected by the global BALANCE:
      RANDOM  - either candidate with equal probability;
      MIN     - the candidate farther from its nearest already placed node
                lying exactly at height y;
      DENSITY - the candidate with the larger summed Euclidean distance to
                the already placed nodes whose y lies within 10 units of y.
    Ties go to x2.

    Args:
        x1: first candidate x.
        x2: second candidate x.
        y: vertical position of the node being placed.

    Returns:
        x1 or x2, or None when BALANCE holds none of the values above.
    """
    if BALANCE == "RANDOM":
        return x1 if random.randrange(2) == 0 else x2

    if BALANCE == "MIN":
        x1_closest = 999999
        x2_closest = 999999
        for pos in positions.values():
            if pos[1] == y:
                x1_closest = min(x1_closest, abs(x1-pos[0]))
                x2_closest = min(x2_closest, abs(x2-pos[0]))
        return x1 if x1_closest > x2_closest else x2

    if BALANCE == "DENSITY":
        x1_dist = 0
        x2_dist = 0
        for pos in positions.values():
            # Both bounds must hold; joining them with "or" is always true
            # and made every placed node count regardless of its height.
            if y-10 < pos[1] < y+10:
                dy = pos[1]-y
                dx1 = pos[0]-x1
                dx2 = pos[0]-x2

                x1_dist += math.sqrt(dy**2 + dx1**2)
                x2_dist += math.sqrt(dy**2 + dx2**2)
        return x1 if x1_dist > x2_dist else x2

    return None
193
194# ------------------------------------
195
[621]196# def prepos_children_reccurent(node):
197#     global visited
198#     for c in inv_nodes[node]:
199#
200#         # we want to visit the node just once, after all of its parents
201#         if not all_parents_visited(c):
202#             continue
203#         else:
204#             visited[c] = True
205#
206#         cy = 0
207#         if TIME == "BIRTHS":
208#             if c[0] == "c":
209#                 cy = int(c[1:])
210#             else:
211#                 cy = int(c)
212#         elif TIME == "GENERATIONAL":
213#             cy = positions[node][1]+1
214#         elif TIME == "REAL":
215#             cy = time[c]
216#
217#         if len(nodes[c]) == 1:
218#             dissimilarity = 0
219#             if JITTER == True:
220#                 dissimilarity = random.gauss(0,1)
221#             else:
222#                 dissimilarity = 1
223#             positions[c] = [xmin_crowd(positions[node][0]-dissimilarity, positions[node][0]+dissimilarity, cy), cy]
224#         else:
225#             vsum = sum([v for k, v in nodes[c].items()])
226#             cx = sum([positions[k][0]*v/vsum for k, v in nodes[c].items()])
227#
228#             if JITTER == True:
229#                 positions[c] = [cx + random.gauss(0, 0.1), cy]
230#             else:
231#                 positions[c] = [cx, cy]
232#
233#
234#         if c in inv_nodes:
235#             prepos_children_reccurent(c)
[572]236
def prepos_children():
    """Assign an [x, y] position to every node reachable from `firstnode`.

    The tree is walked breadth-first from `firstnode`, which is placed at
    [0, 0]. Each node is positioned exactly once, when it is first reached:
      y - depends on TIME: the number embedded in the ID (BIRTHS), the
          y of the parent it was reached from plus one (GENERATIONAL), or
          the recorded time (REAL);
      x - for a single-parent node, one of the two spots at +/- an offset
          from the parent chosen by xmin_crowd(); for a multi-parent node,
          the weighted mean of the x of those parents that are already
          positioned.
    Afterwards the globals max_height, max_width and min_width are widened
    to cover all positions.

    Side effects: fills `positions`, may switch TIME from REAL to BIRTHS when
    no time data was loaded, and prints progress information.
    """
    global max_height, max_width, min_width, TIME

    from collections import deque

    print("firstnode " + str(firstnode))

    # Fall back only when REAL time was actually requested; the other modes
    # do not need the time data at all.
    if TIME == "REAL" and not time:
        print("REAL time requested, but no real time data provided. Assuming BIRTHS time instead.")
        TIME = "BIRTHS"

    positions[firstnode] = [0, 0]

    # deque + set give O(1) dequeue and membership tests, and the set
    # guarantees that a node reached through several parents is queued once.
    queue = deque([firstnode])
    seen = {firstnode}

    processed = 0
    timet = ttime.time()

    while queue:
        processed += 1
        if processed % 1000 == 0:
            # progress report: node count and seconds spent on the last 1000
            print(str(processed) + " " + str(ttime.time()-timet))
            timet = ttime.time()

        current_node = queue.popleft()
        parent_x = positions[current_node][0]
        parent_y = positions[current_node][1]

        for c in inv_nodes.get(current_node, ()):
            # we want to visit the node just once
            if c in seen:
                continue
            seen.add(c)
            queue.append(c)

            if TIME == "BIRTHS":
                cy = int(c[1:]) if c[0] == "c" else int(c)
            elif TIME == "GENERATIONAL":
                cy = parent_y + 1
            elif TIME == "REAL":
                cy = time[c]
            else:
                cy = 0

            if len(nodes[c]) == 1:
                dissimilarity = random.gauss(0, 1) if JITTER else 1
                positions[c] = [xmin_crowd(parent_x-dissimilarity, parent_x+dissimilarity, cy), cy]
            else:
                # Use only the parents placed so far (current_node always is),
                # so a not-yet-visited parent cannot raise KeyError.
                placed = [(k, v) for k, v in nodes[c].items() if k in positions]
                vsum = sum(v for k, v in placed)
                cx = sum(positions[k][0]*v/vsum for k, v in placed)

                if JITTER:
                    cx = cx + random.gauss(0, 0.1)
                positions[c] = [cx, cy]

    for pos in positions.values():
        max_height = max(max_height, pos[1])
        max_width = max(max_width, pos[0])
        min_width = min(min_width, pos[0])
315
316# ------------------------------------
317
def all_parents_visited(node):
    """Tell whether every parent of `node` is present in the global `visited`.

    Args:
        node: identifier whose parents are looked up in the global `nodes`.

    Returns:
        True if all parents are in `visited` (also when there are none),
        False otherwise.
    """
    for parent, _weight in sorted(nodes[node].items()):
        if parent not in visited:
            return False
    return True
325# ------------------------------------
326
def draw_children():
    """Draw the whole tree: one line per parent-child link, one dot per node.

    Nodes are visited breadth-first from `firstnode`, each exactly once.
    Line appearance depends on the global COLORING (NONE, TYPE, or
    importance based on depth); dots depend on the global DOT_STYLE
    (NONE draws no dots, TYPE colours by kind, anything else draws the
    plain outlined dot). Positions are scaled from layout coordinates to the
    canvas area inside the margins.
    """
    from collections import deque

    # "or 1" keeps the scaling defined for degenerate layouts
    # (single node, or all nodes sharing one x / one y).
    max_depth = max([0] + list(depth.values())) or 1
    x_span = (max_width - min_width) or 1
    y_span = max_height or 1

    def to_canvas(node):
        # Map a node's layout position to canvas coordinates.
        return (w_margin + w_no_margs*(positions[node][0]-min_width)/x_span,
                h_margin + h_no_margs*positions[node][1]/y_span)

    queue = deque([firstnode])
    seen = {firstnode}

    while queue:
        # Dequeue first, so that skipping the dot below cannot stall the loop.
        current_node = queue.popleft()

        for c in inv_nodes.get(current_node, ()): # inv_node => p->c
            if c not in seen:
                seen.add(c)
                queue.append(c)

            if COLORING == "NONE":
                line_style = svg_line_style
            elif COLORING == "TYPE":
                line_style = (svg_mutation_line_style if len(nodes[c]) == 1 else svg_crossover_line_style)
            else: # IMPORTANCE, default
                line_style = svg_generate_line_style(depth[c]/max_depth)

            svg_add_line(to_canvas(current_node), to_canvas(c), line_style)

        # we want to draw the node just once
        if DOT_STYLE == "NONE":
            continue
        elif DOT_STYLE == "TYPE":
            dot_style = svg_generate_dot_style(kind.get(current_node, 0))
        else: # NORMAL, default
            dot_style = svg_clear_dot_style
        svg_add_dot(to_canvas(current_node), dot_style)
369
def draw_spine():
    """Draw the spine: links that follow the deepest lineage from the root.

    Starting at `firstnode`, a link to a child is drawn (and the child is
    followed further) only when the child's depth is exactly one less than
    its parent's, i.e. the child lies on a longest path to a leaf. Each node
    is expanded once, so no link is drawn twice.
    """
    from collections import deque

    # "or 1" keeps the scaling defined for degenerate layouts.
    x_span = (max_width - min_width) or 1
    y_span = max_height or 1

    def to_canvas(node):
        # Map a node's layout position to canvas coordinates.
        return (w_margin + w_no_margs*(positions[node][0]-min_width)/x_span,
                h_margin + h_no_margs*positions[node][1]/y_span)

    queue = deque([firstnode])
    seen = {firstnode}

    while queue:
        current_node = queue.popleft()

        for c in inv_nodes.get(current_node, ()): # inv_node => p->c
            if depth[c] != depth[current_node] - 1:
                continue
            if c not in seen:
                seen.add(c)
                queue.append(c)
            svg_add_line(to_canvas(current_node), to_canvas(c), svg_spine_line_style)
[564]389
def draw_skeleton():
    """Draw the skeleton: links to nodes that are far enough from the leaves.

    Starting at `firstnode`, a link to a child is drawn (and the child is
    followed further) only when the child's depth is at least the global
    `min_skeleton_depth`. Each node is expanded once, so no link is drawn
    twice.
    """
    from collections import deque

    # "or 1" keeps the scaling defined for degenerate layouts.
    x_span = (max_width - min_width) or 1
    y_span = max_height or 1

    def to_canvas(node):
        # Map a node's layout position to canvas coordinates.
        return (w_margin + w_no_margs*(positions[node][0]-min_width)/x_span,
                h_margin + h_no_margs*positions[node][1]/y_span)

    queue = deque([firstnode])
    seen = {firstnode}

    while queue:
        current_node = queue.popleft()

        for c in inv_nodes.get(current_node, ()): # inv_node => p->c
            if depth[c] < min_skeleton_depth:
                continue
            if c not in seen:
                seen.add(c)
                queue.append(c)
            svg_add_line(to_canvas(current_node), to_canvas(c), svg_spine_line_style)
[615]409
[576]410# ------------------------------------
[562]411
def draw_scale(filename ,type):
    """Draw the caption and the dashed start/end markers of the time scale.

    Args:
        filename: path of the input file; only its last component (after
            the final "/" or "\\") is shown in the caption.
        type: scale type. "NONE" (any letter case) draws nothing; any other
            value draws the simple scale.

    The labels depend on the global TIME: smallest/largest birth number
    among the IDs in `nodes` (BIRTHS), smallest/largest recorded time (REAL),
    or smallest/largest depth (GENERATIONAL). A label is left empty when
    there is no data to derive it from.
    """
    # Honour the requested type instead of always drawing the scale.
    if str(type).upper() == "NONE":
        return

    caption = filename.replace("\\", "/").split("/")[-1]
    svg_add_text("Generated from " + caption, (5, 15), "start")

    def birth_number(node_id):
        # IDs may carry a one-letter prefix (e.g. "c123") or be plain numbers.
        return int(node_id if node_id[0].isdigit() else node_id[1:])

    if TIME == "BIRTHS":
        label, values = "Birth #", [birth_number(k) for k in nodes]
    elif TIME == "REAL":
        label, values = "Time ", list(time.values())
    elif TIME == "GENERATIONAL":
        label, values = "Depth ", list(depth.values())
    else:
        label, values = "", []

    start_text = label + str(min(values)) if values else ""
    end_text = label + str(max(values)) if values else ""

    svg_add_line( (w*0.7, h_margin), (w, h_margin), svg_scale_line_style)
    svg_add_text( start_text, (w, h_margin + 15), "end")

    svg_add_line( (w*0.7, h-h_margin), (w, h-h_margin), svg_scale_line_style)
    svg_add_text( end_text, (w, h-h_margin + 15), "end")
[576]435
436
[562]437##################################################### main #####################################################
438
# Placeholder; main() replaces it with the parsed argparse namespace.
args = 0

# Canvas size written into the <svg> element, and the margins kept free
# around the drawing.
h = 800
w = 600
h_margin = 20
w_margin = 10
# Height and width of the drawing area that remains inside the margins.
h_no_margs = h - 2* h_margin
w_no_margs = w - 2* w_margin

# Extent of the layout coordinates; updated by prepos_children() and used by
# the draw_* functions to scale positions onto the canvas.
max_height = 0
max_width = 0
min_width = 9999999999

# Minimal depth a node needs to be part of the skeleton; set in main() from
# --min-skeleton-depth.
min_skeleton_depth = 0

firstnode = None  # identifier of the root the traversals start from
nodes = {}        # child -> {parent: weight}, as populated by load_data()
inv_nodes = {}    # parent -> list of children
positions = {}    # node -> [x, y] in layout coordinates (see prepos_children)
visited= {}       # nodes already handled; read by all_parents_visited()
depth = {}        # node -> height above its deepest descendant (see compute_depth)
time = {}         # node -> value of the "Time" field from the input
kind = {}         # node -> value of the "Kind" field from the input
[562]462
def main():
    """Parse the command line, load the data and write the SVG tree.

    Steps: parse and validate the options, seed the random generator, load
    the input (simple or Framsticks "[OFFSPRING]" format), compute node
    depths and positions, then draw the requested layers (tree, skeleton,
    spine, scale) into the output file. The output file is closed even if
    drawing fails.
    """
    global svg_file, min_skeleton_depth, args, \
        TIME, BALANCE, DOT_STYLE, COLORING, JITTER, \
        svg_mutation_line_style, svg_crossover_line_style

    def parse_bool(text):
        # argparse's type=bool treats every non-empty string (even "False")
        # as true, so the textual value is interpreted explicitly.
        value = str(text).strip().lower()
        if value in ("1", "true", "t", "yes", "y", "on"):
            return True
        if value in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError("expected a boolean value, got '" + str(text) + "'")

    parser = argparse.ArgumentParser(description='Draws a genealogical tree (generates a SVG file) based on parent-child relationship information from a text file. Supports files generated by Framsticks experiments.')
    parser.add_argument('-i', '--in', dest='input', required=True, help='input file name with structured evolutionary data')
    parser.add_argument('-o', '--out', dest='output', required=True, help='output file name for the evolutionary tree (SVG format)')
    draw_tree_parser = parser.add_mutually_exclusive_group(required=False)
    draw_tree_parser.add_argument('--draw-tree', dest='draw_tree', action='store_true', help='whether the full tree should be drawn (default)')
    draw_tree_parser.add_argument('--no-draw-tree', dest='draw_tree', action='store_false')

    draw_skeleton_parser = parser.add_mutually_exclusive_group(required=False)
    draw_skeleton_parser.add_argument('--draw-skeleton', dest='draw_skeleton', action='store_true', help='whether the skeleton of the tree should be drawn')
    draw_skeleton_parser.add_argument('--no-draw-skeleton', dest='draw_skeleton', action='store_false')

    draw_spine_parser = parser.add_mutually_exclusive_group(required=False)
    draw_spine_parser.add_argument('--draw-spine', dest='draw_spine', action='store_true', help='whether the spine of the tree should be drawn')
    draw_spine_parser.add_argument('--no-draw-spine', dest='draw_spine', action='store_false')

    #TODO: better names for those parameters
    parser.add_argument('-t', '--time', default='GENERATIONAL', dest='time', help='values on vertical axis (BIRTHS/GENERATIONAL(d)/REAL); '
                                                                      'BIRTHS: time measured as the number of births since the beginning; '
                                                                      'GENERATIONAL: time measured as number of ancestors; '
                                                                      'REAL: real time of the simulation')
    parser.add_argument('-b', '--balance', default='DENSITY', dest='balance', help='method of placing nodes in the tree (RANDOM/MIN/DENSITY(d))')
    parser.add_argument('-s', '--scale', default='NONE', dest='scale', help='type of timescale added to the tree (NONE(d)/SIMPLE)')
    parser.add_argument('-c', '--coloring', default='IMPORTANCE', dest="coloring", help='method of coloring the tree (NONE/IMPORTANCE(d)/TYPE)')
    parser.add_argument('-d', '--dots', default='TYPE', dest='dots', help='method of drawing dots (individuals) (NONE/NORMAL/TYPE(d))')
    parser.add_argument('-j', '--jitter', dest="jitter", action='store_true', help='draw horizontal positions of children from the normal distribution')

    parser.add_argument('--color-mut', default="#000000", dest="color_mut", help='color of clone/mutation lines in rgba (e.g. #FF60B240) for TYPE coloring')
    parser.add_argument('--color-cross', default="#660198", dest="color_cross", help='color of crossover lines in rgba (e.g. #FF60B240) for TYPE coloring')

    parser.add_argument('--min-skeleton-depth', type=int, default=2, dest='min_skeleton_depth', help='minimal distance from the leafs for the nodes in the skeleton')
    parser.add_argument('--seed', type=int, dest='seed', help='seed for the random number generator (-1 for random)')

    parser.add_argument('--simple-data', type=parse_bool, default=False, dest='simple_data', help='input data are given in a simple format (#child #parent)')

    parser.set_defaults(draw_tree=True)
    parser.set_defaults(draw_skeleton=False)
    parser.set_defaults(draw_spine=False)

    parser.set_defaults(seed=-1)

    args = parser.parse_args()

    TIME = args.time.upper()
    BALANCE = args.balance.upper()
    DOT_STYLE = args.dots.upper()
    COLORING = args.coloring.upper()
    SCALE = args.scale.upper()
    JITTER = args.jitter

    # Validate each option separately so the message can name the culprit.
    allowed_values = [
        ('--time', TIME, ['BIRTHS', 'GENERATIONAL', 'REAL']),
        ('--balance', BALANCE, ['RANDOM', 'MIN', 'DENSITY']),
        ('--dots', DOT_STYLE, ['NONE', 'NORMAL', 'TYPE']),
        ('--coloring', COLORING, ['NONE', 'IMPORTANCE', 'TYPE']),
        ('--scale', SCALE, ['NONE', 'SIMPLE']),
    ]
    invalid = [(name, value, allowed) for name, value, allowed in allowed_values if value not in allowed]
    if invalid:
        for name, value, allowed in invalid:
            print("Incorrect value '" + value + "' of the parameter " + name
                  + " (allowed: " + "/".join(allowed) + ")! Closing the program.")
        return

    svg_mutation_line_style += hex_to_style(args.color_mut)
    svg_crossover_line_style += hex_to_style(args.color_cross)

    dir = args.input
    min_skeleton_depth = args.min_skeleton_depth
    seed = args.seed
    if seed == -1:
        seed = random.randint(0, 10000)
    random.seed(seed)
    print("seed:", seed)

    if args.simple_data:
        load_simple_data(dir)
    else:
        load_data(dir)

    compute_depth(firstnode)

    svg_file = open(args.output, "w")
    try:
        svg_file.write('<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" '
                       'width="' + str(w) + '" height="' + str(h) + '">')

        prepos_children()

        if args.draw_tree:
            draw_children()
        if args.draw_skeleton:
            draw_skeleton()
        if args.draw_spine:
            draw_spine()

        draw_scale(dir, SCALE)

        svg_file.write("</svg>")
    finally:
        # Release the file handle even when layout or drawing raises.
        svg_file.close()


if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.