source: framspy/dissimilarity/density-distribution.py @ 1208

Last change on this file since 1208 was 1208, checked in by oriona, 13 months ago

density distribution measures added.

File size: 14.7 KB
RevLine 
[1208]1import numpy as np
2from pyemd import emd
3from ctypes import cdll
4from ctypes.util import find_library
5from alignmodel import align
6
7class DensityDistribution:
8    libm = cdll.LoadLibrary(find_library('m'))
9    EPSILON = 0.0001
10    def __init__(self, FramsLib=None, density = 10, steps = 3, reduce=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
11        """ __init__
12        Args:
13            density (int, optional): density of samplings for frams.ModelGeometry . Defaults to 10.
14            steps (int, optional): How many steps is used for sampling space of voxels,
15                The higher value the more accurate sampling and the longer calculations. Defaults to 3.
16            reduce (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
17            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
18            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
19            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
20            verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.           
21        """
22        if FramsLib == None:
23            raise ValueError('Frams library not provided!')
24        self.frams_lib = FramsLib
25
26        self.density = density
27        self.steps = steps
28        self.verbose = verbose
29        self.reduce = reduce
30        self.frequency = frequency
31        self.metric = metric
32        self.fixedZaxis = fixedZaxis
33
34
35    def calculateNeighberhood(self,array,mean_coords):
36        """ Calculates number of elements for given sample and set ups the center of this sample
37        to the center of mass (calculated by mean of every coordinate)
38        Args:
39            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
40            mean_coords ([float,float,float]): default coordinates that are the
41                middle of the sample (used when number of voxels in sample is equal to 0)
42
43        Returns:
44            weight [int]: number of voxels in a sample
45            coordinates [float,float,float]: center of mass for a sample
46        """
47        weight = len(array)
48        if weight > 0:
49            point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
50            return weight, point
51        else:
52            return 0, mean_coords
53
54
55    def calculateDistPoints(self,point1, point2):
56        """ Returns euclidean distance between two points
57        Args (distribution):
58            point1 ([float,float,float]) - coordinates of first point
59            point2 ([float,float,float]) - coordinates of second point
60        Args (frequency):
61            point1 (float) - value of the first sample
62            point2 (float) - value of the second sample
63
64        Returns:
65            [float]: euclidean distance
66        """
67        if self.frequency:
68            return abs(point1-point2)
69        else:
70            return np.sqrt(np.sum(np.square(point1-point2)))
71
72
73    def calculateDistanceMatrix(self,array1, array2):
74        """
75
76        Args:
77            array1 ([type]): array of size n with points representing firsts model
78            array2 ([type]): array of size n with points representing second model
79
80        Returns:
81            np.array(np.array(,dtype=float)): distance matrix n x n
82        """
83        n = len(array1)
84        distMatrix = np.zeros((n,n))
85        for i in range(n):
86            for j in range(n):
87                distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
88        return np.array(distMatrix)
89
90
91    def reduceSignaturesFreq(self,s1,s2):
92        """Removes samples from signatures if corresponding samples for both models have weight 0.
93        Args:
94            s1 (np.array(,dtype=np.float64)): values of samples
95            s2 (np.array(,dtype=np.float64)): values of samples
96
97        Returns:
98            s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
99            s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
100        """
101        lens = len(s1)
102        indices = []
103        for i in range(lens):
104            if s1[i]==0 and s2[i]==0:
105                    indices.append(i)
106
107        return np.delete(s1, indices), np.delete(s2, indices)
108
109
110    def reduceSignaturesDens(self,s1,s2):
111        """Removes samples from signatures if corresponding samples for both models have weight 0.
112        Args:
113            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
114            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
115
116        Returns:
117            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
118            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
119        """
120        lens = len(s1[0])
121        indices = []
122        for i in range(lens):
123            if s1[1][i]==0 and s2[1][i]==0:
124                indices.append(i)
125
126        s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
127        s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
128        return s1, s2
129
130
131    def getSignatures(self,array,steps_all,step_all):
132        """Generates signature for array representing model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
133
134        Args:
135            array (np.array(np.array(,dtype=float))): array with voxels representing model
136            steps_all ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
137            step_all ([float,float,float]): [size of step for x axis, size of step for y axis, size of step for y axis]
138
139        Returns (distribution):
140           signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
141        Returns (frequency):
142           signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
143        """
144        x_steps,y_steps,z_steps = steps_all
145        x_step,y_step,z_step=step_all
146        feature_array = []
147        weight_array = []
148        step_half_x = x_step/2
149        step_half_y = y_step/2
150        step_half_z = z_step/2
151        for x in range(len(x_steps[:-1])):
152            for y in range(len(y_steps[:-1])) :
153                for z in range(len(z_steps[:-1])):
154                    rows=np.where((array[:,0]> x_steps[x]) &
155                                  (array[:,0]<= x_steps[x+1]) &
156                                  (array[:,1]> y_steps[y]) &
157                                  (array[:,1]<= y_steps[y+1]) &
158                                  (array[:,2]> z_steps[z]) &
159                                  (array[:,2]<= z_steps[z+1]))
160                    if self.frequency:
161                        feature_array.append(len(array[rows]))
162                    else:
163                        weight, point = self.calculateNeighberhood(array[rows],[x_steps[x]+step_half_x,y_steps[y]+step_half_y,z_steps[z]+step_half_z])
164                        feature_array.append(point)
165                        weight_array.append(weight)
166
167        if self.frequency:
168            samples = np.array(feature_array,dtype=np.float64)
169            return abs(np.fft.fft(samples))
170        else:
171            return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
172
173
174    def getSignaturesForPair(self,array1,array2):
175        """generates signatures for given pair of models represented by array of voxels.
176        We calculate space for given models by taking the extremas for each axis and dividing the space by the number of steps.
177        This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight
178        which equals to the number of points.
179       
180        Args:
181            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
182            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
183            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
184
185        Returns:
186            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
187            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
188        """
189
190        min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])]) - self.EPSILON  # EPSILON added and removed to deal boundary voxels
191        max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])]) + self.EPSILON
192        min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])]) - self.EPSILON
193        max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])]) + self.EPSILON
194        min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])]) - self.EPSILON
195        max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])]) + self.EPSILON
196
197        x_steps,x_step = np.linspace(min_x,max_x,self.steps,retstep=True)
198        y_steps,y_step = np.linspace(min_y,max_y,self.steps,retstep=True)
199        z_steps,z_step = np.linspace(min_z,max_z,self.steps,retstep=True)
200       
201        if self.steps == 1:
202            x_steps = [min_x,max_x]
203            y_steps = [min_y,max_y]
204            z_steps = [min_z,max_z]
205            x_step = max_x - min_x
206            y_step = max_y - min_y
207            z_step = max_z - min_z
208
209        steps_all = (x_steps,y_steps,z_steps)
210        step_all = (x_step,y_step,z_step)
211       
212        s1 = self.getSignatures(array1,steps_all,step_all)
213        s2 = self.getSignatures(array2,steps_all,step_all)   
214       
215        return s1,s2
216
217
218    def getVoxels(self,geno):
219        """ Generates voxels for genotype using frams.ModelGeometry
220
221        Args:
222            geno (string): representation of model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
223
224        Returns:
225            np.array([np.array(,dtype=float)]: list of voxels representing model.
226        """
227        model = self.frams_lib.Model.newFromString(geno)
228        align(model, self.fixedZaxis)
229        model_geometry = self.frams_lib.ModelGeometry.forModel(model)
230
231        model_geometry.geom_density = self.density
232        voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
233        return voxels
234
235
236    def calculateDissimforVoxels(self, voxels1, voxels2):
237        """ Calculate EMD for pair of voxels representing models.
238        Args:
239            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
240            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
241            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
242
243        Returns:
244            float: dissim for pair of list of voxels
245        """
246        numvox1 = len(voxels1)
247        numvox2 = len(voxels2)   
248
249        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
250
251        if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
252            print("Bad signature!")
253            print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
254            print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
255            raise ValueError("BAd signature!")
256
257        reduce_fun = self.reduceSignaturesFreq if self.frequency else self.reduceSignaturesDens
258        if self.reduce:
259            s1, s2 = reduce_fun(s1,s2)
260
261            if not self.frequency:
262                if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
263                    print("Voxel reduction didnt work properly")
264                    print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
265                    print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
266       
267        if self.metric == 'l1':
268            if self.frequency:
269                out = np.linalg.norm((s1-s2), ord=1)
270            else:
271                out = np.linalg.norm((s1[1]-s2[1]), ord=1)
272
273        elif self.metric == 'l2':
274            if self.frequency:
275                out = np.linalg.norm((s1-s2))
276            else:
277                out = np.linalg.norm((s1[1]-s2[1]))
278
279        elif self.metric == 'emd':
280            if self.frequency:
281                num_points = len(s1)
282                dist_matrix = self.calculateDistanceMatrix(range(num_points),range(num_points))
283            else:
284                dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
285
286            self.libm.fedisableexcept(0x04)  # allowing for operation divide by 0 because pyemd requiers it.
287
288            if self.frequency:
289                out = emd(s1,s2,np.array(dist_matrix,dtype=np.float64))
290            else:
291                out = emd(s1[1],s2[1],dist_matrix)
292
293            self.libm.feclearexcept(0x04) # disabling operation divide by 0 because framsticks doesnt like it.
294            self.libm.feenableexcept(0x04)
295
296        else:
297            raise ValueError("Wrong metric '%s'"%self.metric)
298
299        return out
300
301
302    def calculateDissimforGeno(self, geno1, geno2):
303        """ Calculate EMD for pair of genos.
304        Args:
305            geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
306            geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
307            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
308
309        Returns:
310            float: dissim for pair of strings representing models.
311        """     
312
313        voxels1 = self.getVoxels(geno1)
314        voxels2 = self.getVoxels(geno2)
315
316        out = self.calculateDissimforVoxels(voxels1, voxels2)
317
318        if self.verbose == True:
319            print("Steps: ", self.steps)
320            print("Geno1:\n",geno1)
321            print("Geno2:\n",geno2)
322            print("EMD:\n",out)
323
324        return out
325
326
327    def getDissimilarityMatrix(self,listOfGeno):
328        """
329
330        Args:
331            listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
332
333        Returns:
334            np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
335        """
336        numOfGeno = len(listOfGeno)
337        dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
338        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
339        for i in range(numOfGeno):
340            for j in range(numOfGeno):
341                dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
342        return dissimMatrix
Note: See TracBrowser for help on using the repository browser.