source: framspy/dissimilarity/density-distribution.py @ 1210

Last change on this file since 1210 was 1210, checked in by Maciej Komosinski, 13 months ago

EPSILON used only where necessary

File size: 14.6 KB
Line 
1import numpy as np
2from pyemd import emd
3from ctypes import cdll
4from ctypes.util import find_library
5from alignmodel import align
6
7class DensityDistribution:
8    libm = cdll.LoadLibrary(find_library('m'))
9    EPSILON = 0.0001
10    def __init__(self, FramsLib=None, density = 10, steps = 3, reduce=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
11        """ __init__
12        Args:
13            density (int, optional): density of samplings for frams.ModelGeometry . Defaults to 10.
14            steps (int, optional): How many steps is used for sampling space of voxels,
15                The higher value the more accurate sampling and the longer calculations. Defaults to 3.
16            reduce (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
17            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
18            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
19            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
20            verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.           
21        """
22        if FramsLib == None:
23            raise ValueError('Frams library not provided!')
24        self.frams_lib = FramsLib
25
26        self.density = density
27        self.steps = steps
28        self.verbose = verbose
29        self.reduce = reduce
30        self.frequency = frequency
31        self.metric = metric
32        self.fixedZaxis = fixedZaxis
33
34
35    def calculateNeighberhood(self,array,mean_coords):
36        """ Calculates number of elements for given sample and set ups the center of this sample
37        to the center of mass (calculated by mean of every coordinate)
38        Args:
39            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
40            mean_coords ([float,float,float]): default coordinates that are the
41                middle of the sample (used when number of voxels in sample is equal to 0)
42
43        Returns:
44            weight [int]: number of voxels in a sample
45            coordinates [float,float,float]: center of mass for a sample
46        """
47        weight = len(array)
48        if weight > 0:
49            point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
50            return weight, point
51        else:
52            return 0, mean_coords
53
54
55    def calculateDistPoints(self,point1, point2):
56        """ Returns euclidean distance between two points
57        Args (distribution):
58            point1 ([float,float,float]) - coordinates of first point
59            point2 ([float,float,float]) - coordinates of second point
60        Args (frequency):
61            point1 (float) - value of the first sample
62            point2 (float) - value of the second sample
63
64        Returns:
65            [float]: euclidean distance
66        """
67        if self.frequency:
68            return abs(point1-point2)
69        else:
70            return np.sqrt(np.sum(np.square(point1-point2)))
71
72
73    def calculateDistanceMatrix(self,array1, array2):
74        """
75
76        Args:
77            array1 ([type]): array of size n with points representing firsts model
78            array2 ([type]): array of size n with points representing second model
79
80        Returns:
81            np.array(np.array(,dtype=float)): distance matrix n x n
82        """
83        n = len(array1)
84        distMatrix = np.zeros((n,n))
85        for i in range(n):
86            for j in range(n):
87                distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
88        return np.array(distMatrix)
89
90
91    def reduceSignaturesFreq(self,s1,s2):
92        """Removes samples from signatures if corresponding samples for both models have weight 0.
93        Args:
94            s1 (np.array(,dtype=np.float64)): values of samples
95            s2 (np.array(,dtype=np.float64)): values of samples
96
97        Returns:
98            s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
99            s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
100        """
101        lens = len(s1)
102        indices = []
103        for i in range(lens):
104            if s1[i]==0 and s2[i]==0:
105                    indices.append(i)
106
107        return np.delete(s1, indices), np.delete(s2, indices)
108
109
110    def reduceSignaturesDens(self,s1,s2):
111        """Removes samples from signatures if corresponding samples for both models have weight 0.
112        Args:
113            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
114            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
115
116        Returns:
117            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
118            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
119        """
120        lens = len(s1[0])
121        indices = []
122        for i in range(lens):
123            if s1[1][i]==0 and s2[1][i]==0:
124                indices.append(i)
125
126        s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
127        s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
128        return s1, s2
129
130
131    def getSignatures(self,array,steps_all,step_all):
132        """Generates signature for array representing model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
133
134        Args:
135            array (np.array(np.array(,dtype=float))): array with voxels representing model
136            steps_all ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
137            step_all ([float,float,float]): [size of step for x axis, size of step for y axis, size of step for y axis]
138
139        Returns (distribution):
140           signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
141        Returns (frequency):
142           signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
143        """
144        x_steps,y_steps,z_steps = steps_all
145        x_step,y_step,z_step=step_all
146        feature_array = []
147        weight_array = []
148        step_half_x = x_step/2
149        step_half_y = y_step/2
150        step_half_z = z_step/2
151        for x in range(len(x_steps[:-1])):
152            for y in range(len(y_steps[:-1])) :
153                for z in range(len(z_steps[:-1])):
154                    rows=np.where((array[:,0]> x_steps[x]) &
155                                  (array[:,0]<= x_steps[x+1]) &
156                                  (array[:,1]> y_steps[y]) &
157                                  (array[:,1]<= y_steps[y+1]) &
158                                  (array[:,2]> z_steps[z]) &
159                                  (array[:,2]<= z_steps[z+1]))
160                    if self.frequency:
161                        feature_array.append(len(array[rows]))
162                    else:
163                        weight, point = self.calculateNeighberhood(array[rows],[x_steps[x]+step_half_x,y_steps[y]+step_half_y,z_steps[z]+step_half_z])
164                        feature_array.append(point)
165                        weight_array.append(weight)
166
167        if self.frequency:
168            samples = np.array(feature_array,dtype=np.float64)
169            return abs(np.fft.fft(samples))
170        else:
171            return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
172
173
174    def getSignaturesForPair(self,array1,array2):
175        """generates signatures for given pair of models represented by array of voxels.
176        We calculate space for given models by taking the extremas for each axis and dividing the space by the number of steps.
177        This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight
178        which equals to the number of points.
179       
180        Args:
181            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
182            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
183            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
184
185        Returns:
186            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
187            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
188        """
189
190        min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
191        max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
192        min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
193        max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
194        min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
195        max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
196
197        x_steps,x_step = np.linspace(min_x,max_x,self.steps,retstep=True)
198        y_steps,y_step = np.linspace(min_y,max_y,self.steps,retstep=True)
199        z_steps,z_step = np.linspace(min_z,max_z,self.steps,retstep=True)
200       
201        for intervals in (x_steps, y_steps, z_steps):  # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
202            intervals[0] -= self.EPSILON
203
204        steps_all = (x_steps,y_steps,z_steps)
205        step_all = (x_step,y_step,z_step)
206       
207        s1 = self.getSignatures(array1,steps_all,step_all)
208        s2 = self.getSignatures(array2,steps_all,step_all)   
209       
210        return s1,s2
211
212
213    def getVoxels(self,geno):
214        """ Generates voxels for genotype using frams.ModelGeometry
215
216        Args:
217            geno (string): representation of model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
218
219        Returns:
220            np.array([np.array(,dtype=float)]: list of voxels representing model.
221        """
222        model = self.frams_lib.Model.newFromString(geno)
223        align(model, self.fixedZaxis)
224        model_geometry = self.frams_lib.ModelGeometry.forModel(model)
225
226        model_geometry.geom_density = self.density
227        voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
228        return voxels
229
230
231    def calculateDissimforVoxels(self, voxels1, voxels2):
232        """ Calculate EMD for pair of voxels representing models.
233        Args:
234            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
235            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
236            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
237
238        Returns:
239            float: dissim for pair of list of voxels
240        """
241        numvox1 = len(voxels1)
242        numvox2 = len(voxels2)   
243
244        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
245
246        if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
247            print("Bad signature!")
248            print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
249            print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
250            raise ValueError("BAd signature!")
251
252        reduce_fun = self.reduceSignaturesFreq if self.frequency else self.reduceSignaturesDens
253        if self.reduce:
254            s1, s2 = reduce_fun(s1,s2)
255
256            if not self.frequency:
257                if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
258                    print("Voxel reduction didnt work properly")
259                    print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
260                    print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
261       
262        if self.metric == 'l1':
263            if self.frequency:
264                out = np.linalg.norm((s1-s2), ord=1)
265            else:
266                out = np.linalg.norm((s1[1]-s2[1]), ord=1)
267
268        elif self.metric == 'l2':
269            if self.frequency:
270                out = np.linalg.norm((s1-s2))
271            else:
272                out = np.linalg.norm((s1[1]-s2[1]))
273
274        elif self.metric == 'emd':
275            if self.frequency:
276                num_points = len(s1)
277                dist_matrix = self.calculateDistanceMatrix(range(num_points),range(num_points))
278            else:
279                dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
280
281            self.libm.fedisableexcept(0x04)  # allowing for operation divide by 0 because pyemd requiers it.
282
283            if self.frequency:
284                out = emd(s1,s2,np.array(dist_matrix,dtype=np.float64))
285            else:
286                out = emd(s1[1],s2[1],dist_matrix)
287
288            self.libm.feclearexcept(0x04) # disabling operation divide by 0 because framsticks doesnt like it.
289            self.libm.feenableexcept(0x04)
290
291        else:
292            raise ValueError("Wrong metric '%s'"%self.metric)
293
294        return out
295
296
297    def calculateDissimforGeno(self, geno1, geno2):
298        """ Calculate EMD for pair of genos.
299        Args:
300            geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
301            geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
302            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
303
304        Returns:
305            float: dissim for pair of strings representing models.
306        """     
307
308        voxels1 = self.getVoxels(geno1)
309        voxels2 = self.getVoxels(geno2)
310
311        out = self.calculateDissimforVoxels(voxels1, voxels2)
312
313        if self.verbose == True:
314            print("Steps: ", self.steps)
315            print("Geno1:\n",geno1)
316            print("Geno2:\n",geno2)
317            print("EMD:\n",out)
318
319        return out
320
321
322    def getDissimilarityMatrix(self,listOfGeno):
323        """
324
325        Args:
326            listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
327
328        Returns:
329            np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
330        """
331        numOfGeno = len(listOfGeno)
332        dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
333        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
334        for i in range(numOfGeno):
335            for j in range(numOfGeno):
336                dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
337        return dissimMatrix
Note: See TracBrowser for help on using the repository browser.