source: framspy/dissimilarity/density_distribution.py @ 1322

Last change on this file since 1322 was 1322, checked in by Maciej Komosinski, 5 weeks ago

Use libm.so when available to disable exceptions in pyemd, and if not (e.g., on Windows), proceed without it

File size: 16.2 KB
Line 
1import numpy as np
2from pyemd import emd
3from ctypes import cdll
4from ctypes.util import find_library
5from .alignmodel import align
6
7class DensityDistribution:
8    """Two dissimilarity measures based on the spatial distribution of two Models. The Model bounding box is divided into a grid of equally-sized cuboids, the number of which is the 'resolution' parameter cubed. Then the Model surface is covered with points; the density of the surface sampling is determined by the 'density' parameter. There are two versions of the measure. In the default version ('frequency'=False), a signature of each cuboid is the centroid and the number of samples. In the 'frequency'=True version, FFT is computed from the vector containing the number of samples in each cuboid. The final result of the dissimilarity measure is the distance between the signatures and it can be computed using EMD, L1, or L2 norms (the 'metric' parameter).
9    """
10
11    libm = find_library('m')  # for disabling/enabling floating point exceptions (division by zero occurs in the pyemd library)
12    if libm is not None:  # libm.so (the mathematical library) is a part of Linux ecosystem - always present
13        libm = cdll.LoadLibrary(libm)
14    else:
15        print('\nWarning: The "m" library not found - floating point exceptions in pyemd may occur...\n') # but on Windows, pyemd does not seem to cause floating point exceptions
16    EPSILON = 0.0001
17   
18    def __init__(self, frams_module=None, density = 10, resolution = 8, reduce_empty=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
19        """ __init__
20        Args:
21            density (int, optional): density of samplings for frams.ModelGeometry. Defaults to 10.
22            resolution (int, optional): How many intervals are used in each dimension to partition surface samples of Models in the 3D space.
23                The higher the value, the more detailed the comparison and the longer the calculations. Defaults to 3.
24            reduce_empty (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
25            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
26            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
27            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
28            verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.           
29        """
30        if frams_module is None:
31            raise ValueError('Framsticks module not provided!')
32        self.frams = frams_module
33
34        self.density = density
35        self.resolution = resolution
36        self.verbose = verbose
37        self.reduce_empty = reduce_empty
38        self.frequency = frequency
39        self.metric = metric
40        self.fixedZaxis = fixedZaxis
41
42
43    def calculateNeighberhood(self,array,mean_coords):
44        """ Calculates number of elements for given sample and set ups the center of this sample
45        to the center of mass (calculated by mean of every coordinate)
46        Args:
47            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
48            mean_coords ([float,float,float]): default coordinates that are the
49                middle of the sample (used when number of voxels in sample is equal to 0)
50
51        Returns:
52            weight [int]: number of voxels in a sample
53            coordinates [float,float,float]: center of mass for a sample
54        """
55        weight = len(array)
56        if weight > 0:
57            point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
58            return weight, point
59        else:
60            return 0, mean_coords
61
62
63    def calculateDistPoints(self,point1, point2):
64        """ Returns euclidean distance between two points
65        Args (distribution):
66            point1 ([float,float,float]) - coordinates of first point
67            point2 ([float,float,float]) - coordinates of second point
68        Args (frequency):
69            point1 (float) - value of the first sample
70            point2 (float) - value of the second sample
71
72        Returns:
73            [float]: euclidean distance
74        """
75        if self.frequency:
76            return abs(point1-point2)
77        else:
78            return np.sqrt(np.sum(np.square(point1-point2)))
79
80
81    def calculateDistanceMatrix(self,array1, array2):
82        """
83        Args:
84            array1 ([type]): array of size n with points representing the first Model
85            array2 ([type]): array of size n with points representing the second Model
86
87        Returns:
88            np.array(np.array(,dtype=float)): distance matrix n*n
89        """
90        n = len(array1)
91        distMatrix = np.zeros((n,n))
92        for i in range(n):
93            for j in range(n):
94                distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
95        return np.array(distMatrix)
96
97
98    def reduceEmptySignatures_Frequency(self,s1,s2):
99        """Removes samples from signatures if corresponding samples for both models have weight 0.
100        Args:
101            s1 (np.array(,dtype=np.float64)): values of samples
102            s2 (np.array(,dtype=np.float64)): values of samples
103
104        Returns:
105            s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
106            s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
107        """
108        lens = len(s1)
109        indices = []
110        for i in range(lens):
111            if s1[i]==0 and s2[i]==0:
112                    indices.append(i)
113
114        return np.delete(s1, indices), np.delete(s2, indices)
115
116
117    def reduceEmptySignatures_Density(self,s1,s2):
118        """Removes samples from signatures if corresponding samples for both models have weight 0.
119        Args:
120            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
121            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
122
123        Returns:
124            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
125            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
126        """
127        lens = len(s1[0])
128        indices = []
129        for i in range(lens):
130            if s1[1][i]==0 and s2[1][i]==0:
131                indices.append(i)
132
133        s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
134        s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
135        return s1, s2
136
137
138    def getSignatures(self,array,edges3,steps3):
139        """Generates signature for array representing the Model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
140
141        Args:
142            array (np.array(np.array(,dtype=float))): array with voxels representing the Model
143            edges3 ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
144            steps3 ([float,float,float]): [size of interval for x axis, size of interval for y axis, size of interval for y axis]
145
146        Returns (distribution):
147           signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
148        Returns (frequency):
149           signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
150        """
151        edges_x,edges_y,edges_z = edges3
152        step_x,step_y,step_z=steps3
153        feature_array = []
154        weight_array = []
155        step_x_half = step_x/2
156        step_y_half = step_y/2
157        step_z_half = step_z/2
158        for x in range(len(edges_x[:-1])):
159            for y in range(len(edges_y[:-1])) :
160                for z in range(len(edges_z[:-1])):
161                    rows=np.where((array[:,0]> edges_x[x]) &
162                                  (array[:,0]<= edges_x[x+1]) &
163                                  (array[:,1]> edges_y[y]) &
164                                  (array[:,1]<= edges_y[y+1]) &
165                                  (array[:,2]> edges_z[z]) &
166                                  (array[:,2]<= edges_z[z+1]))
167                    if self.frequency:
168                        feature_array.append(len(array[rows]))
169                    else:
170                        weight, point = self.calculateNeighberhood(array[rows],[edges_x[x]+step_x_half,edges_y[y]+step_y_half,edges_z[z]+step_z_half])
171                        feature_array.append(point)
172                        weight_array.append(weight)
173
174        if self.frequency:
175            samples = np.array(feature_array,dtype=np.float64)
176            return samples
177        else:
178            return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
179
180
181    def getSignaturesForPair(self,array1,array2):
182        """Generates signatures for given pair of models represented by array of voxels.
183        We calculate space for given models by taking the extremas for each axis and dividing the space by the resolution.
184        This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight which equals to the number of points.
185       
186        Args:
187            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
188            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
189
190        Returns:
191            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
192            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
193        """
194
195        min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
196        max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
197        min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
198        max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
199        min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
200        max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
201
202        # We request self.resolution+1 samples since we need self.resolution intervals
203        edges_x,step_x = np.linspace(min_x,max_x,self.resolution+1,retstep=True)
204        edges_y,step_y = np.linspace(min_y,max_y,self.resolution+1,retstep=True)
205        edges_z,step_z = np.linspace(min_z,max_z,self.resolution+1,retstep=True)
206       
207        for edges in (edges_x, edges_y, edges_z):  # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
208            edges[0] -= self.EPSILON
209
210        edges3 = (edges_x,edges_y,edges_z)
211        steps3 = (step_x,step_y,step_z)
212       
213        s1 = self.getSignatures(array1,edges3,steps3)
214        s2 = self.getSignatures(array2,edges3,steps3)   
215       
216        return s1,s2
217
218
219    def getVoxels(self,geno):
220        """Generates voxels for genotype using frams.ModelGeometry
221
222        Args:
223            geno (string): representation of Model in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
224
225        Returns:
226            np.array([np.array(,dtype=float)]: list of voxels representing the Model.
227        """
228        model = self.frams.Model.newFromString(geno)
229        align(model, self.fixedZaxis)
230        model_geometry = self.frams.ModelGeometry.forModel(model)
231
232        model_geometry.geom_density = self.density
233        voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
234        return voxels
235
236
237    def normalize(self, signature):
238        """Normalizes the signature values by dividing each element by the sum of all elements.
239        Args:
240            signature np.array(,dtype=float): A one-dimensional array of signature values.
241
242        Returns:
243            np.array(,dtype=float): A one-dimensional array of normalized signature values.
244        """
245        total = np.sum(signature)
246        return np.divide(signature, total)
247
248
249    def calculateDissimforVoxels(self, voxels1, voxels2):
250        """Calculates EMD for pair of voxels representing models.
251        Args:
252            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
253            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
254
255        Returns:
256            float: dissim for pair of list of voxels
257        """
258        numvox1 = len(voxels1)
259        numvox2 = len(voxels2)   
260
261        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
262
263        reduce_fun = self.reduceEmptySignatures_Frequency if self.frequency else self.reduceEmptySignatures_Density
264        if self.reduce_empty:
265            s1, s2 = reduce_fun(s1,s2)
266
267            if not self.frequency:
268                if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
269                    print("Voxel reduction didn't work properly")
270                    print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
271                    print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
272                    raise RuntimeError("Voxel reduction error!")
273       
274        if self.frequency:
275           s1 = abs(np.fft.fft(s1))
276           s2 = abs(np.fft.fft(s2))
277       
278        if self.metric == 'l1':
279            if self.frequency:
280                out = np.linalg.norm((s1-s2), ord=1)
281            else:
282                out = np.linalg.norm((s1[1]-s2[1]), ord=1)
283
284        elif self.metric == 'l2':
285            if self.frequency:
286                out = np.linalg.norm((s1-s2))
287            else:
288                out = np.linalg.norm((s1[1]-s2[1]))
289
290        elif self.metric == 'emd':
291            if self.frequency:
292                num_points = np.linspace(0, 1, len(s1), True)
293                dist_matrix = self.calculateDistanceMatrix(num_points,num_points)
294            else:
295                dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
296
297            if self.libm is not None:
298                self.libm.fedisableexcept(0x04)  # change default flag value - don't cause "Floating point exception" when dividing by 0 (pyemd does that, for example when comparing two identical histograms, i.e., two identical signatures, for example from two identical phenotypes)
299
300            if self.frequency:
301                out = emd(self.normalize(s1),self.normalize(s2),np.array(dist_matrix,dtype=np.float64))
302            else:
303                out = emd(self.normalize(s1[1]),self.normalize(s2[1]),dist_matrix)
304
305            if self.libm is not None:
306                self.libm.feclearexcept(0x04)  # restoring default flag values...
307                self.libm.feenableexcept(0x04)
308
309        else:
310            raise ValueError("Wrong metric '%s'"%self.metric)
311
312        return out
313
314
315    def calculateDissimforGeno(self, geno1, geno2):
316        """Calculates EMD for a pair of genotypes.
317        Args:
318            geno1 (string): representation of model1 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
319            geno2 (string): representation of model2 in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
320
321        Returns:
322            float: dissim for pair of strings representing models.
323        """     
324
325        voxels1 = self.getVoxels(geno1)
326        voxels2 = self.getVoxels(geno2)
327
328        out = self.calculateDissimforVoxels(voxels1, voxels2)
329
330        if self.verbose == True:
331            print("Intervals: ", self.resolution)
332            print("Geno1:\n",geno1)
333            print("Geno2:\n",geno2)
334            print("EMD:\n",out)
335
336        return out
337
338
339    def getDissimilarityMatrix(self,listOfGeno):
340        """
341        Args:
342            listOfGeno ([string]): list of strings representing genotypes in one of the formats supported by Framsticks, http://www.framsticks.com/a/al_genotype.html
343
344        Returns:
345            np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
346        """
347        numOfGeno = len(listOfGeno)
348        dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
349        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
350        for i in range(numOfGeno):
351            for j in range(numOfGeno):
352                dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
353        return dissimMatrix
Note: See TracBrowser for help on using the repository browser.