source: framspy/dissimilarity/density-distribution.py @ 1214

Last change on this file since 1214 was 1214, checked in by Maciej Komosinski, 13 months ago

The number of "steps" has now the interpretation of the number of intervals

File size: 14.7 KB
Line 
1import numpy as np
2from pyemd import emd
3from ctypes import cdll
4from ctypes.util import find_library
5from alignmodel import align
6
7class DensityDistribution:
8    libm = cdll.LoadLibrary(find_library('m'))
9    EPSILON = 0.0001
10    def __init__(self, FramsLib=None, density = 10, steps = 3, reduce=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
11        """ __init__
12        Args:
13            density (int, optional): density of samplings for frams.ModelGeometry . Defaults to 10.
14            steps (int, optional): How many steps are used for sampling the space of voxels,
15                The higher the value, the more accurate the sampling and the longer the calculations. Defaults to 3.
16            reduce (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
17            frequency (bool, optional): If we should use frequency distribution. Defaults to False.
18            metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
19            fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
20            verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.           
21        """
22        if FramsLib == None:
23            raise ValueError('Frams library not provided!')
24        self.frams_lib = FramsLib
25
26        self.density = density
27        self.steps = steps
28        self.verbose = verbose
29        self.reduce = reduce
30        self.frequency = frequency
31        self.metric = metric
32        self.fixedZaxis = fixedZaxis
33
34
35    def calculateNeighberhood(self,array,mean_coords):
36        """ Calculates number of elements for given sample and set ups the center of this sample
37        to the center of mass (calculated by mean of every coordinate)
38        Args:
39            array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
40            mean_coords ([float,float,float]): default coordinates that are the
41                middle of the sample (used when number of voxels in sample is equal to 0)
42
43        Returns:
44            weight [int]: number of voxels in a sample
45            coordinates [float,float,float]: center of mass for a sample
46        """
47        weight = len(array)
48        if weight > 0:
49            point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
50            return weight, point
51        else:
52            return 0, mean_coords
53
54
55    def calculateDistPoints(self,point1, point2):
56        """ Returns euclidean distance between two points
57        Args (distribution):
58            point1 ([float,float,float]) - coordinates of first point
59            point2 ([float,float,float]) - coordinates of second point
60        Args (frequency):
61            point1 (float) - value of the first sample
62            point2 (float) - value of the second sample
63
64        Returns:
65            [float]: euclidean distance
66        """
67        if self.frequency:
68            return abs(point1-point2)
69        else:
70            return np.sqrt(np.sum(np.square(point1-point2)))
71
72
73    def calculateDistanceMatrix(self,array1, array2):
74        """
75        Args:
76            array1 ([type]): array of size n with points representing firsts model
77            array2 ([type]): array of size n with points representing second model
78
79        Returns:
80            np.array(np.array(,dtype=float)): distance matrix n x n
81        """
82        n = len(array1)
83        distMatrix = np.zeros((n,n))
84        for i in range(n):
85            for j in range(n):
86                distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
87        return np.array(distMatrix)
88
89
90    def reduceSignaturesFreq(self,s1,s2):
91        """Removes samples from signatures if corresponding samples for both models have weight 0.
92        Args:
93            s1 (np.array(,dtype=np.float64)): values of samples
94            s2 (np.array(,dtype=np.float64)): values of samples
95
96        Returns:
97            s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
98            s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
99        """
100        lens = len(s1)
101        indices = []
102        for i in range(lens):
103            if s1[i]==0 and s2[i]==0:
104                    indices.append(i)
105
106        return np.delete(s1, indices), np.delete(s2, indices)
107
108
109    def reduceSignaturesDens(self,s1,s2):
110        """Removes samples from signatures if corresponding samples for both models have weight 0.
111        Args:
112            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
113            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
114
115        Returns:
116            s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
117            s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
118        """
119        lens = len(s1[0])
120        indices = []
121        for i in range(lens):
122            if s1[1][i]==0 and s2[1][i]==0:
123                indices.append(i)
124
125        s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
126        s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
127        return s1, s2
128
129
130    def getSignatures(self,array,steps_all,step_all):
131        """Generates signature for array representing model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
132
133        Args:
134            array (np.array(np.array(,dtype=float))): array with voxels representing model
135            steps_all ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
136            step_all ([float,float,float]): [size of step for x axis, size of step for y axis, size of step for y axis]
137
138        Returns (distribution):
139           signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
140        Returns (frequency):
141           signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
142        """
143        x_steps,y_steps,z_steps = steps_all
144        x_step,y_step,z_step=step_all
145        feature_array = []
146        weight_array = []
147        step_half_x = x_step/2
148        step_half_y = y_step/2
149        step_half_z = z_step/2
150        for x in range(len(x_steps[:-1])):
151            for y in range(len(y_steps[:-1])) :
152                for z in range(len(z_steps[:-1])):
153                    rows=np.where((array[:,0]> x_steps[x]) &
154                                  (array[:,0]<= x_steps[x+1]) &
155                                  (array[:,1]> y_steps[y]) &
156                                  (array[:,1]<= y_steps[y+1]) &
157                                  (array[:,2]> z_steps[z]) &
158                                  (array[:,2]<= z_steps[z+1]))
159                    if self.frequency:
160                        feature_array.append(len(array[rows]))
161                    else:
162                        weight, point = self.calculateNeighberhood(array[rows],[x_steps[x]+step_half_x,y_steps[y]+step_half_y,z_steps[z]+step_half_z])
163                        feature_array.append(point)
164                        weight_array.append(weight)
165
166        if self.frequency:
167            samples = np.array(feature_array,dtype=np.float64)
168            return abs(np.fft.fft(samples))
169        else:
170            return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
171
172
173    def getSignaturesForPair(self,array1,array2):
174        """Generates signatures for given pair of models represented by array of voxels.
175        We calculate space for given models by taking the extremas for each axis and dividing the space by the number of steps.
176        This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight
177        which equals to the number of points.
178       
179        Args:
180            array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
181            array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
182            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
183               
184        Returns:
185            s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
186            s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
187        """
188
189        min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])   
190        max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
191        min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
192        max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
193        min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
194        max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
195
196        # We request self.steps+1 samples since we need self.steps intervals
197        x_steps,x_step = np.linspace(min_x,max_x,self.steps+1,retstep=True)
198        y_steps,y_step = np.linspace(min_y,max_y,self.steps+1,retstep=True)
199        z_steps,z_step = np.linspace(min_z,max_z,self.steps+1,retstep=True)
200       
201        for intervals in (x_steps, y_steps, z_steps):  # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
202            intervals[0] -= self.EPSILON
203
204        steps_all = (x_steps,y_steps,z_steps)
205        step_all = (x_step,y_step,z_step)
206       
207        s1 = self.getSignatures(array1,steps_all,step_all)
208        s2 = self.getSignatures(array2,steps_all,step_all)   
209       
210        return s1,s2
211
212
213    def getVoxels(self,geno):
214        """Generates voxels for genotype using frams.ModelGeometry
215
216        Args:
217            geno (string): representation of model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
218
219        Returns:
220            np.array([np.array(,dtype=float)]: list of voxels representing model.
221        """
222        model = self.frams_lib.Model.newFromString(geno)
223        align(model, self.fixedZaxis)
224        model_geometry = self.frams_lib.ModelGeometry.forModel(model)
225
226        model_geometry.geom_density = self.density
227        voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
228        return voxels
229
230
231    def calculateDissimforVoxels(self, voxels1, voxels2):
232        """Calculates EMD for pair of voxels representing models.
233        Args:
234            voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
235            voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
236            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
237
238        Returns:
239            float: dissim for pair of list of voxels
240        """
241        numvox1 = len(voxels1)
242        numvox2 = len(voxels2)   
243
244        s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
245
246        if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
247            print("Bad signature!")
248            print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
249            print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
250            raise ValueError("BAd signature!")
251
252        reduce_fun = self.reduceSignaturesFreq if self.frequency else self.reduceSignaturesDens
253        if self.reduce:
254            s1, s2 = reduce_fun(s1,s2)
255
256            if not self.frequency:
257                if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
258                    print("Voxel reduction didnt work properly")
259                    print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
260                    print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
261       
262        if self.metric == 'l1':
263            if self.frequency:
264                out = np.linalg.norm((s1-s2), ord=1)
265            else:
266                out = np.linalg.norm((s1[1]-s2[1]), ord=1)
267
268        elif self.metric == 'l2':
269            if self.frequency:
270                out = np.linalg.norm((s1-s2))
271            else:
272                out = np.linalg.norm((s1[1]-s2[1]))
273
274        elif self.metric == 'emd':
275            if self.frequency:
276                num_points = len(s1)
277                dist_matrix = self.calculateDistanceMatrix(range(num_points),range(num_points))
278            else:
279                dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
280
281            self.libm.fedisableexcept(0x04)  # allowing for operation divide by 0 because pyemd requiers it.
282
283            if self.frequency:
284                out = emd(s1,s2,np.array(dist_matrix,dtype=np.float64))
285            else:
286                out = emd(s1[1],s2[1],dist_matrix)
287
288            self.libm.feclearexcept(0x04) # disabling operation divide by 0 because framsticks doesnt like it.
289            self.libm.feenableexcept(0x04)
290
291        else:
292            raise ValueError("Wrong metric '%s'"%self.metric)
293
294        return out
295
296
297    def calculateDissimforGeno(self, geno1, geno2):
298        """Calculates EMD for pair of genos.
299        Args:
300            geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
301            geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
302            steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
303
304        Returns:
305            float: dissim for pair of strings representing models.
306        """     
307
308        voxels1 = self.getVoxels(geno1)
309        voxels2 = self.getVoxels(geno2)
310
311        out = self.calculateDissimforVoxels(voxels1, voxels2)
312
313        if self.verbose == True:
314            print("Steps: ", self.steps)
315            print("Geno1:\n",geno1)
316            print("Geno2:\n",geno2)
317            print("EMD:\n",out)
318
319        return out
320
321
322    def getDissimilarityMatrix(self,listOfGeno):
323        """
324        Args:
325            listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
326
327        Returns:
328            np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
329        """
330        numOfGeno = len(listOfGeno)
331        dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
332        listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
333        for i in range(numOfGeno):
334            for j in range(numOfGeno):
335                dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
336        return dissimMatrix
Note: See TracBrowser for help on using the repository browser.