Context Navigation

source: framspy/dissimilarity/density-distribution.py @ 1214

Last change on this file since 1214 was 1214, checked in by Maciej Komosinski, 2 years ago
The number of "steps" has now the interpretation of the number of intervals
File size: 14.7 KB

Line
1	import numpy as np
2	from pyemd import emd
3	from ctypes import cdll
4	from ctypes.util import find_library
5	from alignmodel import align
6
7	class DensityDistribution:
8	libm = cdll.LoadLibrary(find_library('m'))
9	EPSILON = 0.0001
10	def __init__(self, FramsLib=None, density = 10, steps = 3, reduce=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
11	""" __init__
12	Args:
13	density (int, optional): density of samplings for frams.ModelGeometry . Defaults to 10.
14	steps (int, optional): How many steps are used for sampling the space of voxels,
15	The higher the value, the more accurate the sampling and the longer the calculations. Defaults to 3.
16	reduce (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
17	frequency (bool, optional): If we should use frequency distribution. Defaults to False.
18	metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
19	fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
20	verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.
21	"""
22	if FramsLib == None:
23	raise ValueError('Frams library not provided!')
24	self.frams_lib = FramsLib
25
26	self.density = density
27	self.steps = steps
28	self.verbose = verbose
29	self.reduce = reduce
30	self.frequency = frequency
31	self.metric = metric
32	self.fixedZaxis = fixedZaxis
33
34
35	def calculateNeighberhood(self,array,mean_coords):
36	""" Calculates number of elements for given sample and set ups the center of this sample
37	to the center of mass (calculated by mean of every coordinate)
38	Args:
39	array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
40	mean_coords ([float,float,float]): default coordinates that are the
41	middle of the sample (used when number of voxels in sample is equal to 0)
42
43	Returns:
44	weight [int]: number of voxels in a sample
45	coordinates [float,float,float]: center of mass for a sample
46	"""
47	weight = len(array)
48	if weight > 0:
49	point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
50	return weight, point
51	else:
52	return 0, mean_coords
53
54
55	def calculateDistPoints(self,point1, point2):
56	""" Returns euclidean distance between two points
57	Args (distribution):
58	point1 ([float,float,float]) - coordinates of first point
59	point2 ([float,float,float]) - coordinates of second point
60	Args (frequency):
61	point1 (float) - value of the first sample
62	point2 (float) - value of the second sample
63
64	Returns:
65	[float]: euclidean distance
66	"""
67	if self.frequency:
68	return abs(point1-point2)
69	else:
70	return np.sqrt(np.sum(np.square(point1-point2)))
71
72
73	def calculateDistanceMatrix(self,array1, array2):
74	"""
75	Args:
76	array1 ([type]): array of size n with points representing firsts model
77	array2 ([type]): array of size n with points representing second model
78
79	Returns:
80	np.array(np.array(,dtype=float)): distance matrix n x n
81	"""
82	n = len(array1)
83	distMatrix = np.zeros((n,n))
84	for i in range(n):
85	for j in range(n):
86	distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
87	return np.array(distMatrix)
88
89
90	def reduceSignaturesFreq(self,s1,s2):
91	"""Removes samples from signatures if corresponding samples for both models have weight 0.
92	Args:
93	s1 (np.array(,dtype=np.float64)): values of samples
94	s2 (np.array(,dtype=np.float64)): values of samples
95
96	Returns:
97	s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
98	s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
99	"""
100	lens = len(s1)
101	indices = []
102	for i in range(lens):
103	if s1[i]==0 and s2[i]==0:
104	indices.append(i)
105
106	return np.delete(s1, indices), np.delete(s2, indices)
107
108
109	def reduceSignaturesDens(self,s1,s2):
110	"""Removes samples from signatures if corresponding samples for both models have weight 0.
111	Args:
112	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
113	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
114
115	Returns:
116	s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
117	s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
118	"""
119	lens = len(s1[0])
120	indices = []
121	for i in range(lens):
122	if s1[1][i]==0 and s2[1][i]==0:
123	indices.append(i)
124
125	s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
126	s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
127	return s1, s2
128
129
130	def getSignatures(self,array,steps_all,step_all):
131	"""Generates signature for array representing model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
132
133	Args:
134	array (np.array(np.array(,dtype=float))): array with voxels representing model
135	steps_all ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
136	step_all ([float,float,float]): [size of step for x axis, size of step for y axis, size of step for y axis]
137
138	Returns (distribution):
139	signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
140	Returns (frequency):
141	signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
142	"""
143	x_steps,y_steps,z_steps = steps_all
144	x_step,y_step,z_step=step_all
145	feature_array = []
146	weight_array = []
147	step_half_x = x_step/2
148	step_half_y = y_step/2
149	step_half_z = z_step/2
150	for x in range(len(x_steps[:-1])):
151	for y in range(len(y_steps[:-1])) :
152	for z in range(len(z_steps[:-1])):
153	rows=np.where((array[:,0]> x_steps[x]) &
154	(array[:,0]<= x_steps[x+1]) &
155	(array[:,1]> y_steps[y]) &
156	(array[:,1]<= y_steps[y+1]) &
157	(array[:,2]> z_steps[z]) &
158	(array[:,2]<= z_steps[z+1]))
159	if self.frequency:
160	feature_array.append(len(array[rows]))
161	else:
162	weight, point = self.calculateNeighberhood(array[rows],[x_steps[x]+step_half_x,y_steps[y]+step_half_y,z_steps[z]+step_half_z])
163	feature_array.append(point)
164	weight_array.append(weight)
165
166	if self.frequency:
167	samples = np.array(feature_array,dtype=np.float64)
168	return abs(np.fft.fft(samples))
169	else:
170	return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
171
172
173	def getSignaturesForPair(self,array1,array2):
174	"""Generates signatures for given pair of models represented by array of voxels.
175	We calculate space for given models by taking the extremas for each axis and dividing the space by the number of steps.
176	This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight
177	which equals to the number of points.
178
179	Args:
180	array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
181	array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
182	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
183
184	Returns:
185	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
186	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
187	"""
188
189	min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])])
190	max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])])
191	min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])])
192	max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])])
193	min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])])
194	max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])])
195
196	# We request self.steps+1 samples since we need self.steps intervals
197	x_steps,x_step = np.linspace(min_x,max_x,self.steps+1,retstep=True)
198	y_steps,y_step = np.linspace(min_y,max_y,self.steps+1,retstep=True)
199	z_steps,z_step = np.linspace(min_z,max_z,self.steps+1,retstep=True)
200
201	for intervals in (x_steps, y_steps, z_steps): # EPSILON subtracted to deal with boundary voxels (one-sided open intervals and comparisons in loops in function getSignatures())
202	intervals[0] -= self.EPSILON
203
204	steps_all = (x_steps,y_steps,z_steps)
205	step_all = (x_step,y_step,z_step)
206
207	s1 = self.getSignatures(array1,steps_all,step_all)
208	s2 = self.getSignatures(array2,steps_all,step_all)
209
210	return s1,s2
211
212
213	def getVoxels(self,geno):
214	"""Generates voxels for genotype using frams.ModelGeometry
215
216	Args:
217	geno (string): representation of model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
218
219	Returns:
220	np.array([np.array(,dtype=float)]: list of voxels representing model.
221	"""
222	model = self.frams_lib.Model.newFromString(geno)
223	align(model, self.fixedZaxis)
224	model_geometry = self.frams_lib.ModelGeometry.forModel(model)
225
226	model_geometry.geom_density = self.density
227	voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
228	return voxels
229
230
231	def calculateDissimforVoxels(self, voxels1, voxels2):
232	"""Calculates EMD for pair of voxels representing models.
233	Args:
234	voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
235	voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
236	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
237
238	Returns:
239	float: dissim for pair of list of voxels
240	"""
241	numvox1 = len(voxels1)
242	numvox2 = len(voxels2)
243
244	s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
245
246	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
247	print("Bad signature!")
248	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
249	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
250	raise ValueError("BAd signature!")
251
252	reduce_fun = self.reduceSignaturesFreq if self.frequency else self.reduceSignaturesDens
253	if self.reduce:
254	s1, s2 = reduce_fun(s1,s2)
255
256	if not self.frequency:
257	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
258	print("Voxel reduction didnt work properly")
259	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
260	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
261
262	if self.metric == 'l1':
263	if self.frequency:
264	out = np.linalg.norm((s1-s2), ord=1)
265	else:
266	out = np.linalg.norm((s1[1]-s2[1]), ord=1)
267
268	elif self.metric == 'l2':
269	if self.frequency:
270	out = np.linalg.norm((s1-s2))
271	else:
272	out = np.linalg.norm((s1[1]-s2[1]))
273
274	elif self.metric == 'emd':
275	if self.frequency:
276	num_points = len(s1)
277	dist_matrix = self.calculateDistanceMatrix(range(num_points),range(num_points))
278	else:
279	dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
280
281	self.libm.fedisableexcept(0x04) # allowing for operation divide by 0 because pyemd requiers it.
282
283	if self.frequency:
284	out = emd(s1,s2,np.array(dist_matrix,dtype=np.float64))
285	else:
286	out = emd(s1[1],s2[1],dist_matrix)
287
288	self.libm.feclearexcept(0x04) # disabling operation divide by 0 because framsticks doesnt like it.
289	self.libm.feenableexcept(0x04)
290
291	else:
292	raise ValueError("Wrong metric '%s'"%self.metric)
293
294	return out
295
296
297	def calculateDissimforGeno(self, geno1, geno2):
298	"""Calculates EMD for pair of genos.
299	Args:
300	geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
301	geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
302	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
303
304	Returns:
305	float: dissim for pair of strings representing models.
306	"""
307
308	voxels1 = self.getVoxels(geno1)
309	voxels2 = self.getVoxels(geno2)
310
311	out = self.calculateDissimforVoxels(voxels1, voxels2)
312
313	if self.verbose == True:
314	print("Steps: ", self.steps)
315	print("Geno1:\n",geno1)
316	print("Geno2:\n",geno2)
317	print("EMD:\n",out)
318
319	return out
320
321
322	def getDissimilarityMatrix(self,listOfGeno):
323	"""
324	Args:
325	listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
326
327	Returns:
328	np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
329	"""
330	numOfGeno = len(listOfGeno)
331	dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
332	listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
333	for i in range(numOfGeno):
334	for j in range(numOfGeno):
335	dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
336	return dissimMatrix

Note: See TracBrowser for help on using the repository browser.

Download in other formats: