Context Navigation

source: framspy/dissimilarity/density-distribution.py @ 1208

Last change on this file since 1208 was 1208, checked in by oriona, 2 years ago
density distribution measures added.
File size: 14.7 KB

Rev	Line
[1208]	1	import numpy as np
	2	from pyemd import emd
	3	from ctypes import cdll
	4	from ctypes.util import find_library
	5	from alignmodel import align
	6
	7	class DensityDistribution:
	8	libm = cdll.LoadLibrary(find_library('m'))
	9	EPSILON = 0.0001
	10	def __init__(self, FramsLib=None, density = 10, steps = 3, reduce=True, frequency=False, metric = 'emd', fixedZaxis=False, verbose=False):
	11	""" __init__
	12	Args:
	13	density (int, optional): density of samplings for frams.ModelGeometry . Defaults to 10.
	14	steps (int, optional): How many steps is used for sampling space of voxels,
	15	The higher value the more accurate sampling and the longer calculations. Defaults to 3.
	16	reduce (bool, optional): If we should use reduction to remove blank samples. Defaults to True.
	17	frequency (bool, optional): If we should use frequency distribution. Defaults to False.
	18	metric (string, optional): The distance metric that should be used ('emd', 'l1', or 'l2'). Defaults to 'emd'.
	19	fixedZaxis (bool, optional): If the z axis should be fixed during alignment. Defaults to False.
	20	verbose (bool, optional): Turning on logging, works only for calculateEMDforGeno. Defaults to False.
	21	"""
	22	if FramsLib == None:
	23	raise ValueError('Frams library not provided!')
	24	self.frams_lib = FramsLib
	25
	26	self.density = density
	27	self.steps = steps
	28	self.verbose = verbose
	29	self.reduce = reduce
	30	self.frequency = frequency
	31	self.metric = metric
	32	self.fixedZaxis = fixedZaxis
	33
	34
	35	def calculateNeighberhood(self,array,mean_coords):
	36	""" Calculates number of elements for given sample and set ups the center of this sample
	37	to the center of mass (calculated by mean of every coordinate)
	38	Args:
	39	array ([[float,float,float],...,[float,float,float]]): array of voxels that belong to given sample.
	40	mean_coords ([float,float,float]): default coordinates that are the
	41	middle of the sample (used when number of voxels in sample is equal to 0)
	42
	43	Returns:
	44	weight [int]: number of voxels in a sample
	45	coordinates [float,float,float]: center of mass for a sample
	46	"""
	47	weight = len(array)
	48	if weight > 0:
	49	point = [np.mean(array[:,0]),np.mean(array[:,1]),np.mean(array[:,2])]
	50	return weight, point
	51	else:
	52	return 0, mean_coords
	53
	54
	55	def calculateDistPoints(self,point1, point2):
	56	""" Returns euclidean distance between two points
	57	Args (distribution):
	58	point1 ([float,float,float]) - coordinates of first point
	59	point2 ([float,float,float]) - coordinates of second point
	60	Args (frequency):
	61	point1 (float) - value of the first sample
	62	point2 (float) - value of the second sample
	63
	64	Returns:
	65	[float]: euclidean distance
	66	"""
	67	if self.frequency:
	68	return abs(point1-point2)
	69	else:
	70	return np.sqrt(np.sum(np.square(point1-point2)))
	71
	72
	73	def calculateDistanceMatrix(self,array1, array2):
	74	"""
	75
	76	Args:
	77	array1 ([type]): array of size n with points representing firsts model
	78	array2 ([type]): array of size n with points representing second model
	79
	80	Returns:
	81	np.array(np.array(,dtype=float)): distance matrix n x n
	82	"""
	83	n = len(array1)
	84	distMatrix = np.zeros((n,n))
	85	for i in range(n):
	86	for j in range(n):
	87	distMatrix[i][j] = self.calculateDistPoints(array1[i], array2[j])
	88	return np.array(distMatrix)
	89
	90
	91	def reduceSignaturesFreq(self,s1,s2):
	92	"""Removes samples from signatures if corresponding samples for both models have weight 0.
	93	Args:
	94	s1 (np.array(,dtype=np.float64)): values of samples
	95	s2 (np.array(,dtype=np.float64)): values of samples
	96
	97	Returns:
	98	s1new (np.array(,dtype=np.float64)): coordinates of samples after reduction
	99	s2new (np.array(,dtype=np.float64)): coordinates of samples after reduction
	100	"""
	101	lens = len(s1)
	102	indices = []
	103	for i in range(lens):
	104	if s1[i]==0 and s2[i]==0:
	105	indices.append(i)
	106
	107	return np.delete(s1, indices), np.delete(s2, indices)
	108
	109
	110	def reduceSignaturesDens(self,s1,s2):
	111	"""Removes samples from signatures if corresponding samples for both models have weight 0.
	112	Args:
	113	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	114	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	115
	116	Returns:
	117	s1new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
	118	s2new ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights] after reduction
	119	"""
	120	lens = len(s1[0])
	121	indices = []
	122	for i in range(lens):
	123	if s1[1][i]==0 and s2[1][i]==0:
	124	indices.append(i)
	125
	126	s1 = [np.delete(s1[0], indices, axis=0), np.delete(s1[1], indices, axis=0)]
	127	s2 = [np.delete(s2[0], indices, axis=0), np.delete(s2[1], indices, axis=0)]
	128	return s1, s2
	129
	130
	131	def getSignatures(self,array,steps_all,step_all):
	132	"""Generates signature for array representing model. Signature is composed of list of points [x,y,z] (float) and list of weights (int).
	133
	134	Args:
	135	array (np.array(np.array(,dtype=float))): array with voxels representing model
	136	steps_all ([np.array(,dtype=float),np.array(,dtype=float),np.array(,dtype=float)]): lists with edges for each step for each axis in order x,y,z
	137	step_all ([float,float,float]): [size of step for x axis, size of step for y axis, size of step for y axis]
	138
	139	Returns (distribution):
	140	signature [np.array(,dtype=np.float64),np.array(,dtype=np.float64)]: returns signatuere [np.array of points, np.array of weights]
	141	Returns (frequency):
	142	signature np.array(,dtype=np.float64): returns signatuere np.array of coefficients
	143	"""
	144	x_steps,y_steps,z_steps = steps_all
	145	x_step,y_step,z_step=step_all
	146	feature_array = []
	147	weight_array = []
	148	step_half_x = x_step/2
	149	step_half_y = y_step/2
	150	step_half_z = z_step/2
	151	for x in range(len(x_steps[:-1])):
	152	for y in range(len(y_steps[:-1])) :
	153	for z in range(len(z_steps[:-1])):
	154	rows=np.where((array[:,0]> x_steps[x]) &
	155	(array[:,0]<= x_steps[x+1]) &
	156	(array[:,1]> y_steps[y]) &
	157	(array[:,1]<= y_steps[y+1]) &
	158	(array[:,2]> z_steps[z]) &
	159	(array[:,2]<= z_steps[z+1]))
	160	if self.frequency:
	161	feature_array.append(len(array[rows]))
	162	else:
	163	weight, point = self.calculateNeighberhood(array[rows],[x_steps[x]+step_half_x,y_steps[y]+step_half_y,z_steps[z]+step_half_z])
	164	feature_array.append(point)
	165	weight_array.append(weight)
	166
	167	if self.frequency:
	168	samples = np.array(feature_array,dtype=np.float64)
	169	return abs(np.fft.fft(samples))
	170	else:
	171	return [np.array(feature_array,dtype=np.float64), np.array(weight_array,dtype=np.float64)]
	172
	173
	174	def getSignaturesForPair(self,array1,array2):
	175	"""generates signatures for given pair of models represented by array of voxels.
	176	We calculate space for given models by taking the extremas for each axis and dividing the space by the number of steps.
	177	This divided space generate us samples which contains points. Each sample will have new coordinates which are mean of all points from it and weight
	178	which equals to the number of points.
	179
	180	Args:
	181	array1 (np.array(np.array(,dtype=float))): array with voxels representing model1
	182	array2 (np.array(np.array(,dtype=float))): array with voxels representing model2
	183	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
	184
	185	Returns:
	186	s1 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	187	s2 ([np.array(,dtype=np.float64),np.array(,dtype=np.float64)]): [coordinates of samples, weights]
	188	"""
	189
	190	min_x = np.min([np.min(array1[:,0]),np.min(array2[:,0])]) - self.EPSILON # EPSILON added and removed to deal boundary voxels
	191	max_x = np.max([np.max(array1[:,0]),np.max(array2[:,0])]) + self.EPSILON
	192	min_y = np.min([np.min(array1[:,1]),np.min(array2[:,1])]) - self.EPSILON
	193	max_y = np.max([np.max(array1[:,1]),np.max(array2[:,1])]) + self.EPSILON
	194	min_z = np.min([np.min(array1[:,2]),np.min(array2[:,2])]) - self.EPSILON
	195	max_z = np.max([np.max(array1[:,2]),np.max(array2[:,2])]) + self.EPSILON
	196
	197	x_steps,x_step = np.linspace(min_x,max_x,self.steps,retstep=True)
	198	y_steps,y_step = np.linspace(min_y,max_y,self.steps,retstep=True)
	199	z_steps,z_step = np.linspace(min_z,max_z,self.steps,retstep=True)
	200
	201	if self.steps == 1:
	202	x_steps = [min_x,max_x]
	203	y_steps = [min_y,max_y]
	204	z_steps = [min_z,max_z]
	205	x_step = max_x - min_x
	206	y_step = max_y - min_y
	207	z_step = max_z - min_z
	208
	209	steps_all = (x_steps,y_steps,z_steps)
	210	step_all = (x_step,y_step,z_step)
	211
	212	s1 = self.getSignatures(array1,steps_all,step_all)
	213	s2 = self.getSignatures(array2,steps_all,step_all)
	214
	215	return s1,s2
	216
	217
	218	def getVoxels(self,geno):
	219	""" Generates voxels for genotype using frams.ModelGeometry
	220
	221	Args:
	222	geno (string): representation of model in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
	223
	224	Returns:
	225	np.array([np.array(,dtype=float)]: list of voxels representing model.
	226	"""
	227	model = self.frams_lib.Model.newFromString(geno)
	228	align(model, self.fixedZaxis)
	229	model_geometry = self.frams_lib.ModelGeometry.forModel(model)
	230
	231	model_geometry.geom_density = self.density
	232	voxels = np.array([np.array([p.x._value(),p.y._value(),p.z._value()]) for p in model_geometry.voxels()])
	233	return voxels
	234
	235
	236	def calculateDissimforVoxels(self, voxels1, voxels2):
	237	""" Calculate EMD for pair of voxels representing models.
	238	Args:
	239	voxels1 np.array([np.array(,dtype=float)]: list of voxels representing model1.
	240	voxels2 np.array([np.array(,dtype=float)]: list of voxels representing model2.
	241	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
	242
	243	Returns:
	244	float: dissim for pair of list of voxels
	245	"""
	246	numvox1 = len(voxels1)
	247	numvox2 = len(voxels2)
	248
	249	s1, s2 = self.getSignaturesForPair(voxels1, voxels2)
	250
	251	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
	252	print("Bad signature!")
	253	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
	254	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
	255	raise ValueError("BAd signature!")
	256
	257	reduce_fun = self.reduceSignaturesFreq if self.frequency else self.reduceSignaturesDens
	258	if self.reduce:
	259	s1, s2 = reduce_fun(s1,s2)
	260
	261	if not self.frequency:
	262	if numvox1 != sum(s1[1]) or numvox2 != sum(s2[1]):
	263	print("Voxel reduction didnt work properly")
	264	print("Base voxels fig1: ", numvox1, " fig2: ", numvox2)
	265	print("After reduction voxels fig1: ", sum(s1[1]), " fig2: ", sum(s2[1]))
	266
	267	if self.metric == 'l1':
	268	if self.frequency:
	269	out = np.linalg.norm((s1-s2), ord=1)
	270	else:
	271	out = np.linalg.norm((s1[1]-s2[1]), ord=1)
	272
	273	elif self.metric == 'l2':
	274	if self.frequency:
	275	out = np.linalg.norm((s1-s2))
	276	else:
	277	out = np.linalg.norm((s1[1]-s2[1]))
	278
	279	elif self.metric == 'emd':
	280	if self.frequency:
	281	num_points = len(s1)
	282	dist_matrix = self.calculateDistanceMatrix(range(num_points),range(num_points))
	283	else:
	284	dist_matrix = self.calculateDistanceMatrix(s1[0],s2[0])
	285
	286	self.libm.fedisableexcept(0x04) # allowing for operation divide by 0 because pyemd requiers it.
	287
	288	if self.frequency:
	289	out = emd(s1,s2,np.array(dist_matrix,dtype=np.float64))
	290	else:
	291	out = emd(s1[1],s2[1],dist_matrix)
	292
	293	self.libm.feclearexcept(0x04) # disabling operation divide by 0 because framsticks doesnt like it.
	294	self.libm.feenableexcept(0x04)
	295
	296	else:
	297	raise ValueError("Wrong metric '%s'"%self.metric)
	298
	299	return out
	300
	301
	302	def calculateDissimforGeno(self, geno1, geno2):
	303	""" Calculate EMD for pair of genos.
	304	Args:
	305	geno1 (string): representation of model1 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
	306	geno2 (string): representation of model2 in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
	307	steps (int, optional): How many steps is used for sampling space of voxels. Defaults to self.steps (3).
	308
	309	Returns:
	310	float: dissim for pair of strings representing models.
	311	"""
	312
	313	voxels1 = self.getVoxels(geno1)
	314	voxels2 = self.getVoxels(geno2)
	315
	316	out = self.calculateDissimforVoxels(voxels1, voxels2)
	317
	318	if self.verbose == True:
	319	print("Steps: ", self.steps)
	320	print("Geno1:\n",geno1)
	321	print("Geno2:\n",geno2)
	322	print("EMD:\n",out)
	323
	324	return out
	325
	326
	327	def getDissimilarityMatrix(self,listOfGeno):
	328	"""
	329
	330	Args:
	331	listOfGeno ([string]): list of strings representing genotypes in one of the formats handled by frams http://www.framsticks.com/a/al_genotype.html
	332
	333	Returns:
	334	np.array(np.array(,dtype=float)): dissimilarity matrix of EMD for given list of genotypes
	335	"""
	336	numOfGeno = len(listOfGeno)
	337	dissimMatrix = np.zeros(shape=[numOfGeno,numOfGeno])
	338	listOfVoxels = [self.getVoxels(g) for g in listOfGeno]
	339	for i in range(numOfGeno):
	340	for j in range(numOfGeno):
	341	dissimMatrix[i,j] = self.calculateDissimforVoxels(listOfVoxels[i], listOfVoxels[j])
	342	return dissimMatrix

Note: See TracBrowser for help on using the repository browser.

Download in other formats: