`
biansutao
  • 浏览: 53124 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

字符串相似性算法【最长公共字符串算法】 【LCS】

阅读更多
#!/usr/bin/env python
# -*- coding: utf-8 -*-

class arithmetic():
	"""String-similarity helpers: Levenshtein edit distance and longest common substring."""

	def __init__(self):
		pass

	def levenshtein(self, first, second):
		"""Return the Levenshtein (edit) distance between ``first`` and ``second``.

		The distance is the minimum number of single-element insertions,
		deletions and substitutions needed to turn one sequence into the other.

		Args:
			first: A string (or other indexable sequence).
			second: A string (or other indexable sequence).

		Returns:
			The edit distance as a non-negative int.
		"""
		# The distance is symmetric, so make ``first`` the shorter input; the
		# rolling rows below are sized by it.
		if len(first) > len(second):
			first, second = second, first
		if not first:
			return len(second)

		# previous[j] is the distance between the already-processed prefix of
		# ``second`` and first[:j].  For the empty prefix that is simply j.
		previous = list(range(len(first) + 1))
		for i, second_item in enumerate(second, 1):
			# Column 0 must be i (delete i leading items), not 0: seeding it
			# with 0 would make dropping a prefix free and under-count.
			current = [i]
			for j, first_item in enumerate(first, 1):
				deletion = previous[j] + 1
				insertion = current[j - 1] + 1
				substitution = previous[j - 1]
				if first_item != second_item:
					substitution += 1
				current.append(min(insertion, deletion, substitution))
			previous = current
		return previous[-1]

	def lcs(self, first, second):
		"""Return the longest common substring of ``first`` and ``second``.

		"Substring" means a contiguous run.  When several runs share the
		maximum length, the one found first while scanning ``first`` left to
		right (and, within that, ``second`` left to right) is returned.

		Args:
			first: A string (or other sliceable sequence).
			second: A string (or other sliceable sequence).

		Returns:
			A slice of ``second`` holding the longest common run; empty when
			the inputs share nothing.
		"""
		best_length = 0
		best_end = 0  # exclusive end index of the best run inside ``second``
		width = len(second) + 1
		# Rows carry one padding cell at index 0 so that ``previous[j - 1]``
		# is always valid; previous[j] is the length of the common run ending
		# at second[j - 1] and at the previously processed item of ``first``.
		previous = [0] * width
		for first_item in first:
			current = [0] * width
			for j, second_item in enumerate(second, 1):
				if first_item == second_item:
					current[j] = previous[j - 1] + 1
					# Strict ">" keeps the earliest run on ties.
					if current[j] > best_length:
						best_length = current[j]
						best_end = j
			previous = current
		return second[best_end - best_length:best_end]
	
if __name__ == "__main__":
	# Small demo: print the edit distance and the longest common substring
	# of two sample strings.
	arith = arithmetic()
	sample_first = 'GUMBOsdafsadfdsafsafsadfasfadsfasdfasdfs'
	sample_second = 'GAMBOL00000000000dfasfasfdafsafasfasdfdsa'
	# Single-argument print(...) behaves the same on Python 2 and Python 3.
	print(arith.levenshtein(sample_first, sample_second))
	print(arith.lcs(sample_first, sample_second))

#Longest Common Substring 【最长公共子串算法】
	def lcs(self, first, second):
		"""Return the longest common substring of ``first`` and ``second``.

		Only the previous row of the dynamic-programming table is kept, so
		extra memory is proportional to ``len(second)`` rather than to
		``len(first) * len(second)``.

		When several common runs share the maximum length, the one found
		first while scanning ``first`` left to right (and, within that,
		``second`` left to right) is returned.

		Args:
			first: A string (or other sliceable sequence).
			second: A string (or other sliceable sequence).

		Returns:
			A slice of ``second`` holding the longest common run; empty when
			the inputs share nothing.
		"""
		second_length = len(second)
		best_length = 0  # length of the longest run seen so far
		best_last = 0  # index in ``second`` of that run's last item

		# previous_row[j]: length of the common run ending at second[j] and at
		# the previously processed item of ``first`` (all zeros before any).
		previous_row = [0] * second_length
		for first_item in first:
			# Build rows by multiplication rather than a comprehension with a
			# loop variable, so nothing can clobber the result indices.
			current_row = [0] * second_length
			for j, second_item in enumerate(second):
				if first_item != second_item:
					continue
				# A match extends the run on the diagonal; at j == 0 there is
				# no diagonal neighbour, so the run starts at length 1.
				if j > 0:
					current_row[j] = previous_row[j - 1] + 1
				else:
					current_row[j] = 1
				# Strict ">" keeps the earliest run on ties.
				if current_row[j] > best_length:
					best_length = current_row[j]
					best_last = j
			previous_row = current_row

		return second[best_last - best_length + 1:best_last + 1]
	
 

 

参考:http://henryouly.blogspot.com/2006/10/blog-post_895.html

http://space.itpub.net/16857/viewspace-79033

http://hellobmw.com/archives/dynamic-programming-longest-common-substring.html

http://en.wikipedia.org/wiki/Longest_common_substring_problem

http://www.allisons.org/ll/AlgDS/

 

 

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics