2015-01-09 14:11:21 +03:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# encoding: utf-8
|
|
|
|
"""
|
|
|
|
StringMatcher.py
|
|
|
|
|
|
|
|
ported from python-Levenshtein
|
|
|
|
[https://github.com/miohtama/python-Levenshtein]
|
|
|
|
"""
|
|
|
|
|
2015-12-15 17:09:25 +03:00
|
|
|
from warnings import warn
|
2015-08-02 22:58:45 +03:00
|
|
|
|
2015-12-15 17:18:06 +03:00
|
|
|
from Levenshtein import *
|
|
|
|
|
|
|
|
|
2015-01-09 14:11:21 +03:00
|
|
|
class StringMatcher:
    """A SequenceMatcher-like class built on the top of Levenshtein.

    The method names mirror ``difflib.SequenceMatcher``; the actual work is
    delegated to the functions star-imported from the ``Levenshtein`` module
    (``ratio``, ``distance``, ``editops``, ``opcodes``, ``matching_blocks``).

    Every result is computed lazily and cached on the instance until one of
    the sequences is replaced.
    """

    def _reset_cache(self):
        """Drop all cached results so they are recomputed on next access."""
        self._ratio = self._distance = None
        self._opcodes = self._editops = self._matching_blocks = None

    def __init__(self, isjunk=None, seq1='', seq2=''):
        """Create a matcher for ``seq1`` and ``seq2``.

        ``isjunk`` is accepted only for signature compatibility with
        ``difflib.SequenceMatcher``; a truthy value triggers a warning and is
        otherwise ignored.
        """
        if isjunk:
            warn("isjunk is NOT implemented, it will be ignored")
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seqs(self, seq1, seq2):
        """Replace both sequences and invalidate the cached results."""
        self._str1, self._str2 = seq1, seq2
        self._reset_cache()

    def set_seq1(self, seq1):
        """Replace the first sequence and invalidate the cached results."""
        self._str1 = seq1
        self._reset_cache()

    def set_seq2(self, seq2):
        """Replace the second sequence and invalidate the cached results."""
        self._str2 = seq2
        self._reset_cache()

    def get_opcodes(self):
        """Return the (cached) result of Levenshtein's ``opcodes``."""
        # Compare against None rather than truthiness: an empty list is a
        # valid cached result and must not trigger a recomputation.
        if self._opcodes is None:
            if self._editops:
                # Non-empty edit operations are already known: convert them.
                self._opcodes = opcodes(self._editops, self._str1, self._str2)
            else:
                self._opcodes = opcodes(self._str1, self._str2)
        return self._opcodes

    def get_editops(self):
        """Return the (cached) result of Levenshtein's ``editops``."""
        # See get_opcodes(): an empty list is a valid cached value.
        if self._editops is None:
            if self._opcodes:
                # Non-empty opcodes are already known: convert them.
                self._editops = editops(self._opcodes, self._str1, self._str2)
            else:
                self._editops = editops(self._str1, self._str2)
        return self._editops

    def get_matching_blocks(self):
        """Return the (cached) result of Levenshtein's ``matching_blocks``.

        The blocks are derived from ``get_opcodes()``.
        """
        if self._matching_blocks is None:
            self._matching_blocks = matching_blocks(self.get_opcodes(),
                                                    self._str1, self._str2)
        return self._matching_blocks

    def ratio(self):
        """Return the (cached) result of Levenshtein's ``ratio``."""
        # A ratio of 0.0 is falsy, so test for None to keep it cached.
        if self._ratio is None:
            self._ratio = ratio(self._str1, self._str2)
        return self._ratio

    def quick_ratio(self):
        """Return the same value as ``ratio()``."""
        # This is usually quick enough :o)
        return self.ratio()

    def real_quick_ratio(self):
        """Return a length-only estimate: 2 * min(len1, len2) / (len1 + len2).

        Two empty sequences yield 1.0 (as ``difflib.SequenceMatcher`` does)
        instead of raising ``ZeroDivisionError``.
        """
        len1, len2 = len(self._str1), len(self._str2)
        total = len1 + len2
        if not total:
            return 1.0
        return 2.0 * min(len1, len2) / total

    def distance(self):
        """Return the (cached) result of Levenshtein's ``distance``."""
        # A distance of 0 is falsy, so test for None to keep it cached.
        if self._distance is None:
            self._distance = distance(self._str1, self._str2)
        return self._distance
|