将c# Levenshtein Distance移植到Java

本文关键字:Java Distance Levenshtein | 更新日期: 2023-09-27 17:51:13

我正在从一个现有的拼写检查程序开发一个应用程序,谁能帮我修改这部分代码,我不能移植指针或stackalloc到Java,因为没有等价物存在。一个具有完全相同功能的java方法。

public static unsafe double GNULevenstein(string word1, string word2)
    {
        // this algorithm normally computes un-normalized distance between two string.
        fixed (char* word1Ptr = word1)
        fixed (char* word2Ptr = word2)
        {
            char* pointerToWord1 = word1Ptr;
            char* pointerToWord2 = word2Ptr;
            /* skip equal start sequence, if any */
            if (word1.Length >= word2.Length)
            {
                while (*pointerToWord1 == *pointerToWord2)
                {
                    /* if we already used up one string,
                     * then the result is the length of the other */
                    if (*pointerToWord1 == ''0') break;
                    pointerToWord1++; 
                    pointerToWord2++;
                }
            }
            else // wordl < word2
            {
                while (*pointerToWord1 == *pointerToWord2)
                {
                    /* if we already used up one string,
                     * then the result is the length of the other */
                    if (*pointerToWord2 == ''0') break;
                    pointerToWord1++; 
                    pointerToWord2++;
                }
            }
            /* length count #1*/
            int len1 = word1.Length - (int)(pointerToWord1 - word1Ptr);
            int len2 = word2.Length - (int)(pointerToWord2 - word2Ptr);

            /* if we already used up one string, then
             the result is the length of the other */
            if (*pointerToWord1 == ''0') 
                return ExportResult( len2 , word1.Length,word2.Length , false);
            if (*pointerToWord2 == ''0')
                return ExportResult(len1, word1.Length, word2.Length, false);
            /* length count #2*/
            pointerToWord1 += len1;
            pointerToWord2 += len2;
            /* cut of equal tail sequence, if any */
            while (*--pointerToWord1 == *--pointerToWord2)
            {
                len1--; 
                len2--;
            }
            /* reset pointers, adjust length */
            pointerToWord1 -= len1++;
            pointerToWord2 -= len2++;
            /* possible dist to great? */
            //if ((len1 - len2 >= 0 ? len1 - len2 : -(len1 - len2)) >= char.MaxValue) return 1;
            if (Math.Abs(len1 - len2) >= char.MaxValue)
                return ExportResult(1, false);  // no similarity
            char* tmp;
            /* swap if l2 longer than l1 */
            if (len1 < len2)
            {
                tmp = pointerToWord1; 
                pointerToWord1 = pointerToWord2; 
                pointerToWord2 = tmp;
                len1 ^= len2; 
                len2 ^= len1; 
                len1 ^= len2;
            }
            /* fill initial row */
            int i, j, n;
            n = (*pointerToWord1 != *pointerToWord2) ? 1 : 0;
            char* r = stackalloc char[len1 * 2];
            char* p1, p2;
            for (i = 0, p1 = r; i < len1; i++, *p1++ = (char)n++, p1++) 
            { /*empty*/}

            /* calc. rowwise */
            for (j = 1; j < len2; j++)
            {
                /* init pointers and col#0 */
                p1 = r + ((j & 1) == 0 ? 1 : 0);
                p2 = r + (j & 1);
                n = *p1 + 1;
                *p2++ = (char)n; p2++;
                pointerToWord2++;
                /* foreach column */
                for (i = 1; i < len1; i++)
                {
                    if (*p1 < n) n = *p1 + (*(pointerToWord1 + i) != *pointerToWord2 ? 1 : 0); /* replace cheaper than delete? */
                    p1++;
                    if (*++p1 < n) n = *p1 + 1; /* insert cheaper then insert ? */
                    *p2++ = (char)n++; /* update field and cost for next col's delete */
                    p2++;
                }
            }
            /* return result */
            return ExportResult( n - 1, word1.Length, word2.Length, false);
        }

    }

将c# Levenshtein Distance移植到Java

这个方法看起来像是从C/c++移植过来的,而不是用c#编写的。c#中的字符串操作通常足够快,使用unsafe并直接使用char* s是浪费时间和精力…

我刚谷歌了方法名,看起来你只是想要一个Java实现的Levenshtein距离,所以,从相同的链接:

public class LevenshteinDistance {
        private static int minimum(int a, int b, int c) {
                return Math.min(Math.min(a, b), c);
        }
        public static int computeLevenshteinDistance(CharSequence str1,
                        CharSequence str2) {
                int[][] distance = new int[str1.length() + 1][str2.length() + 1];
                for (int i = 0; i <= str1.length(); i++)
                        distance[i][0] = i;
                for (int j = 1; j <= str2.length(); j++)

                distance[0][j] = j;
            for (int i = 1; i <= str1.length(); i++)
                    for (int j = 1; j <= str2.length(); j++)
                            distance[i][j] = minimum(
                                            distance[i - 1][j] + 1,
                                            distance[i][j - 1] + 1,
                                            distance[i - 1][j - 1]
                                                            + ((str1.charAt(i - 1) == str2.charAt(j - 1)) ? 0
                                                                            : 1));
            return distance[str1.length()][str2.length()];
    }
}