mirror of
https://github.com/tursodatabase/libsql.git
synced 2025-03-08 23:41:50 +00:00
A common complain with libSQL is how to run extensions. The main mechanism, with a .so, has a lot of issues around how those .so are distributed. The most common extensions are the ones in the sqlean package. We can improve this experience by bundling them in our sqlite build. Not all SQLean extensions are kosher: some of them, like fileio, use the vfs. Others, are deemed too complex. The extensions included here are a subset that we deem important enough, and low risk enough, to just be a part of the main bundle.
105 lines
2.8 KiB
C
105 lines
2.8 KiB
C
// Copyright (c) 2014 Ross Bayer, MIT License
|
|
// https://github.com/Rostepher/libstrcmp
|
|
|
|
#include <assert.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "fuzzy/common.h"
|
|
|
|
/// Calculates and returns the Damerau-Levenshtein distance of two non NULL
|
|
/// strings. More information about the algorithm can be found here:
|
|
/// https://en.wikipedia.org/wiki/Damerau-Levenshtein_distance
|
|
///
|
|
/// @param str1 first non NULL string
|
|
/// @param str2 second non NULL string
|
|
///
|
|
/// @returns Damerau-Levenshtein distance of str1 and str2
|
|
unsigned damerau_levenshtein(const char* str1, const char* str2) {
|
|
// strings cannot be NULL
|
|
assert(str1 != NULL);
|
|
assert(str2 != NULL);
|
|
|
|
// size of the alphabet
|
|
const unsigned alpha_size = 255;
|
|
|
|
size_t str1_len = strlen(str1);
|
|
size_t str2_len = strlen(str2);
|
|
|
|
// handle cases where one or both strings are empty
|
|
if (str1_len == 0) {
|
|
return str2_len;
|
|
}
|
|
if (str2_len == 0) {
|
|
return str1_len;
|
|
}
|
|
|
|
// remove common substring
|
|
while (str1_len > 0 && str2_len > 0 && EQ(str1[0], str2[0])) {
|
|
str1++, str2++;
|
|
str1_len--, str2_len--;
|
|
}
|
|
|
|
const unsigned INFINITY = str1_len + str2_len;
|
|
unsigned row, col;
|
|
|
|
// create "dictionary"
|
|
unsigned* dict = calloc(alpha_size, sizeof(unsigned));
|
|
|
|
size_t m_rows = str1_len + 2; // matrix rows
|
|
size_t m_cols = str2_len + 2; // matrix cols
|
|
|
|
// matrix to hold computed values
|
|
unsigned** matrix = malloc(m_rows * sizeof(unsigned*));
|
|
for (unsigned i = 0; i < m_rows; i++) {
|
|
matrix[i] = calloc(m_cols, sizeof(unsigned));
|
|
}
|
|
|
|
// set all the starting values and add all characters to the dict
|
|
matrix[0][0] = INFINITY;
|
|
for (row = 1; row < m_rows; row++) {
|
|
matrix[row][0] = INFINITY;
|
|
matrix[row][1] = row - 1;
|
|
}
|
|
for (col = 1; col < m_cols; col++) {
|
|
matrix[0][col] = INFINITY;
|
|
matrix[1][col] = col - 1;
|
|
}
|
|
|
|
unsigned db;
|
|
unsigned i, k;
|
|
unsigned cost;
|
|
|
|
// fill in the matrix
|
|
for (row = 1; row <= str1_len; row++) {
|
|
db = 0;
|
|
|
|
for (col = 1; col <= str2_len; col++) {
|
|
i = dict[(unsigned)str2[col - 1]];
|
|
k = db;
|
|
cost = EQ(str1[row - 1], str2[col - 1]) ? 0 : 1;
|
|
|
|
if (cost == 0) {
|
|
db = col;
|
|
}
|
|
|
|
matrix[row + 1][col + 1] =
|
|
MIN4(matrix[row][col] + cost, matrix[row + 1][col] + 1, matrix[row][col + 1] + 1,
|
|
matrix[i][k] + (row - i - 1) + (col - k - 1) + 1);
|
|
}
|
|
|
|
dict[(unsigned)str1[row - 1]] = row;
|
|
}
|
|
|
|
unsigned result = matrix[m_rows - 1][m_cols - 1];
|
|
|
|
// free allocated memory
|
|
free(dict);
|
|
for (unsigned i = 0; i < m_rows; i++) {
|
|
free(matrix[i]);
|
|
}
|
|
free(matrix);
|
|
|
|
return result;
|
|
}
|