kasai’s Algorithm

Data StructurePattern Searching AlgorithmsAlgorithms

Best Seller

89 Lectures 11.5 hours

Practical Data Science using Python

22 Lectures 6 hours

Data Science and Data Analysis with Python

50 Lectures 3.5 hours

Kasai’s Algorithm is used to get the Longest Common Prefix (LCP) array from suffix array. At first suffix arrays are found. After that Kasai's algorithm takes the suffix array list to find LCP.

For the LCP array, it takes O(m log n), where m is pattern length and n is the length of the text. The Kasai’s Algorithm, it takes O(n) for searching the pattern in the main string.

Input and Output

Input:
Main String: “banana”
Output:
Suffix Array :
5 3 1 0 4 2
Common Prefix Array :
1 3 0 0 2 0

Algorithm

buildSuffixArray(text)

Input: The main string

Output: The suffix array, built from the main text

Begin
n := size of text

for i := 0 to n, do
suffArray[i].index := i
suffArray[i].rank[0] := text[i]
if (i+1) < n, then
suffArray[i].rank[1] := text[i+1]
else
suffArray[i].rank[1] := -1
do

sort the suffix array
define index array to store indexes

for k := 4 to (2*n)-1, increase k by k*2, do
currRank := 0
prevRank := suffArray[0].rank[0]
suffArray[0].rank[0] := currRank
index[suffArray[0].index] = 0

for all character index i of text, do
if suffArray[i].rank[0] = prevRank AND suffArray[i].rank[1] =
suffArray[i-1].rank[1], then
prevRank := suffArray[i].rank[0]
suffArray[i].rank[0] := currRank
else
prevRank := suffArray[i].rank[0]
suffArray[i].rank[0] := currRank + 1
currRank := currRank + 1
index[suffArray[i].index] := i
done

for all character index i of text, do
nextIndex := suffArray[i].index + k/2
if nextIndex< n, then
suffArray[i].rank[1] := suffArray[index[nextIndex]].rank[0]
else
suffArray[i].rank[1] := -1
done
sort the suffArray
done

for all character index i of text, do
insert suffArray[i].index into suffVector
done
End

kasaiAlgorithm(text, suffVector)

Input − The main text, and the suffix vector as a list of suffixes

Output: The location of where longest common prefix is found

Begin
n := size of suffVector
define longPrefix list of size n and fill all entries with 0
define suffInverse list of size n and fill all entries with 0

for all index values ‘i’ of suffVector, do
suffInverse[suffVector[i]] = i
done

k := 0
for i := 0 to n-1, do
if suffInverse[i] = n-1 then
k :=0
ignore the bottom part and go for next iteration.
j := suffVector[suffInverse[i]+1]

while (i+k)<n AND (j+k) < n and text[i+k] = text[j+k], do
increase k by 1
done
longPrefix[suffInverse[i]] := k
if k > 0 then
decrease k by 1
done
return longPrefix
End

Example

#include<iostream>
#include<vector>
#include<algorithm>
using namespace std;

struct suffix {
int index;
int rank[2];    // To store rank pair
};

bool compare(suffix s1, suffix s2) {    //compare suffixes for sort function
if(s1.rank[0] == s2.rank[0]) {
if(s1.rank[1] < s2.rank[1])
return true;
else
return false;
}else {
if(s1.rank[0] < s2.rank[0])
return true;
else
return false;
}
}

vector<int> buildSuffixArray(string mainString) {
int n = mainString.size();
suffix suffixArray[n];

for (int i = 0; i < n; i++) {
suffixArray[i].index = i;
suffixArray[i].rank[0] = mainString[i] - 'a';     //store old rank
suffixArray[i].rank[1] = ((i+1)<n)?(mainString[i+1]-'a'):-1; //rank after alphabetical ordering
}

sort(suffixArray, suffixArray+n, compare);    //sort suffix array upto first 2 characters
int index[n];  //index in suffixArray

for (int k = 4; k < 2*n; k = k*2) {    //increase k as power of 2
int currRank = 0;
int prevRank = suffixArray[0].rank[0];
suffixArray[0].rank[0] = currRank;
index[suffixArray[0].index] = 0;

for (int i = 1; i < n; i++) {    // Assigning rank to all suffix
if (suffixArray[i].rank[0] == prevRank && suffixArray[i].rank[1] == suffixArray[i-1].rank[1]) {
prevRank = suffixArray[i].rank[0];
suffixArray[i].rank[0] = currRank;
} else{    //increment rank and assign
prevRank = suffixArray[i].rank[0];
suffixArray[i].rank[0] = ++currRank;
}
index[suffixArray[i].index] = i;
}

for (int i = 0; i < n; i++) {    // Assign next rank to every suffix
int nextIndex = suffixArray[i].index + k/2;
suffixArray[i].rank[1] = (nextIndex < n)? suffixArray[index[nextIndex]].rank[0]: -1;
}
sort(suffixArray, suffixArray+n, compare); //sort upto first k characters
}

vector<int>suffixVector;
for (int i = 0; i < n; i++)
suffixVector.push_back(suffixArray[i].index);    //index of all suffix to suffix vector
return  suffixVector;
}

vector<int> kasaiAlgorithm(string mainString, vector<int> suffixVector) {
int n = suffixVector.size();
vector<int> longPrefix(n, 0);    //size n and initialize with 0
vector<int> suffixInverse(n, 0);

for (int i=0; i < n; i++)
suffixInverse[suffixVector[i]] = i;    //fill values of inverse Suffix list
int k = 0;
for (int i=0; i<n; i++) {     //for all suffix in main string
if (suffixInverse[i] == n-1) {    //when suffix at position (n-1)
k = 0;
continue;
}

int j = suffixVector[suffixInverse[i]+1];    //nest string of suffix list
while (i+k<n && j+k<n && mainString[i+k]==mainString[j+k]) //start from kth index
k++;
longPrefix[suffixInverse[i]] = k;    // prefix for the current suffix.
if (k>0)
k--;    //remofe first character of string
}
return longPrefix;
}

void showArray(vector<int> vec) {
vector<int>::iterator it;

for (it = vec.begin(); it < vec.end() ; it++)
cout << *it << " ";
cout << endl;
}

int main() {
string mainString = "banana";
vector<int>suffixArray = buildSuffixArray(mainString);
int n = suffixArray.size();

cout<< "Suffix Array : "<<endl;
showArray(suffixArray);

vector<int>commonPrefix = kasaiAlgorithm(mainString, suffixArray);

cout<< "Common Prefix Array : "<<endl;
showArray(commonPrefix);
}

Output

Suffix Array :
5 3 1 0 4 2
Common Prefix Array :
1 3 0 0 2 0
Updated on 15-Jun-2020 19:00:01