kasai’s Algorithm

Data StructurePattern Searching AlgorithmsAlgorithms

Kasai’s Algorithm is used to get the Longest Common Prefix (LCP) array from suffix array. At first suffix arrays are found. After that Kasai's algorithm takes the suffix array list to find LCP.

For the LCP array, it takes O(m log n), where m is pattern length and n is the length of the text. The Kasai’s Algorithm, it takes O(n) for searching the pattern in the main string.

Input and Output

Input:
Main String: “banana”
Output:
Suffix Array :
5 3 1 0 4 2
Common Prefix Array :
1 3 0 0 2 0

Algorithm

buildSuffixArray(text)

Input: The main string

Output: The suffix array, built from the main text

Begin
   n := size of text

   for i := 0 to n, do
      suffArray[i].index := i
      suffArray[i].rank[0] := text[i]
      if (i+1) < n, then
         suffArray[i].rank[1] := text[i+1]
      else
         suffArray[i].rank[1] := -1
   do

   sort the suffix array
   define index array to store indexes

   for k := 4 to (2*n)-1, increase k by k*2, do
      currRank := 0
      prevRank := suffArray[0].rank[0]
      suffArray[0].rank[0] := currRank
      index[suffArray[0].index] = 0

      for all character index i of text, do
         if suffArray[i].rank[0] = prevRank AND suffArray[i].rank[1] =
            suffArray[i-1].rank[1], then
            prevRank := suffArray[i].rank[0]
            suffArray[i].rank[0] := currRank
         else
            prevRank := suffArray[i].rank[0]
            suffArray[i].rank[0] := currRank + 1
            currRank := currRank + 1
            index[suffArray[i].index] := i
      done

      for all character index i of text, do
         nextIndex := suffArray[i].index + k/2
         if nextIndex< n, then
            suffArray[i].rank[1] := suffArray[index[nextIndex]].rank[0]
         else
            suffArray[i].rank[1] := -1
      done
      sort the suffArray
   done

   for all character index i of text, do
      insert suffArray[i].index into suffVector
   done
End

kasaiAlgorithm(text, suffVector)

Input − The main text, and the suffix vector as a list of suffixes

Output: The location of where longest common prefix is found

Begin
   n := size of suffVector
   define longPrefix list of size n and fill all entries with 0
   define suffInverse list of size n and fill all entries with 0

   for all index values ‘i’ of suffVector, do
      suffInverse[suffVector[i]] = i
   done

   k := 0
      for i := 0 to n-1, do
         if suffInverse[i] = n-1 then
            k :=0
            ignore the bottom part and go for next iteration.
         j := suffVector[suffInverse[i]+1]

         while (i+k)<n AND (j+k) < n and text[i+k] = text[j+k], do
            increase k by 1
         done
         longPrefix[suffInverse[i]] := k
         if k > 0 then
            decrease k by 1
   done
   return longPrefix
End

Example

#include<iostream>
#include<vector>
#include<algorithm>
using namespace std;

struct suffix {
   int index;
   int rank[2];    // To store rank pair
};

bool compare(suffix s1, suffix s2) {    //compare suffixes for sort function
   if(s1.rank[0] == s2.rank[0]) {
      if(s1.rank[1] < s2.rank[1])
         return true;
      else
         return false;
   }else {
      if(s1.rank[0] < s2.rank[0])
         return true;
      else
         return false;
   }
}

vector<int> buildSuffixArray(string mainString) {
   int n = mainString.size();
   suffix suffixArray[n];

   for (int i = 0; i < n; i++) {
      suffixArray[i].index = i;
      suffixArray[i].rank[0] = mainString[i] - 'a';     //store old rank
      suffixArray[i].rank[1] = ((i+1)<n)?(mainString[i+1]-'a'):-1; //rank after alphabetical ordering
   }

   sort(suffixArray, suffixArray+n, compare);    //sort suffix array upto first 2 characters
   int index[n];  //index in suffixArray

   for (int k = 4; k < 2*n; k = k*2) {    //increase k as power of 2
      int currRank = 0;
      int prevRank = suffixArray[0].rank[0];
      suffixArray[0].rank[0] = currRank;
      index[suffixArray[0].index] = 0;

      for (int i = 1; i < n; i++) {    // Assigning rank to all suffix
         if (suffixArray[i].rank[0] == prevRank && suffixArray[i].rank[1] == suffixArray[i-1].rank[1]) {
            prevRank = suffixArray[i].rank[0];
            suffixArray[i].rank[0] = currRank;
         } else{    //increment rank and assign
            prevRank = suffixArray[i].rank[0];
            suffixArray[i].rank[0] = ++currRank;
         }
         index[suffixArray[i].index] = i;
      }

      for (int i = 0; i < n; i++) {    // Assign next rank to every suffix
         int nextIndex = suffixArray[i].index + k/2;
         suffixArray[i].rank[1] = (nextIndex < n)? suffixArray[index[nextIndex]].rank[0]: -1;
      }
      sort(suffixArray, suffixArray+n, compare); //sort upto first k characters
   }

   vector<int>suffixVector;
   for (int i = 0; i < n; i++)
      suffixVector.push_back(suffixArray[i].index);    //index of all suffix to suffix vector
      return  suffixVector;
}

vector<int> kasaiAlgorithm(string mainString, vector<int> suffixVector) {
   int n = suffixVector.size();
   vector<int> longPrefix(n, 0);    //size n and initialize with 0
   vector<int> suffixInverse(n, 0);

   for (int i=0; i < n; i++)
      suffixInverse[suffixVector[i]] = i;    //fill values of inverse Suffix list
   int k = 0;
   for (int i=0; i<n; i++) {     //for all suffix in main string
      if (suffixInverse[i] == n-1) {    //when suffix at position (n-1)
         k = 0;
         continue;
      }

      int j = suffixVector[suffixInverse[i]+1];    //nest string of suffix list
      while (i+k<n && j+k<n && mainString[i+k]==mainString[j+k]) //start from kth index
         k++;
      longPrefix[suffixInverse[i]] = k;    // prefix for the current suffix.
      if (k>0)
         k--;    //remofe first character of string
   }
   return longPrefix;
}

void showArray(vector<int> vec) {
   vector<int>::iterator it;

   for (it = vec.begin(); it < vec.end() ; it++)
      cout << *it << " ";
   cout << endl;
}

int main() {
   string mainString = "banana";
   vector<int>suffixArray = buildSuffixArray(mainString);
   int n = suffixArray.size();

   cout<< "Suffix Array : "<<endl;
   showArray(suffixArray);

   vector<int>commonPrefix = kasaiAlgorithm(mainString, suffixArray);

   cout<< "\nCommon Prefix Array : "<<endl;
   showArray(commonPrefix);
}

Output

Suffix Array :
5 3 1 0 4 2
Common Prefix Array :
1 3 0 0 2 0
raja
Published on 09-Jul-2018 13:14:46
Advertisements