Suffix Array



From a given string, we can get all possible suffixes. After sorting the suffixes in lexicographical order, we can get the suffix array. Suffix arrays can also be formed using suffix trees. By using the DFS traversal of suffix trees, we can get suffix arrays. Suffix arrays are helpful to find suffixes in linear time. We can also find substrings using suffix array by using binary search type procedure.

The time complexity is O(m log n)

Input and Output

Input:
Main String: “BANANA”, Pattern: “NAN”
Output:
Pattern found at position: 2

Algorithm

fillSuffixArray (text, suffArray)

Input: The main string

Output: The array of suffixes

Begin
   n := text Length
   define suffix array as allSuffix of size n

   for i := 0 to n-1, do
      allSuffix[i].index := i
      allSuffix[i].suff := substring of text from (i to end)
   done

   sort the allSuffix array
   store indexes of all suffix array in suffArray.
End

suffixArraySearch (text, pattern, suffArray)

Input: The main string, the pattern and the suffix array

Output − the location where patterns are found

Begin
   patLen := size of pattern
   strLen := size of text
   left := 0
   right := strLen -1

   while left <= right, do
      mid := left + (right - left)/2
      tempStr := substring of text from suffArray[mid] to end
      result := compare tempStr and pattern upto pattern length.

      if result = 0, then
         print the location
      if res < 0, then
         right := mid – 1
      else
         left := mid +1
   done
End

Example

#include<iostream>
#include<algorithm>
#include<cstring>
using namespace std;

struct suffix {
   int index;
   string suff;
};

int strCompare(string st1, string st2, int n) {
   int i = 0;
   while(n--) {
      if(st1[i] != st2[i])
         return st1[i] - st2[i];
      i++;
   }
   return 0;
}

bool comp(suffix suff1, suffix suff2) {     //compare two strings for sorting
   if(suff1.suff<suff2.suff)
      return true;
   return false;
}

void fillSuffixArray(string mainString, int suffArr[]) {
   int n = mainString.size();
   suffix allSuffix[n];    //array to hold all suffixes

   for(int i = 0; i<n; i++) {
      allSuffix[i].index = i;
      allSuffix[i].suff = mainString.substr(i);    //from ith position to end
   }

   sort(allSuffix, allSuffix+n, comp);
   for(int i = 0; i<n; i++)
      suffArr[i] = allSuffix[i].index;    //indexes of all sorted suffix
}

void suffixArraySearch(string mainString, string pattern, int suffArr[], int array[], int *index) {
   int patLen = pattern.size();
   int strLen = mainString.size();
   int left = 0, right = strLen - 1;    //left and right for binary search

   while(left <= right) {
      int mid = left + (right - left)/2;
      string tempStr = mainString.substr(suffArr[mid]);
      int result = strCompare(pattern,tempStr, patLen);
   
      if(result == 0) {    //the pattern is found
        (*index)++;
        array[(*index)] = suffArr[mid];
      }

      if(result < 0)
         right = mid -1;
      else
         left = mid +1;
   }
}

int main() {
   string mainString = "BANANA";
   string pattern = "NAN";
   int locArray[mainString.size()];
   int index = -1;

   int suffArr[mainString.size()];
   fillSuffixArray(mainString,  suffArr);
   
   suffixArraySearch(mainString, pattern, suffArr, locArray, &index);
   for(int i = 0; i <= index; i++) {
      cout << "Pattern found at position: " << locArray[i]<<endl;
   }
}

Output

Pattern found at position: 2
Sharon Christine
Sharon Christine

An investment in knowledge pays the best interest


Advertisements