Efficient Construction of Finite Automata



What is Finite Automata?

Finite Automata is a mathematical model of computation used for recognizing patterns within input texts. It consists of a set of states and transitions between them. This machine can either accept or reject an input string which indicates it has only two states.

Finite Automata for Pattern Matching

To use finite automata for pattern matching, we need to construct an FA machine that only accepts the strings that match the given pattern. Suppose, we have created a finite automaton that has four states namely q0, q1, q2, and q3. The initial state would be 'q0', and the final state 'q3'. The transitions are labelled with characters of the pattern.

Now, we begin matching from the start state and read the string from left to right, following the transitions according to the input symbols. If we reach the final state after reading the whole string, then the string matches the pattern. Otherwise, the string does not match the pattern.

Following is the input-output scenario to enhance our understanding of the problem −

Input:
main String: "ABAAABCDBBABCDDEBCABC" 
pattern: "ABC"
Output:
Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18

Given an input string "ABAAABCDBBABCDDEBCABC" and the pattern "ABC," we use a finite automaton program to search for occurrences of the pattern in the string. The program identifies the pattern at index positions 4, 10, and 18.

Example

Now, let us implement the finite automata approach to solve pattern matching problem in different programming languages −

#include <stdio.h>
#include <string.h>
#define MAXCHAR 256
// to fill Finite Automata transition table based on given pattern
void fillTransitionTable(const char *pattern, int transTable[][MAXCHAR]) {
   int longPS = 0;
   // initializing the first state with 0 for all characters
   for (int i = 0; i < MAXCHAR; i++) {
      transTable[0][i] = 0; 
   }
   // For the first character of pattern, move to the first state
   transTable[0][(int)pattern[0]] = 1;  
   // For rest of the pattern's characters, create states using prefix and suffix
   for (int i = 1; i <= strlen(pattern); i++) {
      // Copy values from LPS length to the current state
      for (int j = 0; j < MAXCHAR; j++)  
         transTable[i][j] = transTable[longPS][j];
      // For the current pattern character, move to the next state
      transTable[i][(int)pattern[i]] = i + 1;
      // Updating LPS length for the next state
      if (i < strlen(pattern))
         longPS = transTable[longPS][(int)pattern[i]];  
   }
}
// to search for the pattern in main string using Finite Automata approach
void FAPatternSearch(const char *mainString, const char *pattern, int array[], int *index) {
   int patLen = strlen(pattern);
   int strLen = strlen(mainString);
   // Creating transition table for the pattern
   int transTable[patLen + 1][MAXCHAR];  
   fillTransitionTable(pattern, transTable);
   int presentState = 0;
   // iterating over the main string
   for (int i = 0; i <= strLen; i++) {
      // Move to the next state if the transition is possible
      presentState = transTable[presentState][(int)mainString[i]]; 
      // If the present state is the final state, the pattern is found
      if (presentState == patLen) {  
         (*index)++;
         array[(*index)] = i - patLen + 1;
      }
   }
}
int main() {
   const char *mainString = "ABAAABCDBBABCDDEBCABC";
   // pattern to be searched
   const char *pattern = "ABC";
   int locArray[strlen(mainString)];
   int index = -1;
   // Calling the pattern search function
   FAPatternSearch(mainString, pattern, locArray, &index);
   // to print the result
   for (int i = 0; i <= index; i++) {
      printf("Pattern found at position: %d\n", locArray[i]);
   }
   return 0;
}
#include<iostream>
#define MAXCHAR 256
using namespace std;
// to fill Finite Automata transition table based on given pattern
void fillTransitionTable(string pattern, int transTable[][MAXCHAR]) {
   int longPS = 0;
   // initializing the first state with 0 for all characters
   for (int i = 0; i < MAXCHAR; i++) {
      transTable[0][i] = 0;        
   }
   // For the first character of pattern, move to the first state
   transTable[0][pattern[0]] = 1;  
   // For rest of the characters, create states using prefix and suffix
   for (int i = 1; i<= pattern.size(); i++) {
      // Copy values from LPS length to the current state
      for (int j = 0; j < MAXCHAR ; j++)    
         transTable[i][j] = transTable[longPS][j];
      // For the current pattern character, move to the next state     
      transTable[i][pattern[i]] = i + 1;
      // Updating LPS length for the next state
      if (i < pattern.size())
         longPS = transTable[longPS][pattern[i]];
   }
}
// to search for the pattern in main string using Finite Automata approach
void FAPatternSearch(string mainString, string pattern, int array[], int *index) {
   int patLen = pattern.size();
   int strLen = mainString.size();
   // Creating transition table for the pattern
   int transTable[patLen+1][MAXCHAR];     
   fillTransitionTable(pattern, transTable);
   int presentState = 0;
   // iterating over the main string
   for(int i = 0; i<=strLen; i++) {
      // Move to the next state if the transition is possible 
      presentState = transTable[presentState][mainString[i]];    
      // If the present state is the final state, the pattern is found
      if(presentState == patLen) {    
         (*index)++;
         array[(*index)] = i - patLen + 1 ;
      }
   }
}
int main() {
   string mainString = "ABAAABCDBBABCDDEBCABC";
   // pattern to be searched
   string pattern = "ABC";
   int locArray[mainString.size()];
   int index = -1;
   // Calling the pattern search function
   FAPatternSearch(mainString, pattern, locArray, &index);
   // to print the result
   for(int i = 0; i <= index; i++) {
      cout << "Pattern found at position: " << locArray[i]<<endl;
   }
}
public class Main {
   static final int MAXCHAR = 256;
   // to fill Finite Automata transition table based on given pattern
   public static void fillTransitionTable(String pattern, int[][] transTable) {
      int longPS = 0;
      // initializing the first state with 0 for all characters
      for (int i = 0; i < MAXCHAR; i++) {
         transTable[0][i] = 0;  
      }
      // For the first character of pattern, move to the first state
      transTable[0][pattern.charAt(0)] = 1;  
      // For rest of the characters, create states using prefix and suffix
      for (int i = 1; i < pattern.length(); i++) {  
         // Copy values from LPS length to the current state
         for (int j = 0; j < MAXCHAR; j++)  
            transTable[i][j] = transTable[longPS][j];
         // For the current pattern character, move to the next state
         transTable[i][pattern.charAt(i)] = i + 1;
         // Updating LPS length for the next state
         longPS = transTable[longPS][pattern.charAt(i)];  
      }
   }
   // to search for the pattern in main string using Finite Automata approach
   public static void FAPatternSearch(String mainString, String pattern, int[] array, int[] index) {
      int patLen = pattern.length();
      int strLen = mainString.length();
      // Creating transition table for the pattern
      int[][] transTable = new int[patLen + 1][MAXCHAR];  
      fillTransitionTable(pattern, transTable);
      int presentState = 0;
      // iterating over the main string
      for (int i = 0; i < strLen; i++) {
          // Move to the next state if the transition is possible  
         presentState = transTable[presentState][mainString.charAt(i)];  
         // If the present state is the final state, the pattern is found
         if (presentState == patLen) {  
            index[0]++;
            array[index[0]] = i - patLen + 1;
         }
      }
   }
   public static void main(String[] args) {
      String mainString = "ABAAABCDBBABCDDEBCABC";
      // pattern to be searched
      String pattern = "ABC";
      int[] locArray = new int[mainString.length()];
      int[] index = { -1 };
      // Calling the pattern search method
      FAPatternSearch(mainString, pattern, locArray, index);
      // to print the result
      for (int i = 0; i <= index[0]; i++) {
         System.out.println("Pattern found at position: " + locArray[i]);
      }
   }
}
MAXCHAR = 256
# to fill Finite Automata transition table based on given pattern
def fillTransitionTable(pattern, transTable):
    longPS = 0
    # initializing the first state with 0 for all characters
    for i in range(MAXCHAR):
        transTable[0][i] = 0
    # For the first character of pattern, move to the first state    
    transTable[0][ord(
        pattern[0])] = 1 
    # For rest of the pattern's characters, create states using prefix and suffix    
    for i in range(1, len(pattern)):
        # Copy values from LPS length to the current state
        for j in range(MAXCHAR):  
            transTable[i][j] = transTable[longPS][j]
        # For the current pattern character, move to the next state    
        transTable[i][ord(pattern[i])] = i + 1
        # Updating LPS length for the next state
        longPS = transTable[longPS][ord(
            pattern[i]
        )]  
# to search for the pattern in main string using Finite Automata approach
def FAPatternSearch(mainString, pattern, array, index):
    patLen = len(pattern)
    strLen = len(mainString)
    # create a transition table for each pattern
    transTable = [[0] * MAXCHAR for _ in range(patLen + 1)
                  ]  
    fillTransitionTable(pattern, transTable)
    presentState = 0
    # iterating over the main string
    for i in range(strLen):
        # Move to the next state if the transition is possible
        presentState = transTable[presentState][ord(
            mainString[i]
        )]  
        # If the present state is the final state, the pattern is found
        if presentState == patLen:
            index[0] += 1
            array[index[0]] = i - patLen + 1

mainString = "ABAAABCDBBABCDDEBCABC"
pattern = "ABC"
locArray = [0] * len(mainString)
index = [-1]
#Calling the pattern search function
FAPatternSearch(mainString, pattern, locArray, index)
for i in range(index[0] + 1):
    print("Pattern found at position:", locArray[i])

Output

Pattern found at position: 4
Pattern found at position: 10
Pattern found at position: 18
Advertisements