Remove all occurrences of a word from a given string using Z-algorithm


This article delves into an interesting string manipulation problem: "Remove all occurrences of a word from a given string using Z-algorithm". This problem serves as an excellent use case for the Z-algorithm, highlighting its efficacy in pattern searching problems. Let's explore in detail.

Problem Statement

Given a string S and a word W, the task is to remove all occurrences of W from S using the Z-algorithm.

Understanding the Problem

Consider a string S = "HelloWorldHelloWorld" and a word W = "World". The goal is to remove all occurrences of W from S. Hence, the output would be "HelloHello".

Z-algorithm

The Z-algorithm finds all occurrences of a pattern in a text in linear time. It constructs an array (Z-array), where for a given index i, Z[i] represents the length of the longest substring starting from i which is also a prefix of the string.

Algorithmic Approach

Here are the steps to solve the problem −

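  • Create a new string P = W + '$' + S. The '$' acts as a separator between the word and the text; it is assumed not to occur in W or S, so that no match can extend across it.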

  • Apply the Z-algorithm to P and construct the Z-array.

  • Iterate over the Z-array. If Z[i] is equal to the length of W, it means W is present at that index. Remove W from S at that index.

Example

Here are the programs that implement the above approach −

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Fills Z[0..n-1] with the Z-array of the first n characters of str.
 * For i >= 1, Z[i] is the length of the longest substring starting at i
 * that is also a prefix of str. Z[0] is set to 0 so that every slot of the
 * output buffer holds a defined value.
 *
 * str : input characters; at least n of them must be readable
 * Z   : output buffer with room for at least n ints
 * n   : number of characters to process; nothing is written when n <= 0
 */
void constructZArray(char* str, int* Z, int n) {
   if (n <= 0)
      return;

   Z[0] = 0;

   /* [L, R] is the rightmost segment found so far that matches a prefix. */
   int L = 0, R = 0;
   for (int i = 1; i < n; i++) {
      if (i > R) {
         /* i lies outside the known segment: compare from scratch. */
         L = R = i;
         while (R < n && str[R - L] == str[R]) {
            R++;
         }
         Z[i] = R - L;
         R--;
      } else {
         /* i lies inside [L, R]; position k in the prefix mirrors it. */
         int k = i - L;
         if (Z[k] < R - i + 1) {
            /* The mirrored match ends strictly inside the segment. */
            Z[i] = Z[k];
         } else {
            /* The match reaches the segment end: keep extending from R. */
            L = i;
            while (R < n && str[R - L] == str[R]) {
               R++;
            }
            Z[i] = R - L;
            R--;
         }
      }
   }
}

/*
 * Returns a newly allocated copy of S with every character that belongs to
 * an occurrence of W removed (overlapping occurrences are all removed).
 * An empty W removes nothing.
 *
 * The search runs the Z-algorithm over P = W + '$' + S. An occurrence starts
 * at S[pos] when the Z-value at the matching position of P is at least
 * strlen(W); "at least" rather than "equal" keeps the result correct even
 * when S itself contains '$' and the match runs past the separator.
 *
 * S, W : NUL-terminated strings; neither is modified
 * Returns a heap string the caller must free(), or NULL if allocation fails.
 */
char* removeWord(char* S, char* W) {
   size_t len_S = strlen(S);
   size_t len_W = strlen(W);
   size_t len_P = len_W + 1 + len_S;   /* word, separator, text */

   char* P = (char*)malloc(len_P + 1);
   int* Z = (int*)malloc(len_P * sizeof(int));
   char* result = (char*)malloc(len_S + 1);
   if (P == NULL || Z == NULL || result == NULL) {
      free(P);
      free(Z);
      free(result);
      return NULL;
   }

   memcpy(P, W, len_W);
   P[len_W] = '$';
   memcpy(P + len_W + 1, S, len_S + 1);   /* also copies the terminator */

   constructZArray(P, Z, (int)len_P);

   /* Single sweep: removeUntil is one past the last character covered by
      the most recent occurrence of W seen so far. */
   size_t removeUntil = 0;
   size_t j = 0;
   for (size_t pos = 0; pos < len_S; pos++) {
      if ((size_t)Z[len_W + 1 + pos] >= len_W)
         removeUntil = pos + len_W;
      if (pos >= removeUntil)
         result[j++] = S[pos];
   }
   result[j] = '\0';

   free(P);
   free(Z);
   return result;
}
/* Demo driver: strips every "writing" from the sample text and prints it. */
int main() {
   char text[] = "Iamwritingwriting";
   char word[] = "writing";

   char* stripped = removeWord(text, word);
   printf("String after removal: %s\n", stripped);
   free(stripped);

   return 0;
}

Output

String after removal: Iam
#include<bits/stdc++.h>
using namespace std;
// Builds the Z-array of str: for i >= 1, Z[i] is the length of the longest
// substring starting at i that is also a prefix of str. Z[0] is left at 0.
vector<int> constructZArray(string str) {
   const int n = str.length();
   vector<int> Z(n, 0);

   // [boxStart, boxEnd) is the rightmost segment known to match a prefix.
   int boxStart = 0, boxEnd = 0;
   for (int i = 1; i < n; ++i) {
      int matched = 0;
      if (i < boxEnd) {
         // Reuse the value at the mirrored prefix position, capped by the
         // number of characters still inside the box.
         const int mirrored = Z[i - boxStart];
         const int room = boxEnd - i;
         matched = mirrored < room ? mirrored : room;
      }

      // Extend the match by direct comparison.
      while (i + matched < n && str[matched] == str[i + matched]) {
         ++matched;
      }
      Z[i] = matched;

      if (i + matched > boxEnd) {
         boxStart = i;
         boxEnd = i + matched;
      }
   }
   return Z;
}

// Returns S with every character that belongs to an occurrence of W removed
// (overlapping occurrences are all removed). An empty W removes nothing.
//
// The search runs the Z-algorithm over W + '$' + S. An occurrence starts at
// S[pos] when the corresponding Z-value is at least W.length(); "at least"
// rather than "equal" keeps the result correct even when S contains '$' and
// the match runs past the separator.
string removeWord(string S, string W) {
   const size_t wordLen = W.length();
   const vector<int> Z = constructZArray(W + '$' + S);

   string result;
   result.reserve(S.size());

   // Single sweep: removeUntil is one past the last character covered by the
   // most recent occurrence of W seen so far.
   size_t removeUntil = 0;
   for (size_t pos = 0; pos < S.size(); ++pos) {
      if (static_cast<size_t>(Z[wordLen + 1 + pos]) >= wordLen)
         removeUntil = pos + wordLen;
      if (pos >= removeUntil)
         result += S[pos];
   }
   return result;
}

// Demo driver: strips every "writing" from the sample text and prints it.
int main() {
   const string S = "Iamwritingwriting";
   const string W = "writing";
   cout << "String after removal: " << removeWord(S, W);
   return 0;
}

Output

String after removal: Iam
import java.util.Arrays;

/**
 * Removes all occurrences of a word from a string, locating the occurrences
 * with the Z-algorithm.
 */
public class Main {
   /**
    * Builds the Z-array of {@code str}: for {@code i >= 1}, {@code Z[i]} is the
    * length of the longest substring starting at {@code i} that is also a
    * prefix of {@code str}. {@code Z[0]} is left at 0.
    *
    * @param str the string to analyse
    * @return an array of length {@code str.length()} holding the Z-values
    */
   public static int[] constructZArray(String str) {
      int n = str.length();
      int[] Z = new int[n];
      // [L, R] is the rightmost segment found so far that matches a prefix.
      int L = 0, R = 0;
      for (int i = 1; i < n; i++) {
         if (i > R) {
            // i lies outside the known segment: compare from scratch.
            L = R = i;
            while (R < n && str.charAt(R - L) == str.charAt(R)) {
               R++;
            }
            Z[i] = R - L;
            R--;
         } else {
            // i lies inside [L, R]; position k in the prefix mirrors it.
            int k = i - L;
            if (Z[k] < R - i + 1) {
               // The mirrored match ends strictly inside the segment.
               Z[i] = Z[k];
            } else {
               // The match reaches the segment end: keep extending from R.
               L = i;
               while (R < n && str.charAt(R - L) == str.charAt(R)) {
                  R++;
               }
               Z[i] = R - L;
               R--;
            }
         }
      }
      return Z;
   }

   /**
    * Returns {@code S} with every character that belongs to an occurrence of
    * {@code W} removed. Overlapping occurrences are all removed; an empty
    * {@code W} removes nothing.
    *
    * <p>The search runs the Z-algorithm over {@code W + '$' + S}. An occurrence
    * starts at {@code S[pos]} when the corresponding Z-value is at least
    * {@code W.length()}; "at least" rather than "equal" keeps the result
    * correct even when {@code S} contains {@code '$'} and the match runs past
    * the separator.
    *
    * @param S the text to filter
    * @param W the word to remove
    * @return the filtered text
    */
   public static String removeWord(String S, String W) {
      int wordLength = W.length();
      int[] Z = constructZArray(W + '$' + S);

      StringBuilder result = new StringBuilder(S.length());
      // Single sweep: removeUntil is one past the last character covered by
      // the most recent occurrence of W seen so far.
      int removeUntil = 0;
      for (int pos = 0; pos < S.length(); pos++) {
         if (Z[wordLength + 1 + pos] >= wordLength) {
            removeUntil = pos + wordLength;
         }
         if (pos >= removeUntil) {
            result.append(S.charAt(pos));
         }
      }
      return result.toString();
   }

   /** Demo driver: strips every "writing" from the sample text and prints it. */
   public static void main(String[] args) {
      String S = "Iamwritingwriting";
      String W = "writing";
      System.out.println("String after removal: " + removeWord(S, W));
   }
}

Output

String after removal: Iam
def construct_z_array(string):
   """Return the Z-array of ``string``.

   For i >= 1, entry i is the length of the longest substring starting at i
   that is also a prefix of ``string``. Entry 0 is left at 0.
   """
   size = len(string)
   z_values = [0] * size
   # [box_start, box_end) is the rightmost segment known to match a prefix.
   box_start = box_end = 0
   for i in range(1, size):
      # Inside the box, reuse the mirrored value, capped by the room left.
      matched = min(box_end - i, z_values[i - box_start]) if i < box_end else 0
      # Extend the match by direct comparison.
      while i + matched < size and string[matched] == string[i + matched]:
         matched += 1
      z_values[i] = matched
      if i + matched > box_end:
         box_start, box_end = i, i + matched
   return z_values


def remove_word(S, W):
   """Return S with every character belonging to an occurrence of W removed.

   Overlapping occurrences are all removed; an empty W removes nothing.

   The search runs the Z-algorithm over W + '$' + S. An occurrence starts at
   S[pos] when the corresponding Z-value is at least len(W); "at least" rather
   than "equal" keeps the result correct even when S contains '$' and the
   match runs past the separator.
   """
   len_W = len(W)
   Z = construct_z_array(W + '$' + S)

   kept = []
   # Single sweep: remove_until is one past the last character covered by the
   # most recent occurrence of W seen so far.
   remove_until = 0
   for pos, ch in enumerate(S):
      if Z[len_W + 1 + pos] >= len_W:
         remove_until = pos + len_W
      if pos >= remove_until:
         kept.append(ch)
   return ''.join(kept)


# Demo: strip every "writing" from the sample text and print what is left.
S, W = "Iamwritingwriting", "writing"
print("String after removal:", remove_word(S, W))

Output

String after removal: Iam

Testcase Example

Let's consider an example −

Suppose S = "Iamwritingwriting" and W = "writing". The program will output "Iam". Here's why −

  • The new string P becomes "writing$Iamwritingwriting".

  • After applying the Z-algorithm, we find that Z[11] and Z[18] are equal to the length of W (7). Subtracting the 8 characters of "writing$" from these positions shows that W is present in S starting at indices 3 and 10.

  • We then remove W from these indices in S, resulting in the string "Iam".

Conclusion

The Z-algorithm is a powerful tool for pattern searching problems. In this article, we saw its application in removing all occurrences of a word from a string. This problem is a great example showcasing the benefits of understanding and applying string matching algorithms. Always remember, understanding and learning algorithms open up ways to solve complex problems.

Updated on: 27-Oct-2023

184 Views

Kickstart Your Career

Get certified by completing the course

Get Started
Advertisements