 
 Data Structure Data Structure
 Networking Networking
 RDBMS RDBMS
 Operating System Operating System
 Java Java
 MS Excel MS Excel
 iOS iOS
 HTML HTML
 CSS CSS
 Android Android
 Python Python
 C Programming C Programming
 C++ C++
 C# C#
 MongoDB MongoDB
 MySQL MySQL
 Javascript Javascript
 PHP PHP
- Selected Reading
- UPSC IAS Exams Notes
- Developer's Best Practices
- Questions and Answers
- Effective Resume Writing
- HR Interview Questions
- Computer Glossary
- Who is Who
HTML Entity Parser in C++
Suppose we have a string; we have to design an HTML entity parser that replaces the special-character entities of HTML syntax with the characters they stand for. An HTML entity parser takes HTML code as input and replaces every special-character entity with the character itself. These are the special characters and their entities for HTML −
- Quotation Mark − the entity is &quot; and symbol character is ". 
- Single Quote Mark − the entity is &apos; and symbol character is '. 
- Ampersand − the entity is &amp; and symbol character is &. 
- Greater Than Sign − the entity is &gt; and symbol character is >. 
- Less Than Sign − the entity is &lt; and symbol character is <. 
- Slash − the entity is &frasl; and symbol character is /. 
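So, if the input is like "&amp; is changed but &ambassador; is not.", then the output will be "& is changed but &ambassador; is not." Here &amp; is a known entity and is replaced by &, while &ambassador; is not a known entity and is left unchanged.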
To solve this, we will follow these steps −
- Define an array v = initialize v by splitting string using space 
- ret := empty string 
- Define one map m, this will hold each HTML entity as key and the corresponding special character as value 
- 
for initialize i := 0, when i < size of v, update (increase i by 1), do − - s := v[i] 
- temp := empty string 
- n := size of v[i] 
- k := 0 
- 
while k < n, do − - 
if v[i, k] is same as '&', then − - temp := temp + v[i, k] 
- (increase k by 1) 
- 
while (k < n and v[i, k] is not equal to ';'), do − - temp := temp + v[i, k] 
- (increase k by 1) 
 
- temp := temp + v[i, k] 
- (increase k by 1) 
- 
if temp is member of m, then − - ret := ret + m[temp] 
 
- 
Otherwise - ret := ret + temp 
 
- temp := empty string 
 
- 
Otherwise - ret := ret + v[i, k] 
- (increase k by 1) 
 
 
- 
- 
if size of temp is not 0 and temp is member of m, then − - ret := ret concatenate m[temp] 
 
- 
otherwise when size of temp is not 0, then − - ret := ret concatenate temp 
 
- 
if i is not equal to (size of v - 1), then − - ret := ret concatenate blank space 
 
 
- return ret 
Example
Let us see the following implementation to get a better understanding −
#include <bits/stdc++.h>
using namespace std;
/// Decodes a fixed set of HTML character entities (&quot; &apos; &amp; &gt;
/// &lt; &frasl;) back into the characters they represent.
class Solution {
public:
   /// Splits `s` into the pieces separated by `delimiter`.
   ///
   /// Consecutive delimiters yield empty tokens. Because the pieces are read
   /// with std::getline, a delimiter at the very end of `s` does not produce
   /// a final empty token.
   ///
   /// @param s          text to split (not modified)
   /// @param delimiter  separator character
   /// @return the tokens in order of appearance
   std::vector<std::string> split(const std::string& s, char delimiter){
      std::vector<std::string> tokens;
      std::string token;
      std::istringstream tokenStream(s);
      while(std::getline(tokenStream, token, delimiter)){
         tokens.push_back(token);
      }
      return tokens;
   }

   /// Debug helper: prints every string of `v` on its own line to std::cout.
   void out(const std::vector<std::string>& v){
      for(const std::string& s : v) std::cout << s << '\n';
   }

   /// Replaces every supported HTML entity in `text` with its character.
   ///
   /// The text is scanned once from left to right. Anything that is not one
   /// of the supported entities -- unknown names such as "&ambassador;", a
   /// lone '&', or an '&' with no closing ';' -- is copied through unchanged.
   /// Decoded output is never re-scanned, so "&amp;gt;" becomes "&gt;".
   /// All whitespace (including leading/trailing spaces) is preserved.
   ///
   /// @param text  input possibly containing HTML entities
   /// @return the decoded text
   std::string entityParser(std::string text) {
      // Entity spelling (including the leading '&' and trailing ';') -> symbol.
      static const std::map<std::string, char> m = {
         {"&quot;", '"'},
         {"&apos;", '\''},
         {"&amp;", '&'},
         {"&gt;", '>'},
         {"&lt;", '<'},
         {"&frasl;", '/'},
      };

      const std::size_t n = text.size();
      std::string ret;
      ret.reserve(n);

      std::size_t i = 0;
      while (i < n) {
         if (text[i] != '&') {
            ret += text[i];
            ++i;
            continue;
         }

         // Find the end of the candidate entity. Stopping at a second '&'
         // lets an entity that directly follows a stray '&' (e.g. "&&amp;")
         // still be recognised on the next iteration.
         std::size_t j = i + 1;
         while (j < n && text[j] != ';' && text[j] != '&') {
            ++j;
         }

         if (j < n && text[j] == ';') {
            const std::map<std::string, char>::const_iterator it =
               m.find(text.substr(i, j - i + 1));
            if (it != m.end()) {
               ret += it->second;
               i = j + 1;
               continue;
            }
         }

         // Not a known entity: copy the scanned run verbatim. The character
         // at j (';', '&' or end of text) is handled by the next iteration,
         // so nothing past the end of the string is ever read.
         ret.append(text, i, j - i);
         i = j;
      }
      return ret;
   }
};
main(){
   Solution ob;
   cout << (ob.entityParser("& is changed but &ambassador; is not."));
}
Input
"&amp; is changed but &ambassador; is not."
Output
& is changed but &ambassador; is not.
