Program to detect tokens in a C code
Lexical analysis, performed by the scanner, is the first phase of a compiler. It converts the input program into a sequence of tokens.
A C program consists of various tokens and a token is either a keyword, an identifier, a constant, a string literal, or a symbol.
For Example:
- Keywords: for, while, if etc.
- Identifier: Variable name, function name etc.
- Operators: '+', '++', '-' etc.
- Separators: ',', ';' etc.
Example: For input "int a = b + 1c;", it will identify "int" as a keyword, "a" as an identifier, "=" as an operator, etc.
Approach :
- The idea is to break the input string (a piece of C code) into tokens such as keywords, identifiers, operators, integers, and real numbers.
- It checks each token using helper functions (isKeyword, isInteger, validIdentifier, etc.) and prints its category.
Below is a program that prints all the keywords, numeric literals (integers and real numbers), operators, valid identifiers, and invalid identifiers found in a given piece of C code:
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Reports whether `ch` ends a token: a space, an arithmetic or comparison
// symbol, a comma, a semicolon, or any round/square/curly bracket.
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '+': case '-': case '*':
    case '/': case ',': case ';': case '>':
    case '<': case '=': case '(': case ')':
    case '[': case ']': case '{': case '}':
        return true;
    default:
        return false;
    }
}
// Reports whether `ch` is one of the single-character operators
// recognised by the scanner: + - * / > < =
bool isOperator(char ch)
{
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return true;
    default:
        return false;
    }
}
// Returns 'true' if the string is a VALID IDENTIFIER.
// A valid identifier is non-empty, begins with a letter or an underscore,
// and contains only letters, digits and underscores after that.
// A NULL or empty string is never a valid identifier.
bool validIdentifier(char* str)
{
    int i;
    if (str == NULL || str[0] == '\0')
        return (false);
    for (i = 0; str[i] != '\0'; i++) {
        char c = str[i];
        // Range tests on letters assume a contiguous alphabet (true for ASCII).
        bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                        || c == '_';
        bool isDigit = (c >= '0' && c <= '9');
        // Digits are allowed everywhere except in the first position.
        if (!isLetter && !(isDigit && i > 0))
            return (false);
    }
    return (true);
}
// Returns 'true' if the string is a KEYWORD.
// The table holds the 32 keywords of C89 plus 'inline' and 'restrict';
// the comparison is case-sensitive and must match the whole string.
bool isKeyword(char* str)
{
    static const char* const keywords[] = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "inline", "int", "long", "register", "restrict", "return",
        "short", "signed", "sizeof", "static", "struct", "switch",
        "typedef", "union", "unsigned", "void", "volatile", "while"
    };
    size_t i;
    for (i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return (true);
    }
    return (false);
}
// Reports whether `str` is a non-empty run of decimal digits.
// No sign is accepted: any character other than '0'..'9' makes the
// result 'false'.
bool isInteger(char* str)
{
    const char* cursor = str;

    if (*cursor == '\0')
        return false;
    while (*cursor != '\0') {
        if (*cursor < '0' || *cursor > '9')
            return false;
        cursor++;
    }
    return true;
}
// Returns 'true' if the string is a REAL NUMBER.
// A real number here is an unsigned decimal literal made of digits and
// exactly one decimal point, with at least one digit ("1.5", ".5", "5.").
// Strings such as ".", "1.2.3", "12" or "" are rejected.
bool isRealNumber(char* str)
{
    int i;
    int digits = 0;
    int points = 0;
    for (i = 0; str[i] != '\0'; i++) {
        if (str[i] >= '0' && str[i] <= '9')
            digits++;
        else if (str[i] == '.')
            points++;
        else
            return (false);
    }
    // A lone "." has no digits; "1.2.3" has more than one point.
    return (points == 1 && digits > 0);
}
// Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
// newly allocated, NUL-terminated buffer.
// Returns NULL if str is NULL, left is negative, right < left - 1, or the
// allocation fails. right == left - 1 yields an empty string.
// The caller owns the returned buffer and must release it with free().
char* subString(char* str, int left, int right)
{
    int count = right - left + 1;
    char* subStr;
    if (str == NULL || left < 0 || count < 0)
        return (NULL);
    subStr = malloc((size_t)count + 1);
    if (subStr == NULL)
        return (NULL);
    memcpy(subStr, str + left, (size_t)count);
    subStr[count] = '\0';
    return (subStr);
}
// Parsing the input STRING.
// Scans str from left to right and prints one line per token:
//  - a delimiter that is also an operator is reported as an operator;
//    other delimiters (spaces, commas, brackets, ...) are skipped silently;
//  - every maximal run of non-delimiter characters is classified, in this
//    order, as keyword, integer, real number, valid identifier or invalid
//    identifier.
// The scan never reads beyond the terminating NUL, and each temporary
// substring is freed after it has been reported.
void parse(char* str)
{
    int len = (int)strlen(str);
    int left = 0;
    while (left < len) {
        int right;
        char* subStr;
        if (isDelimiter(str[left]) == true) {
            if (isOperator(str[left]) == true)
                printf("'%c' IS AN OPERATOR\n", str[left]);
            left++;
            continue;
        }
        // Advance to the first delimiter (or the end of the input).
        right = left;
        while (right < len && isDelimiter(str[right]) == false)
            right++;
        subStr = subString(str, left, right - 1);
        if (subStr == NULL)
            return; // out of memory: nothing sensible left to report
        if (isKeyword(subStr) == true)
            printf("'%s' IS A KEYWORD\n", subStr);
        else if (isInteger(subStr) == true)
            printf("'%s' IS AN INTEGER\n", subStr);
        else if (isRealNumber(subStr) == true)
            printf("'%s' IS A REAL NUMBER\n", subStr);
        else if (validIdentifier(subStr) == true)
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        else
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
        free(subStr);
        left = right;
    }
}
// Program entry point: runs the scanner on a fixed sample statement.
int main()
{
    // The buffer has room for 99 characters plus the terminator.
    char input[100] = "int a = b + 1c; ";

    parse(input);
    return 0;
}
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Reports whether `ch` ends a token: a space, an arithmetic or comparison
// symbol, a comma, a semicolon, or any round/square/curly bracket.
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '+': case '-': case '*':
    case '/': case ',': case ';': case '>':
    case '<': case '=': case '(': case ')':
    case '[': case ']': case '{': case '}':
        return true;
    default:
        return false;
    }
}
// Reports whether `ch` is one of the single-character operators
// recognised by the scanner: + - * / > < =
bool isOperator(char ch)
{
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return true;
    default:
        return false;
    }
}
// Returns 'true' if the string is a VALID IDENTIFIER.
// A valid identifier is non-empty, begins with a letter or an underscore,
// and contains only letters, digits and underscores after that.
// A NULL or empty string is never a valid identifier.
bool validIdentifier(char* str)
{
    int i;
    if (str == NULL || str[0] == '\0')
        return (false);
    for (i = 0; str[i] != '\0'; i++) {
        char c = str[i];
        // Range tests on letters assume a contiguous alphabet (true for ASCII).
        bool isLetter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
                        || c == '_';
        bool isDigit = (c >= '0' && c <= '9');
        // Digits are allowed everywhere except in the first position.
        if (!isLetter && !(isDigit && i > 0))
            return (false);
    }
    return (true);
}
// Returns 'true' if the string is a KEYWORD.
// The table holds the 32 keywords of C89 plus 'inline' and 'restrict';
// the comparison is case-sensitive and must match the whole string.
bool isKeyword(char* str)
{
    static const char* const keywords[] = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "inline", "int", "long", "register", "restrict", "return",
        "short", "signed", "sizeof", "static", "struct", "switch",
        "typedef", "union", "unsigned", "void", "volatile", "while"
    };
    size_t i;
    for (i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return (true);
    }
    return (false);
}
// Reports whether `str` is a non-empty run of decimal digits.
// No sign is accepted: any character other than '0'..'9' makes the
// result 'false'.
bool isInteger(char* str)
{
    const char* cursor = str;

    if (*cursor == '\0')
        return false;
    while (*cursor != '\0') {
        if (*cursor < '0' || *cursor > '9')
            return false;
        cursor++;
    }
    return true;
}
// Returns 'true' if the string is a REAL NUMBER.
// A real number here is an unsigned decimal literal made of digits and
// exactly one decimal point, with at least one digit ("1.5", ".5", "5.").
// Strings such as ".", "1.2.3", "12" or "" are rejected.
bool isRealNumber(char* str)
{
    int i;
    int digits = 0;
    int points = 0;
    for (i = 0; str[i] != '\0'; i++) {
        if (str[i] >= '0' && str[i] <= '9')
            digits++;
        else if (str[i] == '.')
            points++;
        else
            return (false);
    }
    // A lone "." has no digits; "1.2.3" has more than one point.
    return (points == 1 && digits > 0);
}
// Extracts the SUBSTRING str[left..right] (both bounds inclusive) into a
// newly allocated, NUL-terminated buffer.
// Returns NULL if str is NULL, left is negative, right < left - 1, or the
// allocation fails. right == left - 1 yields an empty string.
// The caller owns the returned buffer and must release it with free().
char* subString(char* str, int left, int right)
{
    int count = right - left + 1;
    char* subStr;
    if (str == NULL || left < 0 || count < 0)
        return (NULL);
    subStr = malloc((size_t)count + 1);
    if (subStr == NULL)
        return (NULL);
    memcpy(subStr, str + left, (size_t)count);
    subStr[count] = '\0';
    return (subStr);
}
// Parsing the input STRING.
// Scans str from left to right and prints one line per token:
//  - a delimiter that is also an operator is reported as an operator;
//    other delimiters (spaces, commas, brackets, ...) are skipped silently;
//  - every maximal run of non-delimiter characters is classified, in this
//    order, as keyword, integer, real number, valid identifier or invalid
//    identifier.
// The scan never reads beyond the terminating NUL, and each temporary
// substring is freed after it has been reported.
void parse(char* str)
{
    int len = (int)strlen(str);
    int left = 0;
    while (left < len) {
        int right;
        char* subStr;
        if (isDelimiter(str[left]) == true) {
            if (isOperator(str[left]) == true)
                printf("'%c' IS AN OPERATOR\n", str[left]);
            left++;
            continue;
        }
        // Advance to the first delimiter (or the end of the input).
        right = left;
        while (right < len && isDelimiter(str[right]) == false)
            right++;
        subStr = subString(str, left, right - 1);
        if (subStr == NULL)
            return; // out of memory: nothing sensible left to report
        if (isKeyword(subStr) == true)
            printf("'%s' IS A KEYWORD\n", subStr);
        else if (isInteger(subStr) == true)
            printf("'%s' IS AN INTEGER\n", subStr);
        else if (isRealNumber(subStr) == true)
            printf("'%s' IS A REAL NUMBER\n", subStr);
        else if (validIdentifier(subStr) == true)
            printf("'%s' IS A VALID IDENTIFIER\n", subStr);
        else
            printf("'%s' IS NOT A VALID IDENTIFIER\n", subStr);
        free(subStr);
        left = right;
    }
}
// Program entry point: runs the scanner on a fixed sample statement.
int main()
{
    // The buffer has room for 99 characters plus the terminator.
    char input[100] = "int a = b + 1c; ";

    parse(input);
    return 0;
}
import java.util.Arrays;
/**
 * Minimal lexical scanner for C-like source text. It splits a string on
 * delimiter characters and prints the category of every token found.
 */
public class Parser {
    // Characters that end a token. Plain literals: '+', '-', '[' and ']'
    // need no escaping in a Java string, and "\+" or "\[" do not compile.
    private static final String DELIMITERS = " +-*/,;><=()[]{}";
    private static final String OPERATORS = "+-*/><=";
    // The 32 keywords of C89 plus 'inline' and 'restrict'.
    private static final String[] KEYWORDS = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "inline", "int", "long", "register", "restrict", "return",
        "short", "signed", "sizeof", "static", "struct", "switch",
        "typedef", "union", "unsigned", "void", "volatile", "while"
    };

    /** Returns true if {@code ch} separates tokens. */
    public static boolean isDelimiter(char ch) {
        return DELIMITERS.indexOf(ch) != -1;
    }

    /** Returns true if {@code ch} is one of the single-character operators. */
    public static boolean isOperator(char ch) {
        return OPERATORS.indexOf(ch) != -1;
    }

    private static boolean isAsciiDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }

    private static boolean isIdentifierStart(char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '_';
    }

    /**
     * Returns true if {@code str} is non-empty, starts with a letter or
     * underscore and otherwise contains only letters, digits and underscores.
     */
    public static boolean validIdentifier(String str) {
        if (str.isEmpty() || !isIdentifierStart(str.charAt(0)))
            return false;
        for (int i = 1; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (!isIdentifierStart(ch) && !isAsciiDigit(ch))
                return false;
        }
        return true;
    }

    /** Returns true if {@code str} exactly matches a C keyword. */
    public static boolean isKeyword(String str) {
        return Arrays.asList(KEYWORDS).contains(str);
    }

    /**
     * Returns true if {@code str} is an optional leading '-' followed by at
     * least one decimal digit and nothing else.
     */
    public static boolean isInteger(String str) {
        int start = (!str.isEmpty() && str.charAt(0) == '-') ? 1 : 0;
        // Rejects both "" and a bare "-".
        if (start == str.length())
            return false;
        for (int i = start; i < str.length(); i++) {
            if (!isAsciiDigit(str.charAt(i)))
                return false;
        }
        return true;
    }

    /**
     * Returns true if {@code str} is an optional leading '-' followed by
     * digits containing exactly one decimal point and at least one digit.
     */
    public static boolean isRealNumber(String str) {
        int start = (!str.isEmpty() && str.charAt(0) == '-') ? 1 : 0;
        int digits = 0;
        int points = 0;
        for (int i = start; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (isAsciiDigit(ch))
                digits++;
            else if (ch == '.')
                points++;
            else
                return false;
        }
        return points == 1 && digits > 0;
    }

    /** Returns the characters of {@code str} from {@code left} to {@code right}, both inclusive. */
    public static String subString(String str, int left, int right) {
        return str.substring(left, right + 1);
    }

    /**
     * Scans {@code str} from left to right and prints one line per token.
     * Operator delimiters are reported; other delimiters are skipped. Every
     * run of non-delimiter characters is classified as keyword, integer,
     * real number, valid identifier or invalid identifier, in that order.
     */
    public static void parse(String str) {
        int len = str.length();
        int left = 0;
        while (left < len) {
            char current = str.charAt(left);
            if (isDelimiter(current)) {
                if (isOperator(current))
                    System.out.println("'" + current + "' IS AN OPERATOR");
                left++;
                continue;
            }
            // Bounds are checked before every charAt call, so the scan
            // never indexes position len.
            int right = left;
            while (right < len && !isDelimiter(str.charAt(right)))
                right++;
            String subStr = subString(str, left, right - 1);
            if (isKeyword(subStr))
                System.out.println("'" + subStr + "' IS A KEYWORD");
            else if (isInteger(subStr))
                System.out.println("'" + subStr + "' IS AN INTEGER");
            else if (isRealNumber(subStr))
                System.out.println("'" + subStr + "' IS A REAL NUMBER");
            else if (validIdentifier(subStr))
                System.out.println("'" + subStr + "' IS A VALID IDENTIFIER");
            else
                System.out.println("'" + subStr + "' IS NOT A VALID IDENTIFIER");
            left = right;
        }
    }

    /** Runs the scanner on a fixed sample statement. */
    public static void main(String[] args) {
        String str = "int a = b + 1c; ";
        parse(str);
    }
}
def is_delimiter(ch):
    """Return True if the single character `ch` separates tokens.

    Delimiters are the space, the arithmetic and comparison symbols, the
    comma, the semicolon and all round, square and curly brackets. The
    brackets are written without backslashes: in a normal string literal
    "\\[" is not an escape, so it would add the backslash itself to the set.
    """
    return len(ch) == 1 and ch in ' +-*/,;><=()[]{}'
def is_operator(ch):
    """Return True if `ch` occurs in the operator string '+-*/><='."""
    position = '+-*/><='.find(ch)
    return position != -1
def valid_identifier(str):
    """Return True if `str` is a valid C-style identifier.

    The string must be non-empty, start with an ASCII letter or underscore,
    and contain only ASCII letters, digits and underscores after that.
    """
    if not str:
        return False
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
    digits = '0123456789'
    # Digits are allowed everywhere except in the first position.
    if str[0] not in letters:
        return False
    return all(ch in letters or ch in digits for ch in str[1:])
def is_keyword(str):
    """Return True if `str` exactly matches a C keyword.

    The table holds the 32 keywords of C89 plus 'inline' and 'restrict'.
    """
    keywords = (
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "inline", "int", "long", "register", "restrict", "return",
        "short", "signed", "sizeof", "static", "struct", "switch",
        "typedef", "union", "unsigned", "void", "volatile", "while",
    )
    return str in keywords
def is_integer(str):
    """Return True if `str` is an optional leading '-' followed by digits.

    At least one ASCII digit is required, so '' and a bare '-' are rejected.
    """
    digits = str[1:] if str[:1] == '-' else str
    # An explicit range test is used because str.isdigit() also accepts
    # non-ASCII digit characters such as superscripts.
    return digits != '' and all('0' <= ch <= '9' for ch in digits)
def is_real_number(str):
    """Return True if `str` is a decimal literal with a fractional point.

    Accepted form: an optional leading '-', then ASCII digits containing
    exactly one '.', with at least one digit ('1.5', '.5', '5.', '-0.5').
    Strings such as '.', '-.', '1.2.3', '12' and '' are rejected.
    """
    body = str[1:] if str[:1] == '-' else str
    if body.count('.') != 1:
        return False
    digits = body.replace('.', '', 1)
    return digits != '' and all('0' <= ch <= '9' for ch in digits)
def sub_string(str, left, right):
    """Return the slice of `str` from index `left` through index `right`."""
    # Slices exclude their end index, so step one past `right`.
    end = right + 1
    return str[left:end]
def parse(str):
    """Scan `str` from left to right and print one line per token.

    Delimiters that are operators are reported as operators; all other
    delimiters are skipped. Every run of non-delimiter characters is
    classified, in this order, as keyword, integer, real number, valid
    identifier or invalid identifier.
    """
    length = len(str)
    left = 0
    while left < length:
        if is_delimiter(str[left]):
            if is_operator(str[left]):
                print(f"'{str[left]}' IS AN OPERATOR")
            left += 1
            continue
        # The bound is tested before indexing, so str[length] is never
        # touched (that access would raise IndexError).
        right = left
        while right < length and not is_delimiter(str[right]):
            right += 1
        sub_str = sub_string(str, left, right - 1)
        if is_keyword(sub_str):
            print(f"'{sub_str}' IS A KEYWORD")
        elif is_integer(sub_str):
            print(f"'{sub_str}' IS AN INTEGER")
        elif is_real_number(sub_str):
            print(f"'{sub_str}' IS A REAL NUMBER")
        elif valid_identifier(sub_str):
            print(f"'{sub_str}' IS A VALID IDENTIFIER")
        else:
            print(f"'{sub_str}' IS NOT A VALID IDENTIFIER")
        left = right
if __name__ == '__main__':
    # Sample statement to scan. The variable is not called `str`, so the
    # built-in type of that name stays usable at module level.
    source = 'int a = b + 1c; '
    parse(source)
Output:
'int' IS A KEYWORD
'a' IS A VALID IDENTIFIER
'=' IS AN OPERATOR
'b' IS A VALID IDENTIFIER
'+' IS AN OPERATOR
'1c' IS NOT A VALID IDENTIFIER