Open In App

Program to detect tokens in a C code

Last Updated : 27 Aug, 2025
Comments
Improve
Suggest changes
50 Likes
Like
Report

Lexical analysis, also known as scanning, is the first phase of a compiler. It converts the input program into a sequence of tokens.
A C program consists of various tokens and a token is either a keyword, an identifier, a constant, a string literal, or a symbol.
For Example: 

  • Keywords: for, while, if etc.
  • Identifier: Variable name, function name etc.
  • Operators: '+', '++', '-' etc.
  • Separators: ',', ';' etc.

Example: For input "int a = b + 1c;", it will identify "int" as a keyword, "a" as an identifier, "=" as an operator, etc.

Approach :

  • The idea is to break the input string (C code) into tokens such as keywords, identifiers, operators, integers, and real numbers.
  • It checks each token using helper functions (isKeyword, isInteger, validIdentifier, etc.) and prints its category.

Below is a program that prints all the keywords, operators, valid identifiers, invalid identifiers, integers, and real numbers found in a given piece of C code:

C++
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Tells whether `ch` ends a token: a blank, an arithmetic or relational
// symbol, a comma, a semicolon, or any kind of bracket.
// Every other character, including the NUL terminator, yields 'false'.
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '+': case '-': case '*':
    case '/': case ',': case ';': case '>':
    case '<': case '=': case '(': case ')':
    case '[': case ']': case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Tells whether `ch` is one of the single-character operators
// recognised by this scanner: + - * / > < =
bool isOperator(char ch)
{
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the string is a VALID IDENTIFIER.
//
// A valid identifier is non-empty, begins with a letter or '_', and
// consists only of letters, digits and '_'. Every character is checked,
// so strings such as "a.b", "x#" and "" are rejected. Keywords are not
// filtered out here. The string is only read, never modified.
bool validIdentifier(char* str)
{
    // Explicit character sets keep the check independent of locale and charset.
    static const char letters[] =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
    static const char digits[] = "0123456789";
    int i;

    if (str == NULL || str[0] == '\0')
        return (false);

    for (i = 0; str[i] != '\0'; i++) {
        if (strchr(letters, str[i]) != NULL)
            continue;
        // Digits are allowed anywhere except the first position.
        if (i > 0 && strchr(digits, str[i]) != NULL)
            continue;
        return (false);
    }
    return (true);
}

// Returns 'true' if the string is a KEYWORD.
//
// The table holds the C keywords up to C99. The comparison is exact and
// case-sensitive. A NULL pointer is treated as "not a keyword".
bool isKeyword(char* str)
{
    static const char* const keywords[] = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "inline", "int", "long", "register", "restrict", "return",
        "short", "signed", "sizeof", "static", "struct", "switch",
        "typedef", "union", "unsigned", "void", "volatile", "while",
        "_Bool", "_Complex", "_Imaginary"
    };
    size_t i;

    if (str == NULL)
        return (false);

    for (i = 0; i < sizeof keywords / sizeof keywords[0]; i++) {
        if (strcmp(str, keywords[i]) == 0)
            return (true);
    }
    return (false);
}

// Returns 'true' if the string is an INTEGER.
//
// Accepted form: an optional leading '-' followed by one or more decimal
// digits. The empty string and a lone "-" are rejected.
bool isInteger(char* str)
{
    size_t i = 0;

    if (str == NULL)
        return (false);

    // A sign is only meaningful in the first position.
    if (str[0] == '-')
        i = 1;

    // At least one digit must follow (rejects "" and "-").
    if (str[i] == '\0')
        return (false);

    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9')
            return (false);
    }
    return (true);
}

// Returns 'true' if the string is a REAL NUMBER.
//
// Accepted form: an optional leading '-', then decimal digits containing
// exactly one '.', with at least one digit overall ("3.14", ".5", "5.").
// Strings without a '.', with several dots, or without any digit are rejected.
bool isRealNumber(char* str)
{
    size_t i = 0;
    int digitCount = 0;
    int dotCount = 0;

    if (str == NULL)
        return (false);

    // A sign is only meaningful in the first position.
    if (str[0] == '-')
        i = 1;

    for (; str[i] != '\0'; i++) {
        if (str[i] == '.')
            dotCount++;
        else if (str[i] >= '0' && str[i] <= '9')
            digitCount++;
        else
            return (false);
    }
    return (dotCount == 1 && digitCount > 0);
}

// Extracts the SUBSTRING str[left..right] (both ends inclusive).
//
// Returns a newly allocated, NUL-terminated copy that the caller must
// release with free(). When right < left the result is an empty string.
// Returns NULL if the allocation fails. The caller must make sure that
// left and right index into `str`.
char* subString(char* str, int left, int right)
{
    // Clamp an inverted range to zero so the allocation size stays sane.
    size_t count = (right >= left) ? (size_t)(right - left) + 1 : 0;
    char* subStr = (char*)malloc(count + 1);

    if (subStr == NULL)
        return (NULL);

    if (count > 0)
        memcpy(subStr, str + left, count);
    subStr[count] = '\0';
    return (subStr);
}

// Parsing the input STRING.
//
// Walks `str` from left to right. Each delimiter is consumed on its own
// (and reported when it is also an operator). Each maximal run of
// non-delimiter characters becomes one token, which is classified as a
// keyword, integer, real number, valid identifier or invalid identifier
// and printed to stdout. The scan never reads past the NUL terminator and
// every token buffer is freed after it has been reported.
void parse(char* str)
{
    int len;
    int pos = 0;

    if (str == NULL)
        return;
    len = (int)strlen(str);

    while (pos < len) {
        int start;
        char* token;

        if (isDelimiter(str[pos]) == true) {
            if (isOperator(str[pos]) == true)
                printf("'%c' IS AN OPERATOR\n", str[pos]);
            pos++;
            continue;
        }

        // Collect the whole run of non-delimiter characters.
        start = pos;
        while (pos < len && isDelimiter(str[pos]) == false)
            pos++;

        token = subString(str, start, pos - 1);
        if (token == NULL) {
            fprintf(stderr, "parse: out of memory\n");
            return;
        }

        // Order matters: keywords and numbers are tested before the
        // identifier check.
        if (isKeyword(token) == true)
            printf("'%s' IS A KEYWORD\n", token);
        else if (isInteger(token) == true)
            printf("'%s' IS AN INTEGER\n", token);
        else if (isRealNumber(token) == true)
            printf("'%s' IS A REAL NUMBER\n", token);
        else if (validIdentifier(token) == true)
            printf("'%s' IS A VALID IDENTIFIER\n", token);
        else
            printf("'%s' IS NOT A VALID IDENTIFIER\n", token);

        free(token);
    }
}

// Program entry point: runs the scanner over one fixed sample statement.
int main()
{
    // 100-byte buffer; the sample text sits at the front and the
    // remaining bytes are zero-filled by the initializer.
    char sample[100] = "int a = b + 1c; ";

    parse(sample);
    return 0;
}
C
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

// Reports whether `ch` separates tokens. The separator set is a blank,
// the symbols + - * / > < =, a comma, a semicolon and all bracket kinds.
bool isDelimiter(char ch)
{
    static const char separators[] = " +-*/,;><=()[]{}";

    // strchr() would also match the terminating NUL, so exclude it first.
    if (ch == '\0')
        return false;
    return strchr(separators, ch) != NULL;
}

// Reports whether `ch` is one of the operator characters + - * / > < =
bool isOperator(char ch)
{
    static const char operators[] = "+-*/><=";

    // strchr() would also match the terminating NUL, so exclude it first.
    if (ch == '\0')
        return false;
    return strchr(operators, ch) != NULL;
}

// Returns 'true' if the string is a VALID IDENTIFIER.
//
// The whole string is validated, not just its first character: it must be
// non-empty, start with a letter or '_', and continue with letters,
// digits or '_' only. Keywords are not excluded by this function.
bool validIdentifier(char* str)
{
    static const char first_chars[] =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
    static const char later_chars[] =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789";
    const char *p;

    if (str == NULL || *str == '\0')
        return false;

    // The leading character may not be a digit.
    if (strchr(first_chars, *str) == NULL)
        return false;

    for (p = str + 1; *p != '\0'; p++) {
        if (strchr(later_chars, *p) == NULL)
            return false;
    }
    return true;
}

// Returns 'true' if the string is a KEYWORD.
//
// Looks the string up, case-sensitively, in a NULL-terminated table of
// the C keywords up to C99. A NULL argument yields 'false'.
bool isKeyword(char* str)
{
    static const char *const keyword_table[] = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "inline", "int", "long", "register", "restrict", "return",
        "short", "signed", "sizeof", "static", "struct", "switch",
        "typedef", "union", "unsigned", "void", "volatile", "while",
        "_Bool", "_Complex", "_Imaginary",
        NULL
    };
    const char *const *entry;

    if (str == NULL)
        return false;

    for (entry = keyword_table; *entry != NULL; entry++) {
        if (strcmp(str, *entry) == 0)
            return true;
    }
    return false;
}

// Returns 'true' if the string is an INTEGER.
//
// An integer is an optional leading '-' followed by at least one decimal
// digit and nothing else. "" and "-" are therefore not integers.
bool isInteger(char* str)
{
    const char *p = str;

    if (p == NULL)
        return false;

    // Skip a sign, which is accepted in the first position only.
    if (*p == '-')
        p++;

    // Require at least one digit after the optional sign.
    if (*p == '\0')
        return false;

    while (*p != '\0') {
        if (*p < '0' || *p > '9')
            return false;
        p++;
    }
    return true;
}

// Returns 'true' if the string is a REAL NUMBER.
//
// A real number is an optional leading '-', followed by decimal digits
// with exactly one '.' among them and at least one digit in total.
// Integers (no '.'), multi-dot strings and digit-less strings are rejected.
bool isRealNumber(char* str)
{
    const char *p = str;
    bool seenDot = false;
    bool seenDigit = false;

    if (p == NULL)
        return false;

    // Skip a sign, which is accepted in the first position only.
    if (*p == '-')
        p++;

    for (; *p != '\0'; p++) {
        if (*p == '.') {
            // A second decimal point makes the token malformed.
            if (seenDot)
                return false;
            seenDot = true;
        } else if (*p >= '0' && *p <= '9') {
            seenDigit = true;
        } else {
            return false;
        }
    }
    return seenDot && seenDigit;
}

// Extracts the SUBSTRING str[left..right], both bounds inclusive.
//
// The result is a freshly allocated NUL-terminated string owned by the
// caller, who must free() it. An inverted range (right < left) produces
// an empty string. NULL is returned when memory cannot be allocated.
// `left` and `right` must be valid indices into `str`.
char* subString(char* str, int left, int right)
{
    size_t length = 0;
    char *subStr;

    // Only a non-inverted range contributes characters.
    if (right >= left)
        length = (size_t)(right - left) + 1;

    subStr = malloc(length + 1);
    if (subStr == NULL)
        return NULL;

    if (length > 0)
        memcpy(subStr, str + left, length);
    subStr[length] = '\0';
    return subStr;
}

// Prints the category of one already-extracted token to stdout.
// Keywords and numbers are tested before the identifier check.
static void reportToken(char* token)
{
    if (isKeyword(token) == true)
        printf("'%s' IS A KEYWORD\n", token);
    else if (isInteger(token) == true)
        printf("'%s' IS AN INTEGER\n", token);
    else if (isRealNumber(token) == true)
        printf("'%s' IS A REAL NUMBER\n", token);
    else if (validIdentifier(token) == true)
        printf("'%s' IS A VALID IDENTIFIER\n", token);
    else
        printf("'%s' IS NOT A VALID IDENTIFIER\n", token);
}

// Parsing the input STRING.
//
// Scans `str` once. A delimiter is consumed as a single character and is
// printed if it is also an operator. Any other character starts a token
// that extends up to the next delimiter or the end of the string; the
// token is copied, classified, and its copy is freed again. No character
// beyond the NUL terminator is ever read.
void parse(char* str)
{
    int len;
    int right = 0;

    if (str == NULL)
        return;
    len = (int)strlen(str);

    while (right < len) {
        if (isDelimiter(str[right]) == true) {
            if (isOperator(str[right]) == true)
                printf("'%c' IS AN OPERATOR\n", str[right]);
            right++;
        } else {
            int left = right;
            char *subStr;

            // Advance to the first delimiter after the token (or the end).
            while (right < len && isDelimiter(str[right]) == false)
                right++;

            subStr = subString(str, left, right - 1);
            if (subStr == NULL) {
                fprintf(stderr, "parse: out of memory\n");
                return;
            }
            reportToken(subStr);
            free(subStr);
        }
    }
}

// Driver: feeds a single hard-coded line of C code to the scanner.
int main()
{
    // Fixed 100-byte buffer initialised with the sample text.
    char code[100] = "int a = b + 1c; ";

    parse(code);

    return 0;
}
Java
import java.util.Arrays;

/**
 * A minimal scanner for C source text. It splits a string at delimiter
 * characters and prints the category of every operator and token.
 */
public class Parser {
    // Plain characters only: sequences such as "\+" or "\[" are not legal
    // Java escapes and do not compile.
    private static final String DELIMITERS = " +-*/,;><=()[]{}";
    private static final String OPERATORS = "+-*/><=";

    // C keywords up to C99.
    private static final String[] KEYWORDS = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "inline", "int", "long", "register", "restrict", "return",
        "short", "signed", "sizeof", "static", "struct", "switch",
        "typedef", "union", "unsigned", "void", "volatile", "while",
        "_Bool", "_Complex", "_Imaginary"
    };

    /** Returns true if {@code ch} separates tokens. */
    public static boolean isDelimiter(char ch) {
        return DELIMITERS.indexOf(ch) != -1;
    }

    /** Returns true if {@code ch} is one of + - * / &gt; &lt; =. */
    public static boolean isOperator(char ch) {
        return OPERATORS.indexOf(ch) != -1;
    }

    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }

    private static boolean isIdentifierStart(char c) {
        return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_';
    }

    /**
     * Returns true if {@code str} is non-empty, starts with a letter or '_'
     * and contains only letters, digits and '_'. Keywords are not excluded.
     */
    public static boolean validIdentifier(String str) {
        if (str == null || str.isEmpty() || !isIdentifierStart(str.charAt(0)))
            return false;
        for (int i = 1; i < str.length(); i++) {
            char c = str.charAt(i);
            if (!isIdentifierStart(c) && !isAsciiDigit(c))
                return false;
        }
        return true;
    }

    /** Returns true if {@code str} is exactly one of the C keywords. */
    public static boolean isKeyword(String str) {
        return Arrays.asList(KEYWORDS).contains(str);
    }

    /**
     * Returns true for an optional leading '-' followed by at least one
     * decimal digit. The empty string and a lone "-" are rejected.
     */
    public static boolean isInteger(String str) {
        if (str == null || str.isEmpty())
            return false;
        int start = (str.charAt(0) == '-') ? 1 : 0;
        if (start == str.length())
            return false; // sign without digits
        for (int i = start; i < str.length(); i++) {
            if (!isAsciiDigit(str.charAt(i)))
                return false;
        }
        return true;
    }

    /**
     * Returns true for an optional leading '-' followed by decimal digits
     * containing exactly one '.', with at least one digit overall.
     */
    public static boolean isRealNumber(String str) {
        if (str == null || str.isEmpty())
            return false;
        int dots = 0;
        int digits = 0;
        for (int i = (str.charAt(0) == '-') ? 1 : 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c == '.')
                dots++;
            else if (isAsciiDigit(c))
                digits++;
            else
                return false;
        }
        return dots == 1 && digits > 0;
    }

    /** Returns the characters of {@code str} from index left to right, inclusive. */
    public static String subString(String str, int left, int right) {
        return str.substring(left, right + 1);
    }

    /**
     * Scans {@code str} and prints one line per operator and per token.
     * Delimiters are consumed one character at a time; every maximal run of
     * non-delimiter characters is one token. The index never reaches
     * {@code str.length()} inside a charAt call, so the scan ends cleanly.
     */
    public static void parse(String str) {
        if (str == null)
            return;

        int len = str.length();
        int pos = 0;

        while (pos < len) {
            char ch = str.charAt(pos);
            if (isDelimiter(ch)) {
                if (isOperator(ch))
                    System.out.println("'" + ch + "' IS AN OPERATOR");
                pos++;
                continue;
            }

            int start = pos;
            while (pos < len && !isDelimiter(str.charAt(pos)))
                pos++;
            String subStr = subString(str, start, pos - 1);

            // Keywords and numbers are tested before the identifier check.
            if (isKeyword(subStr))
                System.out.println("'" + subStr + "' IS A KEYWORD");
            else if (isInteger(subStr))
                System.out.println("'" + subStr + "' IS AN INTEGER");
            else if (isRealNumber(subStr))
                System.out.println("'" + subStr + "' IS A REAL NUMBER");
            else if (validIdentifier(subStr))
                System.out.println("'" + subStr + "' IS A VALID IDENTIFIER");
            else
                System.out.println("'" + subStr + "' IS NOT A VALID IDENTIFIER");
        }
    }

    public static void main(String[] args) {
        String str = "int a = b + 1c; ";
        parse(str);
    }
}
Python
def is_delimiter(ch):
    """Return True if `ch` is a single token-separating character.

    The separators are a blank, the symbols + - * / > < =, a comma, a
    semicolon and every bracket kind. The set is written without
    backslashes, so a backslash is not a delimiter, and the empty string
    never matches.
    """
    return len(ch) == 1 and ch in ' +-*/,;><=()[]{}'

def is_operator(ch):
    """Return True if `ch` occurs in the operator characters + - * / > < =."""
    operator_chars = '+-*/><='
    return operator_chars.find(ch) >= 0

def valid_identifier(str):
    """Return True if `str` is a well-formed C identifier.

    The whole string is checked: it must be non-empty, start with an ASCII
    letter or '_', and continue with ASCII letters, digits or '_' only.
    Keywords are not excluded by this function.
    """
    letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
    digits = '0123456789'

    if not str or str[0] not in letters:
        return False
    return all(ch in letters or ch in digits for ch in str[1:])

def is_keyword(str):
    """Return True if `str` is exactly one of the C keywords (up to C99)."""
    keywords = (
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "inline", "int", "long", "register", "restrict", "return",
        "short", "signed", "sizeof", "static", "struct", "switch",
        "typedef", "union", "unsigned", "void", "volatile", "while",
        "_Bool", "_Complex", "_Imaginary",
    )
    return str in keywords

def is_integer(str):
    """Return True if `str` is an optional '-' followed by ASCII digits.

    At least one digit is required, so '' and '-' are rejected. Only the
    characters '0'-'9' count as digits; other Unicode digit characters
    accepted by str.isdigit() do not.
    """
    if not str:
        return False
    # A sign is only meaningful in the first position.
    digits = str[1:] if str[0] == '-' else str
    if not digits:
        return False
    return all('0' <= ch <= '9' for ch in digits)

def is_real_number(str):
    """Return True if `str` is a decimal number containing one '.'.

    Accepted form: an optional leading '-', then ASCII digits with exactly
    one '.' among them and at least one digit overall ('3.14', '.5', '5.').
    Integers, multi-dot strings and digit-less strings are rejected.
    """
    if not str:
        return False
    # A sign is only meaningful in the first position.
    body = str[1:] if str[0] == '-' else str
    if body.count('.') != 1:
        return False
    digits = body.replace('.', '', 1)
    if not digits:
        return False
    return all('0' <= ch <= '9' for ch in digits)

def sub_string(str, left, right):
    """Return the slice of `str` from index `left` through `right` inclusive."""
    stop = right + 1
    return str[left:stop]

def parse(str):
    """Scan `str` and print one line per operator and per token.

    Delimiters are consumed one character at a time and printed when they
    are also operators. Every maximal run of non-delimiter characters is
    one token, classified as keyword, integer, real number, valid
    identifier or invalid identifier. Indexing stops at the last
    character, so the scan ends without an IndexError.
    """
    pos = 0
    end = len(str)

    while pos < end:
        ch = str[pos]
        if is_delimiter(ch):
            if is_operator(ch):
                print(f"'{ch}' IS AN OPERATOR")
            pos += 1
            continue

        # Collect the whole run of non-delimiter characters.
        start = pos
        while pos < end and not is_delimiter(str[pos]):
            pos += 1
        sub_str = sub_string(str, start, pos - 1)

        # Keywords and numbers are tested before the identifier check.
        if is_keyword(sub_str):
            print(f"'{sub_str}' IS A KEYWORD")
        elif is_integer(sub_str):
            print(f"'{sub_str}' IS AN INTEGER")
        elif is_real_number(sub_str):
            print(f"'{sub_str}' IS A REAL NUMBER")
        elif valid_identifier(sub_str):
            print(f"'{sub_str}' IS A VALID IDENTIFIER")
        else:
            print(f"'{sub_str}' IS NOT A VALID IDENTIFIER")

if __name__ == '__main__':
    # Use a dedicated name so the builtin `str` is not rebound at module scope.
    sample_code = 'int a = b + 1c; '
    parse(sample_code)

Output: 

'int' IS A KEYWORD
'a' IS A VALID IDENTIFIER
'=' IS AN OPERATOR
'b' IS A VALID IDENTIFIER
'+' IS AN OPERATOR
'1c' IS NOT A VALID IDENTIFIER


 


Article Tags :

Explore