0

Let's say I want to parse a simple programming language with boost regular expressions.

import a
import b

class c

Now I want to have a layout like this:

#include <exception>
#include <stdexcept>
#include <string>

#include <boost/filesystem.hpp>
#include <boost/regex.hpp>

using namespace std;
using namespace boost;
using namespace boost::filesystem;

// Small token-at-a-time parser: it walks a source string from left to right
// and pulls tokens off the front using Boost.Regex patterns.
class parser
{
    string _source;   // complete text being parsed
    unsigned int pos; // index into _source of the next unread character
public:
    parser(const string& source) : _source(source), pos(0) {}

    // Consumes and returns the token matching `expr` at the cursor.
    // Throws std::runtime_error when next() yields no (non-empty) match; in
    // that case the cursor is put back where it was before the call, so a
    // failed expect() never moves the parser (and can never wrap `pos`
    // below zero, which a plain decrement would do at position 0).
    string expect(const regex& expr)
    {
        const unsigned int start = pos;
        string s = next(expr);
        if (s != "")
            return s;

        pos = start;
        throw std::runtime_error("Expected another expression.");
    }

    // Skips leading whitespace, then tries to match `expr` starting exactly
    // at the cursor. On success the cursor is advanced past the match and
    // the matched text is returned; otherwise "" is returned and the cursor
    // stays on the first non-whitespace character.
    string next(const regex& expr)
    {
        // Remove all whitespace before the first non-whitespace character.
        const string::size_type first = _source.find_first_not_of(" \t\n\r\f\v", pos);
        if (first == string::npos)
        {
            // Nothing but whitespace (or nothing at all) is left.
            pos = static_cast<unsigned int>(_source.length());
            return "";
        }
        pos = static_cast<unsigned int>(first);

        // match_continuous anchors the search at `begin`, so only a match
        // that starts at the cursor is accepted.
        const string::const_iterator begin = _source.begin() + pos;
        const string::const_iterator end = _source.end();
        boost::smatch what;
        if (!boost::regex_search(begin, end, what, expr, boost::match_continuous))
            return "";

        pos += static_cast<unsigned int>(what[0].length());
        return what[0].str();
    }

    // Declared only; no definition is visible in this class body.
    bool peek(const regex& expr);

    // Parses one `import <identifier>` or `class <identifier>` statement.
    // NOTE(review): this assumes peek() consumes the keyword it recognises;
    // if it only looks ahead, expect(identifier) will match the keyword
    // itself. Confirm against the definition of peek().
    void parse()
    {
        // An identifier must be non-empty, because expect() treats an empty
        // match as a failure.
        const regex identifier("[A-Za-z_][A-Za-z0-9_]*");
        // basic_regex's constructors are explicit, so the keyword patterns
        // have to be built explicitly.
        if (peek(regex("import")))
            string package = expect(identifier);
        else if (peek(regex("class")))
            string classname = expect(identifier);
    }
};

Now I need your help to define the function parser::next(const regex&). It's not clear to me how to iterate through a std::string with Boost regular expressions.

I hope someone can help me!

2 Answers 2

1

Assuming that Boost regexp uses a similar approach as the regular expressions from the standard C++ library do (I realize that those in the standard are based on a proposal coming from Boost but other components are not entirely identical), you'd use the information obtained in a std::match_results<...> object to determine information associated with the match.

Sign up to request clarification or add additional context in comments.

Comments

0

For people who are interested, this is how I solved it in the current implementation:

Note that crucial parts may be missing, but this answers the question.

int submatch(const std::string& input, const regex& e)
{
   boost::match_results<std::string::const_iterator> what;
   if(0 == boost::regex_match(input, what, e, boost::match_default | boost::match_partial))
   {
        // the input so far could not possibly be valid so reject it:
        return 0;
   }
   // OK so far so good, but have we finished?
   if(what[0].matched)
   {
      // excellent, we have a result:
      return 2;
   }
   // what we have so far is only a partial match...
   return 1;
}

// Advances `pos` to the first non-whitespace character at or after its
// current value. When only whitespace (or nothing) remains, `pos` ends up at
// source.length().
//
// A direct character search is used rather than a per-character regex match
// and "step back by one" correction: that correction moved the cursor
// backwards (or wrapped it at position 0) whenever the loop stopped without
// consuming anything, and `source.length() - 1` wrapped on an empty source.
void skip_ws()
{
    // Same characters that "\\s" matches in the default "C" locale.
    const std::string::size_type first = source.find_first_not_of(" \t\n\r\f\v", pos);
    pos = (first == std::string::npos) ? source.length() : first;
}

// Returns the longest prefix of the input at the cursor (after skipping
// whitespace) that fully matches t.expr, or "" when no prefix matches.
// `pos` is moved only by skip_ws(); the caller advances it past the token.
//
// The candidate is grown one character at a time and classified with
// submatch(): 2 = full match, 1 = partial match (keep going), 0 = dead end.
string lex(const token& t)
{
    skip_ws();

    string candidate;
    string::size_type best = 0; // length of the longest fully matching prefix

    // Scan up to and including the last character of the source.
    for (string::size_type i = pos; i < source.length(); ++i)
    {
        candidate.push_back(source[i]);
        const int result = submatch(candidate, t.expr);
        if (result == 0) // No match: a longer candidate cannot match either
            break;
        if (result == 2) // Full match: remember it, but try to extend it
            best = candidate.length();
        // result == 1 (partial): keep reading
    }

    // Drop any trailing characters that were read after the last full match
    // (this also yields "" if nothing ever matched completely).
    candidate.resize(best);
    return candidate;
}

// Lexes the token described by `t` at the cursor, advances `pos` past it and
// returns its text. Writes a trace line to cout for every call.
// Throws a `string` with the error message when lex() returns "".
//
// On failure the cursor is left untouched: lex() produced nothing, so there
// is nothing to undo. Decrementing it would wrap `pos` at position 0 and
// could otherwise rewind into the previously consumed token.
string expect(const token& t)
{
    cout << " string expect(\"" << t.expr << "\")";
    string s = lex(t);
    if (s.empty())
    {
        cout << "-> False" << endl;
        throw string("Expected another expression.");
    }

    pos += s.length();
    cout << endl;
    return s;
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.