Skip to main content
Tweeted twitter.com/StackCodeReview/status/975647983143407616
typo
Source Link
rule :test do {
  str("a") >> repeat(2, str("b"))
}
rule :test do {
  str("a") >> releap(2, str("b"))
}
rule :test do {
  str("a") >> repeat(2, str("b"))
}
deleted 33 characters in body; edited title
Source Link
Jamal
  • 35.2k
  • 13
  • 134
  • 238

Elixir PEG Parser Generator ... How do I make it more Idiomatic?

I am trying to learn Elixir, so I decided to write a port of Ruby's Parslet library.

  I want to define a DSL that lets you define your grammar and given a string can parse it into an AST.

I wrote something, but it's not very idiomatic. I'd love some pointers.

  Also, I wanted to be able to define rules as follows:

where the '2' is 'min_count' and have it match 'abbbbb'.

With my current design I had to do:

Here is my code and the tests:

#Tests

Elixir PEG Parser Generator ... How do I make it more Idiomatic?

I am trying to learn Elixir, so I decided to write a port of Ruby's Parslet library.

  I want to define a DSL that let's you define your grammar and given a string can parse it into an AST.

I wrote something... but it's not very Idiomatic. I'd love some pointers.

  Also... I wanted to be able to define rules as follows

... where the '2' is 'min_count' and have it match 'abbbbb' With my current design I had to do

ok ... here is my code... and the tests...

#tests

Elixir PEG Parser Generator

I am trying to learn Elixir, so I decided to write a port of Ruby's Parslet library. I want to define a DSL that lets you define your grammar and given a string can parse it into an AST.

I wrote something, but it's not very idiomatic. I'd love some pointers. Also, I wanted to be able to define rules as follows:

where the '2' is 'min_count' and have it match 'abbbbb'.

With my current design I had to do:

Here is my code and the tests:

#Tests

Source Link

Elixir PEG Parser Generator ... How do I make it more Idiomatic?

I am trying to learn Elixir, so I decided to write a port of Ruby's Parslet library.

I want to define a DSL that let's you define your grammar and given a string can parse it into an AST.

I wrote something... but it's not very Idiomatic. I'd love some pointers.

Also... I wanted to be able to define rules as follows

rule :test do {
  str("a") >> (str("b") >> repeat(2))
}

... where the '2' is 'min_count' and have it match 'abbbbb' With my current design I had to do

rule :test do {
  str("a") >> releap(2, str("b"))
}

ok ... here is my code... and the tests...

defmodule Parslet do
  @moduledoc """
  A tiny PEG-style parser-combinator DSL, loosely modelled on Ruby's Parslet.

  `use Parslet` imports the combinators below, lets the module declare named
  rules with `rule/2`, pick the entry point with `root/1`, and injects a
  `parse/1` function right before the module is compiled.

  A *parser* is a one-argument function that takes the remaining input and
  returns `{:ok, matched, rest}` on success or `{:error, message}` on failure.
  Every combinator comes in two arities: the shorter one starts a new parser,
  the longer one takes the preceding parser as its first argument so parsers
  can be sequenced with `|>`.
  """

  @typedoc "Outcome of running a parser: the matched text plus the unconsumed input, or an error."
  @type result :: {:ok, String.t(), String.t()} | {:error, String.t()}

  @typedoc "A function that consumes a prefix of its input."
  @type parser :: (String.t() -> result())

  # Callback invoked by `use Parslet`.
  @doc false
  defmacro __using__(_opts) do
    quote do
      import Parslet

      # `rule/2` prepends each rule name to @rules; `root/1` overwrites @root.
      @rules []
      @root :undefined

      # Have Parslet.__before_compile__/1 inject `parse/1` once all rules are known.
      @before_compile Parslet
    end
  end

  @doc """
  Defines a named grammar rule as a zero-arity function returning a parser.

  Because a rule is an ordinary function, other rules can refer to it simply by
  calling it.

  ## Examples

      rule :test_string do
        str("test")
      end

  """
  defmacro rule(name, do: block) do
    quote do
      # Record the rule name, then define the function that builds its parser.
      @rules [unquote(name) | @rules]
      def unquote(name)(), do: unquote(block)
    end
  end

  @doc """
  Declares which rule `parse/1` starts from.

  ## Examples

      root :test_string

  """
  defmacro root(rule_name) do
    quote do
      @root unquote(rule_name)
    end
  end

  # Invoked right before the target module is compiled, i.e. after every
  # `rule`/`root` call has run, so @root holds its final value here.
  @doc false
  defmacro __before_compile__(_env) do
    quote do
      def parse(document) do
        # A parse only succeeds when the root rule consumes the whole document.
        case apply(__MODULE__, @root, []).(document) do
          {:ok, any, ""} ->
            {:ok, any}

          {:ok, any, rest} ->
            {:error, "Consumed #{inspect(any)}, but had the following remaining '#{rest}'"}

          error ->
            error
        end
      end
    end
  end

  @doc """
  Sequences the parser `fun` with a continuation `aux`.

  The returned parser runs `fun`; on success it calls `aux.(rest, match)` with
  the unconsumed input and the text matched so far, otherwise it returns the
  failure of `fun` unchanged.
  """
  @spec call_aux(parser(), (String.t(), String.t() -> result())) :: parser()
  def call_aux(fun, aux) do
    fn doc ->
      case fun.(doc) do
        {:ok, match, rest} -> aux.(rest, match)
        other -> other
      end
    end
  end

  @doc """
  Returns a parser that matches the literal `text` at the start of the input.

  ## Examples

      iex> Parslet.str("a").("ab")
      {:ok, "a", "b"}

  """
  @spec str(String.t()) :: parser()
  def str(text), do: str(&Parslet.identity/1, text)

  @doc """
  Returns a parser that runs `fun` and then matches the literal `text`.
  """
  @spec str(parser(), String.t()) :: parser()
  def str(fun, text) do
    call_aux(fun, fn doc, matched -> str_aux(text, doc, matched) end)
  end

  @doc """
  Returns a parser that matches the regex source `regex_s` at the start of the input.

  ## Examples

      iex> Parslet.match("[0-9]+").("42abc")
      {:ok, "42", "abc"}

  """
  @spec match(String.t()) :: parser()
  def match(regex_s), do: match(&Parslet.identity/1, regex_s)

  @doc """
  Returns a parser that runs `fun` and then matches the regex source `regex_s`.
  """
  @spec match(parser(), String.t()) :: parser()
  def match(fun, regex_s) do
    # Compile once per parser instead of once per input. The non-capturing group
    # keeps the start anchor bound to the whole expression, so an alternation
    # such as "a|b" cannot match in the middle of the input.
    regex = Regex.compile!("\\A(?:#{regex_s})")
    call_aux(fun, fn doc, matched -> match_aux(regex, regex_s, doc, matched) end)
  end

  @doc """
  Returns a parser that applies `fun` at least `min_count` times, then as many
  more times as it keeps succeeding.

  ## Examples

      iex> Parslet.repeat(Parslet.str("a"), 1).("aab")
      {:ok, "aa", "b"}

  """
  @spec repeat(parser(), non_neg_integer()) :: parser()
  def repeat(fun, min_count), do: repeat(&Parslet.identity/1, fun, min_count)

  @doc """
  Returns a parser that runs `prev` and then repeats `fun` as `repeat/2` does.
  """
  @spec repeat(parser(), parser(), non_neg_integer()) :: parser()
  def repeat(prev, fun, min_count) do
    call_aux(prev, fn doc, matched -> repeat_aux(fun, min_count, doc, matched) end)
  end

  @doc """
  The parser that always succeeds without consuming any input.
  """
  @spec identity(String.t()) :: result()
  def identity(doc) do
    {:ok, "", doc}
  end

  # Matches `text` as a byte prefix of `doc`. Splitting by byte size (rather
  # than by grapheme count) keeps the remainder exact even when the prefix ends
  # inside a grapheme cluster, e.g. "\r" in front of "\r\n".
  defp str_aux(text, doc, matched) do
    size = byte_size(text)

    case doc do
      <<^text::binary-size(size), rest::binary>> ->
        {:ok, matched <> text, rest}

      _ ->
        {:error, "'#{doc}' does not match string '#{text}'"}
    end
  end

  # Runs the pre-compiled, start-anchored `regex` against `doc`. `regex_s` is
  # only used for the error message. Offsets from `return: :index` are in
  # bytes, so the input is split with a binary pattern.
  defp match_aux(regex, regex_s, doc, matched) do
    case Regex.run(regex, doc, return: :index) do
      [{0, size} | _] ->
        <<match::binary-size(size), rest::binary>> = doc
        {:ok, matched <> match, rest}

      nil ->
        {:error, "'#{doc}' does not match regex '#{regex_s}'"}
    end
  end

  # Mandatory phase: `fun` must still succeed `count` more times.
  defp repeat_aux(fun, count, doc, matched) when count > 0 do
    case fun.(doc) do
      {:ok, match, rest} -> repeat_aux(fun, count - 1, rest, matched <> match)
      other -> other
    end
  end

  # Optional phase: keep going greedily until `fun` fails.
  defp repeat_aux(fun, _count, doc, matched) do
    case fun.(doc) do
      # A success that consumed nothing would repeat forever; stop here.
      {:ok, _match, ^doc} -> {:ok, matched, doc}
      {:ok, match, rest} -> repeat_aux(fun, 0, rest, matched <> match)
      _ -> {:ok, matched, doc}
    end
  end
end

#tests

defmodule ParsletTest do
  # The grammars under test are pure functions over strings and share no
  # state, so these tests can run concurrently with other test modules.
  use ExUnit.Case, async: true
  doctest Parslet

  # Grammar whose root is a single literal.
  defmodule ParsletExample do
    use Parslet

    rule :test_string do
      str("test")
    end

    root :test_string
  end

  describe "str/1 as the root rule" do
    test "matches the whole string" do
      assert ParsletExample.parse("test") == {:ok, "test"}
    end

    test "does not match a different string" do
      assert ParsletExample.parse("tost") ==
               {:error, "'tost' does not match string 'test'"}
    end

    test "reports an error when not all of the input is consumed" do
      assert ParsletExample.parse("test_the_best") ==
               {:error, "Consumed \"test\", but had the following remaining '_the_best'"}
    end
  end

  # Grammar whose root delegates to another rule; rules are plain functions,
  # so one rule refers to another simply by calling it.
  defmodule ParsletExample2 do
    use Parslet

    rule :test_regex do
      match("123")
    end

    rule :document do
      test_regex()
    end

    root :document
  end

  describe "match/1 reached through another rule" do
    test "matches input equal to the pattern" do
      assert ParsletExample2.parse("123") == {:ok, "123"}
    end

    test "rejects input that does not start with the pattern" do
      assert ParsletExample2.parse("w123") ==
               {:error, "'w123' does not match regex '123'"}

      assert ParsletExample2.parse("234") ==
               {:error, "'234' does not match regex '123'"}
    end

    test "reports trailing input left after the pattern" do
      assert ParsletExample2.parse("123the_rest") ==
               {:error, "Consumed \"123\", but had the following remaining 'the_rest'"}
    end
  end

  # Grammar: a+
  defmodule ParsletExample3 do
    use Parslet

    rule :a do
      repeat(str("a"), 1)
    end

    root :a
  end

  describe "repeat/2 with min_count 1 (a+)" do
    test "matches one or more repetitions" do
      assert ParsletExample3.parse("a") == {:ok, "a"}
      assert ParsletExample3.parse("aaaaaa") == {:ok, "aaaaaa"}
    end

    test "fails when there are fewer repetitions than min_count" do
      assert {:error, _} = ParsletExample3.parse("")
    end
  end

  # Grammar: a b
  defmodule ParsletExample4 do
    use Parslet

    rule :a do
      str("a") |> str("b")
    end

    root :a
  end

  describe "sequencing with |> (a b)" do
    test "matches the parsers one after the other" do
      assert ParsletExample4.parse("ab") == {:ok, "ab"}
    end
  end

  # Grammar: (a b)+
  defmodule ParsletExample5 do
    use Parslet

    rule :a do
      repeat(str("a") |> str("b"), 1)
    end

    root :a
  end

  describe "repeat/2 over a sequence ((a b)+)" do
    test "matches repeated occurrences of the sequence" do
      assert ParsletExample5.parse("ababab") == {:ok, "ababab"}
    end
  end

  # Grammar: a b+
  defmodule ParsletExample6 do
    use Parslet

    rule :a do
      str("a") |> repeat(str("b"), 1)
    end

    root :a
  end

  describe "repeat/3 piped after another parser (a b+)" do
    test "matches the prefix followed by one or more repetitions" do
      assert ParsletExample6.parse("abbbbb") == {:ok, "abbbbb"}
    end

    test "fails when the repeated part is missing" do
      assert {:error, _} = ParsletExample6.parse("a")
    end
  end
end