--- /dev/null
+// Copyright (C) 2007 Chris Double.\r
+// \r
+// Redistribution and use in source and binary forms, with or without\r
+// modification, are permitted provided that the following conditions are met:\r
+// \r
+// 1. Redistributions of source code must retain the above copyright notice,\r
+// this list of conditions and the following disclaimer.\r
+// \r
+// 2. Redistributions in binary form must reproduce the above copyright notice,\r
+// this list of conditions and the following disclaimer in the documentation\r
+// and/or other materials provided with the distribution.\r
+// \r
+// THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,\r
+// INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND\r
+// FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE\r
+// DEVELOPERS AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\r
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,\r
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;\r
+// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\r
+// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR\r
+// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF\r
+// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\r
+//\r
+function identity(x) {\r
+ return x;\r
+}\r
+\r
+function foldl(f, initial, seq) {\r
+ for(var i=0; i< seq.length; ++i) \r
+ initial = f(initial, seq[i]);\r
+ return initial;\r
+}\r
+\r
+var memoize = true;\r
+\r
+function ParseState(input, index) {\r
+ this.input = input;\r
+ this.index = index || 0;\r
+ this.length = input.length - this.index;\r
+ this.cache = { };\r
+ return this;\r
+}\r
+\r
+ParseState.prototype.from = function(index) {\r
+ var r = new ParseState(this.input, this.index + index);\r
+ r.cache = this.cache;\r
+ r.length = this.length - index;\r
+ return r;\r
+}\r
+\r
+ParseState.prototype.substring = function(start, end) {\r
+ return this.input.substring(start + this.index, (end || this.length) + this.index);\r
+}\r
+\r
+ParseState.prototype.trimLeft = function() {\r
+ var s = this.substring(0);\r
+ var m = s.match(/^\s+/);\r
+ return m ? this.from(m[0].length) : this;\r
+}\r
+\r
+ParseState.prototype.at = function(index) {\r
+ return this.input.charAt(this.index + index);\r
+}\r
+\r
+ParseState.prototype.toString = function() {\r
+ return 'PS"' + this.substring(0) + '"'; \r
+}\r
+\r
+ParseState.prototype.getCached = function(pid) {\r
+ if(!memoize)\r
+ return false;\r
+\r
+ var p = this.cache[pid];\r
+ if(p) \r
+ return p[this.index];\r
+ else\r
+ return false;\r
+}\r
+\r
+ParseState.prototype.putCached = function(pid, cached) {\r
+ if(!memoize)\r
+ return false;\r
+\r
+ var p = this.cache[pid];\r
+ if(p)\r
+ p[this.index] = cached;\r
+ else {\r
+ p = this.cache[pid] = { };\r
+ p[this.index] = cached;\r
+ }\r
+}\r
+\r
+function ps(str) {\r
+ return new ParseState(str);\r
+}\r
+\r
+// 'r' is the remaining string to be parsed.\r
+// 'matched' is the portion of the string that\r
+// was successfully matched by the parser.\r
+// 'ast' is the AST returned by the successfull parse.\r
+function make_result(r, matched, ast) {\r
+ return { remaining: r, matched: matched, ast: ast };\r
+}\r
+\r
+var parser_id = 0;\r
+ \r
+// 'token' is a parser combinator that given a string, returns a parser\r
+// that parses that string value. The AST contains the string that was parsed.\r
+function token(s) {\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+\r
+ var r = state.length >= s.length && state.substring(0,s.length) == s;\r
+ if(r) \r
+ cached = { remaining: state.from(s.length), matched: s, ast: s };\r
+ else\r
+ cached = false;\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ };\r
+}\r
+\r
+// Like 'token' but for a single character. Returns a parser that given a string\r
+// containing a single character, parses that character value.\r
+function ch(c) {\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+ var r = state.length >= 1 && state.at(0) == c;\r
+ if(r) \r
+ cached = { remaining: state.from(1), matched: c, ast: c };\r
+ else\r
+ cached = false;\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ };\r
+}\r
+\r
+// 'range' is a parser combinator that returns a single character parser\r
+// (similar to 'ch'). It parses single characters that are in the inclusive\r
+// range of the 'lower' and 'upper' bounds ("a" to "z" for example).\r
+function range(lower, upper) {\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+ \r
+ if(state.length < 1) \r
+ cached = false;\r
+ else {\r
+ var ch = state.at(0);\r
+ if(ch >= lower && ch <= upper) \r
+ cached = { remaining: state.from(1), matched: ch, ast: ch };\r
+ else\r
+ cached = false;\r
+ }\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ };\r
+}\r
+\r
+// Helper function to convert string literals to token parsers\r
+// and perform other implicit parser conversions.\r
+function toParser(p) {\r
+ return (typeof(p) == "string") ? token(p) : p;\r
+}\r
+\r
+// Parser combinator that returns a parser that\r
+// skips whitespace before applying parser.\r
+function whitespace(p) {\r
+ var p = toParser(p);\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+\r
+ cached = p(state.trimLeft());\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ };\r
+}\r
+\r
+// Parser combinator that passes the AST generated from the parser 'p' \r
+// to the function 'f'. The result of 'f' is used as the AST in the result.\r
+function action(p, f) {\r
+ var p = toParser(p);\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached) \r
+ return cached;\r
+\r
+ var x = p(state);\r
+ if(x) {\r
+ x.ast = f(x.ast);\r
+ cached = x;\r
+ }\r
+ else {\r
+ cached = false;\r
+ }\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ };\r
+}\r
+\r
+// Given a parser that produces an array as an ast, returns a\r
+// parser that produces an ast with the array joined by a separator.\r
+function join_action(p, sep) {\r
+ return action(p, function(ast) { return ast.join(sep); });\r
+}\r
+\r
+// Given an ast of the form [ Expression, [ a, b, ...] ], convert to\r
+// [ [ [ Expression [ a ] ] b ] ... ]\r
+// This is used for handling left recursive entries in the grammar. e.g.\r
+// MemberExpression:\r
+// PrimaryExpression\r
+// FunctionExpression\r
+// MemberExpression [ Expression ]\r
+// MemberExpression . Identifier\r
+// new MemberExpression Arguments \r
+function left_factor(ast) {\r
+ return foldl(function(v, action) { \r
+ return [ v, action ]; \r
+ }, \r
+ ast[0], \r
+ ast[1]);\r
+}\r
+\r
+// Return a parser that left factors the ast result of the original\r
+// parser.\r
+function left_factor_action(p) {\r
+ return action(p, left_factor);\r
+}\r
+\r
+// 'negate' will negate a single character parser. So given 'ch("a")' it will successfully\r
+// parse any character except for 'a'. Or 'negate(range("a", "z"))' will successfully parse\r
+// anything except the lowercase characters a-z.\r
+function negate(p) {\r
+ var p = toParser(p);\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+\r
+ if(state.length >= 1) {\r
+ var r = p(state);\r
+ if(!r) \r
+ cached = make_result(state.from(1), state.at(0), state.at(0));\r
+ else\r
+ cached = false;\r
+ }\r
+ else {\r
+ cached = false;\r
+ }\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ };\r
+}\r
+\r
+// 'end_p' is a parser that is successful if the input string is empty (ie. end of parse).\r
+function end_p(state) {\r
+ if(state.length == 0) \r
+ return make_result(state, undefined, undefined);\r
+ else\r
+ return false;\r
+}\r
+\r
+// 'nothing_p' is a parser that always fails.\r
+function nothing_p(state) {\r
+ return false;\r
+}\r
+\r
+// 'sequence' is a parser combinator that processes a number of parsers in sequence.\r
+// It can take any number of arguments, each one being a parser. The parser that 'sequence'\r
+// returns succeeds if all the parsers in the sequence succeeds. It fails if any of them fail.\r
+function sequence() {\r
+ var parsers = [];\r
+ for(var i = 0; i < arguments.length; ++i) \r
+ parsers.push(toParser(arguments[i])); \r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached) {\r
+ return cached;\r
+ }\r
+\r
+ var ast = [];\r
+ var matched = "";\r
+ var i;\r
+ for(i=0; i< parsers.length; ++i) {\r
+ var parser = parsers[i]; \r
+ var result = parser(state);\r
+ if(result) {\r
+ state = result.remaining;\r
+ if(result.ast != undefined) {\r
+ ast.push(result.ast);\r
+ matched = matched + result.matched;\r
+ }\r
+ }\r
+ else {\r
+ break;\r
+ }\r
+ }\r
+ if(i == parsers.length) {\r
+ cached = make_result(state, matched, ast);\r
+ }\r
+ else \r
+ cached = false;\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ };\r
+}\r
+\r
+// Like sequence, but ignores whitespace between individual parsers.\r
+function wsequence() {\r
+ var parsers = [];\r
+ for(var i=0; i < arguments.length; ++i) {\r
+ parsers.push(whitespace(toParser(arguments[i])));\r
+ }\r
+ return sequence.apply(null, parsers); \r
+}\r
+\r
+// 'choice' is a parser combinator that provides a choice between other parsers.\r
+// It takes any number of parsers as arguments and returns a parser that will try\r
+// each of the given parsers in order. The first one that succeeds results in a \r
+// successfull parse. It fails if all parsers fail.\r
+function choice() {\r
+ var parsers = [];\r
+ for(var i = 0; i < arguments.length; ++i) \r
+ parsers.push(toParser(arguments[i])); \r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached) {\r
+ return cached;\r
+ }\r
+ var i;\r
+ for(i=0; i< parsers.length; ++i) {\r
+ var parser=parsers[i];\r
+ var result = parser(state);\r
+ if(result) {\r
+ break;\r
+ }\r
+ } \r
+ if(i == parsers.length)\r
+ cached = false;\r
+ else\r
+ cached = result;\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+\r
+// 'butnot' is a parser combinator that takes two parsers, 'p1' and 'p2'. \r
+// It returns a parser that succeeds if 'p1' matches and 'p2' does not, or\r
+// 'p1' matches and the matched text is longer that p2's.\r
+// Useful for things like: butnot(IdentifierName, ReservedWord)\r
+function butnot(p1,p2) {\r
+ var p1 = toParser(p1);\r
+ var p2 = toParser(p2);\r
+ var pid = parser_id++;\r
+ \r
+ // match a but not b. if both match and b's matched text is shorter\r
+ // than a's, a failed match is made\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+ \r
+ var br = p2(state);\r
+ if(!br) {\r
+ cached = p1(state);\r
+ } else {\r
+ var ar = p1(state);\r
+ if(ar.matched.length > br.matched.length)\r
+ cached = ar;\r
+ else\r
+ cached = false;\r
+ }\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+\r
+// 'difference' is a parser combinator that takes two parsers, 'p1' and 'p2'. \r
+// It returns a parser that succeeds if 'p1' matches and 'p2' does not. If\r
+// both match then if p2's matched text is shorter than p1's it is successfull.\r
+function difference(p1,p2) {\r
+ var p1 = toParser(p1);\r
+ var p2 = toParser(p2);\r
+ var pid = parser_id++;\r
+ \r
+ // match a but not b. if both match and b's matched text is shorter\r
+ // than a's, a successfull match is made\r
+ return function(state) {\r
+ var savedState = sate;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+\r
+ var br = p2(state);\r
+ if(!br) {\r
+ cached = p1(state);\r
+ } else {\r
+ var ar = p1(state);\r
+ if(ar.matched.length >= br.matched.length)\r
+ cached = br;\r
+ else\r
+ cached = ar;\r
+ }\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+\r
+\r
+// 'xor' is a parser combinator that takes two parsers, 'p1' and 'p2'. \r
+// It returns a parser that succeeds if 'p1' or 'p2' match but fails if\r
+// they both match.\r
+function xor(p1, p2) {\r
+ var p1 = toParser(p1);\r
+ var p2 = toParser(p2);\r
+ var pid = parser_id++;\r
+\r
+ // match a or b but not both\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+\r
+ var ar = p1(state);\r
+ var br = p2(state);\r
+ if(ar && br)\r
+ cached = false;\r
+ else\r
+ cached = ar || br;\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+\r
+// A parser combinator that takes one parser. It returns a parser that\r
+// looks for zero or more matches of the original parser.\r
+function repeat0(p) {\r
+ var p = toParser(p);\r
+ var pid = parser_id++;\r
+ \r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached) {\r
+ return cached;\r
+ }\r
+\r
+ var ast = [];\r
+ var matched = "";\r
+ var result;\r
+ while(result = p(state)) {\r
+ ast.push(result.ast);\r
+ matched = matched + result.matched;\r
+ if(result.remaining.index == state.index)\r
+ break;\r
+ state = result.remaining; \r
+ } \r
+ cached = make_result(state, matched, ast); \r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+\r
+// A parser combinator that takes one parser. It returns a parser that\r
+// looks for one or more matches of the original parser.\r
+function repeat1(p) {\r
+ var p = toParser(p);\r
+ var pid = parser_id++;\r
+\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+\r
+ var ast = [];\r
+ var matched = "";\r
+ var result= p(state);\r
+ if(!result) \r
+ cached = false;\r
+ else { \r
+ while(result) {\r
+ ast.push(result.ast);\r
+ matched = matched + result.matched;\r
+ if(result.remaining.index == state.index)\r
+ break;\r
+ state = result.remaining; \r
+ result = p(state);\r
+ } \r
+ cached = make_result(state, matched, ast); \r
+ }\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+\r
+// A parser combinator that takes one parser. It returns a parser that\r
+// matches zero or one matches of the original parser.\r
+function optional(p) {\r
+ var p = toParser(p);\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached;\r
+ var r = p(state);\r
+ cached = r || make_result(state, "", false); \r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+\r
+// A parser combinator that ensures that the given parser succeeds but\r
+// ignores its result. This can be useful for parsing literals that you\r
+// don't want to appear in the ast. eg:\r
+// sequence(expect("("), Number, expect(")")) => ast: Number\r
+function expect(p) {\r
+ return action(p, function(ast) { return undefined; });\r
+}\r
+\r
+function chain(p, s, f) {\r
+ var p = toParser(p);\r
+\r
+ return action(sequence(p, repeat0(action(sequence(s, p), f))),\r
+ function(ast) { return [ast[0]].concat(ast[1]); });\r
+}\r
+\r
+// A parser combinator to do left chaining and evaluation. Like 'chain', it expects a parser\r
+// for an item and for a seperator. The seperator parser's AST result should be a function\r
+// of the form: function(lhs,rhs) { return x; }\r
+// Where 'x' is the result of applying some operation to the lhs and rhs AST's from the item\r
+// parser.\r
+function chainl(p, s) {\r
+ var p = toParser(p);\r
+ return action(sequence(p, repeat0(sequence(s, p))),\r
+ function(ast) {\r
+ return foldl(function(v, action) { return action[0](v, action[1]); }, ast[0], ast[1]);\r
+ });\r
+}\r
+\r
+// A parser combinator that returns a parser that matches lists of things. The parser to \r
+// match the list item and the parser to match the seperator need to \r
+// be provided. The AST is the array of matched items.\r
+function list(p, s) {\r
+ return chain(p, s, function(ast) { return ast[1]; });\r
+}\r
+\r
+// Like list, but ignores whitespace between individual parsers.\r
+function wlist() {\r
+ var parsers = [];\r
+ for(var i=0; i < arguments.length; ++i) {\r
+ parsers.push(whitespace(arguments[i]));\r
+ }\r
+ return list.apply(null, parsers); \r
+}\r
+\r
+// A parser that always returns a zero length match\r
+function epsilon_p(state) {\r
+ return make_result(state, "", undefined);\r
+}\r
+\r
+// Allows attaching of a function anywhere in the grammer. If the function returns\r
+// true then parse succeeds otherwise it fails. Can be used for testing if a symbol\r
+// is in the symbol table, etc.\r
+function semantic(f) {\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached; \r
+ cached = f() ? make_result(state, "", undefined) : false;\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+\r
+// The `and` predicate asserts that a certain conditional\r
+// syntax is satisfied before evaluating another production. Eg:\r
+// sequence(and("0"), oct_p)\r
+// (if a leading zero, then parse octal)\r
+// It succeeds if 'p' succeeds and fails if 'p' fails. It never \r
+// consume any input however, and doesn't put anything in the resulting\r
+// AST.\r
+function and(p) {\r
+ var p = toParser(p);\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached; \r
+ var r = p(state);\r
+ cached = r ? make_result(state, "", undefined) : false;\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+ \r
+// The opposite of 'and'. It fails if 'p' succeeds and succeeds if\r
+// 'p' fails. It never consumes any input. This combined with 'and' can\r
+// be used for 'lookahead' and disambiguation of cases.\r
+//\r
+// Compare:\r
+// sequence("a",choice("+","++"),"b")\r
+// parses a+b\r
+// but not a++b because the + matches the first part and peg's don't\r
+// backtrack to other choice options if they succeed but later things fail.\r
+//\r
+// sequence("a",choice(sequence("+", not("+")),"++"),"b")\r
+// parses a+b\r
+// parses a++b\r
+//\r
+function not(p) {\r
+ var p = toParser(p);\r
+ var pid = parser_id++;\r
+ return function(state) {\r
+ var savedState = state;\r
+ var cached = savedState.getCached(pid);\r
+ if(cached)\r
+ return cached; \r
+ cached = p(state) ? false : make_result(state, "", undefined);\r
+ savedState.putCached(pid, cached);\r
+ return cached;\r
+ }\r
+}\r
+\r