283 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
		
		
			
		
	
	
			283 lines
		
	
	
		
			6.4 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
|  | var util      = require('./util'); | ||
|  | var types     = require('./types'); | ||
|  | var sets      = require('./sets'); | ||
|  | var positions = require('./positions'); | ||
|  | 
 | ||
|  | 
 | ||
|  | module.exports = function(regexpStr) { | ||
|  |   var i = 0, l, c, | ||
|  |       start = { type: types.ROOT, stack: []}, | ||
|  | 
 | ||
|  |       // Keep track of last clause/group and stack.
 | ||
|  |       lastGroup = start, | ||
|  |       last = start.stack, | ||
|  |       groupStack = []; | ||
|  | 
 | ||
|  | 
 | ||
|  |   var repeatErr = function(i) { | ||
|  |     util.error(regexpStr, 'Nothing to repeat at column ' + (i - 1)); | ||
|  |   }; | ||
|  | 
 | ||
|  |   // Decode a few escaped characters.
 | ||
|  |   var str = util.strToChars(regexpStr); | ||
|  |   l = str.length; | ||
|  | 
 | ||
|  |   // Iterate through each character in string.
 | ||
|  |   while (i < l) { | ||
|  |     c = str[i++]; | ||
|  | 
 | ||
|  |     switch (c) { | ||
|  |       // Handle escaped characters, inclues a few sets.
 | ||
|  |       case '\\': | ||
|  |         c = str[i++]; | ||
|  | 
 | ||
|  |         switch (c) { | ||
|  |           case 'b': | ||
|  |             last.push(positions.wordBoundary()); | ||
|  |             break; | ||
|  | 
 | ||
|  |           case 'B': | ||
|  |             last.push(positions.nonWordBoundary()); | ||
|  |             break; | ||
|  | 
 | ||
|  |           case 'w': | ||
|  |             last.push(sets.words()); | ||
|  |             break; | ||
|  | 
 | ||
|  |           case 'W': | ||
|  |             last.push(sets.notWords()); | ||
|  |             break; | ||
|  | 
 | ||
|  |           case 'd': | ||
|  |             last.push(sets.ints()); | ||
|  |             break; | ||
|  | 
 | ||
|  |           case 'D': | ||
|  |             last.push(sets.notInts()); | ||
|  |             break; | ||
|  | 
 | ||
|  |           case 's': | ||
|  |             last.push(sets.whitespace()); | ||
|  |             break; | ||
|  | 
 | ||
|  |           case 'S': | ||
|  |             last.push(sets.notWhitespace()); | ||
|  |             break; | ||
|  | 
 | ||
|  |           default: | ||
|  |             // Check if c is integer.
 | ||
|  |             // In which case it's a reference.
 | ||
|  |             if (/\d/.test(c)) { | ||
|  |               last.push({ type: types.REFERENCE, value: parseInt(c, 10) }); | ||
|  | 
 | ||
|  |             // Escaped character.
 | ||
|  |             } else { | ||
|  |               last.push({ type: types.CHAR, value: c.charCodeAt(0) }); | ||
|  |             } | ||
|  |         } | ||
|  | 
 | ||
|  |         break; | ||
|  | 
 | ||
|  | 
 | ||
|  |       // Positionals.
 | ||
|  |       case '^': | ||
|  |           last.push(positions.begin()); | ||
|  |         break; | ||
|  | 
 | ||
|  |       case '$': | ||
|  |           last.push(positions.end()); | ||
|  |         break; | ||
|  | 
 | ||
|  | 
 | ||
|  |       // Handle custom sets.
 | ||
|  |       case '[': | ||
|  |         // Check if this class is 'anti' i.e. [^abc].
 | ||
|  |         var not; | ||
|  |         if (str[i] === '^') { | ||
|  |           not = true; | ||
|  |           i++; | ||
|  |         } else { | ||
|  |           not = false; | ||
|  |         } | ||
|  | 
 | ||
|  |         // Get all the characters in class.
 | ||
|  |         var classTokens = util.tokenizeClass(str.slice(i), regexpStr); | ||
|  | 
 | ||
|  |         // Increase index by length of class.
 | ||
|  |         i += classTokens[1]; | ||
|  |         last.push({ | ||
|  |           type: types.SET, | ||
|  |           set: classTokens[0], | ||
|  |           not: not, | ||
|  |         }); | ||
|  | 
 | ||
|  |         break; | ||
|  | 
 | ||
|  | 
 | ||
|  |       // Class of any character except \n.
 | ||
|  |       case '.': | ||
|  |         last.push(sets.anyChar()); | ||
|  |         break; | ||
|  | 
 | ||
|  | 
 | ||
|  |       // Push group onto stack.
 | ||
|  |       case '(': | ||
|  |         // Create group.
 | ||
|  |         var group = { | ||
|  |           type: types.GROUP, | ||
|  |           stack: [], | ||
|  |           remember: true, | ||
|  |         }; | ||
|  | 
 | ||
|  |         c = str[i]; | ||
|  | 
 | ||
|  |         // If if this is a special kind of group.
 | ||
|  |         if (c === '?') { | ||
|  |           c = str[i + 1]; | ||
|  |           i += 2; | ||
|  | 
 | ||
|  |           // Match if followed by.
 | ||
|  |           if (c === '=') { | ||
|  |             group.followedBy = true; | ||
|  | 
 | ||
|  |           // Match if not followed by.
 | ||
|  |           } else if (c === '!') { | ||
|  |             group.notFollowedBy = true; | ||
|  | 
 | ||
|  |           } else if (c !== ':') { | ||
|  |             util.error(regexpStr, | ||
|  |               'Invalid group, character \'' + c + | ||
|  |               '\' after \'?\' at column ' + (i - 1)); | ||
|  |           } | ||
|  | 
 | ||
|  |           group.remember = false; | ||
|  |         } | ||
|  | 
 | ||
|  |         // Insert subgroup into current group stack.
 | ||
|  |         last.push(group); | ||
|  | 
 | ||
|  |         // Remember the current group for when the group closes.
 | ||
|  |         groupStack.push(lastGroup); | ||
|  | 
 | ||
|  |         // Make this new group the current group.
 | ||
|  |         lastGroup = group; | ||
|  |         last = group.stack; | ||
|  |         break; | ||
|  | 
 | ||
|  | 
 | ||
|  |       // Pop group out of stack.
 | ||
|  |       case ')': | ||
|  |         if (groupStack.length === 0) { | ||
|  |           util.error(regexpStr, 'Unmatched ) at column ' + (i - 1)); | ||
|  |         } | ||
|  |         lastGroup = groupStack.pop(); | ||
|  | 
 | ||
|  |         // Check if this group has a PIPE.
 | ||
|  |         // To get back the correct last stack.
 | ||
|  |         last = lastGroup.options ? | ||
|  |           lastGroup.options[lastGroup.options.length - 1] : lastGroup.stack; | ||
|  |         break; | ||
|  | 
 | ||
|  | 
 | ||
|  |       // Use pipe character to give more choices.
 | ||
|  |       case '|': | ||
|  |         // Create array where options are if this is the first PIPE
 | ||
|  |         // in this clause.
 | ||
|  |         if (!lastGroup.options) { | ||
|  |           lastGroup.options = [lastGroup.stack]; | ||
|  |           delete lastGroup.stack; | ||
|  |         } | ||
|  | 
 | ||
|  |         // Create a new stack and add to options for rest of clause.
 | ||
|  |         var stack = []; | ||
|  |         lastGroup.options.push(stack); | ||
|  |         last = stack; | ||
|  |         break; | ||
|  | 
 | ||
|  | 
 | ||
|  |       // Repetition.
 | ||
|  |       // For every repetition, remove last element from last stack
 | ||
|  |       // then insert back a RANGE object.
 | ||
|  |       // This design is chosen because there could be more than
 | ||
|  |       // one repetition symbols in a regex i.e. `a?+{2,3}`.
 | ||
|  |       case '{': | ||
|  |         var rs = /^(\d+)(,(\d+)?)?\}/.exec(str.slice(i)), min, max; | ||
|  |         if (rs !== null) { | ||
|  |           if (last.length === 0) { | ||
|  |             repeatErr(i); | ||
|  |           } | ||
|  |           min = parseInt(rs[1], 10); | ||
|  |           max = rs[2] ? rs[3] ? parseInt(rs[3], 10) : Infinity : min; | ||
|  |           i += rs[0].length; | ||
|  | 
 | ||
|  |           last.push({ | ||
|  |             type: types.REPETITION, | ||
|  |             min: min, | ||
|  |             max: max, | ||
|  |             value: last.pop(), | ||
|  |           }); | ||
|  |         } else { | ||
|  |           last.push({ | ||
|  |             type: types.CHAR, | ||
|  |             value: 123, | ||
|  |           }); | ||
|  |         } | ||
|  |         break; | ||
|  | 
 | ||
|  |       case '?': | ||
|  |         if (last.length === 0) { | ||
|  |           repeatErr(i); | ||
|  |         } | ||
|  |         last.push({ | ||
|  |           type: types.REPETITION, | ||
|  |           min: 0, | ||
|  |           max: 1, | ||
|  |           value: last.pop(), | ||
|  |         }); | ||
|  |         break; | ||
|  | 
 | ||
|  |       case '+': | ||
|  |         if (last.length === 0) { | ||
|  |           repeatErr(i); | ||
|  |         } | ||
|  |         last.push({ | ||
|  |           type: types.REPETITION, | ||
|  |           min: 1, | ||
|  |           max: Infinity, | ||
|  |           value: last.pop(), | ||
|  |         }); | ||
|  |         break; | ||
|  | 
 | ||
|  |       case '*': | ||
|  |         if (last.length === 0) { | ||
|  |           repeatErr(i); | ||
|  |         } | ||
|  |         last.push({ | ||
|  |           type: types.REPETITION, | ||
|  |           min: 0, | ||
|  |           max: Infinity, | ||
|  |           value: last.pop(), | ||
|  |         }); | ||
|  |         break; | ||
|  | 
 | ||
|  | 
 | ||
|  |       // Default is a character that is not `\[](){}?+*^$`.
 | ||
|  |       default: | ||
|  |         last.push({ | ||
|  |           type: types.CHAR, | ||
|  |           value: c.charCodeAt(0), | ||
|  |         }); | ||
|  |     } | ||
|  | 
 | ||
|  |   } | ||
|  | 
 | ||
|  |   // Check if any groups have not been closed.
 | ||
|  |   if (groupStack.length !== 0) { | ||
|  |     util.error(regexpStr, 'Unterminated group'); | ||
|  |   } | ||
|  | 
 | ||
|  |   return start; | ||
|  | }; | ||
|  | 
 | ||
|  | module.exports.types = types; |