diff --git a/lib/search-query-parser.js b/lib/search-query-parser.js index fba07e1..c6354ba 100644 --- a/lib/search-query-parser.js +++ b/lib/search-query-parser.js @@ -16,11 +16,11 @@ exports.parse = function (string, options) { } // When a simple string, return it - if (-1 === string.indexOf(':')) { + if (-1 === string.indexOf(':') && !options.tokenize) { return string; } // When no keywords or ranges set, treat as a simple string - else if (!options.keywords && !options.ranges){ + else if (!options.keywords && !options.ranges && !options.tokenize){ return string; } // Otherwise parse the advanced query syntax @@ -30,7 +30,7 @@ exports.parse = function (string, options) { var exclusion = {}; var terms = []; // Get a list of search terms respecting single and double quotes - var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g; + var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g; var match; while ((match = regex.exec(string)) !== null) { var term = match[0]; @@ -61,11 +61,47 @@ exports.parse = function (string, options) { offsetEnd: match.index + term.length }); } else { - terms.push({ - text: term, - offsetStart: match.index, - offsetEnd: match.index + term.length + var isExcludedTerm = false; + if (term[0] === '-') { + isExcludedTerm = true; + term = term.slice(1); + } + + // Strip surrounding quotes + term = term.replace(/^\"|\"$|^\'|\'$/g, ''); + // Strip backslashes respecting escapes + term = (term + '').replace(/\\(.?)/g, function (s, n1) { + switch (n1) { + case '\\': + return '\\'; + case '0': + return '\u0000'; + case '': + return ''; + default: + return n1; + } }); + + if (isExcludedTerm) { + if (exclusion['text']) { + if (exclusion['text'] instanceof Array) { + exclusion['text'].push(term); + } else { + exclusion['text'] = [exclusion['text']]; + exclusion['text'].push(term); + } + } else { + // First time seeing an excluded text term + exclusion['text'] = term; + } + } else { + terms.push({ + text: term, + offsetStart: match.index, + offsetEnd: match.index + term.length + }); + } } } // Reverse to ensure proper order when pop()'ing. @@ -228,7 +264,9 @@ exports.parse = function (string, options) { // Concatenate all text terms if any if (query.text.length) { - query.text = query.text.join(' ').trim(); + if (!options.tokenize) { + query.text = query.text.join(' ').trim(); + } } // Just remove the attribute text when it's empty else { diff --git a/package.json b/package.json index 1f45f74..216691b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "search-query-parser", - "version": "1.3.0", + "version": "1.4.0", "description": "Parser for advanced search query syntax", "main": "index.js", "scripts": { diff --git a/test/test.js b/test/test.js index 24a2013..674630d 100644 --- a/test/test.js +++ b/test/test.js @@ -14,6 +14,62 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.equal(searchQuery); }); + it('should return a tokenized string when option is set', function () { + var searchQuery = "fancy pyjama wear"; + var options = { tokenize: true }; + var parsedSearchQuery = searchquery.parse(searchQuery, options); + + parsedSearchQuery.should.be.an.Object; + parsedSearchQuery.should.have.property('text', ['fancy', 'pyjama', 'wear']); + }); + + it('should return a tokenized string when option is set, respecting double-quotes and escapes', function () { + var searchQuery = 'fancy "py\\"j\\"am\'a w\'ear"'; + var options = { tokenize: true }; + var parsedSearchQuery = searchquery.parse(searchQuery, options); + + parsedSearchQuery.should.be.an.Object; + parsedSearchQuery.should.have.property('text', ['fancy', 'py"j"am\'a w\'ear']); + }); + + it('should return a tokenized string when option is set, respecting single-quotes and escapes', function () { + var searchQuery = "fancy 'py\\'j\\'am\"a w\"ear'"; + var options = { tokenize: true }; + var parsedSearchQuery = searchquery.parse(searchQuery, options); + + parsedSearchQuery.should.be.an.Object; + parsedSearchQuery.should.have.property('text', ['fancy', "py'j'am\"a w\"ear"]); + }); + + it('should return a tokenized string with negation of unquoted terms', function () { + var searchQuery = "fancy -pyjama -wear"; + var options = { tokenize: true }; + var parsedSearchQuery = searchquery.parse(searchQuery, options); + + parsedSearchQuery.should.be.an.Object; + parsedSearchQuery.should.have.property('text', ['fancy']); + parsedSearchQuery.should.have.property('exclude', {text: ['pyjama', 'wear']}); + }); + + it('should return a tokenized string with negation of single-quoted terms', function () { + var searchQuery = "fancy -'pyjama -wear'"; + var options = { tokenize: true }; + var parsedSearchQuery = searchquery.parse(searchQuery, options); + + parsedSearchQuery.should.be.an.Object; + parsedSearchQuery.should.have.property('text', ['fancy']); + parsedSearchQuery.should.have.property('exclude', {text: 'pyjama -wear'}); + }); + + it('should return a tokenized string with negation of double-quoted terms', function () { + var searchQuery = 'fancy -"pyjama -wear"'; + var options = { tokenize: true }; + var parsedSearchQuery = searchquery.parse(searchQuery, options); + + parsedSearchQuery.should.be.an.Object; + parsedSearchQuery.should.have.property('text', ['fancy']); + parsedSearchQuery.should.have.property('exclude', {text: 'pyjama -wear'}); + }); it('should parse a single keyword with no text', function () { var searchQuery = 'from:jul@foo.com';