Merge pull request #18 from jonestristand/token-terms

Add support for tokenized terms, quoted terms, and negation of terms.
2018-04-08 16:48:13 +08:00
parent fdcb6e7af3 02357ad02b
commit 6ec2002085
3 changed files with 103 additions and 9 deletions
--- a/lib/search-query-parser.js
+++ b/lib/search-query-parser.js
@@ -16,11 +16,11 @@ exports.parse = function (string, options) {
  }

  // When a simple string, return it
-  if (-1 === string.indexOf(':')) {
+  if (-1 === string.indexOf(':') && !options.tokenize) {
    return string;
  }
  // When no keywords or ranges set, treat as a simple string
-  else if (!options.keywords && !options.ranges){
+  else if (!options.keywords && !options.ranges && !options.tokenize){
    return string;
  }
  // Otherwise parse the advanced query syntax
@@ -30,7 +30,7 @@ exports.parse = function (string, options) {
    var exclusion = {};
    var terms = [];
    // Get a list of search terms respecting single and double quotes
-    var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g;
+    var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g;
    var match;
    while ((match = regex.exec(string)) !== null) {
      var term = match[0];
@@ -61,11 +61,47 @@ exports.parse = function (string, options) {
          offsetEnd: match.index + term.length
        });
      } else {
-        terms.push({
-          text: term,
-          offsetStart: match.index,
-          offsetEnd: match.index + term.length
+        var isExcludedTerm = false;
+        if (term[0] === '-') {
+          isExcludedTerm = true;
+          term = term.slice(1);
+        }
+
+        // Strip surrounding quotes
+        term = term.replace(/^\"|\"$|^\'|\'$/g, '');
+        // Strip backslashes respecting escapes
+        term = (term + '').replace(/\\(.?)/g, function (s, n1) {
+          switch (n1) {
+          case '\\':
+            return '\\';
+          case '0':
+            return '\u0000';
+          case '':
+            return '';
+          default:
+            return n1;
+          }
        });
+
+        if (isExcludedTerm) {
+          if (exclusion['text']) {
+            if (exclusion['text'] instanceof Array) {
+              exclusion['text'].push(term);
+            } else {
+              exclusion['text'] = [exclusion['text']];
+              exclusion['text'].push(term);
+            }
+          } else {
+            // First time seeing an excluded text term
+            exclusion['text'] = term;
+          }
+        } else {
+          terms.push({
+            text: term,
+            offsetStart: match.index,
+            offsetEnd: match.index + term.length
+          });
+        }
      }
    }
    // Reverse to ensure proper order when pop()'ing.
@@ -228,7 +264,9 @@ exports.parse = function (string, options) {

    // Concatenate all text terms if any
    if (query.text.length) {
-      query.text = query.text.join(' ').trim();
+      if (!options.tokenize) {
+        query.text = query.text.join(' ').trim();
+      }
    }
    // Just remove the attribute text when it's empty
    else {
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
  "name": "search-query-parser",
-  "version": "1.3.0",
+  "version": "1.4.0",
  "description": "Parser for advanced search query syntax",
  "main": "index.js",
  "scripts": {
--- a/test/test.js
+++ b/test/test.js
@@ -14,6 +14,62 @@ describe('Search query syntax parser', function () {
    parsedSearchQuery.should.equal(searchQuery);
  });

+  it('should return a tokenized string when option is set', function () {
+    var searchQuery = "fancy pyjama wear";
+    var options = { tokenize: true };
+    var parsedSearchQuery = searchquery.parse(searchQuery, options);
+
+    parsedSearchQuery.should.be.an.Object;
+    parsedSearchQuery.should.have.property('text', ['fancy', 'pyjama', 'wear']);
+  });
+
+  it('should return a tokenized string when option is set, respecting double-quotes and escapes', function () {
+    var searchQuery = 'fancy "py\\"j\\"am\'a w\'ear"';
+    var options = { tokenize: true };
+    var parsedSearchQuery = searchquery.parse(searchQuery, options);
+
+    parsedSearchQuery.should.be.an.Object;
+    parsedSearchQuery.should.have.property('text', ['fancy', 'py"j"am\'a w\'ear']);
+  });
+
+  it('should return a tokenized string when option is set, respecting single-quotes and escapes', function () {
+    var searchQuery = "fancy 'py\\'j\\'am\"a w\"ear'";
+    var options = { tokenize: true };
+    var parsedSearchQuery = searchquery.parse(searchQuery, options);
+
+    parsedSearchQuery.should.be.an.Object;
+    parsedSearchQuery.should.have.property('text', ['fancy', "py'j'am\"a w\"ear"]);
+  });
+
+  it('should return a tokenized string with negation of unquoted terms', function () {
+    var searchQuery = "fancy -pyjama -wear";
+    var options = { tokenize: true };
+    var parsedSearchQuery = searchquery.parse(searchQuery, options);
+
+    parsedSearchQuery.should.be.an.Object;
+    parsedSearchQuery.should.have.property('text', ['fancy']);
+    parsedSearchQuery.should.have.property('exclude', {text: ['pyjama', 'wear']});
+  });
+
+  it('should return a tokenized string with negation of single-quoted terms', function () {
+    var searchQuery = "fancy -'pyjama -wear'";
+    var options = { tokenize: true };
+    var parsedSearchQuery = searchquery.parse(searchQuery, options);
+
+    parsedSearchQuery.should.be.an.Object;
+    parsedSearchQuery.should.have.property('text', ['fancy']);
+    parsedSearchQuery.should.have.property('exclude', {text: 'pyjama -wear'});
+  });
+
+  it('should return a tokenized string with negation of double-quoted terms', function () {
+    var searchQuery = 'fancy -"pyjama -wear"';
+    var options = { tokenize: true };
+    var parsedSearchQuery = searchquery.parse(searchQuery, options);
+
+    parsedSearchQuery.should.be.an.Object;
+    parsedSearchQuery.should.have.property('text', ['fancy']);
+    parsedSearchQuery.should.have.property('exclude', {text: 'pyjama -wear'});
+  });

  it('should parse a single keyword with no text', function () {
    var searchQuery = 'from:jul@foo.com';