From 09a1110f401fd715697c63dc066e1a2b29e02896 Mon Sep 17 00:00:00 2001 From: Brad Vogel Date: Fri, 28 Oct 2016 12:58:10 -0700 Subject: [PATCH 1/3] Return the character offsets of parsed search parts. The use case is that browser code using the library might want to track the cursor position (e.g. in an HTML element) to understand which part of the search query the user is editing. It could then show an editor specific to that keyword, such as a calendar picker, and then splice in the edited value back into the search string. --- README.md | 8 +- lib/search-query-parser.js | 60 +++++---- test/test.js | 250 ++++++++++++++++++++++++++++++++++++- 3 files changed, 293 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index edd75b4..e3168aa 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,13 @@ And turns it into an object like this: from: '1/10/2013', to: '15/04/2014' }, - text: 'photos' + text: 'photos', + offsets: + [ { keyword: 'from', value: 'hi@retrace.io,foo@gmail.com', offsetStart: 0, offsetEnd: 32 }, + { keyword: 'to', value: 'me', offsetStart: 33, offsetEnd: 38 }, + { keyword: 'subject', value: 'vacations', offsetStart: 39, offsetEnd: 56 }, + { keyword: 'date', value: '1/10/2013-15/04/2014', offsetStart: 57, offsetEnd: 82 }, + { text: 'photos', offsetStart: 83, offsetEnd: 89 } ] } ``` diff --git a/lib/search-query-parser.js b/lib/search-query-parser.js index 690eefc..f190d2a 100644 --- a/lib/search-query-parser.js +++ b/lib/search-query-parser.js @@ -15,10 +15,6 @@ exports.parse = function (string, options) { string = ''; } - // Regularize white spacing - // Make in-between white spaces a unique space - string = string.trim().replace(/\s+/g, ' '); - // When a simple string, return it if (-1 === string.indexOf(':')) { return string; @@ -30,16 +26,18 @@ exports.parse = function (string, options) { // Otherwise parse the advanced query syntax else { // Our object to store the query object - var query = {text: []}; + var query = {text: [], offsets: 
[]}; var exclusion = {}; + var terms = []; // Get a list of search terms respecting single and double quotes - var terms = string.match(/(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g); - for (var i = 0; i < terms.length; i++) { - var sepIndex = terms[i].indexOf(':'); - if(sepIndex !== -1) { - var split = terms[i].split(':'), - key = terms[i].slice(0, sepIndex), - val = terms[i].slice(sepIndex + 1); + var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g; + while ((match = regex.exec(string)) !== null) { + var term = match[0]; + var sepIndex = term.indexOf(':'); + if (sepIndex !== -1) { + var split = term.split(':'), + key = term.slice(0, sepIndex), + val = term.slice(sepIndex + 1); // Strip surrounding quotes val = val.replace(/^\"|\"$|^\'|\'$/g, ''); // Strip backslashes respecting escapes @@ -55,24 +53,34 @@ exports.parse = function (string, options) { return n1; } }); - terms[i] = key + ':' + val; + terms.push({ + keyword: key, + value: val, + offsetStart: match.index, + offsetEnd: match.index + term.length + }); + } else { + terms.push({ + text: term, + offsetStart: match.index, + offsetEnd: match.index + term.length + }); } } // Reverse to ensure proper order when pop()'ing. 
terms.reverse(); // For each search term + var term; while (term = terms.pop()) { - // Advanced search terms syntax has key and value - // separated with a colon - var sepIdx = term.indexOf(':'); // When just a simple term - if (-1 === sepIdx) { + if (term.text) { // We add it as pure text - query.text.push(term); + query.text.push(term.text); + query.offsets.push(term); } // We got an advanced search syntax else { - var key = term.slice(0, sepIdx); + var key = term.keyword; // Check if the key is a registered keyword options.keywords = options.keywords || []; var isKeyword = false; @@ -87,12 +95,20 @@ exports.parse = function (string, options) { isExclusion = true; } } + + query.offsets.push({ + keyword: key, + value: term.value, + offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart, + offsetEnd: term.offsetEnd + }); + // Check if the key is a registered range options.ranges = options.ranges || []; var isRange = !(-1 === options.ranges.indexOf(key)); // When the key matches a keyword if (isKeyword) { - var value = term.slice(sepIdx + 1); + var value = term.value; // When value is a thing if (value.length) { // Get an array of values when several are there @@ -173,7 +189,7 @@ exports.parse = function (string, options) { } // The key allows a range else if (isRange) { - var value = term.slice(sepIdx + 1); + var value = term.value; // Range are separated with a dash var rangeValues = value.split('-'); // When both end of the range are specified @@ -195,7 +211,7 @@ exports.parse = function (string, options) { } else { // We add it as pure text - query.text.push(term); + query.text.push(term.text); } } } diff --git a/test/test.js b/test/test.js index 529b97a..249ee9b 100644 --- a/test/test.js +++ b/test/test.js @@ -23,6 +23,12 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.not.have.property('text'); + 
parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 0, + offsetEnd: 16 + }]); }); @@ -34,6 +40,20 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey buddy!'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 0, + offsetEnd: 16 + }, { + text: 'hey', + offsetStart: 17, + offsetEnd: 20 + }, { + text: 'buddy!', + offsetStart: 21, + offsetEnd: 27 + }]); }); @@ -45,6 +65,20 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey you!'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey', + offsetStart: 0, + offsetEnd: 3 + }, { + text: 'you!', + offsetStart: 4, + offsetEnd: 8 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 9, + offsetEnd: 25 + }]); }); @@ -56,6 +90,24 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey you! pouet'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey', + offsetStart: 0, + offsetEnd: 3 + }, { + text: 'you!', + offsetStart: 4, + offsetEnd: 8 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 9, + offsetEnd: 25 + }, { + text: 'pouet', + offsetStart: 26, + offsetEnd: 31 + }]); }); @@ -68,6 +120,24 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey you! 
pouet'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey', + offsetStart: 3, + offsetEnd: 6 + }, { + text: 'you!', + offsetStart: 11, + offsetEnd: 15 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 16, + offsetEnd: 32 + }, { + text: 'pouet', + offsetStart: 35, + offsetEnd: 40 + }]); }); @@ -80,6 +150,37 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('to', 'bar@hey.ya'); parsedSearchQuery.should.have.property('text', 'hey, so what\'s up gents'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey,', + offsetStart: 0, + offsetEnd: 4 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 5, + offsetEnd: 21 + }, { + keyword: 'to', + value: 'bar@hey.ya', + offsetStart: 22, + offsetEnd: 35 + }, { + text: 'so', + offsetStart: 36, + offsetEnd: 38 + }, { + text: 'what\'s', + offsetStart: 39, + offsetEnd: 45 + }, { + text: 'up', + offsetStart: 46, + offsetEnd: 48 + }, { + text: 'gents', + offsetStart: 49, + offsetEnd: 54 + }]); }); @@ -95,6 +196,21 @@ describe('Search query syntax parser', function () { parsedSearchQuery.from.length.should.equal(2); parsedSearchQuery.from.should.containEql('jul@foo.com'); parsedSearchQuery.from.should.containEql('bar@hey.ya'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 0, + offsetEnd: 16 + }, { + keyword: 'from', + value: 'bar@hey.ya', + offsetStart: 17, + offsetEnd: 32 + }, { + text: 'vaccationessss', + offsetStart: 33, + offsetEnd: 47 + }]); }); @@ -109,6 +225,12 @@ describe('Search query syntax parser', function () { parsedSearchQuery.from.length.should.equal(2); parsedSearchQuery.from.should.containEql('jul@foo.com'); parsedSearchQuery.from.should.containEql('bar@hey.ya'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,bar@hey.ya', + offsetStart: 0, 
+ offsetEnd: 27 + }]); }); @@ -126,6 +248,21 @@ describe('Search query syntax parser', function () { parsedSearchQuery.from.should.containEql('bar@hey.ya'); parsedSearchQuery.from.should.containEql('a@b.c'); parsedSearchQuery.from.should.containEql('d@e.f'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,bar@hey.ya', + offsetStart: 0, + offsetEnd: 27 + }, { + keyword: 'from', + value: 'a@b.c,d@e.f', + offsetStart: 28, + offsetEnd: 44 + }, { + text: 'ouch!#', + offsetStart: 45, + offsetEnd: 51 + }]); }); @@ -139,6 +276,16 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.have.property('date'); parsedSearchQuery.date.should.be.an.Object; parsedSearchQuery.date.from.should.containEql('12/12/2012'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'date', + value: '12/12/2012', + offsetStart: 0, + offsetEnd: 15 + }, { + text: 'ahaha', + offsetStart: 16, + offsetEnd: 21 + }]); }); it('should parse range with 2 ends and free text', function () { @@ -152,6 +299,16 @@ describe('Search query syntax parser', function () { parsedSearchQuery.date.should.be.an.Object; parsedSearchQuery.date.from.should.containEql('12/12/2012'); parsedSearchQuery.date.to.should.containEql('01/01/2014'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'date', + value: '12/12/2012-01/01/2014', + offsetStart: 0, + offsetEnd: 26 + }, { + text: 'ahaha', + offsetStart: 27, + offsetEnd: 32 + }]); }); @@ -198,6 +355,48 @@ describe('Search query syntax parser', function () { parsedSearchQuery.to.length.should.equal(2); parsedSearchQuery.to.should.containEql('me@me.com'); parsedSearchQuery.to.should.containEql('toto@hey.co'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'date', + value: '12/12/2012-01/01/2014', + offsetStart: 3, + offsetEnd: 29 + }, { + text: 'ahaha', + offsetStart: 30, + offsetEnd: 35 + }, { + keyword: 'from', + value: 'jul@foo.com,bar@hey.ya', + 
offsetStart: 36, + offsetEnd: 63 + }, { + keyword: 'from', + value: 'a@b.c,d@e.f', + offsetStart: 64, + offsetEnd: 80 + }, { + text: 'ouch!#', + offsetStart: 81, + offsetEnd: 87 + }, { + keyword: 'to', + value: 'me@me.com', + offsetStart: 90, + offsetEnd: 102 + }, { + keyword: 'to', + value: 'toto@hey.co', + offsetStart: 103, + offsetEnd: 117 + }, { + text: 'about', + offsetStart: 118, + offsetEnd: 123 + }, { + text: 'that', + offsetStart: 124, + offsetEnd: 128 + }]); }); @@ -209,6 +408,17 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('name', 'Bob Saget'); parsedSearchQuery.should.have.property('description', 'Banana Sandwiche'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'name', + value: 'Bob Saget', + offsetStart: 0, + offsetEnd: 16 + }, { + keyword: 'description', + value: 'Banana Sandwiche', + offsetStart: 17, + offsetEnd: 47 + }]); }); @@ -220,6 +430,17 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('case1', 'This "is" \'a\' test'); parsedSearchQuery.should.have.property('case2', 'This "is" \'a\' test'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'case1', + value: 'This "is" \'a\' test', + offsetStart: 0, + offsetEnd: 28 + }, { + keyword: 'case2', + value: 'This "is" \'a\' test', + offsetStart: 29, + offsetEnd: 57 + }]); }); @@ -232,6 +453,12 @@ describe('Search query syntax parser', function () { parsedSearchQuery.exclude.should.be.an.Object; parsedSearchQuery.exclude.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 1, + offsetEnd: 17 + }]); }); it('should concatenate a keyword multiple values in exclusion syntax', function() { @@ -244,11 +471,19 @@ describe('Search query syntax 
parser', function () { parsedSearchQuery.exclude.from.should.containEql('jul@foo.com'); parsedSearchQuery.exclude.from.should.containEql('mar@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,mar@foo.com', + offsetStart: 1, + offsetEnd: 29 + }]); }); it('should support keywords which appear multiple times with exclusion syntax', function() { var searchQuery = '-from:jul@foo.com,mar@foo.com -from:jan@foo.com'; - var options = {keywords: ['from']}; + var options = { + keywords: ['from'] + }; var parsedSearchQuery = searchquery.parse(searchQuery, options); parsedSearchQuery.should.be.an.Object; @@ -257,5 +492,16 @@ describe('Search query syntax parser', function () { parsedSearchQuery.exclude.from.should.containEql('mar@foo.com'); parsedSearchQuery.exclude.from.should.containEql('jan@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,mar@foo.com', + offsetStart: 1, + offsetEnd: 29 + }, { + keyword: 'from', + value: 'jan@foo.com', + offsetStart: 31, + offsetEnd: 47 + }]); }); -}); +}); \ No newline at end of file From 23b36fd086886a291e5cf1fecbf718afae96c38c Mon Sep 17 00:00:00 2001 From: Brad Vogel Date: Fri, 28 Oct 2016 13:11:38 -0700 Subject: [PATCH 2/3] Fix implicit global variable. 
--- lib/search-query-parser.js | 1 + 1 file changed, 1 insertion(+) diff --git a/lib/search-query-parser.js b/lib/search-query-parser.js index f190d2a..b23c84c 100644 --- a/lib/search-query-parser.js +++ b/lib/search-query-parser.js @@ -31,6 +31,7 @@ exports.parse = function (string, options) { var terms = []; // Get a list of search terms respecting single and double quotes var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g; + var match; while ((match = regex.exec(string)) !== null) { var term = match[0]; var sepIndex = term.indexOf(':'); From d72b16d255696b969299c88689150024a758e3e0 Mon Sep 17 00:00:00 2001 From: Brad Vogel Date: Fri, 28 Oct 2016 14:27:07 -0700 Subject: [PATCH 3/3] Fix issue where keywords that aren't specified will show up as keywords in the response. --- lib/search-query-parser.js | 25 +++++++++++++++++-------- test/test.js | 28 +++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/lib/search-query-parser.js b/lib/search-query-parser.js index b23c84c..fba07e1 100644 --- a/lib/search-query-parser.js +++ b/lib/search-query-parser.js @@ -97,18 +97,18 @@ exports.parse = function (string, options) { } } - query.offsets.push({ - keyword: key, - value: term.value, - offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart, - offsetEnd: term.offsetEnd - }); - // Check if the key is a registered range options.ranges = options.ranges || []; var isRange = !(-1 === options.ranges.indexOf(key)); // When the key matches a keyword if (isKeyword) { + query.offsets.push({ + keyword: key, + value: term.value, + offsetStart: isExclusion ? 
term.offsetStart + 1 : term.offsetStart, + offsetEnd: term.offsetEnd + }); + var value = term.value; // When value is a thing if (value.length) { @@ -190,6 +190,8 @@ exports.parse = function (string, options) { } // The key allows a range else if (isRange) { + query.offsets.push(term); + var value = term.value; // Range are separated with a dash var rangeValues = value.split('-'); @@ -212,7 +214,14 @@ exports.parse = function (string, options) { } else { // We add it as pure text - query.text.push(term.text); + var text = term.keyword + ':' + term.value; + query.text.push(text); + + query.offsets.push({ + text: text, + offsetStart: term.offsetStart, + offsetEnd: term.offsetEnd + }); } } } diff --git a/test/test.js b/test/test.js index 249ee9b..24a2013 100644 --- a/test/test.js +++ b/test/test.js @@ -57,7 +57,33 @@ describe('Search query syntax parser', function () { }); - it('should parse a single keyword with free text before it', function () { + it('should ignore keywords that are not specified', function() { + var searchQuery = 'test another other:jul@foo.com'; + var options = { + keywords: ['from'] + }; + var parsedSearchQuery = searchquery.parse(searchQuery, options); + + parsedSearchQuery.should.be.an.Object; + parsedSearchQuery.should.have.not.have.property('other'); + parsedSearchQuery.should.have.property('text', 'test another other:jul@foo.com'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'test', + offsetStart: 0, + offsetEnd: 4 + }, { + text: 'another', + offsetStart: 5, + offsetEnd: 12 + }, { + text: 'other:jul@foo.com', + offsetStart: 13, + offsetEnd: 30 + }]); + }); + + + it('should parse a single keyword with free text before it', function() { var searchQuery = 'hey you! from:jul@foo.com'; var options = {keywords: ['from']}; var parsedSearchQuery = searchquery.parse(searchQuery, options);