From 09a1110f401fd715697c63dc066e1a2b29e02896 Mon Sep 17 00:00:00 2001 From: Brad Vogel Date: Fri, 28 Oct 2016 12:58:10 -0700 Subject: [PATCH] Return the character offsets of parsed search parts. The use case is that browser code using the library might want to track the cursor position (e.g. in an HTML element) to understand which part of the search query the user is editing. It could then show an editor specific to that keyword, such as a calendar picker, and then splice in the edited value back into the search string. --- README.md | 8 +- lib/search-query-parser.js | 60 +++++---- test/test.js | 250 ++++++++++++++++++++++++++++++++++++- 3 files changed, 293 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index edd75b4..e3168aa 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,13 @@ And turns it into an object like this: from: '1/10/2013', to: '15/04/2014' }, - text: 'photos' + text: 'photos', + offsets: + [ { keyword: 'from', value: 'hi@retrace.io,foo@gmail.com', offsetStart: 0, offsetEnd: 32 }, + { keyword: 'to', value: 'me', offsetStart: 33, offsetEnd: 38 }, + { keyword: 'subject', value: 'vacations', offsetStart: 39, offsetEnd: 56 }, + { keyword: 'date', value: '1/10/2013-15/04/2014', offsetStart: 57, offsetEnd: 82 }, + { text: 'photos', offsetStart: 83, offsetEnd: 89 } ] } ``` diff --git a/lib/search-query-parser.js b/lib/search-query-parser.js index 690eefc..f190d2a 100644 --- a/lib/search-query-parser.js +++ b/lib/search-query-parser.js @@ -15,10 +15,6 @@ exports.parse = function (string, options) { string = ''; } - // Regularize white spacing - // Make in-between white spaces a unique space - string = string.trim().replace(/\s+/g, ' '); - // When a simple string, return it if (-1 === string.indexOf(':')) { return string; @@ -30,16 +26,18 @@ exports.parse = function (string, options) { // Otherwise parse the advanced query syntax else { // Our object to store the query object - var query = {text: []}; + var query = {text: [], offsets: 
[]}; var exclusion = {}; + var terms = []; // Get a list of search terms respecting single and double quotes - var terms = string.match(/(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g); - for (var i = 0; i < terms.length; i++) { - var sepIndex = terms[i].indexOf(':'); - if(sepIndex !== -1) { - var split = terms[i].split(':'), - key = terms[i].slice(0, sepIndex), - val = terms[i].slice(sepIndex + 1); + var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g; + while ((match = regex.exec(string)) !== null) { + var term = match[0]; + var sepIndex = term.indexOf(':'); + if (sepIndex !== -1) { + var split = term.split(':'), + key = term.slice(0, sepIndex), + val = term.slice(sepIndex + 1); // Strip surrounding quotes val = val.replace(/^\"|\"$|^\'|\'$/g, ''); // Strip backslashes respecting escapes @@ -55,24 +53,34 @@ exports.parse = function (string, options) { return n1; } }); - terms[i] = key + ':' + val; + terms.push({ + keyword: key, + value: val, + offsetStart: match.index, + offsetEnd: match.index + term.length + }); + } else { + terms.push({ + text: term, + offsetStart: match.index, + offsetEnd: match.index + term.length + }); } } // Reverse to ensure proper order when pop()'ing. 
terms.reverse(); // For each search term + var term; while (term = terms.pop()) { - // Advanced search terms syntax has key and value - // separated with a colon - var sepIdx = term.indexOf(':'); // When just a simple term - if (-1 === sepIdx) { + if (term.text) { // We add it as pure text - query.text.push(term); + query.text.push(term.text); + query.offsets.push(term); } // We got an advanced search syntax else { - var key = term.slice(0, sepIdx); + var key = term.keyword; // Check if the key is a registered keyword options.keywords = options.keywords || []; var isKeyword = false; @@ -87,12 +95,20 @@ exports.parse = function (string, options) { isExclusion = true; } } + + query.offsets.push({ + keyword: key, + value: term.value, + offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart, + offsetEnd: term.offsetEnd + }); + // Check if the key is a registered range options.ranges = options.ranges || []; var isRange = !(-1 === options.ranges.indexOf(key)); // When the key matches a keyword if (isKeyword) { - var value = term.slice(sepIdx + 1); + var value = term.value; // When value is a thing if (value.length) { // Get an array of values when several are there @@ -173,7 +189,7 @@ exports.parse = function (string, options) { } // The key allows a range else if (isRange) { - var value = term.slice(sepIdx + 1); + var value = term.value; // Range are separated with a dash var rangeValues = value.split('-'); // When both end of the range are specified @@ -195,7 +211,7 @@ exports.parse = function (string, options) { } else { // We add it as pure text - query.text.push(term); + query.text.push(term.text); } } } diff --git a/test/test.js b/test/test.js index 529b97a..249ee9b 100644 --- a/test/test.js +++ b/test/test.js @@ -23,6 +23,12 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.not.have.property('text'); + 
parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 0, + offsetEnd: 16 + }]); }); @@ -34,6 +40,20 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey buddy!'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 0, + offsetEnd: 16 + }, { + text: 'hey', + offsetStart: 17, + offsetEnd: 20 + }, { + text: 'buddy!', + offsetStart: 21, + offsetEnd: 27 + }]); }); @@ -45,6 +65,20 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey you!'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey', + offsetStart: 0, + offsetEnd: 3 + }, { + text: 'you!', + offsetStart: 4, + offsetEnd: 8 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 9, + offsetEnd: 25 + }]); }); @@ -56,6 +90,24 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey you! pouet'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey', + offsetStart: 0, + offsetEnd: 3 + }, { + text: 'you!', + offsetStart: 4, + offsetEnd: 8 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 9, + offsetEnd: 25 + }, { + text: 'pouet', + offsetStart: 26, + offsetEnd: 31 + }]); }); @@ -68,6 +120,24 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey you! 
pouet'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey', + offsetStart: 3, + offsetEnd: 6 + }, { + text: 'you!', + offsetStart: 11, + offsetEnd: 15 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 16, + offsetEnd: 32 + }, { + text: 'pouet', + offsetStart: 35, + offsetEnd: 40 + }]); }); @@ -80,6 +150,37 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('to', 'bar@hey.ya'); parsedSearchQuery.should.have.property('text', 'hey, so what\'s up gents'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey,', + offsetStart: 0, + offsetEnd: 4 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 5, + offsetEnd: 21 + }, { + keyword: 'to', + value: 'bar@hey.ya', + offsetStart: 22, + offsetEnd: 35 + }, { + text: 'so', + offsetStart: 36, + offsetEnd: 38 + }, { + text: 'what\'s', + offsetStart: 39, + offsetEnd: 45 + }, { + text: 'up', + offsetStart: 46, + offsetEnd: 48 + }, { + text: 'gents', + offsetStart: 49, + offsetEnd: 54 + }]); }); @@ -95,6 +196,21 @@ describe('Search query syntax parser', function () { parsedSearchQuery.from.length.should.equal(2); parsedSearchQuery.from.should.containEql('jul@foo.com'); parsedSearchQuery.from.should.containEql('bar@hey.ya'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 0, + offsetEnd: 16 + }, { + keyword: 'from', + value: 'bar@hey.ya', + offsetStart: 17, + offsetEnd: 32 + }, { + text: 'vaccationessss', + offsetStart: 33, + offsetEnd: 47 + }]); }); @@ -109,6 +225,12 @@ describe('Search query syntax parser', function () { parsedSearchQuery.from.length.should.equal(2); parsedSearchQuery.from.should.containEql('jul@foo.com'); parsedSearchQuery.from.should.containEql('bar@hey.ya'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,bar@hey.ya', + offsetStart: 0, 
+ offsetEnd: 27 + }]); }); @@ -126,6 +248,21 @@ describe('Search query syntax parser', function () { parsedSearchQuery.from.should.containEql('bar@hey.ya'); parsedSearchQuery.from.should.containEql('a@b.c'); parsedSearchQuery.from.should.containEql('d@e.f'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,bar@hey.ya', + offsetStart: 0, + offsetEnd: 27 + }, { + keyword: 'from', + value: 'a@b.c,d@e.f', + offsetStart: 28, + offsetEnd: 44 + }, { + text: 'ouch!#', + offsetStart: 45, + offsetEnd: 51 + }]); }); @@ -139,6 +276,16 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.have.property('date'); parsedSearchQuery.date.should.be.an.Object; parsedSearchQuery.date.from.should.containEql('12/12/2012'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'date', + value: '12/12/2012', + offsetStart: 0, + offsetEnd: 15 + }, { + text: 'ahaha', + offsetStart: 16, + offsetEnd: 21 + }]); }); it('should parse range with 2 ends and free text', function () { @@ -152,6 +299,16 @@ describe('Search query syntax parser', function () { parsedSearchQuery.date.should.be.an.Object; parsedSearchQuery.date.from.should.containEql('12/12/2012'); parsedSearchQuery.date.to.should.containEql('01/01/2014'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'date', + value: '12/12/2012-01/01/2014', + offsetStart: 0, + offsetEnd: 26 + }, { + text: 'ahaha', + offsetStart: 27, + offsetEnd: 32 + }]); }); @@ -198,6 +355,48 @@ describe('Search query syntax parser', function () { parsedSearchQuery.to.length.should.equal(2); parsedSearchQuery.to.should.containEql('me@me.com'); parsedSearchQuery.to.should.containEql('toto@hey.co'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'date', + value: '12/12/2012-01/01/2014', + offsetStart: 3, + offsetEnd: 29 + }, { + text: 'ahaha', + offsetStart: 30, + offsetEnd: 35 + }, { + keyword: 'from', + value: 'jul@foo.com,bar@hey.ya', + 
offsetStart: 36, + offsetEnd: 63 + }, { + keyword: 'from', + value: 'a@b.c,d@e.f', + offsetStart: 64, + offsetEnd: 80 + }, { + text: 'ouch!#', + offsetStart: 81, + offsetEnd: 87 + }, { + keyword: 'to', + value: 'me@me.com', + offsetStart: 90, + offsetEnd: 102 + }, { + keyword: 'to', + value: 'toto@hey.co', + offsetStart: 103, + offsetEnd: 117 + }, { + text: 'about', + offsetStart: 118, + offsetEnd: 123 + }, { + text: 'that', + offsetStart: 124, + offsetEnd: 128 + }]); }); @@ -209,6 +408,17 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('name', 'Bob Saget'); parsedSearchQuery.should.have.property('description', 'Banana Sandwiche'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'name', + value: 'Bob Saget', + offsetStart: 0, + offsetEnd: 16 + }, { + keyword: 'description', + value: 'Banana Sandwiche', + offsetStart: 17, + offsetEnd: 47 + }]); }); @@ -220,6 +430,17 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('case1', 'This "is" \'a\' test'); parsedSearchQuery.should.have.property('case2', 'This "is" \'a\' test'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'case1', + value: 'This "is" \'a\' test', + offsetStart: 0, + offsetEnd: 28 + }, { + keyword: 'case2', + value: 'This "is" \'a\' test', + offsetStart: 29, + offsetEnd: 57 + }]); }); @@ -232,6 +453,12 @@ describe('Search query syntax parser', function () { parsedSearchQuery.exclude.should.be.an.Object; parsedSearchQuery.exclude.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 1, + offsetEnd: 17 + }]); }); it('should concatenate a keyword multiple values in exclusion syntax', function() { @@ -244,11 +471,19 @@ describe('Search query syntax 
parser', function () { parsedSearchQuery.exclude.from.should.containEql('jul@foo.com'); parsedSearchQuery.exclude.from.should.containEql('mar@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,mar@foo.com', + offsetStart: 1, + offsetEnd: 29 + }]); }); it('should support keywords which appear multiple times with exclusion syntax', function() { var searchQuery = '-from:jul@foo.com,mar@foo.com -from:jan@foo.com'; - var options = {keywords: ['from']}; + var options = { + keywords: ['from'] + }; var parsedSearchQuery = searchquery.parse(searchQuery, options); parsedSearchQuery.should.be.an.Object; @@ -257,5 +492,16 @@ describe('Search query syntax parser', function () { parsedSearchQuery.exclude.from.should.containEql('mar@foo.com'); parsedSearchQuery.exclude.from.should.containEql('jan@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,mar@foo.com', + offsetStart: 1, + offsetEnd: 29 + }, { + keyword: 'from', + value: 'jan@foo.com', + offsetStart: 31, + offsetEnd: 47 + }]); }); -}); +}); \ No newline at end of file