diff --git a/README.md b/README.md index edd75b4..e3168aa 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,13 @@ And turns it into an object like this: from: '1/10/2013', to: '15/04/2014' }, - text: 'photos' + text: 'photos', + offsets: + [ { keyword: 'from', value: 'hi@retrace.io,foo@gmail.com', offsetStart: 0, offsetEnd: 32 }, + { keyword: 'to', value: 'me', offsetStart: 33, offsetEnd: 38 }, + { keyword: 'subject', value: 'vacations', offsetStart: 39, offsetEnd: 56 }, + { keyword: 'date', value: '1/10/2013-15/04/2014', offsetStart: 57, offsetEnd: 82 }, + { text: 'photos', offsetStart: 83, offsetEnd: 89 } ] } ``` diff --git a/lib/search-query-parser.js b/lib/search-query-parser.js index d92a000..fba07e1 100644 --- a/lib/search-query-parser.js +++ b/lib/search-query-parser.js @@ -15,10 +15,6 @@ exports.parse = function (string, options) { string = ''; } - // Regularize white spacing - // Make in-between white spaces a unique space - string = string.trim().replace(/\s+/g, ' '); - // When a simple string, return it if (-1 === string.indexOf(':')) { return string; @@ -30,16 +26,19 @@ exports.parse = function (string, options) { // Otherwise parse the advanced query syntax else { // Our object to store the query object - var query = {text: []}; + var query = {text: [], offsets: []}; var exclusion = {}; + var terms = []; // Get a list of search terms respecting single and double quotes - var terms = string.match(/(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g); - for (var i = 0; i < terms.length; i++) { - var sepIndex = terms[i].indexOf(':'); - if(sepIndex !== -1) { - var split = terms[i].split(':'), - key = terms[i].slice(0, sepIndex), - val = terms[i].slice(sepIndex + 1); + var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g; + var match; + while ((match = regex.exec(string)) !== null) { + var term = match[0]; + var sepIndex = term.indexOf(':'); + if (sepIndex !== -1) { + var split = term.split(':'), + key = term.slice(0, sepIndex), + val = term.slice(sepIndex + 1); // Strip surrounding quotes val = val.replace(/^\"|\"$|^\'|\'$/g, ''); // Strip backslashes respecting escapes @@ -55,7 +54,18 @@ exports.parse = function (string, options) { return n1; } }); - terms[i] = key + ':' + val; + terms.push({ + keyword: key, + value: val, + offsetStart: match.index, + offsetEnd: match.index + term.length + }); + } else { + terms.push({ + text: term, + offsetStart: match.index, + offsetEnd: match.index + term.length + }); } } // Reverse to ensure proper order when pop()'ing. @@ -63,17 +73,15 @@ exports.parse = function (string, options) { // For each search term var term; while (term = terms.pop()) { - // Advanced search terms syntax has key and value - // separated with a colon - var sepIdx = term.indexOf(':'); // When just a simple term - if (-1 === sepIdx) { + if (term.text) { // We add it as pure text - query.text.push(term); + query.text.push(term.text); + query.offsets.push(term); } // We got an advanced search syntax else { - var key = term.slice(0, sepIdx); + var key = term.keyword; // Check if the key is a registered keyword options.keywords = options.keywords || []; var isKeyword = false; @@ -88,12 +96,20 @@ exports.parse = function (string, options) { isExclusion = true; } } + // Check if the key is a registered range options.ranges = options.ranges || []; var isRange = !(-1 === options.ranges.indexOf(key)); // When the key matches a keyword if (isKeyword) { - var value = term.slice(sepIdx + 1); + query.offsets.push({ + keyword: key, + value: term.value, + offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart, + offsetEnd: term.offsetEnd + }); + + var value = term.value; // When value is a thing if (value.length) { // Get an array of values when several are there @@ -174,7 +190,9 @@ exports.parse = function (string, options) { } // The key allows a range else if (isRange) { - var value = term.slice(sepIdx + 1); + query.offsets.push(term); + + var value = term.value; // Range are separated with a dash var rangeValues = value.split('-'); // When both end of the range are specified @@ -196,7 +214,14 @@ exports.parse = function (string, options) { } else { // We add it as pure text - query.text.push(term); + var text = term.keyword + ':' + term.value; + query.text.push(text); + + query.offsets.push({ + text: text, + offsetStart: term.offsetStart, + offsetEnd: term.offsetEnd + }); } } } diff --git a/test/test.js b/test/test.js index 529b97a..24a2013 100644 --- a/test/test.js +++ b/test/test.js @@ -23,6 +23,12 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 0, + offsetEnd: 16 + }]); }); @@ -34,10 +40,50 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey buddy!'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 0, + offsetEnd: 16 + }, { + text: 'hey', + offsetStart: 17, + offsetEnd: 20 + }, { + text: 'buddy!', + offsetStart: 21, + offsetEnd: 27 + }]); }); - it('should parse a single keyword with free text before it', function () { + it('should ignore keywords that are not specified', function() { + var searchQuery = 'test another other:jul@foo.com'; + var options = { + keywords: ['from'] + }; + var parsedSearchQuery = searchquery.parse(searchQuery, options); + + parsedSearchQuery.should.be.an.Object; + parsedSearchQuery.should.have.not.have.property('other'); + parsedSearchQuery.should.have.property('text', 'test another other:jul@foo.com'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'test', + offsetStart: 0, + offsetEnd: 4 + }, { + text: 'another', + offsetStart: 5, + offsetEnd: 12 + }, { + text: 'other:jul@foo.com', + offsetStart: 13, + offsetEnd: 30 + }]); + }); + + + it('should parse a single keyword with free text before it', function() { var searchQuery = 'hey you! from:jul@foo.com'; var options = {keywords: ['from']}; var parsedSearchQuery = searchquery.parse(searchQuery, options); @@ -45,6 +91,20 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey you!'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey', + offsetStart: 0, + offsetEnd: 3 + }, { + text: 'you!', + offsetStart: 4, + offsetEnd: 8 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 9, + offsetEnd: 25 + }]); }); @@ -56,6 +116,24 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey you! pouet'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey', + offsetStart: 0, + offsetEnd: 3 + }, { + text: 'you!', + offsetStart: 4, + offsetEnd: 8 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 9, + offsetEnd: 25 + }, { + text: 'pouet', + offsetStart: 26, + offsetEnd: 31 + }]); }); @@ -68,6 +146,24 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('text', 'hey you! pouet'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey', + offsetStart: 3, + offsetEnd: 6 + }, { + text: 'you!', + offsetStart: 11, + offsetEnd: 15 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 16, + offsetEnd: 32 + }, { + text: 'pouet', + offsetStart: 35, + offsetEnd: 40 + }]); }); @@ -80,6 +176,37 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('to', 'bar@hey.ya'); parsedSearchQuery.should.have.property('text', 'hey, so what\'s up gents'); + parsedSearchQuery.should.have.property('offsets', [{ + text: 'hey,', + offsetStart: 0, + offsetEnd: 4 + }, { + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 5, + offsetEnd: 21 + }, { + keyword: 'to', + value: 'bar@hey.ya', + offsetStart: 22, + offsetEnd: 35 + }, { + text: 'so', + offsetStart: 36, + offsetEnd: 38 + }, { + text: 'what\'s', + offsetStart: 39, + offsetEnd: 45 + }, { + text: 'up', + offsetStart: 46, + offsetEnd: 48 + }, { + text: 'gents', + offsetStart: 49, + offsetEnd: 54 + }]); }); @@ -95,6 +222,21 @@ describe('Search query syntax parser', function () { parsedSearchQuery.from.length.should.equal(2); parsedSearchQuery.from.should.containEql('jul@foo.com'); parsedSearchQuery.from.should.containEql('bar@hey.ya'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 0, + offsetEnd: 16 + }, { + keyword: 'from', + value: 'bar@hey.ya', + offsetStart: 17, + offsetEnd: 32 + }, { + text: 'vaccationessss', + offsetStart: 33, + offsetEnd: 47 + }]); }); @@ -109,6 +251,12 @@ describe('Search query syntax parser', function () { parsedSearchQuery.from.length.should.equal(2); parsedSearchQuery.from.should.containEql('jul@foo.com'); parsedSearchQuery.from.should.containEql('bar@hey.ya'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,bar@hey.ya', + offsetStart: 0, + offsetEnd: 27 + }]); }); @@ -126,6 +274,21 @@ describe('Search query syntax parser', function () { parsedSearchQuery.from.should.containEql('bar@hey.ya'); parsedSearchQuery.from.should.containEql('a@b.c'); parsedSearchQuery.from.should.containEql('d@e.f'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,bar@hey.ya', + offsetStart: 0, + offsetEnd: 27 + }, { + keyword: 'from', + value: 'a@b.c,d@e.f', + offsetStart: 28, + offsetEnd: 44 + }, { + text: 'ouch!#', + offsetStart: 45, + offsetEnd: 51 + }]); }); @@ -139,6 +302,16 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.have.property('date'); parsedSearchQuery.date.should.be.an.Object; parsedSearchQuery.date.from.should.containEql('12/12/2012'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'date', + value: '12/12/2012', + offsetStart: 0, + offsetEnd: 15 + }, { + text: 'ahaha', + offsetStart: 16, + offsetEnd: 21 + }]); }); it('should parse range with 2 ends and free text', function () { @@ -152,6 +325,16 @@ describe('Search query syntax parser', function () { parsedSearchQuery.date.should.be.an.Object; parsedSearchQuery.date.from.should.containEql('12/12/2012'); parsedSearchQuery.date.to.should.containEql('01/01/2014'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'date', + value: '12/12/2012-01/01/2014', + offsetStart: 0, + offsetEnd: 26 + }, { + text: 'ahaha', + offsetStart: 27, + offsetEnd: 32 + }]); }); @@ -198,6 +381,48 @@ describe('Search query syntax parser', function () { parsedSearchQuery.to.length.should.equal(2); parsedSearchQuery.to.should.containEql('me@me.com'); parsedSearchQuery.to.should.containEql('toto@hey.co'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'date', + value: '12/12/2012-01/01/2014', + offsetStart: 3, + offsetEnd: 29 + }, { + text: 'ahaha', + offsetStart: 30, + offsetEnd: 35 + }, { + keyword: 'from', + value: 'jul@foo.com,bar@hey.ya', + offsetStart: 36, + offsetEnd: 63 + }, { + keyword: 'from', + value: 'a@b.c,d@e.f', + offsetStart: 64, + offsetEnd: 80 + }, { + text: 'ouch!#', + offsetStart: 81, + offsetEnd: 87 + }, { + keyword: 'to', + value: 'me@me.com', + offsetStart: 90, + offsetEnd: 102 + }, { + keyword: 'to', + value: 'toto@hey.co', + offsetStart: 103, + offsetEnd: 117 + }, { + text: 'about', + offsetStart: 118, + offsetEnd: 123 + }, { + text: 'that', + offsetStart: 124, + offsetEnd: 128 + }]); }); @@ -209,6 +434,17 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('name', 'Bob Saget'); parsedSearchQuery.should.have.property('description', 'Banana Sandwiche'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'name', + value: 'Bob Saget', + offsetStart: 0, + offsetEnd: 16 + }, { + keyword: 'description', + value: 'Banana Sandwiche', + offsetStart: 17, + offsetEnd: 47 + }]); }); @@ -220,6 +456,17 @@ describe('Search query syntax parser', function () { parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.have.property('case1', 'This "is" \'a\' test'); parsedSearchQuery.should.have.property('case2', 'This "is" \'a\' test'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'case1', + value: 'This "is" \'a\' test', + offsetStart: 0, + offsetEnd: 28 + }, { + keyword: 'case2', + value: 'This "is" \'a\' test', + offsetStart: 29, + offsetEnd: 57 + }]); }); @@ -232,6 +479,12 @@ describe('Search query syntax parser', function () { parsedSearchQuery.exclude.should.be.an.Object; parsedSearchQuery.exclude.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com', + offsetStart: 1, + offsetEnd: 17 + }]); }); it('should concatenate a keyword multiple values in exclusion syntax', function() { @@ -244,11 +497,19 @@ describe('Search query syntax parser', function () { parsedSearchQuery.exclude.from.should.containEql('jul@foo.com'); parsedSearchQuery.exclude.from.should.containEql('mar@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,mar@foo.com', + offsetStart: 1, + offsetEnd: 29 + }]); }); it('should support keywords which appear multiple times with exclusion syntax', function() { var searchQuery = '-from:jul@foo.com,mar@foo.com -from:jan@foo.com'; - var options = {keywords: ['from']}; + var options = { + keywords: ['from'] + }; var parsedSearchQuery = searchquery.parse(searchQuery, options); parsedSearchQuery.should.be.an.Object; @@ -257,5 +518,16 @@ describe('Search query syntax parser', function () { parsedSearchQuery.exclude.from.should.containEql('mar@foo.com'); parsedSearchQuery.exclude.from.should.containEql('jan@foo.com'); parsedSearchQuery.should.not.have.property('text'); + parsedSearchQuery.should.have.property('offsets', [{ + keyword: 'from', + value: 'jul@foo.com,mar@foo.com', + offsetStart: 1, + offsetEnd: 29 + }, { + keyword: 'from', + value: 'jan@foo.com', + offsetStart: 31, + offsetEnd: 47 + }]); }); -}); +}); \ No newline at end of file