You've already forked search-query-parser
Merge pull request #14 from mixmaxhq/return-character-offsets
Return the character offsets of parsed search parts. The use case is …
This commit is contained in:
@@ -20,7 +20,13 @@ And turns it into an object like this:
|
||||
from: '1/10/2013',
|
||||
to: '15/04/2014'
|
||||
},
|
||||
text: 'photos'
|
||||
text: 'photos',
|
||||
offsets:
|
||||
[ { keyword: 'from', value: 'hi@retrace.io,foo@gmail.com', offsetStart: 0, offsetEnd: 32 },
|
||||
{ keyword: 'to', value: 'me', offsetStart: 33, offsetEnd: 38 },
|
||||
{ keyword: 'subject', value: 'vacations', offsetStart: 39, offsetEnd: 56 },
|
||||
{ keyword: 'date', value: '1/10/2013-15/04/2014', offsetStart: 57, offsetEnd: 82 },
|
||||
{ text: 'photos', offsetStart: 83, offsetEnd: 89 } ]
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
@@ -15,10 +15,6 @@ exports.parse = function (string, options) {
|
||||
string = '';
|
||||
}
|
||||
|
||||
// Regularize white spacing
|
||||
// Make in-between white spaces a unique space
|
||||
string = string.trim().replace(/\s+/g, ' ');
|
||||
|
||||
// When a simple string, return it
|
||||
if (-1 === string.indexOf(':')) {
|
||||
return string;
|
||||
@@ -30,16 +26,19 @@ exports.parse = function (string, options) {
|
||||
// Otherwise parse the advanced query syntax
|
||||
else {
|
||||
// Our object to store the query object
|
||||
var query = {text: []};
|
||||
var query = {text: [], offsets: []};
|
||||
var exclusion = {};
|
||||
var terms = [];
|
||||
// Get a list of search terms respecting single and double quotes
|
||||
var terms = string.match(/(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g);
|
||||
for (var i = 0; i < terms.length; i++) {
|
||||
var sepIndex = terms[i].indexOf(':');
|
||||
if(sepIndex !== -1) {
|
||||
var split = terms[i].split(':'),
|
||||
key = terms[i].slice(0, sepIndex),
|
||||
val = terms[i].slice(sepIndex + 1);
|
||||
var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g;
|
||||
var match;
|
||||
while ((match = regex.exec(string)) !== null) {
|
||||
var term = match[0];
|
||||
var sepIndex = term.indexOf(':');
|
||||
if (sepIndex !== -1) {
|
||||
var split = term.split(':'),
|
||||
key = term.slice(0, sepIndex),
|
||||
val = term.slice(sepIndex + 1);
|
||||
// Strip surrounding quotes
|
||||
val = val.replace(/^\"|\"$|^\'|\'$/g, '');
|
||||
// Strip backslashes respecting escapes
|
||||
@@ -55,7 +54,18 @@ exports.parse = function (string, options) {
|
||||
return n1;
|
||||
}
|
||||
});
|
||||
terms[i] = key + ':' + val;
|
||||
terms.push({
|
||||
keyword: key,
|
||||
value: val,
|
||||
offsetStart: match.index,
|
||||
offsetEnd: match.index + term.length
|
||||
});
|
||||
} else {
|
||||
terms.push({
|
||||
text: term,
|
||||
offsetStart: match.index,
|
||||
offsetEnd: match.index + term.length
|
||||
});
|
||||
}
|
||||
}
|
||||
// Reverse to ensure proper order when pop()'ing.
|
||||
@@ -63,17 +73,15 @@ exports.parse = function (string, options) {
|
||||
// For each search term
|
||||
var term;
|
||||
while (term = terms.pop()) {
|
||||
// Advanced search terms syntax has key and value
|
||||
// separated with a colon
|
||||
var sepIdx = term.indexOf(':');
|
||||
// When just a simple term
|
||||
if (-1 === sepIdx) {
|
||||
if (term.text) {
|
||||
// We add it as pure text
|
||||
query.text.push(term);
|
||||
query.text.push(term.text);
|
||||
query.offsets.push(term);
|
||||
}
|
||||
// We got an advanced search syntax
|
||||
else {
|
||||
var key = term.slice(0, sepIdx);
|
||||
var key = term.keyword;
|
||||
// Check if the key is a registered keyword
|
||||
options.keywords = options.keywords || [];
|
||||
var isKeyword = false;
|
||||
@@ -88,12 +96,20 @@ exports.parse = function (string, options) {
|
||||
isExclusion = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if the key is a registered range
|
||||
options.ranges = options.ranges || [];
|
||||
var isRange = !(-1 === options.ranges.indexOf(key));
|
||||
// When the key matches a keyword
|
||||
if (isKeyword) {
|
||||
var value = term.slice(sepIdx + 1);
|
||||
query.offsets.push({
|
||||
keyword: key,
|
||||
value: term.value,
|
||||
offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart,
|
||||
offsetEnd: term.offsetEnd
|
||||
});
|
||||
|
||||
var value = term.value;
|
||||
// When the value is non-empty
|
||||
if (value.length) {
|
||||
// Get an array of values when several are there
|
||||
@@ -174,7 +190,9 @@ exports.parse = function (string, options) {
|
||||
}
|
||||
// The key allows a range
|
||||
else if (isRange) {
|
||||
var value = term.slice(sepIdx + 1);
|
||||
query.offsets.push(term);
|
||||
|
||||
var value = term.value;
|
||||
// Range bounds are separated with a dash
|
||||
var rangeValues = value.split('-');
|
||||
// When both ends of the range are specified
|
||||
@@ -196,7 +214,14 @@ exports.parse = function (string, options) {
|
||||
}
|
||||
else {
|
||||
// We add it as pure text
|
||||
query.text.push(term);
|
||||
var text = term.keyword + ':' + term.value;
|
||||
query.text.push(text);
|
||||
|
||||
query.offsets.push({
|
||||
text: text,
|
||||
offsetStart: term.offsetStart,
|
||||
offsetEnd: term.offsetEnd
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
276
test/test.js
276
test/test.js
@@ -23,6 +23,12 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.should.be.an.Object;
|
||||
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
|
||||
parsedSearchQuery.should.not.have.property('text');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 16
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -34,10 +40,50 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.should.be.an.Object;
|
||||
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
|
||||
parsedSearchQuery.should.have.property('text', 'hey buddy!');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 16
|
||||
}, {
|
||||
text: 'hey',
|
||||
offsetStart: 17,
|
||||
offsetEnd: 20
|
||||
}, {
|
||||
text: 'buddy!',
|
||||
offsetStart: 21,
|
||||
offsetEnd: 27
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
it('should parse a single keyword with free text before it', function () {
|
||||
it('should ignore keywords that are not specified', function() {
|
||||
var searchQuery = 'test another other:jul@foo.com';
|
||||
var options = {
|
||||
keywords: ['from']
|
||||
};
|
||||
var parsedSearchQuery = searchquery.parse(searchQuery, options);
|
||||
|
||||
parsedSearchQuery.should.be.an.Object;
|
||||
parsedSearchQuery.should.have.not.have.property('other');
|
||||
parsedSearchQuery.should.have.property('text', 'test another other:jul@foo.com');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
text: 'test',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 4
|
||||
}, {
|
||||
text: 'another',
|
||||
offsetStart: 5,
|
||||
offsetEnd: 12
|
||||
}, {
|
||||
text: 'other:jul@foo.com',
|
||||
offsetStart: 13,
|
||||
offsetEnd: 30
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
it('should parse a single keyword with free text before it', function() {
|
||||
var searchQuery = 'hey you! from:jul@foo.com';
|
||||
var options = {keywords: ['from']};
|
||||
var parsedSearchQuery = searchquery.parse(searchQuery, options);
|
||||
@@ -45,6 +91,20 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.should.be.an.Object;
|
||||
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
|
||||
parsedSearchQuery.should.have.property('text', 'hey you!');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
text: 'hey',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 3
|
||||
}, {
|
||||
text: 'you!',
|
||||
offsetStart: 4,
|
||||
offsetEnd: 8
|
||||
}, {
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com',
|
||||
offsetStart: 9,
|
||||
offsetEnd: 25
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -56,6 +116,24 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.should.be.an.Object;
|
||||
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
|
||||
parsedSearchQuery.should.have.property('text', 'hey you! pouet');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
text: 'hey',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 3
|
||||
}, {
|
||||
text: 'you!',
|
||||
offsetStart: 4,
|
||||
offsetEnd: 8
|
||||
}, {
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com',
|
||||
offsetStart: 9,
|
||||
offsetEnd: 25
|
||||
}, {
|
||||
text: 'pouet',
|
||||
offsetStart: 26,
|
||||
offsetEnd: 31
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -68,6 +146,24 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.should.be.an.Object;
|
||||
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
|
||||
parsedSearchQuery.should.have.property('text', 'hey you! pouet');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
text: 'hey',
|
||||
offsetStart: 3,
|
||||
offsetEnd: 6
|
||||
}, {
|
||||
text: 'you!',
|
||||
offsetStart: 11,
|
||||
offsetEnd: 15
|
||||
}, {
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com',
|
||||
offsetStart: 16,
|
||||
offsetEnd: 32
|
||||
}, {
|
||||
text: 'pouet',
|
||||
offsetStart: 35,
|
||||
offsetEnd: 40
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -80,6 +176,37 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
|
||||
parsedSearchQuery.should.have.property('to', 'bar@hey.ya');
|
||||
parsedSearchQuery.should.have.property('text', 'hey, so what\'s up gents');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
text: 'hey,',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 4
|
||||
}, {
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com',
|
||||
offsetStart: 5,
|
||||
offsetEnd: 21
|
||||
}, {
|
||||
keyword: 'to',
|
||||
value: 'bar@hey.ya',
|
||||
offsetStart: 22,
|
||||
offsetEnd: 35
|
||||
}, {
|
||||
text: 'so',
|
||||
offsetStart: 36,
|
||||
offsetEnd: 38
|
||||
}, {
|
||||
text: 'what\'s',
|
||||
offsetStart: 39,
|
||||
offsetEnd: 45
|
||||
}, {
|
||||
text: 'up',
|
||||
offsetStart: 46,
|
||||
offsetEnd: 48
|
||||
}, {
|
||||
text: 'gents',
|
||||
offsetStart: 49,
|
||||
offsetEnd: 54
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -95,6 +222,21 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.from.length.should.equal(2);
|
||||
parsedSearchQuery.from.should.containEql('jul@foo.com');
|
||||
parsedSearchQuery.from.should.containEql('bar@hey.ya');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 16
|
||||
}, {
|
||||
keyword: 'from',
|
||||
value: 'bar@hey.ya',
|
||||
offsetStart: 17,
|
||||
offsetEnd: 32
|
||||
}, {
|
||||
text: 'vaccationessss',
|
||||
offsetStart: 33,
|
||||
offsetEnd: 47
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -109,6 +251,12 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.from.length.should.equal(2);
|
||||
parsedSearchQuery.from.should.containEql('jul@foo.com');
|
||||
parsedSearchQuery.from.should.containEql('bar@hey.ya');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com,bar@hey.ya',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 27
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -126,6 +274,21 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.from.should.containEql('bar@hey.ya');
|
||||
parsedSearchQuery.from.should.containEql('a@b.c');
|
||||
parsedSearchQuery.from.should.containEql('d@e.f');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com,bar@hey.ya',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 27
|
||||
}, {
|
||||
keyword: 'from',
|
||||
value: 'a@b.c,d@e.f',
|
||||
offsetStart: 28,
|
||||
offsetEnd: 44
|
||||
}, {
|
||||
text: 'ouch!#',
|
||||
offsetStart: 45,
|
||||
offsetEnd: 51
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -139,6 +302,16 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.should.have.property('date');
|
||||
parsedSearchQuery.date.should.be.an.Object;
|
||||
parsedSearchQuery.date.from.should.containEql('12/12/2012');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'date',
|
||||
value: '12/12/2012',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 15
|
||||
}, {
|
||||
text: 'ahaha',
|
||||
offsetStart: 16,
|
||||
offsetEnd: 21
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should parse range with 2 ends and free text', function () {
|
||||
@@ -152,6 +325,16 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.date.should.be.an.Object;
|
||||
parsedSearchQuery.date.from.should.containEql('12/12/2012');
|
||||
parsedSearchQuery.date.to.should.containEql('01/01/2014');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'date',
|
||||
value: '12/12/2012-01/01/2014',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 26
|
||||
}, {
|
||||
text: 'ahaha',
|
||||
offsetStart: 27,
|
||||
offsetEnd: 32
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -198,6 +381,48 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.to.length.should.equal(2);
|
||||
parsedSearchQuery.to.should.containEql('me@me.com');
|
||||
parsedSearchQuery.to.should.containEql('toto@hey.co');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'date',
|
||||
value: '12/12/2012-01/01/2014',
|
||||
offsetStart: 3,
|
||||
offsetEnd: 29
|
||||
}, {
|
||||
text: 'ahaha',
|
||||
offsetStart: 30,
|
||||
offsetEnd: 35
|
||||
}, {
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com,bar@hey.ya',
|
||||
offsetStart: 36,
|
||||
offsetEnd: 63
|
||||
}, {
|
||||
keyword: 'from',
|
||||
value: 'a@b.c,d@e.f',
|
||||
offsetStart: 64,
|
||||
offsetEnd: 80
|
||||
}, {
|
||||
text: 'ouch!#',
|
||||
offsetStart: 81,
|
||||
offsetEnd: 87
|
||||
}, {
|
||||
keyword: 'to',
|
||||
value: 'me@me.com',
|
||||
offsetStart: 90,
|
||||
offsetEnd: 102
|
||||
}, {
|
||||
keyword: 'to',
|
||||
value: 'toto@hey.co',
|
||||
offsetStart: 103,
|
||||
offsetEnd: 117
|
||||
}, {
|
||||
text: 'about',
|
||||
offsetStart: 118,
|
||||
offsetEnd: 123
|
||||
}, {
|
||||
text: 'that',
|
||||
offsetStart: 124,
|
||||
offsetEnd: 128
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -209,6 +434,17 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.should.be.an.Object;
|
||||
parsedSearchQuery.should.have.property('name', 'Bob Saget');
|
||||
parsedSearchQuery.should.have.property('description', 'Banana Sandwiche');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'name',
|
||||
value: 'Bob Saget',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 16
|
||||
}, {
|
||||
keyword: 'description',
|
||||
value: 'Banana Sandwiche',
|
||||
offsetStart: 17,
|
||||
offsetEnd: 47
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -220,6 +456,17 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.should.be.an.Object;
|
||||
parsedSearchQuery.should.have.property('case1', 'This "is" \'a\' test');
|
||||
parsedSearchQuery.should.have.property('case2', 'This "is" \'a\' test');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'case1',
|
||||
value: 'This "is" \'a\' test',
|
||||
offsetStart: 0,
|
||||
offsetEnd: 28
|
||||
}, {
|
||||
keyword: 'case2',
|
||||
value: 'This "is" \'a\' test',
|
||||
offsetStart: 29,
|
||||
offsetEnd: 57
|
||||
}]);
|
||||
});
|
||||
|
||||
|
||||
@@ -232,6 +479,12 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.exclude.should.be.an.Object;
|
||||
parsedSearchQuery.exclude.should.have.property('from', 'jul@foo.com');
|
||||
parsedSearchQuery.should.not.have.property('text');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com',
|
||||
offsetStart: 1,
|
||||
offsetEnd: 17
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should concatenate a keyword multiple values in exclusion syntax', function() {
|
||||
@@ -244,11 +497,19 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.exclude.from.should.containEql('jul@foo.com');
|
||||
parsedSearchQuery.exclude.from.should.containEql('mar@foo.com');
|
||||
parsedSearchQuery.should.not.have.property('text');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com,mar@foo.com',
|
||||
offsetStart: 1,
|
||||
offsetEnd: 29
|
||||
}]);
|
||||
});
|
||||
|
||||
it('should support keywords which appear multiple times with exclusion syntax', function() {
|
||||
var searchQuery = '-from:jul@foo.com,mar@foo.com -from:jan@foo.com';
|
||||
var options = {keywords: ['from']};
|
||||
var options = {
|
||||
keywords: ['from']
|
||||
};
|
||||
var parsedSearchQuery = searchquery.parse(searchQuery, options);
|
||||
|
||||
parsedSearchQuery.should.be.an.Object;
|
||||
@@ -257,5 +518,16 @@ describe('Search query syntax parser', function () {
|
||||
parsedSearchQuery.exclude.from.should.containEql('mar@foo.com');
|
||||
parsedSearchQuery.exclude.from.should.containEql('jan@foo.com');
|
||||
parsedSearchQuery.should.not.have.property('text');
|
||||
parsedSearchQuery.should.have.property('offsets', [{
|
||||
keyword: 'from',
|
||||
value: 'jul@foo.com,mar@foo.com',
|
||||
offsetStart: 1,
|
||||
offsetEnd: 29
|
||||
}, {
|
||||
keyword: 'from',
|
||||
value: 'jan@foo.com',
|
||||
offsetStart: 31,
|
||||
offsetEnd: 47
|
||||
}]);
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user