1
0

Merge pull request #14 from mixmaxhq/return-character-offsets

Return the character offsets of parsed search parts. The use case is …
This commit is contained in:
Julien Buty
2016-11-02 09:00:03 +08:00
committed by GitHub
3 changed files with 329 additions and 26 deletions

View File

@@ -20,7 +20,13 @@ And turns it into an object like this:
from: '1/10/2013', from: '1/10/2013',
to: '15/04/2014' to: '15/04/2014'
}, },
text: 'photos' text: 'photos',
offsets:
[ { keyword: 'from', value: 'hi@retrace.io,foo@gmail.com', offsetStart: 0, offsetEnd: 32 },
{ keyword: 'to', value: 'me', offsetStart: 33, offsetEnd: 38 },
{ keyword: 'subject', value: 'vacations', offsetStart: 39, offsetEnd: 56 },
{ keyword: 'date', value: '1/10/2013-15/04/2014', offsetStart: 57, offsetEnd: 82 },
{ text: 'photos', offsetStart: 83, offsetEnd: 89 } ]
} }
``` ```

View File

@@ -15,10 +15,6 @@ exports.parse = function (string, options) {
string = ''; string = '';
} }
// Regularize white spacing
// Make in-between white spaces a unique space
string = string.trim().replace(/\s+/g, ' ');
// When a simple string, return it // When a simple string, return it
if (-1 === string.indexOf(':')) { if (-1 === string.indexOf(':')) {
return string; return string;
@@ -30,16 +26,19 @@ exports.parse = function (string, options) {
// Otherwise parse the advanced query syntax // Otherwise parse the advanced query syntax
else { else {
// Our object to store the query object // Our object to store the query object
var query = {text: []}; var query = {text: [], offsets: []};
var exclusion = {}; var exclusion = {};
var terms = [];
// Get a list of search terms respecting single and double quotes // Get a list of search terms respecting single and double quotes
var terms = string.match(/(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g); var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g;
for (var i = 0; i < terms.length; i++) { var match;
var sepIndex = terms[i].indexOf(':'); while ((match = regex.exec(string)) !== null) {
var term = match[0];
var sepIndex = term.indexOf(':');
if (sepIndex !== -1) { if (sepIndex !== -1) {
var split = terms[i].split(':'), var split = term.split(':'),
key = terms[i].slice(0, sepIndex), key = term.slice(0, sepIndex),
val = terms[i].slice(sepIndex + 1); val = term.slice(sepIndex + 1);
// Strip surrounding quotes // Strip surrounding quotes
val = val.replace(/^\"|\"$|^\'|\'$/g, ''); val = val.replace(/^\"|\"$|^\'|\'$/g, '');
// Strip backslashes respecting escapes // Strip backslashes respecting escapes
@@ -55,7 +54,18 @@ exports.parse = function (string, options) {
return n1; return n1;
} }
}); });
terms[i] = key + ':' + val; terms.push({
keyword: key,
value: val,
offsetStart: match.index,
offsetEnd: match.index + term.length
});
} else {
terms.push({
text: term,
offsetStart: match.index,
offsetEnd: match.index + term.length
});
} }
} }
// Reverse to ensure proper order when pop()'ing. // Reverse to ensure proper order when pop()'ing.
@@ -63,17 +73,15 @@ exports.parse = function (string, options) {
// For each search term // For each search term
var term; var term;
while (term = terms.pop()) { while (term = terms.pop()) {
// Advanced search terms syntax has key and value
// separated with a colon
var sepIdx = term.indexOf(':');
// When just a simple term // When just a simple term
if (-1 === sepIdx) { if (term.text) {
// We add it as pure text // We add it as pure text
query.text.push(term); query.text.push(term.text);
query.offsets.push(term);
} }
// We got an advanced search syntax // We got an advanced search syntax
else { else {
var key = term.slice(0, sepIdx); var key = term.keyword;
// Check if the key is a registered keyword // Check if the key is a registered keyword
options.keywords = options.keywords || []; options.keywords = options.keywords || [];
var isKeyword = false; var isKeyword = false;
@@ -88,12 +96,20 @@ exports.parse = function (string, options) {
isExclusion = true; isExclusion = true;
} }
} }
// Check if the key is a registered range // Check if the key is a registered range
options.ranges = options.ranges || []; options.ranges = options.ranges || [];
var isRange = !(-1 === options.ranges.indexOf(key)); var isRange = !(-1 === options.ranges.indexOf(key));
// When the key matches a keyword // When the key matches a keyword
if (isKeyword) { if (isKeyword) {
var value = term.slice(sepIdx + 1); query.offsets.push({
keyword: key,
value: term.value,
offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart,
offsetEnd: term.offsetEnd
});
var value = term.value;
// When value is a thing // When value is a thing
if (value.length) { if (value.length) {
// Get an array of values when several are there // Get an array of values when several are there
@@ -174,7 +190,9 @@ exports.parse = function (string, options) {
} }
// The key allows a range // The key allows a range
else if (isRange) { else if (isRange) {
var value = term.slice(sepIdx + 1); query.offsets.push(term);
var value = term.value;
// Ranges are separated with a dash // Ranges are separated with a dash
var rangeValues = value.split('-'); var rangeValues = value.split('-');
// When both ends of the range are specified // When both ends of the range are specified
@@ -196,7 +214,14 @@ exports.parse = function (string, options) {
} }
else { else {
// We add it as pure text // We add it as pure text
query.text.push(term); var text = term.keyword + ':' + term.value;
query.text.push(text);
query.offsets.push({
text: text,
offsetStart: term.offsetStart,
offsetEnd: term.offsetEnd
});
} }
} }
} }

View File

@@ -23,6 +23,12 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.not.have.property('text'); parsedSearchQuery.should.not.have.property('text');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 0,
offsetEnd: 16
}]);
}); });
@@ -34,6 +40,46 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('text', 'hey buddy!'); parsedSearchQuery.should.have.property('text', 'hey buddy!');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 0,
offsetEnd: 16
}, {
text: 'hey',
offsetStart: 17,
offsetEnd: 20
}, {
text: 'buddy!',
offsetStart: 21,
offsetEnd: 27
}]);
});
it('should ignore keywords that are not specified', function() {
var searchQuery = 'test another other:jul@foo.com';
var options = {
keywords: ['from']
};
var parsedSearchQuery = searchquery.parse(searchQuery, options);
parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.not.have.property('other');
parsedSearchQuery.should.have.property('text', 'test another other:jul@foo.com');
parsedSearchQuery.should.have.property('offsets', [{
text: 'test',
offsetStart: 0,
offsetEnd: 4
}, {
text: 'another',
offsetStart: 5,
offsetEnd: 12
}, {
text: 'other:jul@foo.com',
offsetStart: 13,
offsetEnd: 30
}]);
}); });
@@ -45,6 +91,20 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('text', 'hey you!'); parsedSearchQuery.should.have.property('text', 'hey you!');
parsedSearchQuery.should.have.property('offsets', [{
text: 'hey',
offsetStart: 0,
offsetEnd: 3
}, {
text: 'you!',
offsetStart: 4,
offsetEnd: 8
}, {
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 9,
offsetEnd: 25
}]);
}); });
@@ -56,6 +116,24 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('text', 'hey you! pouet'); parsedSearchQuery.should.have.property('text', 'hey you! pouet');
parsedSearchQuery.should.have.property('offsets', [{
text: 'hey',
offsetStart: 0,
offsetEnd: 3
}, {
text: 'you!',
offsetStart: 4,
offsetEnd: 8
}, {
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 9,
offsetEnd: 25
}, {
text: 'pouet',
offsetStart: 26,
offsetEnd: 31
}]);
}); });
@@ -68,6 +146,24 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('text', 'hey you! pouet'); parsedSearchQuery.should.have.property('text', 'hey you! pouet');
parsedSearchQuery.should.have.property('offsets', [{
text: 'hey',
offsetStart: 3,
offsetEnd: 6
}, {
text: 'you!',
offsetStart: 11,
offsetEnd: 15
}, {
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 16,
offsetEnd: 32
}, {
text: 'pouet',
offsetStart: 35,
offsetEnd: 40
}]);
}); });
@@ -80,6 +176,37 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('to', 'bar@hey.ya'); parsedSearchQuery.should.have.property('to', 'bar@hey.ya');
parsedSearchQuery.should.have.property('text', 'hey, so what\'s up gents'); parsedSearchQuery.should.have.property('text', 'hey, so what\'s up gents');
parsedSearchQuery.should.have.property('offsets', [{
text: 'hey,',
offsetStart: 0,
offsetEnd: 4
}, {
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 5,
offsetEnd: 21
}, {
keyword: 'to',
value: 'bar@hey.ya',
offsetStart: 22,
offsetEnd: 35
}, {
text: 'so',
offsetStart: 36,
offsetEnd: 38
}, {
text: 'what\'s',
offsetStart: 39,
offsetEnd: 45
}, {
text: 'up',
offsetStart: 46,
offsetEnd: 48
}, {
text: 'gents',
offsetStart: 49,
offsetEnd: 54
}]);
}); });
@@ -95,6 +222,21 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.from.length.should.equal(2); parsedSearchQuery.from.length.should.equal(2);
parsedSearchQuery.from.should.containEql('jul@foo.com'); parsedSearchQuery.from.should.containEql('jul@foo.com');
parsedSearchQuery.from.should.containEql('bar@hey.ya'); parsedSearchQuery.from.should.containEql('bar@hey.ya');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 0,
offsetEnd: 16
}, {
keyword: 'from',
value: 'bar@hey.ya',
offsetStart: 17,
offsetEnd: 32
}, {
text: 'vaccationessss',
offsetStart: 33,
offsetEnd: 47
}]);
}); });
@@ -109,6 +251,12 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.from.length.should.equal(2); parsedSearchQuery.from.length.should.equal(2);
parsedSearchQuery.from.should.containEql('jul@foo.com'); parsedSearchQuery.from.should.containEql('jul@foo.com');
parsedSearchQuery.from.should.containEql('bar@hey.ya'); parsedSearchQuery.from.should.containEql('bar@hey.ya');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com,bar@hey.ya',
offsetStart: 0,
offsetEnd: 27
}]);
}); });
@@ -126,6 +274,21 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.from.should.containEql('bar@hey.ya'); parsedSearchQuery.from.should.containEql('bar@hey.ya');
parsedSearchQuery.from.should.containEql('a@b.c'); parsedSearchQuery.from.should.containEql('a@b.c');
parsedSearchQuery.from.should.containEql('d@e.f'); parsedSearchQuery.from.should.containEql('d@e.f');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com,bar@hey.ya',
offsetStart: 0,
offsetEnd: 27
}, {
keyword: 'from',
value: 'a@b.c,d@e.f',
offsetStart: 28,
offsetEnd: 44
}, {
text: 'ouch!#',
offsetStart: 45,
offsetEnd: 51
}]);
}); });
@@ -139,6 +302,16 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.have.property('date'); parsedSearchQuery.should.have.property('date');
parsedSearchQuery.date.should.be.an.Object; parsedSearchQuery.date.should.be.an.Object;
parsedSearchQuery.date.from.should.containEql('12/12/2012'); parsedSearchQuery.date.from.should.containEql('12/12/2012');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'date',
value: '12/12/2012',
offsetStart: 0,
offsetEnd: 15
}, {
text: 'ahaha',
offsetStart: 16,
offsetEnd: 21
}]);
}); });
it('should parse range with 2 ends and free text', function () { it('should parse range with 2 ends and free text', function () {
@@ -152,6 +325,16 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.date.should.be.an.Object; parsedSearchQuery.date.should.be.an.Object;
parsedSearchQuery.date.from.should.containEql('12/12/2012'); parsedSearchQuery.date.from.should.containEql('12/12/2012');
parsedSearchQuery.date.to.should.containEql('01/01/2014'); parsedSearchQuery.date.to.should.containEql('01/01/2014');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'date',
value: '12/12/2012-01/01/2014',
offsetStart: 0,
offsetEnd: 26
}, {
text: 'ahaha',
offsetStart: 27,
offsetEnd: 32
}]);
}); });
@@ -198,6 +381,48 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.to.length.should.equal(2); parsedSearchQuery.to.length.should.equal(2);
parsedSearchQuery.to.should.containEql('me@me.com'); parsedSearchQuery.to.should.containEql('me@me.com');
parsedSearchQuery.to.should.containEql('toto@hey.co'); parsedSearchQuery.to.should.containEql('toto@hey.co');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'date',
value: '12/12/2012-01/01/2014',
offsetStart: 3,
offsetEnd: 29
}, {
text: 'ahaha',
offsetStart: 30,
offsetEnd: 35
}, {
keyword: 'from',
value: 'jul@foo.com,bar@hey.ya',
offsetStart: 36,
offsetEnd: 63
}, {
keyword: 'from',
value: 'a@b.c,d@e.f',
offsetStart: 64,
offsetEnd: 80
}, {
text: 'ouch!#',
offsetStart: 81,
offsetEnd: 87
}, {
keyword: 'to',
value: 'me@me.com',
offsetStart: 90,
offsetEnd: 102
}, {
keyword: 'to',
value: 'toto@hey.co',
offsetStart: 103,
offsetEnd: 117
}, {
text: 'about',
offsetStart: 118,
offsetEnd: 123
}, {
text: 'that',
offsetStart: 124,
offsetEnd: 128
}]);
}); });
@@ -209,6 +434,17 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('name', 'Bob Saget'); parsedSearchQuery.should.have.property('name', 'Bob Saget');
parsedSearchQuery.should.have.property('description', 'Banana Sandwiche'); parsedSearchQuery.should.have.property('description', 'Banana Sandwiche');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'name',
value: 'Bob Saget',
offsetStart: 0,
offsetEnd: 16
}, {
keyword: 'description',
value: 'Banana Sandwiche',
offsetStart: 17,
offsetEnd: 47
}]);
}); });
@@ -220,6 +456,17 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('case1', 'This "is" \'a\' test'); parsedSearchQuery.should.have.property('case1', 'This "is" \'a\' test');
parsedSearchQuery.should.have.property('case2', 'This "is" \'a\' test'); parsedSearchQuery.should.have.property('case2', 'This "is" \'a\' test');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'case1',
value: 'This "is" \'a\' test',
offsetStart: 0,
offsetEnd: 28
}, {
keyword: 'case2',
value: 'This "is" \'a\' test',
offsetStart: 29,
offsetEnd: 57
}]);
}); });
@@ -232,6 +479,12 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.exclude.should.be.an.Object; parsedSearchQuery.exclude.should.be.an.Object;
parsedSearchQuery.exclude.should.have.property('from', 'jul@foo.com'); parsedSearchQuery.exclude.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.not.have.property('text'); parsedSearchQuery.should.not.have.property('text');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 1,
offsetEnd: 17
}]);
}); });
it('should concatenate a keyword multiple values in exclusion syntax', function() { it('should concatenate a keyword multiple values in exclusion syntax', function() {
@@ -244,11 +497,19 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.exclude.from.should.containEql('jul@foo.com'); parsedSearchQuery.exclude.from.should.containEql('jul@foo.com');
parsedSearchQuery.exclude.from.should.containEql('mar@foo.com'); parsedSearchQuery.exclude.from.should.containEql('mar@foo.com');
parsedSearchQuery.should.not.have.property('text'); parsedSearchQuery.should.not.have.property('text');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com,mar@foo.com',
offsetStart: 1,
offsetEnd: 29
}]);
}); });
it('should support keywords which appear multiple times with exclusion syntax', function() { it('should support keywords which appear multiple times with exclusion syntax', function() {
var searchQuery = '-from:jul@foo.com,mar@foo.com -from:jan@foo.com'; var searchQuery = '-from:jul@foo.com,mar@foo.com -from:jan@foo.com';
var options = {keywords: ['from']}; var options = {
keywords: ['from']
};
var parsedSearchQuery = searchquery.parse(searchQuery, options); var parsedSearchQuery = searchquery.parse(searchQuery, options);
parsedSearchQuery.should.be.an.Object; parsedSearchQuery.should.be.an.Object;
@@ -257,5 +518,16 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.exclude.from.should.containEql('mar@foo.com'); parsedSearchQuery.exclude.from.should.containEql('mar@foo.com');
parsedSearchQuery.exclude.from.should.containEql('jan@foo.com'); parsedSearchQuery.exclude.from.should.containEql('jan@foo.com');
parsedSearchQuery.should.not.have.property('text'); parsedSearchQuery.should.not.have.property('text');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com,mar@foo.com',
offsetStart: 1,
offsetEnd: 29
}, {
keyword: 'from',
value: 'jan@foo.com',
offsetStart: 31,
offsetEnd: 47
}]);
}); });
}); });