1
0

Merge pull request #14 from mixmaxhq/return-character-offsets

Return the character offsets of parsed search parts. The use case is …
This commit is contained in:
Julien Buty
2016-11-02 09:00:03 +08:00
committed by GitHub
3 changed files with 329 additions and 26 deletions

View File

@@ -20,7 +20,13 @@ And turns it into an object like this:
from: '1/10/2013',
to: '15/04/2014'
},
text: 'photos'
text: 'photos',
offsets:
[ { keyword: 'from', value: 'hi@retrace.io,foo@gmail.com', offsetStart: 0, offsetEnd: 32 },
{ keyword: 'to', value: 'me', offsetStart: 33, offsetEnd: 38 },
{ keyword: 'subject', value: 'vacations', offsetStart: 39, offsetEnd: 56 },
{ keyword: 'date', value: '1/10/2013-15/04/2014', offsetStart: 57, offsetEnd: 82 },
{ text: 'photos', offsetStart: 83, offsetEnd: 89 } ]
}
```

View File

@@ -15,10 +15,6 @@ exports.parse = function (string, options) {
string = '';
}
// Regularize white spacing
// Make in-between white spaces a unique space
string = string.trim().replace(/\s+/g, ' ');
// When a simple string, return it
if (-1 === string.indexOf(':')) {
return string;
@@ -30,16 +26,19 @@ exports.parse = function (string, options) {
// Otherwise parse the advanced query syntax
else {
// Our object to store the query object
var query = {text: []};
var query = {text: [], offsets: []};
var exclusion = {};
var terms = [];
// Get a list of search terms respecting single and double quotes
var terms = string.match(/(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g);
for (var i = 0; i < terms.length; i++) {
var sepIndex = terms[i].indexOf(':');
if(sepIndex !== -1) {
var split = terms[i].split(':'),
key = terms[i].slice(0, sepIndex),
val = terms[i].slice(sepIndex + 1);
var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g;
var match;
while ((match = regex.exec(string)) !== null) {
var term = match[0];
var sepIndex = term.indexOf(':');
if (sepIndex !== -1) {
var split = term.split(':'),
key = term.slice(0, sepIndex),
val = term.slice(sepIndex + 1);
// Strip surrounding quotes
val = val.replace(/^\"|\"$|^\'|\'$/g, '');
// Strip backslashes respecting escapes
@@ -55,7 +54,18 @@ exports.parse = function (string, options) {
return n1;
}
});
terms[i] = key + ':' + val;
terms.push({
keyword: key,
value: val,
offsetStart: match.index,
offsetEnd: match.index + term.length
});
} else {
terms.push({
text: term,
offsetStart: match.index,
offsetEnd: match.index + term.length
});
}
}
// Reverse to ensure proper order when pop()'ing.
@@ -63,17 +73,15 @@ exports.parse = function (string, options) {
// For each search term
var term;
while (term = terms.pop()) {
// Advanced search terms syntax has key and value
// separated with a colon
var sepIdx = term.indexOf(':');
// When just a simple term
if (-1 === sepIdx) {
if (term.text) {
// We add it as pure text
query.text.push(term);
query.text.push(term.text);
query.offsets.push(term);
}
// We got an advanced search syntax
else {
var key = term.slice(0, sepIdx);
var key = term.keyword;
// Check if the key is a registered keyword
options.keywords = options.keywords || [];
var isKeyword = false;
@@ -88,12 +96,20 @@ exports.parse = function (string, options) {
isExclusion = true;
}
}
// Check if the key is a registered range
options.ranges = options.ranges || [];
var isRange = !(-1 === options.ranges.indexOf(key));
// When the key matches a keyword
if (isKeyword) {
var value = term.slice(sepIdx + 1);
query.offsets.push({
keyword: key,
value: term.value,
offsetStart: isExclusion ? term.offsetStart + 1 : term.offsetStart,
offsetEnd: term.offsetEnd
});
var value = term.value;
// When value is a thing
if (value.length) {
// Get an array of values when several are there
@@ -174,7 +190,9 @@ exports.parse = function (string, options) {
}
// The key allows a range
else if (isRange) {
var value = term.slice(sepIdx + 1);
query.offsets.push(term);
var value = term.value;
// Range are separated with a dash
var rangeValues = value.split('-');
// When both end of the range are specified
@@ -196,7 +214,14 @@ exports.parse = function (string, options) {
}
else {
// We add it as pure text
query.text.push(term);
var text = term.keyword + ':' + term.value;
query.text.push(text);
query.offsets.push({
text: text,
offsetStart: term.offsetStart,
offsetEnd: term.offsetEnd
});
}
}
}

View File

@@ -23,6 +23,12 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.not.have.property('text');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 0,
offsetEnd: 16
}]);
});
@@ -34,10 +40,50 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('text', 'hey buddy!');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 0,
offsetEnd: 16
}, {
text: 'hey',
offsetStart: 17,
offsetEnd: 20
}, {
text: 'buddy!',
offsetStart: 21,
offsetEnd: 27
}]);
});
it('should parse a single keyword with free text before it', function () {
// A `key:value` term whose key is not listed in `options.keywords` must be
// kept as plain text: it may not become a property of the parsed result, and
// it must still be reported in `offsets` as a text entry covering the whole
// `key:value` token.
it('should ignore keywords that are not specified', function() {
  var searchQuery = 'test another other:jul@foo.com';
  var options = {
    keywords: ['from']
  };
  var parsedSearchQuery = searchquery.parse(searchQuery, options);

  parsedSearchQuery.should.be.an.Object;
  // 'other' is not a registered keyword, so no `other` property is expected.
  parsedSearchQuery.should.not.have.property('other');
  // The unregistered term stays in the free text, untouched.
  parsedSearchQuery.should.have.property('text', 'test another other:jul@foo.com');
  // Each whitespace-separated token gets its own entry; offsetEnd is exclusive
  // (offsetStart + token length), e.g. 'other:jul@foo.com' is 17 chars: 13..30.
  parsedSearchQuery.should.have.property('offsets', [{
    text: 'test',
    offsetStart: 0,
    offsetEnd: 4
  }, {
    text: 'another',
    offsetStart: 5,
    offsetEnd: 12
  }, {
    text: 'other:jul@foo.com',
    offsetStart: 13,
    offsetEnd: 30
  }]);
});
it('should parse a single keyword with free text before it', function() {
var searchQuery = 'hey you! from:jul@foo.com';
var options = {keywords: ['from']};
var parsedSearchQuery = searchquery.parse(searchQuery, options);
@@ -45,6 +91,20 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('text', 'hey you!');
parsedSearchQuery.should.have.property('offsets', [{
text: 'hey',
offsetStart: 0,
offsetEnd: 3
}, {
text: 'you!',
offsetStart: 4,
offsetEnd: 8
}, {
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 9,
offsetEnd: 25
}]);
});
@@ -56,6 +116,24 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('text', 'hey you! pouet');
parsedSearchQuery.should.have.property('offsets', [{
text: 'hey',
offsetStart: 0,
offsetEnd: 3
}, {
text: 'you!',
offsetStart: 4,
offsetEnd: 8
}, {
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 9,
offsetEnd: 25
}, {
text: 'pouet',
offsetStart: 26,
offsetEnd: 31
}]);
});
@@ -68,6 +146,24 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('text', 'hey you! pouet');
parsedSearchQuery.should.have.property('offsets', [{
text: 'hey',
offsetStart: 3,
offsetEnd: 6
}, {
text: 'you!',
offsetStart: 11,
offsetEnd: 15
}, {
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 16,
offsetEnd: 32
}, {
text: 'pouet',
offsetStart: 35,
offsetEnd: 40
}]);
});
@@ -80,6 +176,37 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.have.property('to', 'bar@hey.ya');
parsedSearchQuery.should.have.property('text', 'hey, so what\'s up gents');
parsedSearchQuery.should.have.property('offsets', [{
text: 'hey,',
offsetStart: 0,
offsetEnd: 4
}, {
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 5,
offsetEnd: 21
}, {
keyword: 'to',
value: 'bar@hey.ya',
offsetStart: 22,
offsetEnd: 35
}, {
text: 'so',
offsetStart: 36,
offsetEnd: 38
}, {
text: 'what\'s',
offsetStart: 39,
offsetEnd: 45
}, {
text: 'up',
offsetStart: 46,
offsetEnd: 48
}, {
text: 'gents',
offsetStart: 49,
offsetEnd: 54
}]);
});
@@ -95,6 +222,21 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.from.length.should.equal(2);
parsedSearchQuery.from.should.containEql('jul@foo.com');
parsedSearchQuery.from.should.containEql('bar@hey.ya');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 0,
offsetEnd: 16
}, {
keyword: 'from',
value: 'bar@hey.ya',
offsetStart: 17,
offsetEnd: 32
}, {
text: 'vaccationessss',
offsetStart: 33,
offsetEnd: 47
}]);
});
@@ -109,6 +251,12 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.from.length.should.equal(2);
parsedSearchQuery.from.should.containEql('jul@foo.com');
parsedSearchQuery.from.should.containEql('bar@hey.ya');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com,bar@hey.ya',
offsetStart: 0,
offsetEnd: 27
}]);
});
@@ -126,6 +274,21 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.from.should.containEql('bar@hey.ya');
parsedSearchQuery.from.should.containEql('a@b.c');
parsedSearchQuery.from.should.containEql('d@e.f');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com,bar@hey.ya',
offsetStart: 0,
offsetEnd: 27
}, {
keyword: 'from',
value: 'a@b.c,d@e.f',
offsetStart: 28,
offsetEnd: 44
}, {
text: 'ouch!#',
offsetStart: 45,
offsetEnd: 51
}]);
});
@@ -139,6 +302,16 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.have.property('date');
parsedSearchQuery.date.should.be.an.Object;
parsedSearchQuery.date.from.should.containEql('12/12/2012');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'date',
value: '12/12/2012',
offsetStart: 0,
offsetEnd: 15
}, {
text: 'ahaha',
offsetStart: 16,
offsetEnd: 21
}]);
});
it('should parse range with 2 ends and free text', function () {
@@ -152,6 +325,16 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.date.should.be.an.Object;
parsedSearchQuery.date.from.should.containEql('12/12/2012');
parsedSearchQuery.date.to.should.containEql('01/01/2014');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'date',
value: '12/12/2012-01/01/2014',
offsetStart: 0,
offsetEnd: 26
}, {
text: 'ahaha',
offsetStart: 27,
offsetEnd: 32
}]);
});
@@ -198,6 +381,48 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.to.length.should.equal(2);
parsedSearchQuery.to.should.containEql('me@me.com');
parsedSearchQuery.to.should.containEql('toto@hey.co');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'date',
value: '12/12/2012-01/01/2014',
offsetStart: 3,
offsetEnd: 29
}, {
text: 'ahaha',
offsetStart: 30,
offsetEnd: 35
}, {
keyword: 'from',
value: 'jul@foo.com,bar@hey.ya',
offsetStart: 36,
offsetEnd: 63
}, {
keyword: 'from',
value: 'a@b.c,d@e.f',
offsetStart: 64,
offsetEnd: 80
}, {
text: 'ouch!#',
offsetStart: 81,
offsetEnd: 87
}, {
keyword: 'to',
value: 'me@me.com',
offsetStart: 90,
offsetEnd: 102
}, {
keyword: 'to',
value: 'toto@hey.co',
offsetStart: 103,
offsetEnd: 117
}, {
text: 'about',
offsetStart: 118,
offsetEnd: 123
}, {
text: 'that',
offsetStart: 124,
offsetEnd: 128
}]);
});
@@ -209,6 +434,17 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('name', 'Bob Saget');
parsedSearchQuery.should.have.property('description', 'Banana Sandwiche');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'name',
value: 'Bob Saget',
offsetStart: 0,
offsetEnd: 16
}, {
keyword: 'description',
value: 'Banana Sandwiche',
offsetStart: 17,
offsetEnd: 47
}]);
});
@@ -220,6 +456,17 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.be.an.Object;
parsedSearchQuery.should.have.property('case1', 'This "is" \'a\' test');
parsedSearchQuery.should.have.property('case2', 'This "is" \'a\' test');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'case1',
value: 'This "is" \'a\' test',
offsetStart: 0,
offsetEnd: 28
}, {
keyword: 'case2',
value: 'This "is" \'a\' test',
offsetStart: 29,
offsetEnd: 57
}]);
});
@@ -232,6 +479,12 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.exclude.should.be.an.Object;
parsedSearchQuery.exclude.should.have.property('from', 'jul@foo.com');
parsedSearchQuery.should.not.have.property('text');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com',
offsetStart: 1,
offsetEnd: 17
}]);
});
it('should concatenate a keyword multiple values in exclusion syntax', function() {
@@ -244,11 +497,19 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.exclude.from.should.containEql('jul@foo.com');
parsedSearchQuery.exclude.from.should.containEql('mar@foo.com');
parsedSearchQuery.should.not.have.property('text');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com,mar@foo.com',
offsetStart: 1,
offsetEnd: 29
}]);
});
it('should support keywords which appear multiple times with exclusion syntax', function() {
var searchQuery = '-from:jul@foo.com,mar@foo.com -from:jan@foo.com';
var options = {keywords: ['from']};
var options = {
keywords: ['from']
};
var parsedSearchQuery = searchquery.parse(searchQuery, options);
parsedSearchQuery.should.be.an.Object;
@@ -257,5 +518,16 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.exclude.from.should.containEql('mar@foo.com');
parsedSearchQuery.exclude.from.should.containEql('jan@foo.com');
parsedSearchQuery.should.not.have.property('text');
parsedSearchQuery.should.have.property('offsets', [{
keyword: 'from',
value: 'jul@foo.com,mar@foo.com',
offsetStart: 1,
offsetEnd: 29
}, {
keyword: 'from',
value: 'jan@foo.com',
offsetStart: 31,
offsetEnd: 47
}]);
});
});
});