1
0

Merge pull request #18 from jonestristand/token-terms

Add support for tokenized terms, quoted terms, and negation of terms.
This commit is contained in:
Julien Buty
2018-04-08 16:48:13 +08:00
committed by GitHub
3 changed files with 103 additions and 9 deletions

View File

@@ -16,11 +16,11 @@ exports.parse = function (string, options) {
}
// When a simple string, return it
if (-1 === string.indexOf(':')) {
if (-1 === string.indexOf(':') && !options.tokenize) {
return string;
}
// When no keywords or ranges set, treat as a simple string
else if (!options.keywords && !options.ranges){
else if (!options.keywords && !options.ranges && !options.tokenize){
return string;
}
// Otherwise parse the advanced query syntax
@@ -30,7 +30,7 @@ exports.parse = function (string, options) {
var exclusion = {};
var terms = [];
// Get a list of search terms respecting single and double quotes
var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|\S+|\S+:\S+/g;
var regex = /(\S+:'(?:[^'\\]|\\.)*')|(\S+:"(?:[^"\\]|\\.)*")|(-?"(?:[^"\\]|\\.)*")|(-?'(?:[^'\\]|\\.)*')|\S+|\S+:\S+/g;
var match;
while ((match = regex.exec(string)) !== null) {
var term = match[0];
@@ -61,11 +61,47 @@ exports.parse = function (string, options) {
offsetEnd: match.index + term.length
});
} else {
terms.push({
text: term,
offsetStart: match.index,
offsetEnd: match.index + term.length
var isExcludedTerm = false;
if (term[0] === '-') {
isExcludedTerm = true;
term = term.slice(1);
}
// Strip surrounding quotes
term = term.replace(/^\"|\"$|^\'|\'$/g, '');
// Strip backslashes respecting escapes
term = (term + '').replace(/\\(.?)/g, function (s, n1) {
switch (n1) {
case '\\':
return '\\';
case '0':
return '\u0000';
case '':
return '';
default:
return n1;
}
});
if (isExcludedTerm) {
if (exclusion['text']) {
if (exclusion['text'] instanceof Array) {
exclusion['text'].push(term);
} else {
exclusion['text'] = [exclusion['text']];
exclusion['text'].push(term);
}
} else {
// First time seeing an excluded text term
exclusion['text'] = term;
}
} else {
terms.push({
text: term,
offsetStart: match.index,
offsetEnd: match.index + term.length
});
}
}
}
// Reverse to ensure proper order when pop()'ing.
@@ -228,7 +264,9 @@ exports.parse = function (string, options) {
// Concatenate all text terms if any
if (query.text.length) {
query.text = query.text.join(' ').trim();
if (!options.tokenize) {
query.text = query.text.join(' ').trim();
}
}
// Just remove the attribute text when it's empty
else {

View File

@@ -1,6 +1,6 @@
{
"name": "search-query-parser",
"version": "1.3.0",
"version": "1.4.0",
"description": "Parser for advanced search query syntax",
"main": "index.js",
"scripts": {

View File

@@ -14,6 +14,62 @@ describe('Search query syntax parser', function () {
parsedSearchQuery.should.equal(searchQuery);
});
it('should return a tokenized string when option is set', function () {
  // With tokenize enabled, a plain space-separated query is expected to
  // come back as an object whose `text` holds one entry per word.
  var expectedTokens = ['fancy', 'pyjama', 'wear'];
  var result = searchquery.parse("fancy pyjama wear", { tokenize: true });

  result.should.be.an.Object;
  result.should.have.property('text', expectedTokens);
});
it('should return a tokenized string when option is set, respecting double-quotes and escapes', function () {
  // A double-quoted phrase containing escaped double quotes and bare single
  // quotes is expected to be returned as a single token, with the outer
  // quotes removed and the escapes resolved.
  var query = 'fancy "py\\"j\\"am\'a w\'ear"';
  var expectedTokens = ['fancy', 'py"j"am\'a w\'ear'];
  var result = searchquery.parse(query, { tokenize: true });

  result.should.be.an.Object;
  result.should.have.property('text', expectedTokens);
});
it('should return a tokenized string when option is set, respecting single-quotes and escapes', function () {
  // Mirror of the double-quote case: a single-quoted phrase with escaped
  // single quotes and bare double quotes is expected to yield one token.
  var query = "fancy 'py\\'j\\'am\"a w\"ear'";
  var expectedTokens = ['fancy', "py'j'am\"a w\"ear"];
  var result = searchquery.parse(query, { tokenize: true });

  result.should.be.an.Object;
  result.should.have.property('text', expectedTokens);
});
it('should return a tokenized string with negation of unquoted terms', function () {
  // Bare words prefixed with '-' are expected to be reported under
  // `exclude.text` (as an array when there are several) and to be absent
  // from `text`.
  var expectedExclusion = {text: ['pyjama', 'wear']};
  var result = searchquery.parse("fancy -pyjama -wear", { tokenize: true });

  result.should.be.an.Object;
  result.should.have.property('text', ['fancy']);
  result.should.have.property('exclude', expectedExclusion);
});
it('should return a tokenized string with negation of single-quoted terms', function () {
  // A '-' directly in front of a single-quoted phrase is expected to negate
  // the whole phrase; the '-' inside the quotes stays literal, and a lone
  // excluded term is reported as a plain string rather than an array.
  var expectedExclusion = {text: 'pyjama -wear'};
  var result = searchquery.parse("fancy -'pyjama -wear'", { tokenize: true });

  result.should.be.an.Object;
  result.should.have.property('text', ['fancy']);
  result.should.have.property('exclude', expectedExclusion);
});
it('should return a tokenized string with negation of double-quoted terms', function () {
  // Same expectation as the single-quoted case, using double quotes: the
  // quoted phrase is excluded as one string, inner '-' included.
  var expectedExclusion = {text: 'pyjama -wear'};
  var result = searchquery.parse('fancy -"pyjama -wear"', { tokenize: true });

  result.should.be.an.Object;
  result.should.have.property('text', ['fancy']);
  result.should.have.property('exclude', expectedExclusion);
});
it('should parse a single keyword with no text', function () {
var searchQuery = 'from:jul@foo.com';