Skip to content
This repository has been archived by the owner on May 27, 2020. It is now read-only.

Feature/build custom analyzer #328

Open
wants to merge 40 commits into
base: branch-3.0.13
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
e42179e
Added tokenizers
Apr 24, 2017
64aee42
Add lowercase, edgeNGram and thai tokenizers
jpgilaberte Apr 26, 2017
1eec1d5
Reformat code
jpgilaberte Apr 26, 2017
9966fe0
Add tokenizers in builder module
jpgilaberte Apr 28, 2017
491987b
Scala refactor in tokenizer feature
jpgilaberte May 16, 2017
33b3011
Add license
jpgilaberte May 16, 2017
e999d1f
Add license in scala files
jpgilaberte May 16, 2017
567c0df
Add license in test files
jpgilaberte May 16, 2017
563c76c
Add license in custom analyzer
jpgilaberte May 16, 2017
049d826
Refactor tokenizers
jpgilaberte Jun 2, 2017
048a1df
Add charFilters
jpgilaberte Jun 2, 2017
1645cf0
Add tokenFilter
jpgilaberte Jun 2, 2017
5ec11fd
Add builder objects
jpgilaberte Jun 2, 2017
759c248
Add plugin Test
jpgilaberte Jun 2, 2017
c98e3fc
Add testAt CustomAnalizer
jpgilaberte Jun 2, 2017
3461943
Add JavaDoc in builder
jpgilaberte Jun 7, 2017
598f314
Add ScalaDoc in plugin
jpgilaberte Jun 8, 2017
a1d5f0f
Add TokenFilter documentation
jpgilaberte Jun 12, 2017
3532cab
Fix RST format
jpgilaberte Jun 12, 2017
af386ef
Fix RST format
jpgilaberte Jun 12, 2017
c63b553
Fix RST format
jpgilaberte Jun 12, 2017
b16b9fc
Fix package name format
jpgilaberte Jun 12, 2017
a4cd8f3
Fix package name format
jpgilaberte Jun 12, 2017
3822c65
Fix mandatory column size
jpgilaberte Jun 12, 2017
5ed61a8
Fix mandatory column size
jpgilaberte Jun 12, 2017
cfa3a20
Add more TokenFilters
jpgilaberte Jun 13, 2017
b45479e
Add new TokenFilter documentation
jpgilaberte Jun 13, 2017
f80c83f
Fix rst format
jpgilaberte Jun 13, 2017
1c55d09
Fix rst format
jpgilaberte Jun 13, 2017
29d2fa4
Fix persian charfilter
jpgilaberte Jun 13, 2017
cf88e27
Fix documentation
jpgilaberte Jun 14, 2017
a2f7085
Add char filter test
jpgilaberte Jun 14, 2017
956de0a
Add token filter test
jpgilaberte Jun 14, 2017
4551f00
Add token filters in builder
jpgilaberte Jun 14, 2017
fc10a93
Add field in HtmlStripCharFilter
jpgilaberte Jun 14, 2017
e2dc196
Refactor test package
jpgilaberte Jun 14, 2017
da3a8dd
Fix documentation
jpgilaberte Jun 15, 2017
d01f7ab
Add token filter test
jpgilaberte Jun 15, 2017
b7a5770
Refactor CustomAnalyzerIT
jpgilaberte Jun 15, 2017
270f62b
Add CustomAnalyzerIT removed
jpgilaberte Jun 15, 2017
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions builder/pom.xml
Expand Up @@ -51,5 +51,11 @@
<version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.stratio.cassandra</groupId>
<artifactId>cassandra-lucene-index-plugin</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
Expand Up @@ -21,7 +21,11 @@
import com.stratio.cassandra.lucene.builder.index.Partitioner;
import com.stratio.cassandra.lucene.builder.index.schema.Schema;
import com.stratio.cassandra.lucene.builder.index.schema.analysis.ClasspathAnalyzer;
import com.stratio.cassandra.lucene.builder.index.schema.analysis.CustomAnalyzer;
import com.stratio.cassandra.lucene.builder.index.schema.analysis.SnowballAnalyzer;
import com.stratio.cassandra.lucene.builder.index.schema.analysis.charFilter.CharFilter;
import com.stratio.cassandra.lucene.builder.index.schema.analysis.tokenFilter.TokenFilter;
import com.stratio.cassandra.lucene.builder.index.schema.analysis.tokenizer.Tokenizer;
import com.stratio.cassandra.lucene.builder.index.schema.mapping.*;
import com.stratio.cassandra.lucene.builder.search.Search;
import com.stratio.cassandra.lucene.builder.search.condition.*;
Expand Down Expand Up @@ -253,6 +257,30 @@ public static SnowballAnalyzer snowballAnalyzer(String language) {
return new SnowballAnalyzer(language);
}

/**
 * Returns a new {@link CustomAnalyzer} using the specified tokenizer, char filters and token filters.
 *
 * @param tokenizer the {@link Tokenizer} to use
 * @param charFilter the array of {@link CharFilter}s to use
 * @param tokenFilter the array of {@link TokenFilter}s to use
 * @return a new {@link CustomAnalyzer} holding the given tokenizer, char filters and token filters
 */
public static CustomAnalyzer customAnalyzer(Tokenizer tokenizer, CharFilter[] charFilter, TokenFilter[] tokenFilter) {
    return new CustomAnalyzer(tokenizer, charFilter, tokenFilter);
}

public static CustomAnalyzer customAnalyzer(Tokenizer tokenizer) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just copy the comments before here

return new CustomAnalyzer(tokenizer, null, null);
}

/**
 * Returns a new {@link CustomAnalyzer} using the specified tokenizer and char filters.
 *
 * The token filter array handed to the analyzer is {@code null}.
 *
 * @param tokenizer the {@link Tokenizer} to use
 * @param charFilter the array of {@link CharFilter}s to use
 * @return a new {@link CustomAnalyzer} holding the given tokenizer and char filters
 */
public static CustomAnalyzer customAnalyzer(Tokenizer tokenizer, CharFilter[] charFilter) {
    final TokenFilter[] noTokenFilters = null;
    return new CustomAnalyzer(tokenizer, charFilter, noTokenFilters);
}

/**
 * Returns a new {@link CustomAnalyzer} using the specified tokenizer and token filters.
 *
 * The char filter array handed to the analyzer is {@code null}.
 *
 * @param tokenizer the {@link Tokenizer} to use
 * @param tokenFilter the array of {@link TokenFilter}s to use
 * @return a new {@link CustomAnalyzer} holding the given tokenizer and token filters
 */
public static CustomAnalyzer customAnalyzer(Tokenizer tokenizer, TokenFilter[] tokenFilter) {
    final CharFilter[] noCharFilters = null;
    return new CustomAnalyzer(tokenizer, noCharFilters, tokenFilter);
}

/**
* Returns a new {@link Search}.
*
Expand Down
Expand Up @@ -26,6 +26,7 @@
*/
@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, property = "type")
@JsonSubTypes({@JsonSubTypes.Type(value = ClasspathAnalyzer.class, name = "classpath"),
@JsonSubTypes.Type(value = SnowballAnalyzer.class, name = "snowball")})
@JsonSubTypes.Type(value = SnowballAnalyzer.class, name = "snowball"),
@JsonSubTypes.Type(value = CustomAnalyzer.class, name = "custom")})
public abstract class Analyzer extends JSONBuilder {
}
@@ -0,0 +1,61 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.builder.index.schema.analysis;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.stratio.cassandra.lucene.builder.index.schema.analysis.charFilter.CharFilter;
import com.stratio.cassandra.lucene.builder.index.schema.analysis.tokenFilter.TokenFilter;
import com.stratio.cassandra.lucene.builder.index.schema.analysis.tokenizer.Tokenizer;


/**
 * {@link Analyzer} composed of a {@link Tokenizer}, an array of {@link CharFilter}s and an array of
 * {@link TokenFilter}s.
 *
 * The three components are bound to the JSON properties {@code tokenizer}, {@code char_filter} and
 * {@code token_filter} respectively.
 *
 * @author Juan Pedro Gilaberte {@literal <jpgilaberte@stratio.com>}
 */
public class CustomAnalyzer extends Analyzer {

    /** The {@code TokenFilter} array. */
    @JsonProperty("token_filter")
    private final TokenFilter[] tokenFilter;

    /** The {@code CharFilter} array. */
    @JsonProperty("char_filter")
    private final CharFilter[] charFilter;

    /** The {@code Tokenizer} instance. */
    @JsonProperty("tokenizer")
    private final Tokenizer tokenizer;

    /**
     * Builds a new {@link CustomAnalyzer} using the specified tokenizer, char filters and token filters.
     *
     * The arguments are stored as given; no copy of the arrays is made.
     *
     * @param tokenizer the {@link Tokenizer} to use
     * @param charFilter the array of {@link CharFilter}s to use
     * @param tokenFilter the array of {@link TokenFilter}s to use
     */
    @JsonCreator
    public CustomAnalyzer(@JsonProperty("tokenizer") Tokenizer tokenizer,
                          @JsonProperty("char_filter") CharFilter[] charFilter,
                          @JsonProperty("token_filter") TokenFilter[] tokenFilter) {
        this.tokenizer = tokenizer;
        this.charFilter = charFilter;
        this.tokenFilter = tokenFilter;
    }
}
@@ -0,0 +1,32 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.builder.index.schema.analysis.charFilter;

import com.fasterxml.jackson.annotation.JsonSubTypes;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.stratio.cassandra.lucene.builder.JSONBuilder;

/**
* Created by jpgilaberte on 25/05/17.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

*/
@JsonTypeInfo(property = "type", include = JsonTypeInfo.As.PROPERTY, use = JsonTypeInfo.Id.NAME)
@JsonSubTypes({
        // Each entry binds one value of the "type" property to a concrete char filter class.
        @JsonSubTypes.Type(name = "mapping", value = MappingCharFilter.class),
        @JsonSubTypes.Type(name = "htmlstrip", value = HtmlStripCharFilter.class),
        @JsonSubTypes.Type(name = "pattern", value = PatternCharFilter.class),
        @JsonSubTypes.Type(name = "persian", value = PersianCharFilter.class)
})
public class CharFilter extends JSONBuilder {
    // Intentionally empty: the concrete char filters declare their own properties.
}
@@ -0,0 +1,45 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.builder.index.schema.analysis.charFilter;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;

import java.util.ArrayList;

/**
* Created by jpgilaberte on 30/05/17.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please put an author label like this

*/
public class HtmlStripCharFilter extends CharFilter {

    /**
     * The {@code escapedtags} list; {@code null} until set through the constructor or the setter.
     * NOTE(review): presumably the HTML tags the filter must leave untouched - confirm against the plugin side.
     */
    @JsonProperty("escapedtags")
    private ArrayList<String> escapedtags;

    /** Builds a new {@link HtmlStripCharFilter} without an {@code escapedtags} list. */
    public HtmlStripCharFilter() {
    }

    /**
     * Builds a new {@link HtmlStripCharFilter} with the specified {@code escapedtags} list.
     *
     * This is the only constructor marked as JSON creator, and its parameter is bound by name, in line
     * with the other char filters of this package.
     *
     * @param escapedtags the list to store, kept by reference
     */
    @JsonCreator
    public HtmlStripCharFilter(@JsonProperty("escapedtags") ArrayList<String> escapedtags) {
        this.escapedtags = escapedtags;
    }

    /**
     * Returns the {@code escapedtags} list.
     *
     * @return the stored list, possibly {@code null}
     */
    public ArrayList<String> getEscapedtags() {
        return escapedtags;
    }

    /**
     * Replaces the {@code escapedtags} list.
     *
     * @param escapedtags the list to store, kept by reference
     * @return this filter, to allow call chaining
     */
    public HtmlStripCharFilter setEscapedtags(ArrayList<String> escapedtags) {
        this.escapedtags = escapedtags;
        return this;
    }
}
@@ -0,0 +1,33 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.builder.index.schema.analysis.charFilter;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;

/**
* Created by jpgilaberte on 25/05/17.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

*/
public class MappingCharFilter extends CharFilter {

    /** The value bound to the {@code mapping} JSON property. */
    @JsonProperty("mapping")
    private final String mapping;

    /**
     * Builds a new {@link MappingCharFilter} holding the specified mapping.
     *
     * @param mapping the value of the {@code mapping} property
     */
    @JsonCreator
    public MappingCharFilter(@JsonProperty("mapping") String mapping) {
        this.mapping = mapping;
    }
}
@@ -0,0 +1,37 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.builder.index.schema.analysis.charFilter;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;

/**
* Created by jpgilaberte on 30/05/17.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

==

*/
public class PatternCharFilter extends CharFilter {

    // NOTE(review): both fields are package-private, unlike the private field of MappingCharFilter;
    // visibility is left as is because classes in this package may read them.

    /** The value bound to the {@code pattern} JSON property. */
    @JsonProperty("pattern")
    final String pattern;

    /** The value bound to the {@code replacement} JSON property. */
    @JsonProperty("replacement")
    final String replacement;

    /**
     * Builds a new {@link PatternCharFilter} holding the specified pattern and replacement.
     *
     * @param pattern the value of the {@code pattern} property
     * @param replacement the value of the {@code replacement} property
     */
    @JsonCreator
    public PatternCharFilter(@JsonProperty("pattern") String pattern,
                             @JsonProperty("replacement") String replacement) {
        this.replacement = replacement;
        this.pattern = pattern;
    }
}
@@ -0,0 +1,26 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.builder.index.schema.analysis.charFilter;

import com.fasterxml.jackson.annotation.JsonCreator;

/**
 * {@link CharFilter} that carries no configuration of its own.
 *
 * @author Juan Pedro Gilaberte {@literal <jpgilaberte@stratio.com>}
 */
public class PersianCharFilter extends CharFilter {

    /** Builds a new {@link PersianCharFilter}; there is nothing to configure. */
    @JsonCreator
    public PersianCharFilter() {
    }
}
@@ -0,0 +1,28 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.builder.index.schema.analysis.tokenFilter;

import com.fasterxml.jackson.annotation.JsonCreator;

/**
 * {@link TokenFilter} that carries no configuration of its own.
 *
 * @author Juan Pedro Gilaberte {@literal <jpgilaberte@stratio.com>}
 */
public class ApostropheTokenFilter extends TokenFilter {

    /** Builds a new {@link ApostropheTokenFilter}; there is nothing to configure. */
    @JsonCreator
    public ApostropheTokenFilter() {
    }
}

@@ -0,0 +1,28 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.builder.index.schema.analysis.tokenFilter;

import com.fasterxml.jackson.annotation.JsonCreator;

/**
 * {@link TokenFilter} that carries no configuration of its own.
 *
 * @author Juan Pedro Gilaberte {@literal <jpgilaberte@stratio.com>}
 */
public class ArabicnormalizationTokenFilter extends TokenFilter {

    /** Builds a new {@link ArabicnormalizationTokenFilter}; there is nothing to configure. */
    @JsonCreator
    public ArabicnormalizationTokenFilter() {
    }
}

@@ -0,0 +1,28 @@
/*
* Copyright (C) 2014 Stratio (http://stratio.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.stratio.cassandra.lucene.builder.index.schema.analysis.tokenFilter;

import com.fasterxml.jackson.annotation.JsonCreator;

/**
 * {@link TokenFilter} that carries no configuration of its own.
 *
 * @author Juan Pedro Gilaberte {@literal <jpgilaberte@stratio.com>}
 */
public class ArabicstemTokenFilter extends TokenFilter {

    /** Builds a new {@link ArabicstemTokenFilter}; there is nothing to configure. */
    @JsonCreator
    public ArabicstemTokenFilter() {
    }
}