public class MockTokenizer
extends org.apache.lucene.analysis.Tokenizer
This tokenizer is a replacement for the WHITESPACE, SIMPLE, and KEYWORD
tokenizers. If you are writing a component such as a TokenFilter, it's a great idea to test
it by wrapping this tokenizer instead, for extra checks. This tokenizer has the following behavior:
It checks the consumer workflow; this checking can be toggled with setEnableChecks(boolean).
Modifier and Type | Field and Description |
---|---|
static int |
DEFAULT_MAX_TOKEN_LENGTH |
static int |
KEYWORD
Acts similar to KeywordTokenizer.
|
static int |
SIMPLE
Acts like LetterTokenizer.
|
static int |
WHITESPACE
Acts similar to WhitespaceTokenizer.
|
Constructor and Description |
---|
MockTokenizer(org.apache.lucene.util.AttributeSource.AttributeFactory factory,
java.io.Reader input,
int pattern,
boolean lowerCase,
int maxTokenLength) |
MockTokenizer(java.io.Reader input)
|
MockTokenizer(java.io.Reader input,
int pattern,
boolean lowerCase) |
MockTokenizer(java.io.Reader input,
int pattern,
boolean lowerCase,
int maxTokenLength) |
Modifier and Type | Method and Description |
---|---|
void |
close() |
void |
end() |
boolean |
incrementToken() |
protected boolean |
isTokenChar(int c) |
protected int |
normalize(int c) |
protected int |
readCodePoint() |
void |
reset() |
void |
reset(java.io.Reader input) |
void |
setEnableChecks(boolean enableChecks)
Toggle consumer workflow checking: if your test consumes tokenstreams normally you
should leave this enabled.
|
addAttribute, addAttributeImpl, captureState, clearAttributes, cloneAttributes, copyTo, equals, getAttribute, getAttributeClassesIterator, getAttributeFactory, getAttributeImplsIterator, hasAttribute, hasAttributes, hashCode, reflectAsString, reflectWith, restoreState, toString
public static final int WHITESPACE
public static final int KEYWORD
public static final int SIMPLE
public static final int DEFAULT_MAX_TOKEN_LENGTH
public MockTokenizer(org.apache.lucene.util.AttributeSource.AttributeFactory factory, java.io.Reader input, int pattern, boolean lowerCase, int maxTokenLength)
public MockTokenizer(java.io.Reader input, int pattern, boolean lowerCase, int maxTokenLength)
public MockTokenizer(java.io.Reader input, int pattern, boolean lowerCase)
public MockTokenizer(java.io.Reader input)
public final boolean incrementToken() throws java.io.IOException
incrementToken
in class org.apache.lucene.analysis.TokenStream
java.io.IOException
protected int readCodePoint() throws java.io.IOException
java.io.IOException
protected boolean isTokenChar(int c)
protected int normalize(int c)
public void reset() throws java.io.IOException
reset
in class org.apache.lucene.analysis.TokenStream
java.io.IOException
public void close() throws java.io.IOException
close
in interface java.io.Closeable
close
in interface java.lang.AutoCloseable
close
in class org.apache.lucene.analysis.Tokenizer
java.io.IOException
public void reset(java.io.Reader input) throws java.io.IOException
reset
in class org.apache.lucene.analysis.Tokenizer
java.io.IOException
public void end() throws java.io.IOException
end
in class org.apache.lucene.analysis.TokenStream
java.io.IOException
public void setEnableChecks(boolean enableChecks)
Copyright © 2000-2019 Apache Software Foundation. All Rights Reserved.