// SentenceBreakIterator.java [plain text]
package gnu.java.text;
import java.text.BreakIterator;
import java.text.CharacterIterator;
/**
 * Break iterator that reports sentence boundaries in the text exposed by
 * the <code>iter</code> character iterator.  Neither <code>iter</code> nor
 * the <code>DONE</code> result constant is declared in this class; both are
 * presumably inherited through <code>BaseBreakIterator</code>.
 *
 * <p>Sentence ends are recognised heuristically by {@link #next()}: a
 * paragraph separator, a <code>'!'</code> or <code>'?'</code>, or a
 * <code>'.'</code> that is followed by at least one space separator and
 * then by a character that is not lower case (or by the end of the
 * text).</p>
 */
public class SentenceBreakIterator extends BaseBreakIterator
{
  /**
   * Returns a copy of this iterator.  The copy receives its own clone of
   * the character iterator, or <code>null</code> when this iterator has
   * no character iterator yet.
   *
   * @return a new <code>SentenceBreakIterator</code>
   */
  public Object clone ()
  {
    return new SentenceBreakIterator (this);
  }

  /**
   * Creates an iterator with no character iterator attached
   * (<code>iter</code> is <code>null</code>).  The text is presumably
   * supplied later through the superclass -- TODO confirm.
   */
  public SentenceBreakIterator ()
  {
    iter = null;
  }

  /**
   * Copy constructor used by {@link #clone()}.
   *
   * @param other the iterator to copy
   */
  private SentenceBreakIterator (SentenceBreakIterator other)
  {
    // An iterator created with the no-argument constructor still has a
    // null `iter'; cloning it must yield another such iterator instead
    // of failing with a NullPointerException.
    iter = other.iter == null
      ? null
      : (CharacterIterator) other.iter.clone();
  }

  /**
   * Advances the character iterator to the next sentence boundary after
   * the current index.
   *
   * @return the index of the boundary that was reached, or
   *         <code>DONE</code> if the iterator was already at its end index
   */
  public int next ()
  {
    int end = iter.getEndIndex();
    if (iter.getIndex() == end)
      return DONE;

    while (iter.getIndex() < end)
      {
        char c = iter.current();
        if (c == CharacterIterator.DONE)
          break;
        int type = Character.getType(c);

        // `n' is the character following `c'; the iterator now points
        // at `n'.
        char n = iter.next();
        if (n == CharacterIterator.DONE)
          break;

        // Always break after a paragraph separator.
        if (type == Character.PARAGRAPH_SEPARATOR)
          break;

        if (c == '!' || c == '?')
          {
            // Skip close punctuation.
            while (n != CharacterIterator.DONE
                   && Character.getType(n) == Character.END_PUNCTUATION)
              n = iter.next();
            // Skip spaces.
            while (n != CharacterIterator.DONE
                   && Character.getType(n) == Character.SPACE_SEPARATOR)
              n = iter.next();
            // Skip at most one paragraph separator.
            if (n != CharacterIterator.DONE
                && Character.getType(n) == Character.PARAGRAPH_SEPARATOR)
              n = iter.next();

            // There is always a break somewhere after `!' or `?'.
            break;
          }

        if (c == '.')
          {
            // Position just after the `.'; scanning resumes from here
            // when this period turns out not to end a sentence.
            int save = iter.getIndex();
            // Skip close punctuation.
            while (n != CharacterIterator.DONE
                   && Character.getType(n) == Character.END_PUNCTUATION)
              n = iter.next();
            // Skip spaces.  They are counted because at least one is
            // required for this period to act as a terminator.
            int spcount = 0;
            while (n != CharacterIterator.DONE
                   && Character.getType(n) == Character.SPACE_SEPARATOR)
              {
                n = iter.next();
                ++spcount;
              }
            if (spcount > 0)
              {
                // Candidate boundary: the position after the spaces.
                int save2 = iter.getIndex();
                // Look past any open punctuation.
                while (n != CharacterIterator.DONE
                       && Character.getType(n) == Character.START_PUNCTUATION)
                  n = iter.next();
                // The next character must not be lower case.
                if (n == CharacterIterator.DONE
                    || ! Character.isLowerCase(n))
                  {
                    iter.setIndex(save2);
                    break;
                  }
              }
            // Not a sentence end; undo the look-ahead.
            iter.setIndex(save);
          }
      }

    return iter.getIndex();
  }

  /**
   * Performs one backward scan from the current index.  Whenever the
   * scan stops because it recognised a sentence terminator it stores an
   * index in {@link #period}; {@link #previous()} uses that index as
   * the starting point of its second scan.
   *
   * @return the iterator index at which the scan stopped, or
   *         <code>DONE</code> if the iterator was already at its begin
   *         index
   */
  private final int previous_internal ()
  {
    int start = iter.getBeginIndex();
    if (iter.getIndex() == start)
      return DONE;

    while (iter.getIndex() >= start)
      {
        // Step back one character: `c' is the character now under the
        // iterator.
        char c = iter.previous();
        if (c == CharacterIterator.DONE)
          break;

        // Peek at the character before `c', then step forward again so
        // the iterator is back on `c'.
        char n = iter.previous();
        if (n == CharacterIterator.DONE)
          break;
        iter.next();
        int nt = Character.getType(n);

        // A character that is not lower case and is preceded by open
        // punctuation or a space may start a sentence following a `.'.
        if (! Character.isLowerCase(c)
            && (nt == Character.START_PUNCTUATION
                || nt == Character.SPACE_SEPARATOR))
          {
            // Saved so the look-behind can be undone below.
            int save = iter.getIndex();
            int save_nt = nt;
            char save_n = n;
            // Skip open punctuation.
            while (n != CharacterIterator.DONE
                   && Character.getType(n) == Character.START_PUNCTUATION)
              n = iter.previous();
            if (n == CharacterIterator.DONE)
              break;
            if (Character.getType(n) == Character.SPACE_SEPARATOR)
              {
                // Must have at least one space after the `.'.
                // NOTE(review): if open punctuation was skipped above,
                // the iterator is on the space itself here rather than
                // just after it -- confirm this matches what next()
                // reports for the same text.
                int save2 = iter.getIndex();
                while (n != CharacterIterator.DONE
                       && Character.getType(n) == Character.SPACE_SEPARATOR)
                  n = iter.previous();
                // Skip close punctuation.
                while (n != CharacterIterator.DONE
                       && Character.getType(n) == Character.END_PUNCTUATION)
                  n = iter.previous();
                if (n == CharacterIterator.DONE || n == '.')
                  {
                    // Communicate the location of the actual end.
                    period = iter.getIndex();
                    iter.setIndex(save2);
                    break;
                  }
              }
            // No `.' found: restore the state from before the
            // look-behind.
            iter.setIndex(save);
            nt = save_nt;
            n = save_n;
          }

        if (nt == Character.PARAGRAPH_SEPARATOR)
          {
            // Communicate the location of the actual end.
            period = iter.getIndex();
            break;
          }
        else if (nt == Character.SPACE_SEPARATOR
                 || nt == Character.END_PUNCTUATION)
          {
            int save = iter.getIndex();
            // Skip spaces.
            while (n != CharacterIterator.DONE
                   && Character.getType(n) == Character.SPACE_SEPARATOR)
              n = iter.previous();
            // Skip close punctuation.
            while (n != CharacterIterator.DONE
                   && Character.getType(n) == Character.END_PUNCTUATION)
              n = iter.previous();
            // Remember how far back the look-behind went, then undo it.
            int here = iter.getIndex();
            iter.setIndex(save);
            if (n == CharacterIterator.DONE || n == '!' || n == '?')
              {
                // Communicate the location of the actual end.
                period = here;
                break;
              }
          }
        else if (n == '!' || n == '?')
          {
            // Communicate the location of the actual end.
            period = iter.getIndex();
            break;
          }
      }

    return iter.getIndex();
  }

  /**
   * Moves the character iterator backwards to a preceding sentence
   * boundary.
   *
   * <p>One backward scan is always performed.  Its result is returned
   * directly when the iterator started at its end index or when the scan
   * returned <code>DONE</code>.  Otherwise the iterator is repositioned
   * to the index left in {@link #period} and a second scan supplies the
   * result; the intent appears to be to skip over the sentence end
   * adjacent to the starting position.</p>
   *
   * @return the index reached, or <code>DONE</code> if the iterator was
   *         already at its begin index
   */
  public int previous ()
  {
    int here = iter.getIndex();
    // Default restart position in case the first scan records none.
    period = here;
    int first = previous_internal ();
    if (here == iter.getEndIndex() || first == DONE)
      return first;
    iter.setIndex(period);
    return previous_internal ();
  }

  /**
   * Scratch index shared between {@link #previous()} and
   * <code>previous_internal()</code>: the position from which the second
   * backward scan of <code>previous()</code> starts.
   */
  private int period;
}