Clover coverage report - DrJava Test Coverage (drjava-20110828-r5448)
Coverage timestamp: Sun Aug 28 2011 03:13:33 CDT
file stats: LOC: 787   Methods: 37
NCLOC: 507   Classes: 8
 
 Source file Conditionals Statements Methods TOTAL
BalancingStreamTokenizer.java 95.8% 97.9% 100% 97.4%
coverage coverage
 1    /*BEGIN_COPYRIGHT_BLOCK
 2    *
 3    * Copyright (c) 2001-2010, JavaPLT group at Rice University (drjava@rice.edu)
 4    * All rights reserved.
 5    *
 6    * Redistribution and use in source and binary forms, with or without
 7    * modification, are permitted provided that the following conditions are met:
 8    * * Redistributions of source code must retain the above copyright
 9    * notice, this list of conditions and the following disclaimer.
 10    * * Redistributions in binary form must reproduce the above copyright
 11    * notice, this list of conditions and the following disclaimer in the
 12    * documentation and/or other materials provided with the distribution.
 13    * * Neither the names of DrJava, the JavaPLT group, Rice University, nor the
 14    * names of its contributors may be used to endorse or promote products
 15    * derived from this software without specific prior written permission.
 16    *
 17    * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18    * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19    * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20    * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 21    * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 22    * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 23    * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 24    * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 25    * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 26    * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 27    * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28    *
 29    * This software is Open Source Initiative approved Open Source Software.
 30    * Open Source Initiative Approved is a trademark of the Open Source Initiative.
 31    *
 32    * This file is part of DrJava. Download the current version of this project
 33    * from http://www.drjava.org/ or http://sourceforge.net/projects/drjava/
 34    *
 35    * END_COPYRIGHT_BLOCK*/
 36   
 37    package edu.rice.cs.util;
 38   
 39    import java.io.Reader;
 40    import java.io.IOException;
 41    import java.util.Stack;
 42    import java.util.HashMap;
 43    import java.util.Set;
 44    import java.util.SortedSet;
 45    import java.util.HashSet;
 46    import java.util.TreeSet;
 47    import java.util.Iterator;
 48    import java.util.ArrayList;
 49   
 50    import edu.rice.cs.plt.lambda.Lambda;
 51   
 52    /**
 53    * A tokenizer that splits a stream into string tokens while balancing quoting characters.
 54    * @author Mathias Ricken
 55    * @version $Id: BalancingStreamTokenizer.java 5175 2010-01-20 08:46:32Z mgricken $
 56    */
 57   
 58    public class BalancingStreamTokenizer {
 59    /** Input Reader. */
 60    protected Reader _reader;
 61   
 62    /** Stack of characters having been pushed back. */
 63    public Stack<Integer> _pushed = new Stack<Integer>();
 64   
 65    /** State of the tokenizer. */
 66    public static class State {
 67    /** Pairs of beginning and ending quote strings. */
 68    public HashMap<String,String> quotePairs = new HashMap<String,String>();
 69   
 70    /** Sets of quote beginnings to be parsed as one symbol. */
 71    public TreeSet<String> quotes = new TreeSet<String>();
 72   
 73    /** Sets of quote endings to be parsed as one symbol. */
 74    public TreeSet<String> quoteEnds = new TreeSet<String>();
 75   
 76    /** Sets of keywords to be parsed as one symbol. */
 77    public TreeSet<String> keywords = new TreeSet<String>();
 78   
 79    /** Whitespace characters. */
 80    public HashSet<Integer> whitespace = new HashSet<Integer>();
 81   
 82    /** Default constructor. */
 83  931 public State() { }
 84   
 85    /** Copy constructor. */
 86  2 public State(State o) {
 87  2 quotePairs = new HashMap<String,String>(o.quotePairs);
 88  2 keywords = new TreeSet<String>(o.keywords);
 89  2 quotes = new TreeSet<String>(o.quotes);
 90  2 quoteEnds = new TreeSet<String>(o.quoteEnds);
 91  2 whitespace = new HashSet<Integer>(o.whitespace);
 92    }
 93    }
 94   
 95    /** Current state of the tokenizer. */
 96    protected State _state = new State();
 97   
 98    /** Stack of previous states. */
 99    protected Stack<State> _stateStack = new Stack<State>();
 100   
 101    /** Escape character, if available. If this character is placed in front
 102    * of any quote or keyword, the quote or keyword is treated as normal text.
 103    * To get this character to exist alone, it has to be doubled up.
 104    * If this escape character appears alone where it does not precede another escape
 105    * character, whitespace, a quote or keyword, it is dropped.
 106    * The escape character CANNOT be declared whitespace.
 107    * The escape character CAN be part of a quote or keyword, but it has to be
 108    * doubled up in the string, and when the quotes or keywords are added,
 109    * the escape character is automatically doubled up if present.
 110    * If set to null, no escaping is possible. */
 111    protected Character _escape = null;
 112   
 113    /** The previous character was the escape character. */
 114    protected boolean _wasEscape = false;
 115   
 116    /** The current character is the escape character. */
 117    protected boolean _isEscape = false;
 118   
 119    /** Kind of tokens to be returned. */
 120    public enum Token { NONE, NORMAL, QUOTED, KEYWORD, END }
 121   
 122    public volatile Token _token = Token.NONE;
 123   
 124    /** Create a new balancing stream tokenizer.
 125    * @param r reader to tokenize
 126    */
 127  64 public BalancingStreamTokenizer(Reader r) {
 128  64 this(r,null);
 129    }
 130   
 131    /** Create a new balancing stream tokenizer.
 132    * @param r reader to tokenize
 133    * @param escape escape character or null
 134    */
 135  574 public BalancingStreamTokenizer(Reader r, Character escape) {
 136  574 _escape = escape;
 137  574 _reader = r;
 138    }
 139   
 140    /** Setup a tokenizer with just whitespace. */
 141  14 public void defaultWhitespaceSetup() {
 142  14 wordRange(0,255);
 143  14 whitespaceRange(0,32);
 144    }
 145   
 146    /** Setup a tokenizer that recognizes " and ' quotes. */
 147  13 public void defaultTwoQuoteSetup() {
 148  13 wordRange(0,255);
 149  13 whitespaceRange(0,32);
 150  13 addQuotes("\"", "\"");
 151  13 addQuotes("'", "'");
 152    }
 153   
 154    /** Setup a tokenizer that recognizes ", ' and ` quotes. */
 155  1 public void defaultThreeQuoteSetup() {
 156  1 wordRange(0,255);
 157  1 whitespaceRange(0,32);
 158  1 addQuotes("\"", "\"");
 159  1 addQuotes("'", "'");
 160  1 addQuotes("`", "`");
 161    }
 162   
 163    /** Setup a tokenizer that recognizes " and ' quotes and { } braces. */
 164  1 public void defaultTwoQuoteCurlySetup() {
 165  1 wordRange(0,255);
 166  1 whitespaceRange(0,32);
 167  1 addQuotes("\"", "\"");
 168  1 addQuotes("'", "'");
 169  1 addQuotes("{", "}");
 170    }
 171   
 172    /** Setup a tokenizer that recognizes ", ' and ` quotes and { } braces. */
 173  1 public void defaultThreeQuoteCurlySetup() {
 174  1 wordRange(0,255);
 175  1 whitespaceRange(0,32);
 176  1 addQuotes("\"", "\"");
 177  1 addQuotes("'", "'");
 178  1 addQuotes("`", "`");
 179  1 addQuotes("{", "}");
 180    }
 181   
 182    /** Setup a tokenizer that recognizes ", ' and ` quotes and ${ } braces. */
 183  25 public void defaultThreeQuoteDollarCurlySetup() {
 184  25 wordRange(0,255);
 185  25 whitespaceRange(0,32);
 186  25 addQuotes("\"", "\"");
 187  25 addQuotes("'", "'");
 188  25 addQuotes("`", "`");
 189  25 addQuotes("${", "}");
 190    }
 191   
 192    /** Return the next token from the reader, or from the stack if it isn't empty.
 193    * @return next token or -1 when end of stream
 194    */
 195  9587 protected int nextToken() throws IOException {
 196  9587 if (_pushed.empty()) {
 197  8748 return _reader.read();
 198    }
 199    else {
 200  839 return _pushed.pop();
 201    }
 202    }
 203   
 204    /** Push a token back onto the stack.
 205    * @param token token to push back
 206    */
 207  839 protected void pushToken(int token) {
 208  839 _pushed.push(token);
 209    }
 210   
 211    /** Return a copy of the current state of the tokenizer.
 212    * @return copy of the state
 213    */
 214  1 public State getState() { return new State(_state); }
 215   
 216    /** Set the stream tokenizer the the state specified.
 217    * @param state state
 218    */
 219  358 public void setState(State state) { _state = state; }
 220   
 221    /** Push the current state onto the stack. */
 222  357 protected void pushState() { _stateStack.push(_state); }
 223   
 224    /** Pops the top of the state stack and makes it the current state. */
 225  357 protected void popState() { setState(_stateStack.pop()); }
 226   
 227    /** Returns the type of the current token. */
 228  1142 public Token token() { return _token; }
 229   
 230    /** Specify a range characters as word characters.
 231    * @param lo the character beginning the word character range, inclusive
 232    * @param hi the character ending the word character range, inclusive
 233    */
 234  576 public void wordRange(int lo, int hi) {
 235  576 ArrayList<String> kwToRemove = new ArrayList<String>();
 236  576 ArrayList<String> qpToRemove = new ArrayList<String>();
 237  576 for(int i = lo; i <= hi; ++i) {
 238    // now remove all whitespace in that range
 239  146837 if (_state.whitespace.contains(i)) {
 240  59 _state.whitespace.remove(i);
 241    }
 242   
 243    // now accumulate all keywords that begin with that character
 244  146837 Iterator<String> kit = _state.keywords.iterator();
 245  146837 while(kit.hasNext()) {
 246  52 String s = kit.next();
 247  2 if (s.charAt(0) == i) { kwToRemove.add(s); }
 248    }
 249   
 250    // now accumulate all quotes that begin with that character
 251  146837 Iterator<String> qit = _state.quotes.iterator();
 252  146837 while(qit.hasNext()) {
 253  414 String s = qit.next();
 254  5 if (s.charAt(0) == i) { qpToRemove.add(s); }
 255    }
 256    }
 257    // remove all accumulated keywords and quotes
 258  2 for(String s: kwToRemove) { _state.keywords.remove(s); }
 259  576 for(String s: qpToRemove) {
 260  5 _state.quotes.remove(s);
 261  5 _state.quoteEnds.remove(_state.quotePairs.get(s));
 262  5 _state.quotePairs.remove(s);
 263    }
 264    }
 265   
 266    /** Specify one or more characters as word characters.
 267    * @param c the character(s)
 268    */
 269  2 public void wordChars(int... c) {
 270  2 ArrayList<String> kwToRemove = new ArrayList<String>();
 271  2 ArrayList<String> qpToRemove = new ArrayList<String>();
 272  2 for(int i: c) {
 273    // now remove all whitespace in that range
 274  2 if (_state.whitespace.contains(i)) {
 275  1 _state.whitespace.remove(i);
 276    }
 277   
 278    // now accumulate all keywords that begin with that character
 279  2 Iterator<String> kit = _state.keywords.iterator();
 280  2 while(kit.hasNext()) {
 281  4 String s = kit.next();
 282  3 if (s.charAt(0) == i) { kwToRemove.add(s); }
 283    }
 284   
 285    // now accumulate all quotes that begin with that character
 286  2 Iterator<String> qit = _state.quotes.iterator();
 287  2 while(qit.hasNext()) {
 288  10 String s = qit.next();
 289  1 if (s.charAt(0) == i) { qpToRemove.add(s); }
 290    }
 291    }
 292    // remove all accumulated keywords and quotes
 293  3 for(String s: kwToRemove) { _state.keywords.remove(s); }
 294  2 for(String s: qpToRemove) {
 295  1 _state.quotes.remove(s);
 296  1 _state.quoteEnds.remove(_state.quotePairs.get(s));
 297  1 _state.quotePairs.remove(s);
 298    }
 299    }
 300   
 301    /** Specify a range characters as whitespace.
 302    * @param lo the character beginning the whitespace range, inclusive
 303    * @param hi the character ending the whitespace range, inclusive
 304    */
 305  84 public void whitespaceRange(int lo, int hi) {
 306  84 ArrayList<String> kwToRemove = new ArrayList<String>();
 307  84 ArrayList<String> qpToRemove = new ArrayList<String>();
 308  84 for(int i = lo; i <= hi; ++i) {
 309  1 if ((_escape != null) && (i == _escape)) { continue; }
 310   
 311    // set whitespace
 312  3044 _state.whitespace.add(i);
 313   
 314    // now accumulate all keywords that begin with that character
 315  3044 Iterator<String> kit = _state.keywords.iterator();
 316  3044 while(kit.hasNext()) {
 317  52 String s = kit.next();
 318  2 if (s.charAt(0) == i) { kwToRemove.add(s); }
 319    }
 320   
 321    // now accumulate all quotes that begin with that character
 322  3044 Iterator<String> qit = _state.quotes.iterator();
 323  3044 while(qit.hasNext()) {
 324  1538 String s = qit.next();
 325  9 if (s.charAt(0) == i) { qpToRemove.add(s); }
 326    }
 327    }
 328    // remove all accumulated keywords and quotes
 329  2 for(String s: kwToRemove) { _state.keywords.remove(s); }
 330  84 for(String s: qpToRemove) {
 331  9 _state.quotes.remove(s);
 332  9 _state.quoteEnds.remove(_state.quotePairs.get(s));
 333  9 _state.quotePairs.remove(s);
 334    }
 335    }
 336   
 337    /** Specify one or more characters as whitespace.
 338    * @param c the character(s)
 339    */
 340  3 public void whitespace(int... c) {
 341  3 ArrayList<String> kwToRemove = new ArrayList<String>();
 342  3 ArrayList<String> qpToRemove = new ArrayList<String>();
 343  3 for(int i: c) {
 344  1 if ((_escape != null) && (i == _escape)) { continue; }
 345   
 346    // set whitespace
 347  2 _state.whitespace.add(i);
 348   
 349    // now accumulate all keywords that begin with that character
 350  2 Iterator<String> kit = _state.keywords.iterator();
 351  2 while(kit.hasNext()) {
 352  1 String s = kit.next();
 353  0 if (s.charAt(0) == i) { kwToRemove.add(s); }
 354    }
 355   
 356    // now accumulate all quotes that begin with that character
 357  2 Iterator<String> qit = _state.quotes.iterator();
 358  2 while(qit.hasNext()) {
 359  11 String s = qit.next();
 360  2 if (s.charAt(0) == i) { qpToRemove.add(s); }
 361    }
 362    }
 363    // remove all accumulated keywords and quotes
 364  0 for(String s: kwToRemove) { _state.keywords.remove(s); }
 365  3 for(String s: qpToRemove) {
 366  2 _state.quotes.remove(s);
 367  2 _state.quoteEnds.remove(_state.quotePairs.get(s));
 368  2 _state.quotePairs.remove(s);
 369    }
 370    }
 371   
 372    /** Specify a pair of quotes.
 373    * @param begin the beginning quotation mark
 374    * @param end the ending quotation mark
 375    */
 376  1117 public void addQuotes(String begin, String end) {
 377  1117 begin = escape(begin);
 378  1117 end = escape(end);
 379   
 380    // check if the first character of the beginning quotation mark is considered whitespace
 381  1117 Iterator<Integer> wit = _state.whitespace.iterator();
 382  1117 while(wit.hasNext()) {
 383  6499 int c = wit.next();
 384  6499 if (begin.charAt(0) == c) {
 385  1 throw new QuoteStartsWithWhitespaceException("Cannot add quote pair '" +
 386    begin + "'-'" + end + "' because the first character of the beginning has " +
 387    "already been marked as whitespace");
 388    }
 389    }
 390    // check that there is not already a quote pair that begins with this end string
 391  1116 Iterator<String> qit = _state.quotes.iterator();
 392  1116 while(qit.hasNext()) {
 393  794 String s = qit.next();
 394  794 if (s.equals(end)) {
 395  1 throw new QuoteStartsWithWhitespaceException("Cannot add quote pair '" + begin + "'-'" + end+
 396    "' because the end is already used as beginning of another quote pair");
 397    }
 398    }
 399   
 400    // add or replace pair of quotation marks
 401  1115 String b = null;
 402  1115 qit = _state.quotes.iterator();
 403  1115 while(qit.hasNext()) {
 404  790 b = qit.next();
 405  1 if (b.equals(begin)) { break; }
 406    }
 407  1115 if ((b != null) && (qit.hasNext())) {
 408  1 _state.quotes.remove(b);
 409  1 _state.quoteEnds.remove(_state.quotePairs.get(b));
 410  1 _state.quotePairs.remove(b);
 411    }
 412  1115 _state.quotes.add(begin);
 413  1115 _state.quoteEnds.add(end);
 414  1115 _state.quotePairs.put(begin,end);
 415   
 416    // now accumulate all keywords that begin with that character
 417  1115 ArrayList<String> kwToRemove = new ArrayList<String>();
 418  1115 Iterator<String> kit = _state.keywords.iterator();
 419  1115 while(kit.hasNext()) {
 420  21 String s = kit.next();
 421  3 if (s.startsWith(begin)) { kwToRemove.add(s); }
 422    }
 423    // remove all accumulated keywords
 424  3 for(String s: kwToRemove) { _state.keywords.remove(s); }
 425    }
 426   
 427    /** Specify a new keyword.
 428    * @param kw the new keyword
 429    */
 430  740 public void addKeyword(String kw) {
 431  740 kw = escape(kw);
 432   
 433    // check if the first character of the beginning quotation mark is considered whitespace
 434  740 Iterator<Integer> wit = _state.whitespace.iterator();
 435  740 while(wit.hasNext()) {
 436  2504 int c = wit.next();
 437  2504 if (kw.charAt(0) == c) {
 438  1 throw new KeywordStartsWithWhitespaceException("Cannot add keyword '" +
 439    kw + "' because the first character of the beginning has " +
 440    "already been marked as whitespace");
 441    }
 442    }
 443   
 444    // check if the keyword is considered a beginning quotation mark
 445  739 Iterator<String> qit = _state.quotes.iterator();
 446  739 while(qit.hasNext()) {
 447  2836 String s = qit.next();
 448  2836 if (s.startsWith(kw)) {
 449  1 throw new KeywordStartsWithQuoteException("Cannot add keyword '" +
 450    kw + "' because it has the same beginning as the quote pair '" +
 451    s + "'-'" + _state.quotePairs.get(s) + "'");
 452    }
 453    }
 454   
 455    // add keyword
 456  738 _state.keywords.add(kw);
 457    }
 458   
 459    /** Return the next token, or null if the end of the stream has been reached.
 460    * @return next token, or null if end of stream has been reached.
 461    */
 462  2680 public String getNextToken() throws IOException {
 463  2680 StringBuilder buf = new StringBuilder();
 464  2680 int c = nextToken();
 465  2680 while (c!=-1) {
 466  8375 _isEscape = ((_escape != null) && (((char)c) == _escape));
 467   
 468    // see if this is whitespace
 469  8375 if (_state.whitespace.contains(c)) {
 470  153 if (_wasEscape) {
 471    // there was a previous escape, do not count as whitespace
 472  5 buf.append(String.valueOf((char)c));
 473  5 _wasEscape = false;
 474    }
 475    else {
 476  148 if (buf.length() > 0) {
 477  118 _token = Token.NORMAL;
 478  118 return buf.toString();
 479    }
 480    }
 481  35 c = nextToken();
 482  35 continue;
 483    }
 484   
 485  8222 if (!_wasEscape) {
 486    // see if it can be a quote
 487  8158 String temp;
 488  8158 temp = findMatch(c, _state.quotes, new Lambda<String,String>() {
 489  7707 public String value(String in) {
 490    // we didn't find a match
 491    // push the tokens back, all except for the last one
 492  7707 for(int i=in.length()-1; i > 0; --i) {
 493  6 pushToken(in.charAt(i));
 494    }
 495  7707 return null;
 496    }
 497    });
 498  8158 if (temp != null) {
 499    // we found the beginning of a quote
 500  451 if (buf.length() > 0) {
 501    // but we still have regular text to output
 502    // so we need to push all tokens back
 503  94 for(int i=temp.length()-1; i >= 0; --i) {
 504  146 pushToken(temp.charAt(i));
 505    }
 506  94 _token = Token.NORMAL;
 507  94 return buf.toString();
 508    }
 509  357 String begin = temp;
 510  357 Stack<String> quoteStack = new Stack<String>();
 511  357 quoteStack.add(begin);
 512  357 StringBuilder quoteBuf = new StringBuilder(unescape(begin));
 513   
 514    // push the state of the tokenizer and set up a new state:
 515    // - no whitespace, i.e. whitespace is not discarded
 516    // - scan for both ending and beginning quotes, but as keywords
 517    // - no quotes at all
 518  357 pushState();
 519  357 _state = new State();
 520  357 _state.whitespace.clear();
 521  357 _state.keywords.clear();
 522  357 _state.keywords.addAll(_stateStack.peek().quotes);
 523  357 _state.keywords.addAll(_stateStack.peek().quoteEnds);
 524  357 _state.quotes.clear();
 525  357 _state.quoteEnds.clear();
 526  357 _state.quotePairs.clear();
 527   
 528  357 while(quoteStack.size() > 0) {
 529  1053 String s = getNextToken();
 530  0 if (s == null) { break; }
 531  1053 if (_stateStack.peek().quoteEnds.contains(s)) {
 532    // ending quote
 533  554 String top = quoteStack.peek();
 534  554 if (_stateStack.peek().quotePairs.get(top).equals(s)) {
 535    // matches top of stack
 536  460 quoteBuf.append(unescape(s));
 537  460 quoteStack.pop();
 538    }
 539    else {
 540    // closing quote does not match top of stack
 541    // it may be an opening quote though
 542  94 if (_stateStack.peek().quotes.contains(s)) {
 543    // beginning quote
 544  93 quoteBuf.append(unescape(s));
 545  93 quoteStack.add(s);
 546    }
 547    else {
 548    // neither a matching closing brace nor an opening brace
 549  1 quoteBuf.append(s);
 550  1 break;
 551    }
 552    }
 553    }
 554  499 else if (_stateStack.peek().quotes.contains(s)) {
 555    // beginning quote
 556  11 quoteBuf.append(unescape(s));
 557  11 quoteStack.add(s);
 558    }
 559    else {
 560  488 quoteBuf.append(s);
 561    }
 562    }
 563   
 564    // restore the old state
 565  357 popState();
 566  357 _token = Token.QUOTED;
 567  357 return quoteBuf.toString();
 568    }
 569    }
 570   
 571  7771 if (!_wasEscape) {
 572    // it wasn't a quote, see if it is a keyword
 573  7707 String temp = findMatch(c, _state.keywords, new Lambda<String,String>() {
 574  6306 public String value(String in) {
 575    // we didn't find a match
 576    // push the tokens back, all except for the last one
 577  6306 for(int i=in.length()-1; i > 0; --i) {
 578  10 pushToken(in.charAt(i));
 579    }
 580  6306 return null;
 581    }
 582    });
 583  7707 if (temp != null) {
 584    // we found a keyword
 585  1401 if (buf.length() > 0) {
 586    // but we still have regular text to output
 587    // so we need to push all tokens back
 588  573 for(int i=temp.length()-1; i >= 0; --i) {
 589  596 pushToken(temp.charAt(i));
 590    }
 591  573 _token = Token.NORMAL;
 592  573 return buf.toString();
 593    }
 594  828 _token = Token.KEYWORD;
 595  828 return unescape(temp);
 596    }
 597    }
 598   
 599    // it must be a regular word
 600    // append character to buffer
 601  6370 if (_isEscape) {
 602  97 if (_wasEscape) {
 603  23 buf.append(String.valueOf(_escape));
 604  23 _isEscape = _wasEscape = false;
 605    }
 606    else {
 607    // there was an escape
 608    // see if whitespace or escape is coming up
 609    // System.err.println("There was an escape");
 610  74 int cnext = nextToken();
 611  74 if ((cnext!=(int)_escape) && (!_state.whitespace.contains(cnext))) {
 612    // System.err.println("But it's not an escape or whitespace");
 613    // see if a quote might be coming up
 614  46 String temp = findMatch(cnext, _state.quotes, new Lambda<String,String>() {
 615  25 public String value(String in) {
 616    // push the tokens back
 617  25 for(int i=in.length()-1; i > 0; --i) {
 618  0 pushToken(in.charAt(i));
 619    }
 620  25 return null;
 621    }
 622    });
 623  46 if (temp != null) {
 624    // push the tokens back
 625  21 for(int i=temp.length()-1; i > 0; --i) {
 626  5 pushToken(temp.charAt(i));
 627    }
 628    // System.err.println("It looks like a quote");
 629    }
 630    else {
 631    // System.err.println("But it's not a quote");
 632    // it wasn't a quote, see if it could be a keyword
 633  25 temp = findMatch(cnext, _state.keywords, new Lambda<String,String>() {
 634  5 public String value(String in) {
 635    // push the tokens back
 636  5 for(int i=in.length()-1; i > 0; --i) {
 637  0 pushToken(in.charAt(i));
 638    }
 639  5 return null;
 640    }
 641    });
 642  25 if (temp != null) {
 643    // push the tokens back
 644  20 for(int i=temp.length()-1; i > 0; --i) {
 645  2 pushToken(temp.charAt(i));
 646    }
 647    // System.err.println("It looks like a keyword");
 648    }
 649    else {
 650    // System.err.println("But it's not a keyword ==> lone escape");
 651    // neither a quote nor a keyword coming up
 652    // lone escape
 653  5 buf.append(String.valueOf(_escape));
 654  5 _isEscape = _wasEscape = false;
 655    }
 656    }
 657    }
 658  74 pushToken(cnext);
 659    }
 660    }
 661    else {
 662  6273 buf.append(String.valueOf((char)c));
 663    }
 664  6370 _wasEscape = _isEscape;
 665  6370 c = nextToken();
 666    }
 667  710 if (_wasEscape) {
 668    // last thing we saw was a lone escape
 669    // generously append it
 670  0 buf.append(String.valueOf(_escape));
 671    }
 672    // end of stream, return remaining buffer as last token
 673  710 if (buf.length() > 0) {
 674  171 _token = Token.NORMAL;
 675  171 return buf.toString();
 676    }
 677    // or return null to represent the end of the stream
 678  539 _token = Token.END;
 679  539 return null;
 680    }
 681   
 682    /** Return the subset of the set whose entries begin with the prefix.
 683    * @param set parent set
 684    * @param prefix prefix string
 685    * @return subset of only those entries that begin with the prefix
 686    */
 687  15990 public static TreeSet<String> prefixSet(Set<String> set, String prefix) {
 688  15990 TreeSet<String> out = new TreeSet<String>();
 689  15990 Iterator<String> it = set.iterator();
 690  15990 while(it.hasNext()) {
 691  33262 String s = it.next();
 692  2192 if (s.startsWith(prefix)) { out.add(s); }
 693    }
 694  15990 return out;
 695    }
 696   
 697  15936 protected String findMatch(int c, TreeSet<String> choices, Lambda<String,String> notFoundLambda) throws IOException {
 698  15936 StringBuilder buf = new StringBuilder(String.valueOf((char)c));
 699  15936 SortedSet<String> prefixSet = prefixSet(choices,buf.toString());
 700  15936 while(prefixSet.size()>1) { // while there is no definite answer, keep reading tokens
 701  58 c = nextToken();
 702  58 if (c!=-1) {
 703    // add character to the string, and narrow prefix set
 704  54 buf.append(String.valueOf((char)c));
 705  54 prefixSet = prefixSet(choices,buf.toString());
 706    }
 707    else {
 708    // end of stream reached without finding a match
 709  4 break;
 710    }
 711    }
 712  15936 if ((c!=-1) &&
 713    (prefixSet.size() == 1) &&
 714    (choices.contains(prefixSet.first()))) {
 715    // there is only one match
 716  1904 String match = prefixSet.first();
 717    // read tokens to make sure it actually is it
 718  1904 while((c!=-1) && (buf.length()<match.length())) {
 719  370 c = nextToken();
 720  370 if (c!=-1) {
 721    // add character to the string, and narrow prefix set
 722  370 buf.append(String.valueOf((char)c));
 723    }
 724    else {
 725    // end of stream reached without finding a match
 726  0 break;
 727    }
 728    }
 729  1893 if (buf.toString().equals(match)) { return buf.toString(); }
 730    }
 731  14043 return notFoundLambda.value(buf.toString());
 732    }
 733   
 734   
 735  2974 protected String escape(String s) {
 736  1149 if (_escape == null) { return s; }
 737  1825 StringBuilder sb = new StringBuilder();
 738  1825 for(int i = 0; i < s.length(); ++i) {
 739  1825 if (i == 0) { sb.append(s.charAt(0)); }
 740    else {
 741  2 if (s.charAt(i) == _escape) { sb.append(_escape); }
 742  496 sb.append(s.charAt(i));
 743    }
 744    }
 745  1825 return sb.toString();
 746    }
 747   
 748  1749 protected String unescape(String s) {
 749  564 if (_escape == null) { return s; }
 750  1185 StringBuilder sb = new StringBuilder();
 751  1185 for(int i = 0; i < s.length(); ++i) {
 752  1185 if (i == 0) { sb.append(s.charAt(0)); }
 753    else {
 754  278 if (s.charAt(i) == _escape) {
 755  1 if ((i+1<s.length()) && (s.charAt(i+1) == _escape)) { ++i; }
 756    }
 757  278 sb.append(s.charAt(i));
 758    }
 759    }
 760  1185 return sb.toString();
 761    }
 762   
 763    /** Setup exception. */
 764    public static class SetupException extends RuntimeException {
 765  4 public SetupException(String s) { super(s); }
 766    }
 767   
 768    /** Quote or keyword starts with whitespace exception. */
 769    public static class StartsWithWhitespaceException extends SetupException {
 770  3 public StartsWithWhitespaceException(String s) { super(s); }
 771    }
 772   
 773    /** Quote starts with whitespace exception. */
 774    public static class QuoteStartsWithWhitespaceException extends StartsWithWhitespaceException {
 775  2 public QuoteStartsWithWhitespaceException(String s) { super(s); }
 776    }
 777   
 778    /** Keyword starts with whitespace exception. */
 779    public static class KeywordStartsWithWhitespaceException extends StartsWithWhitespaceException {
 780  1 public KeywordStartsWithWhitespaceException(String s) { super(s); }
 781    }
 782   
 783    /** Keyword starts with quote exception. */
 784    public static class KeywordStartsWithQuoteException extends SetupException {
 785  1 public KeywordStartsWithQuoteException(String s) { super(s); }
 786    }
 787    }