Clover coverage report - PLT Utilities Test Coverage (plt-20120304-r5436)
Coverage timestamp: Sat Mar 3 2012 22:01:56 CST
file stats: LOC: 1,316   Methods: 84
NCLOC: 918   Classes: 8
 
 Source file Conditionals Statements Methods TOTAL
TextUtil.java 35.8% 17.8% 35.7% 21.9%
coverage coverage
 1    /*BEGIN_COPYRIGHT_BLOCK*
 2   
 3    PLT Utilities BSD License
 4   
 5    Copyright (c) 2007-2010 JavaPLT group at Rice University
 6    All rights reserved.
 7   
 8    Developed by: Java Programming Languages Team
 9    Rice University
 10    http://www.cs.rice.edu/~javaplt/
 11   
 12    Redistribution and use in source and binary forms, with or without modification, are permitted
 13    provided that the following conditions are met:
 14   
 15    - Redistributions of source code must retain the above copyright notice, this list of conditions
 16    and the following disclaimer.
 17    - Redistributions in binary form must reproduce the above copyright notice, this list of
 18    conditions and the following disclaimer in the documentation and/or other materials provided
 19    with the distribution.
 20    - Neither the name of the JavaPLT group, Rice University, nor the names of the library's
 21    contributors may be used to endorse or promote products derived from this software without
 22    specific prior written permission.
 23   
 24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
 25    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 26    FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND
 27    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 28    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 29    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
 30    IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 31    OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 32   
 33    *END_COPYRIGHT_BLOCK*/
 34   
 35    package edu.rice.cs.plt.text;
 36   
 37    import java.io.Serializable;
 38    import java.io.StringReader;
 39    import java.io.BufferedReader;
 40    import java.io.IOException;
 41    import java.util.ArrayList;
 42    import java.util.Arrays;
 43    import java.util.Collections;
 44    import java.util.LinkedList;
 45    import java.util.List;
 46    import java.util.Map;
 47    import java.util.regex.Matcher;
 48    import java.util.regex.Pattern;
 49   
 50    import edu.rice.cs.plt.iter.IterUtil;
 51    import edu.rice.cs.plt.iter.SizedIterable;
 52    import edu.rice.cs.plt.recur.RecurUtil;
 53    import edu.rice.cs.plt.tuple.Pair;
 54    import edu.rice.cs.plt.collect.OneToOneRelation;
 55    import edu.rice.cs.plt.collect.IndexedOneToOneRelation;
 56    import edu.rice.cs.plt.lambda.Lambda;
 57    import edu.rice.cs.plt.lambda.Thunk;
 58    import edu.rice.cs.plt.lambda.LazyThunk;
 59   
 60    public final class TextUtil {
 61   
 62    /** The system-dependent "line.separator" property; {@code "\n"} is used when the property is not set. */
 63    public static final String NEWLINE = System.getProperty("line.separator", "\n");
 64   
 65    /**
       * A regex matching any line break: {@code \r\n}, {@code \n}, or {@code \r}. The two-character
       * alternative is listed first so that it is tried before the single-character ones.
       */
 66    public static final String NEWLINE_PATTERN = "\\r\\n|\\n|\\r";
 67   
 68    /** Prevents instance creation */
 69  0 private TextUtil() {}
 70   
 71    /**
 72    * Convert the given object to a string. This method invokes {@link RecurUtil#safeToString(Object)}
 73    * to provide simple, safe handling of {@code null} values, arrays, and self-referential data structures
 74    * (with cooperation from the {@code toString()} method of the relevant class).
       * @param o The object to convert
       * @return The result of {@code RecurUtil.safeToString(o)}
 75    */
 76  172 public static String toString(Object o) {
 77  172 return RecurUtil.safeToString(o);
 78    }
 79   
 80    /**
 81    * Break a string into a list of lines. {@code "\n"}, {@code "\r"}, and {@code "\r\n"}
 82    * are considered line delimiters. The empty string is taken to contain 0 lines. An optional final
 83    * trailing newline will be ignored.
       * @param s The text to break into lines
       * @return The lines read from {@code s}, in order of appearance
 84    */
 85  6 public static SizedIterable<String> getLines(String s) {
 86  6 SizedIterable<String> result = IterUtil.<String>empty();
 87  6 BufferedReader r = new BufferedReader(new StringReader(s));
 88  6 try {
 89  6 String line = r.readLine();
       // readLine() yields null once the input is exhausted
 90  6 while (line != null) {
 91  14 result = IterUtil.compose(result, line);
 92  14 line = r.readLine();
 93    }
 94    }
 95    catch (IOException e) {
 96    // Should not happen with a StringReader, but if it does, just ignore it
       // (whatever was accumulated in result before the failure is still returned)
 97    }
 98    finally {
 99  6 try { r.close(); }
 100    catch (IOException e) { /* ignore */ }
 101    }
 102  6 return result;
 103    }
 104   
 105    /**
       * Produce a string by concatenating {@code copies} instances of {@code s}.
       * @param s The string to repeat
       * @param copies The number of repetitions; if it is less than 1 the loop body never runs and the
       *               result is the empty string
       * @return The concatenation of {@code copies} instances of {@code s}
       */
 106  2 public static String repeat(String s, int copies) {
 107  2 StringBuilder result = new StringBuilder();
 108  3 for (int i = 0; i < copies; i++) { result.append(s); }
 109  2 return result.toString();
 110    }
 111   
 112    /** Produce a string by concatenating {@code copies} instances of {@code c} */
 113  2 public static String repeat(char c, int copies) {
 114  2 char[] result = new char[copies];
 115  2 Arrays.fill(result, c);
 116  2 return String.valueOf(result);
 117    }
 118   
 119    /**
       * Create a string of (at least) the given length by filling in copies of {@code c} to the left of {@code s}.
       * @param s The text to pad
       * @param c The padding character
       * @param length The minimum length of the result; if {@code s} is already this long or longer, no
       *               padding is added and the result has the same contents as {@code s}
       * @return {@code s} preceded by {@code max(length - s.length(), 0)} copies of {@code c}
       */
 120  0 public static String padLeft(String s, char c, int length) {
 121  0 StringBuilder result = new StringBuilder();
 122  0 int delta = length - s.length();
 123  0 for (int i = 0; i < delta; i++) { result.append(c); }
 124  0 result.append(s);
 125  0 return result.toString();
 126    }
 127   
 128    /**
       * Create a string of (at least) the given length by filling in copies of {@code c} to the right of {@code s}.
       * @param s The text to pad
       * @param c The padding character
       * @param length The minimum length of the result; if {@code s} is already this long or longer, no
       *               padding is added and the result has the same contents as {@code s}
       * @return {@code s} followed by {@code max(length - s.length(), 0)} copies of {@code c}
       */
 129  0 public static String padRight(String s, char c, int length) {
 130  0 StringBuilder result = new StringBuilder();
 131  0 result.append(s);
 132  0 int delta = length - s.length();
 133  0 for (int i = 0; i < delta; i++) { result.append(c); }
 134  0 return result.toString();
 135    }
 136   
 137    // Here and subsequently, a javadoc bug requires referring to java.lang.String with a fully-qualified name
 138   
 139    /**
 140    * Determine if the given character occurs in {@code s}. Defined in terms of
 141    * {@link java.lang.String#indexOf(int)}.
       * @param s The string to search
       * @param character The character to look for, passed as an {@code int} as {@code indexOf(int)} expects
       * @return {@code true} iff {@code s.indexOf(character)} is non-negative
 142    */
 143  3 public static boolean contains(String s, int character) { return s.indexOf(character) >= 0; }
 144   
 145    /**
 146    * Determine if the given string occurs in {@code s}. Defined in terms of {@link java.lang.String#indexOf(String)}.
 147    * This is also defined as {@link String#contains}, but is defined here for legacy support.
       * @param s The string to search
       * @param piece The substring to look for
       * @return {@code true} iff {@code s.indexOf(piece)} is non-negative
 148    */
 149  7 public static boolean contains(String s, String piece) { return s.indexOf(piece) >= 0; }
 150   
 151    /**
 152    * Determine if <em>any</em> of the given characters occurs in {@code s}. Defined in terms of
 153    * {@link java.lang.String#indexOf(int)}.
       * @param s The string to search
       * @param characters Candidate characters, checked in the order given
       * @return {@code true} as soon as one candidate is found; {@code false} if none is found or if
       *         {@code characters} is empty
 154    */
 155  0 public static boolean containsAny(String s, int... characters) {
 156  0 for (int c: characters) { if (contains(s, c)) { return true; } }
 157  0 return false;
 158    }
 159   
 160    /**
 161    * Determine if <em>any</em> of the given strings occurs in {@code s}. Defined in terms of
 162    * {@link java.lang.String#indexOf(String)}.
       * @param s The string to search
       * @param pieces Candidate substrings, checked in the order given
       * @return {@code true} as soon as one candidate is found; {@code false} if none is found or if
       *         {@code pieces} is empty
 163    */
 164  0 public static boolean containsAny(String s, String... pieces) {
 165  0 for (String piece: pieces) { if (contains(s, piece)) { return true; } }
 166  0 return false;
 167    }
 168   
 169    /**
 170    * Determine if <em>all</em> of the given characters occur in {@code s}. Defined in terms of
 171    * {@link java.lang.String#indexOf(int)}.
       * @param s The string to search
       * @param characters Required characters, checked in the order given
       * @return {@code false} as soon as one required character is missing; {@code true} otherwise,
       *         including when {@code characters} is empty
 172    */
 173  0 public static boolean containsAll(String s, int... characters) {
 174  0 for (int c: characters) { if (!contains(s, c)) { return false; } }
 175  0 return true;
 176    }
 177   
 178    /**
 179    * Determine if <em>all</em> of the given strings occur in {@code s}. Defined in terms of
 180    * {@link java.lang.String#indexOf(String)}.
       * @param s The string to search
       * @param pieces Required substrings, checked in the order given
       * @return {@code false} as soon as one required substring is missing; {@code true} otherwise,
       *         including when {@code pieces} is empty
 181    */
 182  0 public static boolean containsAll(String s, String... pieces) {
 183  0 for (String piece: pieces) { if (!contains(s, piece)) { return false; } }
 184  0 return true;
 185    }
 186   
 187    /**
 188    * Determine if the given string occurs in {@code s}, ignoring differences in case. Unlike
 189    * {@link java.lang.String#equalsIgnoreCase}, this test only compares the lower-case conversion of
 190    * {@code s} to the lower-case conversion of {@code piece}.
       * Both conversions use the no-argument {@code String.toLowerCase()}, which applies the rules of the
       * default locale.
       * @param s The string to search
       * @param piece The substring to look for
       * @return {@code true} iff the lower-cased {@code piece} occurs in the lower-cased {@code s}
 191    */
 192  13 public static boolean containsIgnoreCase(String s, String piece) {
 193  13 return s.toLowerCase().indexOf(piece.toLowerCase()) >= 0;
 194    }
 195   
 196    /**
 197    * Determine if <em>any</em> of the given strings occurs in {@code s}, ignoring differences in case. Defined in
 198    * terms of {@link #containsIgnoreCase}.
 199    */
 200  0 public static boolean containsAnyIgnoreCase(String s, String... pieces) {
 201  0 for (String piece: pieces) { if (contains(s, piece)) { return true; } }
 202  0 return false;
 203    }
 204   
 205    /**
 206    * Determine if <em>all</em> of the given strings occur in {@code s}, ignoring differences in case. Defined in
 207    * terms of {@link #containsIgnoreCase}.
 208    */
 209  0 public static boolean containsAllIgnoreCase(String s, String... pieces) {
 210  0 for (String piece: pieces) { if (!contains(s, piece)) { return false; } }
 211  0 return true;
 212    }
 213   
 214    /**
 215    * Determine if any of the given strings is a prefix of {@code s}. Defined in terms of
 216    * {@link java.lang.String#startsWith}.
       * @param s The string to examine
       * @param prefixes Candidate prefixes, checked in the order given
       * @return {@code true} as soon as one candidate matches; {@code false} if none matches or if
       *         {@code prefixes} is empty
 217    */
 218  0 public static boolean startsWithAny(String s, String... prefixes) {
 219  0 for (String prefix : prefixes) { if (s.startsWith(prefix)) { return true; } }
 220  0 return false;
 221    }
 222   
 223    /**
 224    * Determine if any of the given strings is a suffix of {@code s}. Defined in terms of
 225    * {@link java.lang.String#endsWith}.
       * @param s The string to examine
       * @param suffixes Candidate suffixes, checked in the order given
       * @return {@code true} as soon as one candidate matches; {@code false} if none matches or if
       *         {@code suffixes} is empty
 226    */
 227  0 public static boolean endsWithAny(String s, String... suffixes) {
 228  0 for (String suffix : suffixes) { if (s.endsWith(suffix)) { return true; } }
 229  0 return false;
 230    }
 231   
 232    /**
 233    * Find the first occurrence of any of the given characters in {@code s}. If none are present, the result is
 234    * {@code -1}. Defined in terms of {@link java.lang.String#indexOf(int)}.
       * @param s The string to search
       * @param characters Candidate characters
       * @return The smallest index at which any candidate occurs, or {@code -1}
 235    */
 236  0 public static int indexOfFirst(String s, int... characters) {
 237  0 int result = -1;
 238  0 for (int c : characters) {
 239  0 int index = s.indexOf(c);
       // keep the smallest non-negative index seen so far (result < 0 means nothing found yet)
 240  0 if (index >= 0 && (result < 0 || index < result)) { result = index; }
 241    }
 242  0 return result;
 243    }
 244   
 245    /**
 246    * Find the first occurrence of any of the given strings in {@code s}. If none are present, the result is
 247    * {@code -1}. Defined in terms of {@link java.lang.String#indexOf(String)}.
       * @param s The string to search
       * @param pieces Candidate substrings
       * @return The smallest index at which any candidate begins, or {@code -1}
 248    */
 249  0 public static int indexOfFirst(String s, String... pieces) {
 250  0 int result = -1;
 251  0 for (String piece : pieces) {
 252  0 int index = s.indexOf(piece);
       // keep the smallest non-negative index seen so far (result < 0 means nothing found yet)
 253  0 if (index >= 0 && (result < 0 || index < result)) { result = index; }
 254    }
 255  0 return result;
 256    }
 257   
 258    /**
 259    * Extract the portion of {@code s} before the first occurrence of the given delimiter. The result is
 260    * {@code s} itself if the delimiter is not found.
       * @param s The string to examine
       * @param delim The delimiter character, passed as an {@code int} as {@code String.indexOf(int)} expects
       * @return The text preceding the first {@code delim}, or {@code s} if there is none
 261    */
 262  5 public static String prefix(String s, int delim) {
 263  5 int index = s.indexOf(delim);
 264  5 return (index == -1) ? s : s.substring(0, index);
 265    }
 266   
 267    /**
 268    * Extract the portion of {@code s} after the first occurrence of the given delimiter. {@code s} if the
 269    * delimiter is not found.
 270    */
 271  5 public static String removePrefix(String s, int delim) {
 272  5 int index = s.indexOf(delim);
 273  5 return (index == -1) ? s : s.substring(index+1);
 274    }
 275   
 276    /**
 277    * Extract the portion of {@code s} after the last occurrence of the given delimiter. {@code s} if the
 278    * delimiter is not found.
 279    */
 280  5 public static String suffix(String s, int delim) {
 281  5 int index = s.lastIndexOf(delim);
 282  5 return (index == -1) ? s : s.substring(index+1);
 283    }
 284   
 285    /**
 286    * Extract the portion of {@code s} before the last occurrence of the given delimiter. The result is
 287    * {@code s} itself if the delimiter is not found.
       * @param s The string to examine
       * @param delim The delimiter character, passed as an {@code int} as {@code String.lastIndexOf(int)} expects
       * @return The text preceding the last {@code delim}, or {@code s} if there is none
 288    */
 289  5 public static String removeSuffix(String s, int delim) {
 290  5 int index = s.lastIndexOf(delim);
 291  5 return (index == -1) ? s : s.substring(0, index);
 292    }
 293   
 294    /**
 295    * An extended version of {@link String#split} that recognizes nested parentheses and only splits
 296    * where the delimiter occurs at the top level. This convenience method sets {@code limit} to {@code 0}
 297    * (unlimited number of matches) and {@code brackets} to {@link Bracket#PARENTHESES}. See
 298    * {@link #split(String, String, int, Bracket[])} for a full specification.
       * @param s A string to split
       * @param delimRegex A regular expression recognizing delimiters
       * @return The split pieces, the matched delimiters, and the remaining tail of {@code s}
 299    */
 300  2 public static SplitString splitWithParens(String s, String delimRegex) {
 301  2 return new StringSplitter(s, delimRegex, 0, Bracket.PARENTHESES).split();
 302    }
 303   
 304    /**
 305    * An extended version of {@link String#split} that recognizes nested parentheses and only splits
 306    * where the delimiter occurs at the top level. This convenience method sets {@code brackets} to
 307    * {@link Bracket#PARENTHESES}. See {@link #split(String, String, int, Bracket[])} for a full
 308    * specification.
       * @param s A string to split
       * @param delimRegex A regular expression recognizing delimiters
       * @param limit The number of non-delimiter pieces to produce; {@code 0} or negative means unlimited
       * @return The split pieces, the matched delimiters, and the remaining tail of {@code s}
 309    */
 310  10 public static SplitString splitWithParens(String s, String delimRegex, int limit) {
 311  10 return new StringSplitter(s, delimRegex, limit, Bracket.PARENTHESES).split();
 312    }
 313   
 314    /**
 315    * An extended version of {@link String#split} that recognizes nested matched brackets and only splits
 316    * where the delimiter occurs at the top level. This convenience method sets {@code limit} to {@code 0}
 317    * (unlimited number of matches). See {@link #split(String, String, int, Bracket[])} for a full
 318    * specification.
       * @param s A string to split
       * @param delimRegex A regular expression recognizing delimiters
       * @param brackets Bracket pairs that should be recognized
       * @return The split pieces, the matched delimiters, and the remaining tail of {@code s}
 319    */
 320  65 public static SplitString split(String s, String delimRegex, Bracket... brackets) {
 321  65 return new StringSplitter(s, delimRegex, 0, brackets).split();
 322    }
 323   
 324    /**
 325    * An extended version of {@link String#split} that recognizes nested matched brackets and only splits
 326    * where the delimiter occurs at the top level. For convenience when the delimiter is a nontrivial
 327    * regular expression, the result includes both the split strings and the matched delimiters. Ignoring
 328    * these extensions, the behavior is roughly equivalent: {@code s.split(delimRegex, limit)} is equivalent
 329    * to {@code TextUtil.split(s, delimRegex, limit).array()}, with the exception that trailing empty strings
 330    * (separated by delimiters) are never discarded here.
 331    * @param s A string to split
 332    * @param delimRegex A regular expression recognizing delimiters
 333    * @param limit The number of non-delimiter pieces to produce. Consistent with {@code String.split()},
 334    * {@code limit-1} is the number of delimiters to search for. If {@code 0} or negative, the
 335    * search continues until the string is exhausted. Unlike {@code String.split()}, trailing
 336    * empty strings (separated by delimiters) are never discarded, even when {@code limit == 0}.
 337    * @param brackets Bracket pairs that should be recognized. A delimiter match that occurs within one of
 338    * these bracket pairs (at any nonzero nesting depth) is not considered a delimiter.
 339    * A left bracket increases the nesting level only if it is at the top level or follows
 340    * another left bracket that supports nesting; a right bracket reduces the nesting level
 341    * only if it matches the most recent left bracket. If {@code delimRegex} recognizes part
 342    * of a valid bracket (e.g., {@code "*"} is the delimiter and {@code "/*"} is a bracket),
 343    * how relevant text is handled is unspecified (it would be nice, but difficult, to fix this).
 344    * If multiple brackets overlap, an expected right bracket will match before a left bracket,
 345    * and the first left bracket listed in {@code brackets} has priority over later left
 346    * brackets.
       * @return The split pieces, the matched delimiters, and the remaining tail of {@code s}
 347    */
 348  37 public static SplitString split(String s, String delimRegex, int limit, Bracket... brackets) {
 349  37 return new StringSplitter(s, delimRegex, limit, brackets).split();
 350    }
 351   
 352    /**
 353    * The result of a {@code split()} invocation. The original string can be formed by concatenating
 354    * {@code splits()}, {@code delimiters()} (interleaved), and {@code rest()}.
 355    */
 356    public static class SplitString implements Serializable {
 357    private final List<String> _splits;
 358    private final List<String> _delims;
 359    private final String _rest;
 360   
       /** Wraps the given lists in unmodifiable views; the lists themselves are not copied. */
 361  114 private SplitString(List<String> splits, List<String> delims, String rest) {
 362  114 _splits = Collections.unmodifiableList(splits);
 363  114 _delims = Collections.unmodifiableList(delims);
 364  114 _rest = rest;
 365    }
 366   
 367    /**
 368    * The sequence of strings that were followed by a recognized delimiter.
 369    * {@code splits().size() == delimiters().size()}.
 370    */
 371  68 public List<String> splits() { return _splits; }
 372    /**
 373    * The delimiters that followed the corresponding members of {@code splits()}.
 374    * {@code splits().size() == delimiters().size()}.
 375    */
 376  28 public List<String> delimiters() { return _delims; }
 377    /**
 378    * The tail portion of the input string. Either this string contains no delimiters, or it was
 379    * left unsearched.
 380    */
 381  28 public String rest() { return _rest; }
 382   
 383    /**
 384    * Fill an array with the non-delimiter portions of the original string. The array has the same
 385    * form as the result of {@code String#split}. It always has length {@code >= 1} and
 386    * {@code <= limit}, if {@code limit} (a parameter of the {@code split} method) was positive.
       * The array holds the elements of {@code splits()} followed by {@code rest()} as its last element.
 387    */
 388  60 public String[] array() {
 389  60 String[] result = new String[_splits.size() + 1];
 390  60 _splits.toArray(result);
 391  60 result[_splits.size()] = _rest;
 392  60 return result;
 393    }
 394   
       /**
       * Produce a description of the form {@code SplitString: (split) [delim] ... + (rest)}, pairing each
       * split with the delimiter that followed it.
       */
 395  0 public String toString() {
 396  0 StringBuilder result = new StringBuilder();
 397  0 result.append("SplitString: ");
 398  0 for (Pair<String, String> pair : IterUtil.zip(_splits, _delims)) {
 399  0 result.append("(").append(pair.first()).append(") ");
 400  0 result.append("[").append(pair.second()).append("] ");
 401    }
 402  0 result.append("+ (").append(_rest).append(")");
 403  0 return result.toString();
 404    }
 405    }
 406   
 407    /**
       * Implementation of the split algorithm. An instance is bound to one input string and accumulates
       * its results in fields; the callers visible in this file create a fresh instance per invocation
       * and call {@code split()} once.
       */
 408    private static class StringSplitter {
       // pieces of the input that preceded an accepted delimiter, in order
 409    private final List<String> _splits;
       // the accepted delimiter matches, parallel to _splits
 410    private final List<String> _delims;
       // the input string
 411    private final String _s;
 412   
       // matcher for the delimiter regex over _s
 413    private final Matcher _delim;
       // _lefts[i] and _rights[i] are matchers over _s for the left and right patterns of _brackets[i]
 414    private final Bracket[] _brackets;
 415    private final Matcher[] _lefts;
 416    private final Matcher[] _rights;
       // indices into _brackets of the currently open brackets, innermost first
 417    private final LinkedList<Integer> _stack; // grows left -- use addFirst and removeFirst
 418   
       // starts at limit; the search loop stops when this reaches 1, and it is only decremented while
       // greater than 1, so a limit of 0 or less never stops the search early
 419    private int _remaining;
 420   
 421  114 public StringSplitter(String s, String delimRegex, int limit, Bracket... brackets) {
 422  114 if (limit > 0 && limit < 10) { // 10 is the specified default capacity
 423  44 _splits = new ArrayList<String>(limit);
 424  44 _delims = new ArrayList<String>(limit);
 425    }
 426    else {
 427  70 _splits = new ArrayList<String>();
 428  70 _delims = new ArrayList<String>();
 429    }
 430  114 _s = s;
 431  114 _delim = Pattern.compile(delimRegex).matcher(_s);
 432  114 _brackets = brackets;
 433  114 _lefts = new Matcher[_brackets.length];
 434  114 _rights = new Matcher[_brackets.length];
 435  114 for (int i = 0; i < _brackets.length; i++) {
 436  198 _lefts[i] = _brackets[i].left().matcher(_s);
 437  198 _rights[i] = _brackets[i].right().matcher(_s);
 438    }
 439  114 _stack = new LinkedList<Integer>();
 440  114 _remaining = limit;
 441    }
 442   
       /**
       * Run the search: repeatedly find the next delimiter match, update the bracket stack for the text
       * that precedes it, and accept the match only if no bracket is open at that point.
       * @return The accepted splits and delimiters, plus the unconsumed tail of the input
       */
 443  114 public SplitString split() {
 444  114 int rest = 0; // text not yet added to _splits or _delims
 445  114 int cursor = 0; // current start location for search; >= rest
 446  114 while (_remaining != 1) {
 447  167 if (_delim.find()) {
 448  66 int dStart = _delim.start();
 449  66 int dEnd = _delim.end();
       // account for every bracket between the last processed position and this delimiter
 450  66 processStack(cursor, dStart, false);
 451  66 if (_stack.isEmpty()) {
       // delimiter is at the top level: record it and the text before it
 452  54 _splits.add(_s.substring(rest, dStart));
 453  54 _delims.add(_s.substring(dStart, dEnd));
 454  19 if (_remaining > 1) { _remaining--; }
 455  54 rest = dEnd;
 456  54 cursor = dEnd;
 457    }
 458    else {
       // delimiter is inside brackets: scan forward from it until the stack empties or the input ends
 459  12 cursor = processStack(dStart, _s.length(), true);
 460  12 _delim.region(cursor, _s.length()); // skip delimiter search ahead past right brackets
 461    }
 462    }
 463  101 else { _remaining = 1; /* end search */ }
 464    }
 465  114 return new SplitString(_splits, _delims, _s.substring(rest));
 466    }
 467   
 468    /**
 469    * Push and pop brackets on the stack until {@code rangeEnd} is reached or, if
 470    * {@code stopWhenEmpty}, the stack is empty.
       * @param rangeStart Index in {@code _s} at which scanning begins
       * @param rangeEnd Index in {@code _s} at which scanning stops
       * @param stopWhenEmpty Whether to return as soon as no bracket is open
       * @return The position reached: the end of the last bracket processed, or {@code rangeEnd}
 471    */
 472  78 private int processStack(int rangeStart, int rangeEnd, boolean stopWhenEmpty) {
 473    // Matcher doesn't have a state query method, so we have to keep track here
 474    // null -> haven't tried; true -> successful match; false -> no match
 475  78 Boolean[] leftMatches = new Boolean[_lefts.length];
 476  78 Boolean[] rightMatches = new Boolean[_rights.length];
 477  78 int cursor = rangeStart;
       // left brackets are only searched for at the top level or inside a bracket that nests
 478  78 boolean searchLefts = _stack.isEmpty() || _brackets[_stack.getFirst()].nests();
 479   
 480  78 while (cursor < rangeEnd && !(stopWhenEmpty && _stack.isEmpty())) {
 481    // possible next brackets are any rights in the stack and, if searchLefts, all lefts
       // first: start of the earliest bracket found so far (rangeEnd means none yet)
 482  98 int first = rangeEnd;
 483  98 int firstIndex = -1;
 484  98 boolean firstIsLeft = false;
 485  98 if (!_stack.isEmpty()) { // look for a right bracket
 486  40 int i = _stack.getFirst();
 487  40 Matcher m = _rights[i];
 488  40 Boolean matched = rightMatches[i];
       // search again if never tried, if the cached match lies before the cursor, or if the cached
       // failure covered a region ending before the current bound
 489  40 if (matched == null || (matched && m.start() < cursor) || (!matched && m.regionEnd() < first)) {
 490  34 matched = m.region(cursor, first).find();
 491  34 rightMatches[i] = matched;
 492    }
 493  40 if (matched && m.start() < first) {
 494  23 first = m.start();
 495  23 firstIndex = i;
 496  23 firstIsLeft = false;
 497    }
 498    }
 499  98 if (searchLefts) { // rights take priority; earlier lefts take priority
 500  88 for (int i = 0; i < _lefts.length; i++) {
 501  102 Matcher m = _lefts[i];
 502  102 Boolean matched = leftMatches[i];
 503  102 if (matched == null || (matched && m.start() < cursor) || (!matched && m.regionEnd() < first)) {
 504    // minimize search region so we don't perform needless work (but this does impact behavior
 505    // where different brackets overlap)
 506  100 matched = m.region(cursor, first).find();
 507  100 leftMatches[i] = matched;
 508    }
 509  102 if (matched && m.start() < first) {
 510  21 first = m.start();
 511  21 firstIndex = i;
 512  21 firstIsLeft = true;
 513    }
 514    }
 515    }
 516   
 517  98 if (first < rangeEnd) { // at least one bracket was found
 518  42 if (firstIsLeft) {
       // open a new bracket and continue after it
 519  21 _stack.addFirst(firstIndex);
 520  21 cursor = _lefts[firstIndex].end();
 521  21 searchLefts = _brackets[firstIndex].nests();
 522    }
 523    else {
       // close the innermost bracket and continue after it
 524  21 _stack.removeFirst();
 525  21 cursor = _rights[firstIndex].end();
 526  21 searchLefts = true; // either the stack is empty or the top supports nesting
 527    }
 528    }
 529  56 else { cursor = rangeEnd; }
 530   
 531    }
 532  78 return cursor;
 533    }
 534   
 535    }
 536   
 537    /**
       * Express a byte array as a sequence of unsigned hexadecimal bytes. Equivalent to
       * {@code toHexString(bs, 0, bs.length)}.
       * @param bs The bytes to format
       * @return Two hexadecimal digits per byte, with a single space between consecutive bytes
       */
 538  6 public static String toHexString(byte[] bs) {
 539  6 return toHexString(bs, 0, bs.length);
 540    }
 541   
 542    /**
       * Express part of a byte array as a sequence of unsigned hexadecimal bytes.
       * @param bs The array to read from
       * @param offset Index of the first byte to format
       * @param length Number of bytes to format
       * @return Two hexadecimal digits per byte (high nibble first), with a single space between
       *         consecutive bytes; the empty string if {@code length} is not positive
       */
 543  6 public static String toHexString(byte[] bs, int offset, int length) {
 544  6 StringBuilder result = new StringBuilder();
 545  6 for (int i = offset; i < offset+length; i++) {
       // separator goes before every byte except the first
 546  130 if (i > offset) { result.append(' '); }
 547  136 byte b = bs[i];
 548    // Integer.toHexString pads results in range 128-255 with "ffff...",
 549    // and using it on (b & 0xff) excludes leading 0s.
 550  136 result.append(Character.forDigit((b & 0xf0) >> 4, 16));
 551  136 result.append(Character.forDigit(b & 0xf, 16));
 552    }
 553  6 return result.toString();
 554    }
 555   
 556   
 /** Determine if {@code c} is one of the ASCII characters {@code '0'} through {@code '9'}. */
 557  0 public static boolean isDecimalDigit(char c) { return c >= '0' && c <= '9'; }
 558   
 /** Determine if {@code c} is one of the ASCII characters {@code '0'} through {@code '7'}. */
 559  0 public static boolean isOctalDigit(char c) { return c >= '0' && c <= '7'; }
 560   
 /**
  * Determine if {@code c} is an ASCII hexadecimal digit: {@code '0'} through {@code '9'},
  * {@code 'a'} through {@code 'f'}, or {@code 'A'} through {@code 'F'}.
  */
 561  0 public static boolean isHexDigit(char c) {
 562  0 return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
 563    }
 564   
 565    /**
 566    * Abstract class for string translation algorithms. Implementations are responsible for modifying
 567    * {@code _result} and {@code _changed}; each character in the original string will be passed
 568    * to {@code processChar()}, followed by a single invocation of {@code finish()}.
       * {@code _result} is created once in the constructor and is not cleared by {@code value()}, so
       * an instance carries its output from one {@code value()} call into the next.
 569    */
 570    private static abstract class StringTranslator implements Lambda<String, String> {
       // the translated text accumulated so far
 571    protected final StringBuilder _result;
       // whether the translation differs from the input; when false, value() returns the input itself
 572    protected boolean _changed;
 573   
 574  12 StringTranslator() { _result = new StringBuilder(); _changed = false; }
 575   
       /**
       * Translate {@code s}: feed each of its characters to {@code processChar()}, call {@code finish()},
       * and return the contents of {@code _result} if {@code _changed} is set, or {@code s} otherwise.
       */
 576  12 public final String value(String s) {
 577  12 int length = s.length();
 578  12 for (int i = 0; i < length; i++) { processChar(s.charAt(i)); }
 579  12 finish();
 580  12 return _changed ? _result.toString() : s;
 581    }
 582   
       /** Handle the next character of the input. */
 583    protected abstract void processChar(char c);
 584   
       /** Invoked once, after the last character has been passed to {@code processChar()}. */
 585    protected abstract void finish();
 586    }
 587   
 588    /** Shared code for Unicode escaping and unescaping algorithms */
 589    private static abstract class UnicodeTranslator extends StringTranslator {
 590    private static enum State { START, BACKSLASH, U, DIG1, DIG2, DIG3 };
 591   
 592    private State _state;
 593    private StringBuilder _buffer; // includes everything that follows a "\\u"
 594   
 595  0 UnicodeTranslator() { _state = State.START; _buffer = new StringBuilder(); }
 596   
 597    protected abstract void handleStandardChar(char c, boolean backslashed);
 598    protected abstract void handlePartialEscape(String escape);
 599    protected abstract void handleCompleteEscape(String escape);
 600   
 601  0 private void reset(char c) {
 602  0 handlePartialEscape(_buffer.toString());
 603  0 _buffer.delete(0, _buffer.length());
 604  0 _state = State.START;
 605  0 if (c == '\\') { _state = State.BACKSLASH; }
 606  0 else { handleStandardChar(c, false); }
 607    }
 608   
 609  0 protected final void processChar(char c) {
 610  0 switch (_state) {
 611  0 case START:
 612  0 if (c == '\\') { _state = State.BACKSLASH; }
 613  0 else { handleStandardChar(c, false); }
 614  0 break;
 615  0 case BACKSLASH:
 616  0 if (c == 'u') { _state = State.U; }
 617  0 else { handleStandardChar(c, true); _state = State.START; } // intentionally includes double-backslash
 618  0 break;
 619  0 case U:
 620  0 if (isHexDigit(c)) { _buffer.append(c); _state = State.DIG1; }
 621  0 else if (c != 'u') { reset(c); }
 622  0 break;
 623  0 case DIG1:
 624  0 if (isHexDigit(c)) { _buffer.append(c); _state = State.DIG2; }
 625  0 else { reset(c); }
 626  0 break;
 627  0 case DIG2:
 628  0 if (isHexDigit(c)) { _buffer.append(c); _state = State.DIG3; }
 629  0 else { reset(c); }
 630  0 break;
 631  0 case DIG3:
 632  0 if (isHexDigit(c)) {
 633  0 _buffer.append(c);
 634  0 handleCompleteEscape(_buffer.toString());
 635  0 _buffer.delete(0, _buffer.length());
 636  0 _state = State.START;
 637    }
 638  0 else { reset(c); }
 639  0 break;
 640    }
 641    }
 642   
 643  0 protected final void finish() {
 644  0 switch (_state) {
 645  0 case START: break;
 646  0 case BACKSLASH: handleStandardChar('\\', false); break;
 647  0 default: handlePartialEscape(_buffer.toString()); break;
 648    }
 649    }
 650    }
 651   
 652    /**
 653    * Convert all non-ASCII characters in the string to Unicode escapes, as specified by JLS 3.3.
 654    * As suggested by JLS, an additional {@code u} is added to existing escapes in the string;
 655    * instances of {@code \} that precede a non-ASCII character or a malformed Unicode escape will
 656    * be encoded as {@code &#92;u005c}. The original string may be safely reconstructed with
 657    * {@link #unicodeUnescapeOnce}; to safely interpret <em>all</em> Unicode escapes, including
 658    * those in the original string, use {@link #unicodeUnescape} (in either case, this method
 659    * guarantees an absence of {@code IllegalArgumentException}s).
       * @param s The string to escape
       * @return The escaped string, or {@code s} itself if nothing needed to change
 660    */
 661  0 public static String unicodeEscape(String s) {
 662  0 return new UnicodeTranslator() {
 663  0 protected void handleStandardChar(char c, boolean backslashed) {
 664  0 if (c > '\u007f') {
       // a pending backslash is written in escaped form so that it cannot combine with the
       // escape emitted for c
 665  0 if (backslashed) { _result.append("\\u005c"); } // encoded '\'
 666  0 _result.append("\\u");
 667  0 _result.append(padLeft(Integer.toHexString(c), '0', 4));
 668  0 _changed = true;
 669    }
 670    else {
 671  0 if (backslashed) { _result.append('\\'); }
 672  0 _result.append(c);
 673    }
 674    }
 675  0 protected void handlePartialEscape(String escape) {
 676  0 _result.append("\\u005cu"); // encoded '\' plus 'u'
 677  0 _result.append(escape);
 678  0 _changed = true;
 679    }
 680  0 protected void handleCompleteEscape(String escape) {
 681  0 _result.append("\\uu"); // add a 'u'
 682  0 _result.append(escape);
 683  0 _changed = true;
 684    }
 685    }.value(s);
 686    }
 687   
 688    /**
 689    * Convert all one-level Unicode escapes in the string to their equivalent characters, as specified by JLS 3.3.
 690    * Higher-level escapes (containing multiple 'u' characters) will have a single 'u' removed.
       * @param s The string to unescape
       * @return The unescaped string, or {@code s} itself if it contained no complete escape
 691    * @throws IllegalArgumentException If a backslash-u escape in the string is not followed by 4 hex digits
 692    */
 693  0 public static String unicodeUnescapeOnce(String s) {
 694  0 return new UnicodeTranslator() {
 695  0 protected void handleStandardChar(char c, boolean backslashed) {
 696  0 if (backslashed) { _result.append('\\'); }
 697  0 _result.append(c);
 698    }
 699  0 protected void handlePartialEscape(String escape) {
 700  0 throw new IllegalArgumentException("Expected a hexadecimal digit after '\\u" + escape + "'");
 701    }
 702  0 protected void handleCompleteEscape(String escape) {
       // a leading 'u' in the buffered text marks a higher-level escape
 703  0 if (escape.charAt(0) == 'u') {
 704  0 _result.append('\\');
 705  0 _result.append(escape); // a backslash plus the buffered text: one 'u' fewer than the input escape
 706    }
 707  0 else { _result.append((char) Integer.parseInt(escape, 16)); }
 708  0 _changed = true;
 709    }
 710    }.value(s);
 711    }
 712   
 713    /**
 714    * Convert all Unicode escapes in the string into their equivalent Unicode characters, as specified
 715    * by JLS 3.3.
       * @param s The string to unescape
       * @return The unescaped string, or {@code s} itself if it contained no complete escape
 716    * @throws IllegalArgumentException If a backslash-u escape in the string is not followed by 4 hex digits
 717    */
 718  0 public static String unicodeUnescape(String s) {
 719  0 return new UnicodeTranslator() {
 720  0 protected void handleStandardChar(char c, boolean backslashed) {
 721  0 if (backslashed) { _result.append('\\'); }
 722  0 _result.append(c);
 723    }
 724  0 protected void handlePartialEscape(String escape) {
 725  0 throw new IllegalArgumentException("Expected a hexadecimal digit after '\\u" + escape + "'");
 726    }
 727  0 protected void handleCompleteEscape(String escape) {
       // skip past any 'u' characters in the buffered text; the hex digits follow the last one
 728  0 int firstDigit = escape.lastIndexOf('u') + 1;
 729  0 _result.append((char) Integer.parseInt(escape.substring(firstDigit), 16));
 730  0 _changed = true;
 731    }
 732    }.value(s);
 733    }
 734   
 735    /**
 736    * Convert the given string to a form compatible with the Java language specification for character and string
 737    * literals (see JLS 3.10.6). The characters {@code \}, {@code "}, and {@code '} are replaced with escape
 738    * sequences. All control characters between {@code &#92;u0000} and {@code &#92;u001F}, along with
 739    * {@code &#92;u007F}, are replaced with mnemonic escape sequences (such as {@code "\n"}), or octal escape
 740    * sequences if no mnemonic exists.
 741    */
 742  0 public static String javaEscape(String s) {
 743  0 return new StringTranslator() {
 744  0 protected void processChar(char c) {
 745  0 switch (c) {
 746  0 case '\b': _result.append("\\b"); _changed = true; break;
 747  0 case '\t': _result.append("\\t"); _changed = true; break;
 748  0 case '\n': _result.append("\\n"); _changed = true; break;
 749  0 case '\f': _result.append("\\f"); _changed = true; break;
 750  0 case '\r': _result.append("\\r"); _changed = true; break;
 751  0 case '\"': _result.append("\\\""); _changed = true; break;
 752  0 case '\'': _result.append("\\\'"); _changed = true; break;
 753  0 case '\\': _result.append("\\\\"); _changed = true; break;
 754  0 default:
 755  0 if (c < ' ' || c == '\u007f') {
 756    // must use 3 digits so that unescaping doesn't consume too many chars ("\12" vs. "\0012")
 757  0 _result.append('\\');
 758  0 _result.append(padLeft(Integer.toOctalString(c), '0', 3));
 759  0 _changed = true;
 760    }
 761  0 else { _result.append(c); }
 762  0 break;
 763    }
 764    }
 765  0 protected void finish() {}
 766    }.value(s);
 767    }
 768   
 769    private static enum JState { START, BACKSLASH, DIG1, DIG2, DIG3 };
 770   
 771    /**
 772    * Convert a string potentially containing Java character escapes (as in {@link #javaEscape}) to its
 773    * unescaped equivalent. Note that Unicode escapes are <em>not</em> interpreted (strings from Java source
 774    * code should first be processed by {@link #unicodeUnescape}).
 775    * @throws IllegalArgumentException If the character {@code \} is followed by an invalid escape character
 776    * or the end of the string.
 777    */
 778  0 public static String javaUnescape(String s) {
 779  0 return new StringTranslator() {
 780   
 781    private JState _state = JState.START;
 782    private final StringBuilder _buffer = new StringBuilder(); // contains octal digits
 783   
 784  0 private void reset(char c) {
 785  0 _result.append((char) Integer.parseInt(_buffer.toString(), 8));
 786  0 _buffer.delete(0, _buffer.length());
 787  0 _state = JState.START;
 788  0 if (c == '\\') { _state = JState.BACKSLASH; _changed = true; }
 789  0 else { _result.append(c); }
 790    }
 791   
 792  0 protected void processChar(char c) {
 793  0 switch (_state) {
 794  0 case START:
 795  0 if (c == '\\') { _state = JState.BACKSLASH; _changed = true; }
 796  0 else { _result.append(c); }
 797  0 break;
 798  0 case BACKSLASH:
 799  0 switch (c) {
 800  0 case 'b': _result.append('\b'); _state = JState.START; break;
 801  0 case 't': _result.append('\t'); _state = JState.START; break;
 802  0 case 'n': _result.append('\n'); _state = JState.START; break;
 803  0 case 'f': _result.append('\f'); _state = JState.START; break;
 804  0 case 'r': _result.append('\r'); _state = JState.START; break;
 805  0 case '\"': _result.append('\"'); _state = JState.START; break;
 806  0 case '\'': _result.append('\''); _state = JState.START; break;
 807  0 case '\\': _result.append('\\'); _state = JState.START; break;
 808  0 case '0':
 809  0 case '1':
 810  0 case '2':
 811  0 case '3':
 812  0 _buffer.append(c); _state = JState.DIG1; break;
 813  0 case '4':
 814  0 case '5':
 815  0 case '6':
 816  0 case '7':
 817  0 _buffer.append(c); _state = JState.DIG2; break;
 818  0 default:
 819  0 throw new IllegalArgumentException("'" + c + "' after '\\'");
 820    }
 821  0 break;
 822  0 case DIG1:
 823  0 if (isOctalDigit(c)) { _buffer.append(c); _state = JState.DIG2; }
 824  0 else { reset(c); }
 825  0 break;
 826  0 case DIG2:
 827  0 if (isOctalDigit(c)) { _buffer.append(c); _state = JState.DIG3; }
 828  0 else { reset(c); }
 829  0 break;
 830  0 case DIG3:
 831  0 reset(c);
 832  0 break;
 833    }
 834    }
 835   
 836  0 protected void finish() {
 837  0 switch (_state) {
 838  0 case START: break;
 839  0 case BACKSLASH: throw new IllegalArgumentException("Nothing after after '\\'");
 840  0 default: _result.append((char) Integer.parseInt(_buffer.toString(), 8)); break;
 841    }
 842    }
 843    }.value(s);
 844    }
 845   
 846    /**
 847    * <p>Produce a regular expression that matches the given string. Backslash escape sequences are
 848    * used for all characters that potentially clash with regular expression syntax. For simplicity,
 849    * escapes are applied to all control characters ({@code &#92;u0000} to {@code &#92;u001F} and
 850    * {@code &#92;u007F}) and to all non-alphanumeric, non-space ASCII characters (in the range
 851    * {@code &#92;u0020} to {@code &#92;u007E}), including those that have no special meaning in
 852    * the regular expression syntax (such as {@code @}, {@code "}, and {@code ~}). Where a
 853    * mnemonic escape for control characters exists, it is used; otherwise, the hexadecimal {@code \xhh}
 854    * notation is used.</p>
 855    *
 856    * <p>Note: a similar method is available in Java 5: {@link Pattern#quote}. It has the same basic
 857    * contract &mdash; produce a regex to match the given string &mdash; but produces different (equivalent)
 858    * results.</p>
 859    */
 860  12 public static String regexEscape(String s) {
 861  12 return new StringTranslator() {
 862  12 protected void processChar(char c) {
 863  12 switch (c) {
 864  0 case '\t': _result.append("\\t"); _changed = true; break;
 865  0 case '\n': _result.append("\\n"); _changed = true; break;
 866  0 case '\r': _result.append("\\r"); _changed = true; break;
 867  0 case '\f': _result.append("\\f"); _changed = true; break;
 868  0 case '\u0007': _result.append("\\a"); _changed = true; break;
 869  0 case '\u001b': _result.append("\\e"); _changed = true; break;
 870  12 default:
 871  12 if (c < ' ' || c == '\u007f') {
 872  0 _result.append("\\x");
 873  0 _result.append(padLeft(Integer.toHexString(c), '0', 2));
 874  0 _changed = true;
 875    }
 876  12 else if ((c > ' ' && c < '0') || (c > '9' && c < 'A') ||
 877    (c > 'Z' && c < 'a') || (c > 'z' && c < '\u007F')) {
 878  12 _result.append('\\');
 879  12 _result.append(c);
 880  12 _changed = true;
 881    }
 882  0 else { _result.append(c); }
 883  12 break;
 884    }
 885    }
 886  12 protected void finish() {}
 887    }.value(s);
 888    }
 889   
 890    /**
 891    * Convert the given string to a form containing SGML character entities. All characters appearing in
 892    * {@code entities} will be translated to their corrresponding entity names; if {@code convertToAscii} is
 893    * {@code true}, all other non-ASCII characters will be converted to numeric references.
 894    */
 895  0 public static String sgmlEscape(String s, final Map<Character, String> entities, final boolean convertToAscii) {
 896  0 return new StringTranslator() {
 897  0 protected void processChar(char c) {
 898  0 String entity = entities.get(c);
 899  0 if (entity != null) {
 900  0 _result.append('&');
 901  0 _result.append(entity);
 902  0 _result.append(';');
 903  0 _changed = true;
 904    }
 905  0 else if (convertToAscii && c > '\u007F') {
 906  0 _result.append("&#");
 907  0 _result.append((int) c);
 908  0 _result.append(';');
 909  0 _changed = true;
 910    }
 911  0 else { _result.append(c); }
 912    }
 913  0 protected void finish() {}
 914    }.value(s);
 915    }
 916   
 917    private static enum SGMLState { START, AMP, NAME, NUM, HEX_DIGITS, DEC_DIGITS };
 918   
 919    /**
 920    * Interpret all SGML character entities in the given string according to the provided name-character mapping.
 921    * @throws IllegalArgumentException If the string contains a malformed or unrecognized character entity
 922    */
 923  0 public static String sgmlUnescape(String s, final Map<String, Character> entities) {
 924  0 return new StringTranslator() {
 925   
 926    private SGMLState _state = SGMLState.START;
 927    private final StringBuilder _buffer = new StringBuilder(); // contains name or digits
 928   
 929  0 private void reset() { _buffer.delete(0, _buffer.length()); _state = SGMLState.START; }
 930   
 931  0 protected void processChar(char c) {
 932  0 switch (_state) {
 933  0 case START:
 934  0 if (c == '&') { _state = SGMLState.AMP; _changed = true; }
 935  0 else { _result.append(c); }
 936  0 break;
 937  0 case AMP:
 938  0 if (c == '#') { _state = SGMLState.NUM; }
 939  0 else if (c == ';') { throw new IllegalArgumentException("Missing entity name"); }
 940  0 else { _state = SGMLState.NAME; _buffer.append(c); }
 941  0 break;
 942  0 case NAME:
 943  0 if (c == ';') {
 944  0 Character namedChar = entities.get(_buffer.toString());
 945  0 if (namedChar == null) {
 946  0 throw new IllegalArgumentException("Unrecognized entity name: '" + _buffer.toString() + "'");
 947    }
 948  0 else { _result.append((char) namedChar); reset(); }
 949    }
 950  0 else { _buffer.append(c); }
 951  0 break;
 952  0 case NUM:
 953  0 if (c == 'x') { _state = SGMLState.HEX_DIGITS; }
 954  0 else if (isDecimalDigit(c)) { _state = SGMLState.DEC_DIGITS; _buffer.append(c); }
 955  0 else { throw new IllegalArgumentException("Expected decimal digit: '" + c + "'"); }
 956  0 break;
 957  0 case HEX_DIGITS:
 958  0 if (c == ';') {
 959  0 if (_buffer.length() == 0) { throw new IllegalArgumentException("Expected hexadecimal digit: ';'"); }
 960  0 else { _result.append((char) Integer.parseInt(_buffer.toString(), 16)); reset(); }
 961    }
 962  0 else if (isHexDigit(c)) { _buffer.append(c); }
 963  0 else { throw new IllegalArgumentException("Expected hexadecimal digit: '" + c + "'"); }
 964  0 break;
 965  0 case DEC_DIGITS:
 966  0 if (c == ';') { _result.append((char) Integer.parseInt(_buffer.toString())); reset(); }
 967  0 else if (isDecimalDigit(c)) { _buffer.append(c); }
 968  0 else { throw new IllegalArgumentException("Expected decimal digit: '" + c + "'"); }
 969  0 break;
 970    }
 971    }
 972   
 973  0 protected void finish() {
 974  0 if (_state != SGMLState.START) { throw new IllegalArgumentException("Unfinished entity"); }
 975    }
 976    }.value(s);
 977    }
 978   
 979    /**
 980    * Convert the given string to an escaped form compatible with XML. The standard XML named entities
 981    * ({@code "}, {@code &}, {@code '}, {@code <}, and {@code >}) will be replaced with named references
 982    * (such as {@code &quot;}), and all non-ASCII characters will be replaced with numeric references.
 983    */
 984  0 public static String xmlEscape(String s) {
 985  0 return sgmlEscape(s, XML_ENTITIES.value().functionMap(), true);
 986    }
 987   
 988    /**
 989    * Convert the given string to an escaped form compatible with XML. The standard XML named entities
 990    * ({@code "}, {@code &}, {@code '}, {@code <}, and {@code >}) will be replaced with named references
 991    * (such as {@code &quot;}); if {@code convertToAscii} is {@code true}, all non-ASCII characters
 992    * will be replaced with numeric references.
 993    */
 994  0 public static String xmlEscape(String s, boolean convertToAscii) {
 995  0 return sgmlEscape(s, XML_ENTITIES.value().functionMap(), convertToAscii);
 996    }
 997   
 998    /**
 999    * Interpret all XML character entities in the given string.
 1000    * @throws IllegalArgumentException If the string contains a malformed or unrecognized character entity
 1001    */
 1002  0 public static String xmlUnescape(String s) {
 1003  0 return sgmlUnescape(s, XML_ENTITIES.value().injectionMap());
 1004    }
 1005   
 1006    /**
 1007    * Convert the given string to an escaped form compatible with HTML. All named entities
 1008    * supported by HTML 4.0 will be replaced with named references, and all other non-ASCII
 1009    * characters will be replaced with numeric references. The {@code '} character will also
 1010    * be replaced with a numeric refererence.
 1011    */
 1012  0 public static String htmlEscape(String s) {
 1013  0 return sgmlEscape(s, HTML_ENTITIES.value().functionMap(), true);
 1014    }
 1015   
 1016    /**
 1017    * Interpret all HTML character entities in the given string.
 1018    * @throws IllegalArgumentException If the string contains a malformed or unrecognized character entity
 1019    */
 1020  0 public static String htmlUnescape(String s) {
 1021  0 return sgmlUnescape(s, HTML_ENTITIES.value().injectionMap());
 1022    }
 1023   
 1024   
 1025    /** Entity names for XML; declared lazily to prevent creation when it is not used */
 1026    private static final Thunk<OneToOneRelation<Character, String>> XML_ENTITIES =
 1027    LazyThunk.make(new Thunk<OneToOneRelation<Character, String>>() {
 1028  0 public OneToOneRelation<Character, String> value() {
 1029  0 OneToOneRelation<Character, String> result = new IndexedOneToOneRelation<Character, String>();
 1030    // Source: Wikipedia, "List of XML and HTML character entity references"
 1031  0 result.add('"', "quot");
 1032  0 result.add('&', "amp");
 1033  0 result.add('\'', "apos");
 1034  0 result.add('<', "lt");
 1035  0 result.add('>', "gt");
 1036  0 return result;
 1037    }
 1038    });
 1039   
 1040   
 1041    /** Entity names for HTML; declared lazily to prevent creation when it is not used */
 1042    private static final Thunk<OneToOneRelation<Character, String>> HTML_ENTITIES =
 1043    LazyThunk.make(new Thunk<OneToOneRelation<Character, String>>() {
 1044  0 public OneToOneRelation<Character, String> value() {
 1045  0 OneToOneRelation<Character, String> result = new IndexedOneToOneRelation<Character, String>();
 1046    // Source: Wikipedia, "List of XML and HTML character entity references"
 1047  0 result.add('\'', "#39"); // no entity defined, but it's safer to escape it
 1048  0 result.add('"', "quot");
 1049  0 result.add('&', "amp");
 1050  0 result.add('<', "lt");
 1051  0 result.add('>', "gt");
 1052   
 1053  0 result.add('\u00A0', "nbsp");
 1054  0 result.add('\u00A1', "iexcl");
 1055  0 result.add('\u00A2', "cent");
 1056  0 result.add('\u00A3', "pound");
 1057  0 result.add('\u00A4', "curren");
 1058  0 result.add('\u00A5', "yen");
 1059  0 result.add('\u00A6', "brvbar");
 1060  0 result.add('\u00A7', "sect");
 1061  0 result.add('\u00A8', "uml");
 1062  0 result.add('\u00A9', "copy");
 1063  0 result.add('\u00AA', "ordf");
 1064  0 result.add('\u00AB', "laquo");
 1065  0 result.add('\u00AC', "not");
 1066  0 result.add('\u00AD', "shy");
 1067  0 result.add('\u00AE', "reg");
 1068  0 result.add('\u00AF', "macr");
 1069  0 result.add('\u00B0', "deg");
 1070  0 result.add('\u00B1', "plusmn");
 1071  0 result.add('\u00B2', "sup2");
 1072  0 result.add('\u00B3', "sup3");
 1073  0 result.add('\u00B4', "acute");
 1074  0 result.add('\u00B5', "micro");
 1075  0 result.add('\u00B6', "para");
 1076  0 result.add('\u00B7', "middot");
 1077  0 result.add('\u00B8', "cedil");
 1078  0 result.add('\u00B9', "sup1");
 1079  0 result.add('\u00BA', "ordm");
 1080  0 result.add('\u00BB', "raquo");
 1081  0 result.add('\u00BC', "frac14");
 1082  0 result.add('\u00BD', "frac12");
 1083  0 result.add('\u00BE', "frac34");
 1084  0 result.add('\u00BF', "iquest");
 1085  0 result.add('\u00C0', "Agrave");
 1086  0 result.add('\u00C1', "Aacute");
 1087  0 result.add('\u00C2', "Acirc");
 1088  0 result.add('\u00C3', "Atilde");
 1089  0 result.add('\u00C4', "Auml");
 1090  0 result.add('\u00C5', "Aring");
 1091  0 result.add('\u00C6', "AElig");
 1092  0 result.add('\u00C7', "Ccedil");
 1093  0 result.add('\u00C8', "Egrave");
 1094  0 result.add('\u00C9', "Eacute");
 1095  0 result.add('\u00CA', "Ecirc");
 1096  0 result.add('\u00CB', "Euml");
 1097  0 result.add('\u00CC', "Igrave");
 1098  0 result.add('\u00CD', "Iacute");
 1099  0 result.add('\u00CE', "Icirc");
 1100  0 result.add('\u00CF', "Iuml");
 1101  0 result.add('\u00D0', "ETH");
 1102  0 result.add('\u00D1', "Ntilde");
 1103  0 result.add('\u00D2', "Ograve");
 1104  0 result.add('\u00D3', "Oacute");
 1105  0 result.add('\u00D4', "Ocirc");
 1106  0 result.add('\u00D5', "Otilde");
 1107  0 result.add('\u00D6', "Ouml");
 1108  0 result.add('\u00D7', "times");
 1109  0 result.add('\u00D8', "Oslash");
 1110  0 result.add('\u00D9', "Ugrave");
 1111  0 result.add('\u00DA', "Uacute");
 1112  0 result.add('\u00DB', "Ucirc");
 1113  0 result.add('\u00DC', "Uuml");
 1114  0 result.add('\u00DD', "Yacute");
 1115  0 result.add('\u00DE', "THORN");
 1116  0 result.add('\u00DF', "szlig");
 1117  0 result.add('\u00E0', "agrave");
 1118  0 result.add('\u00E1', "aacute");
 1119  0 result.add('\u00E2', "acirc");
 1120  0 result.add('\u00E3', "atilde");
 1121  0 result.add('\u00E4', "auml");
 1122  0 result.add('\u00E5', "aring");
 1123  0 result.add('\u00E6', "aelig");
 1124  0 result.add('\u00E7', "ccedil");
 1125  0 result.add('\u00E8', "egrave");
 1126  0 result.add('\u00E9', "eacute");
 1127  0 result.add('\u00EA', "ecirc");
 1128  0 result.add('\u00EB', "euml");
 1129  0 result.add('\u00EC', "igrave");
 1130  0 result.add('\u00ED', "iacute");
 1131  0 result.add('\u00EE', "icirc");
 1132  0 result.add('\u00EF', "iuml");
 1133  0 result.add('\u00F0', "eth");
 1134  0 result.add('\u00F1', "ntilde");
 1135  0 result.add('\u00F2', "ograve");
 1136  0 result.add('\u00F3', "oacute");
 1137  0 result.add('\u00F4', "ocirc");
 1138  0 result.add('\u00F5', "otilde");
 1139  0 result.add('\u00F6', "ouml");
 1140  0 result.add('\u00F7', "divide");
 1141  0 result.add('\u00F8', "oslash");
 1142  0 result.add('\u00F9', "ugrave");
 1143  0 result.add('\u00FA', "uacute");
 1144  0 result.add('\u00FB', "ucirc");
 1145  0 result.add('\u00FC', "uuml");
 1146  0 result.add('\u00FD', "yacute");
 1147  0 result.add('\u00FE', "thorn");
 1148  0 result.add('\u00FF', "yuml");
 1149   
 1150  0 result.add('\u0152', "OElig");
 1151  0 result.add('\u0153', "oelig");
 1152  0 result.add('\u0160', "Scaron");
 1153  0 result.add('\u0161', "scaron");
 1154  0 result.add('\u0178', "Yuml");
 1155  0 result.add('\u0192', "fnof");
 1156   
 1157  0 result.add('\u02C6', "circ");
 1158  0 result.add('\u02DC', "tilde");
 1159   
 1160  0 result.add('\u0391', "Alpha");
 1161  0 result.add('\u0392', "Beta");
 1162  0 result.add('\u0393', "Gamma");
 1163  0 result.add('\u0394', "Delta");
 1164  0 result.add('\u0395', "Epsilon");
 1165  0 result.add('\u0396', "Zeta");
 1166  0 result.add('\u0397', "Eta");
 1167  0 result.add('\u0398', "Theta");
 1168  0 result.add('\u0399', "Iota");
 1169  0 result.add('\u039A', "Kappa");
 1170  0 result.add('\u039B', "Lambda");
 1171  0 result.add('\u039C', "Mu");
 1172  0 result.add('\u039D', "Nu");
 1173  0 result.add('\u039E', "Xi");
 1174  0 result.add('\u039F', "Omicron");
 1175  0 result.add('\u03A0', "Pi");
 1176  0 result.add('\u03A1', "Rho");
 1177  0 result.add('\u03A3', "Sigma");
 1178  0 result.add('\u03A4', "Tau");
 1179  0 result.add('\u03A5', "Upsilon");
 1180  0 result.add('\u03A6', "Phi");
 1181  0 result.add('\u03A7', "Chi");
 1182  0 result.add('\u03A8', "Psi");
 1183  0 result.add('\u03A9', "Omega");
 1184   
 1185  0 result.add('\u03B1', "alpha");
 1186  0 result.add('\u03B2', "beta");
 1187  0 result.add('\u03B3', "gamma");
 1188  0 result.add('\u03B4', "delta");
 1189  0 result.add('\u03B5', "epsilon");
 1190  0 result.add('\u03B6', "zeta");
 1191  0 result.add('\u03B7', "eta");
 1192  0 result.add('\u03B8', "theta");
 1193  0 result.add('\u03B9', "iota");
 1194  0 result.add('\u03BA', "kappa");
 1195  0 result.add('\u03BB', "lambda");
 1196  0 result.add('\u03BC', "mu");
 1197  0 result.add('\u03BD', "nu");
 1198  0 result.add('\u03BE', "xi");
 1199  0 result.add('\u03BF', "omicron");
 1200  0 result.add('\u03C0', "pi");
 1201  0 result.add('\u03C1', "rho");
 1202  0 result.add('\u03C2', "sigmaf");
 1203  0 result.add('\u03C3', "sigma");
 1204  0 result.add('\u03C4', "tau");
 1205  0 result.add('\u03C5', "upsilon");
 1206  0 result.add('\u03C6', "phi");
 1207  0 result.add('\u03C7', "chi");
 1208  0 result.add('\u03C8', "psi");
 1209  0 result.add('\u03C9', "omega");
 1210   
 1211  0 result.add('\u03D1', "thetasym");
 1212  0 result.add('\u03D2', "upsih");
 1213  0 result.add('\u03D6', "piv");
 1214   
 1215  0 result.add('\u2002', "ensp");
 1216  0 result.add('\u2003', "emsp");
 1217  0 result.add('\u2009', "thinsp");
 1218  0 result.add('\u200C', "zwnj");
 1219  0 result.add('\u200D', "zwj");
 1220  0 result.add('\u200E', "lrm");
 1221  0 result.add('\u200F', "rlm");
 1222  0 result.add('\u2013', "ndash");
 1223  0 result.add('\u2014', "mdash");
 1224  0 result.add('\u2018', "lsquo");
 1225  0 result.add('\u2019', "rsquo");
 1226  0 result.add('\u201A', "sbquo");
 1227  0 result.add('\u201C', "ldquo");
 1228  0 result.add('\u201D', "rdquo");
 1229  0 result.add('\u201E', "bdquo");
 1230  0 result.add('\u2020', "dagger");
 1231  0 result.add('\u2021', "Dagger");
 1232  0 result.add('\u2022', "bull");
 1233  0 result.add('\u2026', "hellip");
 1234  0 result.add('\u2030', "permil");
 1235  0 result.add('\u2032', "prime");
 1236  0 result.add('\u2033', "Prime");
 1237  0 result.add('\u2039', "lsaquo");
 1238  0 result.add('\u203A', "rsaquo");
 1239  0 result.add('\u203E', "oline");
 1240  0 result.add('\u2044', "frasl");
 1241  0 result.add('\u20AC', "euro");
 1242   
 1243  0 result.add('\u2111', "image");
 1244  0 result.add('\u2118', "weierp");
 1245  0 result.add('\u211C', "real");
 1246  0 result.add('\u2122', "trade");
 1247  0 result.add('\u2135', "alefsym");
 1248  0 result.add('\u2190', "larr");
 1249  0 result.add('\u2191', "uarr");
 1250  0 result.add('\u2192', "rarr");
 1251  0 result.add('\u2193', "darr");
 1252  0 result.add('\u2194', "harr");
 1253  0 result.add('\u21B5', "crarr");
 1254  0 result.add('\u21D0', "lArr");
 1255  0 result.add('\u21D1', "uArr");
 1256  0 result.add('\u21D2', "rArr");
 1257  0 result.add('\u21D3', "dArr");
 1258  0 result.add('\u21D4', "hArr");
 1259   
 1260  0 result.add('\u2200', "forall");
 1261  0 result.add('\u2202', "part");
 1262  0 result.add('\u2203', "exist");
 1263  0 result.add('\u2205', "empty");
 1264  0 result.add('\u2207', "nabla");
 1265  0 result.add('\u2208', "isin");
 1266  0 result.add('\u2209', "notin");
 1267  0 result.add('\u220B', "ni");
 1268  0 result.add('\u220F', "prod");
 1269  0 result.add('\u2211', "sum");
 1270  0 result.add('\u2212', "minus");
 1271  0 result.add('\u2217', "lowast");
 1272  0 result.add('\u221A', "radic");
 1273  0 result.add('\u221D', "prop");
 1274  0 result.add('\u221E', "infin");
 1275  0 result.add('\u2220', "ang");
 1276  0 result.add('\u2227', "and");
 1277  0 result.add('\u2228', "or");
 1278  0 result.add('\u2229', "cap");
 1279  0 result.add('\u222A', "cup");
 1280  0 result.add('\u222B', "int");
 1281  0 result.add('\u2234', "there4");
 1282  0 result.add('\u223C', "sim");
 1283  0 result.add('\u2245', "cong");
 1284  0 result.add('\u2248', "asymp");
 1285  0 result.add('\u2260', "ne");
 1286  0 result.add('\u2261', "equiv");
 1287  0 result.add('\u2264', "le");
 1288  0 result.add('\u2265', "ge");
 1289  0 result.add('\u2282', "sub");
 1290  0 result.add('\u2283', "sup");
 1291  0 result.add('\u2284', "nsub");
 1292  0 result.add('\u2286', "sube");
 1293  0 result.add('\u2287', "supe");
 1294  0 result.add('\u2295', "oplus");
 1295  0 result.add('\u2297', "otimes");
 1296  0 result.add('\u22A5', "perp");
 1297  0 result.add('\u22C5', "sdot");
 1298   
 1299  0 result.add('\u2308', "lceil");
 1300  0 result.add('\u2309', "rceil");
 1301  0 result.add('\u230A', "lfloor");
 1302  0 result.add('\u230B', "rfloor");
 1303  0 result.add('\u2329', "lang");
 1304  0 result.add('\u232A', "rang");
 1305   
 1306  0 result.add('\u25CA', "loz");
 1307   
 1308  0 result.add('\u2660', "spades");
 1309  0 result.add('\u2663', "clubs");
 1310  0 result.add('\u2665', "hearts");
 1311  0 result.add('\u2666', "diams");
 1312  0 return result;
 1313    }
 1314    });
 1315   
 1316    }