001    /*BEGIN_COPYRIGHT_BLOCK
002     *
003     * Copyright (c) 2001-2010, JavaPLT group at Rice University (drjava@rice.edu)
004     * All rights reserved.
005     * 
006     * Redistribution and use in source and binary forms, with or without
007     * modification, are permitted provided that the following conditions are met:
008     *    * Redistributions of source code must retain the above copyright
009     *      notice, this list of conditions and the following disclaimer.
010     *    * Redistributions in binary form must reproduce the above copyright
011     *      notice, this list of conditions and the following disclaimer in the
012     *      documentation and/or other materials provided with the distribution.
013     *    * Neither the names of DrJava, the JavaPLT group, Rice University, nor the
014     *      names of its contributors may be used to endorse or promote products
015     *      derived from this software without specific prior written permission.
016     * 
017     * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
018     * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
019     * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
020     * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
021     * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
022     * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
023     * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
024     * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
025     * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
026     * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
027     * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028     *
029     * This software is Open Source Initiative approved Open Source Software.
030     * Open Source Initative Approved is a trademark of the Open Source Initiative.
031     * 
032     * This file is part of DrJava.  Download the current version of this project
033     * from http://www.drjava.org/ or http://sourceforge.net/projects/drjava/
034     * 
035     * END_COPYRIGHT_BLOCK*/
036    
037    package edu.rice.cs.javalanglevels;
038    
039    /**
040     * A class full of static methods for escaping/unescaping characters.
041     * It's abstract because it should not be instantiated.
042     */
public abstract class CharConverter {
  /** Number of hex digits in a Java unicode escape sequence. */
  private static final int UNICODE_ESCAPE_DIGITS = 4;

  /**
   * Escapes the given char to be suitable for writing out in
   * a Java char or String literal.
   *
   * @param c  Character to escape
   * @return  A string consisting of c, properly escaped for use
   *          inside a Java char or String literal. There are no
   *          quotation marks around the result.
   */
  public static String escapeChar(char c) {
    StringBuffer buf = new StringBuffer();
    escapeChar(c, buf);
    return buf.toString();
  }

  /**
   * Escapes the given char to be suitable for writing out in
   * a Java char or String literal, appending the result to a buffer.
   * Characters with a dedicated Java escape (newline, tab, backspace,
   * carriage return, form feed, backslash, single and double quote) use
   * that escape. Any other character below 32 or above 127 is written as
   * a four-digit unicode escape; everything else (including DEL, 127) is
   * appended unchanged.
   *
   * @param c  Character to escape
   * @param buf  StringBuffer where the result is written out
   */
  public static void escapeChar(char c, StringBuffer buf) {
    switch (c) {
      case '\n': buf.append("\\n"); break;
      case '\t': buf.append("\\t"); break;
      case '\b': buf.append("\\b"); break;
      case '\r': buf.append("\\r"); break;
      case '\f': buf.append("\\f"); break;
      case '\\': buf.append("\\\\"); break;
      case '\'': buf.append("\\\'"); break;
      case '\"': buf.append("\\\""); break;
      default:
        // unicode escape remaining control characters and all non-ascii
        if ((c < 32) || (c > 127)) {
          String hex = Integer.toHexString(c);
          buf.append("\\u");

          // left-pad with zeros so the escape always has four hex digits
          for (int i = hex.length(); i < UNICODE_ESCAPE_DIGITS; i++) {
            buf.append('0');
          }

          buf.append(hex);
        }
        else {
          buf.append(c);
        }
        break;
    }
  }

  /**
   * Escapes the given String to be suitable for writing out as a
   * Java String literal. Each char is escaped independently with
   * {@link #escapeChar(char, StringBuffer)}.
   *
   * @param s  String to escape
   * @return  A string consisting of s, properly escaped for use
   *          inside a Java String literal. There are no
   *          quotation marks around the result.
   */
  public static String escapeString(String s) {
    // The escaped form is at least as long as the input.
    StringBuffer buf = new StringBuffer(s.length());

    for (int i = 0; i < s.length(); i++) {
      escapeChar(s.charAt(i), buf);
    }

    return buf.toString();
  }

  /**
   * Unescapes the given string, escaped as it would be in Java source,
   * to a single char. Note that this does not handle unicode escapes
   * of the form \ uXXXX; these are expected to have already been processed
   * out of the input string. The only escapes that are handled are octal escapes
   * and \n \t \b \r \f \\ \' \".
   *
   * @param in  String containing (possibly) escaped character, without quotes
   * @return  char value of the input string, unescaped.
   * @throws IllegalArgumentException  if in is empty, contains an invalid or
   *         incomplete escape sequence, or contains more than one character
   */
  public static char unescapeChar(String in) {
    // An empty literal has no character to return; report it as bad input
    // instead of failing with an index error further down.
    if (in.length() == 0) {
      throw new IllegalArgumentException("Empty character literal");
    }

    StringBuffer buf = new StringBuffer();
    int endPos = unescapeString(in, 0, buf);
    if (endPos < in.length()) {
      throw new IllegalArgumentException((in.length() - endPos) + " trailing" +
                                         " characters at the end of character"+
                                         " literal '" + in + "'");
    }

    return buf.charAt(0);
  }

  /**
   * Unescapes the given string, escaped as it would be in Java source,
   * to a String. Note that this does not handle unicode escapes
   * of the form \ uXXXX; these are expected to have already been processed
   * out of the input string. The only escapes that are handled are octal escapes
   * and \n \t \b \r \f \\ \' \".
   *
   * @param in  String containing (possibly) escaped characters, without quotes
   * @return  Unescaped string value of the input string
   * @throws IllegalArgumentException  if in contains an invalid or incomplete
   *         escape sequence
   */
  public static String unescapeString(String in) {
    // short circuit "" so the input instance is returned without allocating.
    if (in.length() == 0) {
      return in;
    }

    StringBuffer buf = new StringBuffer(in.length());
    int nextStart = 0;

    // Each call consumes exactly one (possibly escaped) character.
    while (nextStart < in.length()) {
      nextStart = unescapeString(in, nextStart, buf);
    }

    return buf.toString();
  }

  /**
   * Unescapes one character in the given string, escaped as it would be in
   * Java source, to a String. Note that this does not handle unicode escapes
   * of the form \ uXXXX; these are expected to have already been processed
   * out of the input string. The only escapes that are handled are octal escapes
   * and \n \t \b \r \f \\ \' \".
   *
   * @param in  String containing (possibly) escaped characters
   * @param startPos  Starting position of the next character to unescape;
   *                  must be a valid index into in
   * @param out  StringBuffer to write out the unescaped character to.
   *
   * @return  Position after the end of the parsed character
   * @throws IllegalArgumentException  if the backslash at startPos is the last
   *         character of in, or is followed by a character that does not
   *         begin a supported escape sequence
   */
  public static int unescapeString(final String in,
                                   final int startPos,
                                   final StringBuffer out)
  {
    char first = in.charAt(startPos);

    if (first != '\\') {
      out.append(first);
      return startPos + 1;
    }

    // A backslash with nothing after it is malformed input, not an index error.
    if (startPos + 1 >= in.length()) {
      throw new IllegalArgumentException("Incomplete escape sequence at position " +
                                         startPos + ": " + in);
    }

    char second = in.charAt(startPos + 1);

    switch (second) {
      case 'n': out.append('\n'); return startPos + 2;
      case 't': out.append('\t'); return startPos + 2;
      case 'b': out.append('\b'); return startPos + 2;
      case 'r': out.append('\r'); return startPos + 2;
      case 'f': out.append('\f'); return startPos + 2;
      case '\\': out.append('\\'); return startPos + 2;
      case '\'': out.append('\''); return startPos + 2;
      case '\"': out.append('\"'); return startPos + 2;
    }

    // The only cases left to deal with are octal escapes or invalid.
    if (! _isOctalDigit(second)) {
      throw new IllegalArgumentException("Invalid escape sequence at position "+
                                         startPos + ": " + in);
    }

    // If the first digit is < 4, there could be three octal digits.
    // Otherwise there can be only two. This caps the value at octal 377
    // (255), so the result always fits in a char.
    int maxDigits = (second < '4') ? 3 : 2;

    int charValue = second - '0';
    int digitCount = 1;
    int nextDigitPos = startPos + 2;

    // Greedily consume further octal digits, up to the limit.
    while ((digitCount < maxDigits) && (nextDigitPos < in.length())) {
      char nextChar = in.charAt(nextDigitPos);
      if (! _isOctalDigit(nextChar)) {
        break; // not an octal digit, so our work here is done.
      }

      charValue = (charValue * 8) + (nextChar - '0');
      digitCount++;
      nextDigitPos++;
    }

    out.append((char) charValue);
    return nextDigitPos;
  }

  /**
   * Tests whether the given char is an octal digit.
   *
   * @param c  Character to test
   * @return  true if c is between '0' and '7' inclusive
   */
  private static boolean _isOctalDigit(char c) {
    return ((c >= '0') && (c <= '7'));
  }
}