001 /*BEGIN_COPYRIGHT_BLOCK
002 *
003 * Copyright (c) 2001-2010, JavaPLT group at Rice University (drjava@rice.edu)
004 * All rights reserved.
005 *
006 * Redistribution and use in source and binary forms, with or without
007 * modification, are permitted provided that the following conditions are met:
008 * * Redistributions of source code must retain the above copyright
009 * notice, this list of conditions and the following disclaimer.
010 * * Redistributions in binary form must reproduce the above copyright
011 * notice, this list of conditions and the following disclaimer in the
012 * documentation and/or other materials provided with the distribution.
013 * * Neither the names of DrJava, the JavaPLT group, Rice University, nor the
014 * names of its contributors may be used to endorse or promote products
015 * derived from this software without specific prior written permission.
016 *
017 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
018 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
019 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
020 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
021 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
022 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
023 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
024 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
025 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
026 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
027 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
028 *
029 * This software is Open Source Initiative approved Open Source Software.
030 * Open Source Initative Approved is a trademark of the Open Source Initiative.
031 *
032 * This file is part of DrJava. Download the current version of this project
033 * from http://www.drjava.org/ or http://sourceforge.net/projects/drjava/
034 *
035 * END_COPYRIGHT_BLOCK*/
036
037 package edu.rice.cs.javalanglevels;
038
/**
 * A class full of static methods for escaping/unescaping characters
 * as they appear inside Java char and String literals.
 * It's abstract because it should not be instantiated.
 */
public abstract class CharConverter {
  /**
   * Escapes the given char to be suitable for writing out in
   * a Java char or String literal.
   *
   * @param c Character to escape
   * @return A string consisting of c, properly escaped for use
   *         inside a Java char or String literal. There are no
   *         quotation marks around the result.
   */
  public static String escapeChar(char c) {
    // Six chars is the longest possible result (a backslash, 'u', four hex digits).
    StringBuffer buf = new StringBuffer(6);
    escapeChar(c, buf);
    return buf.toString();
  }

  /**
   * Escapes the given char to be suitable for writing out in
   * a Java char or String literal, appending the result to buf.
   * The named escapes (\n \t \b \r \f \\ \' \") are used where they
   * exist; any other char below 32 or above 127 is written as a
   * four-digit lowercase hexadecimal Unicode escape; everything else
   * is appended unchanged.
   *
   * @param c Character to escape
   * @param buf StringBuffer where the result is written out
   */
  public static void escapeChar(char c, StringBuffer buf) {
    switch (c) {
      case '\n': buf.append("\\n"); break;
      case '\t': buf.append("\\t"); break;
      case '\b': buf.append("\\b"); break;
      case '\r': buf.append("\\r"); break;
      case '\f': buf.append("\\f"); break;
      case '\\': buf.append("\\\\"); break;
      case '\'': buf.append("\\\'"); break;
      case '\"': buf.append("\\\""); break;
      default:
        if ((c < 32) || (c > 127)) {
          // Unicode-escape everything outside the printable range,
          // left-padding the hex value with zeros to four digits.
          String hex = Integer.toHexString(c);
          buf.append("\\u");
          for (int i = hex.length(); i < 4; i++) {
            buf.append('0');
          }
          buf.append(hex);
        }
        else {
          buf.append(c);
        }
        break;
    }
  }

  /**
   * Escapes the given String to be suitable for writing out as a
   * Java String literal.
   *
   * @param s String to escape
   * @return A string consisting of s, properly escaped for use
   *         inside a Java String literal. There are no
   *         quotation marks around the result.
   */
  public static String escapeString(String s) {
    // The escaped form is never shorter than the input, so start at that size.
    StringBuffer buf = new StringBuffer(s.length());

    for (int i = 0; i < s.length(); i++) {
      escapeChar(s.charAt(i), buf);
    }

    return buf.toString();
  }

  /**
   * Unescapes the given string, escaped as it would be in Java source,
   * to a single char. Note that this does not handle Unicode escapes
   * (a backslash followed by 'u' and four hex digits); these are expected
   * to have already been processed out of the input string. The only
   * escapes that are handled are octal escapes and \n \t \b \r \f \\ \' \".
   *
   * @param in String containing (possibly) escaped character, without quotes
   * @return char value of the input string, unescaped.
   * @throws IllegalArgumentException if in is empty, contains an invalid or
   *         incomplete escape sequence, or has characters left over after
   *         the first (possibly escaped) character
   */
  public static char unescapeChar(String in) {
    if (in.length() == 0) {
      // Previously surfaced as a StringIndexOutOfBoundsException from charAt(0).
      throw new IllegalArgumentException("Empty character literal");
    }

    StringBuffer buf = new StringBuffer();
    int endPos = unescapeString(in, 0, buf);
    if (endPos < in.length()) {
      throw new IllegalArgumentException((in.length() - endPos) + " trailing" +
                                         " characters at the end of character" +
                                         " literal '" + in + "'");
    }

    return buf.charAt(0);
  }

  /**
   * Unescapes the given string, escaped as it would be in Java source,
   * to a String. Note that this does not handle Unicode escapes
   * (a backslash followed by 'u' and four hex digits); these are expected
   * to have already been processed out of the input string. The only
   * escapes that are handled are octal escapes and \n \t \b \r \f \\ \' \".
   *
   * @param in String containing (possibly) escaped characters, without quotes
   * @return Unescaped string value of the input string
   * @throws IllegalArgumentException if in contains an invalid or
   *         incomplete escape sequence
   */
  public static String unescapeString(String in) {
    // Nothing to unescape; hand the empty string straight back.
    if (in.length() == 0) {
      return in;
    }

    // The unescaped form is never longer than the input.
    StringBuffer buf = new StringBuffer(in.length());
    int nextStart = 0;

    while (nextStart < in.length()) {
      nextStart = unescapeString(in, nextStart, buf);
    }

    return buf.toString();
  }

  /**
   * Unescapes one character in the given string, escaped as it would be in
   * Java source, appending it to out. Note that this does not handle Unicode
   * escapes (a backslash followed by 'u' and four hex digits); these are
   * expected to have already been processed out of the input string. The only
   * escapes that are handled are octal escapes and \n \t \b \r \f \\ \' \".
   * Nothing is appended to out when an exception is thrown.
   *
   * @param in String containing (possibly) escaped characters
   * @param startPos Starting position of the next character to unescape
   * @param out StringBuffer to write out the unescaped character to.
   *
   * @return Position after the end of the parsed character
   * @throws IllegalArgumentException if the backslash at startPos is the last
   *         character of in, or is followed by a character that does not
   *         begin a supported escape sequence
   */
  public static int unescapeString(final String in,
                                   final int startPos,
                                   final StringBuffer out)
  {
    char first = in.charAt(startPos);

    if (first != '\\') {
      out.append(first);
      return startPos + 1;
    }

    // A backslash must be followed by at least one more character.
    if (startPos + 1 >= in.length()) {
      throw new IllegalArgumentException("Incomplete escape sequence at position " +
                                         startPos + ": " + in);
    }

    char second = in.charAt(startPos + 1);

    switch (second) {
      case 'n': out.append('\n'); return startPos + 2;
      case 't': out.append('\t'); return startPos + 2;
      case 'b': out.append('\b'); return startPos + 2;
      case 'r': out.append('\r'); return startPos + 2;
      case 'f': out.append('\f'); return startPos + 2;
      case '\\': out.append('\\'); return startPos + 2;
      case '\'': out.append('\''); return startPos + 2;
      case '\"': out.append('\"'); return startPos + 2;
    }

    // The only cases left to deal with are octal escapes or invalid.
    if (!_isOctalDigit(second)) {
      throw new IllegalArgumentException("Invalid escape sequence at position "+
                                         startPos + ": " + in);
    }

    // If the first digit is < 4, there could be three octal digits.
    // Otherwise there can be only two. Either way the value is at most
    // 0377 (255), so it always fits in a char.
    int maxDigits = (second < '4') ? 3 : 2;

    int charValue = second - '0';
    int digitCount = 1;
    int nextDigitPos = startPos + 2;

    // Consume further octal digits greedily, up to the limit.
    while ((digitCount < maxDigits)
             && (nextDigitPos < in.length())
             && _isOctalDigit(in.charAt(nextDigitPos))) {
      charValue = (charValue * 8) + (in.charAt(nextDigitPos) - '0');
      digitCount++;
      nextDigitPos++;
    }

    out.append((char) charValue);
    return nextDigitPos;
  }

  /** Returns true iff c is one of the characters '0' through '7'. */
  private static boolean _isOctalDigit(char c) {
    return ((c >= '0') && (c <= '7'));
  }
}