1 | /* |
2 | * Copyright (C) Christian Schulte, 2012-253 |
3 | * All rights reserved. |
4 | * |
5 | * Redistribution and use in source and binary forms, with or without |
6 | * modification, are permitted provided that the following conditions |
7 | * are met: |
8 | * |
9 | * o Redistributions of source code must retain the above copyright |
10 | * notice, this list of conditions and the following disclaimer. |
11 | * |
12 | * o Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in |
14 | * the documentation and/or other materials provided with the |
15 | * distribution. |
16 | * |
17 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, |
18 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY |
19 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, |
21 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
22 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | * |
28 | * $JOMC: JavaIdentifier.java 4962 2014-09-06 23:58:48Z schulte $ |
29 | * |
30 | */ |
31 | package org.jomc.model; |
32 | |
33 | import java.io.Serializable; |
34 | import java.lang.ref.Reference; |
35 | import java.lang.ref.SoftReference; |
36 | import java.text.MessageFormat; |
37 | import java.text.ParseException; |
38 | import java.util.ArrayList; |
39 | import java.util.HashMap; |
40 | import java.util.List; |
41 | import java.util.Locale; |
42 | import java.util.Map; |
43 | import java.util.ResourceBundle; |
44 | |
45 | /** |
46 | * Data type of a Java identifier. |
47 | * <p> |
* This class provides support for parsing and normalizing text to Java identifiers as specified in the Java
* Language Specification - Java SE 7 Edition - Chapter 3.8ff.
50 | * </p> |
51 | * <p> |
52 | * <i>Please note that this class will move to package {@code org.jomc.util} in JOMC 2.0.</i> |
53 | * </p> |
54 | * |
55 | * @author <a href="mailto:cs@schulte.it">Christian Schulte</a> |
56 | * @version $JOMC: JavaIdentifier.java 4962 2014-09-06 23:58:48Z schulte $ |
57 | * @see #normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode) |
58 | * @see #parse(java.lang.String) |
59 | * @see #valueOf(java.lang.String) |
60 | * @since 1.4 |
61 | */ |
62 | public final class JavaIdentifier implements CharSequence, Serializable |
63 | { |
64 | |
65 | /** |
66 | * Normalization modes. |
67 | * |
68 | * @author <a href="mailto:cs@schulte.it">Christian Schulte</a> |
69 | * @version $JOMC: JavaIdentifier.java 4962 2014-09-06 23:58:48Z schulte $ |
70 | * @since 1.4 |
71 | * @see JavaIdentifier#normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode) |
72 | */ |
73 | public static enum NormalizationMode |
74 | { |
75 | |
76 | /** Mode to normalize by compacting words using camel-case. */ |
77 | CAMEL_CASE, |
78 | /** Mode to normalize by separating words using '_' and by converting all characters to lower-case. */ |
79 | LOWER_CASE, |
80 | /** Mode to normalize by separating words using '_' and by converting all characters to upper-case. */ |
81 | UPPER_CASE, |
82 | /** |
83 | * Mode to normalize according to the |
84 | * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>. |
85 | * <blockquote> |
86 | * The names of variables declared class constants and of ANSI constants should be all uppercase with words |
87 | * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.) |
88 | * </blockquote> |
89 | */ |
90 | CONSTANT_NAME_CONVENTION, |
91 | /** |
92 | * Mode to normalize according to the |
93 | * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>. |
94 | * <blockquote> |
95 | * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each |
96 | * internal word capitalized. |
97 | * </blockquote> |
98 | */ |
99 | METHOD_NAME_CONVENTION, |
100 | /** |
101 | * Mode to normalize according to the |
102 | * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>. |
103 | * <blockquote> |
104 | * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first |
105 | * letter. Internal words start with capital letters. Variable names should not start with underscore _ or |
106 | * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The |
107 | * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the |
108 | * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables. |
109 | * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters. |
110 | * </blockquote> |
111 | */ |
112 | VARIABLE_NAME_CONVENTION |
113 | |
114 | } |
115 | |
116 | /** |
117 | * The value of the instance. |
118 | * @serial |
119 | */ |
120 | private String identifier; |
121 | |
122 | /** Cached instances. */ |
123 | private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache; |
124 | |
125 | /** Serial version UID for backwards compatibility with 1.4.x object streams. */ |
126 | private static final long serialVersionUID = 7600377999055800720L; |
127 | |
128 | /** Underscore character. */ |
129 | private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 ); |
130 | |
131 | /** Creates a new {@code JavaIdentifier} instance. */ |
132 | private JavaIdentifier() |
133 | { |
134 | super(); |
135 | } |
136 | |
137 | /** |
138 | * Returns the length of this character sequence. |
139 | * |
140 | * @return The number of {@code char}s in this sequence. |
141 | */ |
142 | public int length() |
143 | { |
144 | return this.identifier.length(); |
145 | } |
146 | |
147 | /** |
148 | * Returns the {@code char} value at a given index. |
149 | * |
150 | * @param index The index of the {@code char} value to return. |
151 | * |
152 | * @return The {@code char} value at {@code index}. |
153 | * |
154 | * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence. |
155 | */ |
156 | public char charAt( final int index ) |
157 | { |
158 | return this.identifier.charAt( index ); |
159 | } |
160 | |
161 | /** |
162 | * Returns a new {@code CharSequence} that is a subsequence of this sequence. |
163 | * |
164 | * @param start The start index, inclusive. |
165 | * @param end The end index, exclusive. |
166 | * |
167 | * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}. |
168 | * |
169 | * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than |
170 | * the length of the sequence, or if {@code start} is greater than {@code end}. |
171 | */ |
172 | public CharSequence subSequence( final int start, final int end ) |
173 | { |
174 | return this.identifier.subSequence( start, end ); |
175 | } |
176 | |
177 | /** |
178 | * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the |
179 | * string will be the length of this sequence. |
180 | * |
181 | * @return A string consisting of exactly this sequence of characters. |
182 | */ |
183 | @Override |
184 | public String toString() |
185 | { |
186 | return this.identifier; |
187 | } |
188 | |
189 | /** |
190 | * Returns the hash-code value of the object. |
191 | * |
192 | * @return The hash-code value of the object. |
193 | */ |
194 | @Override |
195 | public int hashCode() |
196 | { |
197 | return this.identifier.hashCode(); |
198 | } |
199 | |
200 | /** |
201 | * Tests whether some other object is equal to the object. |
202 | * |
203 | * @param o The object to test. |
204 | * |
205 | * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the |
206 | * string value of the object. |
207 | */ |
208 | @Override |
209 | public boolean equals( final Object o ) |
210 | { |
211 | boolean equal = o == this; |
212 | |
213 | if ( !equal && o instanceof JavaIdentifier ) |
214 | { |
215 | equal = this.toString().equals( o.toString() ); |
216 | } |
217 | |
218 | return equal; |
219 | } |
220 | |
221 | /** |
222 | * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}. |
223 | * |
224 | * @param text The text to normalize. |
225 | * @param mode The normalization to apply. |
226 | * |
227 | * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}. |
228 | * |
229 | * @throws NullPointerException if {@code text} or {@code mode} is {@code null}. |
230 | * @throws ParseException if normalization fails. |
231 | */ |
232 | public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException |
233 | { |
234 | if ( text == null ) |
235 | { |
236 | throw new NullPointerException( "text" ); |
237 | } |
238 | if ( mode == null ) |
239 | { |
240 | throw new NullPointerException( "mode" ); |
241 | } |
242 | |
243 | return parse( text, mode, false ); |
244 | } |
245 | |
246 | /** |
247 | * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance. |
248 | * |
249 | * @param text The text to parse. |
250 | * |
251 | * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}. |
252 | * |
253 | * @throws NullPointerException if {@code text} is {@code null}. |
254 | * @throws ParseException if parsing fails. |
255 | * |
256 | * @see #valueOf(java.lang.String) |
257 | */ |
258 | public static JavaIdentifier parse( final String text ) throws ParseException |
259 | { |
260 | if ( text == null ) |
261 | { |
262 | throw new NullPointerException( "text" ); |
263 | } |
264 | |
265 | return parse( text, null, false ); |
266 | } |
267 | |
268 | /** |
269 | * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance. |
270 | * <p> |
271 | * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing |
272 | * fails. |
273 | * </p> |
274 | * |
275 | * @param text The text to parse. |
276 | * |
277 | * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}. |
278 | * |
279 | * @throws NullPointerException if {@code text} is {@code null}. |
280 | * @throws IllegalArgumentException if parsing fails. |
281 | * |
282 | * @see #parse(java.lang.String) |
283 | */ |
284 | public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException |
285 | { |
286 | if ( text == null ) |
287 | { |
288 | throw new NullPointerException( "text" ); |
289 | } |
290 | |
291 | try |
292 | { |
293 | return parse( text, null, true ); |
294 | } |
295 | catch ( final ParseException e ) |
296 | { |
297 | throw new AssertionError( e ); |
298 | } |
299 | } |
300 | |
301 | private static JavaIdentifier parse( final String text, final NormalizationMode mode, |
302 | final boolean runtimeException ) |
303 | throws ParseException |
304 | { |
305 | Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get(); |
306 | |
307 | if ( map == null ) |
308 | { |
309 | map = new HashMap<CacheKey, JavaIdentifier>( 128 ); |
310 | cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map ); |
311 | } |
312 | |
313 | synchronized ( map ) |
314 | { |
315 | final CacheKey key = new CacheKey( text, mode ); |
316 | JavaIdentifier javaIdentifier = map.get( key ); |
317 | |
318 | if ( javaIdentifier == null ) |
319 | { |
320 | javaIdentifier = new JavaIdentifier(); |
321 | parseIdentifier( javaIdentifier, text, mode, runtimeException ); |
322 | |
323 | if ( mode != null ) |
324 | { |
325 | final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode ); |
326 | final JavaIdentifier normalizedInstance = map.get( normalizedKey ); |
327 | |
328 | if ( normalizedInstance != null ) |
329 | { |
330 | map.put( key, normalizedInstance ); |
331 | javaIdentifier = normalizedInstance; |
332 | } |
333 | else |
334 | { |
335 | map.put( key, javaIdentifier ); |
336 | map.put( normalizedKey, javaIdentifier ); |
337 | } |
338 | } |
339 | else |
340 | { |
341 | map.put( key, javaIdentifier ); |
342 | } |
343 | } |
344 | |
345 | return javaIdentifier; |
346 | } |
347 | } |
348 | |
349 | private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode, |
350 | final boolean runtimeException ) |
351 | throws ParseException |
352 | { |
353 | if ( text.length() <= 0 ) |
354 | { |
355 | if ( runtimeException ) |
356 | { |
357 | throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) ); |
358 | } |
359 | else |
360 | { |
361 | throw new ParseException( getMessage( "invalidEmptyString" ), 0 ); |
362 | } |
363 | } |
364 | |
365 | final StringBuilder identifierBuilder = new StringBuilder( text.length() ); |
366 | final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() ); |
367 | boolean start_of_word = true; |
368 | int words = 0; |
369 | |
370 | for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ ) |
371 | { |
372 | if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) ) |
373 | { |
374 | if ( mode != null ) |
375 | { |
376 | switch ( mode ) |
377 | { |
378 | case CAMEL_CASE: |
379 | if ( start_of_word ) |
380 | { |
381 | identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) ); |
382 | } |
383 | else if ( last_codepoint > -1 && j < s0 |
384 | && isCamelCase( last_codepoint, text.codePointAt( i ), |
385 | text.codePointAt( j ) ) ) |
386 | { // Retain camel-case in words. |
387 | identifierBuilder.append( text.charAt( i ) ); |
388 | retainedIndices.add( identifierBuilder.length() - 1 ); |
389 | } |
390 | else |
391 | { |
392 | identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) ); |
393 | } |
394 | break; |
395 | |
396 | case LOWER_CASE: |
397 | if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT ) |
398 | { |
399 | identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) ); |
400 | } |
401 | |
402 | identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) ); |
403 | break; |
404 | |
405 | case UPPER_CASE: |
406 | case CONSTANT_NAME_CONVENTION: |
407 | if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT ) |
408 | { |
409 | identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) ); |
410 | } |
411 | |
412 | identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) ); |
413 | break; |
414 | |
415 | case VARIABLE_NAME_CONVENTION: |
416 | case METHOD_NAME_CONVENTION: |
417 | if ( start_of_word ) |
418 | { |
419 | identifierBuilder.append( words == 0 |
420 | ? Character.toLowerCase( text.charAt( i ) ) |
421 | : Character.toUpperCase( text.charAt( i ) ) ); |
422 | |
423 | } |
424 | else if ( last_codepoint > -1 && j < s0 |
425 | && isCamelCase( last_codepoint, text.codePointAt( i ), |
426 | text.codePointAt( j ) ) ) |
427 | { // Retain camel-case in words. |
428 | identifierBuilder.append( text.charAt( i ) ); |
429 | retainedIndices.add( identifierBuilder.length() - 1 ); |
430 | } |
431 | else |
432 | { |
433 | identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) ); |
434 | } |
435 | break; |
436 | |
437 | default: |
438 | throw new AssertionError( mode ); |
439 | |
440 | } |
441 | } |
442 | else |
443 | { |
444 | identifierBuilder.append( text.charAt( i ) ); |
445 | } |
446 | |
447 | last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 ); |
448 | start_of_word = false; |
449 | } |
450 | else |
451 | { |
452 | if ( mode != null ) |
453 | { |
454 | if ( !start_of_word ) |
455 | { |
456 | start_of_word = true; |
457 | words++; |
458 | } |
459 | } |
460 | else if ( runtimeException ) |
461 | { |
462 | throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) ); |
463 | } |
464 | else |
465 | { |
466 | throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i ); |
467 | } |
468 | } |
469 | } |
470 | |
471 | if ( words > 0 ) |
472 | { |
473 | // Multiple words - no camel-case retained in any word. |
474 | toLowerCase( identifierBuilder, retainedIndices ); |
475 | } |
476 | |
477 | t.identifier = identifierBuilder.toString(); |
478 | |
479 | if ( t.identifier.length() <= 0 ) |
480 | { |
481 | if ( runtimeException ) |
482 | { |
483 | throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) ); |
484 | } |
485 | else |
486 | { |
487 | throw new ParseException( getMessage( "invalidCharacters", text ), 0 ); |
488 | } |
489 | } |
490 | |
491 | if ( JavaLanguage.KEYWORDS.contains( t.identifier ) |
492 | || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier ) |
493 | || JavaLanguage.NULL_LITERAL.equals( t.identifier ) ) |
494 | { |
495 | if ( mode != null ) |
496 | { |
497 | t.identifier = "_" + t.identifier; |
498 | } |
499 | else if ( runtimeException ) |
500 | { |
501 | throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier, |
502 | text.indexOf( t.identifier ) ) ); |
503 | |
504 | } |
505 | else |
506 | { |
507 | throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ), |
508 | text.indexOf( t.identifier ) ); |
509 | |
510 | } |
511 | } |
512 | } |
513 | |
514 | private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first ) |
515 | { |
516 | return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) ) |
517 | && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) ); |
518 | |
519 | } |
520 | |
521 | private static boolean isCamelCase( final int left, final int middle, final int right ) |
522 | { |
523 | return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right ); |
524 | } |
525 | |
526 | private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices ) |
527 | { |
528 | for ( int i = 0, s0 = indices.size(); i < s0; i++ ) |
529 | { |
530 | final int index = indices.get( i ); |
531 | final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) ); |
532 | stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) ); |
533 | } |
534 | } |
535 | |
536 | private static String getMessage( final String key, final Object... args ) |
537 | { |
538 | return MessageFormat.format( ResourceBundle.getBundle( |
539 | JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ). |
540 | getString( key ), args ); |
541 | |
542 | } |
543 | |
544 | private static final class CacheKey |
545 | { |
546 | |
547 | private final String text; |
548 | |
549 | private final NormalizationMode mode; |
550 | |
551 | private CacheKey( final String text, final NormalizationMode mode ) |
552 | { |
553 | super(); |
554 | this.text = text; |
555 | this.mode = mode; |
556 | } |
557 | |
558 | @Override |
559 | public int hashCode() |
560 | { |
561 | int hc = 23; |
562 | hc = 37 * hc + this.text.hashCode(); |
563 | hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() ); |
564 | return hc; |
565 | } |
566 | |
567 | @Override |
568 | public boolean equals( final Object o ) |
569 | { |
570 | boolean equal = o == this; |
571 | |
572 | if ( !equal && o instanceof CacheKey ) |
573 | { |
574 | final CacheKey that = (CacheKey) o; |
575 | equal = this.mode == that.mode && this.text.equals( that.text ); |
576 | } |
577 | |
578 | return equal; |
579 | } |
580 | |
581 | } |
582 | |
583 | } |