1 /*
2 * Copyright (C) Christian Schulte, 2012-253
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * o Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * o Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
18 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
19 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * $JOMC: JavaIdentifier.java 4962 2014-09-06 23:58:48Z schulte $
29 *
30 */
31 package org.jomc.model;
32
33 import java.io.Serializable;
34 import java.lang.ref.Reference;
35 import java.lang.ref.SoftReference;
36 import java.text.MessageFormat;
37 import java.text.ParseException;
38 import java.util.ArrayList;
39 import java.util.HashMap;
40 import java.util.List;
41 import java.util.Locale;
42 import java.util.Map;
43 import java.util.ResourceBundle;
44
45 /**
46 * Data type of a Java identifier.
47 * <p>
48 * This class provides support for parsing and normalizing text to java identifiers as specified in the Java
49 * Language Specification - Java SE 7 Edition - Chapter 3.8ff.
50 * </p>
51 * <p>
52 * <i>Please note that this class will move to package {@code org.jomc.util} in JOMC 2.0.</i>
53 * </p>
54 *
55 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
56 * @version $JOMC: JavaIdentifier.java 4962 2014-09-06 23:58:48Z schulte $
57 * @see #normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
58 * @see #parse(java.lang.String)
59 * @see #valueOf(java.lang.String)
60 * @since 1.4
61 */
62 public final class JavaIdentifier implements CharSequence, Serializable
63 {
64
65 /**
66 * Normalization modes.
67 *
68 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
69 * @version $JOMC: JavaIdentifier.java 4962 2014-09-06 23:58:48Z schulte $
70 * @since 1.4
71 * @see JavaIdentifier#normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
72 */
73 public static enum NormalizationMode
74 {
75
76 /** Mode to normalize by compacting words using camel-case. */
77 CAMEL_CASE,
78 /** Mode to normalize by separating words using '_' and by converting all characters to lower-case. */
79 LOWER_CASE,
80 /** Mode to normalize by separating words using '_' and by converting all characters to upper-case. */
81 UPPER_CASE,
82 /**
83 * Mode to normalize according to the
84 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
85 * <blockquote>
86 * The names of variables declared class constants and of ANSI constants should be all uppercase with words
87 * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
88 * </blockquote>
89 */
90 CONSTANT_NAME_CONVENTION,
91 /**
92 * Mode to normalize according to the
93 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
94 * <blockquote>
95 * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
96 * internal word capitalized.
97 * </blockquote>
98 */
99 METHOD_NAME_CONVENTION,
100 /**
101 * Mode to normalize according to the
102 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
103 * <blockquote>
104 * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
105 * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
106 * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
107 * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
108 * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
109 * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
110 * </blockquote>
111 */
112 VARIABLE_NAME_CONVENTION
113
114 }
115
116 /**
117 * The value of the instance.
118 * @serial
119 */
120 private String identifier;
121
122 /** Cached instances. */
123 private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;
124
125 /** Serial version UID for backwards compatibility with 1.4.x object streams. */
126 private static final long serialVersionUID = 7600377999055800720L;
127
128 /** Underscore character. */
129 private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );
130
131 /** Creates a new {@code JavaIdentifier} instance. */
132 private JavaIdentifier()
133 {
134 super();
135 }
136
137 /**
138 * Returns the length of this character sequence.
139 *
140 * @return The number of {@code char}s in this sequence.
141 */
142 public int length()
143 {
144 return this.identifier.length();
145 }
146
147 /**
148 * Returns the {@code char} value at a given index.
149 *
150 * @param index The index of the {@code char} value to return.
151 *
152 * @return The {@code char} value at {@code index}.
153 *
154 * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
155 */
156 public char charAt( final int index )
157 {
158 return this.identifier.charAt( index );
159 }
160
161 /**
162 * Returns a new {@code CharSequence} that is a subsequence of this sequence.
163 *
164 * @param start The start index, inclusive.
165 * @param end The end index, exclusive.
166 *
167 * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
168 *
169 * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
170 * the length of the sequence, or if {@code start} is greater than {@code end}.
171 */
172 public CharSequence subSequence( final int start, final int end )
173 {
174 return this.identifier.subSequence( start, end );
175 }
176
177 /**
178 * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
179 * string will be the length of this sequence.
180 *
181 * @return A string consisting of exactly this sequence of characters.
182 */
183 @Override
184 public String toString()
185 {
186 return this.identifier;
187 }
188
189 /**
190 * Returns the hash-code value of the object.
191 *
192 * @return The hash-code value of the object.
193 */
194 @Override
195 public int hashCode()
196 {
197 return this.identifier.hashCode();
198 }
199
200 /**
201 * Tests whether some other object is equal to the object.
202 *
203 * @param o The object to test.
204 *
205 * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
206 * string value of the object.
207 */
208 @Override
209 public boolean equals( final Object o )
210 {
211 boolean equal = o == this;
212
213 if ( !equal && o instanceof JavaIdentifier )
214 {
215 equal = this.toString().equals( o.toString() );
216 }
217
218 return equal;
219 }
220
221 /**
222 * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
223 *
224 * @param text The text to normalize.
225 * @param mode The normalization to apply.
226 *
227 * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
228 *
229 * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
230 * @throws ParseException if normalization fails.
231 */
232 public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
233 {
234 if ( text == null )
235 {
236 throw new NullPointerException( "text" );
237 }
238 if ( mode == null )
239 {
240 throw new NullPointerException( "mode" );
241 }
242
243 return parse( text, mode, false );
244 }
245
246 /**
247 * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
248 *
249 * @param text The text to parse.
250 *
251 * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
252 *
253 * @throws NullPointerException if {@code text} is {@code null}.
254 * @throws ParseException if parsing fails.
255 *
256 * @see #valueOf(java.lang.String)
257 */
258 public static JavaIdentifier parse( final String text ) throws ParseException
259 {
260 if ( text == null )
261 {
262 throw new NullPointerException( "text" );
263 }
264
265 return parse( text, null, false );
266 }
267
268 /**
269 * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
270 * <p>
271 * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
272 * fails.
273 * </p>
274 *
275 * @param text The text to parse.
276 *
277 * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
278 *
279 * @throws NullPointerException if {@code text} is {@code null}.
280 * @throws IllegalArgumentException if parsing fails.
281 *
282 * @see #parse(java.lang.String)
283 */
284 public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
285 {
286 if ( text == null )
287 {
288 throw new NullPointerException( "text" );
289 }
290
291 try
292 {
293 return parse( text, null, true );
294 }
295 catch ( final ParseException e )
296 {
297 throw new AssertionError( e );
298 }
299 }
300
301 private static JavaIdentifier parse( final String text, final NormalizationMode mode,
302 final boolean runtimeException )
303 throws ParseException
304 {
305 Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get();
306
307 if ( map == null )
308 {
309 map = new HashMap<CacheKey, JavaIdentifier>( 128 );
310 cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
311 }
312
313 synchronized ( map )
314 {
315 final CacheKey key = new CacheKey( text, mode );
316 JavaIdentifier javaIdentifier = map.get( key );
317
318 if ( javaIdentifier == null )
319 {
320 javaIdentifier = new JavaIdentifier();
321 parseIdentifier( javaIdentifier, text, mode, runtimeException );
322
323 if ( mode != null )
324 {
325 final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
326 final JavaIdentifier normalizedInstance = map.get( normalizedKey );
327
328 if ( normalizedInstance != null )
329 {
330 map.put( key, normalizedInstance );
331 javaIdentifier = normalizedInstance;
332 }
333 else
334 {
335 map.put( key, javaIdentifier );
336 map.put( normalizedKey, javaIdentifier );
337 }
338 }
339 else
340 {
341 map.put( key, javaIdentifier );
342 }
343 }
344
345 return javaIdentifier;
346 }
347 }
348
349 private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
350 final boolean runtimeException )
351 throws ParseException
352 {
353 if ( text.length() <= 0 )
354 {
355 if ( runtimeException )
356 {
357 throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
358 }
359 else
360 {
361 throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
362 }
363 }
364
365 final StringBuilder identifierBuilder = new StringBuilder( text.length() );
366 final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() );
367 boolean start_of_word = true;
368 int words = 0;
369
370 for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ )
371 {
372 if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) )
373 {
374 if ( mode != null )
375 {
376 switch ( mode )
377 {
378 case CAMEL_CASE:
379 if ( start_of_word )
380 {
381 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
382 }
383 else if ( last_codepoint > -1 && j < s0
384 && isCamelCase( last_codepoint, text.codePointAt( i ),
385 text.codePointAt( j ) ) )
386 { // Retain camel-case in words.
387 identifierBuilder.append( text.charAt( i ) );
388 retainedIndices.add( identifierBuilder.length() - 1 );
389 }
390 else
391 {
392 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
393 }
394 break;
395
396 case LOWER_CASE:
397 if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
398 {
399 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
400 }
401
402 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
403 break;
404
405 case UPPER_CASE:
406 case CONSTANT_NAME_CONVENTION:
407 if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
408 {
409 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
410 }
411
412 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
413 break;
414
415 case VARIABLE_NAME_CONVENTION:
416 case METHOD_NAME_CONVENTION:
417 if ( start_of_word )
418 {
419 identifierBuilder.append( words == 0
420 ? Character.toLowerCase( text.charAt( i ) )
421 : Character.toUpperCase( text.charAt( i ) ) );
422
423 }
424 else if ( last_codepoint > -1 && j < s0
425 && isCamelCase( last_codepoint, text.codePointAt( i ),
426 text.codePointAt( j ) ) )
427 { // Retain camel-case in words.
428 identifierBuilder.append( text.charAt( i ) );
429 retainedIndices.add( identifierBuilder.length() - 1 );
430 }
431 else
432 {
433 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
434 }
435 break;
436
437 default:
438 throw new AssertionError( mode );
439
440 }
441 }
442 else
443 {
444 identifierBuilder.append( text.charAt( i ) );
445 }
446
447 last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 );
448 start_of_word = false;
449 }
450 else
451 {
452 if ( mode != null )
453 {
454 if ( !start_of_word )
455 {
456 start_of_word = true;
457 words++;
458 }
459 }
460 else if ( runtimeException )
461 {
462 throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) );
463 }
464 else
465 {
466 throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i );
467 }
468 }
469 }
470
471 if ( words > 0 )
472 {
473 // Multiple words - no camel-case retained in any word.
474 toLowerCase( identifierBuilder, retainedIndices );
475 }
476
477 t.identifier = identifierBuilder.toString();
478
479 if ( t.identifier.length() <= 0 )
480 {
481 if ( runtimeException )
482 {
483 throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
484 }
485 else
486 {
487 throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
488 }
489 }
490
491 if ( JavaLanguage.KEYWORDS.contains( t.identifier )
492 || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
493 || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
494 {
495 if ( mode != null )
496 {
497 t.identifier = "_" + t.identifier;
498 }
499 else if ( runtimeException )
500 {
501 throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
502 text.indexOf( t.identifier ) ) );
503
504 }
505 else
506 {
507 throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
508 text.indexOf( t.identifier ) );
509
510 }
511 }
512 }
513
514 private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
515 {
516 return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
517 && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );
518
519 }
520
521 private static boolean isCamelCase( final int left, final int middle, final int right )
522 {
523 return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
524 }
525
526 private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
527 {
528 for ( int i = 0, s0 = indices.size(); i < s0; i++ )
529 {
530 final int index = indices.get( i );
531 final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) );
532 stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) );
533 }
534 }
535
536 private static String getMessage( final String key, final Object... args )
537 {
538 return MessageFormat.format( ResourceBundle.getBundle(
539 JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
540 getString( key ), args );
541
542 }
543
544 private static final class CacheKey
545 {
546
547 private final String text;
548
549 private final NormalizationMode mode;
550
551 private CacheKey( final String text, final NormalizationMode mode )
552 {
553 super();
554 this.text = text;
555 this.mode = mode;
556 }
557
558 @Override
559 public int hashCode()
560 {
561 int hc = 23;
562 hc = 37 * hc + this.text.hashCode();
563 hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
564 return hc;
565 }
566
567 @Override
568 public boolean equals( final Object o )
569 {
570 boolean equal = o == this;
571
572 if ( !equal && o instanceof CacheKey )
573 {
574 final CacheKey that = (CacheKey) o;
575 equal = this.mode == that.mode && this.text.equals( that.text );
576 }
577
578 return equal;
579 }
580
581 }
582
583 }