View Javadoc
1   /*
2    *   Copyright (C) Christian Schulte, 2012-253
3    *   All rights reserved.
4    *
5    *   Redistribution and use in source and binary forms, with or without
6    *   modification, are permitted provided that the following conditions
7    *   are met:
8    *
9    *     o Redistributions of source code must retain the above copyright
10   *       notice, this list of conditions and the following disclaimer.
11   *
12   *     o Redistributions in binary form must reproduce the above copyright
13   *       notice, this list of conditions and the following disclaimer in
14   *       the documentation and/or other materials provided with the
15   *       distribution.
16   *
17   *   THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
18   *   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
19   *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
20   *   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
21   *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22   *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23   *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24   *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25   *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26   *   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27   *
28   *   $JOMC: JavaIdentifier.java 4962 2014-09-06 23:58:48Z schulte $
29   *
30   */
31  package org.jomc.model;
32  
33  import java.io.Serializable;
34  import java.lang.ref.Reference;
35  import java.lang.ref.SoftReference;
36  import java.text.MessageFormat;
37  import java.text.ParseException;
38  import java.util.ArrayList;
39  import java.util.HashMap;
40  import java.util.List;
41  import java.util.Locale;
42  import java.util.Map;
43  import java.util.ResourceBundle;
44  
45  /**
46   * Data type of a Java identifier.
47   * <p>
48   * This class provides support for parsing and normalizing text to java identifiers as specified in the Java
49   * Language Specification - Java SE 7 Edition - Chapter 3.8ff.
50   * </p>
51   * <p>
52   * <i>Please note that this class will move to package {@code org.jomc.util} in JOMC 2.0.</i>
53   * </p>
54   *
55   * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
56   * @version $JOMC: JavaIdentifier.java 4962 2014-09-06 23:58:48Z schulte $
57   * @see #normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
58   * @see #parse(java.lang.String)
59   * @see #valueOf(java.lang.String)
60   * @since 1.4
61   */
62  public final class JavaIdentifier implements CharSequence, Serializable
63  {
64  
65      /**
66       * Normalization modes.
67       *
68       * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
69       * @version $JOMC: JavaIdentifier.java 4962 2014-09-06 23:58:48Z schulte $
70       * @since 1.4
71       * @see JavaIdentifier#normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
72       */
73      public static enum NormalizationMode
74      {
75  
76          /** Mode to normalize by compacting words using camel-case. */
77          CAMEL_CASE,
78          /** Mode to normalize by separating words using '_' and by converting all characters to lower-case. */
79          LOWER_CASE,
80          /** Mode to normalize by separating words using '_' and by converting all characters to upper-case. */
81          UPPER_CASE,
82          /**
83           * Mode to normalize according to the
84           * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
85           * <blockquote>
86           * The names of variables declared class constants and of ANSI constants should be all uppercase with words
87           * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
88           * </blockquote>
89           */
90          CONSTANT_NAME_CONVENTION,
91          /**
92           * Mode to normalize according to the
93           * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
94           * <blockquote>
95           * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
96           * internal word capitalized.
97           * </blockquote>
98           */
99          METHOD_NAME_CONVENTION,
100         /**
101          * Mode to normalize according to the
102          * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
103          * <blockquote>
104          * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
105          * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
106          * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
107          * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
108          * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
109          * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
110          * </blockquote>
111          */
112         VARIABLE_NAME_CONVENTION
113 
114     }
115 
116     /**
117      * The value of the instance.
118      * @serial
119      */
120     private String identifier;
121 
122     /** Cached instances. */
123     private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;
124 
125     /** Serial version UID for backwards compatibility with 1.4.x object streams. */
126     private static final long serialVersionUID = 7600377999055800720L;
127 
128     /** Underscore character. */
129     private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );
130 
131     /** Creates a new {@code JavaIdentifier} instance. */
132     private JavaIdentifier()
133     {
134         super();
135     }
136 
137     /**
138      * Returns the length of this character sequence.
139      *
140      * @return The number of {@code char}s in this sequence.
141      */
142     public int length()
143     {
144         return this.identifier.length();
145     }
146 
147     /**
148      * Returns the {@code char} value at a given index.
149      *
150      * @param index The index of the {@code char} value to return.
151      *
152      * @return The {@code char} value at {@code index}.
153      *
154      * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
155      */
156     public char charAt( final int index )
157     {
158         return this.identifier.charAt( index );
159     }
160 
161     /**
162      * Returns a new {@code CharSequence} that is a subsequence of this sequence.
163      *
164      * @param start The start index, inclusive.
165      * @param end The end index, exclusive.
166      *
167      * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
168      *
169      * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
170      * the length of the sequence, or if {@code start} is greater than {@code end}.
171      */
172     public CharSequence subSequence( final int start, final int end )
173     {
174         return this.identifier.subSequence( start, end );
175     }
176 
177     /**
178      * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
179      * string will be the length of this sequence.
180      *
181      * @return A string consisting of exactly this sequence of characters.
182      */
183     @Override
184     public String toString()
185     {
186         return this.identifier;
187     }
188 
189     /**
190      * Returns the hash-code value of the object.
191      *
192      * @return The hash-code value of the object.
193      */
194     @Override
195     public int hashCode()
196     {
197         return this.identifier.hashCode();
198     }
199 
200     /**
201      * Tests whether some other object is equal to the object.
202      *
203      * @param o The object to test.
204      *
205      * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
206      * string value of the object.
207      */
208     @Override
209     public boolean equals( final Object o )
210     {
211         boolean equal = o == this;
212 
213         if ( !equal && o instanceof JavaIdentifier )
214         {
215             equal = this.toString().equals( o.toString() );
216         }
217 
218         return equal;
219     }
220 
221     /**
222      * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
223      *
224      * @param text The text to normalize.
225      * @param mode The normalization to apply.
226      *
227      * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
228      *
229      * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
230      * @throws ParseException if normalization fails.
231      */
232     public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
233     {
234         if ( text == null )
235         {
236             throw new NullPointerException( "text" );
237         }
238         if ( mode == null )
239         {
240             throw new NullPointerException( "mode" );
241         }
242 
243         return parse( text, mode, false );
244     }
245 
246     /**
247      * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
248      *
249      * @param text The text to parse.
250      *
251      * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
252      *
253      * @throws NullPointerException if {@code text} is {@code null}.
254      * @throws ParseException if parsing fails.
255      *
256      * @see #valueOf(java.lang.String)
257      */
258     public static JavaIdentifier parse( final String text ) throws ParseException
259     {
260         if ( text == null )
261         {
262             throw new NullPointerException( "text" );
263         }
264 
265         return parse( text, null, false );
266     }
267 
268     /**
269      * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
270      * <p>
271      * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
272      * fails.
273      * </p>
274      *
275      * @param text The text to parse.
276      *
277      * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
278      *
279      * @throws NullPointerException if {@code text} is {@code null}.
280      * @throws IllegalArgumentException if parsing fails.
281      *
282      * @see #parse(java.lang.String)
283      */
284     public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
285     {
286         if ( text == null )
287         {
288             throw new NullPointerException( "text" );
289         }
290 
291         try
292         {
293             return parse( text, null, true );
294         }
295         catch ( final ParseException e )
296         {
297             throw new AssertionError( e );
298         }
299     }
300 
301     private static JavaIdentifier parse( final String text, final NormalizationMode mode,
302                                          final boolean runtimeException )
303         throws ParseException
304     {
305         Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get();
306 
307         if ( map == null )
308         {
309             map = new HashMap<CacheKey, JavaIdentifier>( 128 );
310             cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
311         }
312 
313         synchronized ( map )
314         {
315             final CacheKey key = new CacheKey( text, mode );
316             JavaIdentifier javaIdentifier = map.get( key );
317 
318             if ( javaIdentifier == null )
319             {
320                 javaIdentifier = new JavaIdentifier();
321                 parseIdentifier( javaIdentifier, text, mode, runtimeException );
322 
323                 if ( mode != null )
324                 {
325                     final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
326                     final JavaIdentifier normalizedInstance = map.get( normalizedKey );
327 
328                     if ( normalizedInstance != null )
329                     {
330                         map.put( key, normalizedInstance );
331                         javaIdentifier = normalizedInstance;
332                     }
333                     else
334                     {
335                         map.put( key, javaIdentifier );
336                         map.put( normalizedKey, javaIdentifier );
337                     }
338                 }
339                 else
340                 {
341                     map.put( key, javaIdentifier );
342                 }
343             }
344 
345             return javaIdentifier;
346         }
347     }
348 
349     private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
350                                          final boolean runtimeException )
351         throws ParseException
352     {
353         if ( text.length() <= 0 )
354         {
355             if ( runtimeException )
356             {
357                 throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
358             }
359             else
360             {
361                 throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
362             }
363         }
364 
365         final StringBuilder identifierBuilder = new StringBuilder( text.length() );
366         final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() );
367         boolean start_of_word = true;
368         int words = 0;
369 
370         for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ )
371         {
372             if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) )
373             {
374                 if ( mode != null )
375                 {
376                     switch ( mode )
377                     {
378                         case CAMEL_CASE:
379                             if ( start_of_word )
380                             {
381                                 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
382                             }
383                             else if ( last_codepoint > -1 && j < s0
384                                           && isCamelCase( last_codepoint, text.codePointAt( i ),
385                                                           text.codePointAt( j ) ) )
386                             { // Retain camel-case in words.
387                                 identifierBuilder.append( text.charAt( i ) );
388                                 retainedIndices.add( identifierBuilder.length() - 1 );
389                             }
390                             else
391                             {
392                                 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
393                             }
394                             break;
395 
396                         case LOWER_CASE:
397                             if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
398                             {
399                                 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
400                             }
401 
402                             identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
403                             break;
404 
405                         case UPPER_CASE:
406                         case CONSTANT_NAME_CONVENTION:
407                             if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
408                             {
409                                 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
410                             }
411 
412                             identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
413                             break;
414 
415                         case VARIABLE_NAME_CONVENTION:
416                         case METHOD_NAME_CONVENTION:
417                             if ( start_of_word )
418                             {
419                                 identifierBuilder.append( words == 0
420                                                               ? Character.toLowerCase( text.charAt( i ) )
421                                                               : Character.toUpperCase( text.charAt( i ) ) );
422 
423                             }
424                             else if ( last_codepoint > -1 && j < s0
425                                           && isCamelCase( last_codepoint, text.codePointAt( i ),
426                                                           text.codePointAt( j ) ) )
427                             { // Retain camel-case in words.
428                                 identifierBuilder.append( text.charAt( i ) );
429                                 retainedIndices.add( identifierBuilder.length() - 1 );
430                             }
431                             else
432                             {
433                                 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
434                             }
435                             break;
436 
437                         default:
438                             throw new AssertionError( mode );
439 
440                     }
441                 }
442                 else
443                 {
444                     identifierBuilder.append( text.charAt( i ) );
445                 }
446 
447                 last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 );
448                 start_of_word = false;
449             }
450             else
451             {
452                 if ( mode != null )
453                 {
454                     if ( !start_of_word )
455                     {
456                         start_of_word = true;
457                         words++;
458                     }
459                 }
460                 else if ( runtimeException )
461                 {
462                     throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) );
463                 }
464                 else
465                 {
466                     throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i );
467                 }
468             }
469         }
470 
471         if ( words > 0 )
472         {
473             // Multiple words - no camel-case retained in any word.
474             toLowerCase( identifierBuilder, retainedIndices );
475         }
476 
477         t.identifier = identifierBuilder.toString();
478 
479         if ( t.identifier.length() <= 0 )
480         {
481             if ( runtimeException )
482             {
483                 throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
484             }
485             else
486             {
487                 throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
488             }
489         }
490 
491         if ( JavaLanguage.KEYWORDS.contains( t.identifier )
492                  || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
493                  || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
494         {
495             if ( mode != null )
496             {
497                 t.identifier = "_" + t.identifier;
498             }
499             else if ( runtimeException )
500             {
501                 throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
502                                                                 text.indexOf( t.identifier ) ) );
503 
504             }
505             else
506             {
507                 throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
508                                           text.indexOf( t.identifier ) );
509 
510             }
511         }
512     }
513 
514     private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
515     {
516         return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
517                   && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );
518 
519     }
520 
521     private static boolean isCamelCase( final int left, final int middle, final int right )
522     {
523         return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
524     }
525 
526     private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
527     {
528         for ( int i = 0, s0 = indices.size(); i < s0; i++ )
529         {
530             final int index = indices.get( i );
531             final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) );
532             stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) );
533         }
534     }
535 
536     private static String getMessage( final String key, final Object... args )
537     {
538         return MessageFormat.format( ResourceBundle.getBundle(
539             JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
540             getString( key ), args );
541 
542     }
543 
544     private static final class CacheKey
545     {
546 
547         private final String text;
548 
549         private final NormalizationMode mode;
550 
551         private CacheKey( final String text, final NormalizationMode mode )
552         {
553             super();
554             this.text = text;
555             this.mode = mode;
556         }
557 
558         @Override
559         public int hashCode()
560         {
561             int hc = 23;
562             hc = 37 * hc + this.text.hashCode();
563             hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
564             return hc;
565         }
566 
567         @Override
568         public boolean equals( final Object o )
569         {
570             boolean equal = o == this;
571 
572             if ( !equal && o instanceof CacheKey )
573             {
574                 final CacheKey that = (CacheKey) o;
575                 equal = this.mode == that.mode && this.text.equals( that.text );
576             }
577 
578             return equal;
579         }
580 
581     }
582 
583 }