001/*
002 *   Copyright (C) 2012 Christian Schulte <cs@schulte.it>
003 *   All rights reserved.
004 *
005 *   Redistribution and use in source and binary forms, with or without
006 *   modification, are permitted provided that the following conditions
007 *   are met:
008 *
009 *     o Redistributions of source code must retain the above copyright
010 *       notice, this list of conditions and the following disclaimer.
011 *
012 *     o Redistributions in binary form must reproduce the above copyright
013 *       notice, this list of conditions and the following disclaimer in
014 *       the documentation and/or other materials provided with the
015 *       distribution.
016 *
017 *   THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
018 *   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
019 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
020 *   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
021 *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022 *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026 *   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027 *
028 *   $JOMC: JavaIdentifier.java 5106 2016-04-04 19:56:25Z schulte $
029 *
030 */
031package org.jomc.jls;
032
033import java.io.Serializable;
034import java.lang.ref.Reference;
035import java.lang.ref.SoftReference;
036import java.text.MessageFormat;
037import java.text.ParseException;
038import java.util.ArrayList;
039import java.util.HashMap;
040import java.util.List;
041import java.util.Locale;
042import java.util.Map;
043import java.util.ResourceBundle;
044
045/**
046 * Data type of a Java identifier.
047 * <p>
048 * This class provides support for parsing and normalizing text to java identifiers as specified in the Java
049 * Language Specification - Java SE 7 Edition - Chapter 3.8ff.
050 * </p>
051 *
052 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
053 * @version $JOMC: JavaIdentifier.java 5106 2016-04-04 19:56:25Z schulte $
054 * @see #normalize(java.lang.String, org.jomc.jls.JavaIdentifier.NormalizationMode)
055 * @see #parse(java.lang.String)
056 * @see #valueOf(java.lang.String)
057 */
058public final class JavaIdentifier implements CharSequence, Serializable
059{
060
061    /**
062     * Normalization modes.
063     *
064     * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
065     * @version $JOMC: JavaIdentifier.java 5106 2016-04-04 19:56:25Z schulte $
066     * @see JavaIdentifier#normalize(java.lang.String, org.jomc.jls.JavaIdentifier.NormalizationMode)
067     */
068    public static enum NormalizationMode
069    {
070
071        /**
072         * Mode to normalize by compacting words using camel-case.
073         */
074        CAMEL_CASE,
075        /**
076         * Mode to normalize by separating words using '_' and by converting all characters to lower-case.
077         */
078        LOWER_CASE,
079        /**
080         * Mode to normalize by separating words using '_' and by converting all characters to upper-case.
081         */
082        UPPER_CASE,
083        /**
084         * Mode to normalize according to the
085         * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
086         * <blockquote>
087         * The names of variables declared class constants and of ANSI constants should be all uppercase with words
088         * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
089         * </blockquote>
090         */
091        CONSTANT_NAME_CONVENTION,
092        /**
093         * Mode to normalize according to the
094         * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
095         * <blockquote>
096         * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
097         * internal word capitalized.
098         * </blockquote>
099         */
100        METHOD_NAME_CONVENTION,
101        /**
102         * Mode to normalize according to the
103         * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
104         * <blockquote>
105         * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
106         * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
107         * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
108         * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
109         * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
110         * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
111         * </blockquote>
112         */
113        VARIABLE_NAME_CONVENTION
114
115    }
116
117    /**
118     * The value of the instance.
119     *
120     * @serial
121     */
122    private String identifier;
123
124    /**
125     * Cached instances.
126     */
127    private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;
128
129    /**
130     * Serial version UID for backwards compatibility with 7.x object streams.
131     */
132    private static final long serialVersionUID = 7639783770152985285L;
133
134    /**
135     * Underscore character.
136     */
137    private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );
138
139    /**
140     * Creates a new {@code JavaIdentifier} instance.
141     */
142    private JavaIdentifier()
143    {
144        super();
145    }
146
147    /**
148     * Returns the length of this character sequence.
149     *
150     * @return The number of {@code char}s in this sequence.
151     */
152    public int length()
153    {
154        return this.identifier.length();
155    }
156
157    /**
158     * Returns the {@code char} value at a given index.
159     *
160     * @param index The index of the {@code char} value to return.
161     *
162     * @return The {@code char} value at {@code index}.
163     *
164     * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
165     */
166    public char charAt( final int index )
167    {
168        return this.identifier.charAt( index );
169    }
170
171    /**
172     * Returns a new {@code CharSequence} that is a subsequence of this sequence.
173     *
174     * @param start The start index, inclusive.
175     * @param end The end index, exclusive.
176     *
177     * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
178     *
179     * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
180     * the length of the sequence, or if {@code start} is greater than {@code end}.
181     */
182    public CharSequence subSequence( final int start, final int end )
183    {
184        return this.identifier.subSequence( start, end );
185    }
186
187    /**
188     * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
189     * string will be the length of this sequence.
190     *
191     * @return A string consisting of exactly this sequence of characters.
192     */
193    @Override
194    public String toString()
195    {
196        return this.identifier;
197    }
198
199    /**
200     * Returns the hash-code value of the object.
201     *
202     * @return The hash-code value of the object.
203     */
204    @Override
205    public int hashCode()
206    {
207        return this.identifier.hashCode();
208    }
209
210    /**
211     * Tests whether some other object is equal to the object.
212     *
213     * @param o The object to test.
214     *
215     * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
216     * string value of the object.
217     */
218    @Override
219    public boolean equals( final Object o )
220    {
221        boolean equal = o == this;
222
223        if ( !equal && o instanceof JavaIdentifier )
224        {
225            equal = this.toString().equals( o.toString() );
226        }
227
228        return equal;
229    }
230
231    /**
232     * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
233     *
234     * @param text The text to normalize.
235     * @param mode The normalization to apply.
236     *
237     * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
238     *
239     * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
240     * @throws ParseException if normalization fails.
241     */
242    public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
243    {
244        if ( text == null )
245        {
246            throw new NullPointerException( "text" );
247        }
248        if ( mode == null )
249        {
250            throw new NullPointerException( "mode" );
251        }
252
253        return parse( text, mode, false );
254    }
255
256    /**
257     * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
258     *
259     * @param text The text to parse.
260     *
261     * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
262     *
263     * @throws NullPointerException if {@code text} is {@code null}.
264     * @throws ParseException if parsing fails.
265     *
266     * @see #valueOf(java.lang.String)
267     */
268    public static JavaIdentifier parse( final String text ) throws ParseException
269    {
270        if ( text == null )
271        {
272            throw new NullPointerException( "text" );
273        }
274
275        return parse( text, null, false );
276    }
277
278    /**
279     * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
280     * <p>
281     * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
282     * fails.
283     * </p>
284     *
285     * @param text The text to parse.
286     *
287     * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
288     *
289     * @throws NullPointerException if {@code text} is {@code null}.
290     * @throws IllegalArgumentException if parsing fails.
291     *
292     * @see #parse(java.lang.String)
293     */
294    public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
295    {
296        if ( text == null )
297        {
298            throw new NullPointerException( "text" );
299        }
300
301        try
302        {
303            return parse( text, null, true );
304        }
305        catch ( final ParseException e )
306        {
307            throw new AssertionError( e );
308        }
309    }
310
311    private static JavaIdentifier parse( final String text, final NormalizationMode mode,
312                                         final boolean runtimeException )
313        throws ParseException
314    {
315        Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get();
316
317        if ( map == null )
318        {
319            map = new HashMap<CacheKey, JavaIdentifier>( 128 );
320            cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
321        }
322
323        synchronized ( map )
324        {
325            final CacheKey key = new CacheKey( text, mode );
326            JavaIdentifier javaIdentifier = map.get( key );
327
328            if ( javaIdentifier == null )
329            {
330                javaIdentifier = new JavaIdentifier();
331                parseIdentifier( javaIdentifier, text, mode, runtimeException );
332
333                if ( mode != null )
334                {
335                    final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
336                    final JavaIdentifier normalizedInstance = map.get( normalizedKey );
337
338                    if ( normalizedInstance != null )
339                    {
340                        map.put( key, normalizedInstance );
341                        javaIdentifier = normalizedInstance;
342                    }
343                    else
344                    {
345                        map.put( key, javaIdentifier );
346                        map.put( normalizedKey, javaIdentifier );
347                    }
348                }
349                else
350                {
351                    map.put( key, javaIdentifier );
352                }
353            }
354
355            return javaIdentifier;
356        }
357    }
358
359    private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
360                                         final boolean runtimeException )
361        throws ParseException
362    {
363        if ( text.length() <= 0 )
364        {
365            if ( runtimeException )
366            {
367                throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
368            }
369            else
370            {
371                throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
372            }
373        }
374
375        final StringBuilder identifierBuilder = new StringBuilder( text.length() );
376        final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() );
377        boolean start_of_word = true;
378        int words = 0;
379
380        for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ )
381        {
382            if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) )
383            {
384                if ( mode != null )
385                {
386                    switch ( mode )
387                    {
388                        case CAMEL_CASE:
389                            if ( start_of_word )
390                            {
391                                identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
392                            }
393                            else if ( last_codepoint > -1 && j < s0
394                                          && isCamelCase( last_codepoint, text.codePointAt( i ),
395                                                          text.codePointAt( j ) ) )
396                            { // Retain camel-case in words.
397                                identifierBuilder.append( text.charAt( i ) );
398                                retainedIndices.add( identifierBuilder.length() - 1 );
399                            }
400                            else
401                            {
402                                identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
403                            }
404                            break;
405
406                        case LOWER_CASE:
407                            if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
408                            {
409                                identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
410                            }
411
412                            identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
413                            break;
414
415                        case UPPER_CASE:
416                        case CONSTANT_NAME_CONVENTION:
417                            if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT )
418                            {
419                                identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) );
420                            }
421
422                            identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) );
423                            break;
424
425                        case VARIABLE_NAME_CONVENTION:
426                        case METHOD_NAME_CONVENTION:
427                            if ( start_of_word )
428                            {
429                                identifierBuilder.append( words == 0
430                                                              ? Character.toLowerCase( text.charAt( i ) )
431                                                              : Character.toUpperCase( text.charAt( i ) ) );
432
433                            }
434                            else if ( last_codepoint > -1 && j < s0
435                                          && isCamelCase( last_codepoint, text.codePointAt( i ),
436                                                          text.codePointAt( j ) ) )
437                            { // Retain camel-case in words.
438                                identifierBuilder.append( text.charAt( i ) );
439                                retainedIndices.add( identifierBuilder.length() - 1 );
440                            }
441                            else
442                            {
443                                identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) );
444                            }
445                            break;
446
447                        default:
448                            throw new AssertionError( mode );
449
450                    }
451                }
452                else
453                {
454                    identifierBuilder.append( text.charAt( i ) );
455                }
456
457                last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 );
458                start_of_word = false;
459            }
460            else
461            {
462                if ( mode != null )
463                {
464                    if ( !start_of_word )
465                    {
466                        start_of_word = true;
467                        words++;
468                    }
469                }
470                else if ( runtimeException )
471                {
472                    throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) );
473                }
474                else
475                {
476                    throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i );
477                }
478            }
479        }
480
481        if ( words > 0 )
482        {
483            // Multiple words - no camel-case retained in any word.
484            toLowerCase( identifierBuilder, retainedIndices );
485        }
486
487        t.identifier = identifierBuilder.toString();
488
489        if ( t.identifier.length() <= 0 )
490        {
491            if ( runtimeException )
492            {
493                throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
494            }
495            else
496            {
497                throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
498            }
499        }
500
501        if ( JavaLanguage.KEYWORDS.contains( t.identifier )
502                 || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
503                 || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
504        {
505            if ( mode != null )
506            {
507                t.identifier = "_" + t.identifier;
508            }
509            else if ( runtimeException )
510            {
511                throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
512                                                                text.indexOf( t.identifier ) ) );
513
514            }
515            else
516            {
517                throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
518                                          text.indexOf( t.identifier ) );
519
520            }
521        }
522    }
523
524    private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
525    {
526        return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
527                  && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );
528
529    }
530
531    private static boolean isCamelCase( final int left, final int middle, final int right )
532    {
533        return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
534    }
535
536    private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
537    {
538        for ( int i = 0, s0 = indices.size(); i < s0; i++ )
539        {
540            final int index = indices.get( i );
541            final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) );
542            stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) );
543        }
544    }
545
546    private static String getMessage( final String key, final Object... args )
547    {
548        return MessageFormat.format( ResourceBundle.getBundle(
549            JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
550            getString( key ), args );
551
552    }
553
554    private static final class CacheKey
555    {
556
557        private final String text;
558
559        private final NormalizationMode mode;
560
561        private CacheKey( final String text, final NormalizationMode mode )
562        {
563            super();
564            this.text = text;
565            this.mode = mode;
566        }
567
568        @Override
569        public int hashCode()
570        {
571            int hc = 23;
572            hc = 37 * hc + this.text.hashCode();
573            hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
574            return hc;
575        }
576
577        @Override
578        public boolean equals( final Object o )
579        {
580            boolean equal = o == this;
581
582            if ( !equal && o instanceof CacheKey )
583            {
584                final CacheKey that = (CacheKey) o;
585                equal = this.mode == that.mode && this.text.equals( that.text );
586            }
587
588            return equal;
589        }
590
591    }
592
593}