001/* 002 * Copyright (C) Christian Schulte, 2012-253 003 * All rights reserved. 004 * 005 * Redistribution and use in source and binary forms, with or without 006 * modification, are permitted provided that the following conditions 007 * are met: 008 * 009 * o Redistributions of source code must retain the above copyright 010 * notice, this list of conditions and the following disclaimer. 011 * 012 * o Redistributions in binary form must reproduce the above copyright 013 * notice, this list of conditions and the following disclaimer in 014 * the documentation and/or other materials provided with the 015 * distribution. 016 * 017 * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 018 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY 019 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 020 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, 021 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 022 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 023 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 024 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 025 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 026 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 027 * 028 * $JOMC: JavaIdentifier.java 4804 2013-04-22 05:07:33Z schulte $ 029 * 030 */ 031package org.jomc.model; 032 033import java.io.Serializable; 034import java.lang.ref.Reference; 035import java.lang.ref.SoftReference; 036import java.text.MessageFormat; 037import java.text.ParseException; 038import java.util.ArrayList; 039import java.util.HashMap; 040import java.util.List; 041import java.util.Locale; 042import java.util.Map; 043import java.util.ResourceBundle; 044 045/** 046 * Data type of a Java identifier. 047 * <p>This class provides support for parsing and normalizing text to java identifiers as specified in the Java 048 * Language Specification - Java SE 7 Edition - Chapter 3.8ff.</p> 049 * 050 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a> 051 * @version $JOMC: JavaIdentifier.java 4804 2013-04-22 05:07:33Z schulte $ 052 * @see #normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode) 053 * @see #parse(java.lang.String) 054 * @see #valueOf(java.lang.String) 055 * @since 1.4 056 */ 057public final class JavaIdentifier implements CharSequence, Serializable 058{ 059 060 /** 061 * Normalization modes. 062 * 063 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a> 064 * @version $JOMC: JavaIdentifier.java 4804 2013-04-22 05:07:33Z schulte $ 065 * @since 1.4 066 * @see JavaIdentifier#normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode) 067 */ 068 public static enum NormalizationMode 069 { 070 071 /** Mode to normalize by compacting words using camel-case. */ 072 CAMEL_CASE, 073 /** Mode to normalize by separating words using '_' and by converting all characters to lower-case. */ 074 LOWER_CASE, 075 /** Mode to normalize by separating words using '_' and by converting all characters to upper-case. */ 076 UPPER_CASE, 077 /** 078 * Mode to normalize according to the 079 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>. 080 * <blockquote> 081 * The names of variables declared class constants and of ANSI constants should be all uppercase with words 082 * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.) 083 * </blockquote> 084 */ 085 CONSTANT_NAME_CONVENTION, 086 /** 087 * Mode to normalize according to the 088 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>. 089 * <blockquote> 090 * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each 091 * internal word capitalized. 092 * </blockquote> 093 */ 094 METHOD_NAME_CONVENTION, 095 /** 096 * Mode to normalize according to the 097 * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>. 098 * <blockquote> 099 * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first 100 * letter. Internal words start with capital letters. Variable names should not start with underscore _ or 101 * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The 102 * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the 103 * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables. 104 * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters. 105 * </blockquote> 106 */ 107 VARIABLE_NAME_CONVENTION 108 109 } 110 111 /** 112 * The value of the instance. 113 * @serial 114 */ 115 private String identifier; 116 117 /** Cached instances. */ 118 private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache; 119 120 /** Serial version UID for backwards compatibility with 1.4.x object streams. */ 121 private static final long serialVersionUID = 7600377999055800720L; 122 123 /** Underscore character. */ 124 private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 ); 125 126 /** Creates a new {@code JavaIdentifier} instance. */ 127 private JavaIdentifier() 128 { 129 super(); 130 } 131 132 /** 133 * Returns the length of this character sequence. 134 * 135 * @return The number of {@code char}s in this sequence. 136 */ 137 public int length() 138 { 139 return this.identifier.length(); 140 } 141 142 /** 143 * Returns the {@code char} value at a given index. 144 * 145 * @param index The index of the {@code char} value to return. 146 * 147 * @return The {@code char} value at {@code index}. 148 * 149 * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence. 150 */ 151 public char charAt( final int index ) 152 { 153 return this.identifier.charAt( index ); 154 } 155 156 /** 157 * Returns a new {@code CharSequence} that is a subsequence of this sequence. 158 * 159 * @param start The start index, inclusive. 160 * @param end The end index, exclusive. 161 * 162 * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}. 163 * 164 * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than 165 * the length of the sequence, or if {@code start} is greater than {@code end}. 166 */ 167 public CharSequence subSequence( final int start, final int end ) 168 { 169 return this.identifier.subSequence( start, end ); 170 } 171 172 /** 173 * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the 174 * string will be the length of this sequence. 175 * 176 * @return A string consisting of exactly this sequence of characters. 177 */ 178 @Override 179 public String toString() 180 { 181 return this.identifier; 182 } 183 184 /** 185 * Returns the hash-code value of the object. 186 * 187 * @return The hash-code value of the object. 188 */ 189 @Override 190 public int hashCode() 191 { 192 return this.identifier.hashCode(); 193 } 194 195 /** 196 * Tests whether some other object is equal to the object. 197 * 198 * @param o The object to test. 199 * 200 * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the 201 * string value of the object. 202 */ 203 @Override 204 public boolean equals( final Object o ) 205 { 206 boolean equal = o == this; 207 208 if ( !equal && o instanceof JavaIdentifier ) 209 { 210 equal = this.toString().equals( o.toString() ); 211 } 212 213 return equal; 214 } 215 216 /** 217 * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}. 218 * 219 * @param text The text to normalize. 220 * @param mode The normalization to apply. 221 * 222 * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}. 223 * 224 * @throws NullPointerException if {@code text} or {@code mode} is {@code null}. 225 * @throws ParseException if normalization fails. 226 */ 227 public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException 228 { 229 if ( text == null ) 230 { 231 throw new NullPointerException( "text" ); 232 } 233 if ( mode == null ) 234 { 235 throw new NullPointerException( "mode" ); 236 } 237 238 return parse( text, mode, false ); 239 } 240 241 /** 242 * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance. 243 * 244 * @param text The text to parse. 245 * 246 * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}. 247 * 248 * @throws NullPointerException if {@code text} is {@code null}. 249 * @throws ParseException if parsing fails. 250 * 251 * @see #valueOf(java.lang.String) 252 */ 253 public static JavaIdentifier parse( final String text ) throws ParseException 254 { 255 if ( text == null ) 256 { 257 throw new NullPointerException( "text" ); 258 } 259 260 return parse( text, null, false ); 261 } 262 263 /** 264 * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance. 265 * <p>Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing 266 * fails.</p> 267 * 268 * @param text The text to parse. 269 * 270 * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}. 271 * 272 * @throws NullPointerException if {@code text} is {@code null}. 273 * @throws IllegalArgumentException if parsing fails. 274 * 275 * @see #parse(java.lang.String) 276 */ 277 public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException 278 { 279 if ( text == null ) 280 { 281 throw new NullPointerException( "text" ); 282 } 283 284 try 285 { 286 return parse( text, null, true ); 287 } 288 catch ( final ParseException e ) 289 { 290 throw new AssertionError( e ); 291 } 292 } 293 294 private static JavaIdentifier parse( final String text, final NormalizationMode mode, 295 final boolean runtimeException ) 296 throws ParseException 297 { 298 Map<CacheKey, JavaIdentifier> map = cache == null ? null : cache.get(); 299 300 if ( map == null ) 301 { 302 map = new HashMap<CacheKey, JavaIdentifier>( 128 ); 303 cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map ); 304 } 305 306 synchronized ( map ) 307 { 308 final CacheKey key = new CacheKey( text, mode ); 309 JavaIdentifier javaIdentifier = map.get( key ); 310 311 if ( javaIdentifier == null ) 312 { 313 javaIdentifier = new JavaIdentifier(); 314 parseIdentifier( javaIdentifier, text, mode, runtimeException ); 315 316 if ( mode != null ) 317 { 318 final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode ); 319 final JavaIdentifier normalizedInstance = map.get( normalizedKey ); 320 321 if ( normalizedInstance != null ) 322 { 323 map.put( key, normalizedInstance ); 324 javaIdentifier = normalizedInstance; 325 } 326 else 327 { 328 map.put( key, javaIdentifier ); 329 map.put( normalizedKey, javaIdentifier ); 330 } 331 } 332 else 333 { 334 map.put( key, javaIdentifier ); 335 } 336 } 337 338 return javaIdentifier; 339 } 340 } 341 342 private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode, 343 final boolean runtimeException ) 344 throws ParseException 345 { 346 if ( text.length() <= 0 ) 347 { 348 if ( runtimeException ) 349 { 350 throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) ); 351 } 352 else 353 { 354 throw new ParseException( getMessage( "invalidEmptyString" ), 0 ); 355 } 356 } 357 358 final StringBuilder identifierBuilder = new StringBuilder( text.length() ); 359 final List<Integer> retainedIndices = new ArrayList<Integer>( text.length() ); 360 boolean start_of_word = true; 361 int words = 0; 362 363 for ( int i = 0, j = 1, s0 = text.length(), last_codepoint = -1; i < s0; i++, j++ ) 364 { 365 if ( !isWordSeparator( text.codePointAt( i ), mode, identifierBuilder.length() <= 0 ) ) 366 { 367 if ( mode != null ) 368 { 369 switch ( mode ) 370 { 371 case CAMEL_CASE: 372 if ( start_of_word ) 373 { 374 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) ); 375 } 376 else if ( last_codepoint > -1 && j < s0 377 && isCamelCase( last_codepoint, text.codePointAt( i ), text.codePointAt( j ) ) ) 378 { // Retain camel-case in words. 379 identifierBuilder.append( text.charAt( i ) ); 380 retainedIndices.add( identifierBuilder.length() - 1 ); 381 } 382 else 383 { 384 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) ); 385 } 386 break; 387 388 case LOWER_CASE: 389 if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT ) 390 { 391 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) ); 392 } 393 394 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) ); 395 break; 396 397 case UPPER_CASE: 398 case CONSTANT_NAME_CONVENTION: 399 if ( start_of_word && last_codepoint > -1 && last_codepoint != UNDERSCORE_CODEPOINT ) 400 { 401 identifierBuilder.append( Character.toChars( UNDERSCORE_CODEPOINT ) ); 402 } 403 404 identifierBuilder.append( Character.toUpperCase( text.charAt( i ) ) ); 405 break; 406 407 case VARIABLE_NAME_CONVENTION: 408 case METHOD_NAME_CONVENTION: 409 if ( start_of_word ) 410 { 411 identifierBuilder.append( words == 0 ? Character.toLowerCase( text.charAt( i ) ) 412 : Character.toUpperCase( text.charAt( i ) ) ); 413 414 } 415 else if ( last_codepoint > -1 && j < s0 416 && isCamelCase( last_codepoint, text.codePointAt( i ), text.codePointAt( j ) ) ) 417 { // Retain camel-case in words. 418 identifierBuilder.append( text.charAt( i ) ); 419 retainedIndices.add( identifierBuilder.length() - 1 ); 420 } 421 else 422 { 423 identifierBuilder.append( Character.toLowerCase( text.charAt( i ) ) ); 424 } 425 break; 426 427 default: 428 throw new AssertionError( mode ); 429 430 } 431 } 432 else 433 { 434 identifierBuilder.append( text.charAt( i ) ); 435 } 436 437 last_codepoint = identifierBuilder.codePointAt( identifierBuilder.length() - 1 ); 438 start_of_word = false; 439 } 440 else 441 { 442 if ( mode != null ) 443 { 444 if ( !start_of_word ) 445 { 446 start_of_word = true; 447 words++; 448 } 449 } 450 else if ( runtimeException ) 451 { 452 throw new IllegalArgumentException( getMessage( "invalidCharacter", text, text.charAt( i ), i ) ); 453 } 454 else 455 { 456 throw new ParseException( getMessage( "invalidCharacter", text, text.charAt( i ), i ), i ); 457 } 458 } 459 } 460 461 if ( words > 0 ) 462 { 463 // Multiple words - no camel-case retained in any word. 464 toLowerCase( identifierBuilder, retainedIndices ); 465 } 466 467 t.identifier = identifierBuilder.toString(); 468 469 if ( t.identifier.length() <= 0 ) 470 { 471 if ( runtimeException ) 472 { 473 throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) ); 474 } 475 else 476 { 477 throw new ParseException( getMessage( "invalidCharacters", text ), 0 ); 478 } 479 } 480 481 if ( JavaLanguage.KEYWORDS.contains( t.identifier ) 482 || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier ) 483 || JavaLanguage.NULL_LITERAL.equals( t.identifier ) ) 484 { 485 if ( mode != null ) 486 { 487 t.identifier = "_" + t.identifier; 488 } 489 else if ( runtimeException ) 490 { 491 throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier, 492 text.indexOf( t.identifier ) ) ); 493 494 } 495 else 496 { 497 throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ), 498 text.indexOf( t.identifier ) ); 499 500 } 501 } 502 } 503 504 private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first ) 505 { 506 return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) ) 507 && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) ); 508 509 } 510 511 private static boolean isCamelCase( final int left, final int middle, final int right ) 512 { 513 return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right ); 514 } 515 516 private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices ) 517 { 518 for ( int i = 0, s0 = indices.size(); i < s0; i++ ) 519 { 520 final int index = indices.get( i ); 521 final int cp = Character.toLowerCase( stringBuilder.codePointAt( index ) ); 522 stringBuilder.replace( index, index + 1, String.valueOf( Character.toChars( cp ) ) ); 523 } 524 } 525 526 private static String getMessage( final String key, final Object... args ) 527 { 528 return MessageFormat.format( ResourceBundle.getBundle( 529 JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ). 530 getString( key ), args ); 531 532 } 533 534 private static final class CacheKey 535 { 536 537 private final String text; 538 539 private final NormalizationMode mode; 540 541 private CacheKey( final String text, final NormalizationMode mode ) 542 { 543 super(); 544 this.text = text; 545 this.mode = mode; 546 } 547 548 @Override 549 public int hashCode() 550 { 551 int hc = 23; 552 hc = 37 * hc + this.text.hashCode(); 553 hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() ); 554 return hc; 555 } 556 557 @Override 558 public boolean equals( final Object o ) 559 { 560 boolean equal = o == this; 561 562 if ( !equal && o instanceof CacheKey ) 563 { 564 final CacheKey that = (CacheKey) o; 565 equal = this.mode == that.mode && this.text.equals( that.text ); 566 } 567 568 return equal; 569 } 570 571 } 572 573}