/*
 *   Copyright (C) Christian Schulte <cs@schulte.it>, 2012-253
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     o Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *
 *     o Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *
 *   THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 *   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 *   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 *   THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *   NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 *   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *   $JOMC: JavaIdentifier.java 5101 2016-04-04 18:52:11Z schulte $
 *
 */
package org.jomc.model;

import java.io.Serializable;
import java.lang.ref.Reference;
import java.lang.ref.SoftReference;
import java.text.MessageFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.ResourceBundle;

/**
 * Data type of a Java identifier.
 * <p>
 * This class provides support for parsing and normalizing text to java identifiers as specified in the Java
 * Language Specification - Java SE 7 Edition - Chapter 3.8ff.
 * </p>
 * <p>
 * <i>Please note that this class will move to package {@code org.jomc.jsl} in JOMC 2.0.</i>
 * </p>
 *
 * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
 * @version $JOMC: JavaIdentifier.java 5101 2016-04-04 18:52:11Z schulte $
 * @see #normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
 * @see #parse(java.lang.String)
 * @see #valueOf(java.lang.String)
 * @since 1.4
 */
public final class JavaIdentifier implements CharSequence, Serializable
{

    /**
     * Normalization modes.
     *
     * @author <a href="mailto:cs@schulte.it">Christian Schulte</a>
     * @version $JOMC: JavaIdentifier.java 5101 2016-04-04 18:52:11Z schulte $
     * @since 1.4
     * @see JavaIdentifier#normalize(java.lang.String, org.jomc.model.JavaIdentifier.NormalizationMode)
     */
    public static enum NormalizationMode
    {

        /**
         * Mode to normalize by compacting words using camel-case.
         */
        CAMEL_CASE,
        /**
         * Mode to normalize by separating words using '_' and by converting all characters to lower-case.
         */
        LOWER_CASE,
        /**
         * Mode to normalize by separating words using '_' and by converting all characters to upper-case.
         */
        UPPER_CASE,
        /**
         * Mode to normalize according to the
         * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Constants</cite>.
         * <blockquote>
         * The names of variables declared class constants and of ANSI constants should be all uppercase with words
         * separated by underscores ("_"). (ANSI constants should be avoided, for ease of debugging.)
         * </blockquote>
         */
        CONSTANT_NAME_CONVENTION,
        /**
         * Mode to normalize according to the
         * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Methods</cite>.
         * <blockquote>
         * Methods should be verbs, in mixed case with the first letter lowercase, with the first letter of each
         * internal word capitalized.
         * </blockquote>
         */
        METHOD_NAME_CONVENTION,
        /**
         * Mode to normalize according to the
         * <cite>Code Conventions for the Java Programming Language - 9 - Naming Conventions - Variables</cite>.
         * <blockquote>
         * Except for variables, all instance, class, and class constants are in mixed case with a lowercase first
         * letter. Internal words start with capital letters. Variable names should not start with underscore _ or
         * dollar sign $ characters, even though both are allowed. Variable names should be short yet meaningful. The
         * choice of a variable name should be mnemonic - that is - designed to indicate to the casual observer the
         * intent of its use. One-character variable names should be avoided except for temporary "throwaway" variables.
         * Common names for temporary variables are i, j, k, m, and n for integers; c, d, and e for characters.
         * </blockquote>
         */
        VARIABLE_NAME_CONVENTION

    }

    /**
     * The value of the instance.
     *
     * @serial
     */
    private String identifier;

    /**
     * Cached instances.
     */
    private static volatile Reference<Map<CacheKey, JavaIdentifier>> cache;

    /**
     * Serial version UID for backwards compatibility with 1.4.x object streams.
     */
    private static final long serialVersionUID = 7600377999055800720L;

    /**
     * Underscore character.
     */
    private static final int UNDERSCORE_CODEPOINT = Character.codePointAt( "_", 0 );

    /**
     * Creates a new {@code JavaIdentifier} instance.
     */
    private JavaIdentifier()
    {
        super();
    }

    /**
     * Returns the length of this character sequence.
     *
     * @return The number of {@code char}s in this sequence.
     */
    public int length()
    {
        return this.identifier.length();
    }

    /**
     * Returns the {@code char} value at a given index.
     *
     * @param index The index of the {@code char} value to return.
     *
     * @return The {@code char} value at {@code index}.
     *
     * @throws IndexOutOfBoundsException if {@code index} is negative or not less than the length of the sequence.
     */
    public char charAt( final int index )
    {
        return this.identifier.charAt( index );
    }

    /**
     * Returns a new {@code CharSequence} that is a subsequence of this sequence.
     *
     * @param start The start index, inclusive.
     * @param end The end index, exclusive.
     *
     * @return The sequence of characters starting at index {@code start} up to index {@code end - 1}.
     *
     * @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative, if {@code end} is greater than
     * the length of the sequence, or if {@code start} is greater than {@code end}.
     */
    public CharSequence subSequence( final int start, final int end )
    {
        return this.identifier.subSequence( start, end );
    }

    /**
     * Returns a string containing the characters in this sequence in the same order as this sequence. The length of the
     * string will be the length of this sequence.
     *
     * @return A string consisting of exactly this sequence of characters.
     */
    @Override
    public String toString()
    {
        return this.identifier;
    }

    /**
     * Returns the hash-code value of the object.
     *
     * @return The hash-code value of the object.
     */
    @Override
    public int hashCode()
    {
        return this.identifier.hashCode();
    }

    /**
     * Tests whether some other object is equal to the object.
     *
     * @param o The object to test.
     *
     * @return {@code true}, if {@code o} is an instance of the class of the object and its string value is equal to the
     * string value of the object.
     */
    @Override
    public boolean equals( final Object o )
    {
        boolean equal = o == this;

        if ( !equal && o instanceof JavaIdentifier )
        {
            equal = this.toString().equals( o.toString() );
        }

        return equal;
    }

    /**
     * Normalizes text from the beginning of the given string to produce a {@code JavaIdentifier}.
     *
     * @param text The text to normalize.
     * @param mode The normalization to apply.
     *
     * @return A {@code JavaIdentifier} instance constructed by normalizing {@code text} according to {@code mode}.
     *
     * @throws NullPointerException if {@code text} or {@code mode} is {@code null}.
     * @throws ParseException if normalization fails.
     */
    public static JavaIdentifier normalize( final String text, final NormalizationMode mode ) throws ParseException
    {
        if ( text == null )
        {
            throw new NullPointerException( "text" );
        }
        if ( mode == null )
        {
            throw new NullPointerException( "mode" );
        }

        return parse( text, mode, false );
    }

    /**
     * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
     *
     * @param text The text to parse.
     *
     * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
     *
     * @throws NullPointerException if {@code text} is {@code null}.
     * @throws ParseException if parsing fails.
     *
     * @see #valueOf(java.lang.String)
     */
    public static JavaIdentifier parse( final String text ) throws ParseException
    {
        if ( text == null )
        {
            throw new NullPointerException( "text" );
        }

        return parse( text, null, false );
    }

    /**
     * Parses text from the beginning of a given string to produce a {@code JavaIdentifier} instance.
     * <p>
     * Unlike the {@link #parse(String)} method, this method throws an {@code IllegalArgumentException} if parsing
     * fails.
     * </p>
     *
     * @param text The text to parse.
     *
     * @return A {@code JavaIdentifier} instance constructed by parsing {@code text}.
     *
     * @throws NullPointerException if {@code text} is {@code null}.
     * @throws IllegalArgumentException if parsing fails.
     *
     * @see #parse(java.lang.String)
     */
    public static JavaIdentifier valueOf( final String text ) throws IllegalArgumentException
    {
        if ( text == null )
        {
            throw new NullPointerException( "text" );
        }

        try
        {
            return parse( text, null, true );
        }
        catch ( final ParseException e )
        {
            // With the runtime exception flag set, failures are reported as IllegalArgumentException instead.
            throw new AssertionError( e );
        }
    }

    /**
     * Gets a {@code JavaIdentifier} instance for a given text and mode, consulting the instance cache first.
     * <p>
     * On a cache miss the text is parsed and the result is stored under the requested key. When normalizing, the
     * result is also looked up and stored under the key of its own normalized text so that different texts
     * normalizing to the same identifier share one instance.
     * </p>
     *
     * @param text The text to parse or normalize.
     * @param mode The normalization to apply or {@code null} to parse without normalizing.
     * @param runtimeException {@code true} to report failures by throwing an {@code IllegalArgumentException};
     * {@code false} to report failures by throwing a {@code ParseException}.
     *
     * @return The cached or newly created instance.
     *
     * @throws ParseException if parsing fails and {@code runtimeException} is {@code false}.
     */
    private static JavaIdentifier parse( final String text, final NormalizationMode mode,
                                         final boolean runtimeException )
        throws ParseException
    {
        // Read the volatile field once so that the null test and the dereference see the same reference.
        final Reference<Map<CacheKey, JavaIdentifier>> reference = cache;
        Map<CacheKey, JavaIdentifier> map = reference == null ? null : reference.get();

        if ( map == null )
        {
            map = new HashMap<CacheKey, JavaIdentifier>( 128 );
            cache = new SoftReference<Map<CacheKey, JavaIdentifier>>( map );
        }

        synchronized ( map )
        {
            final CacheKey key = new CacheKey( text, mode );
            JavaIdentifier javaIdentifier = map.get( key );

            if ( javaIdentifier == null )
            {
                javaIdentifier = new JavaIdentifier();
                parseIdentifier( javaIdentifier, text, mode, runtimeException );

                if ( mode != null )
                {
                    final CacheKey normalizedKey = new CacheKey( javaIdentifier.toString(), mode );
                    final JavaIdentifier normalizedInstance = map.get( normalizedKey );

                    if ( normalizedInstance != null )
                    {
                        map.put( key, normalizedInstance );
                        javaIdentifier = normalizedInstance;
                    }
                    else
                    {
                        map.put( key, javaIdentifier );
                        map.put( normalizedKey, javaIdentifier );
                    }
                }
                else
                {
                    map.put( key, javaIdentifier );
                }
            }

            return javaIdentifier;
        }
    }

    /**
     * Parses or normalizes a given text and stores the resulting identifier in a given instance.
     * <p>
     * The text is processed code point by code point so that supplementary characters (surrogate pairs) are
     * classified, converted and appended as a whole. Without a mode, any character not allowed in an identifier
     * is an error. With a mode, such characters separate words and letter case is adjusted according to the mode.
     * A result equal to a keyword, a boolean literal or the null literal is an error without a mode and gets
     * prefixed with {@code '_'} with a mode.
     * </p>
     *
     * @param t The instance to update.
     * @param text The text to parse or normalize.
     * @param mode The normalization to apply or {@code null} to parse without normalizing.
     * @param runtimeException {@code true} to report failures by throwing an {@code IllegalArgumentException};
     * {@code false} to report failures by throwing a {@code ParseException}.
     *
     * @throws ParseException if parsing fails and {@code runtimeException} is {@code false}.
     */
    private static void parseIdentifier( final JavaIdentifier t, final String text, final NormalizationMode mode,
                                         final boolean runtimeException )
        throws ParseException
    {
        if ( text.length() <= 0 )
        {
            if ( runtimeException )
            {
                throw new IllegalArgumentException( getMessage( "invalidEmptyString" ) );
            }
            else
            {
                throw new ParseException( getMessage( "invalidEmptyString" ), 0 );
            }
        }

        final int textLength = text.length();
        final StringBuilder identifierBuilder = new StringBuilder( textLength );
        // Builder indices of upper-case code points kept as is to retain camel-case within a word.
        final List<Integer> retainedIndices = new ArrayList<Integer>( textLength );
        boolean startOfWord = true;
        int words = 0;
        int lastCodePoint = -1;
        int i = 0;

        while ( i < textLength )
        {
            final int codePoint = text.codePointAt( i );
            // Index of the code point following the current one; advances by two chars for surrogate pairs.
            final int next = i + Character.charCount( codePoint );

            if ( !isWordSeparator( codePoint, mode, identifierBuilder.length() <= 0 ) )
            {
                if ( mode != null )
                {
                    // Lower-case, upper-case, lower-case sequence inside a word, e.g. the 'B' in "fooBar".
                    final boolean retainCase = !startOfWord && lastCodePoint > -1 && next < textLength
                                                   && isCamelCase( lastCodePoint, codePoint,
                                                                   text.codePointAt( next ) );

                    switch ( mode )
                    {
                        case CAMEL_CASE:
                            if ( startOfWord )
                            {
                                identifierBuilder.appendCodePoint( Character.toUpperCase( codePoint ) );
                            }
                            else if ( retainCase )
                            { // Retain camel-case in words.
                                retainedIndices.add( identifierBuilder.length() );
                                identifierBuilder.appendCodePoint( codePoint );
                            }
                            else
                            {
                                identifierBuilder.appendCodePoint( Character.toLowerCase( codePoint ) );
                            }
                            break;

                        case LOWER_CASE:
                            if ( startOfWord && lastCodePoint > -1 && lastCodePoint != UNDERSCORE_CODEPOINT )
                            {
                                identifierBuilder.appendCodePoint( UNDERSCORE_CODEPOINT );
                            }

                            identifierBuilder.appendCodePoint( Character.toLowerCase( codePoint ) );
                            break;

                        case UPPER_CASE:
                        case CONSTANT_NAME_CONVENTION:
                            if ( startOfWord && lastCodePoint > -1 && lastCodePoint != UNDERSCORE_CODEPOINT )
                            {
                                identifierBuilder.appendCodePoint( UNDERSCORE_CODEPOINT );
                            }

                            identifierBuilder.appendCodePoint( Character.toUpperCase( codePoint ) );
                            break;

                        case VARIABLE_NAME_CONVENTION:
                        case METHOD_NAME_CONVENTION:
                            if ( startOfWord )
                            {
                                // The first word starts lower-case, any following word starts upper-case.
                                identifierBuilder.appendCodePoint( words == 0
                                                                       ? Character.toLowerCase( codePoint )
                                                                       : Character.toUpperCase( codePoint ) );

                            }
                            else if ( retainCase )
                            { // Retain camel-case in words.
                                retainedIndices.add( identifierBuilder.length() );
                                identifierBuilder.appendCodePoint( codePoint );
                            }
                            else
                            {
                                identifierBuilder.appendCodePoint( Character.toLowerCase( codePoint ) );
                            }
                            break;

                        default:
                            throw new AssertionError( mode );

                    }
                }
                else
                {
                    identifierBuilder.appendCodePoint( codePoint );
                }

                lastCodePoint = identifierBuilder.codePointBefore( identifierBuilder.length() );
                startOfWord = false;
            }
            else
            {
                if ( mode != null )
                {
                    if ( !startOfWord )
                    {
                        startOfWord = true;
                        words++;
                    }
                }
                else if ( runtimeException )
                {
                    throw new IllegalArgumentException( getMessage( "invalidCharacter", text,
                                                                    text.substring( i, next ), i ) );

                }
                else
                {
                    throw new ParseException( getMessage( "invalidCharacter", text, text.substring( i, next ), i ),
                                              i );

                }
            }

            i = next;
        }

        if ( words > 0 )
        {
            // Multiple words - no camel-case retained in any word.
            toLowerCase( identifierBuilder, retainedIndices );
        }

        t.identifier = identifierBuilder.toString();

        if ( t.identifier.length() <= 0 )
        {
            if ( runtimeException )
            {
                throw new IllegalArgumentException( getMessage( "invalidCharacters", text ) );
            }
            else
            {
                throw new ParseException( getMessage( "invalidCharacters", text ), 0 );
            }
        }

        if ( JavaLanguage.KEYWORDS.contains( t.identifier )
                 || JavaLanguage.BOOLEAN_LITERALS.contains( t.identifier )
                 || JavaLanguage.NULL_LITERAL.equals( t.identifier ) )
        {
            if ( mode != null )
            {
                t.identifier = "_" + t.identifier;
            }
            else if ( runtimeException )
            {
                throw new IllegalArgumentException( getMessage( "invalidWord", text, t.identifier,
                                                                text.indexOf( t.identifier ) ) );

            }
            else
            {
                throw new ParseException( getMessage( "invalidWord", text, t.identifier, text.indexOf( t.identifier ) ),
                                          text.indexOf( t.identifier ) );

            }
        }
    }

    /**
     * Tests whether a code point cannot become part of the identifier at the current position.
     *
     * @param codePoint The code point to test.
     * @param mode The normalization being applied or {@code null} when parsing without normalizing.
     * @param first {@code true}, if the code point would become the first character of the identifier.
     *
     * @return {@code false}, if the code point is a Java identifier start (when {@code first} is {@code true}) or a
     * Java identifier part (otherwise) and - when {@code mode} is not {@code null} - is a letter or digit in addition;
     * {@code true}, else.
     */
    private static boolean isWordSeparator( final int codePoint, final NormalizationMode mode, final boolean first )
    {
        return !( ( first ? Character.isJavaIdentifierStart( codePoint ) : Character.isJavaIdentifierPart( codePoint ) )
                  && ( mode != null ? Character.isLetterOrDigit( codePoint ) : true ) );

    }

    /**
     * Tests whether three consecutive code points form a lower-case, upper-case, lower-case sequence.
     *
     * @param left The code point preceding {@code middle}.
     * @param middle The code point to test.
     * @param right The code point following {@code middle}.
     *
     * @return {@code true}, if {@code left} and {@code right} are lower-case and {@code middle} is upper-case.
     */
    private static boolean isCamelCase( final int left, final int middle, final int right )
    {
        return Character.isLowerCase( left ) && Character.isUpperCase( middle ) && Character.isLowerCase( right );
    }

    /**
     * Converts the code points starting at given indices of a string builder to lower-case.
     *
     * @param stringBuilder The builder to update.
     * @param indices The {@code char} indices at which the code points to convert start, in ascending order.
     */
    private static void toLowerCase( final StringBuilder stringBuilder, final List<Integer> indices )
    {
        // Walk backwards so that a replacement can never shift an index still to be processed.
        for ( int i = indices.size() - 1; i >= 0; i-- )
        {
            final int index = indices.get( i );
            final int codePoint = stringBuilder.codePointAt( index );
            final int lowerCase = Character.toLowerCase( codePoint );
            stringBuilder.replace( index, index + Character.charCount( codePoint ),
                                   String.valueOf( Character.toChars( lowerCase ) ) );

        }
    }

    /**
     * Formats a message from the resource bundle named like this class for the default locale.
     *
     * @param key The key of the message in the bundle.
     * @param args The arguments to format the message with.
     *
     * @return The formatted message.
     */
    private static String getMessage( final String key, final Object... args )
    {
        return MessageFormat.format( ResourceBundle.getBundle(
            JavaIdentifier.class.getName().replace( '.', '/' ), Locale.getDefault() ).
            getString( key ), args );

    }

    /**
     * Key of the instance cache made up of a text and the normalization mode applied to it.
     */
    private static final class CacheKey
    {

        /**
         * The text of the key.
         */
        private final String text;

        /**
         * The normalization mode of the key or {@code null}.
         */
        private final NormalizationMode mode;

        /**
         * Creates a new {@code CacheKey} instance.
         *
         * @param text The text of the key.
         * @param mode The normalization mode of the key or {@code null}.
         */
        private CacheKey( final String text, final NormalizationMode mode )
        {
            super();
            this.text = text;
            this.mode = mode;
        }

        /**
         * Returns the hash-code value of the key computed from its text and mode.
         *
         * @return The hash-code value of the key.
         */
        @Override
        public int hashCode()
        {
            int hc = 23;
            hc = 37 * hc + this.text.hashCode();
            hc = 37 * hc + ( this.mode == null ? 0 : this.mode.hashCode() );
            return hc;
        }

        /**
         * Tests whether some other object is equal to the key.
         *
         * @param o The object to test.
         *
         * @return {@code true}, if {@code o} is a {@code CacheKey} with the same mode and an equal text.
         */
        @Override
        public boolean equals( final Object o )
        {
            boolean equal = o == this;

            if ( !equal && o instanceof CacheKey )
            {
                final CacheKey that = (CacheKey) o;
                equal = this.mode == that.mode && this.text.equals( that.text );
            }

            return equal;
        }

    }

}