1 /* 2 * Copyright (c) 1994, 2018, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 
24 */ 25 26 package java.lang; 27 28 import java.io.ObjectStreamField; 29 import java.io.UnsupportedEncodingException; 30 import java.lang.annotation.Native; 31 import java.nio.charset.Charset; 32 import java.util.ArrayList; 33 import java.util.Arrays; 34 import java.util.Comparator; 35 import java.util.Formatter; 36 import java.util.Locale; 37 import java.util.Objects; 38 import java.util.Spliterator; 39 import java.util.StringJoiner; 40 import java.util.regex.Matcher; 41 import java.util.regex.Pattern; 42 import java.util.regex.PatternSyntaxException; 43 import java.util.stream.Collectors; 44 import java.util.stream.IntStream; 45 import java.util.stream.Stream; 46 import java.util.stream.StreamSupport; 47 import jdk.internal.HotSpotIntrinsicCandidate; 48 import jdk.internal.vm.annotation.Stable; 49 50 import static java.util.function.Predicate.not; 51 52 /** 53 * The {@code String} class represents character strings. All 54 * string literals in Java programs, such as {@code "abc"}, are 55 * implemented as instances of this class. 56 * <p> 57 * Strings are constant; their values cannot be changed after they 58 * are created. String buffers support mutable strings. 59 * Because String objects are immutable they can be shared. 
For example: 60 * <blockquote><pre> 61 * String str = "abc"; 62 * </pre></blockquote><p> 63 * is equivalent to: 64 * <blockquote><pre> 65 * char data[] = {'a', 'b', 'c'}; 66 * String str = new String(data); 67 * </pre></blockquote><p> 68 * Here are some more examples of how strings can be used: 69 * <blockquote><pre> 70 * System.out.println("abc"); 71 * String cde = "cde"; 72 * System.out.println("abc" + cde); 73 * String c = "abc".substring(2,3); 74 * String d = cde.substring(1, 2); 75 * </pre></blockquote> 76 * <p> 77 * The class {@code String} includes methods for examining 78 * individual characters of the sequence, for comparing strings, for 79 * searching strings, for extracting substrings, and for creating a 80 * copy of a string with all characters translated to uppercase or to 81 * lowercase. Case mapping is based on the Unicode Standard version 82 * specified by the {@link java.lang.Character Character} class. 83 * <p> 84 * The Java language provides special support for the string 85 * concatenation operator ( + ), and for conversion of 86 * other objects to strings. For additional information on string 87 * concatenation and conversion, see <i>The Java™ Language Specification</i>. 88 * 89 * <p> Unless otherwise noted, passing a {@code null} argument to a constructor 90 * or method in this class will cause a {@link NullPointerException} to be 91 * thrown. 92 * 93 * <p>A {@code String} represents a string in the UTF-16 format 94 * in which <em>supplementary characters</em> are represented by <em>surrogate 95 * pairs</em> (see the section <a href="Character.html#unicode">Unicode 96 * Character Representations</a> in the {@code Character} class for 97 * more information). 98 * Index values refer to {@code char} code units, so a supplementary 99 * character uses two positions in a {@code String}. 
100 * <p>The {@code String} class provides methods for dealing with 101 * Unicode code points (i.e., characters), in addition to those for 102 * dealing with Unicode code units (i.e., {@code char} values). 103 * 104 * <p>Unless otherwise noted, methods for comparing Strings do not take locale 105 * into account. The {@link java.text.Collator} class provides methods for 106 * finer-grain, locale-sensitive String comparison. 107 * 108 * @implNote The implementation of the string concatenation operator is left to 109 * the discretion of a Java compiler, as long as the compiler ultimately conforms 110 * to <i>The Java™ Language Specification</i>. For example, the {@code javac} compiler 111 * may implement the operator with {@code StringBuffer}, {@code StringBuilder}, 112 * or {@code java.lang.invoke.StringConcatFactory} depending on the JDK version. The 113 * implementation of string conversion is typically through the method {@code toString}, 114 * defined by {@code Object} and inherited by all classes in Java. 115 * 116 * @author Lee Boynton 117 * @author Arthur van Hoff 118 * @author Martin Buchholz 119 * @author Ulf Zibis 120 * @see java.lang.Object#toString() 121 * @see java.lang.StringBuffer 122 * @see java.lang.StringBuilder 123 * @see java.nio.charset.Charset 124 * @since 1.0 125 * @jls 15.18.1 String Concatenation Operator + 126 */ 127 128 public final class String 129 implements java.io.Serializable, Comparable<String>, CharSequence { 130 131 /** 132 * The value is used for character storage. 133 * 134 * @implNote This field is trusted by the VM, and is a subject to 135 * constant folding if String instance is constant. Overwriting this 136 * field after construction will cause problems. 137 * 138 * Additionally, it is marked with {@link Stable} to trust the contents 139 * of the array. No other facility in JDK provides this functionality (yet). 140 * {@link Stable} is safe here, because value is never null. 
*/
@Stable
private final byte[] value;

/**
 * The identifier of the encoding used to encode the bytes in
 * {@code value}. The supported values in this implementation are
 *
 *     LATIN1
 *     UTF16
 *
 * @implNote This field is trusted by the VM, and is subject to
 * constant folding if the String instance is constant. Overwriting this
 * field after construction will cause problems.
 */
private final byte coder;

/** Caches the hash code for the string. */
private int hash; // Default to 0

/** Uses the serialVersionUID from JDK 1.0.2 for interoperability. */
private static final long serialVersionUID = -6849794470754667710L;

/**
 * If String compaction is disabled, the bytes in {@code value} are
 * always encoded in UTF16.
 *
 * For methods with several possible implementation paths, when String
 * compaction is disabled, only one code path is taken.
 *
 * The instance field value is generally opaque to optimizing JIT
 * compilers. Therefore, in performance-sensitive places, an explicit
 * check of the static boolean {@code COMPACT_STRINGS} is done first
 * before checking the {@code coder} field, since the static boolean
 * {@code COMPACT_STRINGS} would be constant folded away by an
 * optimizing JIT compiler. The idioms for these cases are as follows.
 *
 * Code such as:
 *
 *     if (coder == LATIN1) { ... }
 *
 * can be written more optimally as
 *
 *     if (coder() == LATIN1) { ... }
 *
 * or:
 *
 *     if (COMPACT_STRINGS && coder == LATIN1) { ... }
 *
 * An optimizing JIT compiler can fold the above conditional as:
 *
 *     COMPACT_STRINGS == true  => if (coder == LATIN1) { ... }
 *     COMPACT_STRINGS == false => if (false)           { ... }
 *
 * @implNote
 * The actual value for this field is injected by JVM.
* The static initialization block is used to set the value here to
 * communicate that this static final field is not statically foldable,
 * and to avoid any possible circular dependency during VM initialization.
 */
static final boolean COMPACT_STRINGS;

// Assigned in a static initializer rather than at the declaration, on
// purpose; the reason is given in the field comment directly above.
static {
    COMPACT_STRINGS = true;
}

/**
 * Class String is special cased within the Serialization Stream Protocol.
 *
 * A String instance is written into an ObjectOutputStream according to
 * <a href="{@docRoot}/../specs/serialization/protocol.html#stream-elements">
 * Object Serialization Specification, Section 6.2, "Stream Elements"</a>
 */
private static final ObjectStreamField[] serialPersistentFields =
    new ObjectStreamField[0];

/**
 * Initializes a newly created {@code String} object so that it represents
 * an empty character sequence.  Note that use of this constructor is
 * unnecessary since Strings are immutable.
 */
public String() {
    // Share the backing array and coder of the "" literal instead of
    // allocating a new array.
    this.value = "".value;
    this.coder = "".coder;
}

/**
 * Initializes a newly created {@code String} object so that it represents
 * the same sequence of characters as the argument; in other words, the
 * newly created string is a copy of the argument string. Unless an
 * explicit copy of {@code original} is needed, use of this constructor is
 * unnecessary since Strings are immutable.
 *
 * @param  original
 *         A {@code String}
 */
@HotSpotIntrinsicCandidate
public String(String original) {
    // The new instance reuses the original's backing array and coder,
    // and carries over whatever hash value the original has cached.
    this.value = original.value;
    this.coder = original.coder;
    this.hash = original.hash;
}

/**
 * Allocates a new {@code String} so that it represents the sequence of
 * characters currently contained in the character array argument. The
 * contents of the character array are copied; subsequent modification of
 * the character array does not affect the newly created string.
249 * 250 * @param value 251 * The initial value of the string 252 */ 253 public String(char value[]) { 254 this(value, 0, value.length, null); 255 } 256 257 /** 258 * Allocates a new {@code String} that contains characters from a subarray 259 * of the character array argument. The {@code offset} argument is the 260 * index of the first character of the subarray and the {@code count} 261 * argument specifies the length of the subarray. The contents of the 262 * subarray are copied; subsequent modification of the character array does 263 * not affect the newly created string. 264 * 265 * @param value 266 * Array that is the source of characters 267 * 268 * @param offset 269 * The initial offset 270 * 271 * @param count 272 * The length 273 * 274 * @throws IndexOutOfBoundsException 275 * If {@code offset} is negative, {@code count} is negative, or 276 * {@code offset} is greater than {@code value.length - count} 277 */ 278 public String(char value[], int offset, int count) { 279 this(value, offset, count, rangeCheck(value, offset, count)); 280 } 281 282 private static Void rangeCheck(char[] value, int offset, int count) { 283 checkBoundsOffCount(offset, count, value.length); 284 return null; 285 } 286 287 /** 288 * Allocates a new {@code String} that contains characters from a subarray 289 * of the <a href="Character.html#unicode">Unicode code point</a> array 290 * argument. The {@code offset} argument is the index of the first code 291 * point of the subarray and the {@code count} argument specifies the 292 * length of the subarray. The contents of the subarray are converted to 293 * {@code char}s; subsequent modification of the {@code int} array does not 294 * affect the newly created string. 
295 * 296 * @param codePoints 297 * Array that is the source of Unicode code points 298 * 299 * @param offset 300 * The initial offset 301 * 302 * @param count 303 * The length 304 * 305 * @throws IllegalArgumentException 306 * If any invalid Unicode code point is found in {@code 307 * codePoints} 308 * 309 * @throws IndexOutOfBoundsException 310 * If {@code offset} is negative, {@code count} is negative, or 311 * {@code offset} is greater than {@code codePoints.length - count} 312 * 313 * @since 1.5 314 */ 315 public String(int[] codePoints, int offset, int count) { 316 checkBoundsOffCount(offset, count, codePoints.length); 317 if (count == 0) { 318 this.value = "".value; 319 this.coder = "".coder; 320 return; 321 } 322 if (COMPACT_STRINGS) { 323 byte[] val = StringLatin1.toBytes(codePoints, offset, count); 324 if (val != null) { 325 this.coder = LATIN1; 326 this.value = val; 327 return; 328 } 329 } 330 this.coder = UTF16; 331 this.value = StringUTF16.toBytes(codePoints, offset, count); 332 } 333 334 /** 335 * Allocates a new {@code String} constructed from a subarray of an array 336 * of 8-bit integer values. 337 * 338 * <p> The {@code offset} argument is the index of the first byte of the 339 * subarray, and the {@code count} argument specifies the length of the 340 * subarray. 341 * 342 * <p> Each {@code byte} in the subarray is converted to a {@code char} as 343 * specified in the {@link #String(byte[],int) String(byte[],int)} constructor. 344 * 345 * @deprecated This method does not properly convert bytes into characters. 346 * As of JDK 1.1, the preferred way to do this is via the 347 * {@code String} constructors that take a {@link 348 * java.nio.charset.Charset}, charset name, or that use the platform's 349 * default charset. 
350 * 351 * @param ascii 352 * The bytes to be converted to characters 353 * 354 * @param hibyte 355 * The top 8 bits of each 16-bit Unicode code unit 356 * 357 * @param offset 358 * The initial offset 359 * @param count 360 * The length 361 * 362 * @throws IndexOutOfBoundsException 363 * If {@code offset} is negative, {@code count} is negative, or 364 * {@code offset} is greater than {@code ascii.length - count} 365 * 366 * @see #String(byte[], int) 367 * @see #String(byte[], int, int, java.lang.String) 368 * @see #String(byte[], int, int, java.nio.charset.Charset) 369 * @see #String(byte[], int, int) 370 * @see #String(byte[], java.lang.String) 371 * @see #String(byte[], java.nio.charset.Charset) 372 * @see #String(byte[]) 373 */ 374 @Deprecated(since="1.1") 375 public String(byte ascii[], int hibyte, int offset, int count) { 376 checkBoundsOffCount(offset, count, ascii.length); 377 if (count == 0) { 378 this.value = "".value; 379 this.coder = "".coder; 380 return; 381 } 382 if (COMPACT_STRINGS && (byte)hibyte == 0) { 383 this.value = Arrays.copyOfRange(ascii, offset, offset + count); 384 this.coder = LATIN1; 385 } else { 386 hibyte <<= 8; 387 byte[] val = StringUTF16.newBytesFor(count); 388 for (int i = 0; i < count; i++) { 389 StringUTF16.putChar(val, i, hibyte | (ascii[offset++] & 0xff)); 390 } 391 this.value = val; 392 this.coder = UTF16; 393 } 394 } 395 396 /** 397 * Allocates a new {@code String} containing characters constructed from 398 * an array of 8-bit integer values. Each character <i>c</i> in the 399 * resulting string is constructed from the corresponding component 400 * <i>b</i> in the byte array such that: 401 * 402 * <blockquote><pre> 403 * <b><i>c</i></b> == (char)(((hibyte & 0xff) << 8) 404 * | (<b><i>b</i></b> & 0xff)) 405 * </pre></blockquote> 406 * 407 * @deprecated This method does not properly convert bytes into 408 * characters. 
As of JDK 1.1, the preferred way to do this is via the 409 * {@code String} constructors that take a {@link 410 * java.nio.charset.Charset}, charset name, or that use the platform's 411 * default charset. 412 * 413 * @param ascii 414 * The bytes to be converted to characters 415 * 416 * @param hibyte 417 * The top 8 bits of each 16-bit Unicode code unit 418 * 419 * @see #String(byte[], int, int, java.lang.String) 420 * @see #String(byte[], int, int, java.nio.charset.Charset) 421 * @see #String(byte[], int, int) 422 * @see #String(byte[], java.lang.String) 423 * @see #String(byte[], java.nio.charset.Charset) 424 * @see #String(byte[]) 425 */ 426 @Deprecated(since="1.1") 427 public String(byte ascii[], int hibyte) { 428 this(ascii, hibyte, 0, ascii.length); 429 } 430 431 /** 432 * Constructs a new {@code String} by decoding the specified subarray of 433 * bytes using the specified charset. The length of the new {@code String} 434 * is a function of the charset, and hence may not be equal to the length 435 * of the subarray. 436 * 437 * <p> The behavior of this constructor when the given bytes are not valid 438 * in the given charset is unspecified. The {@link 439 * java.nio.charset.CharsetDecoder} class should be used when more control 440 * over the decoding process is required. 
441 * 442 * @param bytes 443 * The bytes to be decoded into characters 444 * 445 * @param offset 446 * The index of the first byte to decode 447 * 448 * @param length 449 * The number of bytes to decode 450 451 * @param charsetName 452 * The name of a supported {@linkplain java.nio.charset.Charset 453 * charset} 454 * 455 * @throws UnsupportedEncodingException 456 * If the named charset is not supported 457 * 458 * @throws IndexOutOfBoundsException 459 * If {@code offset} is negative, {@code length} is negative, or 460 * {@code offset} is greater than {@code bytes.length - length} 461 * 462 * @since 1.1 463 */ 464 public String(byte bytes[], int offset, int length, String charsetName) 465 throws UnsupportedEncodingException { 466 if (charsetName == null) 467 throw new NullPointerException("charsetName"); 468 checkBoundsOffCount(offset, length, bytes.length); 469 StringCoding.Result ret = 470 StringCoding.decode(charsetName, bytes, offset, length); 471 this.value = ret.value; 472 this.coder = ret.coder; 473 } 474 475 /** 476 * Constructs a new {@code String} by decoding the specified subarray of 477 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 478 * The length of the new {@code String} is a function of the charset, and 479 * hence may not be equal to the length of the subarray. 480 * 481 * <p> This method always replaces malformed-input and unmappable-character 482 * sequences with this charset's default replacement string. The {@link 483 * java.nio.charset.CharsetDecoder} class should be used when more control 484 * over the decoding process is required. 
485 * 486 * @param bytes 487 * The bytes to be decoded into characters 488 * 489 * @param offset 490 * The index of the first byte to decode 491 * 492 * @param length 493 * The number of bytes to decode 494 * 495 * @param charset 496 * The {@linkplain java.nio.charset.Charset charset} to be used to 497 * decode the {@code bytes} 498 * 499 * @throws IndexOutOfBoundsException 500 * If {@code offset} is negative, {@code length} is negative, or 501 * {@code offset} is greater than {@code bytes.length - length} 502 * 503 * @since 1.6 504 */ 505 public String(byte bytes[], int offset, int length, Charset charset) { 506 if (charset == null) 507 throw new NullPointerException("charset"); 508 checkBoundsOffCount(offset, length, bytes.length); 509 StringCoding.Result ret = 510 StringCoding.decode(charset, bytes, offset, length); 511 this.value = ret.value; 512 this.coder = ret.coder; 513 } 514 515 /** 516 * Constructs a new {@code String} by decoding the specified array of bytes 517 * using the specified {@linkplain java.nio.charset.Charset charset}. The 518 * length of the new {@code String} is a function of the charset, and hence 519 * may not be equal to the length of the byte array. 520 * 521 * <p> The behavior of this constructor when the given bytes are not valid 522 * in the given charset is unspecified. The {@link 523 * java.nio.charset.CharsetDecoder} class should be used when more control 524 * over the decoding process is required. 
525 * 526 * @param bytes 527 * The bytes to be decoded into characters 528 * 529 * @param charsetName 530 * The name of a supported {@linkplain java.nio.charset.Charset 531 * charset} 532 * 533 * @throws UnsupportedEncodingException 534 * If the named charset is not supported 535 * 536 * @since 1.1 537 */ 538 public String(byte bytes[], String charsetName) 539 throws UnsupportedEncodingException { 540 this(bytes, 0, bytes.length, charsetName); 541 } 542 543 /** 544 * Constructs a new {@code String} by decoding the specified array of 545 * bytes using the specified {@linkplain java.nio.charset.Charset charset}. 546 * The length of the new {@code String} is a function of the charset, and 547 * hence may not be equal to the length of the byte array. 548 * 549 * <p> This method always replaces malformed-input and unmappable-character 550 * sequences with this charset's default replacement string. The {@link 551 * java.nio.charset.CharsetDecoder} class should be used when more control 552 * over the decoding process is required. 553 * 554 * @param bytes 555 * The bytes to be decoded into characters 556 * 557 * @param charset 558 * The {@linkplain java.nio.charset.Charset charset} to be used to 559 * decode the {@code bytes} 560 * 561 * @since 1.6 562 */ 563 public String(byte bytes[], Charset charset) { 564 this(bytes, 0, bytes.length, charset); 565 } 566 567 /** 568 * Constructs a new {@code String} by decoding the specified subarray of 569 * bytes using the platform's default charset. The length of the new 570 * {@code String} is a function of the charset, and hence may not be equal 571 * to the length of the subarray. 572 * 573 * <p> The behavior of this constructor when the given bytes are not valid 574 * in the default charset is unspecified. The {@link 575 * java.nio.charset.CharsetDecoder} class should be used when more control 576 * over the decoding process is required. 
577 * 578 * @param bytes 579 * The bytes to be decoded into characters 580 * 581 * @param offset 582 * The index of the first byte to decode 583 * 584 * @param length 585 * The number of bytes to decode 586 * 587 * @throws IndexOutOfBoundsException 588 * If {@code offset} is negative, {@code length} is negative, or 589 * {@code offset} is greater than {@code bytes.length - length} 590 * 591 * @since 1.1 592 */ 593 public String(byte bytes[], int offset, int length) { 594 checkBoundsOffCount(offset, length, bytes.length); 595 StringCoding.Result ret = StringCoding.decode(bytes, offset, length); 596 this.value = ret.value; 597 this.coder = ret.coder; 598 } 599 600 /** 601 * Constructs a new {@code String} by decoding the specified array of bytes 602 * using the platform's default charset. The length of the new {@code 603 * String} is a function of the charset, and hence may not be equal to the 604 * length of the byte array. 605 * 606 * <p> The behavior of this constructor when the given bytes are not valid 607 * in the default charset is unspecified. The {@link 608 * java.nio.charset.CharsetDecoder} class should be used when more control 609 * over the decoding process is required. 610 * 611 * @param bytes 612 * The bytes to be decoded into characters 613 * 614 * @since 1.1 615 */ 616 public String(byte[] bytes) { 617 this(bytes, 0, bytes.length); 618 } 619 620 /** 621 * Allocates a new string that contains the sequence of characters 622 * currently contained in the string buffer argument. The contents of the 623 * string buffer are copied; subsequent modification of the string buffer 624 * does not affect the newly created string. 625 * 626 * @param buffer 627 * A {@code StringBuffer} 628 */ 629 public String(StringBuffer buffer) { 630 this(buffer.toString()); 631 } 632 633 /** 634 * Allocates a new string that contains the sequence of characters 635 * currently contained in the string builder argument. 
The contents of the 636 * string builder are copied; subsequent modification of the string builder 637 * does not affect the newly created string. 638 * 639 * <p> This constructor is provided to ease migration to {@code 640 * StringBuilder}. Obtaining a string from a string builder via the {@code 641 * toString} method is likely to run faster and is generally preferred. 642 * 643 * @param builder 644 * A {@code StringBuilder} 645 * 646 * @since 1.5 647 */ 648 public String(StringBuilder builder) { 649 this(builder, null); 650 } 651 652 /** 653 * Returns the length of this string. 654 * The length is equal to the number of <a href="Character.html#unicode">Unicode 655 * code units</a> in the string. 656 * 657 * @return the length of the sequence of characters represented by this 658 * object. 659 */ 660 public int length() { 661 return value.length >> coder(); 662 } 663 664 /** 665 * Returns {@code true} if, and only if, {@link #length()} is {@code 0}. 666 * 667 * @return {@code true} if {@link #length()} is {@code 0}, otherwise 668 * {@code false} 669 * 670 * @since 1.6 671 */ 672 public boolean isEmpty() { 673 return value.length == 0; 674 } 675 676 /** 677 * Returns the {@code char} value at the 678 * specified index. An index ranges from {@code 0} to 679 * {@code length() - 1}. The first {@code char} value of the sequence 680 * is at index {@code 0}, the next at index {@code 1}, 681 * and so on, as for array indexing. 682 * 683 * <p>If the {@code char} value specified by the index is a 684 * <a href="Character.html#unicode">surrogate</a>, the surrogate 685 * value is returned. 686 * 687 * @param index the index of the {@code char} value. 688 * @return the {@code char} value at the specified index of this string. 689 * The first {@code char} value is at index {@code 0}. 690 * @exception IndexOutOfBoundsException if the {@code index} 691 * argument is negative or not less than the length of this 692 * string. 
693 */ 694 public char charAt(int index) { 695 if (isLatin1()) { 696 return StringLatin1.charAt(value, index); 697 } else { 698 return StringUTF16.charAt(value, index); 699 } 700 } 701 702 /** 703 * Returns the character (Unicode code point) at the specified 704 * index. The index refers to {@code char} values 705 * (Unicode code units) and ranges from {@code 0} to 706 * {@link #length()}{@code - 1}. 707 * 708 * <p> If the {@code char} value specified at the given index 709 * is in the high-surrogate range, the following index is less 710 * than the length of this {@code String}, and the 711 * {@code char} value at the following index is in the 712 * low-surrogate range, then the supplementary code point 713 * corresponding to this surrogate pair is returned. Otherwise, 714 * the {@code char} value at the given index is returned. 715 * 716 * @param index the index to the {@code char} values 717 * @return the code point value of the character at the 718 * {@code index} 719 * @exception IndexOutOfBoundsException if the {@code index} 720 * argument is negative or not less than the length of this 721 * string. 722 * @since 1.5 723 */ 724 public int codePointAt(int index) { 725 if (isLatin1()) { 726 checkIndex(index, value.length); 727 return value[index] & 0xff; 728 } 729 int length = value.length >> 1; 730 checkIndex(index, length); 731 return StringUTF16.codePointAt(value, index, length); 732 } 733 734 /** 735 * Returns the character (Unicode code point) before the specified 736 * index. The index refers to {@code char} values 737 * (Unicode code units) and ranges from {@code 1} to {@link 738 * CharSequence#length() length}. 739 * 740 * <p> If the {@code char} value at {@code (index - 1)} 741 * is in the low-surrogate range, {@code (index - 2)} is not 742 * negative, and the {@code char} value at {@code (index - 743 * 2)} is in the high-surrogate range, then the 744 * supplementary code point value of the surrogate pair is 745 * returned. 
If the {@code char} value at {@code index - 746 * 1} is an unpaired low-surrogate or a high-surrogate, the 747 * surrogate value is returned. 748 * 749 * @param index the index following the code point that should be returned 750 * @return the Unicode code point value before the given index. 751 * @exception IndexOutOfBoundsException if the {@code index} 752 * argument is less than 1 or greater than the length 753 * of this string. 754 * @since 1.5 755 */ 756 public int codePointBefore(int index) { 757 int i = index - 1; 758 if (i < 0 || i >= length()) { 759 throw new StringIndexOutOfBoundsException(index); 760 } 761 if (isLatin1()) { 762 return (value[i] & 0xff); 763 } 764 return StringUTF16.codePointBefore(value, index); 765 } 766 767 /** 768 * Returns the number of Unicode code points in the specified text 769 * range of this {@code String}. The text range begins at the 770 * specified {@code beginIndex} and extends to the 771 * {@code char} at index {@code endIndex - 1}. Thus the 772 * length (in {@code char}s) of the text range is 773 * {@code endIndex-beginIndex}. Unpaired surrogates within 774 * the text range count as one code point each. 775 * 776 * @param beginIndex the index to the first {@code char} of 777 * the text range. 778 * @param endIndex the index after the last {@code char} of 779 * the text range. 780 * @return the number of Unicode code points in the specified text 781 * range 782 * @exception IndexOutOfBoundsException if the 783 * {@code beginIndex} is negative, or {@code endIndex} 784 * is larger than the length of this {@code String}, or 785 * {@code beginIndex} is larger than {@code endIndex}. 
786 * @since 1.5 787 */ 788 public int codePointCount(int beginIndex, int endIndex) { 789 if (beginIndex < 0 || beginIndex > endIndex || 790 endIndex > length()) { 791 throw new IndexOutOfBoundsException(); 792 } 793 if (isLatin1()) { 794 return endIndex - beginIndex; 795 } 796 return StringUTF16.codePointCount(value, beginIndex, endIndex); 797 } 798 799 /** 800 * Returns the index within this {@code String} that is 801 * offset from the given {@code index} by 802 * {@code codePointOffset} code points. Unpaired surrogates 803 * within the text range given by {@code index} and 804 * {@code codePointOffset} count as one code point each. 805 * 806 * @param index the index to be offset 807 * @param codePointOffset the offset in code points 808 * @return the index within this {@code String} 809 * @exception IndexOutOfBoundsException if {@code index} 810 * is negative or larger then the length of this 811 * {@code String}, or if {@code codePointOffset} is positive 812 * and the substring starting with {@code index} has fewer 813 * than {@code codePointOffset} code points, 814 * or if {@code codePointOffset} is negative and the substring 815 * before {@code index} has fewer than the absolute value 816 * of {@code codePointOffset} code points. 817 * @since 1.5 818 */ 819 public int offsetByCodePoints(int index, int codePointOffset) { 820 if (index < 0 || index > length()) { 821 throw new IndexOutOfBoundsException(); 822 } 823 return Character.offsetByCodePoints(this, index, codePointOffset); 824 } 825 826 /** 827 * Copies characters from this string into the destination character 828 * array. 829 * <p> 830 * The first character to be copied is at index {@code srcBegin}; 831 * the last character to be copied is at index {@code srcEnd-1} 832 * (thus the total number of characters to be copied is 833 * {@code srcEnd-srcBegin}). 
The characters are copied into the 834 * subarray of {@code dst} starting at index {@code dstBegin} 835 * and ending at index: 836 * <blockquote><pre> 837 * dstBegin + (srcEnd-srcBegin) - 1 838 * </pre></blockquote> 839 * 840 * @param srcBegin index of the first character in the string 841 * to copy. 842 * @param srcEnd index after the last character in the string 843 * to copy. 844 * @param dst the destination array. 845 * @param dstBegin the start offset in the destination array. 846 * @exception IndexOutOfBoundsException If any of the following 847 * is true: 848 * <ul><li>{@code srcBegin} is negative. 849 * <li>{@code srcBegin} is greater than {@code srcEnd} 850 * <li>{@code srcEnd} is greater than the length of this 851 * string 852 * <li>{@code dstBegin} is negative 853 * <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than 854 * {@code dst.length}</ul> 855 */ 856 public void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) { 857 checkBoundsBeginEnd(srcBegin, srcEnd, length()); 858 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length); 859 if (isLatin1()) { 860 StringLatin1.getChars(value, srcBegin, srcEnd, dst, dstBegin); 861 } else { 862 StringUTF16.getChars(value, srcBegin, srcEnd, dst, dstBegin); 863 } 864 } 865 866 /** 867 * Copies characters from this string into the destination byte array. Each 868 * byte receives the 8 low-order bits of the corresponding character. The 869 * eight high-order bits of each character are not copied and do not 870 * participate in the transfer in any way. 871 * 872 * <p> The first character to be copied is at index {@code srcBegin}; the 873 * last character to be copied is at index {@code srcEnd-1}. The total 874 * number of characters to be copied is {@code srcEnd-srcBegin}. 
The 875 * characters, converted to bytes, are copied into the subarray of {@code 876 * dst} starting at index {@code dstBegin} and ending at index: 877 * 878 * <blockquote><pre> 879 * dstBegin + (srcEnd-srcBegin) - 1 880 * </pre></blockquote> 881 * 882 * @deprecated This method does not properly convert characters into 883 * bytes. As of JDK 1.1, the preferred way to do this is via the 884 * {@link #getBytes()} method, which uses the platform's default charset. 885 * 886 * @param srcBegin 887 * Index of the first character in the string to copy 888 * 889 * @param srcEnd 890 * Index after the last character in the string to copy 891 * 892 * @param dst 893 * The destination array 894 * 895 * @param dstBegin 896 * The start offset in the destination array 897 * 898 * @throws IndexOutOfBoundsException 899 * If any of the following is true: 900 * <ul> 901 * <li> {@code srcBegin} is negative 902 * <li> {@code srcBegin} is greater than {@code srcEnd} 903 * <li> {@code srcEnd} is greater than the length of this String 904 * <li> {@code dstBegin} is negative 905 * <li> {@code dstBegin+(srcEnd-srcBegin)} is larger than {@code 906 * dst.length} 907 * </ul> 908 */ 909 @Deprecated(since="1.1") 910 public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) { 911 checkBoundsBeginEnd(srcBegin, srcEnd, length()); 912 Objects.requireNonNull(dst); 913 checkBoundsOffCount(dstBegin, srcEnd - srcBegin, dst.length); 914 if (isLatin1()) { 915 StringLatin1.getBytes(value, srcBegin, srcEnd, dst, dstBegin); 916 } else { 917 StringUTF16.getBytes(value, srcBegin, srcEnd, dst, dstBegin); 918 } 919 } 920 921 /** 922 * Encodes this {@code String} into a sequence of bytes using the named 923 * charset, storing the result into a new byte array. 924 * 925 * <p> The behavior of this method when this string cannot be encoded in 926 * the given charset is unspecified. 
The {@link 927 * java.nio.charset.CharsetEncoder} class should be used when more control 928 * over the encoding process is required. 929 * 930 * @param charsetName 931 * The name of a supported {@linkplain java.nio.charset.Charset 932 * charset} 933 * 934 * @return The resultant byte array 935 * 936 * @throws UnsupportedEncodingException 937 * If the named charset is not supported 938 * 939 * @since 1.1 940 */ 941 public byte[] getBytes(String charsetName) 942 throws UnsupportedEncodingException { 943 if (charsetName == null) throw new NullPointerException(); 944 return StringCoding.encode(charsetName, coder(), value); 945 } 946 947 /** 948 * Encodes this {@code String} into a sequence of bytes using the given 949 * {@linkplain java.nio.charset.Charset charset}, storing the result into a 950 * new byte array. 951 * 952 * <p> This method always replaces malformed-input and unmappable-character 953 * sequences with this charset's default replacement byte array. The 954 * {@link java.nio.charset.CharsetEncoder} class should be used when more 955 * control over the encoding process is required. 956 * 957 * @param charset 958 * The {@linkplain java.nio.charset.Charset} to be used to encode 959 * the {@code String} 960 * 961 * @return The resultant byte array 962 * 963 * @since 1.6 964 */ 965 public byte[] getBytes(Charset charset) { 966 if (charset == null) throw new NullPointerException(); 967 return StringCoding.encode(charset, coder(), value); 968 } 969 970 /** 971 * Encodes this {@code String} into a sequence of bytes using the 972 * platform's default charset, storing the result into a new byte array. 973 * 974 * <p> The behavior of this method when this string cannot be encoded in 975 * the default charset is unspecified. The {@link 976 * java.nio.charset.CharsetEncoder} class should be used when more control 977 * over the encoding process is required. 
978 * 979 * @return The resultant byte array 980 * 981 * @since 1.1 982 */ 983 public byte[] getBytes() { 984 return StringCoding.encode(coder(), value); 985 } 986 987 /** 988 * Compares this string to the specified object. The result is {@code 989 * true} if and only if the argument is not {@code null} and is a {@code 990 * String} object that represents the same sequence of characters as this 991 * object. 992 * 993 * <p>For finer-grained String comparison, refer to 994 * {@link java.text.Collator}. 995 * 996 * @param anObject 997 * The object to compare this {@code String} against 998 * 999 * @return {@code true} if the given object represents a {@code String} 1000 * equivalent to this string, {@code false} otherwise 1001 * 1002 * @see #compareTo(String) 1003 * @see #equalsIgnoreCase(String) 1004 */ 1005 public boolean equals(Object anObject) { 1006 if (this == anObject) { 1007 return true; 1008 } 1009 if (anObject instanceof String) { 1010 String aString = (String)anObject; 1011 if (coder() == aString.coder()) { 1012 return isLatin1() ? StringLatin1.equals(value, aString.value) 1013 : StringUTF16.equals(value, aString.value); 1014 } 1015 } 1016 return false; 1017 } 1018 1019 /** 1020 * Compares this string to the specified {@code StringBuffer}. The result 1021 * is {@code true} if and only if this {@code String} represents the same 1022 * sequence of characters as the specified {@code StringBuffer}. This method 1023 * synchronizes on the {@code StringBuffer}. 1024 * 1025 * <p>For finer-grained String comparison, refer to 1026 * {@link java.text.Collator}. 
1027 * 1028 * @param sb 1029 * The {@code StringBuffer} to compare this {@code String} against 1030 * 1031 * @return {@code true} if this {@code String} represents the same 1032 * sequence of characters as the specified {@code StringBuffer}, 1033 * {@code false} otherwise 1034 * 1035 * @since 1.4 1036 */ 1037 public boolean contentEquals(StringBuffer sb) { 1038 return contentEquals((CharSequence)sb); 1039 } 1040 1041 private boolean nonSyncContentEquals(AbstractStringBuilder sb) { 1042 int len = length(); 1043 if (len != sb.length()) { 1044 return false; 1045 } 1046 byte v1[] = value; 1047 byte v2[] = sb.getValue(); 1048 if (coder() == sb.getCoder()) { 1049 int n = v1.length; 1050 for (int i = 0; i < n; i++) { 1051 if (v1[i] != v2[i]) { 1052 return false; 1053 } 1054 } 1055 } else { 1056 if (!isLatin1()) { // utf16 str and latin1 abs can never be "equal" 1057 return false; 1058 } 1059 return StringUTF16.contentEquals(v1, v2, len); 1060 } 1061 return true; 1062 } 1063 1064 /** 1065 * Compares this string to the specified {@code CharSequence}. The 1066 * result is {@code true} if and only if this {@code String} represents the 1067 * same sequence of char values as the specified sequence. Note that if the 1068 * {@code CharSequence} is a {@code StringBuffer} then the method 1069 * synchronizes on it. 1070 * 1071 * <p>For finer-grained String comparison, refer to 1072 * {@link java.text.Collator}. 
1073 * 1074 * @param cs 1075 * The sequence to compare this {@code String} against 1076 * 1077 * @return {@code true} if this {@code String} represents the same 1078 * sequence of char values as the specified sequence, {@code 1079 * false} otherwise 1080 * 1081 * @since 1.5 1082 */ 1083 public boolean contentEquals(CharSequence cs) { 1084 // Argument is a StringBuffer, StringBuilder 1085 if (cs instanceof AbstractStringBuilder) { 1086 if (cs instanceof StringBuffer) { 1087 synchronized(cs) { 1088 return nonSyncContentEquals((AbstractStringBuilder)cs); 1089 } 1090 } else { 1091 return nonSyncContentEquals((AbstractStringBuilder)cs); 1092 } 1093 } 1094 // Argument is a String 1095 if (cs instanceof String) { 1096 return equals(cs); 1097 } 1098 // Argument is a generic CharSequence 1099 int n = cs.length(); 1100 if (n != length()) { 1101 return false; 1102 } 1103 byte[] val = this.value; 1104 if (isLatin1()) { 1105 for (int i = 0; i < n; i++) { 1106 if ((val[i] & 0xff) != cs.charAt(i)) { 1107 return false; 1108 } 1109 } 1110 } else { 1111 if (!StringUTF16.contentEquals(val, cs, n)) { 1112 return false; 1113 } 1114 } 1115 return true; 1116 } 1117 1118 /** 1119 * Compares this {@code String} to another {@code String}, ignoring case 1120 * considerations. Two strings are considered equal ignoring case if they 1121 * are of the same length and corresponding characters in the two strings 1122 * are equal ignoring case. 1123 * 1124 * <p> Two characters {@code c1} and {@code c2} are considered the same 1125 * ignoring case if at least one of the following is true: 1126 * <ul> 1127 * <li> The two characters are the same (as compared by the 1128 * {@code ==} operator) 1129 * <li> Calling {@code Character.toLowerCase(Character.toUpperCase(char))} 1130 * on each character produces the same result 1131 * </ul> 1132 * 1133 * <p>Note that this method does <em>not</em> take locale into account, and 1134 * will result in unsatisfactory results for certain locales. 
The 1135 * {@link java.text.Collator} class provides locale-sensitive comparison. 1136 * 1137 * @param anotherString 1138 * The {@code String} to compare this {@code String} against 1139 * 1140 * @return {@code true} if the argument is not {@code null} and it 1141 * represents an equivalent {@code String} ignoring case; {@code 1142 * false} otherwise 1143 * 1144 * @see #equals(Object) 1145 */ 1146 public boolean equalsIgnoreCase(String anotherString) { 1147 return (this == anotherString) ? true 1148 : (anotherString != null) 1149 && (anotherString.length() == length()) 1150 && regionMatches(true, 0, anotherString, 0, length()); 1151 } 1152 1153 /** 1154 * Compares two strings lexicographically. 1155 * The comparison is based on the Unicode value of each character in 1156 * the strings. The character sequence represented by this 1157 * {@code String} object is compared lexicographically to the 1158 * character sequence represented by the argument string. The result is 1159 * a negative integer if this {@code String} object 1160 * lexicographically precedes the argument string. The result is a 1161 * positive integer if this {@code String} object lexicographically 1162 * follows the argument string. The result is zero if the strings 1163 * are equal; {@code compareTo} returns {@code 0} exactly when 1164 * the {@link #equals(Object)} method would return {@code true}. 1165 * <p> 1166 * This is the definition of lexicographic ordering. If two strings are 1167 * different, then either they have different characters at some index 1168 * that is a valid index for both strings, or their lengths are different, 1169 * or both. If they have different characters at one or more index 1170 * positions, let <i>k</i> be the smallest such index; then the string 1171 * whose character at position <i>k</i> has the smaller value, as 1172 * determined by using the {@code <} operator, lexicographically precedes the 1173 * other string. 
In this case, {@code compareTo} returns the 1174 * difference of the two character values at position {@code k} in 1175 * the two string -- that is, the value: 1176 * <blockquote><pre> 1177 * this.charAt(k)-anotherString.charAt(k) 1178 * </pre></blockquote> 1179 * If there is no index position at which they differ, then the shorter 1180 * string lexicographically precedes the longer string. In this case, 1181 * {@code compareTo} returns the difference of the lengths of the 1182 * strings -- that is, the value: 1183 * <blockquote><pre> 1184 * this.length()-anotherString.length() 1185 * </pre></blockquote> 1186 * 1187 * <p>For finer-grained String comparison, refer to 1188 * {@link java.text.Collator}. 1189 * 1190 * @param anotherString the {@code String} to be compared. 1191 * @return the value {@code 0} if the argument string is equal to 1192 * this string; a value less than {@code 0} if this string 1193 * is lexicographically less than the string argument; and a 1194 * value greater than {@code 0} if this string is 1195 * lexicographically greater than the string argument. 1196 */ 1197 public int compareTo(String anotherString) { 1198 byte v1[] = value; 1199 byte v2[] = anotherString.value; 1200 if (coder() == anotherString.coder()) { 1201 return isLatin1() ? StringLatin1.compareTo(v1, v2) 1202 : StringUTF16.compareTo(v1, v2); 1203 } 1204 return isLatin1() ? StringLatin1.compareToUTF16(v1, v2) 1205 : StringUTF16.compareToLatin1(v1, v2); 1206 } 1207 1208 /** 1209 * A Comparator that orders {@code String} objects as by 1210 * {@code compareToIgnoreCase}. This comparator is serializable. 1211 * <p> 1212 * Note that this Comparator does <em>not</em> take locale into account, 1213 * and will result in an unsatisfactory ordering for certain locales. 1214 * The {@link java.text.Collator} class provides locale-sensitive comparison. 
1215 * 1216 * @see java.text.Collator 1217 * @since 1.2 1218 */ 1219 public static final Comparator<String> CASE_INSENSITIVE_ORDER 1220 = new CaseInsensitiveComparator(); 1221 private static class CaseInsensitiveComparator 1222 implements Comparator<String>, java.io.Serializable { 1223 // use serialVersionUID from JDK 1.2.2 for interoperability 1224 private static final long serialVersionUID = 8575799808933029326L; 1225 1226 public int compare(String s1, String s2) { 1227 byte v1[] = s1.value; 1228 byte v2[] = s2.value; 1229 if (s1.coder() == s2.coder()) { 1230 return s1.isLatin1() ? StringLatin1.compareToCI(v1, v2) 1231 : StringUTF16.compareToCI(v1, v2); 1232 } 1233 return s1.isLatin1() ? StringLatin1.compareToCI_UTF16(v1, v2) 1234 : StringUTF16.compareToCI_Latin1(v1, v2); 1235 } 1236 1237 /** Replaces the de-serialized object. */ 1238 private Object readResolve() { return CASE_INSENSITIVE_ORDER; } 1239 } 1240 1241 /** 1242 * Compares two strings lexicographically, ignoring case 1243 * differences. This method returns an integer whose sign is that of 1244 * calling {@code compareTo} with normalized versions of the strings 1245 * where case differences have been eliminated by calling 1246 * {@code Character.toLowerCase(Character.toUpperCase(character))} on 1247 * each character. 1248 * <p> 1249 * Note that this method does <em>not</em> take locale into account, 1250 * and will result in an unsatisfactory ordering for certain locales. 1251 * The {@link java.text.Collator} class provides locale-sensitive comparison. 1252 * 1253 * @param str the {@code String} to be compared. 1254 * @return a negative integer, zero, or a positive integer as the 1255 * specified String is greater than, equal to, or less 1256 * than this String, ignoring case considerations. 
1257 * @see java.text.Collator 1258 * @since 1.2 1259 */ 1260 public int compareToIgnoreCase(String str) { 1261 return CASE_INSENSITIVE_ORDER.compare(this, str); 1262 } 1263 1264 /** 1265 * Tests if two string regions are equal. 1266 * <p> 1267 * A substring of this {@code String} object is compared to a substring 1268 * of the argument other. The result is true if these substrings 1269 * represent identical character sequences. The substring of this 1270 * {@code String} object to be compared begins at index {@code toffset} 1271 * and has length {@code len}. The substring of other to be compared 1272 * begins at index {@code ooffset} and has length {@code len}. The 1273 * result is {@code false} if and only if at least one of the following 1274 * is true: 1275 * <ul><li>{@code toffset} is negative. 1276 * <li>{@code ooffset} is negative. 1277 * <li>{@code toffset+len} is greater than the length of this 1278 * {@code String} object. 1279 * <li>{@code ooffset+len} is greater than the length of the other 1280 * argument. 1281 * <li>There is some nonnegative integer <i>k</i> less than {@code len} 1282 * such that: 1283 * {@code this.charAt(toffset + }<i>k</i>{@code ) != other.charAt(ooffset + } 1284 * <i>k</i>{@code )} 1285 * </ul> 1286 * 1287 * <p>Note that this method does <em>not</em> take locale into account. The 1288 * {@link java.text.Collator} class provides locale-sensitive comparison. 1289 * 1290 * @param toffset the starting offset of the subregion in this string. 1291 * @param other the string argument. 1292 * @param ooffset the starting offset of the subregion in the string 1293 * argument. 1294 * @param len the number of characters to compare. 1295 * @return {@code true} if the specified subregion of this string 1296 * exactly matches the specified subregion of the string argument; 1297 * {@code false} otherwise. 
1298 */ 1299 public boolean regionMatches(int toffset, String other, int ooffset, int len) { 1300 byte tv[] = value; 1301 byte ov[] = other.value; 1302 // Note: toffset, ooffset, or len might be near -1>>>1. 1303 if ((ooffset < 0) || (toffset < 0) || 1304 (toffset > (long)length() - len) || 1305 (ooffset > (long)other.length() - len)) { 1306 return false; 1307 } 1308 if (coder() == other.coder()) { 1309 if (!isLatin1() && (len > 0)) { 1310 toffset = toffset << 1; 1311 ooffset = ooffset << 1; 1312 len = len << 1; 1313 } 1314 while (len-- > 0) { 1315 if (tv[toffset++] != ov[ooffset++]) { 1316 return false; 1317 } 1318 } 1319 } else { 1320 if (coder() == LATIN1) { 1321 while (len-- > 0) { 1322 if (StringLatin1.getChar(tv, toffset++) != 1323 StringUTF16.getChar(ov, ooffset++)) { 1324 return false; 1325 } 1326 } 1327 } else { 1328 while (len-- > 0) { 1329 if (StringUTF16.getChar(tv, toffset++) != 1330 StringLatin1.getChar(ov, ooffset++)) { 1331 return false; 1332 } 1333 } 1334 } 1335 } 1336 return true; 1337 } 1338 1339 /** 1340 * Tests if two string regions are equal. 1341 * <p> 1342 * A substring of this {@code String} object is compared to a substring 1343 * of the argument {@code other}. The result is {@code true} if these 1344 * substrings represent character sequences that are the same, ignoring 1345 * case if and only if {@code ignoreCase} is true. The substring of 1346 * this {@code String} object to be compared begins at index 1347 * {@code toffset} and has length {@code len}. The substring of 1348 * {@code other} to be compared begins at index {@code ooffset} and 1349 * has length {@code len}. The result is {@code false} if and only if 1350 * at least one of the following is true: 1351 * <ul><li>{@code toffset} is negative. 1352 * <li>{@code ooffset} is negative. 1353 * <li>{@code toffset+len} is greater than the length of this 1354 * {@code String} object. 1355 * <li>{@code ooffset+len} is greater than the length of the other 1356 * argument. 
1357 * <li>{@code ignoreCase} is {@code false} and there is some nonnegative 1358 * integer <i>k</i> less than {@code len} such that: 1359 * <blockquote><pre> 1360 * this.charAt(toffset+k) != other.charAt(ooffset+k) 1361 * </pre></blockquote> 1362 * <li>{@code ignoreCase} is {@code true} and there is some nonnegative 1363 * integer <i>k</i> less than {@code len} such that: 1364 * <blockquote><pre> 1365 * Character.toLowerCase(Character.toUpperCase(this.charAt(toffset+k))) != 1366 Character.toLowerCase(Character.toUpperCase(other.charAt(ooffset+k))) 1367 * </pre></blockquote> 1368 * </ul> 1369 * 1370 * <p>Note that this method does <em>not</em> take locale into account, 1371 * and will result in unsatisfactory results for certain locales when 1372 * {@code ignoreCase} is {@code true}. The {@link java.text.Collator} class 1373 * provides locale-sensitive comparison. 1374 * 1375 * @param ignoreCase if {@code true}, ignore case when comparing 1376 * characters. 1377 * @param toffset the starting offset of the subregion in this 1378 * string. 1379 * @param other the string argument. 1380 * @param ooffset the starting offset of the subregion in the string 1381 * argument. 1382 * @param len the number of characters to compare. 1383 * @return {@code true} if the specified subregion of this string 1384 * matches the specified subregion of the string argument; 1385 * {@code false} otherwise. Whether the matching is exact 1386 * or case insensitive depends on the {@code ignoreCase} 1387 * argument. 1388 */ 1389 public boolean regionMatches(boolean ignoreCase, int toffset, 1390 String other, int ooffset, int len) { 1391 if (!ignoreCase) { 1392 return regionMatches(toffset, other, ooffset, len); 1393 } 1394 // Note: toffset, ooffset, or len might be near -1>>>1. 
1395 if ((ooffset < 0) || (toffset < 0) 1396 || (toffset > (long)length() - len) 1397 || (ooffset > (long)other.length() - len)) { 1398 return false; 1399 } 1400 byte tv[] = value; 1401 byte ov[] = other.value; 1402 if (coder() == other.coder()) { 1403 return isLatin1() 1404 ? StringLatin1.regionMatchesCI(tv, toffset, ov, ooffset, len) 1405 : StringUTF16.regionMatchesCI(tv, toffset, ov, ooffset, len); 1406 } 1407 return isLatin1() 1408 ? StringLatin1.regionMatchesCI_UTF16(tv, toffset, ov, ooffset, len) 1409 : StringUTF16.regionMatchesCI_Latin1(tv, toffset, ov, ooffset, len); 1410 } 1411 1412 /** 1413 * Tests if the substring of this string beginning at the 1414 * specified index starts with the specified prefix. 1415 * 1416 * @param prefix the prefix. 1417 * @param toffset where to begin looking in this string. 1418 * @return {@code true} if the character sequence represented by the 1419 * argument is a prefix of the substring of this object starting 1420 * at index {@code toffset}; {@code false} otherwise. 1421 * The result is {@code false} if {@code toffset} is 1422 * negative or greater than the length of this 1423 * {@code String} object; otherwise the result is the same 1424 * as the result of the expression 1425 * <pre> 1426 * this.substring(toffset).startsWith(prefix) 1427 * </pre> 1428 */ 1429 public boolean startsWith(String prefix, int toffset) { 1430 // Note: toffset might be near -1>>>1. 1431 if (toffset < 0 || toffset > length() - prefix.length()) { 1432 return false; 1433 } 1434 byte ta[] = value; 1435 byte pa[] = prefix.value; 1436 int po = 0; 1437 int pc = pa.length; 1438 if (coder() == prefix.coder()) { 1439 int to = isLatin1() ? 
toffset : toffset << 1; 1440 while (po < pc) { 1441 if (ta[to++] != pa[po++]) { 1442 return false; 1443 } 1444 } 1445 } else { 1446 if (isLatin1()) { // && pcoder == UTF16 1447 return false; 1448 } 1449 // coder == UTF16 && pcoder == LATIN1) 1450 while (po < pc) { 1451 if (StringUTF16.getChar(ta, toffset++) != (pa[po++] & 0xff)) { 1452 return false; 1453 } 1454 } 1455 } 1456 return true; 1457 } 1458 1459 /** 1460 * Tests if this string starts with the specified prefix. 1461 * 1462 * @param prefix the prefix. 1463 * @return {@code true} if the character sequence represented by the 1464 * argument is a prefix of the character sequence represented by 1465 * this string; {@code false} otherwise. 1466 * Note also that {@code true} will be returned if the 1467 * argument is an empty string or is equal to this 1468 * {@code String} object as determined by the 1469 * {@link #equals(Object)} method. 1470 * @since 1.0 1471 */ 1472 public boolean startsWith(String prefix) { 1473 return startsWith(prefix, 0); 1474 } 1475 1476 /** 1477 * Tests if this string ends with the specified suffix. 1478 * 1479 * @param suffix the suffix. 1480 * @return {@code true} if the character sequence represented by the 1481 * argument is a suffix of the character sequence represented by 1482 * this object; {@code false} otherwise. Note that the 1483 * result will be {@code true} if the argument is the 1484 * empty string or is equal to this {@code String} object 1485 * as determined by the {@link #equals(Object)} method. 1486 */ 1487 public boolean endsWith(String suffix) { 1488 return startsWith(suffix, length() - suffix.length()); 1489 } 1490 1491 /** 1492 * Returns a hash code for this string. The hash code for a 1493 * {@code String} object is computed as 1494 * <blockquote><pre> 1495 * s[0]*31^(n-1) + s[1]*31^(n-2) + ... 
+ s[n-1] 1496 * </pre></blockquote> 1497 * using {@code int} arithmetic, where {@code s[i]} is the 1498 * <i>i</i>th character of the string, {@code n} is the length of 1499 * the string, and {@code ^} indicates exponentiation. 1500 * (The hash value of the empty string is zero.) 1501 * 1502 * @return a hash code value for this object. 1503 */ 1504 public int hashCode() { 1505 int h = hash; 1506 if (h == 0 && value.length > 0) { 1507 hash = h = isLatin1() ? StringLatin1.hashCode(value) 1508 : StringUTF16.hashCode(value); 1509 } 1510 return h; 1511 } 1512 1513 /** 1514 * Returns the index within this string of the first occurrence of 1515 * the specified character. If a character with value 1516 * {@code ch} occurs in the character sequence represented by 1517 * this {@code String} object, then the index (in Unicode 1518 * code units) of the first such occurrence is returned. For 1519 * values of {@code ch} in the range from 0 to 0xFFFF 1520 * (inclusive), this is the smallest value <i>k</i> such that: 1521 * <blockquote><pre> 1522 * this.charAt(<i>k</i>) == ch 1523 * </pre></blockquote> 1524 * is true. For other values of {@code ch}, it is the 1525 * smallest value <i>k</i> such that: 1526 * <blockquote><pre> 1527 * this.codePointAt(<i>k</i>) == ch 1528 * </pre></blockquote> 1529 * is true. In either case, if no such character occurs in this 1530 * string, then {@code -1} is returned. 1531 * 1532 * @param ch a character (Unicode code point). 1533 * @return the index of the first occurrence of the character in the 1534 * character sequence represented by this object, or 1535 * {@code -1} if the character does not occur. 1536 */ 1537 public int indexOf(int ch) { 1538 return indexOf(ch, 0); 1539 } 1540 1541 /** 1542 * Returns the index within this string of the first occurrence of the 1543 * specified character, starting the search at the specified index. 
1544 * <p> 1545 * If a character with value {@code ch} occurs in the 1546 * character sequence represented by this {@code String} 1547 * object at an index no smaller than {@code fromIndex}, then 1548 * the index of the first such occurrence is returned. For values 1549 * of {@code ch} in the range from 0 to 0xFFFF (inclusive), 1550 * this is the smallest value <i>k</i> such that: 1551 * <blockquote><pre> 1552 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1553 * </pre></blockquote> 1554 * is true. For other values of {@code ch}, it is the 1555 * smallest value <i>k</i> such that: 1556 * <blockquote><pre> 1557 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> >= fromIndex) 1558 * </pre></blockquote> 1559 * is true. In either case, if no such character occurs in this 1560 * string at or after position {@code fromIndex}, then 1561 * {@code -1} is returned. 1562 * 1563 * <p> 1564 * There is no restriction on the value of {@code fromIndex}. If it 1565 * is negative, it has the same effect as if it were zero: this entire 1566 * string may be searched. If it is greater than the length of this 1567 * string, it has the same effect as if it were equal to the length of 1568 * this string: {@code -1} is returned. 1569 * 1570 * <p>All indices are specified in {@code char} values 1571 * (Unicode code units). 1572 * 1573 * @param ch a character (Unicode code point). 1574 * @param fromIndex the index to start the search from. 1575 * @return the index of the first occurrence of the character in the 1576 * character sequence represented by this object that is greater 1577 * than or equal to {@code fromIndex}, or {@code -1} 1578 * if the character does not occur. 1579 */ 1580 public int indexOf(int ch, int fromIndex) { 1581 return isLatin1() ? 
StringLatin1.indexOf(value, ch, fromIndex) 1582 : StringUTF16.indexOf(value, ch, fromIndex); 1583 } 1584 1585 /** 1586 * Returns the index within this string of the last occurrence of 1587 * the specified character. For values of {@code ch} in the 1588 * range from 0 to 0xFFFF (inclusive), the index (in Unicode code 1589 * units) returned is the largest value <i>k</i> such that: 1590 * <blockquote><pre> 1591 * this.charAt(<i>k</i>) == ch 1592 * </pre></blockquote> 1593 * is true. For other values of {@code ch}, it is the 1594 * largest value <i>k</i> such that: 1595 * <blockquote><pre> 1596 * this.codePointAt(<i>k</i>) == ch 1597 * </pre></blockquote> 1598 * is true. In either case, if no such character occurs in this 1599 * string, then {@code -1} is returned. The 1600 * {@code String} is searched backwards starting at the last 1601 * character. 1602 * 1603 * @param ch a character (Unicode code point). 1604 * @return the index of the last occurrence of the character in the 1605 * character sequence represented by this object, or 1606 * {@code -1} if the character does not occur. 1607 */ 1608 public int lastIndexOf(int ch) { 1609 return lastIndexOf(ch, length() - 1); 1610 } 1611 1612 /** 1613 * Returns the index within this string of the last occurrence of 1614 * the specified character, searching backward starting at the 1615 * specified index. For values of {@code ch} in the range 1616 * from 0 to 0xFFFF (inclusive), the index returned is the largest 1617 * value <i>k</i> such that: 1618 * <blockquote><pre> 1619 * (this.charAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1620 * </pre></blockquote> 1621 * is true. For other values of {@code ch}, it is the 1622 * largest value <i>k</i> such that: 1623 * <blockquote><pre> 1624 * (this.codePointAt(<i>k</i>) == ch) {@code &&} (<i>k</i> <= fromIndex) 1625 * </pre></blockquote> 1626 * is true. 
In either case, if no such character occurs in this 1627 * string at or before position {@code fromIndex}, then 1628 * {@code -1} is returned. 1629 * 1630 * <p>All indices are specified in {@code char} values 1631 * (Unicode code units). 1632 * 1633 * @param ch a character (Unicode code point). 1634 * @param fromIndex the index to start the search from. There is no 1635 * restriction on the value of {@code fromIndex}. If it is 1636 * greater than or equal to the length of this string, it has 1637 * the same effect as if it were equal to one less than the 1638 * length of this string: this entire string may be searched. 1639 * If it is negative, it has the same effect as if it were -1: 1640 * -1 is returned. 1641 * @return the index of the last occurrence of the character in the 1642 * character sequence represented by this object that is less 1643 * than or equal to {@code fromIndex}, or {@code -1} 1644 * if the character does not occur before that point. 1645 */ 1646 public int lastIndexOf(int ch, int fromIndex) { 1647 return isLatin1() ? StringLatin1.lastIndexOf(value, ch, fromIndex) 1648 : StringUTF16.lastIndexOf(value, ch, fromIndex); 1649 } 1650 1651 /** 1652 * Returns the index within this string of the first occurrence of the 1653 * specified substring. 1654 * 1655 * <p>The returned index is the smallest value {@code k} for which: 1656 * <pre>{@code 1657 * this.startsWith(str, k) 1658 * }</pre> 1659 * If no such value of {@code k} exists, then {@code -1} is returned. 1660 * 1661 * @param str the substring to search for. 1662 * @return the index of the first occurrence of the specified substring, 1663 * or {@code -1} if there is no such occurrence. 1664 */ 1665 public int indexOf(String str) { 1666 if (coder() == str.coder()) { 1667 return isLatin1() ? 
StringLatin1.indexOf(value, str.value) 1668 : StringUTF16.indexOf(value, str.value); 1669 } 1670 if (coder() == LATIN1) { // str.coder == UTF16 1671 return -1; 1672 } 1673 return StringUTF16.indexOfLatin1(value, str.value); 1674 } 1675 1676 /** 1677 * Returns the index within this string of the first occurrence of the 1678 * specified substring, starting at the specified index. 1679 * 1680 * <p>The returned index is the smallest value {@code k} for which: 1681 * <pre>{@code 1682 * k >= Math.min(fromIndex, this.length()) && 1683 * this.startsWith(str, k) 1684 * }</pre> 1685 * If no such value of {@code k} exists, then {@code -1} is returned. 1686 * 1687 * @param str the substring to search for. 1688 * @param fromIndex the index from which to start the search. 1689 * @return the index of the first occurrence of the specified substring, 1690 * starting at the specified index, 1691 * or {@code -1} if there is no such occurrence. 1692 */ 1693 public int indexOf(String str, int fromIndex) { 1694 return indexOf(value, coder(), length(), str, fromIndex); 1695 } 1696 1697 /** 1698 * Code shared by String and AbstractStringBuilder to do searches. The 1699 * source is the character array being searched, and the target 1700 * is the string being searched for. 1701 * 1702 * @param src the characters being searched. 1703 * @param srcCoder the coder of the source string. 1704 * @param srcCount length of the source string. 1705 * @param tgtStr the characters being searched for. 1706 * @param fromIndex the index to begin searching from. 1707 */ 1708 static int indexOf(byte[] src, byte srcCoder, int srcCount, 1709 String tgtStr, int fromIndex) { 1710 byte[] tgt = tgtStr.value; 1711 byte tgtCoder = tgtStr.coder(); 1712 int tgtCount = tgtStr.length(); 1713 1714 if (fromIndex >= srcCount) { 1715 return (tgtCount == 0 ? 
srcCount : -1); 1716 } 1717 if (fromIndex < 0) { 1718 fromIndex = 0; 1719 } 1720 if (tgtCount == 0) { 1721 return fromIndex; 1722 } 1723 if (tgtCount > srcCount) { 1724 return -1; 1725 } 1726 if (srcCoder == tgtCoder) { 1727 return srcCoder == LATIN1 1728 ? StringLatin1.indexOf(src, srcCount, tgt, tgtCount, fromIndex) 1729 : StringUTF16.indexOf(src, srcCount, tgt, tgtCount, fromIndex); 1730 } 1731 if (srcCoder == LATIN1) { // && tgtCoder == UTF16 1732 return -1; 1733 } 1734 // srcCoder == UTF16 && tgtCoder == LATIN1) { 1735 return StringUTF16.indexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex); 1736 } 1737 1738 /** 1739 * Returns the index within this string of the last occurrence of the 1740 * specified substring. The last occurrence of the empty string "" 1741 * is considered to occur at the index value {@code this.length()}. 1742 * 1743 * <p>The returned index is the largest value {@code k} for which: 1744 * <pre>{@code 1745 * this.startsWith(str, k) 1746 * }</pre> 1747 * If no such value of {@code k} exists, then {@code -1} is returned. 1748 * 1749 * @param str the substring to search for. 1750 * @return the index of the last occurrence of the specified substring, 1751 * or {@code -1} if there is no such occurrence. 1752 */ 1753 public int lastIndexOf(String str) { 1754 return lastIndexOf(str, length()); 1755 } 1756 1757 /** 1758 * Returns the index within this string of the last occurrence of the 1759 * specified substring, searching backward starting at the specified index. 1760 * 1761 * <p>The returned index is the largest value {@code k} for which: 1762 * <pre>{@code 1763 * k <= Math.min(fromIndex, this.length()) && 1764 * this.startsWith(str, k) 1765 * }</pre> 1766 * If no such value of {@code k} exists, then {@code -1} is returned. 1767 * 1768 * @param str the substring to search for. 1769 * @param fromIndex the index to start the search from. 
1770 * @return the index of the last occurrence of the specified substring, 1771 * searching backward from the specified index, 1772 * or {@code -1} if there is no such occurrence. 1773 */ 1774 public int lastIndexOf(String str, int fromIndex) { 1775 return lastIndexOf(value, coder(), length(), str, fromIndex); 1776 } 1777 1778 /** 1779 * Code shared by String and AbstractStringBuilder to do searches. The 1780 * source is the character array being searched, and the target 1781 * is the string being searched for. 1782 * 1783 * @param src the characters being searched. 1784 * @param srcCoder coder handles the mapping between bytes/chars 1785 * @param srcCount count of the source string. 1786 * @param tgt the characters being searched for. 1787 * @param fromIndex the index to begin searching from. 1788 */ 1789 static int lastIndexOf(byte[] src, byte srcCoder, int srcCount, 1790 String tgtStr, int fromIndex) { 1791 byte[] tgt = tgtStr.value; 1792 byte tgtCoder = tgtStr.coder(); 1793 int tgtCount = tgtStr.length(); 1794 /* 1795 * Check arguments; return immediately where possible. For 1796 * consistency, don't check for null str. 1797 */ 1798 int rightIndex = srcCount - tgtCount; 1799 if (fromIndex > rightIndex) { 1800 fromIndex = rightIndex; 1801 } 1802 if (fromIndex < 0) { 1803 return -1; 1804 } 1805 /* Empty string always matches. */ 1806 if (tgtCount == 0) { 1807 return fromIndex; 1808 } 1809 if (srcCoder == tgtCoder) { 1810 return srcCoder == LATIN1 1811 ? StringLatin1.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex) 1812 : StringUTF16.lastIndexOf(src, srcCount, tgt, tgtCount, fromIndex); 1813 } 1814 if (srcCoder == LATIN1) { // && tgtCoder == UTF16 1815 return -1; 1816 } 1817 // srcCoder == UTF16 && tgtCoder == LATIN1 1818 return StringUTF16.lastIndexOfLatin1(src, srcCount, tgt, tgtCount, fromIndex); 1819 } 1820 1821 /** 1822 * Returns a string that is a substring of this string. 
The 1823 * substring begins with the character at the specified index and 1824 * extends to the end of this string. <p> 1825 * Examples: 1826 * <blockquote><pre> 1827 * "unhappy".substring(2) returns "happy" 1828 * "Harbison".substring(3) returns "bison" 1829 * "emptiness".substring(9) returns "" (an empty string) 1830 * </pre></blockquote> 1831 * 1832 * @param beginIndex the beginning index, inclusive. 1833 * @return the specified substring. 1834 * @exception IndexOutOfBoundsException if 1835 * {@code beginIndex} is negative or larger than the 1836 * length of this {@code String} object. 1837 */ 1838 public String substring(int beginIndex) { 1839 if (beginIndex < 0) { 1840 throw new StringIndexOutOfBoundsException(beginIndex); 1841 } 1842 int subLen = length() - beginIndex; 1843 if (subLen < 0) { 1844 throw new StringIndexOutOfBoundsException(subLen); 1845 } 1846 if (beginIndex == 0) { 1847 return this; 1848 } 1849 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen) 1850 : StringUTF16.newString(value, beginIndex, subLen); 1851 } 1852 1853 /** 1854 * Returns a string that is a substring of this string. The 1855 * substring begins at the specified {@code beginIndex} and 1856 * extends to the character at index {@code endIndex - 1}. 1857 * Thus the length of the substring is {@code endIndex-beginIndex}. 1858 * <p> 1859 * Examples: 1860 * <blockquote><pre> 1861 * "hamburger".substring(4, 8) returns "urge" 1862 * "smiles".substring(1, 5) returns "mile" 1863 * </pre></blockquote> 1864 * 1865 * @param beginIndex the beginning index, inclusive. 1866 * @param endIndex the ending index, exclusive. 1867 * @return the specified substring. 1868 * @exception IndexOutOfBoundsException if the 1869 * {@code beginIndex} is negative, or 1870 * {@code endIndex} is larger than the length of 1871 * this {@code String} object, or 1872 * {@code beginIndex} is larger than 1873 * {@code endIndex}. 
1874 */ 1875 public String substring(int beginIndex, int endIndex) { 1876 int length = length(); 1877 checkBoundsBeginEnd(beginIndex, endIndex, length); 1878 int subLen = endIndex - beginIndex; 1879 if (beginIndex == 0 && endIndex == length) { 1880 return this; 1881 } 1882 return isLatin1() ? StringLatin1.newString(value, beginIndex, subLen) 1883 : StringUTF16.newString(value, beginIndex, subLen); 1884 } 1885 1886 /** 1887 * Returns a character sequence that is a subsequence of this sequence. 1888 * 1889 * <p> An invocation of this method of the form 1890 * 1891 * <blockquote><pre> 1892 * str.subSequence(begin, end)</pre></blockquote> 1893 * 1894 * behaves in exactly the same way as the invocation 1895 * 1896 * <blockquote><pre> 1897 * str.substring(begin, end)</pre></blockquote> 1898 * 1899 * @apiNote 1900 * This method is defined so that the {@code String} class can implement 1901 * the {@link CharSequence} interface. 1902 * 1903 * @param beginIndex the begin index, inclusive. 1904 * @param endIndex the end index, exclusive. 1905 * @return the specified subsequence. 1906 * 1907 * @throws IndexOutOfBoundsException 1908 * if {@code beginIndex} or {@code endIndex} is negative, 1909 * if {@code endIndex} is greater than {@code length()}, 1910 * or if {@code beginIndex} is greater than {@code endIndex} 1911 * 1912 * @since 1.4 1913 * @spec JSR-51 1914 */ 1915 public CharSequence subSequence(int beginIndex, int endIndex) { 1916 return this.substring(beginIndex, endIndex); 1917 } 1918 1919 /** 1920 * Concatenates the specified string to the end of this string. 1921 * <p> 1922 * If the length of the argument string is {@code 0}, then this 1923 * {@code String} object is returned. 
Otherwise, a 1924 * {@code String} object is returned that represents a character 1925 * sequence that is the concatenation of the character sequence 1926 * represented by this {@code String} object and the character 1927 * sequence represented by the argument string.<p> 1928 * Examples: 1929 * <blockquote><pre> 1930 * "cares".concat("s") returns "caress" 1931 * "to".concat("get").concat("her") returns "together" 1932 * </pre></blockquote> 1933 * 1934 * @param str the {@code String} that is concatenated to the end 1935 * of this {@code String}. 1936 * @return a string that represents the concatenation of this object's 1937 * characters followed by the string argument's characters. 1938 */ 1939 public String concat(String str) { 1940 int olen = str.length(); 1941 if (olen == 0) { 1942 return this; 1943 } 1944 if (coder() == str.coder()) { 1945 byte[] val = this.value; 1946 byte[] oval = str.value; 1947 int len = val.length + oval.length; 1948 byte[] buf = Arrays.copyOf(val, len); 1949 System.arraycopy(oval, 0, buf, val.length, oval.length); 1950 return new String(buf, coder); 1951 } 1952 int len = length(); 1953 byte[] buf = StringUTF16.newBytesFor(len + olen); 1954 getBytes(buf, 0, UTF16); 1955 str.getBytes(buf, len, UTF16); 1956 return new String(buf, UTF16); 1957 } 1958 1959 /** 1960 * Returns a string resulting from replacing all occurrences of 1961 * {@code oldChar} in this string with {@code newChar}. 1962 * <p> 1963 * If the character {@code oldChar} does not occur in the 1964 * character sequence represented by this {@code String} object, 1965 * then a reference to this {@code String} object is returned. 1966 * Otherwise, a {@code String} object is returned that 1967 * represents a character sequence identical to the character sequence 1968 * represented by this {@code String} object, except that every 1969 * occurrence of {@code oldChar} is replaced by an occurrence 1970 * of {@code newChar}. 
1971 * <p> 1972 * Examples: 1973 * <blockquote><pre> 1974 * "mesquite in your cellar".replace('e', 'o') 1975 * returns "mosquito in your collar" 1976 * "the war of baronets".replace('r', 'y') 1977 * returns "the way of bayonets" 1978 * "sparring with a purple porpoise".replace('p', 't') 1979 * returns "starring with a turtle tortoise" 1980 * "JonL".replace('q', 'x') returns "JonL" (no change) 1981 * </pre></blockquote> 1982 * 1983 * @param oldChar the old character. 1984 * @param newChar the new character. 1985 * @return a string derived from this string by replacing every 1986 * occurrence of {@code oldChar} with {@code newChar}. 1987 */ 1988 public String replace(char oldChar, char newChar) { 1989 if (oldChar != newChar) { 1990 String ret = isLatin1() ? StringLatin1.replace(value, oldChar, newChar) 1991 : StringUTF16.replace(value, oldChar, newChar); 1992 if (ret != null) { 1993 return ret; 1994 } 1995 } 1996 return this; 1997 } 1998 1999 /** 2000 * Tells whether or not this string matches the given <a 2001 * href="../util/regex/Pattern.html#sum">regular expression</a>. 
2002 * 2003 * <p> An invocation of this method of the form 2004 * <i>str</i>{@code .matches(}<i>regex</i>{@code )} yields exactly the 2005 * same result as the expression 2006 * 2007 * <blockquote> 2008 * {@link java.util.regex.Pattern}.{@link java.util.regex.Pattern#matches(String,CharSequence) 2009 * matches(<i>regex</i>, <i>str</i>)} 2010 * </blockquote> 2011 * 2012 * @param regex 2013 * the regular expression to which this string is to be matched 2014 * 2015 * @return {@code true} if, and only if, this string matches the 2016 * given regular expression 2017 * 2018 * @throws PatternSyntaxException 2019 * if the regular expression's syntax is invalid 2020 * 2021 * @see java.util.regex.Pattern 2022 * 2023 * @since 1.4 2024 * @spec JSR-51 2025 */ 2026 public boolean matches(String regex) { 2027 return Pattern.matches(regex, this); 2028 } 2029 2030 /** 2031 * Returns true if and only if this string contains the specified 2032 * sequence of char values. 2033 * 2034 * @param s the sequence to search for 2035 * @return true if this string contains {@code s}, false otherwise 2036 * @since 1.5 2037 */ 2038 public boolean contains(CharSequence s) { 2039 return indexOf(s.toString()) >= 0; 2040 } 2041 2042 /** 2043 * Replaces the first substring of this string that matches the given <a 2044 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2045 * given replacement. 
2046 * 2047 * <p> An invocation of this method of the form 2048 * <i>str</i>{@code .replaceFirst(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2049 * yields exactly the same result as the expression 2050 * 2051 * <blockquote> 2052 * <code> 2053 * {@link java.util.regex.Pattern}.{@link 2054 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2055 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2056 * java.util.regex.Matcher#replaceFirst replaceFirst}(<i>repl</i>) 2057 * </code> 2058 * </blockquote> 2059 * 2060 *<p> 2061 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2062 * replacement string may cause the results to be different than if it were 2063 * being treated as a literal replacement string; see 2064 * {@link java.util.regex.Matcher#replaceFirst}. 2065 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2066 * meaning of these characters, if desired. 2067 * 2068 * @param regex 2069 * the regular expression to which this string is to be matched 2070 * @param replacement 2071 * the string to be substituted for the first match 2072 * 2073 * @return The resulting {@code String} 2074 * 2075 * @throws PatternSyntaxException 2076 * if the regular expression's syntax is invalid 2077 * 2078 * @see java.util.regex.Pattern 2079 * 2080 * @since 1.4 2081 * @spec JSR-51 2082 */ 2083 public String replaceFirst(String regex, String replacement) { 2084 return Pattern.compile(regex).matcher(this).replaceFirst(replacement); 2085 } 2086 2087 /** 2088 * Replaces each substring of this string that matches the given <a 2089 * href="../util/regex/Pattern.html#sum">regular expression</a> with the 2090 * given replacement. 
2091 * 2092 * <p> An invocation of this method of the form 2093 * <i>str</i>{@code .replaceAll(}<i>regex</i>{@code ,} <i>repl</i>{@code )} 2094 * yields exactly the same result as the expression 2095 * 2096 * <blockquote> 2097 * <code> 2098 * {@link java.util.regex.Pattern}.{@link 2099 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2100 * java.util.regex.Pattern#matcher(java.lang.CharSequence) matcher}(<i>str</i>).{@link 2101 * java.util.regex.Matcher#replaceAll replaceAll}(<i>repl</i>) 2102 * </code> 2103 * </blockquote> 2104 * 2105 *<p> 2106 * Note that backslashes ({@code \}) and dollar signs ({@code $}) in the 2107 * replacement string may cause the results to be different than if it were 2108 * being treated as a literal replacement string; see 2109 * {@link java.util.regex.Matcher#replaceAll Matcher.replaceAll}. 2110 * Use {@link java.util.regex.Matcher#quoteReplacement} to suppress the special 2111 * meaning of these characters, if desired. 2112 * 2113 * @param regex 2114 * the regular expression to which this string is to be matched 2115 * @param replacement 2116 * the string to be substituted for each match 2117 * 2118 * @return The resulting {@code String} 2119 * 2120 * @throws PatternSyntaxException 2121 * if the regular expression's syntax is invalid 2122 * 2123 * @see java.util.regex.Pattern 2124 * 2125 * @since 1.4 2126 * @spec JSR-51 2127 */ 2128 public String replaceAll(String regex, String replacement) { 2129 return Pattern.compile(regex).matcher(this).replaceAll(replacement); 2130 } 2131 2132 /** 2133 * Replaces each substring of this string that matches the literal target 2134 * sequence with the specified literal replacement sequence. The 2135 * replacement proceeds from the beginning of the string to the end, for 2136 * example, replacing "aa" with "b" in the string "aaa" will result in 2137 * "ba" rather than "ab". 
2138 * 2139 * @param target The sequence of char values to be replaced 2140 * @param replacement The replacement sequence of char values 2141 * @return The resulting string 2142 * @since 1.5 2143 */ 2144 public String replace(CharSequence target, CharSequence replacement) { 2145 String tgtStr = target.toString(); 2146 String replStr = replacement.toString(); 2147 int j = indexOf(tgtStr); 2148 if (j < 0) { 2149 return this; 2150 } 2151 int tgtLen = tgtStr.length(); 2152 int tgtLen1 = Math.max(tgtLen, 1); 2153 int thisLen = length(); 2154 2155 int newLenHint = thisLen - tgtLen + replStr.length(); 2156 if (newLenHint < 0) { 2157 throw new OutOfMemoryError(); 2158 } 2159 StringBuilder sb = new StringBuilder(newLenHint); 2160 int i = 0; 2161 do { 2162 sb.append(this, i, j).append(replStr); 2163 i = j + tgtLen; 2164 } while (j < thisLen && (j = indexOf(tgtStr, j + tgtLen1)) > 0); 2165 return sb.append(this, i, thisLen).toString(); 2166 } 2167 2168 /** 2169 * Splits this string around matches of the given 2170 * <a href="../util/regex/Pattern.html#sum">regular expression</a>. 2171 * 2172 * <p> The array returned by this method contains each substring of this 2173 * string that is terminated by another substring that matches the given 2174 * expression or is terminated by the end of the string. The substrings in 2175 * the array are in the order in which they occur in this string. If the 2176 * expression does not match any part of the input then the resulting array 2177 * has just one element, namely this string. 2178 * 2179 * <p> When there is a positive-width match at the beginning of this 2180 * string then an empty leading substring is included at the beginning 2181 * of the resulting array. A zero-width match at the beginning however 2182 * never produces such empty leading substring. 2183 * 2184 * <p> The {@code limit} parameter controls the number of times the 2185 * pattern is applied and therefore affects the length of the resulting 2186 * array. 
2187 * <ul> 2188 * <li><p> 2189 * If the <i>limit</i> is positive then the pattern will be applied 2190 * at most <i>limit</i> - 1 times, the array's length will be 2191 * no greater than <i>limit</i>, and the array's last entry will contain 2192 * all input beyond the last matched delimiter.</p></li> 2193 * 2194 * <li><p> 2195 * If the <i>limit</i> is zero then the pattern will be applied as 2196 * many times as possible, the array can have any length, and trailing 2197 * empty strings will be discarded.</p></li> 2198 * 2199 * <li><p> 2200 * If the <i>limit</i> is negative then the pattern will be applied 2201 * as many times as possible and the array can have any length.</p></li> 2202 * </ul> 2203 * 2204 * <p> The string {@code "boo:and:foo"}, for example, yields the 2205 * following results with these parameters: 2206 * 2207 * <blockquote><table class="plain"> 2208 * <caption style="display:none">Split example showing regex, limit, and result</caption> 2209 * <thead> 2210 * <tr> 2211 * <th scope="col">Regex</th> 2212 * <th scope="col">Limit</th> 2213 * <th scope="col">Result</th> 2214 * </tr> 2215 * </thead> 2216 * <tbody> 2217 * <tr><th scope="row" rowspan="3" style="font-weight:normal">:</th> 2218 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">2</th> 2219 * <td>{@code { "boo", "and:foo" }}</td></tr> 2220 * <tr><!-- : --> 2221 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th> 2222 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2223 * <tr><!-- : --> 2224 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">-2</th> 2225 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2226 * <tr><th scope="row" rowspan="3" style="font-weight:normal">o</th> 2227 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">5</th> 2228 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2229 * <tr><!-- o --> 2230 * <th scope="row" style="font-weight:normal; 
text-align:right; padding-right:1em">-2</th> 2231 * <td>{@code { "b", "", ":and:f", "", "" }}</td></tr> 2232 * <tr><!-- o --> 2233 * <th scope="row" style="font-weight:normal; text-align:right; padding-right:1em">0</th> 2234 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2235 * </tbody> 2236 * </table></blockquote> 2237 * 2238 * <p> An invocation of this method of the form 2239 * <i>str.</i>{@code split(}<i>regex</i>{@code ,} <i>n</i>{@code )} 2240 * yields the same result as the expression 2241 * 2242 * <blockquote> 2243 * <code> 2244 * {@link java.util.regex.Pattern}.{@link 2245 * java.util.regex.Pattern#compile compile}(<i>regex</i>).{@link 2246 * java.util.regex.Pattern#split(java.lang.CharSequence,int) split}(<i>str</i>, <i>n</i>) 2247 * </code> 2248 * </blockquote> 2249 * 2250 * 2251 * @param regex 2252 * the delimiting regular expression 2253 * 2254 * @param limit 2255 * the result threshold, as described above 2256 * 2257 * @return the array of strings computed by splitting this string 2258 * around matches of the given regular expression 2259 * 2260 * @throws PatternSyntaxException 2261 * if the regular expression's syntax is invalid 2262 * 2263 * @see java.util.regex.Pattern 2264 * 2265 * @since 1.4 2266 * @spec JSR-51 2267 */ 2268 public String[] split(String regex, int limit) { 2269 /* fastpath if the regex is a 2270 (1)one-char String and this character is not one of the 2271 RegEx's meta characters ".$|()[{^?*+\\", or 2272 (2)two-char String and the first char is the backslash and 2273 the second is not the ascii digit or ascii letter. 
2274 */ 2275 char ch = 0; 2276 if (((regex.length() == 1 && 2277 ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) || 2278 (regex.length() == 2 && 2279 regex.charAt(0) == '\\' && 2280 (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 && 2281 ((ch-'a')|('z'-ch)) < 0 && 2282 ((ch-'A')|('Z'-ch)) < 0)) && 2283 (ch < Character.MIN_HIGH_SURROGATE || 2284 ch > Character.MAX_LOW_SURROGATE)) 2285 { 2286 int off = 0; 2287 int next = 0; 2288 boolean limited = limit > 0; 2289 ArrayList<String> list = new ArrayList<>(); 2290 while ((next = indexOf(ch, off)) != -1) { 2291 if (!limited || list.size() < limit - 1) { 2292 list.add(substring(off, next)); 2293 off = next + 1; 2294 } else { // last one 2295 //assert (list.size() == limit - 1); 2296 int last = length(); 2297 list.add(substring(off, last)); 2298 off = last; 2299 break; 2300 } 2301 } 2302 // If no match was found, return this 2303 if (off == 0) 2304 return new String[]{this}; 2305 2306 // Add remaining segment 2307 if (!limited || list.size() < limit) 2308 list.add(substring(off, length())); 2309 2310 // Construct result 2311 int resultSize = list.size(); 2312 if (limit == 0) { 2313 while (resultSize > 0 && list.get(resultSize - 1).length() == 0) { 2314 resultSize--; 2315 } 2316 } 2317 String[] result = new String[resultSize]; 2318 return list.subList(0, resultSize).toArray(result); 2319 } 2320 return Pattern.compile(regex).split(this, limit); 2321 } 2322 2323 /** 2324 * Splits this string around matches of the given <a 2325 * href="../util/regex/Pattern.html#sum">regular expression</a>. 2326 * 2327 * <p> This method works as if by invoking the two-argument {@link 2328 * #split(String, int) split} method with the given expression and a limit 2329 * argument of zero. Trailing empty strings are therefore not included in 2330 * the resulting array. 
2331 * 2332 * <p> The string {@code "boo:and:foo"}, for example, yields the following 2333 * results with these expressions: 2334 * 2335 * <blockquote><table class="plain"> 2336 * <caption style="display:none">Split examples showing regex and result</caption> 2337 * <thead> 2338 * <tr> 2339 * <th scope="col">Regex</th> 2340 * <th scope="col">Result</th> 2341 * </tr> 2342 * </thead> 2343 * <tbody> 2344 * <tr><th scope="row" style="text-weight:normal">:</th> 2345 * <td>{@code { "boo", "and", "foo" }}</td></tr> 2346 * <tr><th scope="row" style="text-weight:normal">o</th> 2347 * <td>{@code { "b", "", ":and:f" }}</td></tr> 2348 * </tbody> 2349 * </table></blockquote> 2350 * 2351 * 2352 * @param regex 2353 * the delimiting regular expression 2354 * 2355 * @return the array of strings computed by splitting this string 2356 * around matches of the given regular expression 2357 * 2358 * @throws PatternSyntaxException 2359 * if the regular expression's syntax is invalid 2360 * 2361 * @see java.util.regex.Pattern 2362 * 2363 * @since 1.4 2364 * @spec JSR-51 2365 */ 2366 public String[] split(String regex) { 2367 return split(regex, 0); 2368 } 2369 2370 /** 2371 * Returns a new String composed of copies of the 2372 * {@code CharSequence elements} joined together with a copy of 2373 * the specified {@code delimiter}. 2374 * 2375 * <blockquote>For example, 2376 * <pre>{@code 2377 * String message = String.join("-", "Java", "is", "cool"); 2378 * // message returned is: "Java-is-cool" 2379 * }</pre></blockquote> 2380 * 2381 * Note that if an element is null, then {@code "null"} is added. 2382 * 2383 * @param delimiter the delimiter that separates each element 2384 * @param elements the elements to join together. 
2385 * 2386 * @return a new {@code String} that is composed of the {@code elements} 2387 * separated by the {@code delimiter} 2388 * 2389 * @throws NullPointerException If {@code delimiter} or {@code elements} 2390 * is {@code null} 2391 * 2392 * @see java.util.StringJoiner 2393 * @since 1.8 2394 */ 2395 public static String join(CharSequence delimiter, CharSequence... elements) { 2396 Objects.requireNonNull(delimiter); 2397 Objects.requireNonNull(elements); 2398 // Number of elements not likely worth Arrays.stream overhead. 2399 StringJoiner joiner = new StringJoiner(delimiter); 2400 for (CharSequence cs: elements) { 2401 joiner.add(cs); 2402 } 2403 return joiner.toString(); 2404 } 2405 2406 /** 2407 * Returns a new {@code String} composed of copies of the 2408 * {@code CharSequence elements} joined together with a copy of the 2409 * specified {@code delimiter}. 2410 * 2411 * <blockquote>For example, 2412 * <pre>{@code 2413 * List<String> strings = List.of("Java", "is", "cool"); 2414 * String message = String.join(" ", strings); 2415 * //message returned is: "Java is cool" 2416 * 2417 * Set<String> strings = 2418 * new LinkedHashSet<>(List.of("Java", "is", "very", "cool")); 2419 * String message = String.join("-", strings); 2420 * //message returned is: "Java-is-very-cool" 2421 * }</pre></blockquote> 2422 * 2423 * Note that if an individual element is {@code null}, then {@code "null"} is added. 2424 * 2425 * @param delimiter a sequence of characters that is used to separate each 2426 * of the {@code elements} in the resulting {@code String} 2427 * @param elements an {@code Iterable} that will have its {@code elements} 2428 * joined together. 2429 * 2430 * @return a new {@code String} that is composed from the {@code elements} 2431 * argument 2432 * 2433 * @throws NullPointerException If {@code delimiter} or {@code elements} 2434 * is {@code null} 2435 * 2436 * @see #join(CharSequence,CharSequence...) 
2437 * @see java.util.StringJoiner 2438 * @since 1.8 2439 */ 2440 public static String join(CharSequence delimiter, 2441 Iterable<? extends CharSequence> elements) { 2442 Objects.requireNonNull(delimiter); 2443 Objects.requireNonNull(elements); 2444 StringJoiner joiner = new StringJoiner(delimiter); 2445 for (CharSequence cs: elements) { 2446 joiner.add(cs); 2447 } 2448 return joiner.toString(); 2449 } 2450 2451 /** 2452 * Converts all of the characters in this {@code String} to lower 2453 * case using the rules of the given {@code Locale}. Case mapping is based 2454 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2455 * class. Since case mappings are not always 1:1 char mappings, the resulting 2456 * {@code String} may be a different length than the original {@code String}. 2457 * <p> 2458 * Examples of lowercase mappings are in the following table: 2459 * <table class="plain"> 2460 * <caption style="display:none">Lowercase mapping examples showing language code of locale, upper case, lower case, and description</caption> 2461 * <thead> 2462 * <tr> 2463 * <th scope="col">Language Code of Locale</th> 2464 * <th scope="col">Upper Case</th> 2465 * <th scope="col">Lower Case</th> 2466 * <th scope="col">Description</th> 2467 * </tr> 2468 * </thead> 2469 * <tbody> 2470 * <tr> 2471 * <td>tr (Turkish)</td> 2472 * <th scope="row" style="font-weight:normal; text-align:left">\u0130</th> 2473 * <td>\u0069</td> 2474 * <td>capital letter I with dot above -> small letter i</td> 2475 * </tr> 2476 * <tr> 2477 * <td>tr (Turkish)</td> 2478 * <th scope="row" style="font-weight:normal; text-align:left">\u0049</th> 2479 * <td>\u0131</td> 2480 * <td>capital letter I -> small letter dotless i </td> 2481 * </tr> 2482 * <tr> 2483 * <td>(all)</td> 2484 * <th scope="row" style="font-weight:normal; text-align:left">French Fries</th> 2485 * <td>french fries</td> 2486 * <td>lowercased all chars in String</td> 2487 * </tr> 2488 * <tr> 2489 * 
<td>(all)</td> 2490 * <th scope="row" style="font-weight:normal; text-align:left"> 2491 * ΙΧΘΥΣ</th> 2492 * <td>ιχθυσ</td> 2493 * <td>lowercased all chars in String</td> 2494 * </tr> 2495 * </tbody> 2496 * </table> 2497 * 2498 * @param locale use the case transformation rules for this locale 2499 * @return the {@code String}, converted to lowercase. 2500 * @see java.lang.String#toLowerCase() 2501 * @see java.lang.String#toUpperCase() 2502 * @see java.lang.String#toUpperCase(Locale) 2503 * @since 1.1 2504 */ 2505 public String toLowerCase(Locale locale) { 2506 return isLatin1() ? StringLatin1.toLowerCase(this, value, locale) 2507 : StringUTF16.toLowerCase(this, value, locale); 2508 } 2509 2510 /** 2511 * Converts all of the characters in this {@code String} to lower 2512 * case using the rules of the default locale. This is equivalent to calling 2513 * {@code toLowerCase(Locale.getDefault())}. 2514 * <p> 2515 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2516 * results if used for strings that are intended to be interpreted locale 2517 * independently. 2518 * Examples are programming language identifiers, protocol keys, and HTML 2519 * tags. 2520 * For instance, {@code "TITLE".toLowerCase()} in a Turkish locale 2521 * returns {@code "t\u005Cu0131tle"}, where '\u005Cu0131' is the 2522 * LATIN SMALL LETTER DOTLESS I character. 2523 * To obtain correct results for locale insensitive strings, use 2524 * {@code toLowerCase(Locale.ROOT)}. 2525 * 2526 * @return the {@code String}, converted to lowercase. 2527 * @see java.lang.String#toLowerCase(Locale) 2528 */ 2529 public String toLowerCase() { 2530 return toLowerCase(Locale.getDefault()); 2531 } 2532 2533 /** 2534 * Converts all of the characters in this {@code String} to upper 2535 * case using the rules of the given {@code Locale}. Case mapping is based 2536 * on the Unicode Standard version specified by the {@link java.lang.Character Character} 2537 * class. 
Since case mappings are not always 1:1 char mappings, the resulting 2538 * {@code String} may be a different length than the original {@code String}. 2539 * <p> 2540 * Examples of locale-sensitive and 1:M case mappings are in the following table. 2541 * 2542 * <table class="plain"> 2543 * <caption style="display:none">Examples of locale-sensitive and 1:M case mappings. Shows Language code of locale, lower case, upper case, and description.</caption> 2544 * <thead> 2545 * <tr> 2546 * <th scope="col">Language Code of Locale</th> 2547 * <th scope="col">Lower Case</th> 2548 * <th scope="col">Upper Case</th> 2549 * <th scope="col">Description</th> 2550 * </tr> 2551 * </thead> 2552 * <tbody> 2553 * <tr> 2554 * <td>tr (Turkish)</td> 2555 * <th scope="row" style="font-weight:normal; text-align:left">\u0069</th> 2556 * <td>\u0130</td> 2557 * <td>small letter i -> capital letter I with dot above</td> 2558 * </tr> 2559 * <tr> 2560 * <td>tr (Turkish)</td> 2561 * <th scope="row" style="font-weight:normal; text-align:left">\u0131</th> 2562 * <td>\u0049</td> 2563 * <td>small letter dotless i -> capital letter I</td> 2564 * </tr> 2565 * <tr> 2566 * <td>(all)</td> 2567 * <th scope="row" style="font-weight:normal; text-align:left">\u00df</th> 2568 * <td>\u0053 \u0053</td> 2569 * <td>small letter sharp s -> two letters: SS</td> 2570 * </tr> 2571 * <tr> 2572 * <td>(all)</td> 2573 * <th scope="row" style="font-weight:normal; text-align:left">Fahrvergnügen</th> 2574 * <td>FAHRVERGNÜGEN</td> 2575 * <td></td> 2576 * </tr> 2577 * </tbody> 2578 * </table> 2579 * @param locale use the case transformation rules for this locale 2580 * @return the {@code String}, converted to uppercase. 2581 * @see java.lang.String#toUpperCase() 2582 * @see java.lang.String#toLowerCase() 2583 * @see java.lang.String#toLowerCase(Locale) 2584 * @since 1.1 2585 */ 2586 public String toUpperCase(Locale locale) { 2587 return isLatin1() ? 
StringLatin1.toUpperCase(this, value, locale) 2588 : StringUTF16.toUpperCase(this, value, locale); 2589 } 2590 2591 /** 2592 * Converts all of the characters in this {@code String} to upper 2593 * case using the rules of the default locale. This method is equivalent to 2594 * {@code toUpperCase(Locale.getDefault())}. 2595 * <p> 2596 * <b>Note:</b> This method is locale sensitive, and may produce unexpected 2597 * results if used for strings that are intended to be interpreted locale 2598 * independently. 2599 * Examples are programming language identifiers, protocol keys, and HTML 2600 * tags. 2601 * For instance, {@code "title".toUpperCase()} in a Turkish locale 2602 * returns {@code "T\u005Cu0130TLE"}, where '\u005Cu0130' is the 2603 * LATIN CAPITAL LETTER I WITH DOT ABOVE character. 2604 * To obtain correct results for locale insensitive strings, use 2605 * {@code toUpperCase(Locale.ROOT)}. 2606 * 2607 * @return the {@code String}, converted to uppercase. 2608 * @see java.lang.String#toUpperCase(Locale) 2609 */ 2610 public String toUpperCase() { 2611 return toUpperCase(Locale.getDefault()); 2612 } 2613 2614 /** 2615 * Returns a string whose value is this string, with all leading 2616 * and trailing space removed, where space is defined 2617 * as any character whose codepoint is less than or equal to 2618 * {@code 'U+0020'} (the space character). 2619 * <p> 2620 * If this {@code String} object represents an empty character 2621 * sequence, or the first and last characters of character sequence 2622 * represented by this {@code String} object both have codes 2623 * that are not space (as defined above), then a 2624 * reference to this {@code String} object is returned. 2625 * <p> 2626 * Otherwise, if all characters in this string are space (as 2627 * defined above), then a {@code String} object representing an 2628 * empty string is returned. 
2629 * <p> 2630 * Otherwise, let <i>k</i> be the index of the first character in the 2631 * string whose code is not a space (as defined above) and let 2632 * <i>m</i> be the index of the last character in the string whose code 2633 * is not a space (as defined above). A {@code String} 2634 * object is returned, representing the substring of this string that 2635 * begins with the character at index <i>k</i> and ends with the 2636 * character at index <i>m</i>-that is, the result of 2637 * {@code this.substring(k, m + 1)}. 2638 * <p> 2639 * This method may be used to trim space (as defined above) from 2640 * the beginning and end of a string. 2641 * 2642 * @return a string whose value is this string, with all leading 2643 * and trailing space removed, or this string if it 2644 * has no leading or trailing space. 2645 */ 2646 public String trim() { 2647 String ret = isLatin1() ? StringLatin1.trim(value) 2648 : StringUTF16.trim(value); 2649 return ret == null ? this : ret; 2650 } 2651 2652 /** 2653 * Returns a string whose value is this string, with all leading 2654 * and trailing {@link Character#isWhitespace(int) white space} 2655 * removed. 2656 * <p> 2657 * If this {@code String} object represents an empty string, 2658 * or if all code points in this string are 2659 * {@link Character#isWhitespace(int) white space}, then an empty string 2660 * is returned. 2661 * <p> 2662 * Otherwise, returns a substring of this string beginning with the first 2663 * code point that is not a {@link Character#isWhitespace(int) white space} 2664 * up to and including the last code point that is not a 2665 * {@link Character#isWhitespace(int) white space}. 2666 * <p> 2667 * This method may be used to strip 2668 * {@link Character#isWhitespace(int) white space} from 2669 * the beginning and end of a string. 
2670 * 2671 * @return a string whose value is this string, with all leading 2672 * and trailing white space removed 2673 * 2674 * @see Character#isWhitespace(int) 2675 * 2676 * @since 11 2677 */ 2678 public String strip() { 2679 String ret = isLatin1() ? StringLatin1.strip(value) 2680 : StringUTF16.strip(value); 2681 return ret == null ? this : ret; 2682 } 2683 2684 /** 2685 * Returns a string whose value is this string, with all leading 2686 * {@link Character#isWhitespace(int) white space} removed. 2687 * <p> 2688 * If this {@code String} object represents an empty string, 2689 * or if all code points in this string are 2690 * {@link Character#isWhitespace(int) white space}, then an empty string 2691 * is returned. 2692 * <p> 2693 * Otherwise, returns a substring of this string beginning with the first 2694 * code point that is not a {@link Character#isWhitespace(int) white space} 2695 * up to to and including the last code point of this string. 2696 * <p> 2697 * This method may be used to trim 2698 * {@link Character#isWhitespace(int) white space} from 2699 * the beginning of a string. 2700 * 2701 * @return a string whose value is this string, with all leading white 2702 * space removed 2703 * 2704 * @see Character#isWhitespace(int) 2705 * 2706 * @since 11 2707 */ 2708 public String stripLeading() { 2709 String ret = isLatin1() ? StringLatin1.stripLeading(value) 2710 : StringUTF16.stripLeading(value); 2711 return ret == null ? this : ret; 2712 } 2713 2714 /** 2715 * Returns a string whose value is this string, with all trailing 2716 * {@link Character#isWhitespace(int) white space} removed. 2717 * <p> 2718 * If this {@code String} object represents an empty string, 2719 * or if all characters in this string are 2720 * {@link Character#isWhitespace(int) white space}, then an empty string 2721 * is returned. 
2722 * <p> 2723 * Otherwise, returns a substring of this string beginning with the first 2724 * code point of this string up to and including the last code point 2725 * that is not a {@link Character#isWhitespace(int) white space}. 2726 * <p> 2727 * This method may be used to trim 2728 * {@link Character#isWhitespace(int) white space} from 2729 * the end of a string. 2730 * 2731 * @return a string whose value is this string, with all trailing white 2732 * space removed 2733 * 2734 * @see Character#isWhitespace(int) 2735 * 2736 * @since 11 2737 */ 2738 public String stripTrailing() { 2739 String ret = isLatin1() ? StringLatin1.stripTrailing(value) 2740 : StringUTF16.stripTrailing(value); 2741 return ret == null ? this : ret; 2742 } 2743 2744 /** 2745 * Returns {@code true} if the string is empty or contains only 2746 * {@link Character#isWhitespace(int) white space} codepoints, 2747 * otherwise {@code false}. 2748 * 2749 * @return {@code true} if the string is empty or contains only 2750 * {@link Character#isWhitespace(int) white space} codepoints, 2751 * otherwise {@code false} 2752 * 2753 * @see Character#isWhitespace(int) 2754 * 2755 * @since 11 2756 */ 2757 public boolean isBlank() { 2758 return indexOfNonWhitespace() == length(); 2759 } 2760 2761 private Stream<String> lines(int maxLeading, int maxTrailing) { 2762 return isLatin1() ? StringLatin1.lines(value, maxLeading, maxTrailing) 2763 : StringUTF16.lines(value, maxLeading, maxTrailing); 2764 } 2765 2766 /** 2767 * Returns a stream of lines extracted from this string, 2768 * separated by line terminators. 2769 * <p> 2770 * A <i>line terminator</i> is one of the following: 2771 * a line feed character {@code "\n"} (U+000A), 2772 * a carriage return character {@code "\r"} (U+000D), 2773 * or a carriage return followed immediately by a line feed 2774 * {@code "\r\n"} (U+000D U+000A). 
2775 * <p> 2776 * A <i>line</i> is either a sequence of zero or more characters 2777 * followed by a line terminator, or it is a sequence of one or 2778 * more characters followed by the end of the string. A 2779 * line does not include the line terminator. 2780 * <p> 2781 * The stream returned by this method contains the lines from 2782 * this string in the order in which they occur. 2783 * 2784 * @apiNote This definition of <i>line</i> implies that an empty 2785 * string has zero lines and that there is no empty line 2786 * following a line terminator at the end of a string. 2787 * 2788 * @implNote This method provides better performance than 2789 * split("\R") by supplying elements lazily and 2790 * by faster search of new line terminators. 2791 * 2792 * @return the stream of lines extracted from this string 2793 * 2794 * @since 11 2795 */ 2796 public Stream<String> lines() { 2797 return lines(0, 0); 2798 } 2799 2800 /** 2801 * Adjusts the indentation of each line of this string based on the value of 2802 * {@code n}, and normalizes line termination characters. 2803 * <p> 2804 * This string is conceptually separated into lines using 2805 * {@link String#lines()}. Each line is then adjusted as described below 2806 * and then suffixed with a line feed {@code "\n"} (U+000A). The resulting 2807 * lines are then concatenated and returned. 2808 * <p> 2809 * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the 2810 * beginning of each line. {@link String#isBlank() Blank lines} are 2811 * unaffected. 2812 * <p> 2813 * If {@code n < 0} then up to {@code n} 2814 * {@link Character#isWhitespace(int) white space characters} are removed 2815 * from the beginning of each line. If a given line does not contain 2816 * sufficient white space then all leading 2817 * {@link Character#isWhitespace(int) white space characters} are removed. 2818 * Each white space character is treated as a single character. 
In 2819 * particular, the tab character {@code "\t"} (U+0009) is considered a 2820 * single character; it is not expanded. 2821 * <p> 2822 * If {@code n == 0} then the line remains unchanged. However, line 2823 * terminators are still normalized. 2824 * <p> 2825 * 2826 * @param n number of leading 2827 * {@link Character#isWhitespace(int) white space characters} 2828 * to add or remove 2829 * 2830 * @return string with indentation adjusted and line endings normalized 2831 * 2832 * @see String#lines() 2833 * @see String#isBlank() 2834 * @see Character#isWhitespace(int) 2835 * 2836 * @since 12 2837 */ 2838 public String indent(int n) { 2839 return isEmpty() ? "" : indent(n, false); 2840 } 2841 2842 private String indent(int n, boolean removeBlanks) { 2843 Stream<String> stream = removeBlanks ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE) 2844 : lines(); 2845 if (n > 0) { 2846 final String spaces = " ".repeat(n); 2847 stream = stream.map(s -> s.isBlank() ? s : spaces + s); 2848 } else if (n == Integer.MIN_VALUE) { 2849 stream = stream.map(s -> s.stripLeading()); 2850 } else if (n < 0) { 2851 stream = stream.map(s -> s.substring(Math.min(-n, s.indexOfNonWhitespace()))); 2852 } 2853 return stream.collect(Collectors.joining("\n", "", "\n")); 2854 } 2855 2856 private int indexOfNonWhitespace() { 2857 return isLatin1() ? StringLatin1.indexOfNonWhitespace(value) 2858 : StringUTF16.indexOfNonWhitespace(value); 2859 } 2860 2861 private int lastIndexOfNonWhitespace() { 2862 return isLatin1() ? StringLatin1.lastIndexOfNonWhitespace(value) 2863 : StringUTF16.lastIndexOfNonWhitespace(value); 2864 } 2865 2866 /** 2867 * Removes vertical and horizontal white space margins from around the 2868 * essential body of a multi-line string, while preserving relative 2869 * indentation. 2870 * <p> 2871 * This string is first conceptually separated into lines as if by 2872 * {@link String#lines()}. 2873 * <p> 2874 * Then, the <i>minimum indentation</i> (min) is determined as follows. 
For 2875 * each non-blank line (as defined by {@link String#isBlank()}), the 2876 * leading {@link Character#isWhitespace(int) white space} characters are 2877 * counted. The <i>min</i> value is the smallest of these counts. 2878 * <p> 2879 * For each non-blank line, <i>min</i> leading white space characters are 2880 * removed. Each white space character is treated as a single character. In 2881 * particular, the tab character {@code "\t"} (U+0009) is considered a 2882 * single character; it is not expanded. 2883 * <p> 2884 * Leading and trailing blank lines, if any, are removed. Trailing spaces are 2885 * preserved. 2886 * <p> 2887 * Each line is suffixed with a line feed character {@code "\n"} (U+000A). 2888 * <p> 2889 * Finally, the lines are concatenated into a single string and returned. 2890 * 2891 * @apiNote 2892 * This method's primary purpose is to shift a block of lines as far as 2893 * possible to the left, while preserving relative indentation. Lines 2894 * that were indented the least will thus have no leading white space. 2895 * 2896 * Example: 2897 * <blockquote><pre> 2898 * ` 2899 * This is the first line 2900 * This is the second line 2901 * `.align(); 2902 * 2903 * returns 2904 * This is the first line 2905 * This is the second line 2906 * </pre></blockquote> 2907 * 2908 * @return string with margins removed and line terminators normalized 2909 * 2910 * @see String#lines() 2911 * @see String#isBlank() 2912 * @see String#indent(int) 2913 * @see Character#isWhitespace(int) 2914 * 2915 * @since 12 2916 */ 2917 public String align() { 2918 return align(0); 2919 } 2920 2921 /** 2922 * Removes vertical and horizontal white space margins from around the 2923 * essential body of a multi-line string, while preserving relative 2924 * indentation and with optional indentation adjustment. 
2925 * <p> 2926 * Invoking this method is equivalent to: 2927 * <blockquote> 2928 * {@code this.align().indent(n)} 2929 * </blockquote> 2930 * 2931 * @apiNote 2932 * Examples: 2933 * <blockquote><pre> 2934 * ` 2935 * This is the first line 2936 * This is the second line 2937 * `.align(0); 2938 * 2939 * returns 2940 * This is the first line 2941 * This is the second line 2942 * 2943 * 2944 * ` 2945 * This is the first line 2946 * This is the second line 2947 * `.align(4); 2948 * returns 2949 * This is the first line 2950 * This is the second line 2951 * </pre></blockquote> 2952 * 2953 * @param n number of leading white space characters 2954 * to add or remove 2955 * 2956 * @return string with margins removed, indentation adjusted and 2957 * line terminators normalized 2958 * 2959 * @see String#align() 2960 * 2961 * @since 12 2962 */ 2963 public String align(int n) { 2964 if (isEmpty()) { 2965 return ""; 2966 } 2967 int outdent = lines().filter(not(String::isBlank)) 2968 .mapToInt(String::indexOfNonWhitespace) 2969 .min() 2970 .orElse(0); 2971 return indent(n - outdent, true); 2972 } 2973 2974 /** 2975 * Replaces tab (U+0009) characters with enough space 2976 * (U+0020) characters to align to tab stops at 2977 * intervals {@code n}. 2978 * 2979 * @param n number of characters between tab stops 2980 * 2981 * @return this string with tabs replaced with spaces 2982 * 2983 * @throws IllegalArgumentException if n is less that equals to zero. 
2984 * 2985 * @since 12 2986 */ 2987 public String detab(int n) { 2988 if (n <= 0) { 2989 throw new IllegalArgumentException("n must be greater than zero: " + n); 2990 } 2991 int length = length(); 2992 int column = 0; 2993 int spaces = 0; 2994 final StringBuilder sb = new StringBuilder(length * 2); 2995 for (int pos = 0; pos < length; pos++) { 2996 char ch = charAt(pos); 2997 if (ch == ' ') { 2998 spaces++; 2999 } else if (ch == '\t') { 3000 spaces += n - (column + spaces) % n; 3001 } else if (ch == '\n' || ch == '\r') { 3002 sb.append(ch); 3003 column = 0; 3004 spaces = 0; 3005 } else { 3006 if (0 < spaces) { 3007 column += spaces; 3008 while (0 < spaces) { 3009 spaces--; 3010 sb.append(' '); 3011 } 3012 } 3013 sb.append(ch); 3014 column++; 3015 } 3016 } 3017 return sb.toString(); 3018 } 3019 3020 /** 3021 * Replaces some space (U+0020) characters with tab 3022 * (U+0009) characters if the spacing aligns to tab 3023 * stops at intervals {@code n}. 3024 * 3025 * @param n number of characters between tab stops 3026 * 3027 * @return this string with some spaces replaced with tabs 3028 * 3029 * @throws IllegalArgumentException if n is less that equals to zero. 
3030 * 3031 * @since 12 3032 */ 3033 public String entab(int n) { 3034 if (n <= 0) { 3035 throw new IllegalArgumentException("n must be greater than zero: " + n); 3036 } 3037 int length = length(); 3038 int column = 0; 3039 int spaces = 0; 3040 final StringBuilder sb = new StringBuilder(length); 3041 for (int i = 0; i < length; i++) { 3042 char ch = charAt(i); 3043 if (ch == ' ') { 3044 spaces++; 3045 } else if (ch == '\t') { 3046 spaces += n - (column + spaces) % n; 3047 } else if (ch == '\n' || ch == '\r') { 3048 sb.append(ch); 3049 column = 0; 3050 spaces = 0; 3051 } else { 3052 if (0 < spaces) { 3053 int nexttab = n - column % n; 3054 column += spaces; 3055 while (nexttab <= spaces) { 3056 spaces -= nexttab; 3057 nexttab = n; 3058 sb.append('\t'); 3059 } 3060 while (0 < spaces) { 3061 spaces--; 3062 sb.append(' '); 3063 } 3064 } 3065 sb.append(ch); 3066 column++; 3067 } 3068 } 3069 return sb.toString(); 3070 } 3071 3072 /** 3073 * This object (which is already a string!) is itself returned. 3074 * 3075 * @return the string itself. 3076 */ 3077 public String toString() { 3078 return this; 3079 } 3080 3081 /** 3082 * Returns a stream of {@code int} zero-extending the {@code char} values 3083 * from this sequence. Any char which maps to a <a 3084 * href="{@docRoot}/java.base/java/lang/Character.html#unicode">surrogate code 3085 * point</a> is passed through uninterpreted. 3086 * 3087 * @return an IntStream of char values from this sequence 3088 * @since 9 3089 */ 3090 @Override 3091 public IntStream chars() { 3092 return StreamSupport.intStream( 3093 isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE) 3094 : new StringUTF16.CharsSpliterator(value, Spliterator.IMMUTABLE), 3095 false); 3096 } 3097 3098 3099 /** 3100 * Returns a stream of code point values from this sequence. 
Any surrogate 3101 * pairs encountered in the sequence are combined as if by {@linkplain 3102 * Character#toCodePoint Character.toCodePoint} and the result is passed 3103 * to the stream. Any other code units, including ordinary BMP characters, 3104 * unpaired surrogates, and undefined code units, are zero-extended to 3105 * {@code int} values which are then passed to the stream. 3106 * 3107 * @return an IntStream of Unicode code points from this sequence 3108 * @since 9 3109 */ 3110 @Override 3111 public IntStream codePoints() { 3112 return StreamSupport.intStream( 3113 isLatin1() ? new StringLatin1.CharsSpliterator(value, Spliterator.IMMUTABLE) 3114 : new StringUTF16.CodePointsSpliterator(value, Spliterator.IMMUTABLE), 3115 false); 3116 } 3117 3118 /** 3119 * Converts this string to a new character array. 3120 * 3121 * @return a newly allocated character array whose length is the length 3122 * of this string and whose contents are initialized to contain 3123 * the character sequence represented by this string. 3124 */ 3125 public char[] toCharArray() { 3126 return isLatin1() ? StringLatin1.toChars(value) 3127 : StringUTF16.toChars(value); 3128 } 3129 3130 /** 3131 * Returns a formatted string using the specified format string and 3132 * arguments. 3133 * 3134 * <p> The locale always used is the one returned by {@link 3135 * java.util.Locale#getDefault(java.util.Locale.Category) 3136 * Locale.getDefault(Locale.Category)} with 3137 * {@link java.util.Locale.Category#FORMAT FORMAT} category specified. 3138 * 3139 * @param format 3140 * A <a href="../util/Formatter.html#syntax">format string</a> 3141 * 3142 * @param args 3143 * Arguments referenced by the format specifiers in the format 3144 * string. If there are more arguments than format specifiers, the 3145 * extra arguments are ignored. The number of arguments is 3146 * variable and may be zero. 
The maximum number of arguments is 3147 * limited by the maximum dimension of a Java array as defined by 3148 * <cite>The Java™ Virtual Machine Specification</cite>. 3149 * The behaviour on a 3150 * {@code null} argument depends on the <a 3151 * href="../util/Formatter.html#syntax">conversion</a>. 3152 * 3153 * @throws java.util.IllegalFormatException 3154 * If a format string contains an illegal syntax, a format 3155 * specifier that is incompatible with the given arguments, 3156 * insufficient arguments given the format string, or other 3157 * illegal conditions. For specification of all possible 3158 * formatting errors, see the <a 3159 * href="../util/Formatter.html#detail">Details</a> section of the 3160 * formatter class specification. 3161 * 3162 * @return A formatted string 3163 * 3164 * @see java.util.Formatter 3165 * @since 1.5 3166 */ 3167 public static String format(String format, Object... args) { 3168 return new Formatter().format(format, args).toString(); 3169 } 3170 3171 /** 3172 * Returns a formatted string using the specified locale, format string, 3173 * and arguments. 3174 * 3175 * @param l 3176 * The {@linkplain java.util.Locale locale} to apply during 3177 * formatting. If {@code l} is {@code null} then no localization 3178 * is applied. 3179 * 3180 * @param format 3181 * A <a href="../util/Formatter.html#syntax">format string</a> 3182 * 3183 * @param args 3184 * Arguments referenced by the format specifiers in the format 3185 * string. If there are more arguments than format specifiers, the 3186 * extra arguments are ignored. The number of arguments is 3187 * variable and may be zero. The maximum number of arguments is 3188 * limited by the maximum dimension of a Java array as defined by 3189 * <cite>The Java™ Virtual Machine Specification</cite>. 3190 * The behaviour on a 3191 * {@code null} argument depends on the 3192 * <a href="../util/Formatter.html#syntax">conversion</a>. 
3193 * 3194 * @throws java.util.IllegalFormatException 3195 * If a format string contains an illegal syntax, a format 3196 * specifier that is incompatible with the given arguments, 3197 * insufficient arguments given the format string, or other 3198 * illegal conditions. For specification of all possible 3199 * formatting errors, see the <a 3200 * href="../util/Formatter.html#detail">Details</a> section of the 3201 * formatter class specification 3202 * 3203 * @return A formatted string 3204 * 3205 * @see java.util.Formatter 3206 * @since 1.5 3207 */ 3208 public static String format(Locale l, String format, Object... args) { 3209 return new Formatter(l).format(format, args).toString(); 3210 } 3211 3212 /** 3213 * Returns the string representation of the {@code Object} argument. 3214 * 3215 * @param obj an {@code Object}. 3216 * @return if the argument is {@code null}, then a string equal to 3217 * {@code "null"}; otherwise, the value of 3218 * {@code obj.toString()} is returned. 3219 * @see java.lang.Object#toString() 3220 */ 3221 public static String valueOf(Object obj) { 3222 return (obj == null) ? "null" : obj.toString(); 3223 } 3224 3225 /** 3226 * Returns the string representation of the {@code char} array 3227 * argument. The contents of the character array are copied; subsequent 3228 * modification of the character array does not affect the returned 3229 * string. 3230 * 3231 * @param data the character array. 3232 * @return a {@code String} that contains the characters of the 3233 * character array. 3234 */ 3235 public static String valueOf(char data[]) { 3236 return new String(data); 3237 } 3238 3239 /** 3240 * Returns the string representation of a specific subarray of the 3241 * {@code char} array argument. 3242 * <p> 3243 * The {@code offset} argument is the index of the first 3244 * character of the subarray. The {@code count} argument 3245 * specifies the length of the subarray. 
The contents of the subarray 3246 * are copied; subsequent modification of the character array does not 3247 * affect the returned string. 3248 * 3249 * @param data the character array. 3250 * @param offset initial offset of the subarray. 3251 * @param count length of the subarray. 3252 * @return a {@code String} that contains the characters of the 3253 * specified subarray of the character array. 3254 * @exception IndexOutOfBoundsException if {@code offset} is 3255 * negative, or {@code count} is negative, or 3256 * {@code offset+count} is larger than 3257 * {@code data.length}. 3258 */ 3259 public static String valueOf(char data[], int offset, int count) { 3260 return new String(data, offset, count); 3261 } 3262 3263 /** 3264 * Equivalent to {@link #valueOf(char[], int, int)}. 3265 * 3266 * @param data the character array. 3267 * @param offset initial offset of the subarray. 3268 * @param count length of the subarray. 3269 * @return a {@code String} that contains the characters of the 3270 * specified subarray of the character array. 3271 * @exception IndexOutOfBoundsException if {@code offset} is 3272 * negative, or {@code count} is negative, or 3273 * {@code offset+count} is larger than 3274 * {@code data.length}. 3275 */ 3276 public static String copyValueOf(char data[], int offset, int count) { 3277 return new String(data, offset, count); 3278 } 3279 3280 /** 3281 * Equivalent to {@link #valueOf(char[])}. 3282 * 3283 * @param data the character array. 3284 * @return a {@code String} that contains the characters of the 3285 * character array. 3286 */ 3287 public static String copyValueOf(char data[]) { 3288 return new String(data); 3289 } 3290 3291 /** 3292 * Returns the string representation of the {@code boolean} argument. 3293 * 3294 * @param b a {@code boolean}. 3295 * @return if the argument is {@code true}, a string equal to 3296 * {@code "true"} is returned; otherwise, a string equal to 3297 * {@code "false"} is returned. 
3298 */ 3299 public static String valueOf(boolean b) { 3300 return b ? "true" : "false"; 3301 } 3302 3303 /** 3304 * Returns the string representation of the {@code char} 3305 * argument. 3306 * 3307 * @param c a {@code char}. 3308 * @return a string of length {@code 1} containing 3309 * as its single character the argument {@code c}. 3310 */ 3311 public static String valueOf(char c) { 3312 if (COMPACT_STRINGS && StringLatin1.canEncode(c)) { 3313 return new String(StringLatin1.toBytes(c), LATIN1); 3314 } 3315 return new String(StringUTF16.toBytes(c), UTF16); 3316 } 3317 3318 /** 3319 * Returns the string representation of the {@code int} argument. 3320 * <p> 3321 * The representation is exactly the one returned by the 3322 * {@code Integer.toString} method of one argument. 3323 * 3324 * @param i an {@code int}. 3325 * @return a string representation of the {@code int} argument. 3326 * @see java.lang.Integer#toString(int, int) 3327 */ 3328 public static String valueOf(int i) { 3329 return Integer.toString(i); 3330 } 3331 3332 /** 3333 * Returns the string representation of the {@code long} argument. 3334 * <p> 3335 * The representation is exactly the one returned by the 3336 * {@code Long.toString} method of one argument. 3337 * 3338 * @param l a {@code long}. 3339 * @return a string representation of the {@code long} argument. 3340 * @see java.lang.Long#toString(long) 3341 */ 3342 public static String valueOf(long l) { 3343 return Long.toString(l); 3344 } 3345 3346 /** 3347 * Returns the string representation of the {@code float} argument. 3348 * <p> 3349 * The representation is exactly the one returned by the 3350 * {@code Float.toString} method of one argument. 3351 * 3352 * @param f a {@code float}. 3353 * @return a string representation of the {@code float} argument. 
3354 * @see java.lang.Float#toString(float) 3355 */ 3356 public static String valueOf(float f) { 3357 return Float.toString(f); 3358 } 3359 3360 /** 3361 * Returns the string representation of the {@code double} argument. 3362 * <p> 3363 * The representation is exactly the one returned by the 3364 * {@code Double.toString} method of one argument. 3365 * 3366 * @param d a {@code double}. 3367 * @return a string representation of the {@code double} argument. 3368 * @see java.lang.Double#toString(double) 3369 */ 3370 public static String valueOf(double d) { 3371 return Double.toString(d); 3372 } 3373 3374 /** 3375 * Returns a canonical representation for the string object. 3376 * <p> 3377 * A pool of strings, initially empty, is maintained privately by the 3378 * class {@code String}. 3379 * <p> 3380 * When the intern method is invoked, if the pool already contains a 3381 * string equal to this {@code String} object as determined by 3382 * the {@link #equals(Object)} method, then the string from the pool is 3383 * returned. Otherwise, this {@code String} object is added to the 3384 * pool and a reference to this {@code String} object is returned. 3385 * <p> 3386 * It follows that for any two strings {@code s} and {@code t}, 3387 * {@code s.intern() == t.intern()} is {@code true} 3388 * if and only if {@code s.equals(t)} is {@code true}. 3389 * <p> 3390 * All literal strings and string-valued constant expressions are 3391 * interned. String literals are defined in section 3.10.5 of the 3392 * <cite>The Java™ Language Specification</cite>. 3393 * 3394 * @return a string that has the same contents as this string, but is 3395 * guaranteed to be from a pool of unique strings. 3396 * @jls 3.10.5 String Literals 3397 */ 3398 public native String intern(); 3399 3400 /** 3401 * Returns a string whose value is the concatenation of this 3402 * string repeated {@code count} times. 3403 * <p> 3404 * If this string is empty or count is zero then the empty 3405 * string is returned. 
3406 * 3407 * @param count number of times to repeat 3408 * 3409 * @return A string composed of this string repeated 3410 * {@code count} times or the empty string if this 3411 * string is empty or count is zero 3412 * 3413 * @throws IllegalArgumentException if the {@code count} is 3414 * negative. 3415 * 3416 * @since 11 3417 */ 3418 public String repeat(int count) { 3419 if (count < 0) { 3420 throw new IllegalArgumentException("count is negative: " + count); 3421 } 3422 if (count == 1) { 3423 return this; 3424 } 3425 final int len = value.length; 3426 if (len == 0 || count == 0) { 3427 return ""; 3428 } 3429 if (len == 1) { 3430 final byte[] single = new byte[count]; 3431 Arrays.fill(single, value[0]); 3432 return new String(single, coder); 3433 } 3434 if (Integer.MAX_VALUE / count < len) { 3435 throw new OutOfMemoryError("Repeating " + len + " bytes String " + count + 3436 " times will produce a String exceeding maximum size."); 3437 } 3438 final int limit = len * count; 3439 final byte[] multiple = new byte[limit]; 3440 System.arraycopy(value, 0, multiple, 0, len); 3441 int copied = len; 3442 for (; copied < limit - copied; copied <<= 1) { 3443 System.arraycopy(multiple, 0, multiple, copied, copied); 3444 } 3445 System.arraycopy(multiple, 0, multiple, copied, limit - copied); 3446 return new String(multiple, coder); 3447 } 3448 3449 //////////////////////////////////////////////////////////////// 3450 3451 /** 3452 * Copy character bytes from this string into dst starting at dstBegin. 3453 * This method doesn't perform any range checking. 
3454 * 3455 * Invoker guarantees: dst is in UTF16 (inflate itself for asb), if two 3456 * coders are different, and dst is big enough (range check) 3457 * 3458 * @param dstBegin the char index, not offset of byte[] 3459 * @param coder the coder of dst[] 3460 */ 3461 void getBytes(byte dst[], int dstBegin, byte coder) { 3462 if (coder() == coder) { 3463 System.arraycopy(value, 0, dst, dstBegin << coder, value.length); 3464 } else { // this.coder == LATIN && coder == UTF16 3465 StringLatin1.inflate(value, 0, dst, dstBegin, value.length); 3466 } 3467 } 3468 3469 /* 3470 * Package private constructor. Trailing Void argument is there for 3471 * disambiguating it against other (public) constructors. 3472 * 3473 * Stores the char[] value into a byte[] that each byte represents 3474 * the8 low-order bits of the corresponding character, if the char[] 3475 * contains only latin1 character. Or a byte[] that stores all 3476 * characters in their byte sequences defined by the {@code StringUTF16}. 3477 */ 3478 String(char[] value, int off, int len, Void sig) { 3479 if (len == 0) { 3480 this.value = "".value; 3481 this.coder = "".coder; 3482 return; 3483 } 3484 if (COMPACT_STRINGS) { 3485 byte[] val = StringUTF16.compress(value, off, len); 3486 if (val != null) { 3487 this.value = val; 3488 this.coder = LATIN1; 3489 return; 3490 } 3491 } 3492 this.coder = UTF16; 3493 this.value = StringUTF16.toBytes(value, off, len); 3494 } 3495 3496 /* 3497 * Package private constructor. Trailing Void argument is there for 3498 * disambiguating it against other (public) constructors. 
3499 */ 3500 String(AbstractStringBuilder asb, Void sig) { 3501 byte[] val = asb.getValue(); 3502 int length = asb.length(); 3503 if (asb.isLatin1()) { 3504 this.coder = LATIN1; 3505 this.value = Arrays.copyOfRange(val, 0, length); 3506 } else { 3507 if (COMPACT_STRINGS) { 3508 byte[] buf = StringUTF16.compress(val, 0, length); 3509 if (buf != null) { 3510 this.coder = LATIN1; 3511 this.value = buf; 3512 return; 3513 } 3514 } 3515 this.coder = UTF16; 3516 this.value = Arrays.copyOfRange(val, 0, length << 1); 3517 } 3518 } 3519 3520 /* 3521 * Package private constructor which shares value array for speed. 3522 */ 3523 String(byte[] value, byte coder) { 3524 this.value = value; 3525 this.coder = coder; 3526 } 3527 3528 byte coder() { 3529 return COMPACT_STRINGS ? coder : UTF16; 3530 } 3531 3532 byte[] value() { 3533 return value; 3534 } 3535 3536 private boolean isLatin1() { 3537 return COMPACT_STRINGS && coder == LATIN1; 3538 } 3539 3540 @Native static final byte LATIN1 = 0; 3541 @Native static final byte UTF16 = 1; 3542 3543 /* 3544 * StringIndexOutOfBoundsException if {@code index} is 3545 * negative or greater than or equal to {@code length}. 3546 */ 3547 static void checkIndex(int index, int length) { 3548 if (index < 0 || index >= length) { 3549 throw new StringIndexOutOfBoundsException("index " + index + 3550 ",length " + length); 3551 } 3552 } 3553 3554 /* 3555 * StringIndexOutOfBoundsException if {@code offset} 3556 * is negative or greater than {@code length}. 3557 */ 3558 static void checkOffset(int offset, int length) { 3559 if (offset < 0 || offset > length) { 3560 throw new StringIndexOutOfBoundsException("offset " + offset + 3561 ",length " + length); 3562 } 3563 } 3564 3565 /* 3566 * Check {@code offset}, {@code count} against {@code 0} and {@code length} 3567 * bounds. 
3568 * 3569 * @throws StringIndexOutOfBoundsException 3570 * If {@code offset} is negative, {@code count} is negative, 3571 * or {@code offset} is greater than {@code length - count} 3572 */ 3573 static void checkBoundsOffCount(int offset, int count, int length) { 3574 if (offset < 0 || count < 0 || offset > length - count) { 3575 throw new StringIndexOutOfBoundsException( 3576 "offset " + offset + ", count " + count + ", length " + length); 3577 } 3578 } 3579 3580 /* 3581 * Check {@code begin}, {@code end} against {@code 0} and {@code length} 3582 * bounds. 3583 * 3584 * @throws StringIndexOutOfBoundsException 3585 * If {@code begin} is negative, {@code begin} is greater than 3586 * {@code end}, or {@code end} is greater than {@code length}. 3587 */ 3588 static void checkBoundsBeginEnd(int begin, int end, int length) { 3589 if (begin < 0 || begin > end || end > length) { 3590 throw new StringIndexOutOfBoundsException( 3591 "begin " + begin + ", end " + end + ", length " + length); 3592 } 3593 } 3594 3595 /** 3596 * Returns the string representation of the {@code codePoint} 3597 * argument. 3598 * 3599 * @param codePoint a {@code codePoint}. 3600 * @return a string of length {@code 1} or {@code 2} containing 3601 * as its single character the argument {@code codePoint}. 3602 * @throws IllegalArgumentException if the specified 3603 * {@code codePoint} is not a {@linkplain Character#isValidCodePoint 3604 * valid Unicode code point}. 
3605 */ 3606 static String valueOfCodePoint(int codePoint) { 3607 if (COMPACT_STRINGS && StringLatin1.canEncode(codePoint)) { 3608 return new String(StringLatin1.toBytes((char)codePoint), LATIN1); 3609 } else if (Character.isBmpCodePoint(codePoint)) { 3610 return new String(StringUTF16.toBytes((char)codePoint), UTF16); 3611 } else if (Character.isSupplementaryCodePoint(codePoint)) { 3612 return new String(StringUTF16.toBytesSupplementary(codePoint), UTF16); 3613 } 3614 3615 throw new IllegalArgumentException( 3616 format("Not a valid Unicode code point: 0x%X", codePoint)); 3617 } 3618 }