Line data Source code
1 : /*
2 : **********************************************************************
3 : * Copyright (C) 1998-2009, International Business Machines
4 : * Corporation and others. All Rights Reserved.
5 : **********************************************************************
6 : *
7 : * File unistr.h
8 : *
9 : * Modification History:
10 : *
11 : * Date Name Description
12 : * 09/25/98 stephen Creation.
13 : * 11/11/98 stephen Changed per 11/9 code review.
14 : * 04/20/99 stephen Overhauled per 4/16 code review.
15 : * 11/18/99 aliu Made to inherit from Replaceable. Added method
16 : * handleReplaceBetween(); other methods unchanged.
17 : * 06/25/01 grhoten Remove dependency on iostream.
18 : ******************************************************************************
19 : */
20 :
21 : #ifndef UNISTR_H
22 : #define UNISTR_H
23 :
24 : /**
25 : * \file
26 : * \brief C++ API: Unicode String
27 : */
28 :
29 : #include "unicode/utypes.h"
30 : #include "unicode/rep.h"
31 : #include "unicode/std_string.h"
32 : #include "unicode/stringpiece.h"
33 : #include "unicode/bytestream.h"
34 :
35 : struct UConverter; // unicode/ucnv.h
36 : class StringThreadTest;
37 :
38 : #ifndef U_COMPARE_CODE_POINT_ORDER
39 : /* see also ustring.h and unorm.h */
40 : /**
41 : * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
42 : * Compare strings in code point order instead of code unit order.
43 : * @stable ICU 2.2
44 : */
45 : #define U_COMPARE_CODE_POINT_ORDER 0x8000
46 : #endif
47 :
48 : #ifndef USTRING_H
49 : /**
50 : * \ingroup ustring_ustrlen
51 : */
52 : U_STABLE int32_t U_EXPORT2
53 : u_strlen(const UChar *s);
54 : #endif
55 :
56 : U_NAMESPACE_BEGIN
57 :
58 : class Locale; // unicode/locid.h
59 : class StringCharacterIterator;
60 : class BreakIterator; // unicode/brkiter.h
61 :
62 : /* The <iostream> include has been moved to unicode/ustream.h */
63 :
64 : /**
65 : * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
66 : * which constructs a Unicode string from an invariant-character char * string.
67 : * About invariant characters see utypes.h.
68 : * This constructor has no runtime dependency on conversion code and is
69 : * therefore recommended over ones taking a charset name string
70 : * (where the empty string "" indicates invariant-character conversion).
71 : * The macro expands to U_NAMESPACE_QUALIFIER UnicodeString::kInvariant, the enum constant declared in UnicodeString::EInvariant.
72 : * @stable ICU 3.2
73 : */
74 : #define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
75 :
76 : /**
77 : * Unicode String literals in C++.
78 : * Dependent on the platform properties, different UnicodeString
79 : * constructors should be used to create a UnicodeString object from
80 : * a string literal.
81 : * The macros are defined for maximum performance.
82 : * They work only for strings that contain "invariant characters", i.e.,
83 : * only latin letters, digits, and some punctuation.
84 : * See utypes.h for details.
85 : * Depending on the \#if conditions below, the literal is handed to UnicodeString either as a const UChar * (from U_DECLARE_UTF16(cs), the wide literal L ## cs, or cs itself) or as a char * together with US_INV.
86 : * The string parameter (cs) must be a C string literal.
87 : * The length of the string (_length), not including the terminating
88 : * <code>NUL</code>, must be specified as a constant.
89 : * The U_STRING_DECL macro should be invoked exactly once for one
90 : * such string variable before it is used.
91 : * @stable ICU 2.0
92 : */
93 : #if defined(U_DECLARE_UTF16)
94 : # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
95 : #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
96 : # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
97 : #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
98 : # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
99 : #else
100 : # define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
101 : #endif
102 :
103 : /**
104 : * Unicode String literals in C++.
105 : * Dependent on the platform properties, different UnicodeString
106 : * constructors should be used to create a UnicodeString object from
107 : * a string literal.
108 : * The macros are defined for improved performance; this one expands to UNICODE_STRING(cs, -1), so the caller passes no explicit length.
109 : * They work only for strings that contain "invariant characters", i.e.,
110 : * only latin letters, digits, and some punctuation.
111 : * See utypes.h for details.
112 : *
113 : * The string parameter (cs) must be a C string literal.
114 : * @stable ICU 2.0
115 : */
116 : #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
117 :
118 : /**
119 : * UnicodeString is a string class that stores Unicode characters directly and provides
120 : * similar functionality as the Java String and StringBuffer classes.
121 : * It is a concrete implementation of the abstract class Replaceable (for transliteration).
122 : *
123 : * The UnicodeString class is not suitable for subclassing.
124 : *
125 : * <p>For an overview of Unicode strings in C and C++ see the
126 : * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
127 : *
128 : * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
129 : * A Unicode character may be stored with either one code unit
130 : * (the most common case) or with a matched pair of special code units
131 : * ("surrogates"). The data type for code units is UChar.
132 : * For single-character handling, a Unicode character code <em>point</em> is a value
133 : * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
134 : *
135 : * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
136 : * This is the same as with multi-byte char* strings in traditional string handling.
137 : * Operations on partial strings typically do not test for code point boundaries.
138 : * If necessary, the user needs to take care of such boundaries by testing for the code unit
139 : * values or by using functions like
140 : * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
141 : * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
142 : *
143 : * UnicodeString methods are more lenient with regard to input parameter values
144 : * than other ICU APIs. In particular:
145 : * - If indexes are out of bounds for a UnicodeString object
146 : * (<0 or >length()) then they are "pinned" to the nearest boundary.
147 : * - If primitive string pointer values (e.g., const UChar * or char *)
148 : * for input strings are NULL, then those input string parameters are treated
149 : * as if they pointed to an empty string.
150 : * However, this is <em>not</em> the case for char * parameters for charset names
151 : * or other IDs.
152 : * - Most UnicodeString methods do not take a UErrorCode parameter because
153 : * there are usually very few opportunities for failure other than a shortage
154 : * of memory, error codes in low-level C++ string methods would be inconvenient,
155 : * and the error code as the last parameter (ICU convention) would prevent
156 : * the use of default parameter values.
157 : * Instead, such methods set the UnicodeString into a "bogus" state
158 : * (see isBogus()) if an error occurs.
159 : *
160 : * In string comparisons, two UnicodeString objects that are both "bogus"
161 : * compare equal (to be transitive and prevent endless loops in sorting),
162 : * and a "bogus" string compares less than any non-"bogus" one.
163 : *
164 : * Const UnicodeString methods are thread-safe. Multiple threads can use
165 : * const methods on the same UnicodeString object simultaneously,
166 : * but non-const methods must not be called concurrently (in multiple threads)
167 : * with any other (const or non-const) methods.
168 : *
169 : * Similarly, const UnicodeString & parameters are thread-safe.
170 : * One object may be passed in as such a parameter concurrently in multiple threads.
171 : * This includes the const UnicodeString & parameters for
172 : * copy construction, assignment, and cloning.
173 : *
174 : * <p>UnicodeString uses several storage methods.
175 : * String contents can be stored inside the UnicodeString object itself,
176 : * in an allocated and shared buffer, or in an outside buffer that is "aliased".
177 : * Most of this is done transparently, but careful aliasing in particular provides
178 : * significant performance improvements.
179 : * Also, the internal buffer is accessible via special functions.
180 : * For details see the
181 : * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
182 : *
183 : * @see utf.h
184 : * @see CharacterIterator
185 : * @stable ICU 2.0
186 : */
187 : class U_COMMON_API UnicodeString : public Replaceable
188 : {
189 : public:
190 :
191 : /**
192 : * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
193 : * which constructs a Unicode string from an invariant-character char * string.
194 : * Use the macro US_INV instead of the full qualification for this value.
195 : * The enum declares a single enumerator, kInvariant, which is passed as the third argument of that constructor.
196 : * @see US_INV
197 : * @stable ICU 3.2
198 : */
199 : enum EInvariant {
200 : /**
201 : * The only value of EInvariant; the US_INV macro expands to it.
202 : * @stable ICU 3.2
203 : */
204 : kInvariant
205 : };
206 :
207 : //========================================
208 : // Read-only operations
209 : //========================================
210 :
211 : /* Comparison - bitwise only - for international comparison use collation */
212 :
213 : /**
214 : * Equality operator. Performs only bitwise comparison.
215 : * @param text The UnicodeString to compare to this one.
216 : * @return TRUE if <TT>text</TT> contains the same characters as this one,
217 : * FALSE otherwise.
218 : * @stable ICU 2.0
219 : */
220 : inline UBool operator== (const UnicodeString& text) const;
221 :
222 : /**
223 : * Inequality operator. Performs only bitwise comparison.
224 : * @param text The UnicodeString to compare to this one.
225 : * @return FALSE if <TT>text</TT> contains the same characters as this one,
226 : * TRUE otherwise.
227 : * @stable ICU 2.0
228 : */
229 : inline UBool operator!= (const UnicodeString& text) const;
230 :
231 : /**
232 : * Greater than operator. Performs only bitwise comparison.
233 : * @param text The UnicodeString to compare to this one.
234 : * @return TRUE if the characters in this are bitwise
235 : * greater than the characters in <code>text</code>, FALSE otherwise
236 : * @stable ICU 2.0
237 : */
238 : inline UBool operator> (const UnicodeString& text) const;
239 :
240 : /**
241 : * Less than operator. Performs only bitwise comparison.
242 : * @param text The UnicodeString to compare to this one.
243 : * @return TRUE if the characters in this are bitwise
244 : * less than the characters in <code>text</code>, FALSE otherwise
245 : * @stable ICU 2.0
246 : */
247 : inline UBool operator< (const UnicodeString& text) const;
248 :
249 : /**
250 : * Greater than or equal operator. Performs only bitwise comparison.
251 : * @param text The UnicodeString to compare to this one.
252 : * @return TRUE if the characters in this are bitwise
253 : * greater than or equal to the characters in <code>text</code>, FALSE otherwise
254 : * @stable ICU 2.0
255 : */
256 : inline UBool operator>= (const UnicodeString& text) const;
257 :
258 : /**
259 : * Less than or equal operator. Performs only bitwise comparison.
260 : * @param text The UnicodeString to compare to this one.
261 : * @return TRUE if the characters in this are bitwise
262 : * less than or equal to the characters in <code>text</code>, FALSE otherwise
263 : * @stable ICU 2.0
264 : */
265 : inline UBool operator<= (const UnicodeString& text) const;
266 :
267 : /**
268 : * Compare the characters bitwise in this UnicodeString to
269 : * the characters in <code>text</code>.
270 : * @param text The UnicodeString to compare to this one.
271 : * @return The result of bitwise character comparison: 0 if this
272 : * contains the same characters as <code>text</code>, -1 if the characters in
273 : * this are bitwise less than the characters in <code>text</code>, +1 if the
274 : * characters in this are bitwise greater than the characters
275 : * in <code>text</code>.
276 : * @stable ICU 2.0
277 : */
278 : inline int8_t compare(const UnicodeString& text) const;
279 :
280 : /**
281 : * Compare the characters bitwise in the range
282 : * [<TT>start</TT>, <TT>start + length</TT>) of this string with the characters
283 : * in <TT>text</TT>.
284 : * @param start the offset in this string at which the compare operation begins
285 : * @param length the number of characters in this string to compare.
286 : * @param text the other text to be compared against this string.
287 : * @return The result of bitwise character comparison: 0 if this
288 : * contains the same characters as <code>text</code>, -1 if the characters in
289 : * this are bitwise less than the characters in <code>text</code>, +1 if the
290 : * characters in this are bitwise greater than the characters
291 : * in <code>text</code>.
292 : * @stable ICU 2.0
293 : */
294 : inline int8_t compare(int32_t start,
295 : int32_t length,
296 : const UnicodeString& text) const;
297 :
298 : /**
299 : * Compare the characters bitwise in the range
300 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters
301 : * in <TT>srcText</TT> in the range
302 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
303 : * @param start the offset at which the compare operation begins
304 : * @param length the number of characters in this to compare.
305 : * @param srcText the text to be compared
306 : * @param srcStart the offset into <TT>srcText</TT> to start comparison
307 : * @param srcLength the number of characters in <TT>srcText</TT> to compare
308 : * @return The result of bitwise character comparison: 0 if this
309 : * contains the same characters as <code>srcText</code>, -1 if the characters in
310 : * this are bitwise less than the characters in <code>srcText</code>, +1 if the
311 : * characters in this are bitwise greater than the characters
312 : * in <code>srcText</code>.
313 : * @stable ICU 2.0
314 : */
315 : inline int8_t compare(int32_t start,
316 : int32_t length,
317 : const UnicodeString& srcText,
318 : int32_t srcStart,
319 : int32_t srcLength) const;
320 :
321 : /**
322 : * Compare the characters bitwise in this UnicodeString with the first
323 : * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
324 : * @param srcChars The characters to compare to this UnicodeString.
325 : * @param srcLength the number of characters in <TT>srcChars</TT> to compare
326 : * @return The result of bitwise character comparison: 0 if this
327 : * contains the same characters as <code>srcChars</code>, -1 if the characters in
328 : * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
329 : * characters in this are bitwise greater than the characters
330 : * in <code>srcChars</code>.
331 : * @stable ICU 2.0
332 : */
333 : inline int8_t compare(const UChar *srcChars,
334 : int32_t srcLength) const;
335 :
336 : /**
337 : * Compare the characters bitwise in the range
338 : * [<TT>start</TT>, <TT>start + length</TT>) with the first
339 : * <TT>length</TT> characters in <TT>srcChars</TT>
340 : * @param start the offset at which the compare operation begins
341 : * @param length the number of characters to compare.
342 : * @param srcChars the characters to be compared
343 : * @return The result of bitwise character comparison: 0 if this
344 : * contains the same characters as <code>srcChars</code>, -1 if the characters in
345 : * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
346 : * characters in this are bitwise greater than the characters
347 : * in <code>srcChars</code>.
348 : * @stable ICU 2.0
349 : */
350 : inline int8_t compare(int32_t start,
351 : int32_t length,
352 : const UChar *srcChars) const;
353 :
354 : /**
355 : * Compare the characters bitwise in the range
356 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters
357 : * in <TT>srcChars</TT> in the range
358 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
359 : * @param start the offset at which the compare operation begins
360 : * @param length the number of characters in this to compare
361 : * @param srcChars the characters to be compared
362 : * @param srcStart the offset into <TT>srcChars</TT> to start comparison
363 : * @param srcLength the number of characters in <TT>srcChars</TT> to compare
364 : * @return The result of bitwise character comparison: 0 if this
365 : * contains the same characters as <code>srcChars</code>, -1 if the characters in
366 : * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
367 : * characters in this are bitwise greater than the characters
368 : * in <code>srcChars</code>.
369 : * @stable ICU 2.0
370 : */
371 : inline int8_t compare(int32_t start,
372 : int32_t length,
373 : const UChar *srcChars,
374 : int32_t srcStart,
375 : int32_t srcLength) const;
376 :
377 : /**
378 : * Compare the characters bitwise in the range
379 : * [<TT>start</TT>, <TT>limit</TT>) with the characters
380 : * in <TT>srcText</TT> in the range
381 : * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
382 : * @param start the offset at which the compare operation begins
383 : * @param limit the offset immediately following the compare operation
384 : * @param srcText the text to be compared
385 : * @param srcStart the offset into <TT>srcText</TT> to start comparison
386 : * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
387 : * @return The result of bitwise character comparison: 0 if this
388 : * contains the same characters as <code>srcText</code>, -1 if the characters in
389 : * this are bitwise less than the characters in <code>srcText</code>, +1 if the
390 : * characters in this are bitwise greater than the characters
391 : * in <code>srcText</code>.
392 : * @stable ICU 2.0
393 : */
394 : inline int8_t compareBetween(int32_t start,
395 : int32_t limit,
396 : const UnicodeString& srcText,
397 : int32_t srcStart,
398 : int32_t srcLimit) const;
399 :
400 : /**
401 : * Compare two Unicode strings in code point order.
402 : * The result may be different from the results of compare(), operator<, etc.
403 : * if supplementary characters are present:
404 : *
405 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
406 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
407 : * which means that they compare as less than some other BMP characters like U+feff.
408 : * This function compares Unicode strings in code point order.
409 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
410 : *
411 : * @param text Another string to compare this one to.
412 : * @return a negative/zero/positive integer corresponding to whether
413 : * this string is less than/equal to/greater than the second one
414 : * in code point order
415 : * @stable ICU 2.0
416 : */
417 : inline int8_t compareCodePointOrder(const UnicodeString& text) const;
418 :
419 : /**
420 : * Compare two Unicode strings in code point order.
421 : * The result may be different from the results of compare(), operator<, etc.
422 : * if supplementary characters are present:
423 : *
424 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
425 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
426 : * which means that they compare as less than some other BMP characters like U+feff.
427 : * This function compares Unicode strings in code point order.
428 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
429 : *
430 : * @param start The start offset in this string at which the compare operation begins.
431 : * @param length The number of code units from this string to compare.
432 : * @param srcText Another string to compare this one to.
433 : * @return a negative/zero/positive integer corresponding to whether
434 : * this string is less than/equal to/greater than the second one
435 : * in code point order
436 : * @stable ICU 2.0
437 : */
438 : inline int8_t compareCodePointOrder(int32_t start,
439 : int32_t length,
440 : const UnicodeString& srcText) const;
441 :
442 : /**
443 : * Compare two Unicode strings in code point order.
444 : * The result may be different from the results of compare(), operator<, etc.
445 : * if supplementary characters are present:
446 : *
447 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
448 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
449 : * which means that they compare as less than some other BMP characters like U+feff.
450 : * This function compares Unicode strings in code point order.
451 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
452 : *
453 : * @param start The start offset in this string at which the compare operation begins.
454 : * @param length The number of code units from this string to compare.
455 : * @param srcText Another string to compare this one to.
456 : * @param srcStart The start offset in that string at which the compare operation begins.
457 : * @param srcLength The number of code units from that string to compare.
458 : * @return a negative/zero/positive integer corresponding to whether
459 : * this string is less than/equal to/greater than the second one
460 : * in code point order
461 : * @stable ICU 2.0
462 : */
463 : inline int8_t compareCodePointOrder(int32_t start,
464 : int32_t length,
465 : const UnicodeString& srcText,
466 : int32_t srcStart,
467 : int32_t srcLength) const;
468 :
469 : /**
470 : * Compare two Unicode strings in code point order.
471 : * The result may be different from the results of compare(), operator<, etc.
472 : * if supplementary characters are present:
473 : *
474 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
475 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
476 : * which means that they compare as less than some other BMP characters like U+feff.
477 : * This function compares Unicode strings in code point order.
478 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
479 : *
480 : * @param srcChars A pointer to another string to compare this one to.
481 : * @param srcLength The number of code units from that string to compare.
482 : * @return a negative/zero/positive integer corresponding to whether
483 : * this string is less than/equal to/greater than the second one
484 : * in code point order
485 : * @stable ICU 2.0
486 : */
487 : inline int8_t compareCodePointOrder(const UChar *srcChars,
488 : int32_t srcLength) const;
489 :
490 : /**
491 : * Compare two Unicode strings in code point order.
492 : * The result may be different from the results of compare(), operator<, etc.
493 : * if supplementary characters are present:
494 : *
495 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
496 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
497 : * which means that they compare as less than some other BMP characters like U+feff.
498 : * This function compares Unicode strings in code point order.
499 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
500 : *
501 : * @param start The start offset in this string at which the compare operation begins.
502 : * @param length The number of code units from this string to compare.
503 : * @param srcChars A pointer to another string to compare this one to.
504 : * @return a negative/zero/positive integer corresponding to whether
505 : * this string is less than/equal to/greater than the second one
506 : * in code point order
507 : * @stable ICU 2.0
508 : */
509 : inline int8_t compareCodePointOrder(int32_t start,
510 : int32_t length,
511 : const UChar *srcChars) const;
512 :
513 : /**
514 : * Compare two Unicode strings in code point order.
515 : * The result may be different from the results of compare(), operator<, etc.
516 : * if supplementary characters are present:
517 : *
518 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
519 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
520 : * which means that they compare as less than some other BMP characters like U+feff.
521 : * This function compares Unicode strings in code point order.
522 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
523 : *
524 : * @param start The start offset in this string at which the compare operation begins.
525 : * @param length The number of code units from this string to compare.
526 : * @param srcChars A pointer to another string to compare this one to.
527 : * @param srcStart The start offset in that string at which the compare operation begins.
528 : * @param srcLength The number of code units from that string to compare.
529 : * @return a negative/zero/positive integer corresponding to whether
530 : * this string is less than/equal to/greater than the second one
531 : * in code point order
532 : * @stable ICU 2.0
533 : */
534 : inline int8_t compareCodePointOrder(int32_t start,
535 : int32_t length,
536 : const UChar *srcChars,
537 : int32_t srcStart,
538 : int32_t srcLength) const;
539 :
540 : /**
541 : * Compare two Unicode strings in code point order.
542 : * The result may be different from the results of compare(), operator<, etc.
543 : * if supplementary characters are present:
544 : *
545 : * In UTF-16, supplementary characters (with code points U+10000 and above) are
546 : * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
547 : * which means that they compare as less than some other BMP characters like U+feff.
548 : * This function compares Unicode strings in code point order.
549 : * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
550 : *
551 : * @param start The start offset in this string at which the compare operation begins.
552 : * @param limit The offset after the last code unit from this string to compare.
553 : * @param srcText Another string to compare this one to.
554 : * @param srcStart The start offset in that string at which the compare operation begins.
555 : * @param srcLimit The offset after the last code unit from that string to compare.
556 : * @return a negative/zero/positive integer corresponding to whether
557 : * this string is less than/equal to/greater than the second one
558 : * in code point order
559 : * @stable ICU 2.0
560 : */
561 : inline int8_t compareCodePointOrderBetween(int32_t start,
562 : int32_t limit,
563 : const UnicodeString& srcText,
564 : int32_t srcStart,
565 : int32_t srcLimit) const;
566 :
567 : /**
568 : * Compare two strings case-insensitively using full case folding.
569 : * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
570 : *
571 : * @param text Another string to compare this one to.
572 : * @param options A bit set of options:
573 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
574 : * Comparison in code unit order with default case folding.
575 : *
576 : * - U_COMPARE_CODE_POINT_ORDER
577 : * Set to choose code point order instead of code unit order
578 : * (see u_strCompare for details).
579 : *
580 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
581 : *
582 : * @return A negative, zero, or positive integer indicating the comparison result.
583 : * @stable ICU 2.0
584 : */
585 : inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
586 :
587 : /**
588 : * Compare two strings case-insensitively using full case folding.
589 : * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
590 : *
591 : * @param start The start offset in this string at which the compare operation begins.
592 : * @param length The number of code units from this string to compare.
593 : * @param srcText Another string to compare this one to.
594 : * @param options A bit set of options:
595 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
596 : * Comparison in code unit order with default case folding.
597 : *
598 : * - U_COMPARE_CODE_POINT_ORDER
599 : * Set to choose code point order instead of code unit order
600 : * (see u_strCompare for details).
601 : *
602 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
603 : *
604 : * @return A negative, zero, or positive integer indicating the comparison result.
605 : * @stable ICU 2.0
606 : */
607 : inline int8_t caseCompare(int32_t start,
608 : int32_t length,
609 : const UnicodeString& srcText,
610 : uint32_t options) const;
611 :
612 : /**
613 : * Compare two strings case-insensitively using full case folding.
614 : * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
615 : *
616 : * @param start The start offset in this string at which the compare operation begins.
617 : * @param length The number of code units from this string to compare.
618 : * @param srcText Another string to compare this one to.
619 : * @param srcStart The start offset in that string at which the compare operation begins.
620 : * @param srcLength The number of code units from that string to compare.
621 : * @param options A bit set of options:
622 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
623 : * Comparison in code unit order with default case folding.
624 : *
625 : * - U_COMPARE_CODE_POINT_ORDER
626 : * Set to choose code point order instead of code unit order
627 : * (see u_strCompare for details).
628 : *
629 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
630 : *
631 : * @return A negative, zero, or positive integer indicating the comparison result.
632 : * @stable ICU 2.0
633 : */
634 : inline int8_t caseCompare(int32_t start,
635 : int32_t length,
636 : const UnicodeString& srcText,
637 : int32_t srcStart,
638 : int32_t srcLength,
639 : uint32_t options) const;
640 :
641 : /**
642 : * Compare two strings case-insensitively using full case folding.
643 : * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)) (pseudo-code: <TT>srcChars</TT> is a const UChar *, so it stands for the string of <TT>srcLength</TT> code units it points to).
644 : *
645 : * @param srcChars A pointer to another string to compare this one to.
646 : * @param srcLength The number of code units from that string to compare.
647 : * @param options A bit set of options:
648 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
649 : * Comparison in code unit order with default case folding.
650 : *
651 : * - U_COMPARE_CODE_POINT_ORDER
652 : * Set to choose code point order instead of code unit order
653 : * (see u_strCompare for details).
654 : *
655 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
656 : *
657 : * @return A negative, zero, or positive integer indicating the comparison result.
658 : * @stable ICU 2.0
659 : */
660 : inline int8_t caseCompare(const UChar *srcChars,
661 : int32_t srcLength,
662 : uint32_t options) const;
663 :
664 : /**
665 : * Compare two strings case-insensitively using full case folding.
666 : * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)) (pseudo-code: <TT>srcChars</TT> is a const UChar *, not a UnicodeString).
667 : *
668 : * @param start The start offset in this string at which the compare operation begins.
669 : * @param length The number of code units from this string to compare.
670 : * @param srcChars A pointer to another string to compare this one to.
671 : * @param options A bit set of options:
672 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
673 : * Comparison in code unit order with default case folding.
674 : *
675 : * - U_COMPARE_CODE_POINT_ORDER
676 : * Set to choose code point order instead of code unit order
677 : * (see u_strCompare for details).
678 : *
679 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
680 : *
681 : * @return A negative, zero, or positive integer indicating the comparison result.
682 : * @stable ICU 2.0
683 : */
684 : inline int8_t caseCompare(int32_t start,
685 : int32_t length,
686 : const UChar *srcChars,
687 : uint32_t options) const;
688 :
689 : /**
690 : * Compare two strings case-insensitively using full case folding.
691 : * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
692 : *
693 : * @param start The start offset in this string at which the compare operation begins.
694 : * @param length The number of code units from this string to compare.
695 : * @param srcChars A pointer to another string to compare this one to.
696 : * @param srcStart The start offset in that string at which the compare operation begins.
697 : * @param srcLength The number of code units from that string to compare.
698 : * @param options A bit set of options:
699 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
700 : * Comparison in code unit order with default case folding.
701 : *
702 : * - U_COMPARE_CODE_POINT_ORDER
703 : * Set to choose code point order instead of code unit order
704 : * (see u_strCompare for details).
705 : *
706 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
707 : *
708 : * @return A negative, zero, or positive integer indicating the comparison result.
709 : * @stable ICU 2.0
710 : */
711 : inline int8_t caseCompare(int32_t start,
712 : int32_t length,
713 : const UChar *srcChars,
714 : int32_t srcStart,
715 : int32_t srcLength,
716 : uint32_t options) const;
717 :
718 : /**
719 : * Compare two strings case-insensitively using full case folding.
720 : * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
721 : *
722 : * @param start The start offset in this string at which the compare operation begins.
723 : * @param limit The offset after the last code unit from this string to compare.
724 : * @param srcText Another string to compare this one to.
725 : * @param srcStart The start offset in that string at which the compare operation begins.
726 : * @param srcLimit The offset after the last code unit from that string to compare.
727 : * @param options A bit set of options:
728 : * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
729 : * Comparison in code unit order with default case folding.
730 : *
731 : * - U_COMPARE_CODE_POINT_ORDER
732 : * Set to choose code point order instead of code unit order
733 : * (see u_strCompare for details).
734 : *
735 : * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
736 : *
737 : * @return A negative, zero, or positive integer indicating the comparison result.
738 : * @stable ICU 2.0
739 : */
740 : inline int8_t caseCompareBetween(int32_t start,
741 : int32_t limit,
742 : const UnicodeString& srcText,
743 : int32_t srcStart,
744 : int32_t srcLimit,
745 : uint32_t options) const;
746 :
747 : /**
748 : * Determine if this starts with the characters in <TT>text</TT>
749 : * @param text The text to match.
750 : * @return TRUE if this starts with the characters in <TT>text</TT>,
751 : * FALSE otherwise
752 : * @stable ICU 2.0
753 : */
754 : inline UBool startsWith(const UnicodeString& text) const;
755 :
756 : /**
757 : * Determine if this starts with the characters in <TT>srcText</TT>
758 : * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
759 : * @param srcText The text to match.
760 : * @param srcStart the offset into <TT>srcText</TT> to start matching
761 : * @param srcLength the number of characters in <TT>srcText</TT> to match
762 : * @return TRUE if this starts with the specified range of <TT>srcText</TT>,
763 : * FALSE otherwise
764 : * @stable ICU 2.0
765 : */
766 : inline UBool startsWith(const UnicodeString& srcText,
767 : int32_t srcStart,
768 : int32_t srcLength) const;
769 :
770 : /**
771 : * Determine if this starts with the characters in <TT>srcChars</TT>
772 : * @param srcChars The characters to match.
773 : * @param srcLength the number of characters in <TT>srcChars</TT>
774 : * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
775 : * FALSE otherwise
776 : * @stable ICU 2.0
777 : */
778 : inline UBool startsWith(const UChar *srcChars,
779 : int32_t srcLength) const;
780 :
781 : /**
782 : * Determine if this starts with the characters in <TT>srcChars</TT>
783 : * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
784 : * @param srcChars The characters to match.
785 : * @param srcStart the offset into <TT>srcChars</TT> to start matching
786 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
787 : * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
788 : * @stable ICU 2.0
789 : */
790 : inline UBool startsWith(const UChar *srcChars,
791 : int32_t srcStart,
792 : int32_t srcLength) const;
793 :
794 : /**
795 : * Determine if this ends with the characters in <TT>text</TT>
796 : * @param text The text to match.
797 : * @return TRUE if this ends with the characters in <TT>text</TT>,
798 : * FALSE otherwise
799 : * @stable ICU 2.0
800 : */
801 : inline UBool endsWith(const UnicodeString& text) const;
802 :
803 : /**
804 : * Determine if this ends with the characters in <TT>srcText</TT>
805 : * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
806 : * @param srcText The text to match.
807 : * @param srcStart the offset into <TT>srcText</TT> to start matching
808 : * @param srcLength the number of characters in <TT>srcText</TT> to match
809 : * @return TRUE if this ends with the specified range of <TT>srcText</TT>,
810 : * FALSE otherwise
811 : * @stable ICU 2.0
812 : */
813 : inline UBool endsWith(const UnicodeString& srcText,
814 : int32_t srcStart,
815 : int32_t srcLength) const;
816 :
817 : /**
818 : * Determine if this ends with the characters in <TT>srcChars</TT>
819 : * @param srcChars The characters to match.
820 : * @param srcLength the number of characters in <TT>srcChars</TT>
821 : * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
822 : * FALSE otherwise
823 : * @stable ICU 2.0
824 : */
825 : inline UBool endsWith(const UChar *srcChars,
826 : int32_t srcLength) const;
827 :
828 : /**
829 : * Determine if this ends with the characters in <TT>srcChars</TT>
830 : * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
831 : * @param srcChars The characters to match.
832 : * @param srcStart the offset into <TT>srcChars</TT> to start matching
833 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
834 : * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
835 : * FALSE otherwise
836 : * @stable ICU 2.0
837 : */
838 : inline UBool endsWith(const UChar *srcChars,
839 : int32_t srcStart,
840 : int32_t srcLength) const;
841 :
842 :
843 : /* Searching - bitwise only */
844 :
845 : /**
846 : * Locate in this the first occurrence of the characters in <TT>text</TT>,
847 : * using bitwise comparison.
848 : * @param text The text to search for.
849 : * @return The offset into this of the start of <TT>text</TT>,
850 : * or -1 if not found.
851 : * @stable ICU 2.0
852 : */
853 : inline int32_t indexOf(const UnicodeString& text) const;
854 :
855 : /**
856 : * Locate in this the first occurrence of the characters in <TT>text</TT>
857 : * starting at offset <TT>start</TT>, using bitwise comparison.
858 : * @param text The text to search for.
859 : * @param start The offset at which searching will start.
860 : * @return The offset into this of the start of <TT>text</TT>,
861 : * or -1 if not found.
862 : * @stable ICU 2.0
863 : */
864 : inline int32_t indexOf(const UnicodeString& text,
865 : int32_t start) const;
866 :
867 : /**
868 : * Locate in this the first occurrence in the range
869 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
870 : * in <TT>text</TT>, using bitwise comparison.
871 : * @param text The text to search for.
872 : * @param start The offset at which searching will start.
873 : * @param length The number of characters to search
874 : * @return The offset into this of the start of <TT>text</TT>,
875 : * or -1 if not found.
876 : * @stable ICU 2.0
877 : */
878 : inline int32_t indexOf(const UnicodeString& text,
879 : int32_t start,
880 : int32_t length) const;
881 :
882 : /**
883 : * Locate in this the first occurrence in the range
884 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
885 : * in <TT>srcText</TT> in the range
886 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
887 : * using bitwise comparison.
888 : * @param srcText The text to search for.
889 : * @param srcStart the offset into <TT>srcText</TT> at which
890 : * to start matching
891 : * @param srcLength the number of characters in <TT>srcText</TT> to match
892 : * @param start the offset into this at which to start matching
893 : * @param length the number of characters in this to search
894 : * @return The offset into this of the start of the matched range of <TT>srcText</TT>,
895 : * or -1 if not found.
896 : * @stable ICU 2.0
897 : */
898 : inline int32_t indexOf(const UnicodeString& srcText,
899 : int32_t srcStart,
900 : int32_t srcLength,
901 : int32_t start,
902 : int32_t length) const;
903 :
904 : /**
905 : * Locate in this the first occurrence of the characters in
906 : * <TT>srcChars</TT>
907 : * starting at offset <TT>start</TT>, using bitwise comparison.
908 : * @param srcChars The text to search for.
909 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
910 : * @param start the offset into this at which to start matching
911 : * @return The offset into this of the start of <TT>srcChars</TT>,
912 : * or -1 if not found.
913 : * @stable ICU 2.0
914 : */
915 : inline int32_t indexOf(const UChar *srcChars,
916 : int32_t srcLength,
917 : int32_t start) const;
918 :
919 : /**
920 : * Locate in this the first occurrence in the range
921 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
922 : * in <TT>srcChars</TT>, using bitwise comparison.
923 : * @param srcChars The text to search for.
924 : * @param srcLength the number of characters in <TT>srcChars</TT>
925 : * @param start The offset at which searching will start.
926 : * @param length The number of characters to search
927 : * @return The offset into this of the start of <TT>srcChars</TT>,
928 : * or -1 if not found.
929 : * @stable ICU 2.0
930 : */
931 : inline int32_t indexOf(const UChar *srcChars,
932 : int32_t srcLength,
933 : int32_t start,
934 : int32_t length) const;
935 :
936 : /**
937 : * Locate in this the first occurrence in the range
938 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
939 : * in <TT>srcChars</TT> in the range
940 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
941 : * using bitwise comparison.
942 : * @param srcChars The text to search for.
943 : * @param srcStart the offset into <TT>srcChars</TT> at which
944 : * to start matching
945 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
946 : * @param start the offset into this at which to start matching
947 : * @param length the number of characters in this to search
948 : * @return The offset into this of the start of the matched range of <TT>srcChars</TT>,
949 : * or -1 if not found.
950 : * @stable ICU 2.0
951 : */
952 : int32_t indexOf(const UChar *srcChars,
953 : int32_t srcStart,
954 : int32_t srcLength,
955 : int32_t start,
956 : int32_t length) const;
957 :
958 : /**
959 : * Locate in this the first occurrence of the BMP code point <code>c</code>,
960 : * using bitwise comparison.
961 : * @param c The code unit to search for.
962 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
963 : * @stable ICU 2.0
964 : */
965 : inline int32_t indexOf(UChar c) const;
966 :
967 : /**
968 : * Locate in this the first occurrence of the code point <TT>c</TT>,
969 : * using bitwise comparison.
970 : *
971 : * @param c The code point to search for.
972 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
973 : * @stable ICU 2.0
974 : */
975 : inline int32_t indexOf(UChar32 c) const;
976 :
977 : /**
978 : * Locate in this the first occurrence of the BMP code point <code>c</code>,
979 : * starting at offset <TT>start</TT>, using bitwise comparison.
980 : * @param c The code unit to search for.
981 : * @param start The offset at which searching will start.
982 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
983 : * @stable ICU 2.0
984 : */
985 : inline int32_t indexOf(UChar c,
986 : int32_t start) const;
987 :
988 : /**
989 : * Locate in this the first occurrence of the code point <TT>c</TT>
990 : * starting at offset <TT>start</TT>, using bitwise comparison.
991 : *
992 : * @param c The code point to search for.
993 : * @param start The offset at which searching will start.
994 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
995 : * @stable ICU 2.0
996 : */
997 : inline int32_t indexOf(UChar32 c,
998 : int32_t start) const;
999 :
1000 : /**
1001 : * Locate in this the first occurrence of the BMP code point <code>c</code>
1002 : * in the range [<TT>start</TT>, <TT>start + length</TT>),
1003 : * using bitwise comparison.
1004 : * @param c The code unit to search for.
1005 : * @param start the offset into this at which to start matching
1006 : * @param length the number of characters in this to search
1007 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1008 : * @stable ICU 2.0
1009 : */
1010 : inline int32_t indexOf(UChar c,
1011 : int32_t start,
1012 : int32_t length) const;
1013 :
1014 : /**
1015 : * Locate in this the first occurrence of the code point <TT>c</TT>
1016 : * in the range [<TT>start</TT>, <TT>start + length</TT>),
1017 : * using bitwise comparison.
1018 : *
1019 : * @param c The code point to search for.
1020 : * @param start the offset into this at which to start matching
1021 : * @param length the number of characters in this to search
1022 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1023 : * @stable ICU 2.0
1024 : */
1025 : inline int32_t indexOf(UChar32 c,
1026 : int32_t start,
1027 : int32_t length) const;
1028 :
1029 : /**
1030 : * Locate in this the last occurrence of the characters in <TT>text</TT>,
1031 : * using bitwise comparison.
1032 : * @param text The text to search for.
1033 : * @return The offset into this of the start of <TT>text</TT>,
1034 : * or -1 if not found.
1035 : * @stable ICU 2.0
1036 : */
1037 : inline int32_t lastIndexOf(const UnicodeString& text) const;
1038 :
1039 : /**
1040 : * Locate in this the last occurrence of the characters in <TT>text</TT>
1041 : * starting at offset <TT>start</TT>, using bitwise comparison.
1042 : * @param text The text to search for.
1043 : * @param start The offset at which searching will start.
1044 : * @return The offset into this of the start of <TT>text</TT>,
1045 : * or -1 if not found.
1046 : * @stable ICU 2.0
1047 : */
1048 : inline int32_t lastIndexOf(const UnicodeString& text,
1049 : int32_t start) const;
1050 :
1051 : /**
1052 : * Locate in this the last occurrence in the range
1053 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1054 : * in <TT>text</TT>, using bitwise comparison.
1055 : * @param text The text to search for.
1056 : * @param start The offset at which searching will start.
1057 : * @param length The number of characters to search
1058 : * @return The offset into this of the start of <TT>text</TT>,
1059 : * or -1 if not found.
1060 : * @stable ICU 2.0
1061 : */
1062 : inline int32_t lastIndexOf(const UnicodeString& text,
1063 : int32_t start,
1064 : int32_t length) const;
1065 :
1066 : /**
1067 : * Locate in this the last occurrence in the range
1068 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1069 : * in <TT>srcText</TT> in the range
1070 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1071 : * using bitwise comparison.
1072 : * @param srcText The text to search for.
1073 : * @param srcStart the offset into <TT>srcText</TT> at which
1074 : * to start matching
1075 : * @param srcLength the number of characters in <TT>srcText</TT> to match
1076 : * @param start the offset into this at which to start matching
1077 : * @param length the number of characters in this to search
1078 : * @return The offset into this of the start of the matched range of <TT>srcText</TT>,
1079 : * or -1 if not found.
1080 : * @stable ICU 2.0
1081 : */
1082 : inline int32_t lastIndexOf(const UnicodeString& srcText,
1083 : int32_t srcStart,
1084 : int32_t srcLength,
1085 : int32_t start,
1086 : int32_t length) const;
1087 :
1088 : /**
1089 : * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1090 : * starting at offset <TT>start</TT>, using bitwise comparison.
1091 : * @param srcChars The text to search for.
1092 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
1093 : * @param start the offset into this at which to start matching
1094 : * @return The offset into this of the start of <TT>srcChars</TT>,
1095 : * or -1 if not found.
1096 : * @stable ICU 2.0
1097 : */
1098 : inline int32_t lastIndexOf(const UChar *srcChars,
1099 : int32_t srcLength,
1100 : int32_t start) const;
1101 :
1102 : /**
1103 : * Locate in this the last occurrence in the range
1104 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1105 : * in <TT>srcChars</TT>, using bitwise comparison.
1106 : * @param srcChars The text to search for.
1107 : * @param srcLength the number of characters in <TT>srcChars</TT>
1108 : * @param start The offset at which searching will start.
1109 : * @param length The number of characters to search
1110 : * @return The offset into this of the start of <TT>srcChars</TT>,
1111 : * or -1 if not found.
1112 : * @stable ICU 2.0
1113 : */
1114 : inline int32_t lastIndexOf(const UChar *srcChars,
1115 : int32_t srcLength,
1116 : int32_t start,
1117 : int32_t length) const;
1118 :
1119 : /**
1120 : * Locate in this the last occurrence in the range
1121 : * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1122 : * in <TT>srcChars</TT> in the range
1123 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1124 : * using bitwise comparison.
1125 : * @param srcChars The text to search for.
1126 : * @param srcStart the offset into <TT>srcChars</TT> at which
1127 : * to start matching
1128 : * @param srcLength the number of characters in <TT>srcChars</TT> to match
1129 : * @param start the offset into this at which to start matching
1130 : * @param length the number of characters in this to search
1131 : * @return The offset into this of the start of the matched range of <TT>srcChars</TT>,
1132 : * or -1 if not found.
1133 : * @stable ICU 2.0
1134 : */
1135 : int32_t lastIndexOf(const UChar *srcChars,
1136 : int32_t srcStart,
1137 : int32_t srcLength,
1138 : int32_t start,
1139 : int32_t length) const;
1140 :
1141 : /**
1142 : * Locate in this the last occurrence of the BMP code point <code>c</code>,
1143 : * using bitwise comparison.
1144 : * @param c The code unit to search for.
1145 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1146 : * @stable ICU 2.0
1147 : */
1148 : inline int32_t lastIndexOf(UChar c) const;
1149 :
1150 : /**
1151 : * Locate in this the last occurrence of the code point <TT>c</TT>,
1152 : * using bitwise comparison.
1153 : *
1154 : * @param c The code point to search for.
1155 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1156 : * @stable ICU 2.0
1157 : */
1158 : inline int32_t lastIndexOf(UChar32 c) const;
1159 :
1160 : /**
1161 : * Locate in this the last occurrence of the BMP code point <code>c</code>
1162 : * starting at offset <TT>start</TT>, using bitwise comparison.
1163 : * @param c The code unit to search for.
1164 : * @param start The offset at which searching will start.
1165 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1166 : * @stable ICU 2.0
1167 : */
1168 : inline int32_t lastIndexOf(UChar c,
1169 : int32_t start) const;
1170 :
1171 : /**
1172 : * Locate in this the last occurrence of the code point <TT>c</TT>
1173 : * starting at offset <TT>start</TT>, using bitwise comparison.
1174 : *
1175 : * @param c The code point to search for.
1176 : * @param start The offset at which searching will start.
1177 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1178 : * @stable ICU 2.0
1179 : */
1180 : inline int32_t lastIndexOf(UChar32 c,
1181 : int32_t start) const;
1182 :
1183 : /**
1184 : * Locate in this the last occurrence of the BMP code point <code>c</code>
1185 : * in the range [<TT>start</TT>, <TT>start + length</TT>),
1186 : * using bitwise comparison.
1187 : * @param c The code unit to search for.
1188 : * @param start the offset into this at which to start matching
1189 : * @param length the number of characters in this to search
1190 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1191 : * @stable ICU 2.0
1192 : */
1193 : inline int32_t lastIndexOf(UChar c,
1194 : int32_t start,
1195 : int32_t length) const;
1196 :
1197 : /**
1198 : * Locate in this the last occurrence of the code point <TT>c</TT>
1199 : * in the range [<TT>start</TT>, <TT>start + length</TT>),
1200 : * using bitwise comparison.
1201 : *
1202 : * @param c The code point to search for.
1203 : * @param start the offset into this at which to start matching
1204 : * @param length the number of characters in this to search
1205 : * @return The offset into this of <TT>c</TT>, or -1 if not found.
1206 : * @stable ICU 2.0
1207 : */
1208 : inline int32_t lastIndexOf(UChar32 c,
1209 : int32_t start,
1210 : int32_t length) const;
1211 :
1212 :
1213 : /* Character access */
1214 :
1215 : /**
1216 : * Return the code unit at offset <tt>offset</tt>.
1217 : * If the offset is not valid (0..length()-1) then U+ffff is returned.
1218 : * @param offset a valid offset into the text
1219 : * @return the code unit at offset <tt>offset</tt>
1220 : * or 0xffff if the offset is not valid for this string
1221 : * @stable ICU 2.0
1222 : */
1223 : inline UChar charAt(int32_t offset) const;
1224 :
1225 : /**
1226 : * Return the code unit at offset <tt>offset</tt>.
1227 : * If the offset is not valid (0..length()-1) then U+ffff is returned.
1228 : * @param offset a valid offset into the text
1229 : * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid for this string
1230 : * @stable ICU 2.0
1231 : */
1232 : inline UChar operator[] (int32_t offset) const;
1233 :
1234 : /**
1235 : * Return the code point that contains the code unit
1236 : * at offset <tt>offset</tt>.
1237 : * If the offset is not valid (0..length()-1) then U+ffff is returned.
1238 : * @param offset a valid offset into the text
1239 : * that indicates the text offset of any of the code units
1240 : * that will be assembled into a code point (21-bit value) and returned
1241 : * @return the code point of text at <tt>offset</tt>
1242 : * or 0xffff if the offset is not valid for this string
1243 : * @stable ICU 2.0
1244 : */
1245 : inline UChar32 char32At(int32_t offset) const;
1246 :
1247 : /**
1248 : * Adjust a random-access offset so that
1249 : * it points to the beginning of a Unicode character.
1250 : * The offset that is passed in points to
1251 : * any code unit of a code point,
1252 : * while the returned offset will point to the first code unit
1253 : * of the same code point.
1254 : * In UTF-16, if the input offset points to a second surrogate
1255 : * of a surrogate pair, then the returned offset will point
1256 : * to the first surrogate.
1257 : * @param offset a valid offset into one code point of the text
1258 : * @return offset of the first code unit of the same code point
1259 : * @see U16_SET_CP_START
1260 : * @stable ICU 2.0
1261 : */
1262 : inline int32_t getChar32Start(int32_t offset) const;
1263 :
1264 : /**
1265 : * Adjust a random-access offset so that
1266 : * it points behind a Unicode character.
1267 : * The offset that is passed in points behind
1268 : * any code unit of a code point,
1269 : * while the returned offset will point behind the last code unit
1270 : * of the same code point.
1271 : * In UTF-16, if the input offset points behind the first surrogate
1272 : * (i.e., to the second surrogate)
1273 : * of a surrogate pair, then the returned offset will point
1274 : * behind the second surrogate (i.e., to the first code unit after the pair).
1275 : * @param offset a valid offset after any code unit of a code point of the text
1276 : * @return offset of the first code unit after the same code point
1277 : * @see U16_SET_CP_LIMIT
1278 : * @stable ICU 2.0
1279 : */
1280 : inline int32_t getChar32Limit(int32_t offset) const;
1281 :
1282 : /**
1283 : * Move the code unit index along the string by delta code points.
1284 : * Interpret the input index as a code unit-based offset into the string,
1285 : * move the index forward or backward by delta code points, and
1286 : * return the resulting index.
1287 : * The input index should point to the first code unit of a code point,
1288 : * if there is more than one.
1289 : *
1290 : * Both input and output indexes are code unit-based as for all
1291 : * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1292 : * If delta<0 then the index is moved backward (toward the start of the string).
1293 : * If delta>0 then the index is moved forward (toward the end of the string).
1294 : *
1295 : * This behaves like CharacterIterator::move32(delta, kCurrent).
1296 : *
1297 : * Behavior for out-of-bounds indexes:
1298 : * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1299 : * if the input index<0 then it is pinned to 0;
1300 : * if it is index>length() then it is pinned to length().
1301 : * Afterwards, the index is moved by <code>delta</code> code points
1302 : * forward or backward,
1303 : * but no further backward than to 0 and no further forward than to length().
1304 : * The resulting index return value will be in between 0 and length(), inclusively.
1305 : *
1306 : * Examples:
1307 : * <pre>
1308 : * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1309 : * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 28).unescape();
1310 : *
1311 : * // initial index: position of U+10000
1312 : * int32_t index=1;
1313 : *
1314 : * // the following examples will all result in index==4, position of U+10ffff
1315 : *
1316 : * // skip 2 code points from some position in the string
1317 : * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1318 : *
1319 : * // go to the 3rd code point from the start of s (0-based)
1320 : * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1321 : *
1322 : * // go to the next-to-last code point of s
1323 : * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1324 : * </pre>
1325 : *
1326 : * @param index input code unit index
1327 : * @param delta (signed) code point count to move the index forward or backward
1328 : * in the string
1329 : * @return the resulting code unit index
1330 : * @stable ICU 2.0
1331 : */
1332 : int32_t moveIndex32(int32_t index, int32_t delta) const;
1333 :
1334 : /* Substring extraction */
1335 :
1336 : /**
1337 : * Copy the characters in the range
1338 : * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1339 : * beginning at <tt>dstStart</tt>.
1340 : * If the string aliases to <code>dst</code> itself as an external buffer,
1341 : * then extract() will not copy the contents.
1342 : *
1343 : * @param start offset of first character which will be copied into the array
1344 : * @param length the number of characters to extract
1345 : * @param dst array in which to copy characters. The length of <tt>dst</tt>
1346 : * must be at least (<tt>dstStart + length</tt>).
1347 : * @param dstStart the offset in <TT>dst</TT> where the first character
1348 : * will be extracted
1349 : * @stable ICU 2.0
1350 : */
1351 : inline void extract(int32_t start,
1352 : int32_t length,
1353 : UChar *dst,
1354 : int32_t dstStart = 0) const;
1355 :
1356 : /**
1357 : * Copy the contents of the string into dest.
1358 : * This is a convenience function that
1359 : * checks if there is enough space in dest,
1360 : * extracts the entire string if possible,
1361 : * and NUL-terminates dest if possible.
1362 : *
1363 : * If the string fits into dest but cannot be NUL-terminated
1364 : * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1365 : * If the string itself does not fit into dest
1366 : * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1367 : *
1368 : * If the string aliases to <code>dest</code> itself as an external buffer,
1369 : * then extract() will not copy the contents.
1370 : *
1371 : * @param dest Destination string buffer.
1372 : * @param destCapacity Number of UChars available at dest.
1373 : * @param errorCode ICU error code.
1374 : * @return length()
1375 : * @stable ICU 2.0
1376 : */
1377 : int32_t
1378 : extract(UChar *dest, int32_t destCapacity,
1379 : UErrorCode &errorCode) const;
1380 :
1381 : /**
1382 : * Copy the characters in the range
1383 : * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1384 : * <tt>target</tt>.
1385 : * @param start offset of first character which will be copied
1386 : * @param length the number of characters to extract
1387 : * @param target UnicodeString into which to copy characters.
1388 : * The result is delivered only through <TT>target</TT>; there is no return value.
1389 : * @stable ICU 2.0
1390 : */
1391 : inline void extract(int32_t start,
1392 : int32_t length,
1393 : UnicodeString& target) const;
1394 :
1395 : /**
1396 : * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1397 : * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1398 : * @param start offset of first character which will be copied into the array
1399 : * @param limit offset immediately following the last character to be copied
1400 : * @param dst array in which to copy characters. The length of <tt>dst</tt>
1401 : * must be at least (<tt>dstStart + (limit - start)</tt>).
1402 : * @param dstStart the offset in <TT>dst</TT> where the first character
1403 : * will be extracted
1404 : * @stable ICU 2.0
1405 : */
1406 : inline void extractBetween(int32_t start,
1407 : int32_t limit,
1408 : UChar *dst,
1409 : int32_t dstStart = 0) const;
1410 :
1411 : /**
1412 : * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1413 : * into the UnicodeString <tt>target</tt>. Replaceable API.
1414 : * @param start offset of first character which will be copied
1415 : * @param limit offset immediately following the last character to be copied
1416 : * @param target UnicodeString into which to copy characters.
1417 : * The result is delivered only through <TT>target</TT>; there is no return value.
1418 : * @stable ICU 2.0
1419 : */
1420 : virtual void extractBetween(int32_t start,
1421 : int32_t limit,
1422 : UnicodeString& target) const;
1423 :
1424 : /**
1425 : * Copy the characters in the range
1426 : * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1427 : * All characters must be invariant (see utypes.h).
1428 : * Use US_INV as the last, signature-distinguishing parameter.
1429 : *
1430 : * This function does not write any more than <code>targetCapacity</code>
1431 : * characters but returns the length of the entire output string
1432 : * so that one can allocate a larger buffer and call the function again
1433 : * if necessary.
1434 : * The output string is NUL-terminated if possible.
1435 : *
1436 : * @param start offset of first character which will be copied
1437 : * @param startLength the number of characters to extract
1438 : * @param target the target buffer for extraction, can be NULL
1439 : * if targetCapacity is 0
1440 : * @param targetCapacity the length of the target buffer
1441 : * @param inv Signature-distinguishing parameter, use US_INV.
1442 : * @return the output string length, not including the terminating NUL
1443 : * @stable ICU 3.2
1444 : */
1445 : int32_t extract(int32_t start,
1446 : int32_t startLength,
1447 : char *target,
1448 : int32_t targetCapacity,
1449 : enum EInvariant inv) const;
1450 :
1451 : #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1452 :
1453 : /**
1454 : * Copy the characters in the range
1455 : * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1456 : * in the platform's default codepage.
1457 : * This function does not write any more than <code>targetLength</code>
1458 : * characters but returns the length of the entire output string
1459 : * so that one can allocate a larger buffer and call the function again
1460 : * if necessary.
1461 : * The output string is NUL-terminated if possible.
1462 : *
1463 : * @param start offset of first character which will be copied
1464 : * @param startLength the number of characters to extract
1465 : * @param target the target buffer for extraction
1466 : * @param targetLength the length of the target buffer
1467 : * If <TT>target</TT> is NULL, then the number of bytes required for
1468 : * <TT>target</TT> is returned.
1469 : * @return the output string length, not including the terminating NUL
1470 : * @stable ICU 2.0
1471 : */
1472 : int32_t extract(int32_t start,
1473 : int32_t startLength,
1474 : char *target,
1475 : uint32_t targetLength) const;
1476 :
1477 : #endif
1478 :
1479 : #if !UCONFIG_NO_CONVERSION
1480 :
1481 : /**
1482 : * Copy the characters in the range
1483 : * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1484 : * in a specified codepage.
1485 : * The output string is NUL-terminated.
1486 : *
1487 : * Recommendation: For invariant-character strings use
1488 : * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1489 : * because it avoids object code dependencies of UnicodeString on
1490 : * the conversion code.
1491 : *
1492 : * @param start offset of first character which will be copied
1493 : * @param startLength the number of characters to extract
1494 : * @param target the target buffer for extraction
1495 : * @param codepage the desired codepage for the characters. 0 has
1496 : * the special meaning of the default codepage
1497 : * If <code>codepage</code> is an empty string (<code>""</code>),
1498 : * then a simple conversion is performed on the codepage-invariant
1499 : * subset ("invariant characters") of the platform encoding. See utypes.h.
1500 : * If <TT>target</TT> is NULL, then the number of bytes required for
1501 : * <TT>target</TT> is returned. It is assumed that the target is big enough
1502 : * to fit all of the characters.
1503 : * @return the output string length, not including the terminating NUL
1504 : * @stable ICU 2.0
1505 : */
1506 : inline int32_t extract(int32_t start,
1507 : int32_t startLength,
1508 : char *target,
1509 : const char *codepage = 0) const;
1510 :
1511 : /**
1512 : * Copy the characters in the range
1513 : * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1514 : * in a specified codepage.
1515 : * This function does not write any more than <code>targetLength</code>
1516 : * characters but returns the length of the entire output string
1517 : * so that one can allocate a larger buffer and call the function again
1518 : * if necessary.
1519 : * The output string is NUL-terminated if possible.
1520 : *
1521 : * Recommendation: For invariant-character strings use
1522 : * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1523 : * because it avoids object code dependencies of UnicodeString on
1524 : * the conversion code.
1525 : *
1526 : * @param start offset of first character which will be copied
1527 : * @param startLength the number of characters to extract
1528 : * @param target the target buffer for extraction
1529 : * @param targetLength the length of the target buffer
1530 : * @param codepage the desired codepage for the characters. 0 has
1531 : * the special meaning of the default codepage
1532 : * If <code>codepage</code> is an empty string (<code>""</code>),
1533 : * then a simple conversion is performed on the codepage-invariant
1534 : * subset ("invariant characters") of the platform encoding. See utypes.h.
1535 : * If <TT>target</TT> is NULL, then the number of bytes required for
1536 : * <TT>target</TT> is returned.
1537 : * @return the output string length, not including the terminating NUL
1538 : * @stable ICU 2.0
1539 : */
1540 : int32_t extract(int32_t start,
1541 : int32_t startLength,
1542 : char *target,
1543 : uint32_t targetLength,
1544 : const char *codepage) const;
1545 :
1546 : /**
1547 : * Convert the UnicodeString into a codepage string using an existing UConverter.
1548 : * The output string is NUL-terminated if possible.
1549 : *
1550 : * This function avoids the overhead of opening and closing a converter if
1551 : * multiple strings are extracted.
1552 : *
1553 : * @param dest destination string buffer, can be NULL if destCapacity==0
1554 : * @param destCapacity the number of chars available at dest
1555 : * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1556 : * or NULL for the default converter
1557 : * @param errorCode normal ICU error code
1558 : * @return the length of the output string, not counting the terminating NUL;
1559 : * if the length is greater than destCapacity, then the string will not fit
1560 : * and a buffer of the indicated length would need to be passed in
1561 : * @stable ICU 2.0
1562 : */
1563 : int32_t extract(char *dest, int32_t destCapacity,
1564 : UConverter *cnv,
1565 : UErrorCode &errorCode) const;
1566 :
1567 : #endif
1568 :
1569 : /**
1570 : * Convert the UnicodeString to UTF-8 and write the result
1571 : * to a ByteSink. This is called by toUTF8String().
1572 : * Unpaired surrogates are replaced with U+FFFD.
1573 : * Calls u_strToUTF8WithSub().
1574 : *
1575 : * @param sink A ByteSink to which the UTF-8 version of the string is written.
1576 : * @draft ICU 4.2
1577 : * @see toUTF8String
1578 : */
1579 : void toUTF8(ByteSink &sink) const;
1580 :
1581 : #if U_HAVE_STD_STRING
1582 :
1583 : /**
1584 : * Append the UTF-8 form of this UnicodeString to a standard string.
1585 : * The conversion is delegated to toUTF8() through a StringByteSink
1586 : * wrapped around <code>result</code>.
1587 : * Unpaired surrogates are replaced with U+FFFD.
1588 : *
1589 : * @param result A standard string (or a compatible object)
1590 : * to which the UTF-8 version of the string is appended.
1591 : * @return The string object.
1592 : * @draft ICU 4.2
1593 : * @see toUTF8
1594 : */
1595 : template<typename StringClass>
1596 : StringClass &toUTF8String(StringClass &result) const {
1597 : StringByteSink<StringClass> utf8Sink(&result);
1598 : this->toUTF8(utf8Sink);
1599 : return result;
1600 : }
1601 :
1602 : #endif
1603 :
1604 : /**
1605 : * Convert the UnicodeString to UTF-32.
1606 : * Unpaired surrogates are replaced with U+FFFD.
1607 : * Calls u_strToUTF32WithSub().
1608 : *
1609 : * @param utf32 destination string buffer, can be NULL if capacity==0
1610 : * @param capacity the number of UChar32s available at utf32
1611 : * @param errorCode Standard ICU error code. Its input value must
1612 : * pass the U_SUCCESS() test, or else the function returns
1613 : * immediately. Check for U_FAILURE() on output or use with
1614 : * function chaining. (See User Guide for details.)
1615 : * @return The length of the UTF-32 string.
1616 : * @see fromUTF32
1617 : * @draft ICU 4.2
1618 : */
1619 : int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1620 :
1621 : /* Length operations */
1622 :
1623 : /**
1624 : * Return the length of the UnicodeString object.
1625 : * The length is the number of UChar code units in the UnicodeString.
1626 : * If you want the number of code points, please use countChar32().
1627 : * @return the length of the UnicodeString object
1628 : * @see countChar32
1629 : * @stable ICU 2.0
1630 : */
1631 : inline int32_t length(void) const;
1632 :
1633 : /**
1634 : * Count Unicode code points in the length UChar code units of the string.
1635 : * A code point may occupy either one or two UChar code units.
1636 : * Counting code points involves reading all code units.
1637 : *
1638 : * This function is basically the inverse of moveIndex32().
1639 : *
1640 : * @param start the index of the first code unit to check
1641 : * @param length the number of UChar code units to check
1642 : * @return the number of code points in the specified code units
1643 : * @see length
1644 : * @stable ICU 2.0
1645 : */
1646 : int32_t
1647 : countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1648 :
1649 : /**
1650 : * Check if the length UChar code units of the string
1651 : * contain more Unicode code points than a certain number.
1652 : * This is more efficient than counting all code points in this part of the string
1653 : * and comparing that number with a threshold.
1654 : * This function may not need to scan the string at all if the length
1655 : * falls within a certain range, and
1656 : * never needs to count more than 'number+1' code points.
1657 : * Logically equivalent to (countChar32(start, length)>number).
1658 : * A Unicode code point may occupy either one or two UChar code units.
1659 : *
1660 : * @param start the index of the first code unit to check (0 for the entire string)
1661 : * @param length the number of UChar code units to check
1662 : * (use INT32_MAX for the entire string; remember that start/length
1663 : * values are pinned)
1664 : * @param number The number of code points in the (sub)string is compared against
1665 : * the 'number' parameter.
1666 : * @return Boolean value for whether the string contains more Unicode code points
1667 : * than 'number'. Same as (u_countChar32(s, length)>number).
1668 : * @see countChar32
1669 : * @see u_strHasMoreChar32Than
1670 : * @stable ICU 2.4
1671 : */
1672 : UBool
1673 : hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1674 :
1675 : /**
1676 : * Determine if this string is empty.
1677 : * @return TRUE if this string contains 0 characters, FALSE otherwise.
1678 : * @stable ICU 2.0
1679 : */
1680 : inline UBool isEmpty(void) const;
1681 :
1682 : /**
1683 : * Return the capacity of the internal buffer of the UnicodeString object.
1684 : * This is useful together with the getBuffer functions.
1685 : * See there for details.
1686 : *
1687 : * @return the number of UChars available in the internal buffer
1688 : * @see getBuffer
1689 : * @stable ICU 2.0
1690 : */
1691 : inline int32_t getCapacity(void) const;
1692 :
1693 : /* Other operations */
1694 :
1695 : /**
1696 : * Generate a hash code for this object.
1697 : * @return The hash code of this UnicodeString.
1698 : * @stable ICU 2.0
1699 : */
1700 : inline int32_t hashCode(void) const;
1701 :
1702 : /**
1703 : * Determine if this object contains a valid string.
1704 : * A bogus string has no value. It is different from an empty string.
1705 : * It can be used to indicate that no string value is available.
1706 : * getBuffer() and getTerminatedBuffer() return NULL, and
1707 : * length() returns 0.
1708 : *
1709 : * @return TRUE if the string is valid, FALSE otherwise
1710 : * @see setToBogus()
1711 : * @stable ICU 2.0
1712 : */
1713 : inline UBool isBogus(void) const;
1714 :
1715 :
1716 : //========================================
1717 : // Write operations
1718 : //========================================
1719 :
1720 : /* Assignment operations */
1721 :
1722 : /**
1723 : * Assignment operator. Replace the characters in this UnicodeString
1724 : * with the characters from <TT>srcText</TT>.
1725 : * @param srcText The text containing the characters to replace
1726 : * @return a reference to this
1727 : * @stable ICU 2.0
1728 : */
1729 : UnicodeString &operator=(const UnicodeString &srcText);
1730 :
1731 : /**
1732 : * Almost the same as the assignment operator.
1733 : * Replace the characters in this UnicodeString
1734 : * with the characters from <code>srcText</code>.
1735 : *
1736 : * This function works the same for all strings except for ones that
1737 : * are readonly aliases.
1738 : * Starting with ICU 2.4, the assignment operator and the copy constructor
1739 : * allocate a new buffer and copy the buffer contents even for readonly aliases.
1740 : * This function implements the old, more efficient but less safe behavior
1741 : * of making this string also a readonly alias to the same buffer.
1742 : * The fastCopyFrom function must be used only if it is known that the lifetime of
1743 : * this UnicodeString is at least as long as the lifetime of the aliased buffer
1744 : * including its contents, for example for strings from resource bundles
1745 : * or aliases to string contents.
1746 : *
1747 : * @param src The text containing the characters to replace.
1748 : * @return a reference to this
1749 : * @stable ICU 2.4
1750 : */
1751 : UnicodeString &fastCopyFrom(const UnicodeString &src);
1752 :
1753 : /**
1754 : * Assignment operator. Replace the characters in this UnicodeString
1755 : * with the code unit <TT>ch</TT>.
1756 : * @param ch the code unit to replace
1757 : * @return a reference to this
1758 : * @stable ICU 2.0
1759 : */
1760 : inline UnicodeString& operator= (UChar ch);
1761 :
1762 : /**
1763 : * Assignment operator. Replace the characters in this UnicodeString
1764 : * with the code point <TT>ch</TT>.
1765 : * @param ch the code point to replace
1766 : * @return a reference to this
1767 : * @stable ICU 2.0
1768 : */
1769 : inline UnicodeString& operator= (UChar32 ch);
1770 :
1771 : /**
1772 : * Set the text in the UnicodeString object to the characters
1773 : * in <TT>srcText</TT> in the range
1774 : * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1775 : * <TT>srcText</TT> is not modified.
1776 : * @param srcText the source for the new characters
1777 : * @param srcStart the offset into <TT>srcText</TT> where new characters
1778 : * will be obtained
1779 : * @return a reference to this
1780 : * @stable ICU 2.2
1781 : */
1782 : inline UnicodeString& setTo(const UnicodeString& srcText,
1783 : int32_t srcStart);
1784 :
1785 : /**
1786 : * Set the text in the UnicodeString object to the characters
1787 : * in <TT>srcText</TT> in the range
1788 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1789 : * <TT>srcText</TT> is not modified.
1790 : * @param srcText the source for the new characters
1791 : * @param srcStart the offset into <TT>srcText</TT> where new characters
1792 : * will be obtained
1793 : * @param srcLength the number of characters in <TT>srcText</TT> in the
1794 : * replace string.
1795 : * @return a reference to this
1796 : * @stable ICU 2.0
1797 : */
1798 : inline UnicodeString& setTo(const UnicodeString& srcText,
1799 : int32_t srcStart,
1800 : int32_t srcLength);
1801 :
1802 : /**
1803 : * Set the text in the UnicodeString object to the characters in
1804 : * <TT>srcText</TT>.
1805 : * <TT>srcText</TT> is not modified.
1806 : * @param srcText the source for the new characters
1807 : * @return a reference to this
1808 : * @stable ICU 2.0
1809 : */
1810 : inline UnicodeString& setTo(const UnicodeString& srcText);
1811 :
1812 : /**
1813 : * Set the characters in the UnicodeString object to the characters
1814 : * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1815 : * @param srcChars the source for the new characters
1816 : * @param srcLength the number of Unicode characters in srcChars.
1817 : * @return a reference to this
1818 : * @stable ICU 2.0
1819 : */
1820 : inline UnicodeString& setTo(const UChar *srcChars,
1821 : int32_t srcLength);
1822 :
1823 : /**
1824 : * Set the characters in the UnicodeString object to the code unit
1825 : * <TT>srcChar</TT>.
1826 : * @param srcChar the code unit which becomes the UnicodeString's character
1827 : * content
1828 : * @return a reference to this
1829 : * @stable ICU 2.0
1830 : */
1831 : UnicodeString& setTo(UChar srcChar);
1832 :
1833 : /**
1834 : * Set the characters in the UnicodeString object to the code point
1835 : * <TT>srcChar</TT>.
1836 : * @param srcChar the code point which becomes the UnicodeString's character
1837 : * content
1838 : * @return a reference to this
1839 : * @stable ICU 2.0
1840 : */
1841 : UnicodeString& setTo(UChar32 srcChar);
1842 :
1843 : /**
1844 : * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1845 : * The text will be used for the UnicodeString object, but
1846 : * it will not be released when the UnicodeString is destroyed.
1847 : * This has copy-on-write semantics:
1848 : * When the string is modified, then the buffer is first copied into
1849 : * newly allocated memory.
1850 : * The aliased buffer is never modified.
1851 : * In an assignment to another UnicodeString, the text will be aliased again,
1852 : * so that both strings then alias the same readonly-text.
1853 : *
1854 : * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1855 : * This must be true if <code>textLength==-1</code>.
1856 : * @param text The characters to alias for the UnicodeString.
1857 : * @param textLength The number of Unicode characters in <code>text</code> to alias.
1858 : * If -1, then this function will determine the length
1859 : * by calling <code>u_strlen()</code>.
1860 : * @return a reference to this
1861 : * @stable ICU 2.0
1862 : */
1863 : UnicodeString &setTo(UBool isTerminated,
1864 : const UChar *text,
1865 : int32_t textLength);
1866 :
1867 : /**
1868 : * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1869 : * The text will be used for the UnicodeString object, but
1870 : * it will not be released when the UnicodeString is destroyed.
1871 : * This has write-through semantics:
1872 : * For as long as the capacity of the buffer is sufficient, write operations
1873 : * will directly affect the buffer. When more capacity is necessary, then
1874 : * a new buffer will be allocated and the contents copied as with regularly
1875 : * constructed strings.
1876 : * In an assignment to another UnicodeString, the buffer will be copied.
1877 : * The extract(UChar *dst) function detects whether the dst pointer is the same
1878 : * as the string buffer itself and will in this case not copy the contents.
1879 : *
1880 : * @param buffer The characters to alias for the UnicodeString.
1881 : * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1882 : * @param buffCapacity The size of <code>buffer</code> in UChars.
1883 : * @return a reference to this
1884 : * @stable ICU 2.0
1885 : */
1886 : UnicodeString &setTo(UChar *buffer,
1887 : int32_t buffLength,
1888 : int32_t buffCapacity);
1889 :
1890 : /**
1891 : * Make this UnicodeString object invalid.
1892 : * The string will test TRUE with isBogus().
1893 : *
1894 : * A bogus string has no value. It is different from an empty string.
1895 : * It can be used to indicate that no string value is available.
1896 : * getBuffer() and getTerminatedBuffer() return NULL, and
1897 : * length() returns 0.
1898 : *
1899 : * This utility function is used throughout the UnicodeString
1900 : * implementation to indicate that a UnicodeString operation failed,
1901 : * and may be used in other functions,
1902 : * especially but not exclusively when such functions do not
1903 : * take a UErrorCode for simplicity.
1904 : *
1905 : * The following methods, and no others, will clear a string object's bogus flag:
1906 : * - remove()
1907 : * - remove(0, INT32_MAX)
1908 : * - truncate(0)
1909 : * - operator=() (assignment operator)
1910 : * - setTo(...)
1911 : *
1912 : * The simplest way to turn a bogus string into an empty one
1913 : * is to use the remove() function.
1914 : * Examples for other functions that are equivalent to "set to empty string":
1915 : * \code
1916 : * if(s.isBogus()) {
1917 : * s.remove(); // set to an empty string (remove all), or
1918 : * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
1919 : * s.truncate(0); // set to an empty string (complete truncation), or
1920 : * s=UnicodeString(); // assign an empty string, or
1921 : * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
1922 : * static const UChar nul=0;
1923 : * s.setTo(&nul, 0); // set to an empty C Unicode string
1924 : * }
1925 : * \endcode
1926 : *
1927 : * @see isBogus()
1928 : * @stable ICU 2.0
1929 : */
1930 : void setToBogus();
1931 :
1932 : /**
1933 : * Set the character at the specified offset to the specified character.
1934 : * @param offset A valid offset into the text of the character to set
1935 : * @param ch The new character
1936 : * @return A reference to this
1937 : * @stable ICU 2.0
1938 : */
1939 : UnicodeString& setCharAt(int32_t offset,
1940 : UChar ch);
1941 :
1942 :
1943 : /* Append operations */
1944 :
1945 : /**
1946 : * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
1947 : * object.
1948 : * @param ch the code unit to be appended
1949 : * @return a reference to this
1950 : * @stable ICU 2.0
1951 : */
1952 : inline UnicodeString& operator+= (UChar ch);
1953 :
1954 : /**
1955 : * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
1956 : * object.
1957 : * @param ch the code point to be appended
1958 : * @return a reference to this
1959 : * @stable ICU 2.0
1960 : */
1961 : inline UnicodeString& operator+= (UChar32 ch);
1962 :
1963 : /**
1964 : * Append operator. Append the characters in <TT>srcText</TT> to the
1965 : * end of the UnicodeString object. <TT>srcText</TT> is
1966 : * not modified.
1967 : * @param srcText the source for the new characters
1968 : * @return a reference to this
1969 : * @stable ICU 2.0
1970 : */
1971 : inline UnicodeString& operator+= (const UnicodeString& srcText);
1972 :
1973 : /**
1974 : * Append the characters
1975 : * in <TT>srcText</TT> in the range
1976 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
1977 : * end of the UnicodeString object. <TT>srcText</TT>
1978 : * is not modified.
1979 : * @param srcText the source for the new characters
1980 : * @param srcStart the offset into <TT>srcText</TT> where new characters
1981 : * will be obtained
1982 : * @param srcLength the number of characters in <TT>srcText</TT> in
1983 : * the append string
1984 : * @return a reference to this
1985 : * @stable ICU 2.0
1986 : */
1987 : inline UnicodeString& append(const UnicodeString& srcText,
1988 : int32_t srcStart,
1989 : int32_t srcLength);
1990 :
1991 : /**
1992 : * Append the characters in <TT>srcText</TT> to the end of the UnicodeString
1993 : * object. <TT>srcText</TT> is not modified.
1994 : * @param srcText the source for the new characters
1995 : * @return a reference to this
1996 : * @stable ICU 2.0
1997 : */
1998 : inline UnicodeString& append(const UnicodeString& srcText);
1999 :
2000 : /**
2001 : * Append the characters in <TT>srcChars</TT> in the range
2002 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the
2003 : * UnicodeString object.
2004 : * <TT>srcChars</TT> is not modified.
2005 : * @param srcChars the source for the new characters
2006 : * @param srcStart the offset into <TT>srcChars</TT> where new characters
2007 : * will be obtained
2008 : * @param srcLength the number of characters in <TT>srcChars</TT> in
2009 : * the append string
2010 : * @return a reference to this
2011 : * @stable ICU 2.0
2012 : */
2013 : inline UnicodeString& append(const UChar *srcChars,
2014 : int32_t srcStart,
2015 : int32_t srcLength);
2016 :
2017 : /**
2018 : * Append the characters in <TT>srcChars</TT> to the end of the UnicodeString
2019 : * object. <TT>srcChars</TT> is not modified.
2020 : * @param srcChars the source for the new characters
2021 : * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
2022 : * @return a reference to this
2023 : * @stable ICU 2.0
2024 : */
2025 : inline UnicodeString& append(const UChar *srcChars,
2026 : int32_t srcLength);
2027 :
2028 : /**
2029 : * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2030 : * @param srcChar the code unit to append
2031 : * @return a reference to this
2032 : * @stable ICU 2.0
2033 : */
2034 : inline UnicodeString& append(UChar srcChar);
2035 :
2036 : /**
2037 : * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2038 : * @param srcChar the code point to append
2039 : * @return a reference to this
2040 : * @stable ICU 2.0
2041 : */
2042 : inline UnicodeString& append(UChar32 srcChar);
2043 :
2044 :
2045 : /* Insert operations */
2046 :
2047 : /**
2048 : * Insert the characters in <TT>srcText</TT> in the range
2049 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2050 : * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2051 : * @param start the offset where the insertion begins
2052 : * @param srcText the source for the new characters
2053 : * @param srcStart the offset into <TT>srcText</TT> where new characters
2054 : * will be obtained
2055 : * @param srcLength the number of characters in <TT>srcText</TT> in
2056 : * the insert string
2057 : * @return a reference to this
2058 : * @stable ICU 2.0
2059 : */
2060 : inline UnicodeString& insert(int32_t start,
2061 : const UnicodeString& srcText,
2062 : int32_t srcStart,
2063 : int32_t srcLength);
2064 :
2065 : /**
2066 : * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2067 : * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2068 : * @param start the offset where the insertion begins
2069 : * @param srcText the source for the new characters
2070 : * @return a reference to this
2071 : * @stable ICU 2.0
2072 : */
2073 : inline UnicodeString& insert(int32_t start,
2074 : const UnicodeString& srcText);
2075 :
2076 : /**
2077 : * Insert the characters in <TT>srcChars</TT> in the range
2078 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2079 : * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2080 : * @param start the offset at which the insertion begins
2081 : * @param srcChars the source for the new characters
2082 : * @param srcStart the offset into <TT>srcChars</TT> where new characters
2083 : * will be obtained
2084 : * @param srcLength the number of characters in <TT>srcChars</TT>
2085 : * in the insert string
2086 : * @return a reference to this
2087 : * @stable ICU 2.0
2088 : */
2089 : inline UnicodeString& insert(int32_t start,
2090 : const UChar *srcChars,
2091 : int32_t srcStart,
2092 : int32_t srcLength);
2093 :
2094 : /**
2095 : * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2096 : * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2097 : * @param start the offset where the insertion begins
2098 : * @param srcChars the source for the new characters
2099 : * @param srcLength the number of Unicode characters in srcChars.
2100 : * @return a reference to this
2101 : * @stable ICU 2.0
2102 : */
2103 : inline UnicodeString& insert(int32_t start,
2104 : const UChar *srcChars,
2105 : int32_t srcLength);
2106 :
2107 : /**
2108 : * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2109 : * offset <TT>start</TT>.
2110 : * @param start the offset at which the insertion occurs
2111 : * @param srcChar the code unit to insert
2112 : * @return a reference to this
2113 : * @stable ICU 2.0
2114 : */
2115 : inline UnicodeString& insert(int32_t start,
2116 : UChar srcChar);
2117 :
2118 : /**
2119 : * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2120 : * offset <TT>start</TT>.
2121 : * @param start the offset at which the insertion occurs
2122 : * @param srcChar the code point to insert
2123 : * @return a reference to this
2124 : * @stable ICU 2.0
2125 : */
2126 : inline UnicodeString& insert(int32_t start,
2127 : UChar32 srcChar);
2128 :
2129 :
2130 : /* Replace operations */
2131 :
2132 : /**
2133 : * Replace the characters in the range
2134 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2135 : * <TT>srcText</TT> in the range
2136 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2137 : * <TT>srcText</TT> is not modified.
2138 : * @param start the offset at which the replace operation begins
2139 : * @param length the number of characters to replace. The character at
2140 : * <TT>start + length</TT> is not modified.
2141 : * @param srcText the source for the new characters
2142 : * @param srcStart the offset into <TT>srcText</TT> where new characters
2143 : * will be obtained
2144 : * @param srcLength the number of characters in <TT>srcText</TT> in
2145 : * the replace string
2146 : * @return a reference to this
2147 : * @stable ICU 2.0
2148 : */
2149 : UnicodeString& replace(int32_t start,
2150 : int32_t length,
2151 : const UnicodeString& srcText,
2152 : int32_t srcStart,
2153 : int32_t srcLength);
2154 :
2155 : /**
2156 : * Replace the characters in the range
2157 : * [<TT>start</TT>, <TT>start + length</TT>)
2158 : * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2159 : * not modified.
2160 : * @param start the offset at which the replace operation begins
2161 : * @param length the number of characters to replace. The character at
2162 : * <TT>start + length</TT> is not modified.
2163 : * @param srcText the source for the new characters
2164 : * @return a reference to this
2165 : * @stable ICU 2.0
2166 : */
2167 : UnicodeString& replace(int32_t start,
2168 : int32_t length,
2169 : const UnicodeString& srcText);
2170 :
2171 : /**
2172 : * Replace the characters in the range
2173 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2174 : * <TT>srcChars</TT> in the range
2175 : * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2176 : * is not modified.
2177 : * @param start the offset at which the replace operation begins
2178 : * @param length the number of characters to replace. The character at
2179 : * <TT>start + length</TT> is not modified.
2180 : * @param srcChars the source for the new characters
2181 : * @param srcStart the offset into <TT>srcChars</TT> where new characters
2182 : * will be obtained
2183 : * @param srcLength the number of characters in <TT>srcChars</TT>
2184 : * in the replace string
2185 : * @return a reference to this
2186 : * @stable ICU 2.0
2187 : */
2188 : UnicodeString& replace(int32_t start,
2189 : int32_t length,
2190 : const UChar *srcChars,
2191 : int32_t srcStart,
2192 : int32_t srcLength);
2193 :
2194 : /**
2195 : * Replace the characters in the range
2196 : * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2197 : * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2198 : * @param start the offset at which the replace operation begins
2199 : * @param length number of characters to replace. The character at
2200 : * <TT>start + length</TT> is not modified.
2201 : * @param srcChars the source for the new characters
2202 : * @param srcLength the number of Unicode characters in srcChars
2203 : * @return a reference to this
2204 : * @stable ICU 2.0
2205 : */
2206 : inline UnicodeString& replace(int32_t start,
2207 : int32_t length,
2208 : const UChar *srcChars,
2209 : int32_t srcLength);
2210 :
2211 : /**
2212 : * Replace the characters in the range
2213 : * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2214 : * <TT>srcChar</TT>.
2215 : * @param start the offset at which the replace operation begins
2216 : * @param length the number of characters to replace. The character at
2217 : * <TT>start + length</TT> is not modified.
2218 : * @param srcChar the new code unit
2219 : * @return a reference to this
2220 : * @stable ICU 2.0
2221 : */
2222 : inline UnicodeString& replace(int32_t start,
2223 : int32_t length,
2224 : UChar srcChar);
2225 :
2226 : /**
2227 : * Replace the characters in the range
2228 : * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2229 : * <TT>srcChar</TT>.
2230 : * @param start the offset at which the replace operation begins
2231 : * @param length the number of characters to replace. The character at
2232 : * <TT>start + length</TT> is not modified.
2233 : * @param srcChar the new code point
2234 : * @return a reference to this
2235 : * @stable ICU 2.0
2236 : */
2237 : inline UnicodeString& replace(int32_t start,
2238 : int32_t length,
2239 : UChar32 srcChar);
2240 :
2241 : /**
2242 : * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2243 : * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2244 : * @param start the offset at which the replace operation begins
2245 : * @param limit the offset immediately following the replace range
2246 : * @param srcText the source for the new characters
2247 : * @return a reference to this
2248 : * @stable ICU 2.0
2249 : */
2250 : inline UnicodeString& replaceBetween(int32_t start,
2251 : int32_t limit,
2252 : const UnicodeString& srcText);
2253 :
2254 : /**
2255 : * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2256 : * with the characters in <TT>srcText</TT> in the range
2257 : * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2258 : * @param start the offset at which the replace operation begins
2259 : * @param limit the offset immediately following the replace range
2260 : * @param srcText the source for the new characters
2261 : * @param srcStart the offset into <TT>srcText</TT> where new characters
2262 : * will be obtained
2263 : * @param srcLimit the offset immediately following the range to copy
2264 : * in <TT>srcText</TT>
2265 : * @return a reference to this
2266 : * @stable ICU 2.0
2267 : */
2268 : inline UnicodeString& replaceBetween(int32_t start,
2269 : int32_t limit,
2270 : const UnicodeString& srcText,
2271 : int32_t srcStart,
2272 : int32_t srcLimit);
2273 :
2274 : /**
2275 : * Replace a substring of this object with the given text.
2276 : * @param start the beginning index, inclusive; <code>0 <= start
2277 : * <= limit</code>.
2278 : * @param limit the ending index, exclusive; <code>start <= limit
2279 : * <= length()</code>.
2280 : * @param text the text to replace characters <code>start</code>
2281 : * to <code>limit - 1</code>
2282 : * @stable ICU 2.0
2283 : */
2284 : virtual void handleReplaceBetween(int32_t start,
2285 : int32_t limit,
2286 : const UnicodeString& text);
2287 :
2288 : /**
2289 : * Replaceable API
2290 : * @return TRUE if it has MetaData
2291 : * @stable ICU 2.4
2292 : */
2293 : virtual UBool hasMetaData() const;
2294 :
2295 : /**
2296 : * Copy a substring of this object, retaining attribute (out-of-band)
2297 : * information. This method is used to duplicate or reorder substrings.
2298 : * The destination index must not overlap the source range.
2299 : *
2300 : * @param start the beginning index, inclusive; <code>0 <= start <=
2301 : * limit</code>.
2302 : * @param limit the ending index, exclusive; <code>start <= limit <=
2303 : * length()</code>.
2304 : * @param dest the destination index. The characters from
2305 : * <code>start..limit-1</code> will be copied to <code>dest</code>.
2306 : * Implementations of this method may assume that <code>dest <= start ||
2307 : * dest >= limit</code>.
2308 : * @stable ICU 2.0
2309 : */
2310 : virtual void copy(int32_t start, int32_t limit, int32_t dest);
2311 :
2312 : /* Search and replace operations */
2313 :
2314 : /**
2315 : * Replace all occurrences of characters in oldText with the characters
2316 : * in newText
2317 : * @param oldText the text containing the search text
2318 : * @param newText the text containing the replacement text
2319 : * @return a reference to this
2320 : * @stable ICU 2.0
2321 : */
2322 : inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2323 : const UnicodeString& newText);
2324 :
2325 : /**
2326 : * Replace all occurrences of characters in oldText with characters
2327 : * in newText
2328 : * in the range [<TT>start</TT>, <TT>start + length</TT>).
2329 : * @param start the start of the range in which replace will be performed
2330 : * @param length the length of the range in which replace will be performed
2331 : * @param oldText the text containing the search text
2332 : * @param newText the text containing the replacement text
2333 : * @return a reference to this
2334 : * @stable ICU 2.0
2335 : */
2336 : inline UnicodeString& findAndReplace(int32_t start,
2337 : int32_t length,
2338 : const UnicodeString& oldText,
2339 : const UnicodeString& newText);
2340 :
2341 : /**
2342 : * Replace all occurrences of characters in oldText in the range
2343 : * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2344 : * in newText in the range
2345 : * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2346 : * in the range [<TT>start</TT>, <TT>start + length</TT>).
2347 : * @param start the start of the range in which replace will be performed
2348 : * @param length the length of the range in which replace will be performed
2349 : * @param oldText the text containing the search text
2350 : * @param oldStart the start of the search range in <TT>oldText</TT>
2351 : * @param oldLength the length of the search range in <TT>oldText</TT>
2352 : * @param newText the text containing the replacement text
2353 : * @param newStart the start of the replacement range in <TT>newText</TT>
2354 : * @param newLength the length of the replacement range in <TT>newText</TT>
2355 : * @return a reference to this
2356 : * @stable ICU 2.0
2357 : */
2358 : UnicodeString& findAndReplace(int32_t start,
2359 : int32_t length,
2360 : const UnicodeString& oldText,
2361 : int32_t oldStart,
2362 : int32_t oldLength,
2363 : const UnicodeString& newText,
2364 : int32_t newStart,
2365 : int32_t newLength);
2366 :
2367 :
2368 : /* Remove operations */
2369 :
2370 : /**
2371 : * Remove all characters from the UnicodeString object.
2372 : * @return a reference to this
2373 : * @stable ICU 2.0
2374 : */
2375 : inline UnicodeString& remove(void);
2376 :
2377 : /**
2378 : * Remove the characters in the range
2379 : * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2380 : * @param start the offset of the first character to remove
2381 : * @param length the number of characters to remove
2382 : * @return a reference to this
2383 : * @stable ICU 2.0
2384 : */
2385 : inline UnicodeString& remove(int32_t start,
2386 : int32_t length = (int32_t)INT32_MAX);
2387 :
2388 : /**
2389 : * Remove the characters in the range
2390 : * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2391 : * @param start the offset of the first character to remove
2392 : * @param limit the offset immediately following the range to remove
2393 : * @return a reference to this
2394 : * @stable ICU 2.0
2395 : */
2396 : inline UnicodeString& removeBetween(int32_t start,
2397 : int32_t limit = (int32_t)INT32_MAX);
2398 :
2399 :
2400 : /* Length operations */
2401 :
2402 : /**
2403 : * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2404 : * If the length of this UnicodeString is less than targetLength,
2405 : * targetLength - length() copies of padChar will be added to the
2406 : * beginning of this UnicodeString.
2407 : * @param targetLength the desired length of the string
2408 : * @param padChar the character to use for padding. Defaults to
2409 : * space (U+0020)
2410 : * @return TRUE if the text was padded, FALSE otherwise.
2411 : * @stable ICU 2.0
2412 : */
2413 : UBool padLeading(int32_t targetLength,
2414 : UChar padChar = 0x0020);
2415 :
2416 : /**
2417 : * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2418 : * If the length of this UnicodeString is less than targetLength,
2419 : * targetLength - length() copies of padChar will be added to the
2420 : * end of this UnicodeString.
2421 : * @param targetLength the desired length of the string
2422 : * @param padChar the character to use for padding. Defaults to
2423 : * space (U+0020)
2424 : * @return TRUE if the text was padded, FALSE otherwise.
2425 : * @stable ICU 2.0
2426 : */
2427 : UBool padTrailing(int32_t targetLength,
2428 : UChar padChar = 0x0020);
2429 :
2430 : /**
2431 : * Truncate this UnicodeString to the <TT>targetLength</TT>.
2432 : * @param targetLength the desired length of this UnicodeString.
2433 : * @return TRUE if the text was truncated, FALSE otherwise
2434 : * @stable ICU 2.0
2435 : */
2436 : inline UBool truncate(int32_t targetLength);
2437 :
2438 : /**
2439 : * Trims leading and trailing whitespace from this UnicodeString.
2440 : * @return a reference to this
2441 : * @stable ICU 2.0
2442 : */
2443 : UnicodeString& trim(void);
2444 :
2445 :
2446 : /* Miscellaneous operations */
2447 :
2448 : /**
2449 : * Reverse this UnicodeString in place.
2450 : * @return a reference to this
2451 : * @stable ICU 2.0
2452 : */
2453 : inline UnicodeString& reverse(void);
2454 :
2455 : /**
2456 : * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2457 : * this UnicodeString.
2458 : * @param start the start of the range to reverse
2459 : * @param length the number of characters to reverse
2460 : * @return a reference to this
2461 : * @stable ICU 2.0
2462 : */
2463 : inline UnicodeString& reverse(int32_t start,
2464 : int32_t length);
2465 :
2466 : /**
2467 : * Convert the characters in this to UPPER CASE following the conventions of
2468 : * the default locale.
2469 : * @return A reference to this.
2470 : * @stable ICU 2.0
2471 : */
2472 : UnicodeString& toUpper(void);
2473 :
2474 : /**
2475 : * Convert the characters in this to UPPER CASE following the conventions of
2476 : * a specific locale.
2477 : * @param locale The locale containing the conventions to use.
2478 : * @return A reference to this.
2479 : * @stable ICU 2.0
2480 : */
2481 : UnicodeString& toUpper(const Locale& locale);
2482 :
2483 : /**
2484 : * Convert the characters in this to lower case following the conventions of
2485 : * the default locale.
2486 : * @return A reference to this.
2487 : * @stable ICU 2.0
2488 : */
2489 : UnicodeString& toLower(void);
2490 :
2491 : /**
2492 : * Convert the characters in this to lower case following the conventions of
2493 : * a specific locale.
2494 : * @param locale The locale containing the conventions to use.
2495 : * @return A reference to this.
2496 : * @stable ICU 2.0
2497 : */
2498 : UnicodeString& toLower(const Locale& locale);
2499 :
2500 : #if !UCONFIG_NO_BREAK_ITERATION
2501 :
2502 : /**
2503 : * Titlecase this string, convenience function using the default locale.
2504 : *
2505 : * Casing is locale-dependent and context-sensitive.
2506 : * Titlecasing uses a break iterator to find the first characters of words
2507 : * that are to be titlecased. It titlecases those characters and lowercases
2508 : * all others.
2509 : *
2510 : * The titlecase break iterator can be provided to customize for arbitrary
2511 : * styles, using rules and dictionaries beyond the standard iterators.
2512 : * It may be more efficient to always provide an iterator to avoid
2513 : * opening and closing one for each string.
2514 : * The standard titlecase iterator for the root locale implements the
2515 : * algorithm of Unicode TR 21.
2516 : *
2517 : * This function uses only the setText(), first() and next() methods of the
2518 : * provided break iterator.
2519 : *
2520 : * @param titleIter A break iterator to find the first characters of words
2521 : * that are to be titlecased.
2522 : * If none is provided (0), then a standard titlecase
2523 : * break iterator is opened.
2524 : * Otherwise the provided iterator is set to the string's text.
2525 : * @return A reference to this.
2526 : * @stable ICU 2.1
2527 : */
2528 : UnicodeString &toTitle(BreakIterator *titleIter);
2529 :
2530 : /**
2531 : * Titlecase this string.
2532 : *
2533 : * Casing is locale-dependent and context-sensitive.
2534 : * Titlecasing uses a break iterator to find the first characters of words
2535 : * that are to be titlecased. It titlecases those characters and lowercases
2536 : * all others.
2537 : *
2538 : * The titlecase break iterator can be provided to customize for arbitrary
2539 : * styles, using rules and dictionaries beyond the standard iterators.
2540 : * It may be more efficient to always provide an iterator to avoid
2541 : * opening and closing one for each string.
2542 : * The standard titlecase iterator for the root locale implements the
2543 : * algorithm of Unicode TR 21.
2544 : *
2545 : * This function uses only the setText(), first() and next() methods of the
2546 : * provided break iterator.
2547 : *
2548 : * @param titleIter A break iterator to find the first characters of words
2549 : * that are to be titlecased.
2550 : * If none is provided (0), then a standard titlecase
2551 : * break iterator is opened.
2552 : * Otherwise the provided iterator is set to the string's text.
2553 : * @param locale The locale to consider.
2554 : * @return A reference to this.
2555 : * @stable ICU 2.1
2556 : */
2557 : UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2558 :
2559 : /**
2560 : * Titlecase this string, with options.
2561 : *
2562 : * Casing is locale-dependent and context-sensitive.
2563 : * Titlecasing uses a break iterator to find the first characters of words
2564 : * that are to be titlecased. It titlecases those characters and lowercases
2565 : * all others. (This can be modified with options.)
2566 : *
2567 : * The titlecase break iterator can be provided to customize for arbitrary
2568 : * styles, using rules and dictionaries beyond the standard iterators.
2569 : * It may be more efficient to always provide an iterator to avoid
2570 : * opening and closing one for each string.
2571 : * The standard titlecase iterator for the root locale implements the
2572 : * algorithm of Unicode TR 21.
2573 : *
2574 : * This function uses only the setText(), first() and next() methods of the
2575 : * provided break iterator.
2576 : *
2577 : * @param titleIter A break iterator to find the first characters of words
2578 : * that are to be titlecased.
2579 : * If none is provided (0), then a standard titlecase
2580 : * break iterator is opened.
2581 : * Otherwise the provided iterator is set to the string's text.
2582 : * @param locale The locale to consider.
2583 : * @param options Options bit set, see ucasemap_open().
2584 : * @return A reference to this.
2585 : * @see U_TITLECASE_NO_LOWERCASE
2586 : * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2587 : * @see ucasemap_open
2588 : * @stable ICU 3.8
2589 : */
2590 : UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2591 :
2592 : #endif
2593 :
2594 : /**
2595 : * Case-fold the characters in this string.
2596 : * Case-folding is locale-independent and not context-sensitive,
2597 : * but there is an option for whether to include or exclude mappings for dotted I
2598 : * and dotless i that are marked with 'I' in CaseFolding.txt.
2599 : * The result may be longer or shorter than the original.
2600 : *
2601 : * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2602 : * @return A reference to this.
2603 : * @stable ICU 2.0
2604 : */
2605 : UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2606 :
2607 : //========================================
2608 : // Access to the internal buffer
2609 : //========================================
2610 :
2611 : /**
2612 : * Get a read/write pointer to the internal buffer.
2613 : * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2614 : * writable, and is still owned by the UnicodeString object.
2615 : * Calls to getBuffer(minCapacity) must not be nested, and
2616 : * must be matched with calls to releaseBuffer(newLength).
2617 : * If the string buffer was read-only or shared,
2618 : * then it will be reallocated and copied.
2619 : *
2620 : * An attempted nested call will return 0, and will not further modify the
2621 : * state of the UnicodeString object.
2622 : * It also returns 0 if the string is bogus.
2623 : *
2624 : * The actual capacity of the string buffer may be larger than minCapacity.
2625 : * getCapacity() returns the actual capacity.
2626 : * For many operations, the full capacity should be used to avoid reallocations.
2627 : *
2628 : * While the buffer is "open" between getBuffer(minCapacity)
2629 : * and releaseBuffer(newLength), the following applies:
2630 : * - The string length is set to 0.
2631 : * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2632 : * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2633 : * - You can read from and write to the returned buffer.
2634 : * - The previous string contents will still be in the buffer;
2635 : * if you want to use it, then you need to call length() before getBuffer(minCapacity).
2636 : * If the length() was greater than minCapacity, then any contents after minCapacity
2637 : * may be lost.
2638 : * The buffer contents is not NUL-terminated by getBuffer().
2639 : * If length()<getCapacity() then you can terminate it by writing a NUL
2640 : * at index length().
2641 : * - You must call releaseBuffer(newLength) in order to
2642 : * return to normal UnicodeString operation.
2643 : *
2644 : * @param minCapacity the minimum number of UChars that are to be available
2645 : * in the buffer, starting at the returned pointer;
2646 : * defaults to the current string capacity if minCapacity==-1
2647 : * @return a writable pointer to the internal string buffer,
2648 : * or 0 if an error occurs (nested calls, out of memory)
2649 : *
2650 : * @see releaseBuffer
2651 : * @see getTerminatedBuffer()
2652 : * @stable ICU 2.0
2653 : */
2654 : UChar *getBuffer(int32_t minCapacity);
2655 :
2656 : /**
2657 : * Release a read/write buffer on a UnicodeString object with an
2658 : * "open" getBuffer(minCapacity).
2659 : * This function must be called in a matched pair with getBuffer(minCapacity).
2660 : * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2661 : *
2662 : * It will set the string length to newLength, at most to the current capacity.
2663 : * If newLength==-1 then it will set the length according to the
2664 : * first NUL in the buffer, or to the capacity if there is no NUL.
2665 : *
2666 : * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2667 : *
2668 : * @param newLength the new length of the UnicodeString object;
2669 : * defaults to the current capacity if newLength is greater than that;
2670 : * if newLength==-1, it defaults to u_strlen(buffer) but not more than
2671 : * the current capacity of the string
2672 : *
2673 : * @see getBuffer(int32_t minCapacity)
2674 : * @stable ICU 2.0
2675 : */
2676 : void releaseBuffer(int32_t newLength=-1);
2677 :
2678 : /**
2679 : * Get a read-only pointer to the internal buffer.
2680 : * This can be called at any time on a valid UnicodeString.
2681 : *
2682 : * It returns 0 if the string is bogus, or
2683 : * during an "open" getBuffer(minCapacity).
2684 : *
2685 : * It can be called as many times as desired.
2686 : * The pointer that it returns will remain valid until the UnicodeString object is modified,
2687 : * at which time the pointer is semantically invalidated and must not be used any more.
2688 : *
2689 : * The capacity of the buffer can be determined with getCapacity().
2690 : * The part after length() may or may not be initialized and valid,
2691 : * depending on the history of the UnicodeString object.
2692 : *
2693 : * The buffer contents is (probably) not NUL-terminated.
2694 : * You can check if it is with
2695 : * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2696 : * (See getTerminatedBuffer().)
2697 : *
2698 : * The buffer may reside in read-only memory. Its contents must not
2699 : * be modified.
2700 : *
2701 : * @return a read-only pointer to the internal string buffer,
2702 : * or 0 if the string is empty or bogus
2703 : *
2704 : * @see getBuffer(int32_t minCapacity)
2705 : * @see getTerminatedBuffer()
2706 : * @stable ICU 2.0
2707 : */
2708 : inline const UChar *getBuffer() const;
2709 :
2710 : /**
2711 : * Get a read-only pointer to the internal buffer,
2712 : * making sure that it is NUL-terminated.
2713 : * This can be called at any time on a valid UnicodeString.
2714 : *
2715 : * It returns 0 if the string is bogus, or
2716 : * during an "open" getBuffer(minCapacity), or if the buffer cannot
2717 : * be NUL-terminated (because memory allocation failed).
2718 : *
2719 : * It can be called as many times as desired.
2720 : * The pointer that it returns will remain valid until the UnicodeString object is modified,
2721 : * at which time the pointer is semantically invalidated and must not be used any more.
2722 : *
2723 : * The capacity of the buffer can be determined with getCapacity().
2724 : * The part after length()+1 may or may not be initialized and valid,
2725 : * depending on the history of the UnicodeString object.
2726 : *
2727 : * The buffer contents is guaranteed to be NUL-terminated.
2728 : * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2729 : * is written.
2730 : * For this reason, this function is not const, unlike getBuffer().
2731 : * Note that a UnicodeString may also contain NUL characters as part of its contents.
2732 : *
2733 : * The buffer may reside in read-only memory. Its contents must not
2734 : * be modified.
2735 : *
2736 : * @return a read-only pointer to the internal string buffer,
2737 : * or 0 if the string is empty or bogus
2738 : *
2739 : * @see getBuffer(int32_t minCapacity)
2740 : * @see getBuffer()
2741 : * @stable ICU 2.2
2742 : */
2743 : inline const UChar *getTerminatedBuffer();
2744 :
2745 : //========================================
2746 : // Constructors
2747 : //========================================
2748 :
2749 : /** Construct an empty UnicodeString.
2750 : * @stable ICU 2.0
2751 : */
2752 : UnicodeString();
2753 :
2754 : /**
2755 : * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2756 : * @param capacity the number of UChars this UnicodeString should hold
2757 : * before a resize is necessary; if count is greater than 0 and count
2758 : * code points c take up more space than capacity, then capacity is adjusted
2759 : * accordingly.
2760 : * @param c is used to initially fill the string
2761 : * @param count specifies how many code points c are to be written in the
2762 : * string
2763 : * @stable ICU 2.0
2764 : */
2765 : UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2766 :
2767 : /**
2768 : * Single UChar (code unit) constructor.
2769 : * @param ch the character to place in the UnicodeString
2770 : * @stable ICU 2.0
2771 : */
2772 : UnicodeString(UChar ch);
2773 :
2774 : /**
2775 : * Single UChar32 (code point) constructor.
2776 : * @param ch the character to place in the UnicodeString
2777 : * @stable ICU 2.0
2778 : */
2779 : UnicodeString(UChar32 ch);
2780 :
2781 : /**
2782 : * UChar* constructor.
2783 : * @param text The characters to place in the UnicodeString. <TT>text</TT>
2784 : * must be NUL (U+0000) terminated.
2785 : * @stable ICU 2.0
2786 : */
2787 : UnicodeString(const UChar *text);
2788 :
2789 : /**
2790 : * UChar* constructor.
2791 : * @param text The characters to place in the UnicodeString.
2792 : * @param textLength The number of Unicode characters in <TT>text</TT>
2793 : * to copy.
2794 : * @stable ICU 2.0
2795 : */
2796 : UnicodeString(const UChar *text,
2797 : int32_t textLength);
2798 :
2799 : /**
2800 : * Readonly-aliasing UChar* constructor.
2801 : * The text will be used for the UnicodeString object, but
2802 : * it will not be released when the UnicodeString is destroyed.
2803 : * This has copy-on-write semantics:
2804 : * When the string is modified, then the buffer is first copied into
2805 : * newly allocated memory.
2806 : * The aliased buffer is never modified.
2807 : * In an assignment to another UnicodeString, the text will be aliased again,
2808 : * so that both strings then alias the same readonly-text.
2809 : *
2810 : * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2811 : * This must be true if <code>textLength==-1</code>.
2812 : * @param text The characters to alias for the UnicodeString.
2813 : * @param textLength The number of Unicode characters in <code>text</code> to alias.
2814 : * If -1, then this constructor will determine the length
2815 : * by calling <code>u_strlen()</code>.
2816 : * @stable ICU 2.0
2817 : */
2818 : UnicodeString(UBool isTerminated,
2819 : const UChar *text,
2820 : int32_t textLength);
2821 :
2822 : /**
2823 : * Writable-aliasing UChar* constructor.
2824 : * The text will be used for the UnicodeString object, but
2825 : * it will not be released when the UnicodeString is destroyed.
2826 : * This has write-through semantics:
2827 : * For as long as the capacity of the buffer is sufficient, write operations
2828 : * will directly affect the buffer. When more capacity is necessary, then
2829 : * a new buffer will be allocated and the contents copied as with regularly
2830 : * constructed strings.
2831 : * In an assignment to another UnicodeString, the buffer will be copied.
2832 : * The extract(UChar *dst) function detects whether the dst pointer is the same
2833 : * as the string buffer itself and will in this case not copy the contents.
2834 : *
2835 : * @param buffer The characters to alias for the UnicodeString.
2836 : * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2837 : * @param buffCapacity The size of <code>buffer</code> in UChars.
2838 : * @stable ICU 2.0
2839 : */
2840 : UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2841 :
2842 : #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2843 :
2844 : /**
2845 : * char* constructor.
2846 : * @param codepageData an array of bytes, null-terminated,
2847 : * in the platform's default codepage.
2848 : * @stable ICU 2.0
2849 : */
2850 : UnicodeString(const char *codepageData);
2851 :
2852 : /**
2853 : * char* constructor.
2854 : * @param codepageData an array of bytes in the platform's default codepage.
2855 : * @param dataLength The number of bytes in <TT>codepageData</TT>.
2856 : * @stable ICU 2.0
2857 : */
2858 : UnicodeString(const char *codepageData, int32_t dataLength);
2859 :
2860 : #endif
2861 :
2862 : #if !UCONFIG_NO_CONVERSION
2863 :
2864 : /**
2865 : * char* constructor.
2866 : * @param codepageData an array of bytes, null-terminated
2867 : * @param codepage the encoding of <TT>codepageData</TT>. The special
2868 : * value 0 for <TT>codepage</TT> indicates that the text is in the
2869 : * platform's default codepage.
2870 : *
2871 : * If <code>codepage</code> is an empty string (<code>""</code>),
2872 : * then a simple conversion is performed on the codepage-invariant
2873 : * subset ("invariant characters") of the platform encoding. See utypes.h.
2874 : * Recommendation: For invariant-character strings use the constructor
2875 : * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2876 : * because it avoids object code dependencies of UnicodeString on
2877 : * the conversion code.
2878 : *
2879 : * @stable ICU 2.0
2880 : */
2881 : UnicodeString(const char *codepageData, const char *codepage);
2882 :
2883 : /**
2884 : * char* constructor.
2885 : * @param codepageData an array of bytes.
2886 : * @param dataLength The number of bytes in <TT>codepageData</TT>.
2887 : * @param codepage the encoding of <TT>codepageData</TT>. The special
2888 : * value 0 for <TT>codepage</TT> indicates that the text is in the
2889 : * platform's default codepage.
2890 : * If <code>codepage</code> is an empty string (<code>""</code>),
2891 : * then a simple conversion is performed on the codepage-invariant
2892 : * subset ("invariant characters") of the platform encoding. See utypes.h.
2893 : * Recommendation: For invariant-character strings use the constructor
2894 : * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2895 : * because it avoids object code dependencies of UnicodeString on
2896 : * the conversion code.
2897 : *
2898 : * @stable ICU 2.0
2899 : */
2900 : UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
2901 :
2902 : /**
2903 : * char * / UConverter constructor.
2904 : * This constructor uses an existing UConverter object to
2905 : * convert the codepage string to Unicode and construct a UnicodeString
2906 : * from that.
2907 : *
2908 : * The converter is reset at first.
2909 : * If the error code indicates a failure before this constructor is called,
2910 : * or if an error occurs during conversion or construction,
2911 : * then the string will be bogus.
2912 : *
2913 : * This function avoids the overhead of opening and closing a converter if
2914 : * multiple strings are constructed.
2915 : *
2916 : * @param src input codepage string
2917 : * @param srcLength length of the input string, can be -1 for NUL-terminated strings
2918 : * @param cnv converter object (ucnv_resetToUnicode() will be called),
2919 : * can be NULL for the default converter
2920 : * @param errorCode normal ICU error code
2921 : * @stable ICU 2.0
2922 : */
2923 : UnicodeString(
2924 : const char *src, int32_t srcLength,
2925 : UConverter *cnv,
2926 : UErrorCode &errorCode);
2927 :
2928 : #endif
2929 :
2930 : /**
2931 : * Constructs a Unicode string from an invariant-character char * string.
2932 : * About invariant characters see utypes.h.
2933 : * This constructor has no runtime dependency on conversion code and is
2934 : * therefore recommended over ones taking a charset name string
2935 : * (where the empty string "" indicates invariant-character conversion).
2936 : *
2937 : * Use the macro US_INV as the third, signature-distinguishing parameter.
2938 : *
2939 : * For example:
2940 : * \code
2941 : * void fn(const char *s) {
2942 : * UnicodeString ustr(s, -1, US_INV);
2943 : * // use ustr ...
2944 : * }
2945 : * \endcode
2946 : *
2947 : * @param src String using only invariant characters.
2948 : * @param length Length of src, or -1 if NUL-terminated.
2949 : * @param inv Signature-distinguishing parameter, use US_INV.
2950 : *
2951 : * @see US_INV
2952 : * @stable ICU 3.2
2953 : */
2954 : UnicodeString(const char *src, int32_t length, enum EInvariant inv);
2955 :
2956 :
2957 : /**
2958 : * Copy constructor.
2959 : * @param that The UnicodeString object to copy.
2960 : * @stable ICU 2.0
2961 : */
2962 : UnicodeString(const UnicodeString& that);
2963 :
2964 : /**
2965 : * 'Substring' constructor from tail of source string.
2966 : * @param src The UnicodeString object to copy.
2967 : * @param srcStart The offset into <tt>src</tt> at which to start copying.
2968 : * @stable ICU 2.2
2969 : */
2970 : UnicodeString(const UnicodeString& src, int32_t srcStart);
2971 :
2972 : /**
2973 : * 'Substring' constructor from subrange of source string.
2974 : * @param src The UnicodeString object to copy.
2975 : * @param srcStart The offset into <tt>src</tt> at which to start copying.
2976 : * @param srcLength The number of characters from <tt>src</tt> to copy.
2977 : * @stable ICU 2.2
2978 : */
2979 : UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
2980 :
2981 : /**
2982 : * Clone this object, an instance of a subclass of Replaceable.
2983 : * Clones can be used concurrently in multiple threads.
2984 : * If a subclass does not implement clone(), or if an error occurs,
2985 : * then NULL is returned.
2986 : * The clone functions in all subclasses return a pointer to a Replaceable
2987 : * because some compilers do not support covariant (same-as-this)
2988 : * return types; cast to the appropriate subclass if necessary.
2989 : * The caller must delete the clone.
2990 : *
2991 : * @return a clone of this object
2992 : *
2993 : * @see Replaceable::clone
2994 : * @see getDynamicClassID
2995 : * @stable ICU 2.6
2996 : */
2997 : virtual Replaceable *clone() const;
2998 :
2999 : /** Destructor.
3000 : * @stable ICU 2.0
3001 : */
3002 : virtual ~UnicodeString();
3003 :
3004 : /**
3005 : * Create a UnicodeString from a UTF-8 string.
3006 : * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3007 : * Calls u_strFromUTF8WithSub().
3008 : *
3009 : * @param utf8 UTF-8 input string.
3010 : * Note that a StringPiece can be implicitly constructed
3011 : * from a std::string or a NUL-terminated const char * string.
3012 : * @return A UnicodeString with equivalent UTF-16 contents.
3013 : * @see toUTF8
3014 : * @see toUTF8String
3015 : * @draft ICU 4.2
3016 : */
3017 : static UnicodeString fromUTF8(const StringPiece &utf8);
3018 :
3019 : /**
3020 : * Create a UnicodeString from a UTF-32 string.
3021 : * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3022 : * Calls u_strFromUTF32WithSub().
3023 : *
3024 : * @param utf32 UTF-32 input string. Must not be NULL.
3025 : * @param length Length of the input string, or -1 if NUL-terminated.
3026 : * @return A UnicodeString with equivalent UTF-16 contents.
3027 : * @see toUTF32
3028 : * @draft ICU 4.2
3029 : */
3030 : static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3031 :
3032 : /* Miscellaneous operations */
3033 :
3034 : /**
3035 : * Unescape a string of characters and return a string containing
3036 : * the result. The following escape sequences are recognized:
3037 : *
3038 : * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
3039 : * \\Uhhhhhhhh 8 hex digits
3040 : * \\xhh 1-2 hex digits
3041 : * \\ooo 1-3 octal digits; o in [0-7]
3042 : * \\cX control-X; X is masked with 0x1F
3043 : *
3044 : * as well as the standard ANSI C escapes:
3045 : *
3046 : * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3047 : * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
3048 : * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
3049 : *
3050 : * Anything else following a backslash is generically escaped. For
3051 : * example, "[a\\-z]" returns "[a-z]".
3052 : *
3053 : * If an escape sequence is ill-formed, this method returns an empty
3054 : * string. An example of an ill-formed sequence is "\\u" followed by
3055 : * fewer than 4 hex digits.
3056 : *
3057 : * This function is similar to u_unescape() but not identical to it.
3058 : * The latter takes a source char*, so it does escape recognition
3059 : * and also invariant conversion.
3060 : *
3061 : * @return a string with backslash escapes interpreted, or an
3062 : * empty string on error.
3063 : * @see UnicodeString#unescapeAt()
3064 : * @see u_unescape()
3065 : * @see u_unescapeAt()
3066 : * @stable ICU 2.0
3067 : */
3068 : UnicodeString unescape() const;
3069 :
3070 : /**
3071 : * Unescape a single escape sequence and return the represented
3072 : * character. See unescape() for a listing of the recognized escape
3073 : * sequences. The character at offset-1 is assumed (without
3074 : * checking) to be a backslash. If the escape sequence is
3075 : * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
3076 : * returned.
3077 : *
3078 : * @param offset an input output parameter. On input, it is the
3079 : * offset into this string where the escape sequence is located,
3080 : * after the initial backslash. On output, it is advanced after the
3081 : * last character parsed. On error, it is not advanced at all.
3082 : * @return the character represented by the escape sequence at
3083 : * offset, or (UChar32)0xFFFFFFFF on error.
3084 : * @see UnicodeString#unescape()
3085 : * @see u_unescape()
3086 : * @see u_unescapeAt()
3087 : * @stable ICU 2.0
3088 : */
3089 : UChar32 unescapeAt(int32_t &offset) const;
3090 :
3091 : /**
3092 : * ICU "poor man's RTTI", returns a UClassID for this class.
3093 : *
3094 : * @stable ICU 2.2
3095 : */
3096 : static UClassID U_EXPORT2 getStaticClassID();
3097 :
3098 : /**
3099 : * ICU "poor man's RTTI", returns a UClassID for the actual class.
3100 : *
3101 : * @stable ICU 2.2
3102 : */
3103 : virtual UClassID getDynamicClassID() const;
3104 :
3105 : //========================================
3106 : // Implementation methods
3107 : //========================================
3108 :
3109 : protected:
3110 : /**
3111 : * Implement Replaceable::getLength() (see jitterbug 1027).
3112 : * @stable ICU 2.4
3113 : */
3114 : virtual int32_t getLength() const;
3115 :
3116 : /**
3117 : * The change in Replaceable to use virtual getCharAt() allows
3118 : * UnicodeString::charAt() to be inline again (see jitterbug 709).
3119 : * @stable ICU 2.4
3120 : */
3121 : virtual UChar getCharAt(int32_t offset) const;
3122 :
3123 : /**
3124 : * The change in Replaceable to use virtual getChar32At() allows
3125 : * UnicodeString::char32At() to be inline again (see jitterbug 709).
3126 : * @stable ICU 2.4
3127 : */
3128 : virtual UChar32 getChar32At(int32_t offset) const;
3129 :
3130 : private:
3131 : // For char* constructors. Could be made public.
3132 : UnicodeString &setToUTF8(const StringPiece &utf8);
3133 : // For extract(char*).
3134 : // We could make a toUTF8(target, capacity, errorCode) public but not
3135 : // this version: New API will be cleaner if we make callers create substrings
3136 : // rather than having start+length on every method,
3137 : // and it should take a UErrorCode&.
3138 : int32_t
3139 : toUTF8(int32_t start, int32_t len,
3140 : char *target, int32_t capacity) const;
3141 :
3142 :
3143 : inline int8_t
3144 : doCompare(int32_t start,
3145 : int32_t length,
3146 : const UnicodeString& srcText,
3147 : int32_t srcStart,
3148 : int32_t srcLength) const;
3149 :
3150 : int8_t doCompare(int32_t start,
3151 : int32_t length,
3152 : const UChar *srcChars,
3153 : int32_t srcStart,
3154 : int32_t srcLength) const;
3155 :
3156 : inline int8_t
3157 : doCompareCodePointOrder(int32_t start,
3158 : int32_t length,
3159 : const UnicodeString& srcText,
3160 : int32_t srcStart,
3161 : int32_t srcLength) const;
3162 :
3163 : int8_t doCompareCodePointOrder(int32_t start,
3164 : int32_t length,
3165 : const UChar *srcChars,
3166 : int32_t srcStart,
3167 : int32_t srcLength) const;
3168 :
3169 : inline int8_t
3170 : doCaseCompare(int32_t start,
3171 : int32_t length,
3172 : const UnicodeString &srcText,
3173 : int32_t srcStart,
3174 : int32_t srcLength,
3175 : uint32_t options) const;
3176 :
3177 : int8_t
3178 : doCaseCompare(int32_t start,
3179 : int32_t length,
3180 : const UChar *srcChars,
3181 : int32_t srcStart,
3182 : int32_t srcLength,
3183 : uint32_t options) const;
3184 :
3185 : int32_t doIndexOf(UChar c,
3186 : int32_t start,
3187 : int32_t length) const;
3188 :
3189 : int32_t doIndexOf(UChar32 c,
3190 : int32_t start,
3191 : int32_t length) const;
3192 :
3193 : int32_t doLastIndexOf(UChar c,
3194 : int32_t start,
3195 : int32_t length) const;
3196 :
3197 : int32_t doLastIndexOf(UChar32 c,
3198 : int32_t start,
3199 : int32_t length) const;
3200 :
3201 : void doExtract(int32_t start,
3202 : int32_t length,
3203 : UChar *dst,
3204 : int32_t dstStart) const;
3205 :
3206 : inline void doExtract(int32_t start,
3207 : int32_t length,
3208 : UnicodeString& target) const;
3209 :
3210 : inline UChar doCharAt(int32_t offset) const;
3211 :
3212 : UnicodeString& doReplace(int32_t start,
3213 : int32_t length,
3214 : const UnicodeString& srcText,
3215 : int32_t srcStart,
3216 : int32_t srcLength);
3217 :
3218 : UnicodeString& doReplace(int32_t start,
3219 : int32_t length,
3220 : const UChar *srcChars,
3221 : int32_t srcStart,
3222 : int32_t srcLength);
3223 :
3224 : UnicodeString& doReverse(int32_t start,
3225 : int32_t length);
3226 :
3227 : // calculate hash code
3228 : int32_t doHashCode(void) const;
3229 :
3230 : // get pointer to start of array
3231 : // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3232 : inline UChar* getArrayStart(void);
3233 : inline const UChar* getArrayStart(void) const;
3234 :
3235 : // A UnicodeString object (not necessarily its current buffer)
3236 : // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3237 : inline UBool isWritable() const;
3238 :
3239 : // Is the current buffer writable?
3240 : inline UBool isBufferWritable() const;
3241 :
3242 : // None of the following does releaseArray().
3243 : inline void setLength(int32_t len); // sets only fShortLength and fLength
3244 : inline void setToEmpty(); // sets fFlags=kShortString
3245 : inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
3246 : inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3247 :
3248 : // allocate the array; result may be fStackBuffer
3249 : // sets refCount to 1 if appropriate
3250 : // sets fArray, fCapacity, and fFlags
3251 : // returns boolean for success or failure
3252 : UBool allocate(int32_t capacity);
3253 :
3254 : // release the array if owned
3255 : void releaseArray(void);
3256 :
3257 : // turn a bogus string into an empty one
3258 : void unBogus();
3259 :
3260 : // implements assigment operator, copy constructor, and fastCopyFrom()
3261 : UnicodeString ©From(const UnicodeString &src, UBool fastCopy=FALSE);
3262 :
3263 : // Pin start and limit to acceptable values.
3264 : inline void pinIndex(int32_t& start) const;
3265 : inline void pinIndices(int32_t& start,
3266 : int32_t& length) const;
3267 :
3268 : #if !UCONFIG_NO_CONVERSION
3269 :
3270 : /* Internal extract() using UConverter. */
3271 : int32_t doExtract(int32_t start, int32_t length,
3272 : char *dest, int32_t destCapacity,
3273 : UConverter *cnv,
3274 : UErrorCode &errorCode) const;
3275 :
3276 : /*
3277 : * Real constructor for converting from codepage data.
3278 : * It assumes that it is called with !fRefCounted.
3279 : *
3280 : * If <code>codepage==0</code>, then the default converter
3281 : * is used for the platform encoding.
3282 : * If <code>codepage</code> is an empty string (<code>""</code>),
3283 : * then a simple conversion is performed on the codepage-invariant
3284 : * subset ("invariant characters") of the platform encoding. See utypes.h.
3285 : */
3286 : void doCodepageCreate(const char *codepageData,
3287 : int32_t dataLength,
3288 : const char *codepage);
3289 :
3290 : /*
3291 : * Worker function for creating a UnicodeString from
3292 : * a codepage string using a UConverter.
3293 : */
3294 : void
3295 : doCodepageCreate(const char *codepageData,
3296 : int32_t dataLength,
3297 : UConverter *converter,
3298 : UErrorCode &status);
3299 :
3300 : #endif
3301 :
3302 : /*
3303 : * This function is called when write access to the array
3304 : * is necessary.
3305 : *
3306 : * We need to make a copy of the array if
3307 : * the buffer is read-only, or
3308 : * the buffer is refCounted (shared), and refCount>1, or
3309 : * the buffer is too small.
3310 : *
3311 : * Return FALSE if memory could not be allocated.
3312 : */
3313 : UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3314 : int32_t growCapacity = -1,
3315 : UBool doCopyArray = TRUE,
3316 : int32_t **pBufferToDelete = 0,
3317 : UBool forceClone = FALSE);
3318 :
3319 : // common function for case mappings
3320 : UnicodeString &
3321 : caseMap(BreakIterator *titleIter,
3322 : const char *locale,
3323 : uint32_t options,
3324 : int32_t toWhichCase);
3325 :
3326 : // ref counting
3327 : void addRef(void);
3328 : int32_t removeRef(void);
3329 : int32_t refCount(void) const;
3330 :
// Class constants.
enum {
  // The stack buffer size is chosen so that sizeof(UnicodeString)
  // is a multiple of sizeof(pointer):
  //   32-bit pointers: 4+1+1+13*2 = 32 bytes
  //   64-bit pointers: 8+1+1+15*2 = 40 bytes
  US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // size of the stack buffer for small strings
  kInvalidUChar=0xffff,   // invalid UChar index
  kGrowSize=128,          // grow size for this buffer
  kInvalidHashCode=0,     // invalid hash code
  kEmptyHashCode=1,       // hash code for the empty string

  // Bit flag values for fFlags.
  kIsBogus=1,             // this string is bogus, i.e., not valid or NULL
  kUsingStackBuffer=2,    // fArray==fStackBuffer
  kRefCounted=4,          // there is a refCount field before the characters in fArray
  kBufferIsReadonly=8,    // do not write to this buffer
  kOpenGetBuffer=16,      // getBuffer(minCapacity) was called (is "open"),
                          // and releaseBuffer(newLength) must be called

  // Combined flag values, for convenience.
  kShortString=kUsingStackBuffer,
  kLongString=kRefCounted,
  kReadonlyAlias=kBufferIsReadonly,
  kWritableAlias=0
};
3356 :
3357 : friend class StringThreadTest;
3358 :
3359 : union StackBufferOrFields; // forward declaration necessary before friend declaration
3360 : friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3361 :
3362 : /*
3363 : * The following are all the class fields that are stored
3364 : * in each UnicodeString object.
3365 : * Note that UnicodeString has virtual functions,
3366 : * therefore there is an implicit vtable pointer
3367 : * as the first real field.
3368 : * The fields should be aligned such that no padding is
3369 : * necessary, mostly by having larger types first.
3370 : * On 32-bit machines, the size should be 32 bytes,
3371 : * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3372 : */
3373 : // (implicit) *vtable;
3374 : int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
3375 : uint8_t fFlags; // bit flags: see constants above
3376 : union StackBufferOrFields {
3377 : // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
3378 : // else fFields is used
3379 : UChar fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
3380 : struct {
3381 : uint16_t fPadding; // align the following field at 8B (32b pointers) or 12B (64b)
3382 : int32_t fLength; // number of characters in fArray if >127; else undefined
3383 : UChar *fArray; // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
3384 : int32_t fCapacity; // sizeof fArray
3385 : } fFields;
3386 : } fUnion;
3387 : };
3388 :
3389 : /**
3390 : * Create a new UnicodeString with the concatenation of two others.
3391 : *
3392 : * @param s1 The first string to be copied to the new one.
3393 : * @param s2 The second string to be copied to the new one, after s1.
3394 : * @return UnicodeString(s1).append(s2)
3395 : * @stable ICU 2.8
3396 : */
3397 : U_COMMON_API UnicodeString U_EXPORT2
3398 : operator+ (const UnicodeString &s1, const UnicodeString &s2);
3399 :
3400 : //========================================
3401 : // Inline members
3402 : //========================================
3403 :
3404 : //========================================
3405 : // Privates
3406 : //========================================
3407 :
3408 : inline void
3409 : UnicodeString::pinIndex(int32_t& start) const
3410 : {
3411 : // pin index
3412 : if(start < 0) {
3413 : start = 0;
3414 : } else if(start > length()) {
3415 : start = length();
3416 : }
3417 : }
3418 :
3419 : inline void
3420 : UnicodeString::pinIndices(int32_t& start,
3421 : int32_t& _length) const
3422 : {
3423 : // pin indices
3424 : int32_t len = length();
3425 : if(start < 0) {
3426 : start = 0;
3427 : } else if(start > len) {
3428 : start = len;
3429 : }
3430 : if(_length < 0) {
3431 : _length = 0;
3432 : } else if(_length > (len - start)) {
3433 : _length = (len - start);
3434 : }
3435 : }
3436 :
3437 : inline UChar*
3438 : UnicodeString::getArrayStart()
3439 : { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3440 :
3441 : inline const UChar*
3442 : UnicodeString::getArrayStart() const
3443 : { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3444 :
3445 : //========================================
3446 : // Read-only implementation methods
3447 : //========================================
3448 : inline int32_t
3449 29 : UnicodeString::length() const
3450 29 : { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3451 :
3452 : inline int32_t
3453 : UnicodeString::getCapacity() const
3454 : { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3455 :
3456 : inline int32_t
3457 : UnicodeString::hashCode() const
3458 : { return doHashCode(); }
3459 :
3460 : inline UBool
3461 : UnicodeString::isBogus() const
3462 : { return (UBool)(fFlags & kIsBogus); }
3463 :
3464 : inline UBool
3465 : UnicodeString::isWritable() const
3466 : { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3467 :
3468 : inline UBool
3469 : UnicodeString::isBufferWritable() const
3470 : {
3471 : return (UBool)(
3472 : !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3473 : (!(fFlags&kRefCounted) || refCount()==1));
3474 : }
3475 :
3476 : inline const UChar *
3477 0 : UnicodeString::getBuffer() const {
3478 0 : if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3479 0 : return 0;
3480 0 : } else if(fFlags&kUsingStackBuffer) {
3481 0 : return fUnion.fStackBuffer;
3482 : } else {
3483 0 : return fUnion.fFields.fArray;
3484 : }
3485 : }
3486 :
3487 : //========================================
3488 : // Read-only alias methods
3489 : //========================================
3490 : inline int8_t
3491 : UnicodeString::doCompare(int32_t start,
3492 : int32_t thisLength,
3493 : const UnicodeString& srcText,
3494 : int32_t srcStart,
3495 : int32_t srcLength) const
3496 : {
3497 : if(srcText.isBogus()) {
3498 : return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3499 : } else {
3500 : srcText.pinIndices(srcStart, srcLength);
3501 : return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3502 : }
3503 : }
3504 :
3505 : inline UBool
3506 : UnicodeString::operator== (const UnicodeString& text) const
3507 : {
3508 : if(isBogus()) {
3509 : return text.isBogus();
3510 : } else {
3511 : int32_t len = length(), textLength = text.length();
3512 : return
3513 : !text.isBogus() &&
3514 : len == textLength &&
3515 : doCompare(0, len, text, 0, textLength) == 0;
3516 : }
3517 : }
3518 :
3519 : inline UBool
3520 : UnicodeString::operator!= (const UnicodeString& text) const
3521 : { return (! operator==(text)); }
3522 :
3523 : inline UBool
3524 : UnicodeString::operator> (const UnicodeString& text) const
3525 : { return doCompare(0, length(), text, 0, text.length()) == 1; }
3526 :
3527 : inline UBool
3528 : UnicodeString::operator< (const UnicodeString& text) const
3529 : { return doCompare(0, length(), text, 0, text.length()) == -1; }
3530 :
3531 : inline UBool
3532 : UnicodeString::operator>= (const UnicodeString& text) const
3533 : { return doCompare(0, length(), text, 0, text.length()) != -1; }
3534 :
3535 : inline UBool
3536 : UnicodeString::operator<= (const UnicodeString& text) const
3537 : { return doCompare(0, length(), text, 0, text.length()) != 1; }
3538 :
3539 : inline int8_t
3540 : UnicodeString::compare(const UnicodeString& text) const
3541 : { return doCompare(0, length(), text, 0, text.length()); }
3542 :
3543 : inline int8_t
3544 : UnicodeString::compare(int32_t start,
3545 : int32_t _length,
3546 : const UnicodeString& srcText) const
3547 : { return doCompare(start, _length, srcText, 0, srcText.length()); }
3548 :
3549 : inline int8_t
3550 : UnicodeString::compare(const UChar *srcChars,
3551 : int32_t srcLength) const
3552 : { return doCompare(0, length(), srcChars, 0, srcLength); }
3553 :
3554 : inline int8_t
3555 : UnicodeString::compare(int32_t start,
3556 : int32_t _length,
3557 : const UnicodeString& srcText,
3558 : int32_t srcStart,
3559 : int32_t srcLength) const
3560 : { return doCompare(start, _length, srcText, srcStart, srcLength); }
3561 :
3562 : inline int8_t
3563 : UnicodeString::compare(int32_t start,
3564 : int32_t _length,
3565 : const UChar *srcChars) const
3566 : { return doCompare(start, _length, srcChars, 0, _length); }
3567 :
3568 : inline int8_t
3569 : UnicodeString::compare(int32_t start,
3570 : int32_t _length,
3571 : const UChar *srcChars,
3572 : int32_t srcStart,
3573 : int32_t srcLength) const
3574 : { return doCompare(start, _length, srcChars, srcStart, srcLength); }
3575 :
3576 : inline int8_t
3577 : UnicodeString::compareBetween(int32_t start,
3578 : int32_t limit,
3579 : const UnicodeString& srcText,
3580 : int32_t srcStart,
3581 : int32_t srcLimit) const
3582 : { return doCompare(start, limit - start,
3583 : srcText, srcStart, srcLimit - srcStart); }
3584 :
3585 : inline int8_t
3586 : UnicodeString::doCompareCodePointOrder(int32_t start,
3587 : int32_t thisLength,
3588 : const UnicodeString& srcText,
3589 : int32_t srcStart,
3590 : int32_t srcLength) const
3591 : {
3592 : if(srcText.isBogus()) {
3593 : return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3594 : } else {
3595 : srcText.pinIndices(srcStart, srcLength);
3596 : return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3597 : }
3598 : }
3599 :
3600 : inline int8_t
3601 : UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3602 : { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3603 :
3604 : inline int8_t
3605 : UnicodeString::compareCodePointOrder(int32_t start,
3606 : int32_t _length,
3607 : const UnicodeString& srcText) const
3608 : { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3609 :
3610 : inline int8_t
3611 : UnicodeString::compareCodePointOrder(const UChar *srcChars,
3612 : int32_t srcLength) const
3613 : { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3614 :
3615 : inline int8_t
3616 : UnicodeString::compareCodePointOrder(int32_t start,
3617 : int32_t _length,
3618 : const UnicodeString& srcText,
3619 : int32_t srcStart,
3620 : int32_t srcLength) const
3621 : { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3622 :
3623 : inline int8_t
3624 : UnicodeString::compareCodePointOrder(int32_t start,
3625 : int32_t _length,
3626 : const UChar *srcChars) const
3627 : { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3628 :
3629 : inline int8_t
3630 : UnicodeString::compareCodePointOrder(int32_t start,
3631 : int32_t _length,
3632 : const UChar *srcChars,
3633 : int32_t srcStart,
3634 : int32_t srcLength) const
3635 : { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3636 :
3637 : inline int8_t
3638 : UnicodeString::compareCodePointOrderBetween(int32_t start,
3639 : int32_t limit,
3640 : const UnicodeString& srcText,
3641 : int32_t srcStart,
3642 : int32_t srcLimit) const
3643 : { return doCompareCodePointOrder(start, limit - start,
3644 : srcText, srcStart, srcLimit - srcStart); }
3645 :
3646 : inline int8_t
3647 : UnicodeString::doCaseCompare(int32_t start,
3648 : int32_t thisLength,
3649 : const UnicodeString &srcText,
3650 : int32_t srcStart,
3651 : int32_t srcLength,
3652 : uint32_t options) const
3653 : {
3654 : if(srcText.isBogus()) {
3655 : return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3656 : } else {
3657 : srcText.pinIndices(srcStart, srcLength);
3658 : return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3659 : }
3660 : }
3661 :
3662 : inline int8_t
3663 : UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3664 : return doCaseCompare(0, length(), text, 0, text.length(), options);
3665 : }
3666 :
3667 : inline int8_t
3668 : UnicodeString::caseCompare(int32_t start,
3669 : int32_t _length,
3670 : const UnicodeString &srcText,
3671 : uint32_t options) const {
3672 : return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3673 : }
3674 :
3675 : inline int8_t
3676 : UnicodeString::caseCompare(const UChar *srcChars,
3677 : int32_t srcLength,
3678 : uint32_t options) const {
3679 : return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3680 : }
3681 :
3682 : inline int8_t
3683 : UnicodeString::caseCompare(int32_t start,
3684 : int32_t _length,
3685 : const UnicodeString &srcText,
3686 : int32_t srcStart,
3687 : int32_t srcLength,
3688 : uint32_t options) const {
3689 : return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3690 : }
3691 :
3692 : inline int8_t
3693 : UnicodeString::caseCompare(int32_t start,
3694 : int32_t _length,
3695 : const UChar *srcChars,
3696 : uint32_t options) const {
3697 : return doCaseCompare(start, _length, srcChars, 0, _length, options);
3698 : }
3699 :
3700 : inline int8_t
3701 : UnicodeString::caseCompare(int32_t start,
3702 : int32_t _length,
3703 : const UChar *srcChars,
3704 : int32_t srcStart,
3705 : int32_t srcLength,
3706 : uint32_t options) const {
3707 : return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3708 : }
3709 :
3710 : inline int8_t
3711 : UnicodeString::caseCompareBetween(int32_t start,
3712 : int32_t limit,
3713 : const UnicodeString &srcText,
3714 : int32_t srcStart,
3715 : int32_t srcLimit,
3716 : uint32_t options) const {
3717 : return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3718 : }
3719 :
3720 : inline int32_t
3721 : UnicodeString::indexOf(const UnicodeString& srcText,
3722 : int32_t srcStart,
3723 : int32_t srcLength,
3724 : int32_t start,
3725 : int32_t _length) const
3726 : {
3727 : if(!srcText.isBogus()) {
3728 : srcText.pinIndices(srcStart, srcLength);
3729 : if(srcLength > 0) {
3730 : return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3731 : }
3732 : }
3733 : return -1;
3734 : }
3735 :
3736 : inline int32_t
3737 : UnicodeString::indexOf(const UnicodeString& text) const
3738 : { return indexOf(text, 0, text.length(), 0, length()); }
3739 :
3740 : inline int32_t
3741 : UnicodeString::indexOf(const UnicodeString& text,
3742 : int32_t start) const {
3743 : pinIndex(start);
3744 : return indexOf(text, 0, text.length(), start, length() - start);
3745 : }
3746 :
3747 : inline int32_t
3748 : UnicodeString::indexOf(const UnicodeString& text,
3749 : int32_t start,
3750 : int32_t _length) const
3751 : { return indexOf(text, 0, text.length(), start, _length); }
3752 :
3753 : inline int32_t
3754 : UnicodeString::indexOf(const UChar *srcChars,
3755 : int32_t srcLength,
3756 : int32_t start) const {
3757 : pinIndex(start);
3758 : return indexOf(srcChars, 0, srcLength, start, length() - start);
3759 : }
3760 :
3761 : inline int32_t
3762 : UnicodeString::indexOf(const UChar *srcChars,
3763 : int32_t srcLength,
3764 : int32_t start,
3765 : int32_t _length) const
3766 : { return indexOf(srcChars, 0, srcLength, start, _length); }
3767 :
3768 : inline int32_t
3769 : UnicodeString::indexOf(UChar c,
3770 : int32_t start,
3771 : int32_t _length) const
3772 : { return doIndexOf(c, start, _length); }
3773 :
3774 : inline int32_t
3775 : UnicodeString::indexOf(UChar32 c,
3776 : int32_t start,
3777 : int32_t _length) const
3778 : { return doIndexOf(c, start, _length); }
3779 :
3780 : inline int32_t
3781 : UnicodeString::indexOf(UChar c) const
3782 : { return doIndexOf(c, 0, length()); }
3783 :
3784 : inline int32_t
3785 : UnicodeString::indexOf(UChar32 c) const
3786 : { return indexOf(c, 0, length()); }
3787 :
3788 : inline int32_t
3789 : UnicodeString::indexOf(UChar c,
3790 : int32_t start) const {
3791 : pinIndex(start);
3792 : return doIndexOf(c, start, length() - start);
3793 : }
3794 :
3795 : inline int32_t
3796 : UnicodeString::indexOf(UChar32 c,
3797 : int32_t start) const {
3798 : pinIndex(start);
3799 : return indexOf(c, start, length() - start);
3800 : }
3801 :
3802 : inline int32_t
3803 : UnicodeString::lastIndexOf(const UChar *srcChars,
3804 : int32_t srcLength,
3805 : int32_t start,
3806 : int32_t _length) const
3807 : { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3808 :
3809 : inline int32_t
3810 : UnicodeString::lastIndexOf(const UChar *srcChars,
3811 : int32_t srcLength,
3812 : int32_t start) const {
3813 : pinIndex(start);
3814 : return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3815 : }
3816 :
3817 : inline int32_t
3818 : UnicodeString::lastIndexOf(const UnicodeString& srcText,
3819 : int32_t srcStart,
3820 : int32_t srcLength,
3821 : int32_t start,
3822 : int32_t _length) const
3823 : {
3824 : if(!srcText.isBogus()) {
3825 : srcText.pinIndices(srcStart, srcLength);
3826 : if(srcLength > 0) {
3827 : return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3828 : }
3829 : }
3830 : return -1;
3831 : }
3832 :
3833 : inline int32_t
3834 : UnicodeString::lastIndexOf(const UnicodeString& text,
3835 : int32_t start,
3836 : int32_t _length) const
3837 : { return lastIndexOf(text, 0, text.length(), start, _length); }
3838 :
3839 : inline int32_t
3840 : UnicodeString::lastIndexOf(const UnicodeString& text,
3841 : int32_t start) const {
3842 : pinIndex(start);
3843 : return lastIndexOf(text, 0, text.length(), start, length() - start);
3844 : }
3845 :
3846 : inline int32_t
3847 : UnicodeString::lastIndexOf(const UnicodeString& text) const
3848 : { return lastIndexOf(text, 0, text.length(), 0, length()); }
3849 :
3850 : inline int32_t
3851 : UnicodeString::lastIndexOf(UChar c,
3852 : int32_t start,
3853 : int32_t _length) const
3854 : { return doLastIndexOf(c, start, _length); }
3855 :
3856 : inline int32_t
3857 : UnicodeString::lastIndexOf(UChar32 c,
3858 : int32_t start,
3859 : int32_t _length) const {
3860 : return doLastIndexOf(c, start, _length);
3861 : }
3862 :
3863 : inline int32_t
3864 : UnicodeString::lastIndexOf(UChar c) const
3865 : { return doLastIndexOf(c, 0, length()); }
3866 :
3867 : inline int32_t
3868 : UnicodeString::lastIndexOf(UChar32 c) const {
3869 : return lastIndexOf(c, 0, length());
3870 : }
3871 :
3872 : inline int32_t
3873 : UnicodeString::lastIndexOf(UChar c,
3874 : int32_t start) const {
3875 : pinIndex(start);
3876 : return doLastIndexOf(c, start, length() - start);
3877 : }
3878 :
3879 : inline int32_t
3880 : UnicodeString::lastIndexOf(UChar32 c,
3881 : int32_t start) const {
3882 : pinIndex(start);
3883 : return lastIndexOf(c, start, length() - start);
3884 : }
3885 :
3886 : inline UBool
3887 : UnicodeString::startsWith(const UnicodeString& text) const
3888 : { return compare(0, text.length(), text, 0, text.length()) == 0; }
3889 :
3890 : inline UBool
3891 : UnicodeString::startsWith(const UnicodeString& srcText,
3892 : int32_t srcStart,
3893 : int32_t srcLength) const
3894 : { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
3895 :
3896 : inline UBool
3897 : UnicodeString::startsWith(const UChar *srcChars,
3898 : int32_t srcLength) const
3899 : { return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
3900 :
3901 : inline UBool
3902 : UnicodeString::startsWith(const UChar *srcChars,
3903 : int32_t srcStart,
3904 : int32_t srcLength) const
3905 : { return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
3906 :
3907 : inline UBool
3908 : UnicodeString::endsWith(const UnicodeString& text) const
3909 : { return doCompare(length() - text.length(), text.length(),
3910 : text, 0, text.length()) == 0; }
3911 :
3912 : inline UBool
3913 : UnicodeString::endsWith(const UnicodeString& srcText,
3914 : int32_t srcStart,
3915 : int32_t srcLength) const {
3916 : srcText.pinIndices(srcStart, srcLength);
3917 : return doCompare(length() - srcLength, srcLength,
3918 : srcText, srcStart, srcLength) == 0;
3919 : }
3920 :
3921 : inline UBool
3922 : UnicodeString::endsWith(const UChar *srcChars,
3923 : int32_t srcLength) const {
3924 : if(srcLength < 0) {
3925 : srcLength = u_strlen(srcChars);
3926 : }
3927 : return doCompare(length() - srcLength, srcLength,
3928 : srcChars, 0, srcLength) == 0;
3929 : }
3930 :
3931 : inline UBool
3932 : UnicodeString::endsWith(const UChar *srcChars,
3933 : int32_t srcStart,
3934 : int32_t srcLength) const {
3935 : if(srcLength < 0) {
3936 : srcLength = u_strlen(srcChars + srcStart);
3937 : }
3938 : return doCompare(length() - srcLength, srcLength,
3939 : srcChars, srcStart, srcLength) == 0;
3940 : }
3941 :
3942 : //========================================
3943 : // replace
3944 : //========================================
3945 : inline UnicodeString&
3946 : UnicodeString::replace(int32_t start,
3947 : int32_t _length,
3948 : const UnicodeString& srcText)
3949 : { return doReplace(start, _length, srcText, 0, srcText.length()); }
3950 :
3951 : inline UnicodeString&
3952 : UnicodeString::replace(int32_t start,
3953 : int32_t _length,
3954 : const UnicodeString& srcText,
3955 : int32_t srcStart,
3956 : int32_t srcLength)
3957 : { return doReplace(start, _length, srcText, srcStart, srcLength); }
3958 :
3959 : inline UnicodeString&
3960 : UnicodeString::replace(int32_t start,
3961 : int32_t _length,
3962 : const UChar *srcChars,
3963 : int32_t srcLength)
3964 : { return doReplace(start, _length, srcChars, 0, srcLength); }
3965 :
3966 : inline UnicodeString&
3967 : UnicodeString::replace(int32_t start,
3968 : int32_t _length,
3969 : const UChar *srcChars,
3970 : int32_t srcStart,
3971 : int32_t srcLength)
3972 : { return doReplace(start, _length, srcChars, srcStart, srcLength); }
3973 :
3974 : inline UnicodeString&
3975 : UnicodeString::replace(int32_t start,
3976 : int32_t _length,
3977 : UChar srcChar)
3978 : { return doReplace(start, _length, &srcChar, 0, 1); }
3979 :
3980 : inline UnicodeString&
3981 : UnicodeString::replace(int32_t start,
3982 : int32_t _length,
3983 : UChar32 srcChar) {
3984 : UChar buffer[U16_MAX_LENGTH];
3985 : int32_t count = 0;
3986 : UBool isError = FALSE;
3987 : U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
3988 : return doReplace(start, _length, buffer, 0, count);
3989 : }
3990 :
3991 : inline UnicodeString&
3992 : UnicodeString::replaceBetween(int32_t start,
3993 : int32_t limit,
3994 : const UnicodeString& srcText)
3995 : { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
3996 :
3997 : inline UnicodeString&
3998 : UnicodeString::replaceBetween(int32_t start,
3999 : int32_t limit,
4000 : const UnicodeString& srcText,
4001 : int32_t srcStart,
4002 : int32_t srcLimit)
4003 : { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4004 :
4005 : inline UnicodeString&
4006 : UnicodeString::findAndReplace(const UnicodeString& oldText,
4007 : const UnicodeString& newText)
4008 : { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4009 : newText, 0, newText.length()); }
4010 :
4011 : inline UnicodeString&
4012 : UnicodeString::findAndReplace(int32_t start,
4013 : int32_t _length,
4014 : const UnicodeString& oldText,
4015 : const UnicodeString& newText)
4016 : { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4017 : newText, 0, newText.length()); }
4018 :
4019 : // ============================
4020 : // extract
4021 : // ============================
4022 : inline void
4023 : UnicodeString::doExtract(int32_t start,
4024 : int32_t _length,
4025 : UnicodeString& target) const
4026 : { target.replace(0, target.length(), *this, start, _length); }
4027 :
4028 : inline void
4029 : UnicodeString::extract(int32_t start,
4030 : int32_t _length,
4031 : UChar *target,
4032 : int32_t targetStart) const
4033 : { doExtract(start, _length, target, targetStart); }
4034 :
4035 : inline void
4036 : UnicodeString::extract(int32_t start,
4037 : int32_t _length,
4038 : UnicodeString& target) const
4039 : { doExtract(start, _length, target); }
4040 :
4041 : #if !UCONFIG_NO_CONVERSION
4042 :
4043 : inline int32_t
4044 : UnicodeString::extract(int32_t start,
4045 : int32_t _length,
4046 : char *dst,
4047 : const char *codepage) const
4048 :
4049 : {
4050 : // This dstSize value will be checked explicitly
4051 : return extract(start, _length, dst, dst!=0 ? (((size_t)dst >= ((size_t)-1) - UINT32_MAX) ? (((char*)UINT32_MAX) - dst) : UINT32_MAX) : 0, codepage);
4052 : }
4053 :
4054 : #endif
4055 :
4056 : inline void
4057 : UnicodeString::extractBetween(int32_t start,
4058 : int32_t limit,
4059 : UChar *dst,
4060 : int32_t dstStart) const {
4061 : pinIndex(start);
4062 : pinIndex(limit);
4063 : doExtract(start, limit - start, dst, dstStart);
4064 : }
4065 :
4066 : inline UChar
4067 : UnicodeString::doCharAt(int32_t offset) const
4068 : {
4069 : if((uint32_t)offset < (uint32_t)length()) {
4070 : return getArrayStart()[offset];
4071 : } else {
4072 : return kInvalidUChar;
4073 : }
4074 : }
4075 :
4076 : inline UChar
4077 : UnicodeString::charAt(int32_t offset) const
4078 : { return doCharAt(offset); }
4079 :
4080 : inline UChar
4081 : UnicodeString::operator[] (int32_t offset) const
4082 : { return doCharAt(offset); }
4083 :
4084 : inline UChar32
4085 : UnicodeString::char32At(int32_t offset) const
4086 : {
4087 : int32_t len = length();
4088 : if((uint32_t)offset < (uint32_t)len) {
4089 : const UChar *array = getArrayStart();
4090 : UChar32 c;
4091 : U16_GET(array, 0, offset, len, c);
4092 : return c;
4093 : } else {
4094 : return kInvalidUChar;
4095 : }
4096 : }
4097 :
4098 : inline int32_t
4099 : UnicodeString::getChar32Start(int32_t offset) const {
4100 : if((uint32_t)offset < (uint32_t)length()) {
4101 : const UChar *array = getArrayStart();
4102 : U16_SET_CP_START(array, 0, offset);
4103 : return offset;
4104 : } else {
4105 : return 0;
4106 : }
4107 : }
4108 :
4109 : inline int32_t
4110 : UnicodeString::getChar32Limit(int32_t offset) const {
4111 : int32_t len = length();
4112 : if((uint32_t)offset < (uint32_t)len) {
4113 : const UChar *array = getArrayStart();
4114 : U16_SET_CP_LIMIT(array, 0, offset, len);
4115 : return offset;
4116 : } else {
4117 : return len;
4118 : }
4119 : }
4120 :
4121 : inline UBool
4122 : UnicodeString::isEmpty() const {
4123 : return fShortLength == 0;
4124 : }
4125 :
4126 : //========================================
4127 : // Write implementation methods
4128 : //========================================
4129 : inline void
4130 : UnicodeString::setLength(int32_t len) {
4131 : if(len <= 127) {
4132 : fShortLength = (int8_t)len;
4133 : } else {
4134 : fShortLength = (int8_t)-1;
4135 : fUnion.fFields.fLength = len;
4136 : }
4137 : }
4138 :
4139 : inline void
4140 : UnicodeString::setToEmpty() {
4141 : fShortLength = 0;
4142 : fFlags = kShortString;
4143 : }
4144 :
4145 : inline void
4146 : UnicodeString::setToStackBuffer(int32_t len) {
4147 : fShortLength = (int8_t)len;
4148 : fFlags = kShortString;
4149 : }
4150 :
4151 : inline void
4152 : UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4153 : setLength(len);
4154 : fUnion.fFields.fArray = array;
4155 : fUnion.fFields.fCapacity = capacity;
4156 : }
4157 :
4158 : inline const UChar *
4159 : UnicodeString::getTerminatedBuffer() {
4160 : if(!isWritable()) {
4161 : return 0;
4162 : } else {
4163 : UChar *array = getArrayStart();
4164 : int32_t len = length();
4165 : #ifndef U_VALGRIND
4166 : if(len < getCapacity() && array[len] == 0) {
4167 : return array;
4168 : }
4169 : #endif
4170 : if(cloneArrayIfNeeded(len+1)) {
4171 : array = getArrayStart();
4172 : array[len] = 0;
4173 : return array;
4174 : } else {
4175 : return 0;
4176 : }
4177 : }
4178 : }
4179 :
4180 : inline UnicodeString&
4181 : UnicodeString::operator= (UChar ch)
4182 : { return doReplace(0, length(), &ch, 0, 1); }
4183 :
4184 : inline UnicodeString&
4185 : UnicodeString::operator= (UChar32 ch)
4186 : { return replace(0, length(), ch); }
4187 :
4188 : inline UnicodeString&
4189 : UnicodeString::setTo(const UnicodeString& srcText,
4190 : int32_t srcStart,
4191 : int32_t srcLength)
4192 : {
4193 : unBogus();
4194 : return doReplace(0, length(), srcText, srcStart, srcLength);
4195 : }
4196 :
4197 : inline UnicodeString&
4198 : UnicodeString::setTo(const UnicodeString& srcText,
4199 : int32_t srcStart)
4200 : {
4201 : unBogus();
4202 : srcText.pinIndex(srcStart);
4203 : return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4204 : }
4205 :
4206 : inline UnicodeString&
4207 : UnicodeString::setTo(const UnicodeString& srcText)
4208 : {
4209 : unBogus();
4210 : return doReplace(0, length(), srcText, 0, srcText.length());
4211 : }
4212 :
4213 : inline UnicodeString&
4214 : UnicodeString::setTo(const UChar *srcChars,
4215 : int32_t srcLength)
4216 : {
4217 : unBogus();
4218 : return doReplace(0, length(), srcChars, 0, srcLength);
4219 : }
4220 :
4221 : inline UnicodeString&
4222 : UnicodeString::setTo(UChar srcChar)
4223 : {
4224 : unBogus();
4225 : return doReplace(0, length(), &srcChar, 0, 1);
4226 : }
4227 :
4228 : inline UnicodeString&
4229 : UnicodeString::setTo(UChar32 srcChar)
4230 : {
4231 : unBogus();
4232 : return replace(0, length(), srcChar);
4233 : }
4234 :
4235 : inline UnicodeString&
4236 : UnicodeString::append(const UnicodeString& srcText,
4237 : int32_t srcStart,
4238 : int32_t srcLength)
4239 : { return doReplace(length(), 0, srcText, srcStart, srcLength); }
4240 :
4241 : inline UnicodeString&
4242 : UnicodeString::append(const UnicodeString& srcText)
4243 : { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4244 :
4245 : inline UnicodeString&
4246 : UnicodeString::append(const UChar *srcChars,
4247 : int32_t srcStart,
4248 : int32_t srcLength)
4249 : { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4250 :
4251 : inline UnicodeString&
4252 : UnicodeString::append(const UChar *srcChars,
4253 : int32_t srcLength)
4254 : { return doReplace(length(), 0, srcChars, 0, srcLength); }
4255 :
4256 : inline UnicodeString&
4257 : UnicodeString::append(UChar srcChar)
4258 : { return doReplace(length(), 0, &srcChar, 0, 1); }
4259 :
4260 : inline UnicodeString&
4261 : UnicodeString::append(UChar32 srcChar) {
4262 : UChar buffer[U16_MAX_LENGTH];
4263 : int32_t _length = 0;
4264 : UBool isError = FALSE;
4265 : U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
4266 : return doReplace(length(), 0, buffer, 0, _length);
4267 : }
4268 :
4269 : inline UnicodeString&
4270 : UnicodeString::operator+= (UChar ch)
4271 : { return doReplace(length(), 0, &ch, 0, 1); }
4272 :
4273 : inline UnicodeString&
4274 : UnicodeString::operator+= (UChar32 ch) {
4275 : return append(ch);
4276 : }
4277 :
4278 : inline UnicodeString&
4279 : UnicodeString::operator+= (const UnicodeString& srcText)
4280 : { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4281 :
4282 : inline UnicodeString&
4283 : UnicodeString::insert(int32_t start,
4284 : const UnicodeString& srcText,
4285 : int32_t srcStart,
4286 : int32_t srcLength)
4287 : { return doReplace(start, 0, srcText, srcStart, srcLength); }
4288 :
4289 : inline UnicodeString&
4290 : UnicodeString::insert(int32_t start,
4291 : const UnicodeString& srcText)
4292 : { return doReplace(start, 0, srcText, 0, srcText.length()); }
4293 :
4294 : inline UnicodeString&
4295 : UnicodeString::insert(int32_t start,
4296 : const UChar *srcChars,
4297 : int32_t srcStart,
4298 : int32_t srcLength)
4299 : { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4300 :
4301 : inline UnicodeString&
4302 : UnicodeString::insert(int32_t start,
4303 : const UChar *srcChars,
4304 : int32_t srcLength)
4305 : { return doReplace(start, 0, srcChars, 0, srcLength); }
4306 :
4307 : inline UnicodeString&
4308 : UnicodeString::insert(int32_t start,
4309 : UChar srcChar)
4310 : { return doReplace(start, 0, &srcChar, 0, 1); }
4311 :
4312 : inline UnicodeString&
4313 : UnicodeString::insert(int32_t start,
4314 : UChar32 srcChar)
4315 : { return replace(start, 0, srcChar); }
4316 :
4317 :
4318 : inline UnicodeString&
4319 : UnicodeString::remove()
4320 : {
4321 : // remove() of a bogus string makes the string empty and non-bogus
4322 : if(isBogus()) {
4323 : unBogus();
4324 : } else {
4325 : setLength(0);
4326 : }
4327 : return *this;
4328 : }
4329 :
4330 : inline UnicodeString&
4331 : UnicodeString::remove(int32_t start,
4332 : int32_t _length)
4333 : {
4334 : if(start <= 0 && _length == INT32_MAX) {
4335 : // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4336 : return remove();
4337 : }
4338 : return doReplace(start, _length, NULL, 0, 0);
4339 : }
4340 :
4341 : inline UnicodeString&
4342 : UnicodeString::removeBetween(int32_t start,
4343 : int32_t limit)
4344 : { return doReplace(start, limit - start, NULL, 0, 0); }
4345 :
4346 : inline UBool
4347 : UnicodeString::truncate(int32_t targetLength)
4348 : {
4349 : if(isBogus() && targetLength == 0) {
4350 : // truncate(0) of a bogus string makes the string empty and non-bogus
4351 : unBogus();
4352 : return FALSE;
4353 : } else if((uint32_t)targetLength < (uint32_t)length()) {
4354 : setLength(targetLength);
4355 : return TRUE;
4356 : } else {
4357 : return FALSE;
4358 : }
4359 : }
4360 :
4361 : inline UnicodeString&
4362 : UnicodeString::reverse()
4363 : { return doReverse(0, length()); }
4364 :
4365 : inline UnicodeString&
4366 : UnicodeString::reverse(int32_t start,
4367 : int32_t _length)
4368 : { return doReverse(start, _length); }
4369 :
4370 : U_NAMESPACE_END
4371 :
4372 : #endif
|