ICU 67.1
utext.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2004-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: utext.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2004oct06
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __UTEXT_H__
20 #define __UTEXT_H__
21 
140 #include "unicode/utypes.h"
141 #include "unicode/uchar.h"
142 #if U_SHOW_CPLUSPLUS_API
143 #include "unicode/localpointer.h"
144 #include "unicode/rep.h"
145 #include "unicode/unistr.h"
146 #include "unicode/chariter.h"
147 #endif
148 
149 
151 
/* Forward declaration of the UText struct; the full definition appears later in this header. */
152 struct UText;
153 typedef struct UText UText;
156 /***************************************************************************************
157  *
158  * C Functions for creating UText wrappers around various kinds of text strings.
159  *
160  ****************************************************************************************/
161 
162 
/* Close function for UText instances. */
183 U_STABLE UText * U_EXPORT2
184 utext_close(UText *ut);
185 
/* Open a read-only UText implementation for UTF-8 strings. */
207 U_STABLE UText * U_EXPORT2
208 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
209 
210 
/* Open a read-only UText for a UChar * string. */
225 U_STABLE UText * U_EXPORT2
226 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
227 
228 
229 #if U_SHOW_CPLUSPLUS_API
230 
/* Open a writable UText for a non-const UnicodeString (C++ API only). */
242 U_STABLE UText * U_EXPORT2
243 utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
244 
245 
/* Open a UText for a const UnicodeString (C++ API only). */
258 U_STABLE UText * U_EXPORT2
259 utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
260 
261 
/* Open a writable UText implementation for an ICU Replaceable object (C++ API only). */
274 U_STABLE UText * U_EXPORT2
275 utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
276 
/* Open a UText implementation over an ICU CharacterIterator (C++ API only). */
289 U_STABLE UText * U_EXPORT2
290 utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
291 
292 #endif
293 
294 
/* Clone a UText. */
352 U_STABLE UText * U_EXPORT2
353 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
354 
355 
/* Compare two UText objects for equality. */
367 U_STABLE UBool U_EXPORT2
368 utext_equals(const UText *a, const UText *b);
369 
370 
371 /*****************************************************************************
372  *
373  * Functions to work with the text represented by a UText wrapper
374  *
375  *****************************************************************************/
376 
/* Get the length of the text. */
388 U_STABLE int64_t U_EXPORT2
389 utext_nativeLength(UText *ut);
390 
/* Return TRUE if calculating the length of the text could be expensive. */
404 U_STABLE UBool U_EXPORT2
405 utext_isLengthExpensive(const UText *ut);
406 
/* Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds. */
432 U_STABLE UChar32 U_EXPORT2
433 utext_char32At(UText *ut, int64_t nativeIndex);
434 
435 
/* Get the code point at the current iteration position, or U_SENTINEL (-1) at the end of the iteration. */
446 U_STABLE UChar32 U_EXPORT2
447 utext_current32(UText *ut);
448 
449 
/* Get the code point at the current iteration position of the UText, and advance the position. */
468 U_STABLE UChar32 U_EXPORT2
469 utext_next32(UText *ut);
470 
471 
/* Move the iterator position to the code point whose index precedes the current position. */
489 U_STABLE UChar32 U_EXPORT2
490 utext_previous32(UText *ut);
491 
492 
/* Set the iteration index and return the code point at that index. */
511 U_STABLE UChar32 U_EXPORT2
512 utext_next32From(UText *ut, int64_t nativeIndex);
513 
514 
515 
/* Set the iteration index, and return the code point preceding the one specified by the initial index. */
531 U_STABLE UChar32 U_EXPORT2
532 utext_previous32From(UText *ut, int64_t nativeIndex);
533 
/* Get the current iterator position, which can range from 0 to the length of the text. */
546 U_STABLE int64_t U_EXPORT2
547 utext_getNativeIndex(const UText *ut);
548 
/* Set the current iteration position to the nearest code point boundary at or preceding nativeIndex. */
572 U_STABLE void U_EXPORT2
573 utext_setNativeIndex(UText *ut, int64_t nativeIndex);
574 
/* Move the iterator position by delta code points. */
591 U_STABLE UBool U_EXPORT2
592 utext_moveIndex32(UText *ut, int32_t delta);
593 
/* Get the native index of the character preceding the current position. */
616 U_STABLE int64_t U_EXPORT2
617 utext_getPreviousNativeIndex(UText *ut);
618 
619 
/* Extract text from a UText into a UChar buffer (dest, with room for destCapacity UChars). */
654 U_STABLE int32_t U_EXPORT2
655 utext_extract(UText *ut,
656  int64_t nativeStart, int64_t nativeLimit,
657  UChar *dest, int32_t destCapacity,
658  UErrorCode *status);
659 
660 
661 
662 /************************************************************************************
663  *
664  * #define inline versions of selected performance-critical text access functions
665  * Caution: do not use auto increment++ or decrement-- expressions
666  * as parameters to these macros.
667  *
668  * For most use, where there is no extreme performance constraint, the
669  * normal, non-inline functions are a better choice. The resulting code
670  * will be smaller, and, if the need ever arises, easier to debug.
671  *
672  * These are implemented as #defines rather than real functions
673  * because there is no fully portable way to do inline functions in plain C.
674  *
675  ************************************************************************************/
676 
677 #ifndef U_HIDE_INTERNAL_API
678 
/*
 * UTEXT_CURRENT32(ut): inline fast path in front of utext_current32().
 * When chunkOffset lies inside the current chunk and the UTF-16 code unit at
 * that offset is below 0xd800, that code unit is the result and the offset is
 * left unchanged. In every other case the call is forwarded to
 * utext_current32(ut).
 * The argument is expanded several times, so it must be free of side effects.
 */
687 #define UTEXT_CURRENT32(ut) \
688  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
689  ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
690 #endif /* U_HIDE_INTERNAL_API */
691 
/*
 * UTEXT_NEXT32(ut): inline fast path in front of utext_next32().
 * When chunkOffset lies inside the current chunk and the UTF-16 code unit at
 * that offset is below 0xd800, that code unit is the result and chunkOffset
 * is post-incremented. In every other case the call is forwarded to
 * utext_next32(ut).
 * The argument is expanded several times, so it must be free of side effects.
 */
703 #define UTEXT_NEXT32(ut) \
704  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
705  ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
706 
/*
 * UTEXT_PREVIOUS32(ut): inline fast path in front of utext_previous32().
 * When chunkOffset is greater than 0 and the UTF-16 code unit just before it
 * is below 0xd800, chunkOffset is pre-decremented and that code unit is the
 * result. In every other case the call is forwarded to utext_previous32(ut).
 * The argument is expanded several times, so it must be free of side effects.
 */
717 #define UTEXT_PREVIOUS32(ut) \
718  ((ut)->chunkOffset > 0 && \
719  (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
720  (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
721 
/*
 * UTEXT_GETNATIVEINDEX(ut): inline computation of the current native index.
 * While chunkOffset does not exceed nativeIndexingLimit, the result is simply
 * chunkNativeStart + chunkOffset; beyond that limit the provider's
 * mapOffsetToNative function (via pFuncs) is called instead.
 * The argument is expanded several times, so it must be free of side effects.
 */
734 #define UTEXT_GETNATIVEINDEX(ut) \
735  ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
736  (ut)->chunkNativeStart+(ut)->chunkOffset : \
737  (ut)->pFuncs->mapOffsetToNative(ut))
738 
/*
 * UTEXT_SETNATIVEINDEX(ut, ix): inline fast path in front of utext_setNativeIndex().
 * The requested native index is converted to an offset relative to
 * chunkNativeStart. If that offset is non-negative, below nativeIndexingLimit,
 * and the code unit found there is below 0xdc00, chunkOffset is set directly.
 * In every other case the call is forwarded to utext_setNativeIndex(ut, ix).
 * This is a statement macro, not an expression; both arguments may be
 * evaluated more than once, so they must be free of side effects.
 */
750 #define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
751  int64_t __offset = (ix) - (ut)->chunkNativeStart; \
752  if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
753  (ut)->chunkOffset=(int32_t)__offset; \
754  } else { \
755  utext_setNativeIndex((ut), (ix)); \
756  } \
757 } UPRV_BLOCK_MACRO_END
758 
759 
760 
761 /************************************************************************************
762  *
763  * Functions related to writing or modifying the text.
764  * These will work only with modifiable UTexts. Attempting to
765  * modify a read-only UText will return an error status.
766  *
767  ************************************************************************************/
768 
769 
/* Return TRUE if the text can be written (modified) with utext_replace() or utext_copy(). */
788 U_STABLE UBool U_EXPORT2
789 utext_isWritable(const UText *ut);
790 
791 
/* Test whether there is meta data associated with the text. */
800 U_STABLE UBool U_EXPORT2
801 utext_hasMetaData(const UText *ut);
802 
803 
/* Replace a range of the original text with a replacement text. */
831 U_STABLE int32_t U_EXPORT2
832 utext_replace(UText *ut,
833  int64_t nativeStart, int64_t nativeLimit,
834  const UChar *replacementText, int32_t replacementLength,
835  UErrorCode *status);
836 
837 
838 
/* Copy or move a substring from one position to another within the text, while retaining any metadata. */
871 U_STABLE void U_EXPORT2
872 utext_copy(UText *ut,
873  int64_t nativeStart, int64_t nativeLimit,
874  int64_t destIndex,
875  UBool move,
876  UErrorCode *status);
877 
878 
/* NOTE(review): no summary for this function survives in this listing; presumably it makes the UText non-writable - confirm against the ICU API reference. */
900 U_STABLE void U_EXPORT2
901 utext_freeze(UText *ut);
902 
903 
/* Text provider property identifiers (restored; the enumerators were lost from this listing). */
910 enum {
915  UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1, /* It is potentially time consuming for the provider to determine the length of the text. */
922  UTEXT_PROVIDER_STABLE_CHUNKS = 2, /* Text chunks remain valid and usable until the text object is modified or deleted. */
929  UTEXT_PROVIDER_WRITABLE = 3, /* The provider supports modifying the text via the replace() and copy() functions. */
935  UTEXT_PROVIDER_HAS_META_DATA = 4, /* There is meta data associated with the text. */
943  UTEXT_PROVIDER_OWNS_TEXT = 5 /* Text provider owns the text storage. */
944 };
945 
/* Function type declaration for UText.clone(). */
983 typedef UText * U_CALLCONV
984 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
985 
986 
/* Function type declaration for UText.nativeLength(). */
995 typedef int64_t U_CALLCONV
996 UTextNativeLength(UText *ut);
997 
/* Function type declaration for UText.access(). */
1023 typedef UBool U_CALLCONV
1024 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1025 
/* Function type declaration for UText.extract(). */
1053 typedef int32_t U_CALLCONV
1054 UTextExtract(UText *ut,
1055  int64_t nativeStart, int64_t nativeLimit,
1056  UChar *dest, int32_t destCapacity,
1057  UErrorCode *status);
1058 
/* Function type declaration for UText.replace(). */
1088 typedef int32_t U_CALLCONV
1089 UTextReplace(UText *ut,
1090  int64_t nativeStart, int64_t nativeLimit,
1091  const UChar *replacementText, int32_t replacmentLength,
1092  UErrorCode *status);
1093 
/* Function type declaration for UText.copy(). */
1122 typedef void U_CALLCONV
1123 UTextCopy(UText *ut,
1124  int64_t nativeStart, int64_t nativeLimit,
1125  int64_t nativeDest,
1126  UBool move,
1127  UErrorCode *status);
1128 
/* Function type declaration for UText.mapOffsetToNative(). */
1142 typedef int64_t U_CALLCONV
1143 UTextMapOffsetToNative(const UText *ut);
1144 
/* Function type declaration for UText.mapIndexToUTF16(). */
1160 typedef int32_t U_CALLCONV
1161 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1162 
1163 
/* Function type declaration for UText.utextClose(). */
1181 typedef void U_CALLCONV
1182 UTextClose(UText *ut);
1183 
1184 
/*
 * (public) Function dispatch table for UText.
 * Member lines that were lost from this listing are restored at the line
 * numbers and with the types recorded for them in the page's member index.
 */
1194 struct UTextFuncs {
1209  int32_t tableSize; /* (public) Function table size, sizeof(UTextFuncs). */
1210 
1216  int32_t reserved1, reserved2, reserved3; /* (private) Alignment padding. */
1217 
1218 
1225  UTextClone *clone; /* (public) Function pointer for UTextClone. */
1226 
1234  UTextNativeLength *nativeLength; /* (public) Function pointer for UTextNativeLength. May be expensive to compute! */
1235 
1242  UTextAccess *access; /* (public) Function pointer for UTextAccess. */
1243 
1250  UTextExtract *extract; /* (public) Function pointer for UTextExtract. */
1251 
1258  UTextReplace *replace; /* (public) Function pointer for UTextReplace. */
1259 
1266  UTextCopy *copy; /* (public) Function pointer for UTextCopy. */
1267 
1274  UTextMapOffsetToNative *mapOffsetToNative; /* (public) Function pointer for UTextMapOffsetToNative. */
1275 
1282  UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16; /* (public) Function pointer for UTextMapNativeIndexToUTF16. */
1283 
1290  UTextClose *close; /* (public) Function pointer for UTextClose. */
1291 
1296  UTextClose *spare1; /* (private) Spare function pointer. */
1297 
1302  UTextClose *spare2; /* (private) Spare function pointer. */
1303 
1308  UTextClose *spare3; /* (private) Spare function pointer. */
1309 
1310 };
1315 typedef struct UTextFuncs UTextFuncs;
1316 
/*
 * UText struct.
 * Member order matches the positional initializer UTEXT_INITIALIZER. The six
 * members that were lost from this listing (providerProperties,
 * chunkNativeLimit, nativeIndexingLimit, chunkNativeStart, chunkContents,
 * pFuncs) are restored at the line numbers and with the types recorded for
 * them in the page's member index.
 */
1328 struct UText {
1341  uint32_t magic; /* (private) Magic. */
1342 
1343 
1349  int32_t flags; /* (private) Flags for managing the allocation and freeing of memory associated with this UText. */
1350 
1351 
1357  int32_t providerProperties; /* Text provider properties. */
1358 
1365  int32_t sizeOfStruct; /* (public) sizeOfStruct=sizeof(UText). Allows possible backward compatible extension. */
1366 
1367  /* ------ 16 byte alignment boundary ----------- */
1368 
1369 
1375  int64_t chunkNativeLimit; /* (protected) Native index of the first character position following the current chunk. */
1376 
1381  int32_t extraSize; /* (protected) Size in bytes of the extra space (pExtra). */
1382 
1390  int32_t nativeIndexingLimit; /* (protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond. */
1391 
1392  /* ---- 16 byte alignment boundary------ */
1393 
1398  int64_t chunkNativeStart; /* (protected) Native index of the first character in the text chunk. */
1399 
1405  int32_t chunkOffset; /* (protected) Current iteration position within the text chunk (UTF-16 buffer). */
1406 
1411  int32_t chunkLength; /* (protected) Length of the text chunk (UTF-16 buffer), in UChars. */
1412 
1413  /* ---- 16 byte alignment boundary-- */
1414 
1415 
1422  const UChar *chunkContents; /* (protected) Pointer to a chunk of text in UTF-16 format. */
1423 
1428  const UTextFuncs *pFuncs; /* (public) Pointer to dispatch table for accessing functions for this UText. */
1429 
1435  void *pExtra; /* (protected) Pointer to additional space requested by the text provider during the utext_open operation. */
1436 
1443  const void *context; /* (protected) Pointer to string or text-containing object or similar. */
1444 
1445  /* --- 16 byte alignment boundary--- */
1446 
1452  const void *p; /* (protected) Pointer field available for use by the text provider. */
1458  const void *q; /* (protected) Pointer field available for use by the text provider. */
1464  const void *r; /* (protected) Pointer field available for use by the text provider. */
1465 
1471  void *privP; /* Private field reserved for future use by the UText framework itself. */
1472 
1473 
1474  /* --- 16 byte alignment boundary--- */
1475 
1476 
1482  int64_t a; /* (protected) Integer field reserved for use by the text provider. */
1483 
1489  int32_t b; /* (protected) Integer field reserved for use by the text provider. */
1490 
1496  int32_t c; /* (protected) Integer field reserved for use by the text provider. */
1497 
1498  /* ---- 16 byte alignment boundary---- */
1499 
1500 
1506  int64_t privA; /* Private field reserved for future use by the UText framework itself. */
1512  int32_t privB; /* Private field reserved for future use by the UText framework itself. */
1518  int32_t privC; /* Private field reserved for future use by the UText framework itself. */
1519 };
1520 
1521 
/* Common function for use by Text Provider implementations to allocate and/or initialize a new UText. */
1538 U_STABLE UText * U_EXPORT2
1539 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1540 
1541 // do not use #ifndef U_HIDE_INTERNAL_API around the following!
/* Value that UTEXT_INITIALIZER stores in the (private) magic field of a UText. */
1547 enum {
1548  UTEXT_MAGIC = 0x345ad82c
1549 };
1550 
/*
 * UTEXT_INITIALIZER: positional aggregate initializer for a UText.
 * magic is set to UTEXT_MAGIC and sizeOfStruct to sizeof(UText); every other
 * integer member is 0 and every pointer member is NULL. Being positional, the
 * entries must stay in the same order as the members of struct UText.
 */
1558 #define UTEXT_INITIALIZER { \
1559  UTEXT_MAGIC, /* magic */ \
1560  0, /* flags */ \
1561  0, /* providerProps */ \
1562  sizeof(UText), /* sizeOfStruct */ \
1563  0, /* chunkNativeLimit */ \
1564  0, /* extraSize */ \
1565  0, /* nativeIndexingLimit */ \
1566  0, /* chunkNativeStart */ \
1567  0, /* chunkOffset */ \
1568  0, /* chunkLength */ \
1569  NULL, /* chunkContents */ \
1570  NULL, /* pFuncs */ \
1571  NULL, /* pExtra */ \
1572  NULL, /* context */ \
1573  NULL, NULL, NULL, /* p, q, r */ \
1574  NULL, /* privP */ \
1575  0, 0, 0, /* a, b, c */ \
1576  0, 0, 0 /* privA,B,C, */ \
1577  }
1578 
1579 
1581 
1582 
1583 #if U_SHOW_CPLUSPLUS_API
1584 
1585 U_NAMESPACE_BEGIN
1586 
/* "Smart pointer" class, closes a UText via utext_close(). */
1596 U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
1597 
1598 U_NAMESPACE_END
1599 
1600 #endif
1601 
1602 
1603 #endif
int32_t UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Function type declaration for UText.extract().
Definition: utext.h:1054
int32_t c
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1496
int64_t utext_nativeLength(UText *ut)
Get the length of the text.
UChar32 utext_previous32(UText *ut)
Move the iterator position to the character (code point) whose index precedes the current position...
UTextClose * spare3
(private) Spare function pointer
Definition: utext.h:1308
int32_t nativeIndexingLimit
(protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond...
Definition: utext.h:1390
int64_t chunkNativeStart
(protected) Native index of the first character in the text chunk.
Definition: utext.h:1398
UBool utext_isWritable(const UText *ut)
Return TRUE if the text can be written (modified) with utext_replace() or utext_copy().
void UTextClose(UText *ut)
Function type declaration for UText.utextClose().
Definition: utext.h:1182
int32_t providerProperties
Text provider properties.
Definition: utext.h:1357
void * pExtra
(protected) Pointer to additional space requested by the text provider during the utext_open operatio...
Definition: utext.h:1435
int64_t a
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1482
UChar32 utext_previous32From(UText *ut, int64_t nativeIndex)
Set the iteration index, and return the code point preceding the one specified by the initial index...
int32_t chunkLength
(protected) Length of the text chunk (UTF-16 buffer), in UChars.
Definition: utext.h:1411
C++ API: Unicode String.
void UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status)
Function type declaration for UText.copy().
Definition: utext.h:1123
UTextMapNativeIndexToUTF16 * mapNativeIndexToUTF16
(public) Function pointer for UTextMapNativeIndexToUTF16.
Definition: utext.h:1282
UText * utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
Open a read-only UText implementation for UTF-8 strings.
UText * UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
Function type declaration for UText.clone().
Definition: utext.h:984
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:870
int32_t reserved1
(private) Alignment padding.
Definition: utext.h:1216
void utext_freeze(UText *ut)
UTextExtract * extract
(public) Function pointer for UTextExtract.
Definition: utext.h:1250
int64_t UTextNativeLength(UText *ut)
Function type declaration for UText.nativeLength().
Definition: utext.h:996
void * privP
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1471
UTextClose * close
(public) Function pointer for UTextClose.
Definition: utext.h:1290
int32_t flags
(private) Flags for managing the allocation and freeing of memory associated with this UText...
Definition: utext.h:1349
int32_t privC
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1518
UTextClone * clone
(public) Function pointer for UTextClone
Definition: utext.h:1225
UTextNativeLength * nativeLength
(public) Function pointer for UTextNativeLength. May be expensive to compute!
Definition: utext.h:1234
const void * q
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1458
(public) Function dispatch table for UText.
Definition: utext.h:1194
UChar32 utext_next32(UText *ut)
Get the code point at the current iteration position of the UText, and advance the position to the fi...
The provider supports modifying the text via the replace() and copy() functions.
Definition: utext.h:929
UTextAccess * access
(public) Function pointer for UTextAccess.
Definition: utext.h:1242
UChar32 utext_char32At(UText *ut, int64_t nativeIndex)
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds...
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:84
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:77
Text provider owns the text storage.
Definition: utext.h:943
const void * p
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1452
void utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status)
Copy or move a substring from one position to another within the text, while retaining any metadata a...
UTextCopy * copy
(public) Function pointer for UTextCopy.
Definition: utext.h:1266
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
UBool utext_equals(const UText *a, const UText *b)
Compare two UText objects for equality.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
UChar32 utext_current32(UText *ut)
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached...
const UChar * chunkContents
(protected) pointer to a chunk of text in UTF-16 format.
Definition: utext.h:1422
int32_t reserved3
Definition: utext.h:1216
UText * utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status)
Open a UText implementation over an ICU CharacterIterator.
int32_t tableSize
(public) Function table size, sizeof(UTextFuncs). Intended for use should the table grow to accommodat...
Definition: utext.h:1209
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:425
int32_t privB
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1512
UBool UTextAccess(UText *ut, int64_t nativeIndex, UBool forward)
Function type declaration for UText.access().
Definition: utext.h:1024
int32_t chunkOffset
(protected) Current iteration position within the text chunk (UTF-16 buffer).
Definition: utext.h:1405
int32_t extraSize
(protected) Size in bytes of the extra space (pExtra).
Definition: utext.h:1381
UChar32 utext_next32From(UText *ut, int64_t nativeIndex)
Set the iteration index and return the code point at that index.
int64_t UTextMapOffsetToNative(const UText *ut)
Function type declaration for UText.mapOffsetToNative().
Definition: utext.h:1143
int64_t utext_getPreviousNativeIndex(UText *ut)
Get the native index of the character preceding the current position.
int32_t utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract text from a UText into a UChar buffer.
C API: Unicode Properties.
It is potentially time consuming for the provider to determine the length of the text.
Definition: utext.h:915
int32_t UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex)
Function type declaration for UText.mapIndexToUTF16().
Definition: utext.h:1161
void utext_setNativeIndex(UText *ut, int64_t nativeIndex)
Set the current iteration position to the nearest code point boundary at or preceding the specified i...
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:376
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:85
int64_t privA
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1506
There is meta data associated with the text.
Definition: utext.h:935
const void * r
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1464
UBool utext_moveIndex32(UText *ut, int32_t delta)
Move the iterator position by delta code points.
UText * utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
Clone a UText.
int64_t chunkNativeLimit
(protected) Native index of the first character position following the current chunk.
Definition: utext.h:1375
int32_t sizeOfStruct
(public) sizeOfStruct=sizeof(UText). Allows possible backward compatible extension.
Definition: utext.h:1365
UTextClose * spare2
(private) Spare function pointer
Definition: utext.h:1302
int32_t b
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1489
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
"Smart pointer" class, closes a UText via utext_close().
int64_t utext_getNativeIndex(const UText *ut)
Get the current iterator position, which can range from 0 to the length of the text.
const UTextFuncs * pFuncs
(public) Pointer to Dispatch table for accessing functions for this UText.
Definition: utext.h:1428
UText * utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status)
Open a writable UText implementation for an ICU Replaceable object.
UTextClose * spare1
(private) Spare function pointer
Definition: utext.h:1296
uint32_t magic
(private) Magic.
Definition: utext.h:1341
UText * utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status)
Open a writable UText for a non-const UnicodeString.
int32_t UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status)
Function type declaration for UText.replace().
Definition: utext.h:1089
UText struct.
Definition: utext.h:1328
UTextReplace * replace
(public) Function pointer for UTextReplace.
Definition: utext.h:1258
Basic definitions for ICU, for both C and C++ APIs.
UBool utext_isLengthExpensive(const UText *ut)
Return TRUE if calculating the length of the text could be expensive.
UText * utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status)
Open a UText for a const UnicodeString.
int32_t utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status)
Replace a range of the original text with a replacement text.
Text chunks remain valid and usable until the text object is modified or deleted, not just until the ...
Definition: utext.h:922
const void * context
(protected) Pointer to string or text-containing object or similar.
Definition: utext.h:1443
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:294
UText * utext_close(UText *ut)
Close function for UText instances.
C++ API: Character Iterator.
UTextMapOffsetToNative * mapOffsetToNative
(public) Function pointer for UTextMapOffsetToNative.
Definition: utext.h:1274
int32_t reserved2
Definition: utext.h:1216
UText * utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status)
Common function for use by Text Provider implementations to allocate and/or initialize a new UText st...
UBool utext_hasMetaData(const UText *ut)
Test whether there is meta data associated with the text.
UText * utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
Open a read-only UText for UChar * string.
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:111
int8_t UBool
The ICU boolean type.
Definition: umachine.h:261
C++ API: Replaceable String.