diff options
Diffstat (limited to 'common/quicktime_win32/TextCommon.h')
| -rw-r--r-- | common/quicktime_win32/TextCommon.h | 942 |
1 files changed, 942 insertions, 0 deletions
diff --git a/common/quicktime_win32/TextCommon.h b/common/quicktime_win32/TextCommon.h new file mode 100644 index 0000000..9998436 --- /dev/null +++ b/common/quicktime_win32/TextCommon.h @@ -0,0 +1,942 @@ +/* + File: TextCommon.h + + Contains: TextEncoding-related types and constants, and prototypes for related functions + + Version: QuickTime 7.3 + + Copyright: (c) 2007 (c) 1995-2002 by Apple Computer, Inc., all rights reserved. + + Bugs?: For bug reports, consult the following page on + the World Wide Web: + + http://developer.apple.com/bugreporter/ + +*/ +#ifndef __TEXTCOMMON__ +#define __TEXTCOMMON__ + +#ifndef __MACTYPES__ +#include <MacTypes.h> +#endif + + + + +#if PRAGMA_ONCE +#pragma once +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if PRAGMA_IMPORT +#pragma import on +#endif + +#if PRAGMA_STRUCT_ALIGN + #pragma options align=mac68k +#elif PRAGMA_STRUCT_PACKPUSH + #pragma pack(push, 2) +#elif PRAGMA_STRUCT_PACK + #pragma pack(2) +#endif + +/* TextEncodingBase type & values */ +/* (values 0-32 correspond to the Script Codes defined in Inside Macintosh: Text pages 6-52 and 6-53) */ +typedef UInt32 TextEncodingBase; +enum { + /* Mac OS encodings (8 and 32 are skipped in this enum; the older-names enum that follows defines them)*/ + kTextEncodingMacRoman = 0L, + kTextEncodingMacJapanese = 1, + kTextEncodingMacChineseTrad = 2, + kTextEncodingMacKorean = 3, + kTextEncodingMacArabic = 4, + kTextEncodingMacHebrew = 5, + kTextEncodingMacGreek = 6, + kTextEncodingMacCyrillic = 7, + kTextEncodingMacDevanagari = 9, + kTextEncodingMacGurmukhi = 10, + kTextEncodingMacGujarati = 11, + kTextEncodingMacOriya = 12, + kTextEncodingMacBengali = 13, + kTextEncodingMacTamil = 14, + kTextEncodingMacTelugu = 15, + kTextEncodingMacKannada = 16, + kTextEncodingMacMalayalam = 17, + kTextEncodingMacSinhalese = 18, + kTextEncodingMacBurmese = 19, + kTextEncodingMacKhmer = 20, + kTextEncodingMacThai = 21, + kTextEncodingMacLaotian = 22, + kTextEncodingMacGeorgian = 23, + kTextEncodingMacArmenian = 24, + kTextEncodingMacChineseSimp = 25, + 
kTextEncodingMacTibetan = 26, + kTextEncodingMacMongolian = 27, + kTextEncodingMacEthiopic = 28, + kTextEncodingMacCentralEurRoman = 29, + kTextEncodingMacVietnamese = 30, + kTextEncodingMacExtArabic = 31, /* The following use script code 0, smRoman*/ + kTextEncodingMacSymbol = 33, + kTextEncodingMacDingbats = 34, + kTextEncodingMacTurkish = 35, + kTextEncodingMacCroatian = 36, + kTextEncodingMacIcelandic = 37, + kTextEncodingMacRomanian = 38, + kTextEncodingMacCeltic = 39, + kTextEncodingMacGaelic = 40, + kTextEncodingMacKeyboardGlyphs = 41 +}; + +/* The following are older names for backward compatibility*/ +enum { + kTextEncodingMacTradChinese = kTextEncodingMacChineseTrad, + kTextEncodingMacRSymbol = 8, + kTextEncodingMacSimpChinese = kTextEncodingMacChineseSimp, + kTextEncodingMacGeez = kTextEncodingMacEthiopic, + kTextEncodingMacEastEurRoman = kTextEncodingMacCentralEurRoman, + kTextEncodingMacUninterp = 32 +}; + + +/* + Beginning in Mac OS 8.5, the following meta-value is used to indicate Unicode in some parts + of the Mac OS which previously only expected a Mac OS script code. In some of these places, + only 7 bits are available to indicate encoding (script code), so kTextEncodingUnicodeDefault + cannot be used. For example, kTextEncodingMacUnicode can be used to indicate Unicode in the + 7-bit script code field of a Unicode input method's ComponentDescription.componentFlags field; + it can also be used to indicate Unicode in the 16-bit script code field of an AppleEvent's + typeIntlWritingCode text tag. 
+*/ +enum { + kTextEncodingMacUnicode = 0x7E /* Meta-value, Unicode as a Mac encoding*/ +}; + +/* Variant Mac OS encodings that use script codes other than 0*/ +enum { + /* The following use script code 4, smArabic*/ + kTextEncodingMacFarsi = 0x8C, /* Like MacArabic but uses Farsi digits*/ + /* The following use script code 7, smCyrillic*/ + kTextEncodingMacUkrainian = 0x98, /* Meta-value in TEC 1.5 & later; maps to kTextEncodingMacCyrillic variant */ + /* The following use script code 28, smEthiopic*/ + kTextEncodingMacInuit = 0xEC, /* The following use script code 32, smUnimplemented*/ + kTextEncodingMacVT100 = 0xFC /* VT100/102 font from Comm Toolbox: Latin-1 repertoire + box drawing etc*/ +}; + +/* Special Mac OS encodings*/ +enum { + kTextEncodingMacHFS = 0xFF /* Meta-value, should never appear in a table.*/ +}; + +/* Unicode & ISO UCS encodings begin at 0x100*/ +enum { + kTextEncodingUnicodeDefault = 0x0100, /* Meta-value, should never appear in a table.*/ + kTextEncodingUnicodeV1_1 = 0x0101, + kTextEncodingISO10646_1993 = 0x0101, /* Code points identical to Unicode 1.1*/ + kTextEncodingUnicodeV2_0 = 0x0103, /* New location for Korean Hangul*/ + kTextEncodingUnicodeV2_1 = 0x0103, /* We treat both Unicode 2.0 and Unicode 2.1 as 2.1*/ + kTextEncodingUnicodeV3_0 = 0x0104, + kTextEncodingUnicodeV3_1 = 0x0105, /* Adds characters requiring surrogate pairs in UTF-16*/ + kTextEncodingUnicodeV3_2 = 0x0106 +}; + +/* ISO 8-bit and 7-bit encodings begin at 0x200*/ +enum { + kTextEncodingISOLatin1 = 0x0201, /* ISO 8859-1*/ + kTextEncodingISOLatin2 = 0x0202, /* ISO 8859-2*/ + kTextEncodingISOLatin3 = 0x0203, /* ISO 8859-3*/ + kTextEncodingISOLatin4 = 0x0204, /* ISO 8859-4*/ + kTextEncodingISOLatinCyrillic = 0x0205, /* ISO 8859-5*/ + kTextEncodingISOLatinArabic = 0x0206, /* ISO 8859-6, = ASMO 708, =DOS CP 708*/ + kTextEncodingISOLatinGreek = 0x0207, /* ISO 8859-7*/ + kTextEncodingISOLatinHebrew = 0x0208, /* ISO 8859-8*/ + kTextEncodingISOLatin5 = 0x0209, /* ISO 8859-9*/ + 
kTextEncodingISOLatin6 = 0x020A, /* ISO 8859-10 */ + kTextEncodingISOLatin7 = 0x020D, /* ISO 8859-13, Baltic Rim */ + kTextEncodingISOLatin8 = 0x020E, /* ISO 8859-14, Celtic */ + kTextEncodingISOLatin9 = 0x020F /* ISO 8859-15, 8859-1 changed for EURO & CP1252 letters */ +}; + +/* MS-DOS & Windows encodings begin at 0x400*/ +enum { + kTextEncodingDOSLatinUS = 0x0400, /* code page 437*/ + kTextEncodingDOSGreek = 0x0405, /* code page 737 (formerly code page 437G)*/ + kTextEncodingDOSBalticRim = 0x0406, /* code page 775*/ + kTextEncodingDOSLatin1 = 0x0410, /* code page 850, "Multilingual"*/ + kTextEncodingDOSGreek1 = 0x0411, /* code page 851*/ + kTextEncodingDOSLatin2 = 0x0412, /* code page 852, Slavic*/ + kTextEncodingDOSCyrillic = 0x0413, /* code page 855, IBM Cyrillic*/ + kTextEncodingDOSTurkish = 0x0414, /* code page 857, IBM Turkish*/ + kTextEncodingDOSPortuguese = 0x0415, /* code page 860*/ + kTextEncodingDOSIcelandic = 0x0416, /* code page 861*/ + kTextEncodingDOSHebrew = 0x0417, /* code page 862*/ + kTextEncodingDOSCanadianFrench = 0x0418, /* code page 863*/ + kTextEncodingDOSArabic = 0x0419, /* code page 864*/ + kTextEncodingDOSNordic = 0x041A, /* code page 865*/ + kTextEncodingDOSRussian = 0x041B, /* code page 866*/ + kTextEncodingDOSGreek2 = 0x041C, /* code page 869, IBM Modern Greek*/ + kTextEncodingDOSThai = 0x041D, /* code page 874, also for Windows*/ + kTextEncodingDOSJapanese = 0x0420, /* code page 932, also for Windows; Shift-JIS with additions*/ + kTextEncodingDOSChineseSimplif = 0x0421, /* code page 936, also for Windows; was EUC-CN, now GBK (EUC-CN extended)*/ + kTextEncodingDOSKorean = 0x0422, /* code page 949, also for Windows; Unified Hangul Code (EUC-KR extended)*/ + kTextEncodingDOSChineseTrad = 0x0423, /* code page 950, also for Windows; Big-5*/ + kTextEncodingWindowsLatin1 = 0x0500, /* code page 1252*/ + kTextEncodingWindowsANSI = 0x0500, /* code page 1252 (alternate name)*/ + kTextEncodingWindowsLatin2 = 0x0501, /* code page 1250, Central 
Europe*/ + kTextEncodingWindowsCyrillic = 0x0502, /* code page 1251, Slavic Cyrillic*/ + kTextEncodingWindowsGreek = 0x0503, /* code page 1253*/ + kTextEncodingWindowsLatin5 = 0x0504, /* code page 1254, Turkish*/ + kTextEncodingWindowsHebrew = 0x0505, /* code page 1255*/ + kTextEncodingWindowsArabic = 0x0506, /* code page 1256*/ + kTextEncodingWindowsBalticRim = 0x0507, /* code page 1257*/ + kTextEncodingWindowsVietnamese = 0x0508, /* code page 1258*/ + kTextEncodingWindowsKoreanJohab = 0x0510 /* code page 1361, for Windows NT*/ +}; + +/* Various national standards begin at 0x600*/ +enum { + kTextEncodingUS_ASCII = 0x0600, + kTextEncodingJIS_X0201_76 = 0x0620, /* JIS Roman and 1-byte katakana (halfwidth)*/ + kTextEncodingJIS_X0208_83 = 0x0621, + kTextEncodingJIS_X0208_90 = 0x0622, + kTextEncodingJIS_X0212_90 = 0x0623, + kTextEncodingJIS_C6226_78 = 0x0624, + kTextEncodingShiftJIS_X0213_00 = 0x0628, /* Shift-JIS format encoding of JIS X0213 planes 1 and 2*/ + kTextEncodingGB_2312_80 = 0x0630, + kTextEncodingGBK_95 = 0x0631, /* annex to GB 13000-93; for Windows 95; EUC-CN extended*/ + kTextEncodingGB_18030_2000 = 0x0632, + kTextEncodingKSC_5601_87 = 0x0640, /* same as KSC 5601-92 without Johab annex*/ + kTextEncodingKSC_5601_92_Johab = 0x0641, /* KSC 5601-92 Johab annex*/ + kTextEncodingCNS_11643_92_P1 = 0x0651, /* CNS 11643-1992 plane 1*/ + kTextEncodingCNS_11643_92_P2 = 0x0652, /* CNS 11643-1992 plane 2*/ + kTextEncodingCNS_11643_92_P3 = 0x0653 /* CNS 11643-1992 plane 3 (was plane 14 in 1986 version)*/ +}; + +/* ISO 2022 collections begin at 0x800*/ +enum { + kTextEncodingISO_2022_JP = 0x0820, /* RFC 1468*/ + kTextEncodingISO_2022_JP_2 = 0x0821, /* RFC 1554*/ + kTextEncodingISO_2022_JP_1 = 0x0822, /* RFC 2237*/ + kTextEncodingISO_2022_JP_3 = 0x0823, /* JIS X0213*/ + kTextEncodingISO_2022_CN = 0x0830, + kTextEncodingISO_2022_CN_EXT = 0x0831, + kTextEncodingISO_2022_KR = 0x0840 +}; + +/* EUC collections begin at 0x900*/ +enum { + kTextEncodingEUC_JP = 0x0920, /* ISO 
646, 1-byte katakana, JIS 208, JIS 212*/ + kTextEncodingEUC_CN = 0x0930, /* ISO 646, GB 2312-80*/ + kTextEncodingEUC_TW = 0x0931, /* ISO 646, CNS 11643-1992 Planes 1-16*/ + kTextEncodingEUC_KR = 0x0940 /* ISO 646, KS C 5601-1987*/ +}; + +/* Misc standards begin at 0xA00*/ +enum { + kTextEncodingShiftJIS = 0x0A01, /* plain Shift-JIS*/ + kTextEncodingKOI8_R = 0x0A02, /* Russian internet standard*/ + kTextEncodingBig5 = 0x0A03, /* Big-5 (has variants)*/ + kTextEncodingMacRomanLatin1 = 0x0A04, /* Mac OS Roman permuted to align with ISO Latin-1*/ + kTextEncodingHZ_GB_2312 = 0x0A05, /* HZ (RFC 1842, for Chinese mail & news)*/ + kTextEncodingBig5_HKSCS_1999 = 0x0A06 /* Big-5 with Hong Kong special char set supplement*/ +}; + +/* Other platform encodings*/ +enum { + kTextEncodingNextStepLatin = 0x0B01, /* NextStep Latin encoding*/ + kTextEncodingNextStepJapanese = 0x0B02 /* NextStep Japanese encoding (variant of EUC-JP)*/ +}; + +/* EBCDIC & IBM host encodings begin at 0xC00*/ +enum { + kTextEncodingEBCDIC_US = 0x0C01, /* basic EBCDIC-US*/ + kTextEncodingEBCDIC_CP037 = 0x0C02 /* code page 037, extended EBCDIC (Latin-1 set) for US,Canada...*/ +}; + +/* Special values*/ +enum { + kTextEncodingMultiRun = 0x0FFF, /* Multi-encoding text with external run info*/ + kTextEncodingUnknown = 0xFFFF /* Unknown or unspecified */ +}; + + +/* TextEncodingVariant type & values */ +typedef UInt32 TextEncodingVariant; +/* Default TextEncodingVariant, for any TextEncodingBase*/ +enum { + kTextEncodingDefaultVariant = 0 +}; + +/* Variants of kTextEncodingMacRoman */ +enum { + kMacRomanDefaultVariant = 0, /* meta value, maps to 1 or 2 depending on System */ + kMacRomanCurrencySignVariant = 1, /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN*/ + kMacRomanEuroSignVariant = 2 /* Mac OS version >= 8.5, 0xDB is EURO SIGN */ +}; + +/* Variants of kTextEncodingMacCyrillic (for TEC 1.5 and later) */ +enum { + kMacCyrillicDefaultVariant = 0, /* meta value, maps to 1, 2, or 3 depending on System*/ + 
kMacCyrillicCurrSignStdVariant = 1, /* Mac OS < 9.0 (RU,BG), 0xFF = CURRENCY SIGN, 0xA2/0xB6 = CENT / PARTIAL DIFF.*/ + kMacCyrillicCurrSignUkrVariant = 2, /* Mac OS < 9.0 (UA,LangKit), 0xFF = CURRENCY SIGN, 0xA2/0xB6 = GHE WITH UPTURN*/ + kMacCyrillicEuroSignVariant = 3 /* Mac OS >= 9.0, 0xFF is EURO SIGN, 0xA2/0xB6 = GHE WITH UPTURN*/ +}; + +/* Variants of kTextEncodingMacIcelandic */ +enum { + kMacIcelandicStdDefaultVariant = 0, /* meta value, maps to 2 or 4 depending on System */ + kMacIcelandicTTDefaultVariant = 1, /* meta value, maps to 3 or 5 depending on System */ + /* The following are for Mac OS version < 8.5, 0xDB is CURRENCY SIGN */ + kMacIcelandicStdCurrSignVariant = 2, /* 0xBB/0xBC are fem./masc. ordinal indicators*/ + kMacIcelandicTTCurrSignVariant = 3, /* 0xBB/0xBC are fi/fl ligatures*/ + /* The following are for Mac OS version >= 8.5, 0xDB is EURO SIGN */ + kMacIcelandicStdEuroSignVariant = 4, /* 0xBB/0xBC are fem./masc. ordinal indicators*/ + kMacIcelandicTTEuroSignVariant = 5 /* 0xBB/0xBC are fi/fl ligatures*/ +}; + +/* Variants of kTextEncodingMacCroatian */ +enum { + kMacCroatianDefaultVariant = 0, /* meta value, maps to 1 or 2 depending on System */ + kMacCroatianCurrencySignVariant = 1, /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN */ + kMacCroatianEuroSignVariant = 2 /* Mac OS version >= 8.5, 0xDB is EURO SIGN */ +}; + + +/* Variants of kTextEncodingMacRomanian */ +enum { + kMacRomanianDefaultVariant = 0, /* meta value, maps to 1 or 2 depending on System */ + kMacRomanianCurrencySignVariant = 1, /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN */ + kMacRomanianEuroSignVariant = 2 /* Mac OS version >= 8.5, 0xDB is EURO SIGN */ +}; + + +/* Variants of kTextEncodingMacJapanese*/ +enum { + kMacJapaneseStandardVariant = 0, + kMacJapaneseStdNoVerticalsVariant = 1, + kMacJapaneseBasicVariant = 2, + kMacJapanesePostScriptScrnVariant = 3, + kMacJapanesePostScriptPrintVariant = 4, + kMacJapaneseVertAtKuPlusTenVariant = 5 +}; + +/* Variants of 
kTextEncodingMacArabic*/ +enum { + kMacArabicStandardVariant = 0, /* 0xC0 is 8-spoke asterisk, 0x2A & 0xAA are asterisk (e.g. Cairo)*/ + kMacArabicTrueTypeVariant = 1, /* 0xC0 is asterisk, 0x2A & 0xAA are multiply signs (e.g. Baghdad)*/ + kMacArabicThuluthVariant = 2, /* 0xC0 is Arabic five-point star, 0x2A & 0xAA are multiply signs*/ + kMacArabicAlBayanVariant = 3 /* 8-spoke asterisk, multiply sign, Koranic ligatures & parens*/ +}; + +/* Variants of kTextEncodingMacFarsi*/ +enum { + kMacFarsiStandardVariant = 0, /* 0xC0 is 8-spoke asterisk, 0x2A & 0xAA are asterisk (e.g. Tehran)*/ + kMacFarsiTrueTypeVariant = 1 /* asterisk, multiply signs, Koranic ligatures, geometric shapes*/ +}; + +/* Variants of kTextEncodingMacHebrew*/ +enum { + kMacHebrewStandardVariant = 0, + kMacHebrewFigureSpaceVariant = 1 +}; + +/* Variants of kTextEncodingMacVT100 */ +enum { + kMacVT100DefaultVariant = 0, /* meta value, maps to 1 or 2 depending on System */ + kMacVT100CurrencySignVariant = 1, /* Mac OS version < 8.5, 0xDB is CURRENCY SIGN */ + kMacVT100EuroSignVariant = 2 /* Mac OS version >= 8.5, 0xDB is EURO SIGN */ +}; + +/* Variants of Unicode & ISO 10646 encodings*/ +enum { + kUnicodeNoSubset = 0, + kUnicodeCanonicalDecompVariant = 2, /* canonical decomposition (NFD); excludes composed characters*/ + kUnicodeCanonicalCompVariant = 3, /* canonical composition (NFC); uses the composed chars as of Unicode 3.1*/ + kUnicodeHFSPlusDecompVariant = 8, /* decomposition for HFS+; doesn't decompose in 2000-2FFF, F900-FAFF, 2F800-2FAFF*/ + kUnicodeHFSPlusCompVariant = 9 /* composition based on HFS+ decomposition*/ +}; + +/* Variants of Big-5 encoding*/ +enum { + kBig5_BasicVariant = 0, + kBig5_StandardVariant = 1, /* 0xC6A1-0xC7FC: kana, Cyrillic, enclosed numerics*/ + kBig5_ETenVariant = 2 /* adds kana, Cyrillic, radicals, etc with hi bytes C6-C8,F9*/ +}; + +/* Variants of MacRomanLatin1 */ +enum { + kMacRomanLatin1DefaultVariant = 0, /* meta value, maps to others depending on System*/ + 
kMacRomanLatin1StandardVariant = 2, /* permuted MacRoman, EuroSignVariant*/ + kMacRomanLatin1TurkishVariant = 6, /* permuted MacTurkish*/ + kMacRomanLatin1CroatianVariant = 8, /* permuted MacCroatian, EuroSignVariant*/ + kMacRomanLatin1IcelandicVariant = 11, /* permuted MacIcelandic, StdEuroSignVariant*/ + kMacRomanLatin1RomanianVariant = 14 /* permuted MacRomanian, EuroSignVariant*/ +}; + +/* Unicode variants not yet supported (and not fully defined)*/ +enum { + kUnicodeNoCompatibilityVariant = 1, + kUnicodeNoCorporateVariant = 4 +}; + +/* The following are older names for backward compatibility*/ +enum { + kMacRomanStandardVariant = 0, + kMacIcelandicStandardVariant = 0, + kMacIcelandicTrueTypeVariant = 1, + kJapaneseStandardVariant = 0, + kJapaneseStdNoVerticalsVariant = 1, + kJapaneseBasicVariant = 2, + kJapanesePostScriptScrnVariant = 3, + kJapanesePostScriptPrintVariant = 4, + kJapaneseVertAtKuPlusTenVariant = 5, /* kJapaneseStdNoOneByteKanaVariant = 6, // replaced by kJapaneseNoOneByteKanaOption*/ + /* kJapaneseBasicNoOneByteKanaVariant = 7, // replaced by kJapaneseNoOneByteKanaOption */ + kHebrewStandardVariant = 0, + kHebrewFigureSpaceVariant = 1, + kUnicodeMaxDecomposedVariant = 2, /* replaced by kUnicodeCanonicalDecompVariant*/ + kUnicodeNoComposedVariant = 3, /* this really meant NoComposing; replaced by kUnicodeCanonicalCompVariant*/ + /* The following Japanese variant options were never supported and are now deprecated.*/ + /* In TEC 1.4 and later their functionality is replaced by the Unicode Converter options listed.*/ + kJapaneseNoOneByteKanaOption = 0x20, /* replaced by UnicodeConverter option kUnicodeNoHalfwidthCharsBit*/ + kJapaneseUseAsciiBackslashOption = 0x40 /* replaced by UnicodeConverter option kUnicodeForceASCIIRangeBit*/ +}; + +/* TextEncodingFormat type & values */ +typedef UInt32 TextEncodingFormat; +enum { + /* Default TextEncodingFormat for any TextEncodingBase*/ + kTextEncodingDefaultFormat = 0, /* Formats for Unicode & ISO 10646*/ 
+ kUnicode16BitFormat = 0, + kUnicodeUTF7Format = 1, + kUnicodeUTF8Format = 2, + kUnicode32BitFormat = 3, /* New constants since 10.3?*/ + kUnicodeUTF16Format = 0, /* UTF16 form (16-bit units), native or external byte order (see below); same value as kUnicode16BitFormat*/ + kUnicodeUTF32Format = 3, /* UTF32 form (32-bit units), native or external byte order (see below); same value as kUnicode32BitFormat*/ + kUnicodeUTF16BEFormat = 4, /* UTF16 form, explicit big-endian byte order, no BOM*/ + kUnicodeUTF16LEFormat = 5, /* UTF16 form, explicit little-endian byte order, no BOM*/ + kUnicodeUTF32BEFormat = 6, /* UTF32 form, explicit big-endian byte order, no BOM*/ + kUnicodeUTF32LEFormat = 7, /* UTF32 form, explicit little-endian byte order, no BOM*/ + kUnicodeSCSUFormat = 8 /* Std. Compression Scheme for Unicode, Unicode Tech Std. #6*/ +}; + +/* + Note for kUnicodeUTF16Format and kUnicodeUTF32Format: + - An array of UTF16Char (UniChar) or UTF32Char is normally understood to use "internal" or + platform-native byte ordering for kUnicodeUTF16Format and kUnicodeUTF32Format; the array MAY + begin with byte-order mark (BOM), but the BOM should match the internal ordering. + - If an array of bytes (such as char *) that can be in various encodings is specified to be + in Unicode with kUnicodeUTF16Format or kUnicodeUTF32Format (not explicitly BE or LE), then it + is assumed to use "external" byte ordering, which means: If there is a BOM at the beginning + of text, the BOM specifies the byte ordering, otherwise big-endian is assumed. 
+ Synonyms for some Unicode formats +*/ +/* TextEncoding type */ +typedef UInt32 TextEncoding; +/* name part selector for GetTextEncodingName*/ +typedef UInt32 TextEncodingNameSelector; +enum { + kTextEncodingFullName = 0, + kTextEncodingBaseName = 1, + kTextEncodingVariantName = 2, + kTextEncodingFormatName = 3 +}; + +/* Types used in conversion */ +struct TextEncodingRun { + ByteOffset offset; + TextEncoding textEncoding; +}; +typedef struct TextEncodingRun TextEncodingRun; +typedef TextEncodingRun * TextEncodingRunPtr; +typedef const TextEncodingRun * ConstTextEncodingRunPtr; +struct ScriptCodeRun { + ByteOffset offset; + ScriptCode script; +}; +typedef struct ScriptCodeRun ScriptCodeRun; +typedef ScriptCodeRun * ScriptCodeRunPtr; +typedef const ScriptCodeRun * ConstScriptCodeRunPtr; +typedef UInt8 * TextPtr; +typedef const UInt8 * ConstTextPtr; +/* Basic types for Unicode characters and strings:*/ +typedef UniChar * UniCharArrayPtr; +typedef const UniChar * ConstUniCharArrayPtr; +/* + UniCharArrayHandle is a handle type to correspond to UniCharArrayPtr, + i.e. a handle to an array of UniChars (UInt16s). +*/ +typedef UniCharArrayPtr * UniCharArrayHandle; +/* + UniCharArrayOffset is used to indicate an edge offset in an array + of UniChars (UInt16s). +*/ +typedef UInt32 UniCharArrayOffset; +/* enums for TextEncoding Conversion routines*/ +enum { + kTextScriptDontCare = -128, + kTextLanguageDontCare = -128, + kTextRegionDontCare = -128 +}; + +/* struct for TECGetInfo*/ + +struct TECInfo { + UInt16 format; /* format code for this struct*/ + UInt16 tecVersion; /* TEC version in BCD, e.g. 
0x0121 for 1.2.1*/ + UInt32 tecTextConverterFeatures; /* bitmask indicating TEC features/fixes*/ + UInt32 tecUnicodeConverterFeatures; /* bitmask indicating UnicodeConverter features/fixes*/ + UInt32 tecTextCommonFeatures; /* bitmask indicating TextCommon features/fixes*/ + Str31 tecTextEncodingsFolderName; /* localized name of Text Encodings folder (pascal string)*/ + Str31 tecExtensionFileName; /* localized name of TEC extension (pascal string)*/ + UInt16 tecLowestTEFileVersion; /* Lowest version (BCD) of all files in Text Encodings folder*/ + UInt16 tecHighestTEFileVersion; /* Highest version (BCD) of all files in Text Encodings folder*/ +}; +typedef struct TECInfo TECInfo; +typedef TECInfo * TECInfoPtr; +typedef TECInfoPtr * TECInfoHandle; +/* Value for TECInfo format code*/ +enum { + kTECInfoCurrentFormat = 2 /* any future formats will just add fields at the end*/ +}; + +/* + Defined feature/fix bits for tecUnicodeConverterFeatures field + Bit: Meaning if set: + ---- --------------- + kTECKeepInfoFixBit Unicode Converter no longer ignores other control flags if + kUnicodeKeepInfoBit is set. Bug fix in TEC Manager 1.2.1. + kTECFallbackTextLengthFixBit Unicode Converter honors the *srcConvLen and *destConvLen + returned by caller-supplied fallback handler for any status it + returns except for kTECUnmappableElementErr (previously it only + honored these values if noErr was returned). Bug fix in TEC + Manager 1.2.1. + kTECTextRunBitClearFixBit ConvertFromUnicodeToTextRun & ConvertFromUnicodeToScriptCodeRun + function correctly if the kUnicodeTextRunBit is set (previously + their determination of best target encoding was incorrect). Bug + fix in TEC Manager 1.3. + kTECTextToUnicodeScanFixBit ConvertFromTextToUnicode uses an improved scanner and maintains + some resulting state information, which it uses for mapping. 
+ This has several effects: + - Improved mapping of 0x30-0x39 digits in Mac OS Arabic, fewer + direction overrides when mapping Mac OS Arabic & Hebrew, and + improved mapping of certain characters in Indic encodings. + - Malformed input produces kTextMalformedInputErr. + - ConvertFromTextToUnicode accepts and uses the control flags + kUnicodeKeepInfoMask and kUnicodeStringUnterminatedMask. + Bug fix and enhancement in TEC Manager 1.3. + kTECAddForceASCIIChangesBit Define new control flag bits kUnicodeForceASCIIRangeBit and + kUnicodeNoHalfwidthCharsBit for use with + ConvertFromTextToUnicode, ConvertFromUnicodeToText, etc. + Enhancement in TEC Manager 1.4. + kTECPreferredEncodingFixBit CreateUnicodeToTextRunInfo and related functions fix a problem + that occurred when a preferred encoding was specified that did + not match the System script; the preferred script was not + actually placed first in the ordered list of encodings to use. + Bug fix in TEC Manager 1.4. + kTECAddTextRunHeuristicsBit Define new control flag bit kUnicodeTextRunHeuristicsBit for + use with ConvertFromUnicodeToTextRun. + kTECAddFallbackInterruptBit Define new option kUnicodeFallbackInterruptSafeMask for use + with SetFallbackUnicodeToText. If a client fallback handler is + installed without specifying this bit, ConvertFromUnicodeToText + will HLock the tables it uses (in case the fallback handler + moves memory); otherwise, it won't. 
+*/ + +enum { /* bit numbers of the feature/fix flags described in the preceding comment*/ + kTECKeepInfoFixBit = 0, + kTECFallbackTextLengthFixBit = 1, + kTECTextRunBitClearFixBit = 2, + kTECTextToUnicodeScanFixBit = 3, + kTECAddForceASCIIChangesBit = 4, + kTECPreferredEncodingFixBit = 5, + kTECAddTextRunHeuristicsBit = 6, + kTECAddFallbackInterruptBit = 7 +}; + +enum { /* masks for the same flags, each defined as 1L shifted left by the matching ...Bit value*/ + kTECKeepInfoFixMask = 1L << kTECKeepInfoFixBit, + kTECFallbackTextLengthFixMask = 1L << kTECFallbackTextLengthFixBit, + kTECTextRunBitClearFixMask = 1L << kTECTextRunBitClearFixBit, + kTECTextToUnicodeScanFixMask = 1L << kTECTextToUnicodeScanFixBit, + kTECAddForceASCIIChangesMask = 1L << kTECAddForceASCIIChangesBit, + kTECPreferredEncodingFixMask = 1L << kTECPreferredEncodingFixBit, + kTECAddTextRunHeuristicsMask = 1L << kTECAddTextRunHeuristicsBit, + kTECAddFallbackInterruptMask = 1L << kTECAddFallbackInterruptBit +}; + +/* + ------------------------------------------------------------------------------------------------- + CONSTANTS for common and special Unicode code values + ------------------------------------------------------------------------------------------------- +*/ + +enum { + kUnicodeByteOrderMark = 0xFEFF, + kUnicodeObjectReplacement = 0xFFFC, /* placeholder for non-text object*/ + kUnicodeReplacementChar = 0xFFFD, /* Unicode replacement for unconvertible input char*/ + kUnicodeSwappedByteOrderMark = 0xFFFE, /* not a Unicode char; byte-swapped version of FEFF*/ + kUnicodeNotAChar = 0xFFFF /* not a Unicode char; may be used as a terminator*/ +}; + +/* + ------------------------------------------------------------------------------------------------- + CONSTANTS & DATA STRUCTURES for Unicode Properties + ------------------------------------------------------------------------------------------------- +*/ +typedef SInt32 UCCharPropertyType; +enum { + kUCCharPropTypeGenlCategory = 1, /* requests 
enumeration value*/ + kUCCharPropTypeCombiningClass = 2, /* requests numeric value 0..255*/ + kUCCharPropTypeBidiCategory = 3 /* requests enumeration value*/ +}; + 
+typedef UInt32 UCCharPropertyValue; +/* General Category enumeration values (requested by kUCCharPropTypeGenlCategory); note that the values 19 and 27 are skipped in this enum*/ +enum { + /* Normative categories:*/ + kUCGenlCatOtherNotAssigned = 0, /* Cn Other, Not Assigned*/ + kUCGenlCatOtherControl = 1, /* Cc Other, Control*/ + kUCGenlCatOtherFormat = 2, /* Cf Other, Format*/ + kUCGenlCatOtherSurrogate = 3, /* Cs Other, Surrogate*/ + kUCGenlCatOtherPrivateUse = 4, /* Co Other, Private Use*/ + kUCGenlCatMarkNonSpacing = 5, /* Mn Mark, Non-Spacing*/ + kUCGenlCatMarkSpacingCombining = 6, /* Mc Mark, Spacing Combining*/ + kUCGenlCatMarkEnclosing = 7, /* Me Mark, Enclosing*/ + kUCGenlCatNumberDecimalDigit = 8, /* Nd Number, Decimal Digit*/ + kUCGenlCatNumberLetter = 9, /* Nl Number, Letter*/ + kUCGenlCatNumberOther = 10, /* No Number, Other*/ + kUCGenlCatSeparatorSpace = 11, /* Zs Separator, Space*/ + kUCGenlCatSeparatorLine = 12, /* Zl Separator, Line*/ + kUCGenlCatSeparatorParagraph = 13, /* Zp Separator, Paragraph*/ + kUCGenlCatLetterUppercase = 14, /* Lu Letter, Uppercase*/ + kUCGenlCatLetterLowercase = 15, /* Ll Letter, Lowercase*/ + kUCGenlCatLetterTitlecase = 16, /* Lt Letter, Titlecase*/ + /* Informative categories:*/ + kUCGenlCatLetterModifier = 17, /* Lm Letter, Modifier*/ + kUCGenlCatLetterOther = 18, /* Lo Letter, Other*/ + kUCGenlCatPunctConnector = 20, /* Pc Punctuation, Connector*/ + kUCGenlCatPunctDash = 21, /* Pd Punctuation, Dash*/ + kUCGenlCatPunctOpen = 22, /* Ps Punctuation, Open*/ + kUCGenlCatPunctClose = 23, /* Pe Punctuation, Close*/ + kUCGenlCatPunctInitialQuote = 24, /* Pi Punctuation, Initial quote*/ + kUCGenlCatPunctFinalQuote = 25, /* Pf Punctuation, Final quote*/ + kUCGenlCatPunctOther = 26, /* Po Punctuation, Other*/ + kUCGenlCatSymbolMath = 28, /* Sm Symbol, Math*/ + kUCGenlCatSymbolCurrency = 29, /* Sc Symbol, Currency*/ + kUCGenlCatSymbolModifier = 30, /* Sk Symbol, Modifier*/ + kUCGenlCatSymbolOther = 31 /* So Symbol, Other*/ +}; + +/* Bidirectional Category enumeration values (requested 
by kUCCharPropTypeBidiCategory)*/ +enum { + kUCBidiCatNotApplicable = 0, /* for now use this for unassigned*/ + /* Strong types:*/ + kUCBidiCatLeftRight = 1, /* L Left-to-Right*/ + kUCBidiCatRightLeft = 2, /* R Right-to-Left*/ + /* Weak types:*/ + kUCBidiCatEuroNumber = 3, /* EN European Number*/ + kUCBidiCatEuroNumberSeparator = 4, /* ES European Number Separator*/ + kUCBidiCatEuroNumberTerminator = 5, /* ET European Number Terminator*/ + kUCBidiCatArabicNumber = 6, /* AN Arabic Number*/ + kUCBidiCatCommonNumberSeparator = 7, /* CS Common Number Separator*/ + /* Separators:*/ + kUCBidiCatBlockSeparator = 8, /* B Paragraph Separator (was Block Separator)*/ + kUCBidiCatSegmentSeparator = 9, /* S Segment Separator*/ + /* Neutrals:*/ + kUCBidiCatWhitespace = 10, /* WS Whitespace*/ + kUCBidiCatOtherNeutral = 11, /* ON Other Neutrals (unassigned codes could use this)*/ + /* New categories for Unicode 3.0*/ + kUCBidiCatRightLeftArabic = 12, /* AL Right-to-Left Arabic (was Arabic Letter)*/ + kUCBidiCatLeftRightEmbedding = 13, /* LRE Left-to-Right Embedding*/ + kUCBidiCatRightLeftEmbedding = 14, /* RLE Right-to-Left Embedding*/ + kUCBidiCatLeftRightOverride = 15, /* LRO Left-to-Right Override*/ + kUCBidiCatRightLeftOverride = 16, /* RLO Right-to-Left Override*/ + kUCBidiCatPopDirectionalFormat = 17, /* PDF Pop Directional Format*/ + kUCBidiCatNonSpacingMark = 18, /* NSM Non-Spacing Mark*/ + kUCBidiCatBoundaryNeutral = 19 /* BN Boundary Neutral*/ +}; + +/* + ------------------------------------------------------------------------------------------------- + Prototypes for TextEncoding functions + ------------------------------------------------------------------------------------------------- +*/ + + +/* + * CreateTextEncoding() + * + * Takes a TextEncodingBase, a TextEncodingVariant and a TextEncodingFormat + * and returns a TextEncoding value. 
+ * + * Availability: + * Non-Carbon CFM: in TextCommon 1.1 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API( TextEncoding ) +CreateTextEncoding( + TextEncodingBase encodingBase, + 
TextEncodingVariant encodingVariant, + TextEncodingFormat encodingFormat); + + +/* + * GetTextEncodingBase() + * + * Availability: + * Non-Carbon CFM: in TextCommon 1.1 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API( TextEncodingBase ) +GetTextEncodingBase(TextEncoding encoding); + + +/* + * GetTextEncodingVariant() + * + * Availability: + * Non-Carbon CFM: in TextCommon 1.1 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API( TextEncodingVariant ) +GetTextEncodingVariant(TextEncoding encoding); + + +/* + * GetTextEncodingFormat() + * + * Availability: + * Non-Carbon CFM: in TextCommon 1.1 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API( TextEncodingFormat ) +GetTextEncodingFormat(TextEncoding encoding); + + +/* + * ResolveDefaultTextEncoding() + * + * Availability: + * Non-Carbon CFM: in TextCommon 1.1 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API( TextEncoding ) +ResolveDefaultTextEncoding(TextEncoding encoding); + + +/* + * GetTextEncodingName() + * + * Availability: + * Non-Carbon CFM: in TextCommon 1.1 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API_C( OSStatus ) +GetTextEncodingName( + TextEncoding iEncoding, + TextEncodingNameSelector iNamePartSelector, + RegionCode iPreferredRegion, + TextEncoding iPreferredEncoding, + ByteCount iOutputBufLen, + ByteCount * oNameLength, + RegionCode * oActualRegion, /* can be NULL */ + TextEncoding * oActualEncoding, /* can be NULL */ + TextPtr oEncodingName); + + +/* + * TECGetInfo() + * + * Availability: + * Non-Carbon CFM: in TextCommon 1.2.1 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API( OSStatus ) +TECGetInfo(TECInfoHandle * tecInfo); + + + +/* + * 
UpgradeScriptInfoToTextEncoding() + * + * Availability: + * Non-Carbon CFM: in TextCommon 1.1 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API( OSStatus ) +UpgradeScriptInfoToTextEncoding( + ScriptCode iTextScriptID, + LangCode iTextLanguageID, + RegionCode iRegionID, + ConstStr255Param iTextFontname, + TextEncoding * oEncoding); + + +/* + * RevertTextEncodingToScriptInfo() + * + * Availability: + * Non-Carbon CFM: in TextCommon 1.1 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API( OSStatus ) +RevertTextEncodingToScriptInfo( + TextEncoding iEncoding, + ScriptCode * oTextScriptID, + LangCode * oTextLanguageID, /* can be NULL */ + Str255 oTextFontname); /* can be NULL */ + + +/* + * GetTextEncodingFromScriptInfo() + * + * Availability: + * Non-Carbon CFM: not available + * CarbonLib: not available in CarbonLib 1.x, is available on Mac OS X version 10.2 and later + * Mac OS X: in version 10.2 and later + */ +EXTERN_API( OSStatus ) +GetTextEncodingFromScriptInfo( + ScriptCode iTextScriptID, + LangCode iTextLanguageID, + RegionCode iTextRegionID, + TextEncoding * oEncoding); + + +/* + * GetScriptInfoFromTextEncoding() + * + * Availability: + * Non-Carbon CFM: not available + * CarbonLib: not available in CarbonLib 1.x, is available on Mac OS X version 10.2 and later + * Mac OS X: in version 10.2 and later + */ +EXTERN_API( OSStatus ) +GetScriptInfoFromTextEncoding( + TextEncoding iEncoding, + ScriptCode * oTextScriptID, + LangCode * oTextLanguageID); /* can be NULL */ + + +/* + * NearestMacTextEncodings() + * + * Availability: + * Non-Carbon CFM: in TextCommon 1.5 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API( OSStatus ) +NearestMacTextEncodings( + TextEncoding generalEncoding, + TextEncoding * bestMacEncoding, + TextEncoding * alternateMacEncoding); + + +/* + * UCGetCharProperty() + * + * 
Availability: + * Non-Carbon CFM: in TextCommon 1.5 and later + * CarbonLib: in CarbonLib 1.0 and later + * Mac OS X: in version 10.0 and later + */ +EXTERN_API_C( OSStatus ) +UCGetCharProperty( + const UniChar * charPtr, + UniCharCount textLength, + UCCharPropertyType propType, + UCCharPropertyValue * propValue); + + +/* + ------------------------------------------------------------------------------------------------- + Surrogate pair utilities + ------------------------------------------------------------------------------------------------- +*/ + + +#if !defined(UC_INLINE) + #if defined(__GNUC__) + #define UC_INLINE static __inline__ + #elif defined(__MWERKS__) || defined(__cplusplus) + #define UC_INLINE static inline + #else + #define UC_INLINE static + #endif +#endif + +// surrogate ranges +enum { + kUCHighSurrogateRangeStart = 0xD800UL, + kUCHighSurrogateRangeEnd = 0xDBFFUL, + kUCLowSurrogateRangeStart = 0xDC00UL, + kUCLowSurrogateRangeEnd = 0xDFFFUL +}; + + +/*! + @function UCIsSurrogateHighCharacter + Reports whether or not the character is a high surrogate. + @param character The character to be checked. + @result true, if character is a high surrogate, otherwise false. +*/ +UC_INLINE Boolean UCIsSurrogateHighCharacter( UniChar character ) { + /* return ( ( character >= kUCHighSurrogateRangeStart ) && (character <= kUCHighSurrogateRangeEnd ) ? true : false ); */ + return ( ( character & 0xFC00UL ) == kUCHighSurrogateRangeStart ); +} + +/*! + @function UCIsSurrogateLowCharacter + Reports whether or not the character is a low surrogate. + @param character The character to be checked. + @result true, if character is a low surrogate, otherwise false. +*/ +UC_INLINE Boolean UCIsSurrogateLowCharacter( UniChar character ) { + /* return ( ( character >= kUCLowSurrogateRangeStart ) && ( character <= kUCLowSurrogateRangeEnd ) ? true : false ); */ + return ( ( character & 0xFC00UL ) == kUCLowSurrogateRangeStart ); +} + +/*! 
+ @function UCGetUnicodeScalarValueForSurrogatePair + Returns the UTF-32 value corresponding to the surrogate pair passed in. + @param surrogateHigh The high surrogate character. If this parameter + is not a valid high surrogate character, the behavior is undefined. + @param surrogateLow The low surrogate character. If this parameter + is not a valid low surrogate character, the behavior is undefined. + @result The UTF-32 value for the surrogate pair. +*/ +UC_INLINE UnicodeScalarValue UCGetUnicodeScalarValueForSurrogatePair( UniChar surrogateHigh, UniChar surrogateLow ) { + return ( ( surrogateHigh - kUCHighSurrogateRangeStart ) << 10 ) + ( surrogateLow - kUCLowSurrogateRangeStart ) + 0x0010000UL; +} + + + +#if PRAGMA_STRUCT_ALIGN + #pragma options align=reset +#elif PRAGMA_STRUCT_PACKPUSH + #pragma pack(pop) +#elif PRAGMA_STRUCT_PACK + #pragma pack() +#endif + +#ifdef PRAGMA_IMPORT_OFF +#pragma import off +#elif PRAGMA_IMPORT +#pragma import reset +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __TEXTCOMMON__ */ + |