// Revision: 93 1.11 source/core/text/unicode/iunicode.hpp, text, ocdev
// iunicode.hpp
/**
 * This file contains the class IUnicode which provides information about the attributes of individual Unicode characters.
 *
 * @package     Text and International
 * @category    International Components
 *
 * @copyright
 *              IBM Open Class Library
 *              (C) Copyright International Business Machines Corporation 1997
 *              Licensed Material - Program-Property of IBM - All Rights Reserved.
 *
 * @author              Helena Shih
 */


#ifndef _IUNICODE_
#define _IUNICODE_

#include <stddef.h>

#include <iprimtyp.hpp>
#include <idatstrm.hpp>

// ABI settings applied to every declaration in this header; they are undone by the
// matching pragmas just before the closing include guard.

// On IBM C++ at compiler level 400 or later, switch to the "compat" name-mangling scheme.
// NOTE(review): presumably this keeps the mangled names link-compatible with objects built
// by earlier compiler releases -- confirm against the compiler's pragma reference.
#if __IBMCPP__ >= 400
#pragma namemangling(compat)
#endif

// Request 4-byte enum storage and 4-byte member packing (the previous packing value is
// pushed so it can be restored later). These two pragmas are not guarded by __IBMCPP__.
#pragma enum(4)
#pragma pack(push,4)

/**
 * IUnicode groups static query functions and enumerations describing the attributes of
 * individual Unicode characters (UniChar values).
 *
 * Every public member function is static, and the constructors, destructor and assignment
 * operator are private, so client code uses the class purely as a scope
 * (e.g. IUnicode::isUpper(uc)) and cannot create or copy instances. The only friend is
 * ICharacterPropertyIterator.
 *
 * Only declarations appear here; the function bodies and the static tables are defined
 * elsewhere.
 */
class IUnicode
{
        public:
        /**
         * Enums and typedefs
         */

                // Named boundaries of code ranges within the 16-bit code space
                // (kLowBoundUnicode 0x0000 through kHighBoundUnicode 0xFFFF).
                enum EUnicodeBounds                     //      These constants may be replaced with UniChar's at some
                                                                        //              point in the future
                {
                        kLowBoundUnicode                                = 0x0000,
                        kLowBoundASCII                                  = kLowBoundUnicode,
                        kLowBoundLatinOne                               = kLowBoundASCII,
                        kHighBoundASCII                                 = 0x007F,
                        kHighBoundLatinOne                              = 0x00FF,

                        kLowBoundHan    = 0x4E00,                                                                               //      lower limit of currently defined Han range
                        kHighBoundHan   = 0x9FA5,                                                                               //      upper limit of currently defined Han range

                        kLowBoundHangulSyllable                 = 0xAC00,                                               // lower limit of currently defined precomposed Hangul syllable range
                        kHighBoundHangulSyllable                = 0xD7A3,                                               // upper limit of currently defined precomposed Hangul syllable range

                        kLowBoundUserZone                               = 0xE000,                                               // general user chars start here and grow up
                        kHighBoundUserZone                              = 0xF8FF,                                               // corporate user chars start here and grow down
                        kLowBoundDefinedUserZone                = kHighBoundUserZone - 255,             // bottom end of defined corporate user chars

                        kLowBoundCompatibilityZone1     = kHighBoundUserZone + 1,               // CJK compatibility, Arabic forms, etc.
                        kHighBoundCompatibilityZone1    = 0xFEFE,

                        kLowBoundCompatibilityZone2             = 0xFF00,                                               // Half width, etc.
                        kHighBoundCompatibilityZone2    = 0xFFEF,
                        kHighBoundUnicode                               = 0xFFFF
                };

                // Script / block identifier; this is the return type of script().
                //
                // The enumerators have no explicit values, so they are numbered consecutively
                // from 0 in declaration order. Inserting, removing or reordering an entry
                // therefore renumbers every entry after it. kScriptCount is last and so equals
                // the number of entries that precede it. Note that kNoScript is NOT the final
                // script entry: kHalfwidthFullwidthForm follows it.
                // NOTE(review): fgScriptIndex is presumably indexed by these values -- confirm
                // in the implementation before touching the order.
                enum EUnicodeScript

                {
                        kBasicLatin,
            kLatin1Supplement,
                        kLatinExtendedA,
                        kLatinExtendedB,
                        kIPAExtension,
                        kSpacingModifier,
                        kCombiningDiacritical,
                        kGreek,
                        kCyrillic,
                        kArmenian,
                        kHebrew,
                        kArabic,
                        kDevanagari,
                        kBengali,
                        kGurmukhi,
                        kGujarati,
                        kOriya,
                        kTamil,
                        kTelugu,
                        kKannada,
                        kMalayalam,
                        kThai,
                        kLao,
                        kTibetan,
                        kGeorgian,
                        kHangulJamo,
                        kLatinExtendedAdditional,
                        kGreekExtended,
                        kGeneralPunctuation,
                        kSuperSubScript,
                        kCurrencySymbolScript,
                        kSymbolCombiningMark,
                        kLetterlikeSymbol,
                        kNumberForm,
                        kArrow,
                        kMathOperator,
                        kMiscTechnical,
                        kControlPicture,
                        kOpticalCharacter,
                        kEnclosedAlphanumeric,
                        kBoxDrawing,
                        kBlockElement,
                        kGeometricShape,
                        kMiscSymbol,
                        kDingbat,
                        kCJKSymbolPunctuation,
                        kHiragana,
                        kKatakana,
                        kBopomofo,
                        kHangulCompatibilityJamo,
                        kKanbun,
                        kEnclosedCJKLetterMonth,
                        kCJKCompatibility,
                        kCJKUnifiedIdeograph,
                        kHangulSyllable,
                        kHighSurrogate,
                        kHighPrivateUseSurrogate,
                        kLowSurrogate,
                        kPrivateUse,
                        kCJKCompatibilityIdeograph,
                        kAlphabeticPresentation,
                        kArabicPresentationA,
                        kCombiningHalfMark,
                        kCJKCompatibilityForm,
                        kSmallFormVariant,
                        kArabicPresentationB,
                        kNoScript,
                        kHalfwidthFullwidthForm,
                        kScriptCount
                };



                // Character category; this is the return type of type().
                //
                // The first group assigns the distinct category values 0 through 27. Each family
                // (letters, marks, numbers, separators, punctuation, symbols) is bracketed by a
                // kFirstXxx / kLastXxx pair that repeats the value of its first / last member, so
                // a family corresponds to one closed range of values.
                // kCharacterPropertiesCount (28) is the number of distinct category values.
                //
                // The second group (after kCharacterPropertiesCount) only introduces additional
                // names for values already defined above; it adds no new values. Several of these
                // names share one value (e.g. kUpperCase and kCompositeUpperCase), so they cannot
                // be distinguished from each other at run time.
                enum ECharacterProperty
                {
                        kNonCharacter                           = 0,

                        kFirstLetter                            = 1,
                        kUppercaseLetter                        = 1,
                        kLowercaseLetter                        = 2,
                        kTitlecaseLetter                        = 3,
                        kModifierLetter                         = 4,
                        kOtherLetter                            = 5,
                        kLastLetter                                     = 5,

                        kFirstMark                                      = 6,
                        kNonSpacingMark                         = 6,
                        kEnclosingMark                          = 7,
                        kCombiningSpacingMark           = 8,
                        kLastMark                                       = 8,

                        kFirstNumber                            = 9,
                        kDecimalNumber                          = 9,
                        kLetterNumber                           = 10,
                        kOtherNumber                            = 11,
                        kLastNumber                                     = 11,

                        kFirstSeparator                         = 12,
                        kSpaceSeparator                         = 12,
                        kLineSeparator                          = 13,
                        kParagraphSeparator                     = 14,
                        kLastSeparator                          = 14,

                        kControlCharacter                       = 15,
                        kFormatCharacter                        = 16,

                        kPrivateUseCharacter            = 17,
                        kSurrogate                                      = 18,

                        kFirstPunctuation                       = 19,
                        kDashPunctuation                        = 19,
                        kOpenPunctuation                        = 20,
                        kClosePunctuation                       = 21,
                        kConnectorPunctuation           = 22,
                        kOtherPunctuation                       = 23,
                        kLastPunctuation                        = 23,

                        kFirstSymbol                            = 24,
                        kMathSymbol                                     = 24,
                        kCurrencySymbol                         = 25,
                        kModifierSymbol                         = 26,
                        kOtherSymbol                            = 27,
                        kLastSymbol                                     = 27,

                        kCharacterPropertiesCount       = 28,


                        // Additional names for the categories defined above (no new values).
                        kUpperCase                              = kUppercaseLetter,
                        kCompositeUpperCase     = kUppercaseLetter,
                        kLowerCase                              = kLowercaseLetter,
                        kCompositeLowerCase             = kLowercaseLetter,
                        kUncased                                = kOtherLetter,
                        kCompositeUncased               = kOtherLetter,         // e.g. hangul syllables, ethiopian
                        kModifier                               = kModifierLetter,
                        kPresentationModifier   = kFormatCharacter,     // is this correct? --rtg  agreed -- helena
                        kDiacritic                              = kNonSpacingMark,

                        kFirstDigit                             = kFirstNumber,
                        kDecimalDigit                   = kDecimalNumber,
                        kNonDecimalDigit                = kOtherNumber, // is this correct? --rtg agreed -- helena
                        kLastDigit                              = kLastNumber,

                        kGeneralTechnicalSymbol = kOtherSymbol,

                        kFirstWhite                             = kFirstSeparator,
                        kWhiteSpace                     = kSpaceSeparator,
                        kLineTerminator                 = kLineSeparator,
                        kParagraphTerminator    = kParagraphSeparator,
                        kPadSpace                               = kSpaceSeparator,
                        kLastWhite                              = kLastSeparator,

                        kControl                                = kControlCharacter,    // is this correct?  --rtg agreed -- helena
                        kUnknownType                    = kNonCharacter,

                        kLastType                               = kLastSymbol
                };

                // Directionality class of a character; this is the return type of
                // characterDirection(). Values run from 0 (kLeftToRight) to 10 (kOtherNeutral).
                enum EDirectionProperty {
                        kLeftToRight              = 0,
                        kRightToLeft              = 1,
            kEuropeanNumber           = 2,
            kEuropeanNumberSeparator  = 3,
            kEuropeanNumberTerminator = 4,
            kArabicNumber             = 5,
                        kCommonNumberSeparator    = 6,
                        kBlockSeparator           = 7,
                        kSegmentSeparator         = 8,
                        kWhiteSpaceNeutral        = 9,
                        kOtherNeutral             = 10 };

                /**
                 * Information Access
                 */

                // Returns the EUnicodeScript value for uc.
                static EUnicodeScript script(UniChar uc);

                // Returns the ECharacterProperty (category) value for uc.
                static ECharacterProperty type(UniChar uc);

                // Returns the EDirectionProperty (directionality class) value for uc.
                static EDirectionProperty characterDirection(UniChar uc);


                /**
                 * C-type Information Access
                 */
                // The following routines mimic the ANSI C <ctype.h> routines for the complete Unicode
                // set. This may in fact affect the interpretation of the C definition.
                // This may require additional tuning or even a parallel set of routines that duplicate the
                // exact behaviour.

                //      The character properties of characters within Unicode may not be redefined
                //      As a result, all of these methods are non-virtual

                // Each predicate below takes one character and returns a bool. The exact
                // character sets they accept are defined in the implementation, not here.
                static bool isLineSeparator (UniChar uc);
                static bool isParagraphSeparator (UniChar uc);
                static bool isLineOrParagraphSeparator (UniChar uc);
                static bool isASpace (UniChar uc);
                static bool isInvisible (UniChar uc);
                static bool isTrailingInvisible( UniChar uc );

                /**
                 *  Base form letters information (letters that can take a diacritic)
                 */
                static bool isUpper(UniChar uc);
                static bool isLower(UniChar uc);
        static bool isUncased(UniChar uc);
                static bool isBaseForm(UniChar uc);
                static bool isDiacritic(UniChar uc);
                static bool isAlpha(UniChar uc);
                static bool isAlphaNumeric(UniChar uc);

        /**
         * Special Characters
         */

                static bool isGraphic(UniChar uc);
                static bool isDigit(UniChar uc);
                // NOTE(review): isXDigit and isHexDigit are both declared; whether they differ
                // in behaviour cannot be determined from this header.
                static bool isXDigit (UniChar uc);
                static bool isHexDigit(UniChar uc);
                static bool isASCII(UniChar uc);
                static bool isControl(UniChar uc);
                static bool isPrint(UniChar uc);
                static bool isPunctuation (UniChar uc);
                static bool isOpenPunctuation(UniChar uc);
                static bool isClosePunctuation(UniChar uc);
                static bool isSymbol(UniChar uc);
                // NOTE(review): the set tested by isInSet is not identified in this header.
                static bool isInSet(UniChar uc);
                // Takes a character and returns a character.
                // NOTE(review): presumably maps an opening/closing punctuation mark to its
                // counterpart -- confirm in the implementation, including the no-match result.
                static UniChar matchPunctuation (UniChar searchChar);
                // Returns an int for uc.
                // NOTE(review): presumably the numeric value of a digit character; the result
                // for non-digits is not visible here -- confirm.
                static int digitValue(UniChar uc);

        /**
         * Unicode Standard Related Information
         */

                // Returns a version number as a double.
                // NOTE(review): presumably the value of fVersion -- confirm.
                static double currentVersion();

        private:
                // Grants ICharacterPropertyIterator access to the private members below,
                // including the private constructors and destructor.
                friend class ICharacterPropertyIterator;
                /**
                 *              Constructors and destructor
                 */

                // Construction, copying, destruction and assignment are all private, so code
                // outside this class and its friend cannot create, copy, assign or destroy
                // IUnicode objects.

                IUnicode();

                IUnicode( const IUnicode& );

                virtual ~IUnicode();

                IUnicode& operator=( const IUnicode& );

                static bool isCJK( UniChar uc );

                static  void printCharacterProperties(UniChar u);
                // A pair of codes, first and last; element type of fgScriptIndex.
                // NOTE(review): presumably an inclusive code range for one script -- confirm.
                typedef struct {
                        UniChar                                         fFirstCode;
                        UniChar                                         fLastCode;
                        } IBlockScriptMap;

                static double fVersion;

                // Static tables: declared here, defined elsewhere.
                // NOTE(review): the Indices/Values/OffsetCount naming suggests an indexed
                // lookup table for character properties -- confirm in the implementation file.
                static const IBlockScriptMap fgScriptIndex[];
                static const unsigned short     fgCharPropIndices[];
                static const char                       fgCharPropValues[];
                static const long                       fgCharPropOffsetCount;

                // Character directionality attribute
                static const unsigned short     fgCharDirIndices[];
                static const char                       fgCharDirValues[];
                static const long                       fgCharDirOffsetCount;
};              //IUnicode


//=================================================================================
//      class ICharacterPropertyIterator
//=================================================================================

/**
 * Cursor parameterised by a low and a high IUnicode::ECharacterProperty bound that
 * yields UniChar values through operator*.
 *
 * NOTE(review): presumably it visits the characters whose property lies between the two
 * bounds; the traversal itself is implemented out of line -- confirm there.
 *
 * IUnicode declares this class a friend, which is what lets it hold and use an IUnicode
 * pointer despite IUnicode's private constructors and destructor.
 */
class ICharacterPropertyIterator
{
public:
    // ---- Construction, copy, destruction --------------------------------------------

    // Builds an iterator for the given pair of property bounds.
    ICharacterPropertyIterator(IUnicode::ECharacterProperty lowPropertyBound,
                               IUnicode::ECharacterProperty highPropertyBound);

    // Copy constructor.
    ICharacterPropertyIterator(const ICharacterPropertyIterator& that);

    virtual ~ICharacterPropertyIterator();

    // ---- Assignment ------------------------------------------------------------------

    ICharacterPropertyIterator& operator=(const ICharacterPropertyIterator& that);

    // ---- Property bounds -------------------------------------------------------------

    // Replaces the pair of property bounds.
    void setRange(IUnicode::ECharacterProperty lowPropertyBound,
                  IUnicode::ECharacterProperty highPropertyBound);

    // Reports the pair of property bounds through the two reference parameters.
    void range(IUnicode::ECharacterProperty& lowPropertyBound,
               IUnicode::ECharacterProperty& highPropertyBound) const;

    // ---- Cursor actions --------------------------------------------------------------

    void reset();

    // Implicit conversion to bool.
    // NOTE(review): presumably true while the cursor still designates a character -- confirm.
    operator bool() const;

    // Yields a UniChar; presumably the character at the current cursor position.
    UniChar operator*() const;

    // ---- Increment operators ---------------------------------------------------------

    // Prefix form: returns a reference to an iterator.
    ICharacterPropertyIterator& operator++();

    // Postfix form: returns an iterator by value.
    ICharacterPropertyIterator operator++(int);

private:
    // Declaration order of the data members is significant for object layout; keep it.
    IUnicode::ECharacterProperty fLowPropertyBound;
    IUnicode::ECharacterProperty fHighPropertyBound;
    UniChar fCurrentPosition;

    IUnicode* fUnicodeImplementation;
};

// Undo the packing and enum-size settings established near the top of this header so
// they do not leak into whatever the including file declares next.
#pragma pack(pop)
#pragma enum(pop)

// Counterpart of the namemangling(compat) pragma at the top of the header.
// NOTE(review): the empty argument list presumably returns to the compiler's default
// mangling scheme -- confirm against the compiler's pragma reference.
#if __IBMCPP__ >= 400
#pragma namemangling()
#endif

#endif // _IUNICODE_
