00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef NORMLZR_H
00010 #define NORMLZR_H
00011
00012 #include "unicode/utypes.h"
00013
00014 #if !UCONFIG_NO_NORMALIZATION
00015
00016 #include "unicode/uobject.h"
00017 #include "unicode/unistr.h"
00018 #include "unicode/chariter.h"
00019 #include "unicode/unorm.h"
00020
/*
 * Forward declaration of the UCharIterator struct, so that this header can
 * refer to it by pointer (see Normalizer::text) without its full definition.
 */
struct UCharIterator;
/* Lets the type be written as plain "UCharIterator", without the struct keyword. */
typedef struct UCharIterator UCharIterator;
00024 U_NAMESPACE_BEGIN
00115 class U_COMMON_API Normalizer : public UObject {
00116 public:
00122 enum {
00123 DONE=0xffff
00124 };
00125
00126
00127
00138 Normalizer(const UnicodeString& str, UNormalizationMode mode);
00139
00151 Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
00152
00163 Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
00164
00170 Normalizer(const Normalizer& copy);
00171
00176 ~Normalizer();
00177
00178
00179
00180
00181
00182
00197 static void normalize(const UnicodeString& source,
00198 UNormalizationMode mode, int32_t options,
00199 UnicodeString& result,
00200 UErrorCode &status);
00201
00219 static void compose(const UnicodeString& source,
00220 UBool compat, int32_t options,
00221 UnicodeString& result,
00222 UErrorCode &status);
00223
00241 static void decompose(const UnicodeString& source,
00242 UBool compat, int32_t options,
00243 UnicodeString& result,
00244 UErrorCode &status);
00245
00266 static inline UNormalizationCheckResult
00267 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
00268
00282 static inline UNormalizationCheckResult
00283 quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
00284
00305 static inline UBool
00306 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
00307
00323 static inline UBool
00324 isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
00325
00355 static UnicodeString &
00356 concatenate(UnicodeString &left, UnicodeString &right,
00357 UnicodeString &result,
00358 UNormalizationMode mode, int32_t options,
00359 UErrorCode &errorCode);
00360
00425 static inline int32_t
00426 compare(const UnicodeString &s1, const UnicodeString &s2,
00427 uint32_t options,
00428 UErrorCode &errorCode);
00429
00430
00431
00432
00433
00442 UChar32 current(void);
00443
00452 UChar32 first(void);
00453
00462 UChar32 last(void);
00463
00478 UChar32 next(void);
00479
00494 UChar32 previous(void);
00495
00505 void setIndexOnly(int32_t index);
00506
00512 void reset(void);
00513
00528 int32_t getIndex(void) const;
00529
00538 int32_t startIndex(void) const;
00539
00550 int32_t endIndex(void) const;
00551
00560 UBool operator==(const Normalizer& that) const;
00561
00570 inline UBool operator!=(const Normalizer& that) const;
00571
00578 Normalizer* clone(void) const;
00579
00586 int32_t hashCode(void) const;
00587
00588
00589
00590
00591
00607 void setMode(UNormalizationMode newMode);
00608
00619 UNormalizationMode getUMode(void) const;
00620
00637 void setOption(int32_t option,
00638 UBool value);
00639
00650 UBool getOption(int32_t option) const;
00651
00660 void setText(const UnicodeString& newText,
00661 UErrorCode &status);
00662
00671 void setText(const CharacterIterator& newText,
00672 UErrorCode &status);
00673
00683 void setText(const UChar* newText,
00684 int32_t length,
00685 UErrorCode &status);
00692 void getText(UnicodeString& result);
00693
00699 virtual inline UClassID getDynamicClassID() const;
00700
00706 static inline UClassID getStaticClassID();
00707
00708 private:
00709
00710
00711
00712
00713 Normalizer();
00714 Normalizer &operator=(const Normalizer &that);
00715
00716
00717
00718 UBool nextNormalize();
00719 UBool previousNormalize();
00720
00721 void init(CharacterIterator *iter);
00722 void clearBuffer(void);
00723
00724
00725
00726
00727
00728 UNormalizationMode fUMode;
00729 int32_t fOptions;
00730
00731
00732 UCharIterator *text;
00733
00734
00735
00736 int32_t currentIndex, nextIndex;
00737
00738
00739 UnicodeString buffer;
00740 int32_t bufferPos;
00741
00746 static const char fgClassID;
00747 };
00748
00749
00750
00751
00752
00753 inline UClassID
00754 Normalizer::getStaticClassID()
00755 { return (UClassID)&fgClassID; }
00756
00757 inline UClassID
00758 Normalizer::getDynamicClassID() const
00759 { return Normalizer::getStaticClassID(); }
00760
00761 inline UBool
00762 Normalizer::operator!= (const Normalizer& other) const
00763 { return ! operator==(other); }
00764
00765 inline UNormalizationCheckResult
00766 Normalizer::quickCheck(const UnicodeString& source,
00767 UNormalizationMode mode,
00768 UErrorCode &status) {
00769 if(U_FAILURE(status)) {
00770 return UNORM_MAYBE;
00771 }
00772
00773 return unorm_quickCheck(source.getBuffer(), source.length(),
00774 mode, &status);
00775 }
00776
00777 inline UNormalizationCheckResult
00778 Normalizer::quickCheck(const UnicodeString& source,
00779 UNormalizationMode mode, int32_t options,
00780 UErrorCode &status) {
00781 if(U_FAILURE(status)) {
00782 return UNORM_MAYBE;
00783 }
00784
00785 return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
00786 mode, options, &status);
00787 }
00788
00789 inline UBool
00790 Normalizer::isNormalized(const UnicodeString& source,
00791 UNormalizationMode mode,
00792 UErrorCode &status) {
00793 if(U_FAILURE(status)) {
00794 return FALSE;
00795 }
00796
00797 return unorm_isNormalized(source.getBuffer(), source.length(),
00798 mode, &status);
00799 }
00800
00801 inline UBool
00802 Normalizer::isNormalized(const UnicodeString& source,
00803 UNormalizationMode mode, int32_t options,
00804 UErrorCode &status) {
00805 if(U_FAILURE(status)) {
00806 return FALSE;
00807 }
00808
00809 return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
00810 mode, options, &status);
00811 }
00812
00813 inline int32_t
00814 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
00815 uint32_t options,
00816 UErrorCode &errorCode) {
00817
00818 return unorm_compare(s1.getBuffer(), s1.length(),
00819 s2.getBuffer(), s2.length(),
00820 options,
00821 &errorCode);
00822 }
00823
00824 U_NAMESPACE_END
00825
00826 #endif
00827
00828 #endif // NORMLZR_H