Main Page | Class Hierarchy | Alphabetical List | Data Structures | File List | Data Fields | Globals | Related Pages

uniset.h

00001 /*
00002 **********************************************************************
00003 * Copyright (C) 1999-2003, International Business Machines Corporation and others. All Rights Reserved.
00004 **********************************************************************
00005 *   Date        Name        Description
00006 *   10/20/99    alan        Creation.
00007 **********************************************************************
00008 */
00009 
00010 #ifndef UNICODESET_H
00011 #define UNICODESET_H
00012 
00013 #include "unicode/unifilt.h"
00014 #include "unicode/utypes.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uchar.h"
00017 #include "unicode/uset.h"
00018 
00019 U_NAMESPACE_BEGIN
00020 
00021 class ParsePosition; // forward declaration; this header only uses it as ParsePosition&
00022 class SymbolTable; // forward declaration; this header only uses it as const SymbolTable& / const SymbolTable*
00023 class UVector; // forward declaration; this header only uses it as UVector* (the strings member)
00024 class CaseEquivClass; // forward declaration; this header only uses it as const CaseEquivClass& / const CaseEquivClass*
00025 
00026     
00258 class U_COMMON_API UnicodeSet : public UnicodeFilter {
      // Declaration of UnicodeSet, a UnicodeFilter subclass exported with
      // U_COMMON_API. Its state is a UChar32 array (list) with a used length
      // and a capacity, a second internal UChar32 array (buffer), a UVector
      // of strings, and a UnicodeString pattern.
      //
      // NOTE(review): the embedded listing numbers skip ranges (for example
      // 00026 -> 00258), so the original API comments appear to be missing
      // from this listing. Anything below that goes beyond what a signature
      // shows is marked as an assumption to confirm against the implementation.
00259 
          // Data members (private: no access specifier precedes them).
00260     int32_t len; // length of list used; 0 <= len <= capacity
00261     int32_t capacity; // capacity of list
00262     int32_t bufferCapacity; // capacity of buffer
00263     UChar32* list; // MUST be terminated with HIGH
00264     UChar32* buffer; // internal buffer, may be NULL
00265 
00266     UVector* strings; // maintained in sorted order
00267 
          // NOTE(review): presumably the pattern text associated with this set
          // (compare applyPattern / toPattern) - confirm.
00277     UnicodeString pat;
00278 
00279 public:
00280 
          // Static UChar32 constant, declared here without an initializer.
          // NOTE(review): presumably the smallest value a set can hold - confirm.
00285     static const UChar32 MIN_VALUE;
00286 
          // Static UChar32 constant, declared here without an initializer.
          // NOTE(review): presumably the largest value a set can hold - confirm.
00291     static const UChar32 MAX_VALUE;
00292 
00293     //----------------------------------------------------------------
00294     // Constructors &c
00295     //----------------------------------------------------------------
00296 
00297 public:
00298 
          // Default constructor.
00303     UnicodeSet();
00304 
          // Constructs from two code points.
          // NOTE(review): presumably the range start..end, inclusive - confirm.
00313     UnicodeSet(UChar32 start, UChar32 end);
00314 
          // Constructs from a pattern string. status is taken by non-const
          // reference, so an error can be reported through it.
00323     UnicodeSet(const UnicodeString& pattern,
00324                UErrorCode& status);
00325 
          // As above, with an additional uint32_t options value; the meaning
          // of the option bits is not visible in this listing.
00336     UnicodeSet(const UnicodeString& pattern,
00337                uint32_t options,
00338                UErrorCode& status);
00339 
          // The following constructor is only declared when
          // U_USE_UNICODESET_DEPRECATES is defined.
00340 #ifdef U_USE_UNICODESET_DEPRECATES
00341 
00347     UnicodeSet(int8_t category, UErrorCode& status);
00348 #endif
00349 
          // Copy constructor.
00354     UnicodeSet(const UnicodeSet& o);
00355 
          // Virtual destructor.
00360     virtual ~UnicodeSet();
00361 
          // Copy assignment.
00366     UnicodeSet& operator=(const UnicodeSet& o);
00367 
          // Equality comparison against another UnicodeSet (virtual).
00379     virtual UBool operator==(const UnicodeSet& o) const;
00380 
          // Inequality; the inline definition after the class returns
          // !operator==(o).
00386     UBool operator!=(const UnicodeSet& o) const;
00387 
          // Returns a UnicodeFunctor*.
          // NOTE(review): presumably a new copy owned by the caller - confirm.
00394     virtual UnicodeFunctor* clone() const;
00395 
          // Returns an int32_t hash value for this set.
00403     virtual int32_t hashCode(void) const;
00404 
00405     //----------------------------------------------------------------
00406     // Public API
00407     //----------------------------------------------------------------
00408 
          // Takes two code points and returns a UnicodeSet&.
          // NOTE(review): presumably replaces the contents with start..end and
          // returns *this - confirm.
00418     UnicodeSet& set(UChar32 start, UChar32 end);
00419 
          // Static test on a pattern string at index pos; returns UBool.
00425     static UBool resemblesPattern(const UnicodeString& pattern,
00426                                   int32_t pos);
00427 
          // Applies a pattern string to this set (virtual); an error can be
          // reported through status.
00437     virtual UnicodeSet& applyPattern(const UnicodeString& pattern,
00438                                      UErrorCode& status);
00439 
          // Non-virtual overload with an additional uint32_t options value.
00451     UnicodeSet& applyPattern(const UnicodeString& pattern,
00452                              uint32_t options,
00453                              UErrorCode& status);
00454 
          // Writes a pattern into the caller-supplied result string and returns
          // a UnicodeString&. escapeUnprintable defaults to FALSE.
00467     virtual UnicodeString& toPattern(UnicodeString& result,
00468                                      UBool escapeUnprintable = FALSE) const;
00469 
          // Takes a UProperty selector and an int32_t value; an error can be
          // reported through ec.
00491     UnicodeSet& applyIntPropertyValue(UProperty prop,
00492                                       int32_t value,
00493                                       UErrorCode& ec);
00494 
          // String-based counterpart: property and value are given as
          // UnicodeStrings.
00522     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00523                                    const UnicodeString& value,
00524                                    UErrorCode& ec);
00525 
          // Returns an int32_t size.
          // NOTE(review): unit (code points vs. elements including strings) is
          // not visible here - confirm.
00533     virtual int32_t size(void) const;
00534 
          // Returns a UBool emptiness flag.
00541     virtual UBool isEmpty(void) const;
00542 
          // Membership queries: a single code point, a start/end pair, and a
          // string.
00549     virtual UBool contains(UChar32 c) const;
00550     
00559     virtual UBool contains(UChar32 start, UChar32 end) const;
00560 
00568     UBool contains(const UnicodeString& s) const;
00569     
          // containsAll overloads: another set, or a string.
00577     virtual UBool containsAll(const UnicodeSet& c) const;
00578     
00586     UBool containsAll(const UnicodeString& s) const;
00587     
          // containsNone overloads: a start/end pair, another set, or a string.
00596     UBool containsNone(UChar32 start, UChar32 end) const;
00597 
00605     UBool containsNone(const UnicodeSet& c) const;
00606     
00614     UBool containsNone(const UnicodeString& s) const;
00615         
          // containsSome overloads; each inline definition after the class
          // returns the negation of the matching containsNone overload.
00624     inline UBool containsSome(UChar32 start, UChar32 end) const;
00625         
00633     inline UBool containsSome(const UnicodeSet& s) const;
00634         
00642     inline UBool containsSome(const UnicodeString& s) const;
00643         
          // Non-const member taking a Replaceable text, an offset passed by
          // non-const reference, a limit and an incremental flag; returns a
          // UMatchDegree.
          // NOTE(review): presumably the matcher hook inherited through
          // UnicodeFilter - confirm.
00648     UMatchDegree matches(const Replaceable& text,
00649                          int32_t& offset,
00650                          int32_t limit,
00651                          UBool incremental);
00652 
00653  private:    
          // Static helper for matching: takes the text, a start/limit pair and
          // a string s, and returns an int32_t.
00675     static int32_t matchRest(const Replaceable& text,
00676                              int32_t start, int32_t limit,
00677                              const UnicodeString& s);
00678     
          // Returns an int32_t for code point c.
          // NOTE(review): presumably an index into list - confirm.
00688     int32_t findCodePoint(UChar32 c) const;
00689 
00690  public:
00691 
          // Const member that receives a caller-supplied set by non-const
          // reference.
00699     void addMatchSetTo(UnicodeSet& toUnionTo) const;
00700 
          // Code point -> int32_t index.
          // NOTE(review): the value returned for a missing code point is not
          // visible here.
00709     int32_t indexOf(UChar32 c) const;
00710 
          // int32_t index -> code point.
          // NOTE(review): the value returned for an out-of-range index is not
          // visible here.
00720     UChar32 charAt(int32_t index) const;
00721 
          // add overloads: a start/end pair (virtual), a single code point, and
          // a string. Each returns a UnicodeSet&.
          // NOTE(review): presumably *this, for call chaining - confirm.
00735     virtual UnicodeSet& add(UChar32 start, UChar32 end);
00736 
00743     UnicodeSet& add(UChar32 c);
00744 
00755     UnicodeSet& add(const UnicodeString& s);
00756 
00757  private:    
          // Static helper mapping a string to an int32_t.
          // NOTE(review): presumably the code point when s holds exactly one,
          // with a sentinel otherwise - confirm.
00763     static int32_t getSingleCP(const UnicodeString& s);
00764 
          // Private string-add helper that returns nothing.
00765     void _add(const UnicodeString& s);
00766     
00767  public:
          // String-argument forms of addAll / retainAll / complementAll /
          // removeAll; each returns a UnicodeSet&.
00775     UnicodeSet& addAll(const UnicodeString& s);
00776 
00784     UnicodeSet& retainAll(const UnicodeString& s);
00785 
00793     UnicodeSet& complementAll(const UnicodeString& s);
00794 
00802     UnicodeSet& removeAll(const UnicodeString& s);
00803 
          // Static factories returning a UnicodeSet* built from a string.
          // NOTE(review): presumably newly allocated and owned by the caller -
          // confirm.
00812     static UnicodeSet* createFrom(const UnicodeString& s);
00813 
00814     
00822     static UnicodeSet* createFromAll(const UnicodeString& s);
00823 
          // retain overloads: a start/end pair (virtual) and a single code
          // point.
00836     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
00837 
00838 
00843     UnicodeSet& retain(UChar32 c);
00844 
          // remove overloads: a start/end pair (virtual), a single code point,
          // and a string.
00857     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
00858 
00865     UnicodeSet& remove(UChar32 c);
00866 
00875     UnicodeSet& remove(const UnicodeString& s);
00876 
          // complement overloads: no argument (virtual), a start/end pair
          // (virtual), a single code point, and a string.
00883     virtual UnicodeSet& complement(void);
00884 
00898     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
00899 
00906     UnicodeSet& complement(UChar32 c);
00907 
00917     UnicodeSet& complement(const UnicodeString& s);
00918 
          // Set-argument forms of addAll / retainAll / removeAll /
          // complementAll (all virtual); each returns a UnicodeSet&.
00930     virtual UnicodeSet& addAll(const UnicodeSet& c);
00931 
00942     virtual UnicodeSet& retainAll(const UnicodeSet& c);
00943 
00954     virtual UnicodeSet& removeAll(const UnicodeSet& c);
00955 
00965     virtual UnicodeSet& complementAll(const UnicodeSet& c);
00966 
          // Takes no arguments and returns a UnicodeSet& (virtual).
00972     virtual UnicodeSet& clear(void);
00973 
          // Takes an int32_t attribute and returns a UnicodeSet&.
          // NOTE(review): presumably a bitmask of option flags from
          // unicode/uset.h - confirm.
00997     UnicodeSet& closeOver(int32_t attribute);
00998 
          // Range accessors: a count, then the start and end code point for a
          // given int32_t index.
01006     virtual int32_t getRangeCount(void) const;
01007 
01015     virtual UChar32 getRangeStart(int32_t index) const;
01016 
01024     virtual UChar32 getRangeEnd(int32_t index) const;
01025 
          // dest is a caller-supplied, writable uint16_t array of destCapacity
          // elements; an error can be reported through ec.
          // NOTE(review): the serialized format and the meaning of the int32_t
          // return value are not visible here.
01074     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01075 
          // Takes no arguments and returns a UnicodeSet& (virtual).
          // NOTE(review): presumably trims internal storage - confirm.
01081     virtual UnicodeSet& compact();
01082 
          // Class ID accessor; the inline definition after the class returns
          // the address of fgClassID cast to UClassID.
01094     static UClassID getStaticClassID(void);
01095 
          // Virtual counterpart; the inline definition after the class returns
          // UnicodeSet::getStaticClassID().
01104     virtual UClassID getDynamicClassID(void) const;
01105 
01106 private:
01107 
01108     // Private API for the USet API
01109 
          // USetAccess is a friend, so it can reach the private accessors below.
01110     friend class USetAccess;
01111 
          // Returns an int32_t count.
          // NOTE(review): presumably the number of entries in strings - confirm.
01112     int32_t getStringCount() const;
01113 
          // Returns a pointer to a const UnicodeString for the given index.
01114     const UnicodeString* getString(int32_t index) const;
01115 
01116 private:
01117 
          // Static char whose address getStaticClassID() returns as the class ID.
01118     static const char fgClassID;
01119 
01120     //----------------------------------------------------------------
01121     // RuleBasedTransliterator support
01122     //----------------------------------------------------------------
01123 
          // These friend classes may use the private members, including the
          // private constructors declared below.
01124     friend class TransliteratorParser;
01125     friend class TransliteratorIDParser;
01126 
01127     friend class RBBIRuleScanner;
01128     friend class RegexCompile;
01129 
          // Private constructor taking a pattern, a ParsePosition by non-const
          // reference, and a SymbolTable.
01148     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
01149                const SymbolTable& symbols,
01150                UErrorCode& status);
01151 
          // Private constructor taking a pattern, a ParsePosition by non-const
          // reference, and an options value.
01157     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
01158                uint32_t options, UErrorCode& status);
01159 
          // Private virtual predicate on a uint8_t value.
01165     virtual UBool matchesIndexValue(uint8_t v) const;
01166 
01167 private:
01168 
01169     //----------------------------------------------------------------
01170     // Implementation: Pattern parsing
01171     //----------------------------------------------------------------
01172 
          // Private applyPattern overload: takes a ParsePosition, options and a
          // SymbolTable pointer, and returns void.
          // NOTE(review): whether symbols may be NULL is not visible here.
01197     void applyPattern(const UnicodeString& pattern,
01198                       ParsePosition& pos,
01199                       uint32_t options,
01200                       const SymbolTable* symbols,
01201                       UErrorCode& status);
01202 
01203     //----------------------------------------------------------------
01204     // Implementation: Utility methods
01205     //----------------------------------------------------------------
01206 
          // Capacity helpers.
          // NOTE(review): presumably ensureCapacity grows list and
          // ensureBufferCapacity grows buffer so each holds newLen entries -
          // confirm.
01207     void ensureCapacity(int32_t newLen);
01208 
01209     void ensureBufferCapacity(int32_t newLen);
01210 
          // NOTE(review): presumably exchanges list and buffer (and their
          // capacities) - confirm.
01211     void swapBuffers(void);
01212 
          // Returns a UBool.
          // NOTE(review): presumably allocates the strings vector and reports
          // success - confirm.
01213     UBool allocateStrings();
01214 
          // Pattern parsing helper with an extra UnicodeString& rebuiltPat
          // output argument.
01215     void _applyPattern(const UnicodeString& pattern,
01216                        ParsePosition& pos,
01217                        uint32_t options,
01218                        const SymbolTable* symbols,
01219                        UnicodeString& rebuiltPat,
01220                        UErrorCode& status);
01221 
          // Pattern generation helpers; both write into result and return a
          // UnicodeString&.
01222     UnicodeString& _toPattern(UnicodeString& result,
01223                               UBool escapeUnprintable) const;
01224 
01225     UnicodeString& _generatePattern(UnicodeString& result,
01226                                     UBool escapeUnprintable) const;
01227 
          // Static helpers that take buf by non-const reference plus either a
          // string or a single code point.
01228     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01229 
01230     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01231 
01232     //----------------------------------------------------------------
01233     // Implementation: Fundamental operators
01234     //----------------------------------------------------------------
01235 
          // Array-level operators: each takes a UChar32 array, its length and
          // an int8_t polarity.
          // NOTE(review): the encoding of polarity is not visible here.
01236     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01237 
01238     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01239 
01240     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01241 
          // Static test on a pattern string at index pos; returns UBool.
01247     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01248                                           int32_t pos);
01249 
          // Takes a pattern and a ParsePosition by non-const reference; an
          // error can be reported through ec.
01288     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01289                                      ParsePosition& ppos,
01290                                      UErrorCode &ec);
01291 
          // Callback type: a function taking a code point and an opaque
          // context pointer, returning UBool.
01296     typedef UBool (*Filter)(UChar32 codePoint, void* context);
01297 
          // Takes a Filter callback and the context pointer to pass to it.
          // NOTE(review): which code points the filter is evaluated on is not
          // visible here.
01306     void applyFilter(Filter filter,
01307                      void* context,
01308                      UErrorCode &status);
01309 
          // Static accessor returning a pointer to a const UnicodeSet.
          // NOTE(review): ownership and lifetime of the returned set are not
          // visible here.
01314     static const UnicodeSet* getInclusions(UErrorCode &errorCode);
01315 
          // UnicodeSetIterator is a friend and may read the private state.
01316     friend class UnicodeSetIterator;
01317 
01318     //----------------------------------------------------------------
01319     // Implementation: closeOver
01320     //----------------------------------------------------------------
01321 
          // caseCloseOne overloads: a string, a CaseEquivClass, or a single
          // UChar.
01322     void caseCloseOne(const UnicodeString& folded);
01323 
01324     void caseCloseOne(const CaseEquivClass& c);
01325 
01326     void caseCloseOne(UChar folded);
01327 
          // Static lookups returning a pointer to a const CaseEquivClass for a
          // string or a single UChar.
          // NOTE(review): presumably NULL when there is no mapping - confirm.
01328     static const CaseEquivClass* getCaseMapOf(const UnicodeString& folded);
01329 
01330     static const CaseEquivClass* getCaseMapOf(UChar folded);
01331 };
01332 
01333 inline UClassID UnicodeSet::getStaticClassID(void) {
          // The address of the class-wide fgClassID member is the class ID.
01334     return (UClassID)(&fgClassID);
01335 }
01336 
01337 inline UClassID UnicodeSet::getDynamicClassID(void) const {
          // Forwards to this class's static accessor.
01338     return getStaticClassID();
01339 }
01340 
01341 inline UBool
01342 UnicodeSet::operator!=(const UnicodeSet& o) const
      // Inequality is the logical negation of operator==(o).
01343 { return !(this->operator==(o)); }
01344 
01345 inline UBool
01346 UnicodeSet::containsSome(UChar32 start, UChar32 end) const
      // Logical negation of containsNone(start, end).
01347 { return !(this->containsNone(start, end)); }
01348 
01349 inline UBool
01350 UnicodeSet::containsSome(const UnicodeSet& s) const
      // Logical negation of the set overload of containsNone.
01351 { return !(this->containsNone(s)); }
01352 
01353 inline UBool
01354 UnicodeSet::containsSome(const UnicodeString& s) const
      // Logical negation of the string overload of containsNone.
01355 { return !(this->containsNone(s)); }
01356 
01357 U_NAMESPACE_END
01358 
01359 #endif

Generated on Wed Sep 3 17:47:10 2003 for ICU 2.6 by doxygen 1.3.2