00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 #ifndef REGEX_H
00017 #define REGEX_H
00018
00019
00039 #include "unicode/utypes.h"
00040
00041 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
00042
00043 #include "unicode/uobject.h"
00044 #include "unicode/unistr.h"
00045 #include "unicode/parseerr.h"
00046
00047 U_NAMESPACE_BEGIN
00048
00049
00050
00051
00052 class RegexMatcher; // Forward declarations: this header refers to the types below only through pointers or references. RegexMatcher itself is defined later in this header.
00053 class UVector; // pointed to by RegexPattern::fSets
00054 class UVector32; // pointed to by RegexPattern::fCompiledPat / fGroupMap and RegexMatcher::fStack
00055 class UnicodeSet; // pointed to by RegexPattern::fStaticSets / fInitialChars
00056 struct REStackFrame; // pointed to by RegexMatcher::fFrame; also used in resetStack() / StateSave()
00057 struct Regex8BitSet; // pointed to by RegexPattern::fSets8 / fStaticSets8 / fInitialChars8
00058
00059
00064 enum { // Anonymous enum of regular-expression option constants. Every value is a distinct single bit, so they can be OR-ed together; presumably they form the uint32_t `flags` arguments used below -- confirm. Bits 0x01, 0x10 and 0x40 are not assigned here.
00066 UREGEX_CANON_EQ = 128, // 0x80. NOTE(review): by name, canonical-equivalence matching -- confirm against the implementation.
00067
00069 UREGEX_CASE_INSENSITIVE = 2, // 0x02. NOTE(review): by name, case-insensitive matching -- confirm.
00070
00072 UREGEX_COMMENTS = 4, // 0x04. NOTE(review): by name, allows comments (and presumably white space) inside patterns -- confirm.
00073
00076 UREGEX_DOTALL = 32, // 0x20. NOTE(review): by name, lets '.' match line terminators too -- confirm.
00077
00082 UREGEX_MULTILINE = 8 // 0x08. NOTE(review): by name, makes line anchors match at every line of the input -- confirm.
00083 };
00084
00085
00086
00098 class U_I18N_API RegexPattern: public UObject {
00099 public:
00100
00108 RegexPattern();
00109
00115 RegexPattern(const RegexPattern &source);
00116
00122 virtual ~RegexPattern();
00123
00132 UBool operator==(const RegexPattern& that) const;
00133
00142 inline UBool operator!=(const RegexPattern& that) const {return ! operator ==(that);};
00143
00149 RegexPattern &operator =(const RegexPattern &source);
00150
00158 virtual RegexPattern *clone() const;
00159
00160
00181 static RegexPattern *compile( const UnicodeString ®ex,
00182 UParseError &pe,
00183 UErrorCode &status);
00184
00205 static RegexPattern *compile( const UnicodeString ®ex,
00206 uint32_t flags,
00207 UParseError &pe,
00208 UErrorCode &status);
00209
00210
00229 static RegexPattern *compile( const UnicodeString ®ex,
00230 uint32_t flags,
00231 UErrorCode &status);
00232
00233
00239 virtual uint32_t flags() const;
00240
00253 virtual RegexMatcher *matcher(const UnicodeString &input,
00254 UErrorCode &status) const;
00255
00256
00268 virtual RegexMatcher *matcher(UErrorCode &status) const;
00269
00270
00285 static UBool matches(const UnicodeString ®ex,
00286 const UnicodeString &input,
00287 UParseError &pe,
00288 UErrorCode &status);
00289
00290
00295 virtual UnicodeString pattern() const;
00296
00297
00323 virtual int32_t split(const UnicodeString &input,
00324 UnicodeString dest[],
00325 int32_t destCapacity,
00326 UErrorCode &status) const;
00327
00328
00329
00334 void dump() const;
00335
00341 virtual inline UClassID getDynamicClassID() const;
00342
00348 static inline UClassID getStaticClassID();
00349
00350 private:
00351
00352
00353
00354 UnicodeString fPattern;
00355 uint32_t fFlags;
00356
00357 UVector32 *fCompiledPat;
00358 UnicodeString fLiteralText;
00359
00360
00361 UVector *fSets;
00362 Regex8BitSet *fSets8;
00363
00364
00365 UErrorCode fDeferredStatus;
00366
00367
00368 int32_t fMinMatchLen;
00369
00370
00371
00372
00373 int32_t fFrameSize;
00374
00375
00376 int32_t fDataSize;
00377
00378
00379
00380 UVector32 *fGroupMap;
00381
00382
00383 int32_t fMaxCaptureDigits;
00384
00385 UnicodeSet **fStaticSets;
00386
00387
00388 Regex8BitSet *fStaticSets8;
00389
00390
00391 int32_t fStartType;
00392 int32_t fInitialStringIdx;
00393 int32_t fInitialStringLen;
00394 UnicodeSet *fInitialChars;
00395 UChar32 fInitialChar;
00396 Regex8BitSet *fInitialChars8;
00397
00402 static const char fgClassID;
00403
00404 friend class RegexCompile;
00405 friend class RegexMatcher;
00406
00407
00408
00409
00410 void init();
00411 void zap();
00412 void dumpOp(int32_t index) const;
00413
00414
00415 };
00416
00417
00418
00419
00420
00421
00422
00423
00424
00434 class U_I18N_API RegexMatcher: public UObject { // Matching engine object: applies a RegexPattern to an input string and exposes the match state (found / start / end / groups) plus replace and split operations.
00435 public:
00436 // Constructs a matcher directly from pattern text `regexp` and `flags`; errors are reported through `status`. NOTE(review): presumably compiles and owns the pattern (see fPatternOwned) -- confirm.
00451 RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
00452 // As above, additionally taking the `input` string to match against. NOTE(review): `input` is held through a pointer member (fInput), so it presumably must outlive the matcher -- confirm.
00468 RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
00469 uint32_t flags, UErrorCode &status);
00470
00471 // Virtual destructor.
00477 virtual ~RegexMatcher();
00478
00479 // Returns a UBool; errors via `status`. NOTE(review): presumably TRUE only if the entire input matches -- confirm.
00486 virtual UBool matches(UErrorCode &status);
00487
00488
00489 // Returns a UBool; errors via `status`. NOTE(review): presumably matches at the start of the input without requiring the whole input to match -- confirm.
00502 virtual UBool lookingAt(UErrorCode &status);
00503
00504 // Returns a UBool. Note this overload has no UErrorCode parameter. NOTE(review): presumably finds the next match after the previous one -- confirm.
00517 virtual UBool find();
00518
00519 // Overload taking an index `start` and a `status`. NOTE(review): presumably resets and searches from `start` -- confirm.
00529 virtual UBool find(int32_t start, UErrorCode &status);
00530
00531 // Returns a UnicodeString by value. NOTE(review): presumably the text of the most recent match -- confirm.
00541 virtual UnicodeString group(UErrorCode &status) const;
00542
00543 // Returns a UnicodeString by value for capture group number `groupNum`. NOTE(review): behavior for out-of-range numbers is not visible here.
00556 virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
00557
00558 // Returns an int32_t. NOTE(review): presumably the number of capture groups in the pattern -- confirm.
00564 virtual int32_t groupCount() const;
00565
00566 // Returns an int32_t. NOTE(review): presumably the input index where the most recent match begins (see fMatchStart) -- confirm.
00574 virtual int32_t start(UErrorCode &status) const;
00575
00576 // Per-group overload. NOTE(review): `group` is `int` here while group() uses int32_t; left unchanged because altering the type could change the exported signature.
00590 virtual int32_t start(int group, UErrorCode &status) const;
00591
00592 // Returns an int32_t. NOTE(review): presumably the input index just past the most recent match (see fMatchEnd) -- confirm.
00602 virtual int32_t end(UErrorCode &status) const;
00603
00604 // Per-group overload; the same `int` vs int32_t remark as for start(int, UErrorCode &) applies.
00618 virtual int32_t end(int group, UErrorCode &status) const;
00619
00620 // Returns a RegexMatcher reference (presumably *this, to allow chaining). NOTE(review): presumably clears the match state -- confirm.
00629 virtual RegexMatcher &reset();
00630
00631 // Overload that also takes a new `input` string. NOTE(review): `input` is presumably retained by pointer (fInput) -- confirm its required lifetime.
00639 virtual RegexMatcher &reset(const UnicodeString &input);
00640
00641 // Returns a const reference to a UnicodeString; presumably the current input string.
00648 virtual const UnicodeString &input() const;
00649
00650 // Returns a const reference to a RegexPattern; presumably the pattern this matcher uses (see fPattern).
00656 virtual const RegexPattern &pattern() const;
00657
00658 // Takes `replacement` text and returns a UnicodeString by value. NOTE(review): presumably the input with every match replaced -- confirm.
00675 virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
00676
00677 // Same shape as replaceAll(). NOTE(review): presumably replaces only the first match -- confirm.
00698 virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
00699 // Appends to the caller's `dest` (non-const reference) using `replacement`; returns a RegexMatcher reference. NOTE(review): presumably the incremental building block for replaceAll-style loops together with appendTail() -- confirm.
00727 virtual RegexMatcher &appendReplacement(UnicodeString &dest,
00728 const UnicodeString &replacement, UErrorCode &status);
00729
00730 // Appends to `dest` and returns a UnicodeString reference (presumably `dest`). No UErrorCode parameter.
00741 virtual UnicodeString &appendTail(UnicodeString &dest);
00742
00743
00744 // Splits `input`, writing results into the caller-supplied array `dest` of `destCapacity` elements; errors via `status`. NOTE(review): the int32_t result is presumably the number of elements filled -- confirm.
00769 virtual int32_t split(const UnicodeString &input,
00770 UnicodeString dest[],
00771 int32_t destCapacity,
00772 UErrorCode &status);
00773
00774
00775 // Non-virtual; takes a UBool `state`. NOTE(review): presumably toggles debug tracing (see fTraceDebug) -- confirm.
00781 void setTrace(UBool state);
00782
00783 // Class ID of RegexMatcher; the inline definition later in this header returns the address of fgClassID.
00789 static inline UClassID getStaticClassID();
00790 // Class ID of this object's dynamic type; the inline definition later in this header forwards to getStaticClassID().
00796 virtual inline UClassID getDynamicClassID() const;
00797
00798 private:
00799 // These constructors and the assignment operator are private, so only members and friends
00800 // (RegexPattern is declared a friend below) can use them.
00801 RegexMatcher();
00802 RegexMatcher(const RegexPattern *pat);
00803 RegexMatcher(const RegexMatcher &other);
00804 RegexMatcher &operator =(const RegexMatcher &rhs);
00805 friend class RegexPattern;
00806
00807
00808
00809
00810 // Private matching machinery. Descriptions from here on are inferred from names and types only;
00811 // treat them as assumptions and confirm against the implementation.
00812 void MatchAt(int32_t startIdx, UErrorCode &status); // presumably attempts a match beginning at startIdx
00813 inline void backTrack(int32_t &inputIdx, int32_t &patIdx); // both indices are in/out references
00814 UBool isWordBoundary(int32_t pos);
00815 REStackFrame *resetStack();
00816 inline REStackFrame *StateSave(REStackFrame *fp, int32_t savePatIdx,
00817 int32_t frameSize, UErrorCode &status);
00818
00819
00820 const RegexPattern *fPattern; // the pattern in use; pointer to const
00821 RegexPattern *fPatternOwned; // presumably non-NULL only when this matcher owns its pattern
00822
00823 const UnicodeString *fInput; // pointer to (not a copy of) the input string
00824
00825 UBool fMatch; // presumably TRUE when the last match attempt succeeded
00826 int32_t fMatchStart;
00827 int32_t fMatchEnd;
00828 int32_t fLastMatchEnd;
00829
00830 UVector32 *fStack; // presumably the backtracking state stack
00831 REStackFrame *fFrame; // presumably the current frame within fStack
00832
00833
00834
00835 int32_t *fData;
00836 int32_t fSmallData[8]; // fixed 8-element buffer; presumably what fData points at when the data area is small enough -- confirm
00837
00838 UBool fTraceDebug; // presumably set by setTrace()
00839
00840 UErrorCode fDeferredStatus; // presumably an error recorded by a call that has no status parameter
00841
00842 // Its address is what getStaticClassID() returns as this class's ID.
00847 static const char fgClassID;
00848
00849
00850 };
00851
00852 inline UClassID RegexPattern::getStaticClassID() { return (UClassID)&fgClassID; } // The class ID is the address of the static member fgClassID, converted to UClassID.
00853 inline UClassID RegexPattern::getDynamicClassID() const { return getStaticClassID(); } // Virtual in the class declaration; forwards to RegexPattern::getStaticClassID().
00854
00855 inline UClassID RegexMatcher::getStaticClassID() { return (UClassID)&fgClassID; } // The class ID is the address of the static member fgClassID, converted to UClassID.
00856 inline UClassID RegexMatcher::getDynamicClassID() const { return getStaticClassID(); } // Virtual in the class declaration; forwards to RegexMatcher::getStaticClassID().
00857
00858
00859 U_NAMESPACE_END
00860 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
00861 #endif