Reordered files in all libs - now all includes are in "libname/include" dir - logical, isn't it? This should break compilation however.

This commit is contained in:
pelya
2010-10-26 14:43:54 +03:00
parent fc58bc53c0
commit 6b9b163689
520 changed files with 41 additions and 43205 deletions

View File

@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: ASCIIRangeFactory.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_ASCIIRANGEFACTORY_HPP)
#define XERCESC_INCLUDE_GUARD_ASCIIRANGEFACTORY_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/RangeFactory.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/**
 * RangeFactory subclass that, judging by its name, supplies the ASCII
 * ranges. The method bodies are not in this header, so the exact ranges it
 * registers must be confirmed against the implementation file.
 *
 * The copy constructor and assignment operator are declared private and are
 * left unimplemented, so instances cannot be copied.
 */
class XMLUTIL_EXPORT ASCIIRangeFactory: public RangeFactory {
public:
// -----------------------------------------------------------------------
// Constructors and operators
// -----------------------------------------------------------------------
ASCIIRangeFactory();
~ASCIIRangeFactory();
// -----------------------------------------------------------------------
// Initialization methods
// -----------------------------------------------------------------------
// rangeTokMap is optional and defaults to 0; how a null map is treated is
// decided in the implementation file and is not visible from this header.
void initializeKeywordMap(RangeTokenMap *rangeTokMap = 0);
protected:
// -----------------------------------------------------------------------
// Protected helper methods
// -----------------------------------------------------------------------
// Same optional-map convention as initializeKeywordMap().
void buildRanges(RangeTokenMap *rangeTokMap = 0);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
ASCIIRangeFactory(const ASCIIRangeFactory&);
ASCIIRangeFactory& operator=(const ASCIIRangeFactory&);
};
XERCES_CPP_NAMESPACE_END
#endif
/**
* End file ASCIIRangeFactory.hpp
*/

View File

@@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: BMPattern.hpp 932887 2010-04-11 13:04:59Z borisk $
*/
#if !defined(XERCESC_INCLUDE_GUARD_BMPATTERN_HPP)
#define XERCESC_INCLUDE_GUARD_BMPATTERN_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/XMemory.hpp>
#include <xercesc/util/PlatformUtils.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/**
 * Holds a search pattern together with the shift table used by the BM
 * search algorithm (see the data-member notes at the end of the class).
 *
 * Default construction, copy construction and assignment are declared
 * private and left unimplemented: a BMPattern is always built from a
 * pattern string and cannot be copied.
 */
class XMLUTIL_EXPORT BMPattern : public XMemory
{
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
/** @name Constructors */
//@{
/**
* This is the constructor which takes the pattern information. A default
* shift table size is used.
*
* @param pattern The pattern to match against.
*
* @param ignoreCase A flag to indicate whether to ignore case
* matching or not.
*
* @param manager The configurable memory manager
*/
BMPattern
(
const XMLCh* const pattern
, bool ignoreCase
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
);
/**
* This is the constructor which takes all of the information
* required to construct a BM pattern object.
*
* @param pattern The pattern to match against.
*
* @param tableSize Indicates the size of the shift table.
*
* @param ignoreCase A flag to indicate whether to ignore case
* matching or not.
*
* @param manager The configurable memory manager
*/
BMPattern
(
const XMLCh* const pattern
, int tableSize
, bool ignoreCase
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
);
//@}
/** @name Destructor. */
//@{
/**
* Destructor of BMPattern
*/
~BMPattern();
//@}
// -----------------------------------------------------------------------
// Matching functions
// -----------------------------------------------------------------------
/** @name Matching Functions */
//@{
/**
* This method will perform a match of the given content against a
* predefined pattern.
*
* @param content The text to search.
*
* @param start Presumably the offset in content at which the search
* begins -- confirm against the implementation.
*
* @param limit Presumably the offset in content at which the search
* stops -- confirm against the implementation.
*
* @return An int whose meaning is not visible from this header
* (NOTE(review): likely the match offset, with a negative
* value for "not found" -- confirm).
*/
int matches(const XMLCh* const content, XMLSize_t start, XMLSize_t limit) const;
//@}
private :
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
BMPattern();
BMPattern(const BMPattern&);
BMPattern& operator=(const BMPattern&);
// -----------------------------------------------------------------------
// This method will perform a case insensitive match
// -----------------------------------------------------------------------
bool matchesIgnoreCase(const XMLCh ch1, const XMLCh ch2);
// -----------------------------------------------------------------------
// Initialize/Clean up methods
// -----------------------------------------------------------------------
void initialize();
void cleanUp();
// -----------------------------------------------------------------------
// Private data members
//
// fPattern
// fUppercasePattern
// This is the pattern to match against, and its upper case form.
//
// fIgnoreCase
// This is an indicator whether cases should be ignored during
// matching.
//
// fShiftTable
// fShiftTableLen
// This is a table of offsets for shifting purposes used by the BM
// search algorithm, and its length.
//
// fMemoryManager
// Memory manager pointer kept by the object (presumably the one
// passed to the constructor -- confirm in the implementation).
// -----------------------------------------------------------------------
bool fIgnoreCase;
unsigned int fShiftTableLen;
XMLSize_t* fShiftTable;
XMLCh* fPattern;
XMLCh* fUppercasePattern;
MemoryManager* fMemoryManager;
};
XERCES_CPP_NAMESPACE_END
#endif
/*
* End of file BMPattern.hpp
*/

View File

@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: BlockRangeFactory.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_BLOCKRANGEFACTORY_HPP)
#define XERCESC_INCLUDE_GUARD_BLOCKRANGEFACTORY_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/RangeFactory.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/**
 * RangeFactory subclass that, judging by its name, supplies the block
 * ranges. The method bodies are not in this header, so the exact ranges it
 * registers must be confirmed against the implementation file.
 *
 * The copy constructor and assignment operator are declared private and are
 * left unimplemented, so instances cannot be copied.
 */
class XMLUTIL_EXPORT BlockRangeFactory: public RangeFactory {
public:
// -----------------------------------------------------------------------
// Constructors and operators
// -----------------------------------------------------------------------
BlockRangeFactory();
~BlockRangeFactory();
// -----------------------------------------------------------------------
// Initialization methods
// -----------------------------------------------------------------------
// rangeTokMap is optional and defaults to 0; how a null map is treated is
// decided in the implementation file and is not visible from this header.
void initializeKeywordMap(RangeTokenMap *rangeTokMap = 0);
protected:
// -----------------------------------------------------------------------
// Protected helper methods
// -----------------------------------------------------------------------
// Same optional-map convention as initializeKeywordMap().
void buildRanges(RangeTokenMap *rangeTokMap = 0);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
BlockRangeFactory(const BlockRangeFactory&);
BlockRangeFactory& operator=(const BlockRangeFactory&);
};
XERCES_CPP_NAMESPACE_END
#endif
/**
* End file BlockRangeFactory.hpp
*/

View File

@@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: CharToken.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_CHARTOKEN_HPP)
#define XERCESC_INCLUDE_GUARD_CHARTOKEN_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/Token.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/**
 * Token that carries a single XMLInt32 character value (fCharData).
 *
 * getChar() returns that value and match() compares its argument with it;
 * both are defined inline further down in this header. Copying is disabled:
 * the copy constructor and assignment operator are private and unimplemented.
 */
class XMLUTIL_EXPORT CharToken : public Token {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// tkType: token type for this token; ch: character value (presumably stored
// in fCharData -- the constructor body is not in this header);
// manager: memory manager, defaulting to the platform-wide one.
CharToken(const tokType tkType, const XMLInt32 ch
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~CharToken();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
// Returns fCharData.
XMLInt32 getChar() const;
// -----------------------------------------------------------------------
// Match methods
// -----------------------------------------------------------------------
// Returns true when ch equals fCharData.
bool match(const XMLInt32 ch);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
CharToken(const CharToken&);
CharToken& operator=(const CharToken&);
// -----------------------------------------------------------------------
// Private data members
//
// fCharData
// The character value returned by getChar() and tested by match().
// -----------------------------------------------------------------------
XMLInt32 fCharData;
};
// ---------------------------------------------------------------------------
// CharToken: getter methods
// ---------------------------------------------------------------------------
// Accessor for the character value stored in this token.
inline XMLInt32 CharToken::getChar() const
{
    return this->fCharData;
}
// ---------------------------------------------------------------------------
// CharToken: match methods
// ---------------------------------------------------------------------------
inline bool CharToken::match(const XMLInt32 ch){
return ch == fCharData;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file CharToken.hpp
*/

View File

@@ -0,0 +1,114 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: ClosureToken.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_CLOSURETOKEN_HPP)
#define XERCESC_INCLUDE_GUARD_CLOSURETOKEN_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/Token.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/**
 * Token that holds one child token (fChild) together with a minimum and a
 * maximum value (fMin / fMax).
 *
 * The inline definitions further down in this header show that size()
 * always returns 1 and that getChild() ignores its index argument and
 * returns fChild. Copying is disabled: the copy constructor and assignment
 * operator are private and unimplemented.
 */
class XMLUTIL_EXPORT ClosureToken : public Token {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// tkType: token type; tok: the child token (presumably stored in fChild --
// the constructor body is not in this header); manager: memory manager,
// defaulting to the platform-wide one.
ClosureToken(const tokType tkType, Token* const tok
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~ClosureToken();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
// Always 1.
XMLSize_t size() const;
// Returns fMin.
int getMin() const;
// Returns fMax.
int getMax() const;
// Returns fChild regardless of index.
Token* getChild(const XMLSize_t index) const;
// -----------------------------------------------------------------------
// Setter methods
// -----------------------------------------------------------------------
// Stores minVal in fMin; no validation is performed.
void setMin(const int minVal);
// Stores maxVal in fMax; no validation is performed.
void setMax(const int maxVal);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
ClosureToken(const ClosureToken&);
ClosureToken& operator=(const ClosureToken&);
// -----------------------------------------------------------------------
// Private data members
//
// fMin, fMax
// Values read and written through getMin/setMin and getMax/setMax
// (presumably the repetition bounds of the closure -- confirm).
//
// fChild
// The single child token returned by getChild().
// -----------------------------------------------------------------------
int fMin;
int fMax;
Token* fChild;
};
// ---------------------------------------------------------------------------
// ClosureToken: getter methods
// ---------------------------------------------------------------------------
// A ClosureToken holds a single child, so the reported size is always 1.
inline XMLSize_t ClosureToken::size() const
{
    const XMLSize_t childCount = 1;
    return childCount;
}
inline int ClosureToken::getMax() const {
return fMax;
}
inline int ClosureToken::getMin() const {
return fMin;
}
// The index argument is deliberately unused: there is only one child.
inline Token* ClosureToken::getChild(const XMLSize_t /* index */) const
{
    return this->fChild;
}
// ---------------------------------------------------------------------------
// ClosureToken: setter methods
// ---------------------------------------------------------------------------
inline void ClosureToken::setMax(const int maxVal) {
fMax = maxVal;
}
inline void ClosureToken::setMin(const int minVal) {
fMin = minVal;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file ClosureToken.hpp
*/

View File

@@ -0,0 +1,81 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: ConcatToken.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_CONCATTOKEN_HPP)
#define XERCESC_INCLUDE_GUARD_CONCATTOKEN_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/Token.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/**
 * Token that holds exactly two child tokens (fChild1 and fChild2).
 *
 * The inline definitions further down in this header show that size()
 * always returns 2, and that getChild() returns fChild1 for index 0 and
 * fChild2 for every other index (no range check is made). Copying is
 * disabled: the copy constructor and assignment operator are private and
 * unimplemented.
 */
class XMLUTIL_EXPORT ConcatToken : public Token {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// tok1 / tok2: the two child tokens (presumably stored in fChild1 and
// fChild2 -- the constructor body is not in this header); manager: memory
// manager, defaulting to the platform-wide one.
ConcatToken(Token* const tok1, Token* const tok2
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~ConcatToken();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
// Returns fChild1 when index is 0, otherwise fChild2.
Token* getChild(const XMLSize_t index) const;
// Always 2.
XMLSize_t size() const;
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
ConcatToken(const ConcatToken&);
ConcatToken& operator=(const ConcatToken&);
// -----------------------------------------------------------------------
// Private data members
//
// fChild1, fChild2
// The first and second child tokens, as returned by getChild().
// -----------------------------------------------------------------------
Token* fChild1;
Token* fChild2;
};
// ---------------------------------------------------------------------------
// ConcatToken: getter methods
// ---------------------------------------------------------------------------
// A ConcatToken always has two children.
inline XMLSize_t ConcatToken::size() const
{
    const XMLSize_t childCount = 2;
    return childCount;
}
// Index 0 selects the first child; any other index selects the second one.
inline Token* ConcatToken::getChild(const XMLSize_t index) const
{
    if (index == 0) {
        return fChild1;
    }
    return fChild2;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file ConcatToken.hpp
*/

View File

@@ -0,0 +1,163 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: Match.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_MATCH_HPP)
#define XERCESC_INCLUDE_GUARD_MATCH_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/ArrayIndexOutOfBoundsException.hpp>
#include <xercesc/util/RuntimeException.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/**
* An instance of this class has ranges captured in matching
*/
/**
 * Records, per regular-expression group, the start and end positions
 * captured during matching.
 *
 * The position accessors are defined inline further down in this header.
 * They throw RuntimeException (Regex_Result_Not_Set) when the relevant
 * position array pointer is null, and ArrayIndexOutOfBoundsException
 * (Array_BadIndex) when the index is outside [0, fNoGroups). Unlike the
 * token classes, Match declares a public copy constructor and assignment
 * operator.
 */
class XMLUTIL_EXPORT Match : public XMemory
{
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
Match(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
/**
* Copy constructor
*/
Match(const Match& toCopy);
Match& operator=(const Match& toAssign);
virtual ~Match();
// -----------------------------------------------------------------------
// Getter functions
// -----------------------------------------------------------------------
// Returns fNoGroups; throws RuntimeException if it is negative.
int getNoGroups() const;
// Returns fStartPositions[index] after the null and range checks.
int getStartPos(int index) const;
// Returns fEndPositions[index] after the null and range checks.
int getEndPos(int index) const;
// -----------------------------------------------------------------------
// Setter functions
// -----------------------------------------------------------------------
// Defined outside this header (NOTE(review): presumably also sizes the
// position arrays -- confirm against the implementation).
void setNoGroups(const int n);
// Stores value in fStartPositions[index] after the null and range checks.
void setStartPos(const int index, const int value);
// Stores value in fEndPositions[index] after the null and range checks.
void setEndPos(const int index, const int value);
private:
// -----------------------------------------------------------------------
// Initialize/Clean up methods
// -----------------------------------------------------------------------
void initialize(const Match& toCopy);
void cleanUp();
// -----------------------------------------------------------------------
// Private data members
//
// fNoGroups
// Represents no of regular expression groups
//
// fStartPositions
// Array of start positions in the target text matched to specific
// regular expression group
//
// fEndPositions
// Array of end positions in the target text matched to specific
// regular expression group
//
// fPositionsSize
// Actual size of Start/EndPositions array.
//
// fMemoryManager
// Memory manager handed to the exceptions thrown by the inline
// accessors.
// -----------------------------------------------------------------------
int fNoGroups;
int fPositionsSize;
int* fStartPositions;
int* fEndPositions;
MemoryManager* fMemoryManager;
};
/**
* Inline Methods
*/
// ---------------------------------------------------------------------------
// Match: getter methods
// ---------------------------------------------------------------------------
// Returns the group count; a negative count is reported as "result not set".
inline int Match::getNoGroups() const
{
    const bool resultSet = !(fNoGroups < 0);
    if (!resultSet) {
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_Result_Not_Set, fMemoryManager);
    }
    return fNoGroups;
}
// Returns the start position recorded for group `index`.
// Throws RuntimeException when the start-position array pointer is null and
// ArrayIndexOutOfBoundsException when index is outside [0, fNoGroups).
inline int Match::getStartPos(int index) const
{
    // The null check comes first so that it takes precedence over the
    // range check.
    if (fStartPositions == 0) {
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_Result_Not_Set, fMemoryManager);
    }

    const bool inRange = (index >= 0) && (index < fNoGroups);
    if (!inRange) {
        ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager);
    }

    return fStartPositions[index];
}
// Returns the end position recorded for group `index`.
// Throws RuntimeException when the end-position array pointer is null and
// ArrayIndexOutOfBoundsException when index is outside [0, fNoGroups).
inline int Match::getEndPos(int index) const
{
    // The null check comes first so that it takes precedence over the
    // range check.
    if (fEndPositions == 0) {
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_Result_Not_Set, fMemoryManager);
    }

    const bool inRange = (index >= 0) && (index < fNoGroups);
    if (!inRange) {
        ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager);
    }

    return fEndPositions[index];
}
// ---------------------------------------------------------------------------
// Match: setter methods
// ---------------------------------------------------------------------------
// Records `value` as the start position of group `index`.
// Throws RuntimeException when the start-position array pointer is null and
// ArrayIndexOutOfBoundsException when index is outside [0, fNoGroups).
inline void Match::setStartPos(const int index, const int value)
{
    // The null check comes first so that it takes precedence over the
    // range check.
    if (fStartPositions == 0) {
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_Result_Not_Set, fMemoryManager);
    }

    const bool inRange = (index >= 0) && (index < fNoGroups);
    if (!inRange) {
        ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager);
    }

    fStartPositions[index] = value;
}
// Records `value` as the end position of group `index`.
// Throws RuntimeException when the end-position array pointer is null and
// ArrayIndexOutOfBoundsException when index is outside [0, fNoGroups).
inline void Match::setEndPos(const int index, const int value)
{
    // The null check comes first so that it takes precedence over the
    // range check.
    if (fEndPositions == 0) {
        ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_Result_Not_Set, fMemoryManager);
    }

    const bool inRange = (index >= 0) && (index < fNoGroups);
    if (!inRange) {
        ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager);
    }

    fEndPositions[index] = value;
}
XERCES_CPP_NAMESPACE_END
#endif

View File

@@ -0,0 +1,306 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: Op.hpp 932887 2010-04-11 13:04:59Z borisk $
*/
#if !defined(XERCESC_INCLUDE_GUARD_OP_HPP)
#define XERCESC_INCLUDE_GUARD_OP_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/RefVectorOf.hpp>
#include <xercesc/util/RuntimeException.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class Token;
/**
 * Base class of the operation objects declared in this header (CharOp,
 * UnionOp, ChildOp, ModifierOp, RangeOp, StringOp).
 *
 * Each Op carries a type tag (fOpType) and a pointer to the next operation
 * in the chain (fNextOp). The constructor is protected, so only derived
 * classes and the friend class OpFactory can create instances. Copying is
 * disabled: the copy constructor and assignment operator are private and
 * unimplemented.
 */
class XMLUTIL_EXPORT Op : public XMemory
{
public:
// Type tag identifying the kind of operation. The numeric values are
// explicit; 2 and 14 are not assigned in this enum.
typedef enum {
O_DOT = 0,
O_CHAR = 1,
O_RANGE = 3,
O_NRANGE = 4,
O_ANCHOR = 5,
O_STRING = 6,
O_CLOSURE = 7,
O_NONGREEDYCLOSURE = 8,
O_FINITE_CLOSURE = 9,
O_FINITE_NONGREEDYCLOSURE = 10,
O_QUESTION = 11,
O_NONGREEDYQUESTION = 12,
O_UNION = 13,
O_CAPTURE = 15,
O_BACKREFERENCE = 16
} opType;
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
virtual ~Op() { }
// -----------------------------------------------------------------------
// Getter functions
// -----------------------------------------------------------------------
// getOpType() and getNextOp() are non-virtual and defined inline further
// down in this header. The remaining getters are virtual and are
// redeclared by the derived classes below; their base-class behavior is
// defined outside this header.
opType getOpType() const;
const Op* getNextOp() const;
virtual XMLInt32 getData() const;
virtual XMLInt32 getData2() const;
virtual XMLSize_t getSize() const;
virtual const Op* elementAt(XMLSize_t index) const;
virtual const Op* getChild() const;
virtual const Token* getToken() const;
virtual const XMLCh* getLiteral() const;
// -----------------------------------------------------------------------
// Setter functions
// -----------------------------------------------------------------------
// Replaces the type tag.
void setOpType(const opType type);
// Replaces the next-operation pointer; the pointer is stored as given.
void setNextOp(const Op* const next);
protected:
// -----------------------------------------------------------------------
// Protected Constructors
// -----------------------------------------------------------------------
Op(const opType type, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
// OpFactory is granted access to the non-public members, including the
// protected constructor above.
friend class OpFactory;
// Memory manager pointer; the pointer itself is const, so it cannot be
// reseated after construction. StringOp's destructor uses it to release
// its literal.
MemoryManager* const fMemoryManager;
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
Op(const Op&);
Op& operator=(const Op&);
// -----------------------------------------------------------------------
// Private data members
//
// fOpType
// Indicates the type of operation
//
// fNextOp
// Points to the next operation in the chain
// -----------------------------------------------------------------------
opType fOpType;
const Op* fNextOp;
};
/**
 * Op that carries a single XMLInt32 value (fCharData) and overrides
 * Op::getData(). Copying is disabled: the copy constructor and assignment
 * operator are private and unimplemented.
 */
class XMLUTIL_EXPORT CharOp: public Op {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// charData is presumably stored in fCharData -- the constructor body is
// not in this header.
CharOp(const opType type, const XMLInt32 charData, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~CharOp() {}
// -----------------------------------------------------------------------
// Getter functions
// -----------------------------------------------------------------------
// Overrides Op::getData(); presumably returns fCharData (defined outside
// this header).
XMLInt32 getData() const;
private:
// Private data members
XMLInt32 fCharData;
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
CharOp(const CharOp&);
CharOp& operator=(const CharOp&);
};
/**
 * Op that keeps a vector of branch operations (fBranches) and overrides
 * Op::getSize() and Op::elementAt(). The destructor deletes the vector
 * object. Copying is disabled: the copy constructor and assignment operator
 * are private and unimplemented.
 */
class XMLUTIL_EXPORT UnionOp : public Op {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// size: presumably the initial capacity of the branch vector -- the
// constructor body is not in this header.
UnionOp(const opType type, const XMLSize_t size,
MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
// Deletes the branch vector. Whether the contained Op objects are deleted
// with it depends on how the vector was constructed (not visible here).
~UnionOp() { delete fBranches; }
// -----------------------------------------------------------------------
// Getter functions
// -----------------------------------------------------------------------
// Override Op::getSize() / Op::elementAt(); presumably forward to
// fBranches (defined outside this header).
XMLSize_t getSize() const;
const Op* elementAt(XMLSize_t index) const;
// -----------------------------------------------------------------------
// Setter functions
// -----------------------------------------------------------------------
// Presumably appends op to fBranches (defined outside this header).
void addElement(Op* const op);
private:
// Private Data members
RefVectorOf<Op>* fBranches;
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
UnionOp(const UnionOp&);
UnionOp& operator=(const UnionOp&);
};
/**
 * Op that refers to one child operation (fChild) and overrides
 * Op::getChild(). The destructor is empty, so the child is not released by
 * this class. Copying is disabled: the copy constructor and assignment
 * operator are private and unimplemented.
 */
class XMLUTIL_EXPORT ChildOp: public Op {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
ChildOp(const opType type, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~ChildOp() {}
// -----------------------------------------------------------------------
// Getter functions
// -----------------------------------------------------------------------
// Overrides Op::getChild(); presumably returns fChild (defined outside
// this header).
const Op* getChild() const;
// -----------------------------------------------------------------------
// Setter functions
// -----------------------------------------------------------------------
// Presumably stores child in fChild (defined outside this header).
void setChild(const Op* const child);
private:
// Private data members
const Op* fChild;
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
ChildOp(const ChildOp&);
ChildOp& operator=(const ChildOp&);
};
/**
 * ChildOp that additionally carries two XMLInt32 values (fVal1, fVal2) and
 * overrides Op::getData() and Op::getData2(). Copying is disabled: the copy
 * constructor and assignment operator are private and unimplemented.
 */
class XMLUTIL_EXPORT ModifierOp: public ChildOp {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// v1 / v2 are presumably stored in fVal1 / fVal2 -- the constructor body
// is not in this header.
ModifierOp(const opType type, const XMLInt32 v1, const XMLInt32 v2, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~ModifierOp() {}
// -----------------------------------------------------------------------
// Getter functions
// -----------------------------------------------------------------------
// Override Op::getData() / Op::getData2(); presumably return fVal1 and
// fVal2 respectively (defined outside this header).
XMLInt32 getData() const;
XMLInt32 getData2() const;
private:
// Private data members
XMLInt32 fVal1;
XMLInt32 fVal2;
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
ModifierOp(const ModifierOp&);
ModifierOp& operator=(const ModifierOp&);
};
/**
 * Op that refers to a Token (fToken) and overrides Op::getToken(). The
 * destructor is empty, so the token is not released by this class. Copying
 * is disabled: the copy constructor and assignment operator are private and
 * unimplemented.
 */
class XMLUTIL_EXPORT RangeOp: public Op {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// token is presumably stored in fToken -- the constructor body is not in
// this header.
RangeOp(const opType type, const Token* const token, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~RangeOp() {}
// -----------------------------------------------------------------------
// Getter functions
// -----------------------------------------------------------------------
// Overrides Op::getToken(); presumably returns fToken (defined outside
// this header).
const Token* getToken() const;
private:
// Private data members
const Token* fToken;
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
RangeOp(const RangeOp&);
RangeOp& operator=(const RangeOp&);
};
/**
 * StringOp: an Op that carries a string literal (fLiteral), exposed
 * read-only through getLiteral().
 *
 * The destructor hands fLiteral back to fMemoryManager, so this object
 * is responsible for releasing that buffer.
 * NOTE(review): fMemoryManager is not declared in this class; presumably
 * it is a member inherited from Op - confirm against the Op declaration.
 */
class XMLUTIL_EXPORT StringOp: public Op {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// type    - the kind of operation this object represents
// literal - the string associated with this op; presumably copied into
//           fLiteral by the out-of-line constructor (TODO confirm)
// manager - memory manager; defaults to XMLPlatformUtils::fgMemoryManager
StringOp(const opType type, const XMLCh* const literal, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
// Releases the literal buffer through the memory manager.
~StringOp() { fMemoryManager->deallocate(fLiteral);}
// -----------------------------------------------------------------------
// Getter functions
// -----------------------------------------------------------------------
// Defined out of line; presumably returns fLiteral (TODO confirm).
const XMLCh* getLiteral() const;
private:
// Private data members
XMLCh* fLiteral;
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
// Declared private and left unimplemented so that instances cannot be
// copied or assigned (which would otherwise lead to fLiteral being
// deallocated twice).
StringOp(const StringOp&);
StringOp& operator=(const StringOp&);
};
// ---------------------------------------------------------------------------
// Op: getter methods
// ---------------------------------------------------------------------------
// Returns the operation type currently stored in this Op.
inline Op::opType Op::getOpType() const
{
    return this->fOpType;
}
// Returns the Op linked after this one (the stored fNextOp pointer).
inline const Op* Op::getNextOp() const
{
    return this->fNextOp;
}
// ---------------------------------------------------------------------------
// Op: setter methods
// ---------------------------------------------------------------------------
// Replaces the operation type stored in this Op with 'type'.
inline void Op::setOpType(const Op::opType type)
{
    this->fOpType = type;
}
inline void Op::setNextOp(const Op* const nextOp) {
fNextOp = nextOp;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file Op.hpp
*/

View File

@@ -0,0 +1,116 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: OpFactory.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_OPFACTORY_HPP)
#define XERCESC_INCLUDE_GUARD_OPFACTORY_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/XMemory.hpp>
#include <xercesc/util/RefVectorOf.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class Op;
class CharOp;
class UnionOp;
class ChildOp;
class RangeOp;
class StringOp;
class ModifierOp;
class Token;
/*
* A Factory class used by 'RegularExpression' to create different types of
* operations (Op) objects. The class will keep track of all objects created
* for cleanup purposes. Each 'RegularExpression' object will have its own
* instance of OpFactory and when a 'RegularExpression' object is deleted
* all associated Op objects will be deleted.
*/
class XMLUTIL_EXPORT OpFactory : public XMemory
{
public:
// -----------------------------------------------------------------------
// Constructors and destructors
// -----------------------------------------------------------------------
// manager - memory manager; defaults to XMLPlatformUtils::fgMemoryManager
OpFactory(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~OpFactory();
// -----------------------------------------------------------------------
// Factory methods
// -----------------------------------------------------------------------
// Each method returns a pointer to a newly created Op (or Op subclass).
// Per the class description, the factory keeps track of the objects it
// creates for cleanup, so callers are not expected to delete them.
// All of these are defined out of line.
Op* createDotOp();
CharOp* createCharOp(XMLInt32 data);
CharOp* createAnchorOp(XMLInt32 data);
CharOp* createCaptureOp(int number, const Op* const next);
UnionOp* createUnionOp(XMLSize_t size);
ChildOp* createClosureOp(int id);
ChildOp* createNonGreedyClosureOp();
ChildOp* createQuestionOp(bool nonGreedy);
RangeOp* createRangeOp(const Token* const token);
CharOp* createBackReferenceOp(int refNo);
StringOp* createStringOp(const XMLCh* const literal);
// -----------------------------------------------------------------------
// Reset methods
// -----------------------------------------------------------------------
/*
* Remove all created Op objects from Vector
*/
void reset();
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
// Declared private and left unimplemented so that a factory cannot be
// copied or assigned.
OpFactory(const OpFactory&);
OpFactory& operator=(const OpFactory&);
// -----------------------------------------------------------------------
// Private data members
//
// fOpVector
// Contains Op objects. Used for memory cleanup.
//
// fMemoryManager
// Memory manager pointer; presumably the one passed to the
// constructor (TODO confirm against the implementation file).
// -----------------------------------------------------------------------
RefVectorOf<Op>* fOpVector;
MemoryManager* fMemoryManager;
};
// ---------------------------------------------------------------------------
// OpFactory - Reset methods
// ---------------------------------------------------------------------------
inline void OpFactory::reset() {
fOpVector->removeAllElements();
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file OpFactory.hpp
*/

View File

@@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: ParenToken.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_PARENTOKEN_HPP)
#define XERCESC_INCLUDE_GUARD_PARENTOKEN_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/Token.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/**
 * ParenToken: a Token that wraps exactly one child token together with
 * an integer "paren number" (fNoParen).
 *
 * size() always reports 1 and getChild() returns the single child
 * regardless of the index passed (see the inline definitions that follow
 * the class).
 */
class XMLUTIL_EXPORT ParenToken : public Token {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// tkType  - the token type
// tok     - the child token wrapped by this token
// noParen - the number associated with this parenthesis
//           (NOTE(review): presumably the capture-group index - confirm)
// manager - memory manager; defaults to XMLPlatformUtils::fgMemoryManager
ParenToken(const tokType tkType, Token* const tok,
const int noParen, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~ParenToken();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
XMLSize_t size() const;
int getNoParen() const;
Token* getChild(const XMLSize_t index) const;
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
// Declared private and left unimplemented so that instances cannot be
// copied or assigned.
ParenToken(const ParenToken&);
ParenToken& operator=(const ParenToken&);
// -----------------------------------------------------------------------
// Private data members
// -----------------------------------------------------------------------
int fNoParen;
Token* fChild;
};
// ---------------------------------------------------------------------------
// ParenToken: getter methods
// ---------------------------------------------------------------------------
// A ParenToken always reports a single child.
inline XMLSize_t ParenToken::size() const
{
    const XMLSize_t childCount = 1;
    return childCount;
}
inline int ParenToken::getNoParen() const {
return fNoParen;
}
// Returns the single child token; the index argument is ignored.
inline Token* ParenToken::getChild(const XMLSize_t) const
{
    return this->fChild;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file ParenToken.hpp
*/

View File

@@ -0,0 +1,86 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: ParserForXMLSchema.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_PARSERFORXMLSCHEMA_HPP)
#define XERCESC_INCLUDE_GUARD_PARSERFORXMLSCHEMA_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/RegxParser.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class Token;
class RangeToken;
/**
 * ParserForXMLSchema: a RegxParser specialisation for the XML Schema
 * regular expression syntax.
 *
 * NOTE(review): the protected process* and helper methods below are
 * presumably overrides of virtual methods declared in RegxParser;
 * confirm against RegxParser.hpp.
 */
class XMLUTIL_EXPORT ParserForXMLSchema : public RegxParser {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// manager - memory manager; defaults to XMLPlatformUtils::fgMemoryManager
ParserForXMLSchema(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~ParserForXMLSchema();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
// (none declared)
protected:
// -----------------------------------------------------------------------
// Parsing/Processing methods
// -----------------------------------------------------------------------
// Each method returns a Token*; the ones taking 'tok' receive the token
// that the operator applies to. All are defined out of line.
Token* processCaret();
Token* processDollar();
Token* processStar(Token* const tok);
Token* processPlus(Token* const tok);
Token* processQuestion(Token* const tok);
Token* processParen();
Token* processBackReference();
// -----------------------------------------------------------------------
// Helper methods
// -----------------------------------------------------------------------
bool checkQuestion(const XMLSize_t off);
XMLInt32 decodeEscaped();
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
// Declared private and left unimplemented so that a parser cannot be
// copied or assigned.
ParserForXMLSchema(const ParserForXMLSchema&);
ParserForXMLSchema& operator=(const ParserForXMLSchema&);
// -----------------------------------------------------------------------
// Private data members
// -----------------------------------------------------------------------
// (none declared)
};
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file ParserForXMLSchema.hpp
*/

View File

@@ -0,0 +1,89 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: RangeFactory.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_RANGEFACTORY_HPP)
#define XERCESC_INCLUDE_GUARD_RANGEFACTORY_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/XMemory.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class RangeTokenMap;
/**
 * RangeFactory: abstract base class for range factories.
 *
 * Subclasses must implement initializeKeywordMap() and buildRanges().
 * The constructor is protected, so the class can only be instantiated
 * through a derived class.
 */
class XMLUTIL_EXPORT RangeFactory : public XMemory
{
public:
// -----------------------------------------------------------------------
// Constructors and destructors
// -----------------------------------------------------------------------
virtual ~RangeFactory();
// -----------------------------------------------------------------------
// Initialization methods
// -----------------------------------------------------------------------
/**
* To maintain src code compatibility, we added a default parameter.
* The caller is expected to pass in a valid RangeTokenMap instance.
*
* Note: default arguments of virtual functions are chosen from the
* static type of the call, so overriding declarations should repeat
* the same default.
*/
virtual void initializeKeywordMap(RangeTokenMap *rangeTokMap = 0) = 0;
/*
* Used by children to build commonly used ranges
* To maintain src code compatibility, we added a default parameter.
* The caller is expected to pass in a valid RangeTokenMap instance.
*/
virtual void buildRanges(RangeTokenMap *rangeTokMap = 0) = 0;
protected:
// -----------------------------------------------------------------------
// Constructor and destructors
// -----------------------------------------------------------------------
RangeFactory();
//friend class RangeTokenMap;
// -----------------------------------------------------------------------
// Data
// -----------------------------------------------------------------------
// State flags available to subclasses.
// NOTE(review): presumably set once buildRanges() and
// initializeKeywordMap() have done their work - confirm in the
// derived-class implementations.
bool fRangesCreated;
bool fKeywordsInitialized;
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
// Declared private and left unimplemented so that a factory cannot be
// copied or assigned.
RangeFactory(const RangeFactory&);
RangeFactory& operator=(const RangeFactory&);
};
XERCES_CPP_NAMESPACE_END
#endif
/**
* End file RangeFactory.hpp
*/

View File

@@ -0,0 +1,146 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: RangeToken.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_RANGETOKEN_HPP)
#define XERCESC_INCLUDE_GUARD_RANGETOKEN_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/util/regx/Token.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class TokenFactory;
/**
 * RangeToken: a Token that holds a collection of XMLInt32 range values
 * (fRanges) and offers operations to add, merge, sort, compact,
 * subtract, intersect and complement ranges, plus a match() test for a
 * single character.
 *
 * NOTE(review): the exact layout of fRanges (e.g. start/end pairs) is
 * determined by the out-of-line implementation, which is not visible in
 * this header.
 */
class XMLUTIL_EXPORT RangeToken : public Token {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// tkType  - the token type
// manager - memory manager; defaults to XMLPlatformUtils::fgMemoryManager
RangeToken(const tokType tkType,
MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~RangeToken();
// -----------------------------------------------------------------------
// Public Constants
// -----------------------------------------------------------------------
// Values are defined out of line.
static const int MAPSIZE;
static const unsigned int INITIALSIZE;
// -----------------------------------------------------------------------
// Case-insensitive token accessors
// -----------------------------------------------------------------------
RangeToken* getCaseInsensitiveToken(TokenFactory* const tokFactory);
void setCaseInsensitiveToken(RangeToken* tok);
// -----------------------------------------------------------------------
// Setter methods
// -----------------------------------------------------------------------
void setRangeValues(XMLInt32* const rangeValues, const unsigned int count);
// -----------------------------------------------------------------------
// Range manipulation methods
// -----------------------------------------------------------------------
void addRange(const XMLInt32 start, const XMLInt32 end);
void mergeRanges(const Token *const tok);
void sortRanges();
void compactRanges();
void subtractRanges(RangeToken* const tok);
void intersectRanges(RangeToken* const tok);
static RangeToken* complementRanges(RangeToken* const tok,
TokenFactory* const tokFactory,
MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
// True when the token holds no range elements (fElemCount is zero).
bool empty() const;
// -----------------------------------------------------------------------
// Match methods
// -----------------------------------------------------------------------
bool match(const XMLInt32 ch);
// -----------------------------------------------------------------------
// Creates the map. This will happen automatically, if necessary:
// the map is only built when fMap has not been set yet.
// -----------------------------------------------------------------------
void createMap();
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
// Declared private and left unimplemented so that instances cannot be
// copied or assigned.
RangeToken(const RangeToken&);
RangeToken& operator=(const RangeToken&);
// -----------------------------------------------------------------------
// Private Helper methods
// -----------------------------------------------------------------------
void expand(const unsigned int length);
// Performs the actual map construction; invoked by createMap().
void doCreateMap();
// -----------------------------------------------------------------------
// Private data members
//
// fElemCount
// Number of elements in use; empty() tests it against zero.
//
// fMap
// Null until the map has been created (see createMap()).
//
// fCaseIToken
// Token assigned through setCaseInsensitiveToken().
//
// NOTE(review): fSorted, fCompacted, fNonMapIndex, fMaxCount, fRanges and
// fMemoryManager are maintained by the out-of-line implementation;
// their precise semantics are not visible in this header.
// -----------------------------------------------------------------------
bool fSorted;
bool fCompacted;
int fNonMapIndex;
unsigned int fElemCount;
unsigned int fMaxCount;
int* fMap;
XMLInt32* fRanges;
RangeToken* fCaseIToken;
MemoryManager* fMemoryManager;
};
// Stores 'tok' as the case-insensitive counterpart of this token.
inline void RangeToken::setCaseInsensitiveToken(RangeToken* tok) {
    this->fCaseIToken = tok;
}
// Builds the map on first use; does nothing when a map already exists.
inline void RangeToken::createMap() {
    if (fMap)
        return;

    doCreateMap();
}
inline bool RangeToken::empty() const
{
return fElemCount==0;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file RangeToken.hpp
*/

View File

@@ -0,0 +1,232 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: RangeTokenMap.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_RANGETOKENMAP_HPP)
#define XERCESC_INCLUDE_GUARD_RANGETOKENMAP_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/Mutexes.hpp>
#include <xercesc/util/RefHashTableOf.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class RangeToken;
class RangeFactory;
class TokenFactory;
class XMLStringPool;
/**
 * RangeTokenElemMap: associates a category id with a pair of RangeToken
 * pointers - the range itself (fRange) and its complement (fNRange).
 * The 'complement' flag on the accessors selects which of the two is
 * read or written.
 */
class XMLUTIL_EXPORT RangeTokenElemMap : public XMemory
{
public:
// categoryId - id of the category this entry belongs to
RangeTokenElemMap(unsigned int categoryId);
~RangeTokenElemMap();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
unsigned int getCategoryId() const;
// Returns fNRange when 'complement' is true, fRange otherwise.
RangeToken* getRangeToken(const bool complement = false) const;
// -----------------------------------------------------------------------
// Setter methods
// -----------------------------------------------------------------------
// Stores 'tok' in fNRange when 'complement' is true, in fRange otherwise.
void setRangeToken(RangeToken* const tok, const bool complement = false);
void setCategoryId(const unsigned int categId);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
// Declared private and left unimplemented so that instances cannot be
// copied or assigned.
RangeTokenElemMap(const RangeTokenElemMap&);
RangeTokenElemMap& operator=(const RangeTokenElemMap&);
// Data members
unsigned int fCategoryId;
RangeToken* fRange;
RangeToken* fNRange;
};
/**
 * RangeTokenMap: registry that maps keywords to RangeTokenElemMap
 * entries and category names to RangeFactory objects.
 *
 * Construction and destruction are protected; clients obtain the shared
 * object through the static instance() method. TokenFactory and
 * XMLInitializer are friends and can reach the non-public members.
 */
class XMLUTIL_EXPORT RangeTokenMap : public XMemory
{
public:
// -----------------------------------------------------------------------
// Putter methods
// -----------------------------------------------------------------------
void addCategory(const XMLCh* const categoryName);
void addRangeMap(const XMLCh* const categoryName,
RangeFactory* const rangeFactory);
void addKeywordMap(const XMLCh* const keyword,
const XMLCh* const categoryName);
// -----------------------------------------------------------------------
// Instance methods
// -----------------------------------------------------------------------
// Returns the RangeTokenMap instance (see fInstance below).
static RangeTokenMap* instance();
// -----------------------------------------------------------------------
// Setter methods
// -----------------------------------------------------------------------
void setRangeToken(const XMLCh* const keyword, RangeToken* const tok,
const bool complement = false);
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
TokenFactory* getTokenFactory() const;
protected:
// -----------------------------------------------------------------------
// Constructor and destructors
// -----------------------------------------------------------------------
RangeTokenMap(MemoryManager* manager);
~RangeTokenMap();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
/*
* Gets a commonly used RangeToken from the token registry based on the
* range name - Called by TokenFactory.
*/
RangeToken* getRange(const XMLCh* const name,
const bool complement = false);
RefHashTableOf<RangeTokenElemMap>* getTokenRegistry() const;
RefHashTableOf<RangeFactory>* getRangeMap() const;
XMLStringPool* getCategories() const;
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
// Declared private and left unimplemented so that the map cannot be
// copied or assigned.
RangeTokenMap(const RangeTokenMap&);
RangeTokenMap& operator=(const RangeTokenMap&);
// -----------------------------------------------------------------------
// Private Helpers methods
// -----------------------------------------------------------------------
/*
* Initializes the registry with a set of commonly used RangeToken
* objects.
*/
void initializeRegistry();
void buildTokenRanges();
void cleanUp();
friend class TokenFactory;
// -----------------------------------------------------------------------
// Private data members
//
// fTokenRegistry
// Contains a set of commonly used tokens
//
// fRangeMap
// Contains a map between a category name and a RangeFactory object.
//
// fCategories
// Contains range categories names
//
// fTokenFactory
// Token factory object
//
// fInstance
// A RangeTokenMap instance
//
// fMutex
// A mutex object for synchronization
// -----------------------------------------------------------------------
RefHashTableOf<RangeTokenElemMap>* fTokenRegistry;
RefHashTableOf<RangeFactory>* fRangeMap;
XMLStringPool* fCategories;
TokenFactory* fTokenFactory;
XMLMutex fMutex;
static RangeTokenMap* fInstance;
friend class XMLInitializer;
};
// ---------------------------------------------------------------------------
// RangeTokenElemMap: Getter methods
// ---------------------------------------------------------------------------
inline unsigned int RangeTokenElemMap::getCategoryId() const {
return fCategoryId;
}
// Returns the complement token when 'complement' is true, otherwise the
// regular range token.
inline RangeToken* RangeTokenElemMap::getRangeToken(const bool complement) const
{
    if (complement)
        return fNRange;

    return fRange;
}
// ---------------------------------------------------------------------------
// RangeTokenElemMap: Setter methods
// ---------------------------------------------------------------------------
inline void RangeTokenElemMap::setCategoryId(const unsigned int categId) {
fCategoryId = categId;
}
// Stores 'tok' in the complement slot when 'complement' is true,
// otherwise in the regular range slot.
inline void RangeTokenElemMap::setRangeToken(RangeToken* const tok,
                                             const bool complement)
{
    // Pick the destination member first, then assign once.
    RangeToken*& slot = complement ? fNRange : fRange;
    slot = tok;
}
// ---------------------------------------------------------------------------
// RangeTokenMap: Getter methods
// ---------------------------------------------------------------------------
// Returns the hash table of registered tokens.
inline RefHashTableOf<RangeTokenElemMap>* RangeTokenMap::getTokenRegistry() const
{
    return this->fTokenRegistry;
}
// Returns the hash table mapping category names to RangeFactory objects.
inline RefHashTableOf<RangeFactory>* RangeTokenMap::getRangeMap() const
{
    return this->fRangeMap;
}
// Returns the string pool holding the range category names.
inline XMLStringPool* RangeTokenMap::getCategories() const
{
    return this->fCategories;
}
// Returns the token factory object held by this map.
inline TokenFactory* RangeTokenMap::getTokenFactory() const
{
    return this->fTokenFactory;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file RangeTokenMap.hpp
*/

View File

@@ -0,0 +1,772 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: RegularExpression.hpp 822158 2009-10-06 07:52:59Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP)
#define XERCESC_INCLUDE_GUARD_REGULAREXPRESSION_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/RefArrayVectorOf.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/Janitor.hpp>
#include <xercesc/util/regx/Op.hpp>
#include <xercesc/util/regx/TokenFactory.hpp>
#include <xercesc/util/regx/BMPattern.hpp>
#include <xercesc/util/regx/OpFactory.hpp>
#include <xercesc/util/regx/RegxUtil.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class RangeToken;
class Match;
class RegxParser;
/**
* The RegularExpression class represents a parsed executable regular expression.
* This class is thread safe. Two similar regular expression syntaxes are
* supported:
*
* <ol>
* <li><a href="http://www.w3.org/TR/xpath-functions/#regex-syntax">The XPath 2.0 / XQuery regular expression syntax.</a></li>
* <li><a href="http://www.w3.org/TR/xmlschema-2/#regexs">The XML Schema regular expression syntax.</a></li>
* </ol>
*
* XPath 2.0 regular expression syntax is used unless the "X" option is specified during construction.
*
* Options can be specified during construction to change the way that the regular expression is handled.
* Options are specified by a string consisting of any number of the following characters:
*
* <table border='1'>
* <tr>
* <th>Character</th>
* <th>Meaning</th>
* </tr>
* <tr>
* <td valign='top' rowspan='1' colspan='1'>i</td>
* <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
* Ignore case</a> when matching the regular expression.</td>
* </tr>
* <tr>
* <td valign='top' rowspan='1' colspan='1'>m</td>
* <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
* Multi-line mode</a>. The meta characters "^" and "$" will match the beginning and end of lines.</td>
* </tr>
* <tr>
* <td valign='top' rowspan='1' colspan='1'>s</td>
* <td valign='top' rowspan='1' colspan='1'><a href="http://www.w3.org/TR/xpath-functions/#flags">
* Single-line mode</a>. The meta character "." will match a newline character.</td>
* </tr>
* <tr>
* <td valign='top' rowspan='1' colspan='1'>x</td>
* <td valign='top' rowspan='1' colspan='1'>Allow extended comments.</td>
* </tr>
* <tr>
* <td valign='top' rowspan='1' colspan='1'>F</td>
* <td valign='top' rowspan='1' colspan='1'>Prohibit the fixed string optimization.</td>
* </tr>
* <tr>
* <td valign='top' rowspan='1' colspan='1'>H</td>
* <td valign='top' rowspan='1' colspan='1'>Prohibit the head character optimization.</td>
* </tr>
* <tr>
* <td valign='top' rowspan='1' colspan='1'>X</td>
* <td valign='top' rowspan='1' colspan='1'>Parse the regular expression according to the
* <a href="http://www.w3.org/TR/xmlschema-2/#regexs">XML Schema regular expression syntax</a>.</td>
* </tr>
* </table>
*/
class XMLUTIL_EXPORT RegularExpression : public XMemory
{
public:
    // -----------------------------------------------------------------------
    //  Construction and destruction
    // -----------------------------------------------------------------------
    /** @name Constructors and destructor */
    //@{

    /**
     * Builds an expression by parsing a pattern given in the local code page.
     *
     * @param pattern the regular expression in the local code page
     * @param manager the memory manager to use
     */
    RegularExpression(const char* const    pattern,
                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    /**
     * Builds an expression by parsing a pattern with the supplied options,
     * both given in the local code page.
     *
     * @param pattern the regular expression in the local code page
     * @param options the options string in the local code page
     * @param manager the memory manager to use
     */
    RegularExpression(const char* const    pattern,
                      const char* const    options,
                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    /**
     * Builds an expression by parsing an XMLCh pattern.
     *
     * @param pattern the regular expression
     * @param manager the memory manager to use
     */
    RegularExpression(const XMLCh* const   pattern,
                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    /**
     * Builds an expression by parsing an XMLCh pattern with the supplied
     * options.
     *
     * @param pattern the regular expression
     * @param options the options string
     * @param manager the memory manager to use
     */
    RegularExpression(const XMLCh* const   pattern,
                      const XMLCh* const   options,
                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    virtual ~RegularExpression();
    //@}

    // -----------------------------------------------------------------------
    //  Public constants
    // -----------------------------------------------------------------------
    // Option flags; the values are defined out of line.
    // NOTE(review): presumably bit masks meant to be tested with isSet() -
    // confirm against the definitions in the implementation file.
    static const unsigned int IGNORE_CASE;
    static const unsigned int SINGLE_LINE;
    static const unsigned int MULTIPLE_LINE;
    static const unsigned int EXTENDED_COMMENT;
    static const unsigned int PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
    static const unsigned int PROHIBIT_FIXED_STRING_OPTIMIZATION;
    static const unsigned int XMLSCHEMA_MODE;

    // Word-type classification values.
    typedef enum
    {
        wordTypeIgnore = 0,
        wordTypeLetter = 1,
        wordTypeOther  = 2
    } wordType;

    // -----------------------------------------------------------------------
    //  Public helper methods
    // -----------------------------------------------------------------------
    /** @name Public helper methods */
    //@{

    /** Returns the option value associated with a single option character. */
    static int getOptionValue(const XMLCh ch);

    /** Returns true when every bit of @c flag is also set in @c options. */
    static bool isSet(const int options, const int flag);
    //@}

    // -----------------------------------------------------------------------
    //  Matching methods
    // -----------------------------------------------------------------------
    /** @name Matching methods */
    //@{

    /**
     * Checks a null terminated string against this regular expression.
     *
     * @param matchString the string to match in the local code page
     * @param manager     the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const char* const    matchString,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Checks the part of a string lying between the start and end offsets
     * against this regular expression.
     *
     * @param matchString the string to match in the local code page
     * @param start       the offset of the start of the string
     * @param end         the offset of the end of the string
     * @param manager     the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const char* const    matchString,
                 const XMLSize_t      start,
                 const XMLSize_t      end,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Checks a null terminated string against this regular expression and
     * reports where the match and its sub-matches were found.
     *
     * @param matchString the string to match in the local code page
     * @param pMatch      a Match object, which will be populated with the
     *                    offsets for the regular expression match and
     *                    sub-matches.
     * @param manager     the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const char* const    matchString,
                 Match* const         pMatch,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Checks the part of a string lying between the start and end offsets
     * against this regular expression and reports where the match and its
     * sub-matches were found.
     *
     * @param matchString the string to match in the local code page
     * @param start       the offset of the start of the string
     * @param end         the offset of the end of the string
     * @param pMatch      a Match object, which will be populated with the
     *                    offsets for the regular expression match and
     *                    sub-matches.
     * @param manager     the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const char* const    matchString,
                 const XMLSize_t      start,
                 const XMLSize_t      end,
                 Match* const         pMatch,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Checks a null terminated string against this regular expression.
     *
     * @param matchString the string to match
     * @param manager     the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const XMLCh* const   matchString,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Checks the part of a string lying between the start and end offsets
     * against this regular expression.
     *
     * @param matchString the string to match
     * @param start       the offset of the start of the string
     * @param end         the offset of the end of the string
     * @param manager     the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const XMLCh* const   matchString,
                 const XMLSize_t      start,
                 const XMLSize_t      end,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Checks a null terminated string against this regular expression and
     * reports where the match and its sub-matches were found.
     *
     * @param matchString the string to match
     * @param pMatch      a Match object, which will be populated with the
     *                    offsets for the regular expression match and
     *                    sub-matches.
     * @param manager     the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const XMLCh* const   matchString,
                 Match* const         pMatch,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Checks the part of a string lying between the start and end offsets
     * against this regular expression and reports where the match and its
     * sub-matches were found.
     *
     * @param matchString the string to match
     * @param start       the offset of the start of the string
     * @param end         the offset of the end of the string
     * @param pMatch      a Match object, which will be populated with the
     *                    offsets for the regular expression match and
     *                    sub-matches.
     * @param manager     the memory manager to use
     *
     * @return Whether the string matched the regular expression or not.
     */
    bool matches(const XMLCh* const   matchString,
                 const XMLSize_t      start,
                 const XMLSize_t      end,
                 Match* const         pMatch,
                 MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Collects every non-overlapping occurrence of a match found between the
     * start and end offsets of the string; the details of each occurrence are
     * stored in the subEx vector.
     *
     * @param matchString the string to match
     * @param start       the offset of the start of the string
     * @param end         the offset of the end of the string
     * @param subEx       a RefVectorOf Match objects, populated with the
     *                    offsets for the regular expression match and
     *                    sub-matches.
     * @param manager     the memory manager to use
     */
    void allMatches(const XMLCh* const   matchString,
                    const XMLSize_t      start,
                    const XMLSize_t      end,
                    RefVectorOf<Match>  *subEx,
                    MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
    //@}

    // -----------------------------------------------------------------------
    //  Tokenize methods
    // -----------------------------------------------------------------------
    //  Ownership: the returned string vector belongs to the caller, who must
    //  delete it.
    /** @name Tokenize methods */
    //@{

    /**
     * Splits a null terminated string around the matches of this regular
     * expression, yielding the pieces of the string that do not match.
     *
     * @param matchString the string to match in the local code page
     * @param manager     the memory manager to use
     *
     * @return A RefArrayVectorOf sub-strings that do not match the regular
     *         expression allocated using the given MemoryManager. The caller
     *         owns the string vector that is returned, and is responsible for
     *         deleting it.
     */
    RefArrayVectorOf<XMLCh> *tokenize(const char* const    matchString,
                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Splits the part of a string lying between the start and end offsets
     * around the matches of this regular expression, yielding the pieces that
     * do not match.
     *
     * @param matchString the string to match in the local code page
     * @param start       the offset of the start of the string
     * @param end         the offset of the end of the string
     * @param manager     the memory manager to use
     *
     * @return A RefArrayVectorOf sub-strings that do not match the regular
     *         expression allocated using the given MemoryManager. The caller
     *         owns the string vector that is returned, and is responsible for
     *         deleting it.
     */
    RefArrayVectorOf<XMLCh> *tokenize(const char* const    matchString,
                                      const XMLSize_t      start,
                                      const XMLSize_t      end,
                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Splits a null terminated string around the matches of this regular
     * expression, yielding the pieces of the string that do not match.
     *
     * @param matchString the string to match
     * @param manager     the memory manager to use
     *
     * @return A RefArrayVectorOf sub-strings that do not match the regular
     *         expression allocated using the given MemoryManager. The caller
     *         owns the string vector that is returned, and is responsible for
     *         deleting it.
     */
    RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const   matchString,
                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Splits the part of a string lying between the start and end offsets
     * around the matches of this regular expression, yielding the pieces that
     * do not match.
     *
     * @param matchString the string to match
     * @param start       the offset of the start of the string
     * @param end         the offset of the end of the string
     * @param manager     the memory manager to use
     *
     * @return A RefArrayVectorOf sub-strings that do not match the regular
     *         expression allocated using the given MemoryManager. The caller
     *         owns the string vector that is returned, and is responsible for
     *         deleting it.
     */
    RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const   matchString,
                                      const XMLSize_t      start,
                                      const XMLSize_t      end,
                                      MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
    //@}

    // -----------------------------------------------------------------------
    //  Replace methods
    // -----------------------------------------------------------------------
    //  Ownership: the returned XMLCh* belongs to the caller, who must delete
    //  it.
    /** @name Replace methods */
    //@{

    /**
     * Search-and-replace over a null terminated string: each substring that
     * matches the regular expression is replaced with a string derived from
     * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
     *
     * @param matchString   the string to match in the local code page
     * @param replaceString the string to replace in the local code page
     * @param manager       the memory manager to use
     *
     * @return The resulting string allocated using the given MemoryManager.
     *         The caller owns the string that is returned, and is responsible
     *         for deleting it.
     */
    XMLCh *replace(const char* const    matchString,
                   const char* const    replaceString,
                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Search-and-replace over the part of a string lying between the start
     * and end offsets: each substring that matches the regular expression is
     * replaced with a string derived from
     * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
     *
     * @param matchString   the string to match in the local code page
     * @param replaceString the string to replace in the local code page
     * @param start         the offset of the start of the string
     * @param end           the offset of the end of the string
     * @param manager       the memory manager to use
     *
     * @return The resulting string allocated using the given MemoryManager.
     *         The caller owns the string that is returned, and is responsible
     *         for deleting it.
     */
    XMLCh *replace(const char* const    matchString,
                   const char* const    replaceString,
                   const XMLSize_t      start,
                   const XMLSize_t      end,
                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Search-and-replace over a null terminated string: each substring that
     * matches the regular expression is replaced with a string derived from
     * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
     *
     * @param matchString   the string to match
     * @param replaceString the string to replace
     * @param manager       the memory manager to use
     *
     * @return The resulting string allocated using the given MemoryManager.
     *         The caller owns the string that is returned, and is responsible
     *         for deleting it.
     */
    XMLCh *replace(const XMLCh* const   matchString,
                   const XMLCh* const   replaceString,
                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;

    /**
     * Search-and-replace over the part of a string lying between the start
     * and end offsets: each substring that matches the regular expression is
     * replaced with a string derived from
     * the <a href="http://www.w3.org/TR/xpath-functions/#func-replace">replacement string</a>.
     *
     * @param matchString   the string to match
     * @param replaceString the string to replace
     * @param start         the offset of the start of the string
     * @param end           the offset of the end of the string
     * @param manager       the memory manager to use
     *
     * @return The resulting string allocated using the given MemoryManager.
     *         The caller owns the string that is returned, and is responsible
     *         for deleting it.
     */
    XMLCh *replace(const XMLCh* const   matchString,
                   const XMLCh* const   replaceString,
                   const XMLSize_t      start,
                   const XMLSize_t      end,
                   MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) const;
    //@}

    // -----------------------------------------------------------------------
    //  Static initialize and cleanup methods
    // -----------------------------------------------------------------------
    /** @name Static initialize and cleanup methods */
    //@{
    static void staticInitialize(MemoryManager* memoryManager);
    static void staticCleanup();
    //@}

protected:
    // Creates the parser used for the given options. Virtual, so it can be
    // overridden.
    virtual RegxParser* getRegexParser(const int options,
                                       MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);

    // -----------------------------------------------------------------------
    //  Cleanup methods
    // -----------------------------------------------------------------------
    // Frees fPattern, fFixedString, fBMPattern and fTokenFactory.
    void cleanUp();

    // -----------------------------------------------------------------------
    //  Setter methods
    // -----------------------------------------------------------------------
    void setPattern(const XMLCh* const pattern, const XMLCh* const options=0);

    // -----------------------------------------------------------------------
    //  Protected data types
    // -----------------------------------------------------------------------
    // State object handed to match() and the match*() helpers below.
    class XMLUTIL_EXPORT Context : public XMemory
    {
    public :
        Context(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
        Context(Context* src);
        ~Context();

        Context& operator= (const Context& other);

        // Accessor for the string held in fString.
        inline const XMLCh* getString() const { return fString; }

        void reset(const XMLCh* const string,
                   const XMLSize_t    stringLen,
                   const XMLSize_t    start,
                   const XMLSize_t    limit,
                   const int          noClosures,
                   const unsigned int options);

        bool nextCh(XMLInt32& ch, XMLSize_t& offset);

        bool           fAdoptMatch;
        XMLSize_t      fStart;
        XMLSize_t      fLimit;
        XMLSize_t      fLength;        // fLimit - fStart
        int            fSize;
        XMLSize_t      fStringMaxLen;
        int*           fOffsets;
        Match*         fMatch;
        const XMLCh*   fString;
        unsigned int   fOptions;
        MemoryManager* fMemoryManager;
    };

    // -----------------------------------------------------------------------
    //  Copying is not supported: declared only, intentionally unimplemented
    // -----------------------------------------------------------------------
    RegularExpression(const RegularExpression&);
    RegularExpression& operator=(const RegularExpression&);

    // -----------------------------------------------------------------------
    //  Protected helper methods
    // -----------------------------------------------------------------------
    void prepare();
    int  parseOptions(const XMLCh* const options);

    /**
     * Core matching routines
     */
    int  match(Context* const context,
               const Op* const operations,
               XMLSize_t       offset) const;
    bool matchIgnoreCase(const XMLInt32 ch1, const XMLInt32 ch2) const;

    /**
     * Building blocks called from match(Context* ...)
     */
    bool matchChar(Context* const context,
                   const XMLInt32 ch,
                   XMLSize_t&     offset,
                   const bool     ignoreCase) const;
    bool matchDot(Context* const context, XMLSize_t& offset) const;
    bool matchRange(Context* const context,
                    const Op* const op,
                    XMLSize_t&      offset,
                    const bool      ignoreCase) const;
    bool matchAnchor(Context* const context,
                     const XMLInt32  ch,
                     const XMLSize_t offset) const;
    bool matchBackReference(Context* const context,
                            const XMLInt32 ch,
                            XMLSize_t&     offset,
                            const bool     ignoreCase) const;
    bool matchString(Context* const context,
                     const XMLCh* const literal,
                     XMLSize_t&         offset,
                     const bool         ignoreCase) const;
    int  matchUnion(Context* const context, const Op* const op, XMLSize_t offset) const;
    int  matchCapture(Context* const context, const Op* const op, XMLSize_t offset) const;

    /**
     * Support for the replace() methods
     */
    void subInExp(const XMLCh* const   repString,
                  const XMLCh* const   origString,
                  const Match*         subEx,
                  XMLBuffer           &result,
                  MemoryManager* const manager) const;

    /**
     * Translation of a token tree into an operation tree
     */
    void compile(const Token* const token);
    Op*  compile(const Token* const token,
                 Op* const          next,
                 const bool         reverse);

    /**
     * Per-token-kind routines used by compile()
     */
    Op* compileUnion(const Token* const token,
                     Op* const          next,
                     const bool         reverse);
    Op* compileParenthesis(const Token* const token,
                           Op* const          next,
                           const bool         reverse);
    Op* compileConcat(const Token* const token,
                      Op* const          next,
                      const bool         reverse);
    Op* compileClosure(const Token* const   token,
                       Op* const            next,
                       const bool           reverse,
                       const Token::tokType tkType);

    bool doTokenOverlap(const Op* op, Token* token);

    // -----------------------------------------------------------------------
    //  Protected data members
    // -----------------------------------------------------------------------
    bool               fHasBackReferences;
    bool               fFixedStringOnly;
    int                fNoGroups;
    XMLSize_t          fMinLength;
    unsigned int       fNoClosures;
    unsigned int       fOptions;
    const BMPattern*   fBMPattern;      // released with delete in cleanUp()
    XMLCh*             fPattern;        // released through fMemoryManager in cleanUp()
    XMLCh*             fFixedString;    // released through fMemoryManager in cleanUp()
    const Op*          fOperations;
    Token*             fTokenTree;
    RangeToken*        fFirstChar;
    static RangeToken* fWordRange;      // reset to null by staticCleanup()
    OpFactory          fOpFactory;
    TokenFactory*      fTokenFactory;   // released with delete in cleanUp()
    MemoryManager*     fMemoryManager;
};
// -----------------------------------------------------------------------
// RegularExpression: Static initialize and cleanup methods
// -----------------------------------------------------------------------
// Resets the class-wide word-range pointer to null. The RangeToken it pointed
// at is not deleted here.
// NOTE(review): presumably that object is owned elsewhere - confirm.
inline void RegularExpression::staticCleanup()
{
    RegularExpression::fWordRange = 0;
}
// ---------------------------------------------------------------------------
// RegularExpression: Cleanup methods
// ---------------------------------------------------------------------------
// Releases the resources held in fPattern, fFixedString, fBMPattern and
// fTokenFactory.
//
// The two string buffers are handed back to fMemoryManager; the other two
// objects are destroyed with delete. Each pointer is reset to null right
// after its release so that the object never keeps a dangling pointer and a
// repeated call cannot free the same memory twice.
inline void RegularExpression::cleanUp() {
    fMemoryManager->deallocate(fPattern);      // was: delete [] fPattern;
    fPattern = 0;

    fMemoryManager->deallocate(fFixedString);  // was: delete [] fFixedString;
    fFixedString = 0;

    delete fBMPattern;
    fBMPattern = 0;

    delete fTokenFactory;
    fTokenFactory = 0;
}
// ---------------------------------------------------------------------------
// RegularExpression: Helper methods
// ---------------------------------------------------------------------------
// Tells whether all bits of 'flag' are present in 'options'.
// A flag of 0 therefore always yields true.
inline bool RegularExpression::isSet(const int options, const int flag) {
    const int common = options & flag;
    return common == flag;
}
// Compiles a union token: creates a UnionOp sized for the token's children
// and adds one compiled alternative per child, in child order. Every
// alternative is compiled with the same continuation 'next'.
inline Op* RegularExpression::compileUnion(const Token* const token,
                                           Op* const next,
                                           const bool reverse) {
    const XMLSize_t branchCount = token->size();
    UnionOp* alternatives = fOpFactory.createUnionOp(branchCount);

    XMLSize_t branch = 0;
    while (branch < branchCount) {
        alternatives->addElement(compile(token->getChild(branch), next, reverse));
        ++branch;
    }

    return alternatives;
}
// Compiles a parenthesis token.
//
// When the token's paren number is 0 the child is compiled directly with the
// given continuation. Otherwise the compiled child is placed between two
// capture ops carrying the paren number with opposite signs:
//   forward: capture(+n) -> child -> capture(-n) -> next
//   reverse: capture(-n) -> child -> capture(+n) -> next
inline Op* RegularExpression::compileParenthesis(const Token* const token,
                                                 Op* const next,
                                                 const bool reverse) {
    if (token->getNoParen() == 0) {
        return compile(token->getChild(0), next, reverse);
    }

    Op* chain = 0;

    if (!reverse) {
        chain = fOpFactory.createCaptureOp(-token->getNoParen(), next);
        chain = compile(token->getChild(0), chain, reverse);
        chain = fOpFactory.createCaptureOp(token->getNoParen(), chain);
    }
    else {
        chain = fOpFactory.createCaptureOp(token->getNoParen(), next);
        chain = compile(token->getChild(0), chain, reverse);
        chain = fOpFactory.createCaptureOp(-token->getNoParen(), chain);
    }

    return chain;
}
// Compiles a concatenation token by threading the continuation through its
// children: the op returned for one child becomes the 'next' of the child
// compiled after it.
//   forward: children are visited from the last one down to the first
//   reverse: children are visited from the first one up to the last
inline Op* RegularExpression::compileConcat(const Token* const token,
                                            Op* const next,
                                            const bool reverse) {
    const XMLSize_t childCount = token->size();
    Op* chain = next;

    if (reverse) {
        XMLSize_t idx = 0;
        while (idx < childCount) {
            chain = compile(token->getChild(idx), chain, true);
            ++idx;
        }
        return chain;
    }

    XMLSize_t remaining = childCount;
    while (remaining > 0) {
        --remaining;
        chain = compile(token->getChild(remaining), chain, false);
    }
    return chain;
}
// Compiles a closure token (child 0 repeated between getMin() and getMax()
// times) into ops that continue with 'next'.
//
// Three shapes are produced:
//  * min >= 0 and min == max: the child is simply compiled 'min' times in a
//    row in front of 'next'.
//  * a positive max remains (after subtracting a positive min): that many
//    nested question ops are built, each continuing with 'next'.
//  * otherwise: a single closure op is created (non-greedy when tkType is
//    T_NONGREEDYCLOSURE). If there is no 'next', or doTokenOverlap() reports
//    no overlap between 'next' and the child, the op is retagged as one of
//    the O_FINITE_* closures and the child is compiled without a
//    continuation; in the remaining case the child continues back into the
//    closure op itself.
// In the last two shapes the child is finally compiled 'min' more times in
// front of the result.
inline Op* RegularExpression::compileClosure(const Token* const token,
                                             Op* const next,
                                             const bool reverse,
                                             const Token::tokType tkType) {
    Token* body = token->getChild(0);
    const int required = token->getMin();
    int optional = token->getMax();
    const bool nonGreedy = (tkType == Token::T_NONGREEDYCLOSURE);

    Op* chain = 0;

    // Exact repetition count: plain unrolling.
    if (required >= 0 && required == optional) {
        chain = next;
        for (int done = 0; done < required; ++done) {
            chain = compile(body, chain, reverse);
        }
        return chain;
    }

    // From here on 'optional' counts only the repetitions beyond 'required'.
    if (required > 0 && optional > 0) {
        optional -= required;
    }

    if (optional <= 0) {
        ChildOp* loopOp = 0;
        if (nonGreedy) {
            loopOp = fOpFactory.createNonGreedyClosureOp();
        }
        else if (body->getMinLength() == 0) {
            loopOp = fOpFactory.createClosureOp(fNoClosures++);
        }
        else {
            loopOp = fOpFactory.createClosureOp(-1);
        }

        loopOp->setNextOp(next);

        if (next != NULL && doTokenOverlap(next, body)) {
            loopOp->setChild(compile(body, loopOp, reverse));
        }
        else {
            loopOp->setOpType(nonGreedy ? Op::O_FINITE_NONGREEDYCLOSURE
                                        : Op::O_FINITE_CLOSURE);
            loopOp->setChild(compile(body, NULL, reverse));
        }
        chain = loopOp;
    }
    else {
        chain = next;
        for (int left = optional; left > 0; --left) {
            ChildOp* questionOp = fOpFactory.createQuestionOp(nonGreedy);
            questionOp->setNextOp(next);
            questionOp->setChild(compile(body, chain, reverse));
            chain = questionOp;
        }
    }

    // Mandatory repetitions go in front of the optional/unbounded part.
    for (int done = 0; done < required; ++done) {
        chain = compile(body, chain, reverse);
    }

    return chain;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file RegularExpression.hpp
*/

View File

@@ -0,0 +1,239 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: RegxDefs.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_REGXDEFS_HPP)
#define XERCESC_INCLUDE_GUARD_REGXDEFS_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/XercesDefs.hpp>
#include <xercesc/util/XMLUniDefs.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// Category names, as null-terminated XMLCh strings. The quoted text next to
// each array is what its elements spell, going by the chLatin_* constant
// names.

// "XML"
static const XMLCh fgXMLCategory[] = { chLatin_X, chLatin_M, chLatin_L, chNull };

// "ASCII"
static const XMLCh fgASCIICategory[] = {
    chLatin_A, chLatin_S, chLatin_C, chLatin_I, chLatin_I,
    chNull
};

// "UNICODE"
static const XMLCh fgUnicodeCategory[] = {
    chLatin_U, chLatin_N, chLatin_I, chLatin_C, chLatin_O, chLatin_D, chLatin_E,
    chNull
};

// "BLOCK"
static const XMLCh fgBlockCategory[] = {
    chLatin_B, chLatin_L, chLatin_O, chLatin_C, chLatin_K,
    chNull
};
// "xml:"-prefixed range names, as null-terminated XMLCh strings. The quoted
// text next to each array is what its elements spell, going by the
// chLatin_* / chColon constant names.

// "xml:isSpace"
static const XMLCh fgXMLSpace[] = {
    chLatin_x, chLatin_m, chLatin_l, chColon,
    chLatin_i, chLatin_s,
    chLatin_S, chLatin_p, chLatin_a, chLatin_c, chLatin_e,
    chNull
};

// "xml:isDigit"
static const XMLCh fgXMLDigit[] = {
    chLatin_x, chLatin_m, chLatin_l, chColon,
    chLatin_i, chLatin_s,
    chLatin_D, chLatin_i, chLatin_g, chLatin_i, chLatin_t,
    chNull
};

// "xml:isWord"
static const XMLCh fgXMLWord[] = {
    chLatin_x, chLatin_m, chLatin_l, chColon,
    chLatin_i, chLatin_s,
    chLatin_W, chLatin_o, chLatin_r, chLatin_d,
    chNull
};

// "xml:isNameChar"
static const XMLCh fgXMLNameChar[] = {
    chLatin_x, chLatin_m, chLatin_l, chColon,
    chLatin_i, chLatin_s,
    chLatin_N, chLatin_a, chLatin_m, chLatin_e,
    chLatin_C, chLatin_h, chLatin_a, chLatin_r,
    chNull
};

// "xml:isInitialNameChar"
static const XMLCh fgXMLInitialNameChar[] = {
    chLatin_x, chLatin_m, chLatin_l, chColon,
    chLatin_i, chLatin_s,
    chLatin_I, chLatin_n, chLatin_i, chLatin_t, chLatin_i, chLatin_a, chLatin_l,
    chLatin_N, chLatin_a, chLatin_m, chLatin_e,
    chLatin_C, chLatin_h, chLatin_a, chLatin_r,
    chNull
};
// "ascii:"-prefixed range names, as null-terminated XMLCh strings. The quoted
// text next to each array is what its elements spell, going by the
// chLatin_* / chColon constant names.

// "ascii:isAscii"
static const XMLCh fgASCII[] = {
    chLatin_a, chLatin_s, chLatin_c, chLatin_i, chLatin_i, chColon,
    chLatin_i, chLatin_s,
    chLatin_A, chLatin_s, chLatin_c, chLatin_i, chLatin_i,
    chNull
};

// "ascii:isDigit"
static const XMLCh fgASCIIDigit[] = {
    chLatin_a, chLatin_s, chLatin_c, chLatin_i, chLatin_i, chColon,
    chLatin_i, chLatin_s,
    chLatin_D, chLatin_i, chLatin_g, chLatin_i, chLatin_t,
    chNull
};

// "ascii:isWord"
static const XMLCh fgASCIIWord[] = {
    chLatin_a, chLatin_s, chLatin_c, chLatin_i, chLatin_i, chColon,
    chLatin_i, chLatin_s,
    chLatin_W, chLatin_o, chLatin_r, chLatin_d,
    chNull
};

// "ascii:isSpace"
static const XMLCh fgASCIISpace[] = {
    chLatin_a, chLatin_s, chLatin_c, chLatin_i, chLatin_i, chColon,
    chLatin_i, chLatin_s,
    chLatin_S, chLatin_p, chLatin_a, chLatin_c, chLatin_e,
    chNull
};

// "ascii:isXDigit"
static const XMLCh fgASCIIXDigit[] = {
    chLatin_a, chLatin_s, chLatin_c, chLatin_i, chLatin_i, chColon,
    chLatin_i, chLatin_s,
    chLatin_X, chLatin_D, chLatin_i, chLatin_g, chLatin_i, chLatin_t,
    chNull
};
// Unicode and block range names, as null-terminated XMLCh strings. The quoted
// text next to each array is what its elements spell, going by the chLatin_*
// constant names.

// "ALL"
static const XMLCh fgUniAll[] = { chLatin_A, chLatin_L, chLatin_L, chNull };

// "IsAlpha"
static const XMLCh fgUniIsAlpha[] = {
    chLatin_I, chLatin_s,
    chLatin_A, chLatin_l, chLatin_p, chLatin_h, chLatin_a,
    chNull
};

// "IsAlnum"
static const XMLCh fgUniIsAlnum[] = {
    chLatin_I, chLatin_s,
    chLatin_A, chLatin_l, chLatin_n, chLatin_u, chLatin_m,
    chNull
};

// "IsWord"
static const XMLCh fgUniIsWord[] = {
    chLatin_I, chLatin_s,
    chLatin_W, chLatin_o, chLatin_r, chLatin_d,
    chNull
};

// "IsDigit"
static const XMLCh fgUniIsDigit[] = {
    chLatin_I, chLatin_s,
    chLatin_D, chLatin_i, chLatin_g, chLatin_i, chLatin_t,
    chNull
};

// "IsUpper"
static const XMLCh fgUniIsUpper[] = {
    chLatin_I, chLatin_s,
    chLatin_U, chLatin_p, chLatin_p, chLatin_e, chLatin_r,
    chNull
};

// "IsLower"
static const XMLCh fgUniIsLower[] = {
    chLatin_I, chLatin_s,
    chLatin_L, chLatin_o, chLatin_w, chLatin_e, chLatin_r,
    chNull
};

// "IsPunct"
static const XMLCh fgUniIsPunct[] = {
    chLatin_I, chLatin_s,
    chLatin_P, chLatin_u, chLatin_n, chLatin_c, chLatin_t,
    chNull
};

// "IsSpace"
static const XMLCh fgUniIsSpace[] = {
    chLatin_I, chLatin_s,
    chLatin_S, chLatin_p, chLatin_a, chLatin_c, chLatin_e,
    chNull
};

// "ASSIGNED"
static const XMLCh fgUniAssigned[] = {
    chLatin_A, chLatin_S, chLatin_S, chLatin_I,
    chLatin_G, chLatin_N, chLatin_E, chLatin_D,
    chNull
};

// "Nd"
static const XMLCh fgUniDecimalDigit[] = { chLatin_N, chLatin_d, chNull };

// "IsSpecials"
static const XMLCh fgBlockIsSpecials[] = {
    chLatin_I, chLatin_s,
    chLatin_S, chLatin_p, chLatin_e, chLatin_c, chLatin_i, chLatin_a, chLatin_l, chLatin_s,
    chNull
};

// "IsPrivateUse"
static const XMLCh fgBlockIsPrivateUse[] = {
    chLatin_I, chLatin_s,
    chLatin_P, chLatin_r, chLatin_i, chLatin_v, chLatin_a, chLatin_t, chLatin_e,
    chLatin_U, chLatin_s, chLatin_e,
    chNull
};
// One-letter Unicode category names, as null-terminated XMLCh strings. The
// quoted letter next to each array is the one its single chLatin_* element
// names.

static const XMLCh fgUniLetter[]      = { chLatin_L, chNull };   // "L"
static const XMLCh fgUniNumber[]      = { chLatin_N, chNull };   // "N"
static const XMLCh fgUniMark[]        = { chLatin_M, chNull };   // "M"
static const XMLCh fgUniSeparator[]   = { chLatin_Z, chNull };   // "Z"
static const XMLCh fgUniPunctuation[] = { chLatin_P, chNull };   // "P"
static const XMLCh fgUniControl[]     = { chLatin_C, chNull };   // "C"
static const XMLCh fgUniSymbol[]      = { chLatin_S, chNull };   // "S"
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file RegxDefs.hpp
*/

View File

@@ -0,0 +1,284 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: RegxParser.hpp 711369 2008-11-04 20:03:14Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_REGXPARSER_HPP)
#define XERCESC_INCLUDE_GUARD_REGXPARSER_HPP
/*
* A regular expression parser
*/
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/RefVectorOf.hpp>
#include <xercesc/util/XMLUniDefs.hpp>
#include <xercesc/util/regx/Token.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class Token;
class RangeToken;
class TokenFactory;
class XMLUTIL_EXPORT RegxParser : public XMemory
{
public:
    // -----------------------------------------------------------------------
    //  Public constant data
    // -----------------------------------------------------------------------
    // Parse tokens; getState() returns one of these.
    typedef enum {
        REGX_T_CHAR                     = 0,
        REGX_T_EOF                      = 1,
        REGX_T_OR                       = 2,
        REGX_T_STAR                     = 3,
        REGX_T_PLUS                     = 4,
        REGX_T_QUESTION                 = 5,
        REGX_T_LPAREN                   = 6,
        REGX_T_RPAREN                   = 7,
        REGX_T_DOT                      = 8,
        REGX_T_LBRACKET                 = 9,
        REGX_T_BACKSOLIDUS              = 10,
        REGX_T_CARET                    = 11,
        REGX_T_DOLLAR                   = 12,
        REGX_T_XMLSCHEMA_CC_SUBTRACTION = 13
    } parserState;

    // Parse context; read and written through getParseContext() and
    // setParseContext().
    typedef enum {
        regexParserStateNormal     = 0,
        regexParserStateInBrackets = 1
    } parserStateContext;

    // -----------------------------------------------------------------------
    //  Construction and destruction
    // -----------------------------------------------------------------------
    RegxParser(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
    virtual ~RegxParser();

    // -----------------------------------------------------------------------
    //  Getters
    // -----------------------------------------------------------------------
    parserStateContext getParseContext() const;
    parserState        getState() const;
    XMLInt32           getCharData() const;
    int                getNoParen() const;
    XMLSize_t          getOffset() const;
    bool               hasBackReferences() const;
    TokenFactory*      getTokenFactory() const;
    int                getOptions() const;

    // -----------------------------------------------------------------------
    //  Setters
    // -----------------------------------------------------------------------
    void setParseContext(const parserStateContext value);
    void setTokenFactory(TokenFactory* const tokFactory);
    void setOptions(const int options);

    // -----------------------------------------------------------------------
    //  Public parsing entry point
    // -----------------------------------------------------------------------
    // Parses the regular expression string with the given options and
    // returns the resulting Token.
    Token* parse(const XMLCh* const regxStr, const int options);

protected:
    // -----------------------------------------------------------------------
    //  Protected helpers
    // -----------------------------------------------------------------------
    virtual bool     checkQuestion(const XMLSize_t off);
    virtual XMLInt32 decodeEscaped();
    MemoryManager*   getMemoryManager() const;

    // -----------------------------------------------------------------------
    //  Protected parsing / processing methods
    //  (the virtual ones can be overridden by derived parsers)
    // -----------------------------------------------------------------------
    void           processNext();
    Token*         parseRegx(const bool matchingRParen = false);
    virtual Token* processCaret();
    virtual Token* processDollar();
    virtual Token* processBackReference();
    virtual Token* processStar(Token* const tok);
    virtual Token* processPlus(Token* const tok);
    virtual Token* processQuestion(Token* const tok);
    virtual Token* processParen();
    RangeToken*    parseCharacterClass(const bool useNRange);
    RangeToken*    processBacksolidus_pP(const XMLInt32 ch);

    // -----------------------------------------------------------------------
    //  Access to pre-created RangeTokens
    // -----------------------------------------------------------------------
    RangeToken* getTokenForShorthand(const XMLInt32 ch);

    bool isSet(const int flag);

private:
    // -----------------------------------------------------------------------
    //  Private parsing / processing methods
    // -----------------------------------------------------------------------
    Token* parseTerm(const bool matchingRParen = false);
    Token* parseFactor();
    Token* parseAtom();

    // -----------------------------------------------------------------------
    //  Copying is not supported: declared only, intentionally unimplemented
    // -----------------------------------------------------------------------
    RegxParser(const RegxParser&);
    RegxParser& operator=(const RegxParser&);

    // -----------------------------------------------------------------------
    //  Private data types
    // -----------------------------------------------------------------------
    // Pairs a reference number with a position; instances are collected in
    // fReferences.
    class ReferencePosition : public XMemory
    {
    public :
        ReferencePosition(const int refNo, const XMLSize_t position);

        int       fReferenceNo;
        XMLSize_t fPosition;
    };

    // -----------------------------------------------------------------------
    //  Private helpers
    // -----------------------------------------------------------------------
    int hexChar(const XMLInt32 ch);

    // -----------------------------------------------------------------------
    //  Private data members
    // -----------------------------------------------------------------------
    MemoryManager*                  fMemoryManager;
    bool                            fHasBackReferences;
    int                             fOptions;
    XMLSize_t                       fOffset;
    int                             fNoGroups;
    parserStateContext              fParseContext;
    XMLSize_t                       fStringLen;
    parserState                     fState;
    XMLInt32                        fCharData;
    XMLCh*                          fString;
    RefVectorOf<ReferencePosition>* fReferences;
    TokenFactory*                   fTokenFactory;
};
// ---------------------------------------------------------------------------
// RegxParser: Getter Methods
// ---------------------------------------------------------------------------
// Returns the parse context currently stored in fParseContext.
inline RegxParser::parserStateContext RegxParser::getParseContext() const {
return fParseContext;
}
// Returns the parser state currently stored in fState.
inline RegxParser::parserState RegxParser::getState() const {
return fState;
}
// Returns the character value currently stored in fCharData.
inline XMLInt32 RegxParser::getCharData() const {
return fCharData;
}
// Returns fNoGroups.
// NOTE(review): presumably the running count of parenthesised groups --
// confirm against RegxParser.cpp.
inline int RegxParser::getNoParen() const {
return fNoGroups;
}
// Returns the offset currently stored in fOffset.
inline XMLSize_t RegxParser::getOffset() const {
return fOffset;
}
// Returns the fHasBackReferences flag.
inline bool RegxParser::hasBackReferences() const {
return fHasBackReferences;
}
// Returns the TokenFactory pointer last stored by setTokenFactory()
// (fTokenFactory). The pointer is returned as-is.
inline TokenFactory* RegxParser::getTokenFactory() const {
return fTokenFactory;
}
// Returns the MemoryManager pointer stored in fMemoryManager.
inline MemoryManager* RegxParser::getMemoryManager() const {
return fMemoryManager;
}
// Returns the option bits currently stored in fOptions.
inline int RegxParser::getOptions() const {
return fOptions;
}
// ---------------------------------------------------------------------------
// RegxParser: Setter Methods
// ---------------------------------------------------------------------------
// Stores 'value' in fParseContext; read back with getParseContext().
inline void RegxParser::setParseContext(const RegxParser::parserStateContext value) {
fParseContext = value;
}
// Stores the 'tokFactory' pointer in fTokenFactory. Only the pointer is
// assigned here; nothing is copied or released by this setter.
inline void RegxParser::setTokenFactory(TokenFactory* const tokFactory) {
fTokenFactory = tokFactory;
}
// Replaces the option bits in fOptions with 'options'.
inline void RegxParser::setOptions(const int options) {
fOptions = options;
}
// ---------------------------------------------------------------------------
// RegxParser: Helper Methods
// ---------------------------------------------------------------------------
// Tests whether every bit of 'flag' is present in the parser options
// (fOptions). A multi-bit 'flag' only matches when all of its bits are set.
inline bool RegxParser::isSet(const int flag) {
    const int matchedBits = fOptions & flag;
    return matchedBits == flag;
}
// Maps one hexadecimal digit character to its numeric value.
//
// Returns 0-9 for chDigit_0..chDigit_9 and 10-15 for chLatin_A..chLatin_F
// or chLatin_a..chLatin_f. Any other character yields -1.
inline int RegxParser::hexChar(const XMLInt32 ch) {
    // Decimal digits.
    if (ch >= chDigit_0 && ch <= chDigit_9)
        return ch - chDigit_0;

    // Upper-case hex letters.
    if (ch >= chLatin_A && ch <= chLatin_F)
        return ch - chLatin_A + 10;

    // Lower-case hex letters.
    if (ch >= chLatin_a && ch <= chLatin_f)
        return ch - chLatin_a + 10;

    return -1;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End file RegxParser.hpp
*/

View File

@@ -0,0 +1,109 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: RegxUtil.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_REGXUTIL_HPP)
#define XERCESC_INCLUDE_GUARD_REGXUTIL_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/XMLUniDefs.hpp>
XERCES_CPP_NAMESPACE_BEGIN
class MemoryManager;
// Collection of static helpers used by the regular-expression code:
// UTF-16 surrogate handling and simple character classification.
// The default constructor is declared private under "Unimplemented
// constructors", so the class is not meant to be instantiated.
class XMLUTIL_EXPORT RegxUtil {
public:
// -----------------------------------------------------------------------
// Constructors and destructors
// -----------------------------------------------------------------------
~RegxUtil() {}
// Combines a high/low surrogate pair into a single code point.
static XMLInt32 composeFromSurrogate(const XMLCh high, const XMLCh low);
// True for LF, CR, LINE SEPARATOR and PARAGRAPH SEPARATOR.
static bool isEOLChar(const XMLCh);
// True for '_', decimal digits and Latin letters A-Z / a-z.
static bool isWordChar(const XMLCh);
// True when the top six bits of 'ch' equal those of 0xDC00.
static bool isLowSurrogate(const XMLCh ch);
// True when the top six bits of 'ch' equal those of 0xD800.
static bool isHighSurrogate(const XMLCh ch);
// Splits 'ch' into a surrogate pair written to 'high' and 'low'.
static void decomposeToSurrogates(XMLInt32 ch, XMLCh& high, XMLCh& low);
// Overload returning a buffer instead of writing to out-parameters.
// NOTE(review): defined outside this header -- presumably the buffer is
// allocated with 'manager' and owned by the caller; confirm in RegxUtil.cpp.
static XMLCh* decomposeToSurrogates(XMLInt32 ch,
MemoryManager* const manager);
// NOTE(review): defined outside this header -- by its name this returns a
// copy of 'expression' with extended-syntax comments removed; confirm the
// ownership of the result and the meaning of a null 'manager'.
static XMLCh* stripExtendedComment(const XMLCh* const expression,
MemoryManager* const manager = 0);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
RegxUtil();
};
// Reports whether 'ch' is one of the end-of-line characters recognised by
// the regex code: chLF, chCR, chLineSeparator or chParagraphSeparator.
inline bool RegxUtil::isEOLChar(const XMLCh ch) {
    // Line feed / carriage return.
    if (ch == chLF || ch == chCR)
        return true;

    // Unicode line and paragraph separators.
    return ch == chLineSeparator || ch == chParagraphSeparator;
}
// Builds a single code point from a UTF-16 surrogate pair.
//
// high - the leading (high) surrogate
// low  - the trailing (low) surrogate
//
// Neither argument is validated; callers are expected to have checked them
// with isHighSurrogate()/isLowSurrogate() where that matters.
inline XMLInt32 RegxUtil::composeFromSurrogate(const XMLCh high, const XMLCh low) {
    // Formula from http://unicode.org/unicode/faq/utf_bom.html#35:
    // the constant folds the removal of both surrogate bases and the
    // addition of 0x10000 into one term.
    const XMLInt32 surrogateOffset = 0x10000 - (0xD800 << 10) - 0xDC00;

    return (high << 10) + low + surrogateOffset;
}
// Reports whether 'ch' is a low (trailing) surrogate: the bits selected by
// the mask 0xFC00 must equal 0xDC00.
inline bool RegxUtil::isLowSurrogate(const XMLCh ch) {
    const int leadingBits = ch & 0xFC00;
    return leadingBits == 0xDC00;
}
// Reports whether 'ch' is a high (leading) surrogate: the bits selected by
// the mask 0xFC00 must equal 0xD800.
inline bool RegxUtil::isHighSurrogate(const XMLCh ch) {
    const int leadingBits = ch & 0xFC00;
    return leadingBits == 0xD800;
}
// Splits the code point 'ch' into a UTF-16 surrogate pair.
//
// ch   - code point to split; it is not range-checked here
// high - receives the leading surrogate
// low  - receives the trailing surrogate
//
// 'high' is written before 'low'.
inline void RegxUtil::decomposeToSurrogates(XMLInt32 ch, XMLCh& high, XMLCh& low) {
    // Formula from http://unicode.org/unicode/faq/utf_bom.html#35
    const XMLInt32 leadOffset = 0xD800 - (0x10000 >> 10);

    high = static_cast<XMLCh>(leadOffset + (ch >> 10));
    low = static_cast<XMLCh>(0xDC00 + (ch & 0x3FF));
}
// Reports whether 'ch' is a "word" character: an underscore, a decimal
// digit, or a Latin letter in A-Z / a-z. Every other character is rejected.
inline bool RegxUtil::isWordChar(const XMLCh ch) {
    if (ch == chUnderscore)
        return true;
    if (ch >= chDigit_0 && ch <= chDigit_9)
        return true;
    if (ch >= chLatin_A && ch <= chLatin_Z)
        return true;
    return ch >= chLatin_a && ch <= chLatin_z;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file RegxUtil.hpp
*/

View File

@@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: StringToken.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_STRINGTOKEN_HPP)
#define XERCESC_INCLUDE_GUARD_STRINGTOKEN_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/Token.hpp>
#include <xercesc/util/XMLString.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// Token subclass that carries a string literal (fString) and a reference
// number (fRefNo). setString() replaces the literal with a copy made
// through fMemoryManager.
class XMLUTIL_EXPORT StringToken : public Token {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
// NOTE(review): the constructor is defined outside this header; presumably
// it copies 'literal' with 'manager' the same way setString() does -- confirm.
StringToken(const tokType tkType,
const XMLCh* const literal,
const int refNo,
MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~StringToken();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
// Returns fRefNo.
int getReferenceNo() const;
// Returns fString; the token keeps ownership of the buffer.
const XMLCh* getString() const;
// -----------------------------------------------------------------------
// Setter methods
// -----------------------------------------------------------------------
// Replaces fString with a copy of 'literal'.
void setString(const XMLCh* const literal);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
StringToken(const StringToken&);
StringToken& operator=(const StringToken&);
// -----------------------------------------------------------------------
// Private data members
// -----------------------------------------------------------------------
int fRefNo;
XMLCh* fString;
// NOTE(review): Token declares a protected member that is also named
// fMemoryManager; inside StringToken this private member hides it.
MemoryManager* fMemoryManager;
};
// ---------------------------------------------------------------------------
// StringToken: getter methods
// ---------------------------------------------------------------------------
// Returns the reference number stored in fRefNo.
inline int StringToken::getReferenceNo() const {
return fRefNo;
}
// Returns the literal held in fString. The pointer refers to the token's own
// buffer, which setString() releases when it installs a new literal.
inline const XMLCh* StringToken::getString() const {
return fString;
}
// ---------------------------------------------------------------------------
// StringToken: Setter methods
// ---------------------------------------------------------------------------
inline void StringToken::setString(const XMLCh* const literal) {
fMemoryManager->deallocate(fString);//delete [] fString;
fString = 0;
fString = XMLString::replicate(literal, fMemoryManager);
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file StringToken.hpp
*/

View File

@@ -0,0 +1,262 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: Token.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_TOKEN_HPP)
#define XERCESC_INCLUDE_GUARD_TOKEN_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/RuntimeException.hpp>
#include <xercesc/util/PlatformUtils.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class RangeToken;
class TokenFactory;
// Base class for the regular-expression token types. It stores the token
// type and a memory manager, and declares virtual accessors/mutators whose
// inline base-class versions (defined after the class) either return a fixed
// default or raise Regex_NotSupported.
class XMLUTIL_EXPORT Token : public XMemory
{
public:
// -----------------------------------------------------------------------
// Public Constants
// -----------------------------------------------------------------------
// Token types
typedef enum {
T_CHAR = 0,
T_CONCAT = 1,
T_UNION = 2,
T_CLOSURE = 3,
T_RANGE = 4,
T_NRANGE = 5,
T_PAREN = 6,
T_EMPTY = 7,
T_ANCHOR = 8,
T_NONGREEDYCLOSURE = 9,
T_STRING = 10,
T_DOT = 11,
T_BACKREFERENCE = 12
} tokType;
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
Token(const tokType tkType
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
);
virtual ~Token();
static const XMLInt32 UTF16_MAX;
// Result type of analyzeFirstCharacter().
typedef enum {
FC_CONTINUE = 0,
FC_TERMINAL = 1,
FC_ANY = 2
} firstCharacterOptions;
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
tokType getTokenType() const;
// NOTE(review): getMinLength() returns XMLSize_t while getMaxLength()
// returns int; both are defined outside this header.
XMLSize_t getMinLength() const;
int getMaxLength() const;
// The virtual getters below have inline base-class defaults:
// getChild/getString return 0, size/getNoParen/getReferenceNo return 0,
// getMin/getMax/getChar return -1.
virtual Token* getChild(const XMLSize_t index) const;
virtual XMLSize_t size() const;
virtual int getMin() const;
virtual int getMax() const;
virtual int getNoParen() const;
virtual int getReferenceNo() const;
virtual const XMLCh* getString() const;
virtual XMLInt32 getChar() const;
// -----------------------------------------------------------------------
// Setter methods
// -----------------------------------------------------------------------
void setTokenType(const tokType tokType);
// Base-class setMin/setMax are empty (see the inline definitions).
virtual void setMin(const int minVal);
virtual void setMax(const int maxVal);
// -----------------------------------------------------------------------
// Range manipulation methods
// -----------------------------------------------------------------------
// The base-class versions of all six raise Regex_NotSupported.
virtual void addRange(const XMLInt32 start, const XMLInt32 end);
virtual void mergeRanges(const Token *const tok);
virtual void sortRanges();
virtual void compactRanges();
virtual void subtractRanges(RangeToken* const tok);
virtual void intersectRanges(RangeToken* const tok);
// -----------------------------------------------------------------------
// Putter methods
// -----------------------------------------------------------------------
// The base-class version raises Regex_NotSupported.
virtual void addChild(Token* const child, TokenFactory* const tokFactory);
// -----------------------------------------------------------------------
// Helper methods
// -----------------------------------------------------------------------
firstCharacterOptions analyzeFirstCharacter(RangeToken* const rangeTok, const int options,
TokenFactory* const tokFactory);
Token* findFixedString(int options, int& outOptions);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
Token(const Token&);
Token& operator=(const Token&);
// -----------------------------------------------------------------------
// Private Helper methods
// -----------------------------------------------------------------------
// True when every bit of 'flag' is set in 'options'.
bool isSet(const int options, const unsigned int flag);
bool isShorterThan(Token* const tok);
// -----------------------------------------------------------------------
// Private data members
// -----------------------------------------------------------------------
tokType fTokenType;
protected:
// Passed to ThrowXMLwithMemMgr by the base-class stubs; the pointer itself
// is const and so cannot be reseated after construction.
MemoryManager* const fMemoryManager;
};
// ---------------------------------------------------------------------------
// Token: getter methods
// ---------------------------------------------------------------------------
// Returns the token type stored in fTokenType.
inline Token::tokType Token::getTokenType() const {
return fTokenType;
}
// Base-class default for the virtual size(): always 0.
inline XMLSize_t Token::size() const {
return 0;
}
// Base-class default for the virtual getChild(): the index is ignored and a
// null pointer is returned.
inline Token* Token::getChild(const XMLSize_t) const {
return 0;
}
// Base-class default for the virtual getMin(): always -1.
inline int Token::getMin() const {
return -1;
}
// Base-class default for the virtual getMax(): always -1.
inline int Token::getMax() const {
return -1;
}
// Base-class default for the virtual getReferenceNo(): always 0.
inline int Token::getReferenceNo() const {
return 0;
}
// Base-class default for the virtual getNoParen(): always 0.
inline int Token::getNoParen() const {
return 0;
}
// Base-class default for the virtual getString(): always a null pointer.
inline const XMLCh* Token::getString() const {
return 0;
}
// Base-class default for the virtual getChar(): always -1.
inline XMLInt32 Token::getChar() const {
return -1;
}
// ---------------------------------------------------------------------------
// Token: setter methods
// ---------------------------------------------------------------------------
// Stores 'tokType' in fTokenType. Note that the parameter name is the same
// as the enum type name Token::tokType.
inline void Token::setTokenType(const Token::tokType tokType) {
fTokenType = tokType;
}
// Base-class default for the virtual setMax(): the argument is ignored and
// nothing happens. The body comment points at ClosureToken, presumably the
// subclass that gives this a real implementation -- TODO confirm.
inline void Token::setMax(const int) {
// ClosureToken
}
// Base-class default for the virtual setMin(): the argument is ignored and
// nothing happens. The body comment points at ClosureToken, presumably the
// subclass that gives this a real implementation -- TODO confirm.
inline void Token::setMin(const int) {
// ClosureToken
}
// Tests whether every bit of 'flag' is present in 'options'.
// 'options' is signed and 'flag' unsigned, so the AND is evaluated as
// unsigned int.
inline bool Token::isSet(const int options, const unsigned int flag) {
    const unsigned int matchedBits = options & flag;
    return matchedBits == flag;
}
// ---------------------------------------------------------------------------
// Token: Putter methods
// ---------------------------------------------------------------------------
// Base-class default for the virtual addChild(): both arguments are ignored
// and Regex_NotSupported is reported by raising a RuntimeException through
// ThrowXMLwithMemMgr, using fMemoryManager.
inline void Token::addChild(Token* const, TokenFactory* const) {
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, fMemoryManager);
}
// ---------------------------------------------------------------------------
// Token: Range manipulation methods
// ---------------------------------------------------------------------------
// Base-class default for the virtual addRange(): the arguments are ignored
// and Regex_NotSupported is reported by raising a RuntimeException through
// ThrowXMLwithMemMgr.
inline void Token::addRange(const XMLInt32, const XMLInt32) {
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, fMemoryManager);
}
// Base-class default for the virtual mergeRanges(): the argument is ignored
// and Regex_NotSupported is reported by raising a RuntimeException through
// ThrowXMLwithMemMgr.
inline void Token::mergeRanges(const Token *const) {
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, fMemoryManager);
}
// Base-class default for the virtual sortRanges(): reports
// Regex_NotSupported by raising a RuntimeException through ThrowXMLwithMemMgr.
inline void Token::sortRanges() {
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, fMemoryManager);
}
// Base-class default for the virtual compactRanges(): reports
// Regex_NotSupported by raising a RuntimeException through ThrowXMLwithMemMgr.
inline void Token::compactRanges() {
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, fMemoryManager);
}
// Base-class default for the virtual subtractRanges(): the argument is
// ignored and Regex_NotSupported is reported by raising a RuntimeException
// through ThrowXMLwithMemMgr.
inline void Token::subtractRanges(RangeToken* const) {
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, fMemoryManager);
}
// Base-class default for the virtual intersectRanges(): the argument is
// ignored and Regex_NotSupported is reported by raising a RuntimeException
// through ThrowXMLwithMemMgr.
inline void Token::intersectRanges(RangeToken* const) {
ThrowXMLwithMemMgr(RuntimeException, XMLExcepts::Regex_NotSupported, fMemoryManager);
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file Token.hpp
*/

View File

@@ -0,0 +1,134 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: TokenFactory.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_TOKENFACTORY_HPP)
#define XERCESC_INCLUDE_GUARD_TOKENFACTORY_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/RefVectorOf.hpp>
#include <xercesc/util/regx/Token.hpp>
#include <xercesc/util/Mutexes.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// ---------------------------------------------------------------------------
// Forward Declaration
// ---------------------------------------------------------------------------
class RangeToken;
class CharToken;
class ClosureToken;
class ConcatToken;
class ParenToken;
class StringToken;
class UnionToken;
// Factory for the regular-expression Token types. Per the member comment
// below, tokens created on behalf of the user are recorded in fTokens so
// they can be cleaned up. Copying is disabled (copy constructor and
// assignment are declared private and left unimplemented).
class XMLUTIL_EXPORT TokenFactory : public XMemory
{
public:
// -----------------------------------------------------------------------
// Constructors and destructors
// -----------------------------------------------------------------------
TokenFactory(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~TokenFactory();
// -----------------------------------------------------------------------
// Factory methods
// -----------------------------------------------------------------------
// NOTE(review): all factory methods are defined outside this header;
// ownership of the returned tokens is presumably retained by the factory
// (see fTokens) -- confirm in TokenFactory.cpp.
Token* createToken(const Token::tokType tkType);
ParenToken* createParenthesis(Token* const token, const int noGroups);
ClosureToken* createClosure(Token* const token, bool isNonGreedy = false);
ConcatToken* createConcat(Token* const token1, Token* const token2);
UnionToken* createUnion(const bool isConcat = false);
RangeToken* createRange(const bool isNegRange = false);
CharToken* createChar(const XMLUInt32 ch, const bool isAnchor = false);
StringToken* createBackReference(const int refNo);
StringToken* createString(const XMLCh* const literal);
//static void printUnicode();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
/*
* Gets a commonly used RangeToken from the token registry based on the
* range name. This member simply forwards to staticGetRange().
*/
RangeToken* getRange(const XMLCh* const name,const bool complement=false);
Token* getLineBegin();
Token* getLineEnd();
Token* getDot();
MemoryManager* getMemoryManager() const;
static RangeToken* staticGetRange(const XMLCh* const name,const bool complement=false);
// -----------------------------------------------------------------------
// Notification that lazy data has been deleted
// -----------------------------------------------------------------------
static void reinitTokenFactoryMutex();
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
TokenFactory(const TokenFactory&);
TokenFactory& operator=(const TokenFactory&);
// -----------------------------------------------------------------------
// Private data members
//
// fTokens
// Contains user created Token objects. Used for memory cleanup.
//
// fEmpty, fLineBegin, fLineEnd, fDot
// Token pointers held by the factory. Presumably cached instances
// handed out by createToken()/getLineBegin()/getLineEnd()/getDot()
// -- TODO confirm against TokenFactory.cpp.
//
// fMemoryManager
// The memory manager returned by getMemoryManager().
//
// NOTE(review): this comment used to describe "fRangeInitialized", which
// is not a member of this class.
// -----------------------------------------------------------------------
RefVectorOf<Token>* fTokens;
Token* fEmpty;
Token* fLineBegin;
Token* fLineEnd;
Token* fDot;
MemoryManager* fMemoryManager;
};
// Instance-level convenience wrapper: forwards 'name' and 'complement'
// unchanged to the static staticGetRange() and returns its result. No
// per-instance state is consulted.
inline RangeToken* TokenFactory::getRange(const XMLCh* const name,const bool complement)
{
return staticGetRange(name, complement);
}
// Returns the MemoryManager pointer stored in fMemoryManager.
inline MemoryManager* TokenFactory::getMemoryManager() const
{
return fMemoryManager;
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file TokenFactory.hpp
*/

View File

@@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: TokenInc.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_TOKENINC_HPP)
#define XERCESC_INCLUDE_GUARD_TOKENINC_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/RangeToken.hpp>
#include <xercesc/util/regx/CharToken.hpp>
#include <xercesc/util/regx/ClosureToken.hpp>
#include <xercesc/util/regx/ConcatToken.hpp>
#include <xercesc/util/regx/ParenToken.hpp>
#include <xercesc/util/regx/StringToken.hpp>
#include <xercesc/util/regx/UnionToken.hpp>
XERCES_CPP_NAMESPACE_BEGIN
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file TokenInc.hpp
*/

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,88 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: UnicodeRangeFactory.hpp 671870 2008-06-26 12:19:31Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_UNICODERANGEFACTORY_HPP)
#define XERCESC_INCLUDE_GUARD_UNICODERANGEFACTORY_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/RangeFactory.hpp>
#include <xercesc/util/regx/XMLUniCharacter.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// RangeFactory subclass for Unicode category ranges. It also defines a set
// of aggregate category constants that extend the XMLUniCharacter category
// numbering. Copying is disabled (copy constructor and assignment are
// declared private and left unimplemented).
class XMLUTIL_EXPORT UnicodeRangeFactory: public RangeFactory {
public:
// -----------------------------------------------------------------------
// Public Constants
// -----------------------------------------------------------------------
// Unicode categories
// Numbering starts right after XMLUniCharacter::FINAL_PUNCTUATION and
// increases by one per entry; UNICATEGSIZE is one past the last category.
enum {
CHAR_LETTER = XMLUniCharacter::FINAL_PUNCTUATION+1,
CHAR_MARK,
CHAR_NUMBER,
CHAR_SEPARATOR,
CHAR_OTHER,
CHAR_PUNCTUATION,
CHAR_SYMBOL,
UNICATEGSIZE
};
// -----------------------------------------------------------------------
// Constructors and operators
// -----------------------------------------------------------------------
UnicodeRangeFactory();
~UnicodeRangeFactory();
// -----------------------------------------------------------------------
// Initialization methods
// -----------------------------------------------------------------------
void initializeKeywordMap(RangeTokenMap *rangeTokMap = 0);
// -----------------------------------------------------------------------
// Helper methods
// -----------------------------------------------------------------------
// NOTE(review): defined outside this header -- presumably maps a
// XMLUniCharacter type code to one of the CHAR_* constants above; confirm.
static unsigned short getUniCategory(const unsigned short type);
protected:
// -----------------------------------------------------------------------
// Protected Helper methods
// -----------------------------------------------------------------------
void buildRanges(RangeTokenMap *rangeTokMap = 0);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
UnicodeRangeFactory(const UnicodeRangeFactory&);
UnicodeRangeFactory& operator=(const UnicodeRangeFactory&);
};
XERCES_CPP_NAMESPACE_END
#endif
/**
* End file UnicodeRangeFactory.hpp
*/

View File

@@ -0,0 +1,91 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: UnionToken.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_UNIONTOKEN_HPP)
#define XERCESC_INCLUDE_GUARD_UNIONTOKEN_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/Token.hpp>
#include <xercesc/util/RefVectorOf.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// Token subclass that holds a list of child tokens in fChildren.
// size() treats fChildren as possibly null (reporting 0 children).
// Copying is disabled (copy constructor and assignment are declared private
// and left unimplemented).
class XMLUTIL_EXPORT UnionToken : public Token {
public:
// -----------------------------------------------------------------------
// Public Constructors and Destructor
// -----------------------------------------------------------------------
UnionToken(const tokType tkType
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
~UnionToken();
// -----------------------------------------------------------------------
// Getter methods
// -----------------------------------------------------------------------
// Number of children; 0 when fChildren is null.
XMLSize_t size() const;
// Child at 'index', as returned by fChildren->elementAt(index).
Token* getChild(const XMLSize_t index) const;
// -----------------------------------------------------------------------
// Children manipulation methods
// -----------------------------------------------------------------------
// Replaces Token::addChild(), whose base version raises Regex_NotSupported.
// Defined outside this header.
void addChild(Token* const child, TokenFactory* const tokFactory);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
UnionToken(const UnionToken&);
UnionToken& operator=(const UnionToken&);
// -----------------------------------------------------------------------
// Private Constants
// -----------------------------------------------------------------------
// NOTE(review): value defined elsewhere; presumably the initial capacity
// used when fChildren is first created -- confirm in UnionToken.cpp.
static const unsigned short INITIALSIZE;
// -----------------------------------------------------------------------
// Private data members
// -----------------------------------------------------------------------
RefVectorOf<Token>* fChildren;
};
// ---------------------------------------------------------------------------
// UnionToken: getter methods
// ---------------------------------------------------------------------------
// Returns the number of child tokens. When no child vector exists
// (fChildren is null) the token has no children and 0 is returned.
inline XMLSize_t UnionToken::size() const {
    if (fChildren == 0)
        return 0;

    return fChildren->size();
}
// Returns the child token at 'index'.
//
// index - position of the child; valid positions are 0 .. size()-1.
//
// When the token has no child vector (fChildren is null, the same case in
// which size() reports 0) a null pointer is returned, matching the default
// Token::getChild(). Otherwise the lookup is delegated to
// fChildren->elementAt(index), which handles out-of-range indices itself.
inline Token* UnionToken::getChild(const XMLSize_t index) const {
    // size() already treats fChildren as nullable; do not dereference null.
    if (fChildren == 0)
        return 0;

    return fChildren->elementAt(index);
}
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file UnionToken.hpp
*/

View File

@@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: XMLRangeFactory.hpp 678879 2008-07-22 20:05:05Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_XMLRANGEFACTORY_HPP)
#define XERCESC_INCLUDE_GUARD_XMLRANGEFACTORY_HPP
// ---------------------------------------------------------------------------
// Includes
// ---------------------------------------------------------------------------
#include <xercesc/util/regx/RangeFactory.hpp>
XERCES_CPP_NAMESPACE_BEGIN
// RangeFactory subclass for the XML-specific ranges. Both member functions
// are defined outside this header. Copying is disabled (copy constructor and
// assignment are declared private and left unimplemented).
class XMLUTIL_EXPORT XMLRangeFactory: public RangeFactory {
public:
// -----------------------------------------------------------------------
// Constructors and operators
// -----------------------------------------------------------------------
XMLRangeFactory();
~XMLRangeFactory();
// -----------------------------------------------------------------------
// Initialization methods
// -----------------------------------------------------------------------
// 'rangeTokMap' defaults to a null pointer.
// NOTE(review): how a null map is handled is not visible here -- confirm
// in XMLRangeFactory.cpp.
void initializeKeywordMap(RangeTokenMap *rangeTokMap = 0);
protected:
// -----------------------------------------------------------------------
// Protected Helper methods
// -----------------------------------------------------------------------
// 'rangeTokMap' defaults to a null pointer, as for initializeKeywordMap().
void buildRanges(RangeTokenMap *rangeTokMap = 0);
private:
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
XMLRangeFactory(const XMLRangeFactory&);
XMLRangeFactory& operator=(const XMLRangeFactory&);
};
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file XMLRangeFactory.hpp
*/

View File

@@ -0,0 +1,101 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: XMLUniCharacter.hpp 671870 2008-06-26 12:19:31Z amassari $
*/
#if !defined(XERCESC_INCLUDE_GUARD_XMLUNICHARACTER_HPP)
#define XERCESC_INCLUDE_GUARD_XMLUNICHARACTER_HPP
#include <xercesc/util/XercesDefs.hpp>
XERCES_CPP_NAMESPACE_BEGIN
/**
* Class for representing unicode characters
*/
// Exported utility class: its public surface is a set of character-type
// constants plus the static getType() lookup. The default constructor is
// private and (per its comment) unimplemented, so outside code cannot create
// instances.
class XMLUTIL_EXPORT XMLUniCharacter
{
public:
// -----------------------------------------------------------------------
// Public Constants
// -----------------------------------------------------------------------
// Unicode char types
// The values are explicit and contiguous from 0 to 29.
// NOTE(review): the names appear to follow the Unicode general categories,
// and these are presumably the values getType() returns -- confirm against
// the implementation.
enum {
UNASSIGNED = 0,
UPPERCASE_LETTER = 1,
LOWERCASE_LETTER = 2,
TITLECASE_LETTER = 3,
MODIFIER_LETTER = 4,
OTHER_LETTER = 5,
NON_SPACING_MARK = 6,
ENCLOSING_MARK = 7,
COMBINING_SPACING_MARK = 8,
DECIMAL_DIGIT_NUMBER = 9,
LETTER_NUMBER = 10,
OTHER_NUMBER = 11,
SPACE_SEPARATOR = 12,
LINE_SEPARATOR = 13,
PARAGRAPH_SEPARATOR = 14,
CONTROL = 15,
FORMAT = 16,
PRIVATE_USE = 17,
SURROGATE = 18,
DASH_PUNCTUATION = 19,
START_PUNCTUATION = 20,
END_PUNCTUATION = 21,
CONNECTOR_PUNCTUATION = 22,
OTHER_PUNCTUATION = 23,
MATH_SYMBOL = 24,
CURRENCY_SYMBOL = 25,
MODIFIER_SYMBOL = 26,
OTHER_SYMBOL = 27,
INITIAL_PUNCTUATION = 28,
FINAL_PUNCTUATION = 29
};
/** Destructor (empty, defined inline). */
~XMLUniCharacter() {}
/* Static methods for getting unicode character type */
/** @name Getter functions */
//@{
/** Gets the Unicode type of a given character.
*
* @param ch The character whose Unicode type is requested
* @return The character's type as an unsigned short; presumably one of the
*         constants in the enum above -- confirm against the implementation.
*/
static unsigned short getType(const XMLCh ch);
//@}
private :
/** @name Constructors and Destructor */
//@{
/** Unimplemented default constructor; private so the class cannot be
* instantiated from outside. */
XMLUniCharacter();
//@}
};
XERCES_CPP_NAMESPACE_END
#endif
/**
* End of file XMLUniCharacter.hpp
*/