123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849 |
- #pragma once
- #include <functional>
- #include "Array.h"
- #include "Critical.h"
- #include "Text.h"
- namespace Framework
- {
- namespace Regex
- {
- class Flags
- {
- public:
- static const int START = 1;
- static const int END = 2;
- };
- template<typename Data> class State;
- template<typename Data> class Transition
- {
- private:
- std::function<bool(Data, int)> conditionFunction;
- State<Data>* zTarget;
- int requiredFlags;
- public:
- Transition()
- : conditionFunction(0),
- zTarget(0),
- requiredFlags(0)
- {}
- Transition(std::function<bool(Data, int)> conditionFunction,
- State<Data>* zTarget,
- int requiredFlags)
- : conditionFunction(conditionFunction),
- zTarget(zTarget),
- requiredFlags(requiredFlags)
- {}
- Transition(const Transition& other)
- : conditionFunction(other.conditionFunction),
- zTarget(other.zTarget),
- requiredFlags(other.requiredFlags)
- {}
- bool test(Data value, int flags) const
- {
- return conditionFunction && conditionFunction(value, flags)
- && (requiredFlags | flags) == flags;
- }
- bool test(int flags) const
- {
- return !conditionFunction && (requiredFlags | flags) == flags;
- }
- State<Data>* zTargetState() const
- {
- return zTarget;
- }
- std::function<bool(Data, int)> getConditionFunction() const
- {
- return conditionFunction;
- }
- int getRequiredFlags() const
- {
- return requiredFlags;
- }
- };
- template<typename Data> class State : public ReferenceCounter
- {
- private:
- Array<Transition<Data>> transitions;
- bool final;
- int groupStart;
- int groupEnd;
- int id;
- public:
- State(int id)
- : ReferenceCounter(),
- final(false),
- groupStart(0),
- groupEnd(0),
- id(id)
- {}
- void addTransition(Transition<Data> transition)
- {
- transitions.add(transition);
- }
- void setFinal(bool isFinal)
- {
- final = isFinal;
- }
- void setGroupStart(int groupStart)
- {
- this->groupStart = groupStart;
- }
- void setGroupEnd(int groupEnd)
- {
- this->groupEnd = groupEnd;
- }
- bool isFinal() const
- {
- return final;
- }
- int getGroupStart() const
- {
- return groupStart;
- }
- int getGroupEnd() const
- {
- return groupEnd;
- }
- int getId() const
- {
- return id;
- }
- const Array<Transition<Data>>& getTransitions() const
- {
- return transitions;
- }
- };
- class Group
- {
- private:
- int start;
- int end;
- int id;
- public:
- DLLEXPORT Group(int start, int end, int id);
- DLLEXPORT Group();
- DLLEXPORT int getStart();
- DLLEXPORT int getEnd();
- DLLEXPORT int getId();
- };
- class Result : public ReferenceCounter
- {
- private:
- Array<Group> groups;
- public:
- DLLEXPORT Result(Array<Group> groups);
- DLLEXPORT int getGroupCount();
- DLLEXPORT Group getGroup(int index);
- DLLEXPORT int getStart();
- DLLEXPORT int getEnd();
- };
- template<typename Data> class ExecutionStackFrame
- {
- private:
- ExecutionStackFrame* before;
- State<Data>* state;
- int transitionIndex;
- int strIndex;
- public:
- ExecutionStackFrame(ExecutionStackFrame<Data>* before,
- State<Data>* zState,
- int strIndex)
- : before(before),
- state(zState),
- transitionIndex(0),
- strIndex(strIndex)
- {}
- ~ExecutionStackFrame()
- {
- if (before)
- {
- delete before;
- }
- }
- ExecutionStackFrame<Data>* getBefore() const
- {
- return before;
- }
- State<Data>* zState() const
- {
- return state;
- }
- void setStrIndex(int index)
- {
- strIndex = index;
- }
- int getStrIndex() const
- {
- return strIndex;
- }
- int getTransitionIndex() const
- {
- return transitionIndex;
- }
- void setTransitionIndex(int transitionIndex)
- {
- this->transitionIndex = transitionIndex;
- }
- void discard()
- {
- before = 0;
- }
- Array<Group> getGroups() const
- {
- Array<Group> groups;
- const ExecutionStackFrame* current = this;
- while (true)
- {
- if (current->getBefore())
- {
- current = current->getBefore();
- }
- else
- {
- break;
- }
- }
- groups.add(Group(current->getStrIndex(), strIndex, 0));
- current = this;
- Array<int> groupEnds;
- Array<int> knownGroups;
- while (current)
- {
- if (current->zState()->getGroupEnd())
- {
- groupEnds.add(current->getStrIndex());
- }
- if (current->zState()->getGroupStart()
- && groupEnds.getEintragAnzahl() > 0)
- {
- if (knownGroups.getWertIndex(
- current->zState()->getGroupStart())
- < 0)
- {
- while (groups.getEintragAnzahl()
- <= current->zState()->getGroupStart())
- {
- groups.add(
- Group(-1, -1, groups.getEintragAnzahl()));
- }
- groups.set(Group(current->getStrIndex(),
- groupEnds.get(0),
- current->zState()->getGroupStart()),
- current->zState()->getGroupStart());
- knownGroups.add(current->zState()->getGroupStart());
- }
- groupEnds.remove(0);
- }
- current = current->getBefore();
- }
- return groups;
- }
- bool isEndlessLoop() const
- {
- const ExecutionStackFrame* current = this;
- while (current)
- {
- if (current->getBefore()
- && current->getBefore()->zState() == state
- && current->getBefore()->getStrIndex() == strIndex)
- {
- return true;
- }
- current = current->getBefore();
- }
- return false;
- }
- };
- template<typename Data> class Automata : public ReferenceCounter
- {
- private:
- RCArray<State<Data>> states;
- public:
- Automata()
- : ReferenceCounter()
- {}
- State<Data>* addState()
- {
- State<Data>* state = new State<Data>(states.getEintragAnzahl());
- states.add(state);
- return state;
- }
- const RCArray<State<Data>>& getStates() const
- {
- return states;
- }
- RCArray<Result>* match(const Data* str, int length)
- {
- RCArray<Result>* results = new RCArray<Result>();
- ExecutionStackFrame<Data>* currentFrame
- = new ExecutionStackFrame<Data>(0, states.z(0), 0);
- int flags = Flags::START;
- int startIndex = 0;
- while (currentFrame)
- {
- if (currentFrame->getStrIndex() >= length)
- {
- flags |= Flags::END;
- }
- else
- {
- flags &= ~Flags::END;
- }
- if (currentFrame->getStrIndex() == 0)
- {
- flags |= Flags::START;
- }
- else
- {
- flags &= ~Flags::START;
- }
- if (currentFrame->zState()->isFinal())
- {
- results->add(new Result(currentFrame->getGroups()));
- int index = currentFrame->getStrIndex();
- if (startIndex < index)
- {
- delete currentFrame;
- currentFrame = new ExecutionStackFrame<Data>(
- 0, states.z(0), index);
- startIndex = currentFrame->getStrIndex();
- if (currentFrame->getStrIndex() > length)
- {
- delete currentFrame;
- currentFrame = 0;
- continue;
- }
- }
- }
- if (currentFrame->isEndlessLoop())
- {
- delete currentFrame;
- results->release();
- return 0; // endless loop
- }
- bool found = 0;
- for (int i = currentFrame->getTransitionIndex();
- i < currentFrame->zState()
- ->getTransitions()
- .getEintragAnzahl();
- i++)
- {
- Transition<Data> t
- = currentFrame->zState()->getTransitions().get(i);
- if (t.test(flags))
- {
- currentFrame->setTransitionIndex(i + 1);
- currentFrame
- = new ExecutionStackFrame<Data>(currentFrame,
- t.zTargetState(),
- currentFrame->getStrIndex());
- found = 1;
- break;
- }
- else if (currentFrame->getStrIndex() < length
- && t.test(
- str[currentFrame->getStrIndex()], flags))
- {
- currentFrame->setTransitionIndex(i + 1);
- currentFrame
- = new ExecutionStackFrame<Data>(currentFrame,
- t.zTargetState(),
- currentFrame->getStrIndex() + 1);
- found = 1;
- break;
- }
- }
- if (!found)
- {
- ExecutionStackFrame<Data>* before
- = currentFrame->getBefore();
- if (before)
- {
- currentFrame->discard();
- delete currentFrame;
- currentFrame = before;
- }
- else
- {
- currentFrame->setStrIndex(
- currentFrame->getStrIndex() + 1);
- startIndex = currentFrame->getStrIndex();
- currentFrame->setTransitionIndex(0);
- if (currentFrame->getStrIndex() > length)
- {
- delete currentFrame;
- currentFrame = 0;
- }
- }
- }
- }
- return results;
- }
- };
- template<typename Data>
- Automata<Data>* oneOf(Automata<Data>* left, Automata<Data>* right)
- {
- Automata<Data>* result = new Automata<Data>();
- State<Data>* start = result->addState();
- int leftStateCount = left->getStates().getEintragAnzahl();
- for (State<Data>* leftState : left->getStates())
- {
- if (leftState)
- {
- result->addState();
- }
- }
- for (State<Data>* leftState : left->getStates())
- {
- State<Data>* newState
- = result->getStates().z(leftState->getId() + 1);
- newState->setFinal(leftState->isFinal());
- newState->setGroupStart(leftState->getGroupStart());
- newState->setGroupEnd(leftState->getGroupEnd());
- for (Transition<Data> transition : leftState->getTransitions())
- {
- newState->addTransition(
- Transition<Data>(transition.getConditionFunction(),
- result->getStates().z(
- transition.zTargetState()->getId() + 1),
- transition.getRequiredFlags()));
- }
- }
- start->addTransition(
- Transition<Data>(0, result->getStates().z(1), 0));
- for (State<Data>* rightState : right->getStates())
- {
- if (rightState)
- {
- result->addState();
- }
- }
- for (State<Data>* rightState : right->getStates())
- {
- State<Data>* newState = result->getStates().z(
- rightState->getId() + 1 + leftStateCount);
- newState->setFinal(rightState->isFinal());
- newState->setGroupStart(rightState->getGroupStart());
- newState->setGroupEnd(rightState->getGroupEnd());
- for (Transition<Data> transition : rightState->getTransitions())
- {
- newState->addTransition(Transition<Data>(
- transition.getConditionFunction(),
- result->getStates().z(transition.zTargetState()->getId()
- + 1 + leftStateCount),
- transition.getRequiredFlags()));
- }
- }
- start->addTransition(Transition<Data>(
- 0, result->getStates().z(1 + leftStateCount), 0));
- left->release();
- right->release();
- return result;
- }
- template<typename Data>
- Automata<Data>* concat(Automata<Data>* left, Automata<Data>* right)
- {
- Automata<Data>* result = new Automata<Data>();
- int leftStateCount = left->getStates().getEintragAnzahl();
- for (State<Data>* leftState : left->getStates())
- {
- if (leftState)
- {
- result->addState();
- }
- }
- bool canSkipFirst = !right->getStates().z(0)->getGroupStart()
- && !right->getStates().z(0)->getGroupEnd();
- int index = 0;
- for (State<Data>* rightState : right->getStates())
- {
- if (rightState && (index != 0 || !canSkipFirst))
- {
- result->addState();
- }
- index++;
- }
- for (State<Data>* leftState : left->getStates())
- {
- State<Data>* newState
- = result->getStates().z(leftState->getId());
- newState->setGroupStart(leftState->getGroupStart());
- newState->setGroupEnd(leftState->getGroupEnd());
- for (Transition<Data> transition : leftState->getTransitions())
- {
- newState->addTransition(
- Transition<Data>(transition.getConditionFunction(),
- result->getStates().z(
- transition.zTargetState()->getId()),
- transition.getRequiredFlags()));
- }
- if (leftState->isFinal())
- {
- if (canSkipFirst)
- {
- State<Data>* oldRightStart = right->getStates().z(0);
- if (oldRightStart->isFinal())
- {
- newState->setFinal(true);
- }
- for (Transition<Data> transition :
- oldRightStart->getTransitions())
- {
- newState->addTransition(Transition<Data>(
- transition.getConditionFunction(),
- result->getStates().z(
- leftStateCount - 1
- + transition.zTargetState()->getId()),
- transition.getRequiredFlags()));
- }
- }
- else
- {
- newState->addTransition(Transition<Data>(
- 0, result->getStates().z(leftStateCount), 0));
- }
- }
- }
- index = 0;
- for (State<Data>* rightState : right->getStates())
- {
- if (index == 0 && canSkipFirst)
- {
- index++;
- continue;
- }
- State<Data>* newState
- = result->getStates().z(rightState->getId() + leftStateCount
- - (canSkipFirst ? 1 : 0));
- newState->setFinal(rightState->isFinal());
- newState->setGroupStart(rightState->getGroupStart());
- newState->setGroupEnd(rightState->getGroupEnd());
- for (Transition<Data> transition : rightState->getTransitions())
- {
- newState->addTransition(
- Transition<Data>(transition.getConditionFunction(),
- result->getStates().z(
- transition.zTargetState()->getId()
- + leftStateCount - (canSkipFirst ? 1 : 0)),
- transition.getRequiredFlags()));
- }
- index++;
- }
- left->release();
- right->release();
- return result;
- }
- template<typename Data>
- Automata<Data>* many(Automata<Data>* before, bool lazy)
- {
- Automata<Data>* result = new Automata<Data>();
- for (State<Data>* beforeState : before->getStates())
- {
- if (beforeState)
- {
- result->addState();
- }
- }
- State<Data>* newFinal = result->addState();
- if (lazy)
- {
- result->getStates().z(0)->addTransition(
- Transition<Data>(0, newFinal, 0));
- }
- newFinal->setFinal(true);
- for (State<Data>* beforeState : before->getStates())
- {
- State<Data>* newState
- = result->getStates().z(beforeState->getId());
- newState->setGroupStart(beforeState->getGroupStart());
- newState->setGroupEnd(beforeState->getGroupEnd());
- for (Transition<Data> transition :
- beforeState->getTransitions())
- {
- newState->addTransition(
- Transition<Data>(transition.getConditionFunction(),
- result->getStates().z(
- transition.zTargetState()->getId()),
- transition.getRequiredFlags()));
- }
- if (beforeState->isFinal())
- {
- newState->addTransition(
- Transition<Data>(0, result->getStates().z(0), 0));
- }
- }
- if (!lazy)
- {
- result->getStates().z(0)->addTransition(
- Transition<Data>(0, newFinal, 0));
- }
- before->release();
- return result;
- }
- template<typename Data>
- Automata<Data>* atLeastOnce(Automata<Data>* before, bool lazy)
- {
- Automata<Data>* result = new Automata<Data>();
- for (State<Data>* beforeState : before->getStates())
- {
- if (beforeState)
- {
- result->addState();
- }
- }
- State<Data>* newFinal = result->addState();
- newFinal->setFinal(true);
- for (State<Data>* beforeState : before->getStates())
- {
- State<Data>* newState
- = result->getStates().z(beforeState->getId());
- newState->setGroupStart(beforeState->getGroupStart());
- newState->setGroupEnd(beforeState->getGroupEnd());
- if (lazy && beforeState->isFinal())
- {
- newState->addTransition(Transition<Data>(0, newFinal, 0));
- }
- for (Transition<Data> transition :
- beforeState->getTransitions())
- {
- newState->addTransition(
- Transition<Data>(transition.getConditionFunction(),
- result->getStates().z(
- transition.zTargetState()->getId()),
- transition.getRequiredFlags()));
- }
- if (beforeState->isFinal())
- {
- newState->addTransition(
- Transition<Data>(0, result->getStates().z(0), 0));
- }
- if (!lazy && beforeState->isFinal())
- {
- newState->addTransition(Transition<Data>(0, newFinal, 0));
- }
- }
- before->release();
- return result;
- }
- template<typename Data> Automata<Data>* maybe(Automata<Data>* before)
- {
- Automata<Data>* result = new Automata<Data>();
- for (State<Data>* beforeState : before->getStates())
- {
- if (beforeState)
- {
- result->addState();
- }
- }
- State<Data>* newFinal = result->addState();
- newFinal->setFinal(true);
- for (State<Data>* beforeState : before->getStates())
- {
- State<Data>* newState
- = result->getStates().z(beforeState->getId());
- newState->setGroupStart(beforeState->getGroupStart());
- newState->setGroupEnd(beforeState->getGroupEnd());
- for (Transition<Data> transition :
- beforeState->getTransitions())
- {
- newState->addTransition(
- Transition<Data>(transition.getConditionFunction(),
- result->getStates().z(
- transition.zTargetState()->getId()),
- transition.getRequiredFlags()));
- }
- if (beforeState->isFinal())
- {
- newState->addTransition(Transition<Data>(0, newFinal, 0));
- }
- }
- result->getStates().z(0)->addTransition(
- Transition<Data>(0, newFinal, 0));
- before->release();
- return result;
- }
- template<typename Data> Automata<Data>* repeat(
- Automata<Data>* before, int minAmount, int maxAmount)
- {
- Automata<Data>* result = new Automata<Data>();
- for (int i = 0; i < maxAmount; i++)
- {
- for (State<Data>* beforeState : before->getStates())
- {
- if (beforeState)
- {
- result->addState();
- }
- }
- for (State<Data>* beforeState : before->getStates())
- {
- State<Data>* newState = result->getStates().z(
- beforeState->getId()
- + before->getStates().getEintragAnzahl() * i);
- newState->setGroupStart(beforeState->getGroupStart());
- newState->setGroupEnd(beforeState->getGroupEnd());
- for (Transition<Data> transition :
- beforeState->getTransitions())
- {
- newState->addTransition(Transition<Data>(
- transition.getConditionFunction(),
- result->getStates().z(
- transition.zTargetState()->getId()
- + before->getStates().getEintragAnzahl() * i),
- transition.getRequiredFlags()));
- }
- }
- }
- State<Data>* newFinal = result->addState();
- newFinal->setFinal(true);
- if (minAmount == 0)
- {
- result->getStates().z(0)->addTransition(
- Transition<Data>(0, newFinal, 0));
- }
- for (int i = 0; i < maxAmount; i++)
- {
- for (State<Data>* beforeState : before->getStates())
- {
- State<Data>* newState = result->getStates().z(
- beforeState->getId()
- + before->getStates().getEintragAnzahl() * i);
- if (beforeState->isFinal())
- {
- if (i < maxAmount - 1)
- {
- newState->addTransition(Transition<Data>(0,
- result->getStates().z(
- before->getStates().getEintragAnzahl()
- * (i + 1)),
- 0));
- }
- if (i >= minAmount - 1)
- {
- newState->addTransition(
- Transition<Data>(0, newFinal, 0));
- }
- }
- }
- }
- before->release();
- return result;
- }
- template<typename Data>
- Automata<Data>* group(Automata<Data>* before, int groupId)
- {
- Automata<Data>* result = new Automata<Data>();
- State<Data>* newStart = result->addState();
- newStart->setGroupStart(groupId);
- State<Data>* groupStart = result->addState();
- newStart->addTransition(Transition<Data>(0, groupStart, 0));
- for (State<Data>* beforeState : before->getStates())
- {
- if (beforeState)
- {
- result->addState();
- }
- }
- groupStart->addTransition(
- Transition<Data>(0, result->getStates().z(2), 0));
- State<Data>* groupEnd = result->addState();
- groupEnd->setGroupEnd(groupId);
- State<Data>* newFinal = result->addState();
- groupEnd->addTransition(Transition<Data>(0, newFinal, 0));
- newFinal->setFinal(true);
- for (State<Data>* beforeState : before->getStates())
- {
- State<Data>* newState
- = result->getStates().z(beforeState->getId() + 2);
- newState->setGroupStart(beforeState->getGroupStart());
- newState->setGroupEnd(beforeState->getGroupEnd());
- for (Transition<Data> transition :
- beforeState->getTransitions())
- {
- newState->addTransition(
- Transition<Data>(transition.getConditionFunction(),
- result->getStates().z(
- transition.zTargetState()->getId() + 2),
- transition.getRequiredFlags()));
- }
- if (beforeState->isFinal())
- {
- newState->addTransition(Transition<Data>(0, groupEnd, 0));
- }
- }
- before->release();
- return result;
- }
- class RegexConfig
- {
- private:
- Text whitespaceChars;
- Text wordChars;
- public:
- DLLEXPORT RegexConfig();
- DLLEXPORT void setWhitespaceChars(Text whitespaceChars);
- DLLEXPORT void setWordChars(Text wordChars);
- DLLEXPORT Text getWhitespaceChars() const;
- DLLEXPORT Text getWordChars() const;
- };
- DLLEXPORT Text quote(const Text& text);
- DLLEXPORT Automata<char>* parse(Text regex);
- DLLEXPORT Automata<char>* parse(Text regex, RegexConfig& config);
- } // namespace Regex
- } // namespace Framework
|