/*
 * Copyright 2014 Emmanuel Engelhart
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301, USA.
 */

#ifndef KIWIX_INDEXER_H
#define KIWIX_INDEXER_H

/*
 * NOTE(review): the bare #include directives below carry no header name in
 * this copy of the file; the names must be restored from the original
 * source. The declarations in this header need at least the definitions of
 * string, std::queue, NULL, pthread_t and pthread_mutex_t to be in scope.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include "common/stringTools.h"
#include "common/otherTools.h"
#include
#include
#include
#include "reader.h"

/*
 * NOTE(review): a using-directive at header scope is injected into every
 * translation unit that includes this file. Removing it would change what
 * includers see, so it is only flagged here.
 */
using namespace std;

namespace kiwix {

  /*
   * Plain aggregate of eight strings describing one article. It is the
   * argument type of the push/pop helpers of the two queues declared in
   * Indexer.
   * NOTE(review): the fields presumably map onto the arguments of
   * Indexer::index(); note that index() names its arguments
   * title/unaccentedTitle while this struct uses accentedTitle/title --
   * confirm the mapping against the implementation.
   */
  struct indexerToken {
    string url;
    string accentedTitle;
    string title;
    string keywords;
    string content;
    string snippet;
    string size;       /* stored as a string, not as a number */
    string wordCount;  /* stored as a string, not as a number */
  };

  /*
   * Abstract base class of an article indexer.
   *
   * The class cannot be instantiated directly: indexingPrelude(), index(),
   * flush() and indexingPostlude() are pure virtual and must be provided by
   * a derived class. Apart from getTitleBoostFactor(), every member function
   * is only declared here; the definitions are not part of this header.
   *
   * The private section holds three pthread_t handles (articleExtractor,
   * articleParser, articleIndexer), two queues of work items and a set of
   * state fields, each declared next to a pthread_mutex_t.
   * NOTE(review): each mutex presumably guards the field(s) declared beside
   * it -- confirm against the implementation.
   */
  class Indexer {

    /*
     * Signature of the progress callback accepted by start(): it receives
     * the number of processed articles and the total number of articles.
     * NOTE(review): this typedef precedes the first access specifier, so it
     * is private under the class default access even though start() is
     * public. Callers can still pass a matching function pointer but cannot
     * name the type as Indexer::ProgressCallback.
     */
    typedef void (* ProgressCallback)(const unsigned int processedArticleCount, const unsigned int totalArticleCount);

  public:
    Indexer();
    virtual ~Indexer();

    /*
     * Takes the path of a ZIM file, the path of the index and an optional
     * progress callback (NULL when omitted). Both paths are passed by value.
     * NOTE(review): the meaning of the bool results of start() and stop() is
     * not visible in this header -- see the implementation.
     */
    bool start(const string zimPath, const string indexPath, ProgressCallback callback = NULL);
    bool stop();
    bool isRunning();

    /* Public reader of the `progression` field declared below. */
    unsigned int getProgression();
    void setVerboseFlag(const bool value);

  protected:
    /*
     * Hooks a concrete indexer has to implement (all pure virtual).
     * NOTE(review): judging by the names, indexingPrelude()/indexingPostlude()
     * bracket the indexing run, index() receives one article and flush()
     * commits pending work -- confirm the call order in the implementation.
     */
    virtual void indexingPrelude(const string indexPath) = 0;
    virtual void index(const string &url, const string &title, const string &unaccentedTitle, const string &keywords, const string &content, const string &snippet, const string &size, const string &wordCount) = 0;
    virtual void flush() = 0;
    virtual void indexingPostlude(const string indexPath) = 0;

    /* Others */
    unsigned int countWords(const string &text);

    /* Boost factor */
    unsigned int keywordsBoostFactor;

    /*
     * Returns contentLength / 500 + 1 using unsigned integer division: the
     * result is 1 for lengths 0..499 and grows by one for every further 500
     * units of contentLength.
     */
    inline unsigned int getTitleBoostFactor(const unsigned int contentLength) {
      return contentLength / 500 + 1;
    }

    /* Verbose */
    pthread_mutex_t verboseMutex;
    bool getVerboseFlag();
    bool verboseFlag;

  private:
    /* Callback of type ProgressCallback; start() accepts one as argument. */
    ProgressCallback progressCallback;
    pthread_mutex_t threadIdsMutex;

    /*
     * The three groups below share one layout: a pthread_t handle, a mutex,
     * a static function taking and returning void *, a bool flag and a
     * getter/setter pair for that flag.
     * NOTE(review): the static functions have the signature of a pthread
     * start routine and presumably run on the handle declared next to them;
     * confirm in the implementation.
     */

    /* Article extraction */
    pthread_t articleExtractor;
    pthread_mutex_t articleExtractorRunningMutex;
    static void *extractArticles(void *ptr);
    bool articleExtractorRunningFlag;
    bool isArticleExtractorRunning();
    void articleExtractorRunning(bool value);

    /* Article parsing */
    pthread_t articleParser;
    pthread_mutex_t articleParserRunningMutex;
    static void *parseArticles(void *ptr);
    bool articleParserRunningFlag;
    bool isArticleParserRunning();
    void articleParserRunning(bool value);

    /* Index writing */
    pthread_t articleIndexer;
    pthread_mutex_t articleIndexerRunningMutex;
    static void *indexArticles(void *ptr);
    bool articleIndexerRunningFlag;
    bool isArticleIndexerRunning();
    void articleIndexerRunning(bool value);

    /*
     * To parse queue
     * NOTE(review): the element type of this std::queue is missing in this
     * copy of the file; the push/pop helpers take indexerToken &, so it is
     * presumably a queue of indexerToken -- restore from the original.
     */
    std::queue toParseQueue;
    pthread_mutex_t toParseQueueMutex;
    void pushToParseQueue(indexerToken &token);
    bool popFromToParseQueue(indexerToken &token);
    bool isToParseQueueEmpty();

    /*
     * To index queue
     * NOTE(review): same remark as for toParseQueue -- the element type is
     * missing here and is presumably indexerToken.
     */
    std::queue toIndexQueue;
    pthread_mutex_t toIndexQueueMutex;
    void pushToIndexQueue(indexerToken &token);
    bool popFromToIndexQueue(indexerToken &token);
    bool isToIndexQueueEmpty();

    /* Article Count & Progression */
    unsigned int articleCount;
    pthread_mutex_t articleCountMutex;
    void setArticleCount(const unsigned int articleCount);
    unsigned int getArticleCount();

    /* Progression */
    unsigned int progression;
    pthread_mutex_t progressionMutex;
    void setProgression(const unsigned int progression);
    /* getProgression() is public */

    /* ZIM path */
    pthread_mutex_t zimPathMutex;
    string zimPath;
    void setZimPath(const string path);
    string getZimPath();

    /* Index path */
    pthread_mutex_t indexPathMutex;
    string indexPath;
    void setIndexPath(const string path);
    string getIndexPath();

    /* ZIM id */
    pthread_mutex_t zimIdMutex;
    string zimId;
    void setZimId(const string id);
    string getZimId();
  };
}

#endif