mirror of
https://github.com/kiwix/libkiwix.git
synced 2025-06-26 10:11:30 +00:00
Switch build system to mesonbuild.
There is no more integrated build of dependencies in the build system. Dependencies are discovered using pkg-config except for ctpp2 where there is no pkg-config file.
This commit is contained in:
4
include/common/base64.h
Normal file
4
include/common/base64.h
Normal file
@ -0,0 +1,4 @@
|
||||
#include <string>
|
||||
|
||||
std::string base64_encode(unsigned char const* , unsigned int len);
|
||||
std::string base64_decode(std::string const& s);
|
48
include/common/networkTools.h
Normal file
48
include/common/networkTools.h
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright 2012 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_NETWORKTOOLS_H
|
||||
#define KIWIX_NETWORKTOOLS_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <winsock2.h>
|
||||
#include <ws2tcpip.h>
|
||||
#else
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
#include <net/if.h>
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
namespace kiwix {
|
||||
std::map<std::string, std::string> getNetworkInterfaces();
|
||||
std::string getBestPublicIp();
|
||||
}
|
||||
|
||||
#endif
|
33
include/common/otherTools.h
Normal file
33
include/common/otherTools.h
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright 2014 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_OTHERTOOLS_H
|
||||
#define KIWIX_OTHERTOOLS_H
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <Windows.h>
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
namespace kiwix {
|
||||
void sleep(unsigned int milliseconds);
|
||||
}
|
||||
|
||||
#endif
|
60
include/common/pathTools.h
Normal file
60
include/common/pathTools.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_PATHTOOLS_H
|
||||
#define KIWIX_PATHTOOLS_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/stat.h>
|
||||
#include <ios>
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
#endif
|
||||
|
||||
#include "stringTools.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool isRelativePath(const string &path);
|
||||
string computeAbsolutePath(const string path, const string relativePath);
|
||||
string computeRelativePath(const string path, const string absolutePath);
|
||||
string removeLastPathElement(const string path, const bool removePreSeparator = false,
|
||||
const bool removePostSeparator = false);
|
||||
string appendToDirectory(const string &directoryPath, const string &filename);
|
||||
|
||||
unsigned int getFileSize(const string &path);
|
||||
string getFileSizeAsString(const string &path);
|
||||
bool fileExists(const string &path);
|
||||
bool makeDirectory(const string &path);
|
||||
bool copyFile(const string &sourcePath, const string &destPath);
|
||||
string getLastPathElement(const string &path);
|
||||
string getExecutablePath();
|
||||
string getCurrentDirectory();
|
||||
bool writeTextFile(const string &path, const string &content);
|
||||
#endif
|
32
include/common/regexTools.h
Normal file
32
include/common/regexTools.h
Normal file
@ -0,0 +1,32 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_REGEXTOOLS_H
|
||||
#define KIWIX_REGEXTOOLS_H
|
||||
|
||||
#include <unicode/regex.h>
|
||||
#include <unicode/ucnv.h>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
bool matchRegex(const std::string &content, const std::string ®ex);
|
||||
std::string replaceRegex(const std::string &content, const std::string &replacement, const std::string ®ex);
|
||||
std::string appendToFirstOccurence(const std::string &content, const std::string regex, const std::string &replacement);
|
||||
|
||||
#endif
|
39219
include/common/resourceTools.h
Normal file
39219
include/common/resourceTools.h
Normal file
File diff suppressed because it is too large
Load Diff
71
include/common/stringTools.h
Normal file
71
include/common/stringTools.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright 2011-2012 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_STRINGTOOLS_H
|
||||
#define KIWIX_STRINGTOOLS_H
|
||||
|
||||
#include <unicode/translit.h>
|
||||
#include <unicode/normlzr.h>
|
||||
#include <unicode/unistr.h>
|
||||
#include <unicode/rep.h>
|
||||
#include <unicode/uniset.h>
|
||||
#include <unicode/ustring.h>
|
||||
#include <unicode/ucnv.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
#include "pathTools.h"
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
#ifndef __ANDROID__
|
||||
|
||||
std::string beautifyInteger(const unsigned int number);
|
||||
std::string beautifyFileSize(const unsigned int number);
|
||||
std::string urlEncode(const std::string &c);
|
||||
void printStringInHexadecimal(const char *s);
|
||||
void printStringInHexadecimal(UnicodeString s);
|
||||
void stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr);
|
||||
std::string encodeDiples(const std::string& str);
|
||||
|
||||
#endif
|
||||
|
||||
std::string removeAccents(const std::string &text);
|
||||
void loadICUExternalTables();
|
||||
std::string urlDecode(const std::string &c);
|
||||
|
||||
std::vector<std::string> split(const std::string&, const std::string&);
|
||||
std::vector<std::string> split(const char*, const char*);
|
||||
std::vector<std::string> split(const std::string&, const char*);
|
||||
std::vector<std::string> split(const char*, const std::string&);
|
||||
|
||||
std::string ucAll(const std::string &word);
|
||||
std::string lcAll(const std::string &word);
|
||||
std::string ucFirst(const std::string &word);
|
||||
std::string lcFirst(const std::string &word);
|
||||
std::string toTitle(const std::string &word);
|
||||
|
||||
std::string normalize(const std::string &word);
|
||||
}
|
||||
|
||||
#endif
|
2786
include/common/tree.h
Normal file
2786
include/common/tree.h
Normal file
File diff suppressed because it is too large
Load Diff
80
include/ctpp2/CTPP2VMStringLoader.hpp
Normal file
80
include/ctpp2/CTPP2VMStringLoader.hpp
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright 2013 Renaud Gaudin <reg@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef _CTPP2_VM_STRING_LOADER_HPP__
|
||||
#define _CTPP2_VM_STRING_LOADER_HPP__ 1
|
||||
|
||||
#include <ctpp2/CTPP2VMLoader.hpp>
|
||||
#include <ctpp2/CTPP2Util.hpp>
|
||||
#include <ctpp2/CTPP2Exception.hpp>
|
||||
#include <ctpp2/CTPP2VMExecutable.hpp>
|
||||
#include <ctpp2/CTPP2VMInstruction.hpp>
|
||||
#include <ctpp2/CTPP2VMMemoryCore.hpp>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// #include <common/resourceTools.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
/**
|
||||
@file VMStringLoader.hpp
|
||||
@brief Load program core from file
|
||||
*/
|
||||
|
||||
namespace CTPP // C++ Template Engine
|
||||
{
|
||||
// FWD
|
||||
struct VMExecutable;
|
||||
|
||||
/**
|
||||
@class VMStringLoader CTPP2VMStringLoader.hpp <CTPP2VMStringLoader.hpp>
|
||||
@brief Load program core from file
|
||||
*/
|
||||
class CTPP2DECL VMStringLoader:
|
||||
public VMLoader
|
||||
{
|
||||
public:
|
||||
/**
|
||||
*/
|
||||
VMStringLoader(CCHAR_P rawContent, size_t rawContentSize);
|
||||
/**
|
||||
@brief Get ready-to-run program
|
||||
*/
|
||||
const VMMemoryCore * GetCore() const;
|
||||
|
||||
/**
|
||||
@brief A destructor
|
||||
*/
|
||||
~VMStringLoader() throw();
|
||||
private:
|
||||
/** Program core */
|
||||
VMExecutable * oCore;
|
||||
/** Ready-to-run program */
|
||||
VMMemoryCore * pVMMemoryCore;
|
||||
};
|
||||
|
||||
} // namespace CTPP
|
||||
#endif // _CTPP2_VM_STRING_LOADER_HPP__
|
||||
// End.
|
175
include/indexer.h
Executable file
175
include/indexer.h
Executable file
@ -0,0 +1,175 @@
|
||||
/*
|
||||
* Copyright 2014 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_INDEXER_H
|
||||
#define KIWIX_INDEXER_H
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
#include <queue>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
|
||||
#include <pthread.h>
|
||||
#include "common/stringTools.h"
|
||||
#include "common/otherTools.h"
|
||||
#include "common/resourceTools.h"
|
||||
#include <zim/file.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/fileiterator.h>
|
||||
#include "reader.h"
|
||||
#include "xapian/myhtmlparse.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
struct indexerToken {
|
||||
string url;
|
||||
string accentedTitle;
|
||||
string title;
|
||||
string keywords;
|
||||
string content;
|
||||
string snippet;
|
||||
string size;
|
||||
string wordCount;
|
||||
};
|
||||
|
||||
class Indexer {
|
||||
|
||||
typedef void (* ProgressCallback)(const unsigned int processedArticleCount, const unsigned int totalArticleCount);
|
||||
|
||||
public:
|
||||
Indexer();
|
||||
virtual ~Indexer();
|
||||
|
||||
bool start(const string zimPath, const string indexPath, ProgressCallback callback = NULL);
|
||||
bool stop();
|
||||
bool isRunning();
|
||||
unsigned int getProgression();
|
||||
void setVerboseFlag(const bool value);
|
||||
|
||||
protected:
|
||||
virtual void indexingPrelude(const string indexPath) = 0;
|
||||
virtual void index(const string &url,
|
||||
const string &title,
|
||||
const string &unaccentedTitle,
|
||||
const string &keywords,
|
||||
const string &content,
|
||||
const string &snippet,
|
||||
const string &size,
|
||||
const string &wordCount) = 0;
|
||||
virtual void flush() = 0;
|
||||
virtual void indexingPostlude(const string indexPath) = 0;
|
||||
|
||||
/* Stop words */
|
||||
std::vector<std::string> stopWords;
|
||||
void readStopWords(const string languageCode);
|
||||
|
||||
/* Others */
|
||||
unsigned int countWords(const string &text);
|
||||
|
||||
/* Boost factor */
|
||||
unsigned int keywordsBoostFactor;
|
||||
inline unsigned int getTitleBoostFactor(const unsigned int contentLength) {
|
||||
return contentLength / 500 + 1;
|
||||
}
|
||||
|
||||
/* Verbose */
|
||||
pthread_mutex_t verboseMutex;
|
||||
bool getVerboseFlag();
|
||||
bool verboseFlag;
|
||||
|
||||
private:
|
||||
ProgressCallback progressCallback;
|
||||
pthread_mutex_t threadIdsMutex;
|
||||
|
||||
/* Article extraction */
|
||||
pthread_t articleExtractor;
|
||||
pthread_mutex_t articleExtractorRunningMutex;
|
||||
static void *extractArticles(void *ptr);
|
||||
bool articleExtractorRunningFlag;
|
||||
bool isArticleExtractorRunning();
|
||||
void articleExtractorRunning(bool value);
|
||||
|
||||
/* Article parsing */
|
||||
pthread_t articleParser;
|
||||
pthread_mutex_t articleParserRunningMutex;
|
||||
static void *parseArticles(void *ptr);
|
||||
bool articleParserRunningFlag;
|
||||
bool isArticleParserRunning();
|
||||
void articleParserRunning(bool value);
|
||||
|
||||
/* Index writting */
|
||||
pthread_t articleIndexer;
|
||||
pthread_mutex_t articleIndexerRunningMutex;
|
||||
static void *indexArticles(void *ptr);
|
||||
bool articleIndexerRunningFlag;
|
||||
bool isArticleIndexerRunning();
|
||||
void articleIndexerRunning(bool value);
|
||||
|
||||
/* To parse queue */
|
||||
std::queue<indexerToken> toParseQueue;
|
||||
pthread_mutex_t toParseQueueMutex;
|
||||
void pushToParseQueue(indexerToken &token);
|
||||
bool popFromToParseQueue(indexerToken &token);
|
||||
bool isToParseQueueEmpty();
|
||||
|
||||
/* To index queue */
|
||||
std::queue<indexerToken> toIndexQueue;
|
||||
pthread_mutex_t toIndexQueueMutex;
|
||||
void pushToIndexQueue(indexerToken &token);
|
||||
bool popFromToIndexQueue(indexerToken &token);
|
||||
bool isToIndexQueueEmpty();
|
||||
|
||||
/* Article Count & Progression */
|
||||
unsigned int articleCount;
|
||||
pthread_mutex_t articleCountMutex;
|
||||
void setArticleCount(const unsigned int articleCount);
|
||||
unsigned int getArticleCount();
|
||||
|
||||
/* Progression */
|
||||
unsigned int progression;
|
||||
pthread_mutex_t progressionMutex;
|
||||
void setProgression(const unsigned int progression);
|
||||
/* getProgression() is public */
|
||||
|
||||
/* ZIM path */
|
||||
pthread_mutex_t zimPathMutex;
|
||||
string zimPath;
|
||||
void setZimPath(const string path);
|
||||
string getZimPath();
|
||||
|
||||
/* Index path */
|
||||
pthread_mutex_t indexPathMutex;
|
||||
string indexPath;
|
||||
void setIndexPath(const string path);
|
||||
string getIndexPath();
|
||||
|
||||
/* ZIM id */
|
||||
pthread_mutex_t zimIdMutex;
|
||||
string zimId;
|
||||
void setZimId(const string id);
|
||||
string getZimId();
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
26
include/kiwix.h
Normal file
26
include/kiwix.h
Normal file
@ -0,0 +1,26 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_H
|
||||
#define KIWIX_H
|
||||
|
||||
#include "library.h"
|
||||
|
||||
|
||||
#endif
|
107
include/library.h
Normal file
107
include/library.h
Normal file
@ -0,0 +1,107 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_LIBRARY_H
|
||||
#define KIWIX_LIBRARY_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <stack>
|
||||
|
||||
#include "common/stringTools.h"
|
||||
#include "common/regexTools.h"
|
||||
|
||||
#define KIWIX_LIBRARY_VERSION "20110515"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
enum supportedIndexType { UNKNOWN, XAPIAN };
|
||||
|
||||
class Book {
|
||||
|
||||
public:
|
||||
Book();
|
||||
~Book();
|
||||
|
||||
static bool sortByLastOpen(const Book &a, const Book &b);
|
||||
static bool sortByTitle(const Book &a, const Book &b);
|
||||
static bool sortBySize(const Book &a, const Book &b);
|
||||
static bool sortByDate(const Book &a, const Book &b);
|
||||
static bool sortByCreator(const Book &a, const Book &b);
|
||||
static bool sortByPublisher(const Book &a, const Book &b);
|
||||
static bool sortByLanguage(const Book &a, const Book &b);
|
||||
string getHumanReadableIdFromPath();
|
||||
|
||||
string id;
|
||||
string path;
|
||||
string pathAbsolute;
|
||||
string last;
|
||||
string indexPath;
|
||||
string indexPathAbsolute;
|
||||
supportedIndexType indexType;
|
||||
string title;
|
||||
string description;
|
||||
string language;
|
||||
string creator;
|
||||
string publisher;
|
||||
string date;
|
||||
string url;
|
||||
string name;
|
||||
string tags;
|
||||
string origId;
|
||||
string articleCount;
|
||||
string mediaCount;
|
||||
bool readOnly;
|
||||
string size;
|
||||
string favicon;
|
||||
string faviconMimeType;
|
||||
};
|
||||
|
||||
class Library {
|
||||
|
||||
public:
|
||||
Library();
|
||||
~Library();
|
||||
|
||||
string version;
|
||||
bool addBook(const Book &book);
|
||||
bool removeBookByIndex(const unsigned int bookIndex);
|
||||
vector <kiwix::Book> books;
|
||||
|
||||
/*
|
||||
* 'current' is the variable storing the current content/book id
|
||||
* in the library. This is used to be able to load per default a
|
||||
* content. As Kiwix may work with many library XML files, you may
|
||||
* have "current" defined many time with different values. The
|
||||
* last XML file read has the priority, Although we do not have an
|
||||
* library object for each file, we want to be able to fallback to
|
||||
* an 'old' current book if the one which should be load
|
||||
* failed. That is the reason why we need a stack here
|
||||
*/
|
||||
stack<string> current;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
92
include/manager.h
Normal file
92
include/manager.h
Normal file
@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_MANAGER_H
|
||||
#define KIWIX_MANAGER_H
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <time.h>
|
||||
|
||||
#include <pugixml.hpp>
|
||||
|
||||
#include "common/base64.h"
|
||||
#include "common/regexTools.h"
|
||||
#include "common/pathTools.h"
|
||||
#include "library.h"
|
||||
#include "reader.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
enum supportedListMode { LASTOPEN, REMOTE, LOCAL };
|
||||
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
|
||||
|
||||
class Manager {
|
||||
|
||||
public:
|
||||
Manager();
|
||||
~Manager();
|
||||
|
||||
bool readFile(const string path, const bool readOnly = true);
|
||||
bool readFile(const string nativePath, const string UTF8Path, const bool readOnly = true);
|
||||
bool readXml(const string xml, const bool readOnly = true, const string libraryPath = "");
|
||||
bool writeFile(const string path);
|
||||
bool removeBookByIndex(const unsigned int bookIndex);
|
||||
bool removeBookById(const string id);
|
||||
bool setCurrentBookId(const string id);
|
||||
string getCurrentBookId();
|
||||
bool setBookIndex(const string id, const string path, const supportedIndexType type);
|
||||
bool setBookIndex(const string id, const string path);
|
||||
bool setBookPath(const string id, const string path);
|
||||
string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "",
|
||||
const bool checkMetaData = false);
|
||||
bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "",
|
||||
const bool checkMetaData = false);
|
||||
Library cloneLibrary();
|
||||
bool getBookById(const string id, Book &book);
|
||||
bool getCurrentBook(Book &book);
|
||||
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
|
||||
bool updateBookLastOpenDateById(const string id);
|
||||
void removeBookPaths();
|
||||
bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize,
|
||||
const string language, const string creator, const string publisher, const string search);
|
||||
vector<string> getBooksLanguages();
|
||||
vector<string> getBooksCreators();
|
||||
vector<string> getBooksPublishers();
|
||||
vector<string> getBooksIds();
|
||||
|
||||
string writableLibraryPath;
|
||||
|
||||
vector<std::string> bookIdList;
|
||||
|
||||
protected:
|
||||
kiwix::Library library;
|
||||
|
||||
bool readBookFromPath(const string path, Book *book = NULL);
|
||||
bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath);
|
||||
|
||||
private:
|
||||
void checkAndCleanBookPaths(Book &book, const string &libraryPath);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
31
include/meson.build
Normal file
31
include/meson.build
Normal file
@ -0,0 +1,31 @@
|
||||
headers = [
|
||||
'indexer.h',
|
||||
'library.h',
|
||||
'manager.h',
|
||||
'reader.h',
|
||||
'searcher.h'
|
||||
]
|
||||
|
||||
if xapian_dep.found()
|
||||
headers += ['xapianIndexer.h', 'xapianSearcher.h']
|
||||
endif
|
||||
|
||||
install_headers(headers, subdir:'kiwix')
|
||||
|
||||
install_headers(
|
||||
'common/base64.h',
|
||||
'common/networkTools.h',
|
||||
'common/otherTools.h',
|
||||
'common/pathTools.h',
|
||||
'common/regexTools.h',
|
||||
'common/resourceTools.h',
|
||||
'common/stringTools.h',
|
||||
'common/tree.h',
|
||||
subdir:'kiwix/common'
|
||||
)
|
||||
|
||||
|
||||
install_headers(
|
||||
'ctpp2/CTPP2VMStringLoader.hpp',
|
||||
subdir:'kiwix/ctpp2'
|
||||
)
|
104
include/reader.h
Normal file
104
include/reader.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_READER_H
|
||||
#define KIWIX_READER_H
|
||||
|
||||
#include <zim/zim.h>
|
||||
#include <zim/file.h>
|
||||
#include <zim/article.h>
|
||||
#include <zim/fileiterator.h>
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <exception>
|
||||
#include <sstream>
|
||||
#include <map>
|
||||
#include "common/pathTools.h"
|
||||
#include "common/stringTools.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class Reader {
|
||||
|
||||
public:
|
||||
Reader(const string zimFilePath);
|
||||
~Reader();
|
||||
|
||||
void reset();
|
||||
unsigned int getArticleCount();
|
||||
unsigned int getMediaCount();
|
||||
unsigned int getGlobalCount();
|
||||
string getZimFilePath();
|
||||
string getId();
|
||||
string getRandomPageUrl();
|
||||
string getFirstPageUrl();
|
||||
string getMainPageUrl();
|
||||
bool getMetatag(const string &url, string &content);
|
||||
string getTitle();
|
||||
string getDescription();
|
||||
string getLanguage();
|
||||
string getName();
|
||||
string getTags();
|
||||
string getDate();
|
||||
string getCreator();
|
||||
string getPublisher();
|
||||
string getOrigId();
|
||||
bool getFavicon(string &content, string &mimeType);
|
||||
bool getPageUrlFromTitle(const string &title, string &url);
|
||||
bool getMimeTypeByUrl(const string &url, string &mimeType);
|
||||
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
|
||||
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl);
|
||||
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType);
|
||||
bool searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset = true);
|
||||
bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount);
|
||||
bool urlExists(const string &url);
|
||||
bool hasFulltextIndex();
|
||||
std::vector<std::string> getTitleVariants(const std::string &title);
|
||||
bool getNextSuggestion(string &title);
|
||||
bool getNextSuggestion(string &title, string &url);
|
||||
bool canCheckIntegrity();
|
||||
bool isCorrupted();
|
||||
bool parseUrl(const string &url, char *ns, string &title);
|
||||
unsigned int getFileSize();
|
||||
zim::File* getZimFileHandler();
|
||||
bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article);
|
||||
|
||||
protected:
|
||||
zim::File* zimFileHandler;
|
||||
zim::size_type firstArticleOffset;
|
||||
zim::size_type lastArticleOffset;
|
||||
zim::size_type currentArticleOffset;
|
||||
zim::size_type nsACount;
|
||||
zim::size_type nsICount;
|
||||
std::string zimFilePath;
|
||||
|
||||
std::vector< std::vector<std::string> > suggestions;
|
||||
std::vector< std::vector<std::string> >::iterator suggestionsOffset;
|
||||
|
||||
private:
|
||||
std::map<std::string, unsigned int> parseCounterMetadata();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
99
include/searcher.h
Normal file
99
include/searcher.h
Normal file
@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_SEARCHER_H
|
||||
#define KIWIX_SEARCHER_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <locale>
|
||||
#include <cctype>
|
||||
#include <vector>
|
||||
#include "common/resourceTools.h"
|
||||
#include "common/pathTools.h"
|
||||
#include "common/stringTools.h"
|
||||
#include <unicode/putil.h>
|
||||
|
||||
#ifndef __ANDROID__
|
||||
#include <ctpp2/CDT.hpp>
|
||||
#include <ctpp2/CTPP2FileLogger.hpp>
|
||||
#include <ctpp2/CTPP2SimpleVM.hpp>
|
||||
#include "ctpp2/CTPP2VMStringLoader.hpp"
|
||||
|
||||
using namespace CTPP;
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
struct Result
|
||||
{
|
||||
string url;
|
||||
string title;
|
||||
int score;
|
||||
string snippet;
|
||||
int wordCount;
|
||||
int size;
|
||||
};
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class Searcher {
|
||||
|
||||
public:
|
||||
Searcher();
|
||||
~Searcher();
|
||||
|
||||
void search(std::string &search, unsigned int resultStart,
|
||||
unsigned int resultEnd, const bool verbose=false);
|
||||
bool getNextResult(string &url, string &title, unsigned int &score);
|
||||
unsigned int getEstimatedResultCount();
|
||||
bool setProtocolPrefix(const std::string prefix);
|
||||
bool setSearchProtocolPrefix(const std::string prefix);
|
||||
void reset();
|
||||
void setContentHumanReadableId(const string &contentHumanReadableId);
|
||||
|
||||
#ifndef __ANDROID__
|
||||
string getHtml();
|
||||
#endif
|
||||
|
||||
protected:
|
||||
std::string beautifyInteger(const unsigned int number);
|
||||
virtual void closeIndex() = 0;
|
||||
virtual void searchInIndex(string &search, const unsigned int resultStart,
|
||||
const unsigned int resultEnd, const bool verbose=false) = 0;
|
||||
|
||||
std::vector<Result> results;
|
||||
std::vector<Result>::iterator resultOffset;
|
||||
std::string searchPattern;
|
||||
std::string protocolPrefix;
|
||||
std::string searchProtocolPrefix;
|
||||
std::string template_ct2;
|
||||
unsigned int resultCountPerPage;
|
||||
unsigned int estimatedResultCount;
|
||||
unsigned int resultStart;
|
||||
unsigned int resultEnd;
|
||||
std::string contentHumanReadableId;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
56
include/xapianIndexer.h
Normal file
56
include/xapianIndexer.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_XAPIAN_INDEXER_H
|
||||
#define KIWIX_XAPIAN_INDEXER_H
|
||||
|
||||
#include <xapian.h>
|
||||
#include "indexer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class XapianIndexer : public Indexer {
|
||||
|
||||
public:
|
||||
XapianIndexer();
|
||||
|
||||
protected:
|
||||
void indexingPrelude(const string indexPath);
|
||||
void index(const string &url,
|
||||
const string &title,
|
||||
const string &unaccentedTitle,
|
||||
const string &keywords,
|
||||
const string &content,
|
||||
const string &snippet,
|
||||
const string &size,
|
||||
const string &wordCount);
|
||||
void flush();
|
||||
void indexingPostlude(const string indexPath);
|
||||
|
||||
Xapian::WritableDatabase writableDatabase;
|
||||
Xapian::Stem stemmer;
|
||||
Xapian::SimpleStopper stopper;
|
||||
Xapian::TermGenerator indexer;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
53
include/xapianSearcher.h
Normal file
53
include/xapianSearcher.h
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright 2011 Emmanuel Engelhart <kelson@kiwix.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 3 of the License, or
|
||||
* any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#ifndef KIWIX_XAPIAN_SEARCHER_H
|
||||
#define KIWIX_XAPIAN_SEARCHER_H
|
||||
|
||||
#include <xapian.h>
|
||||
#include "searcher.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace kiwix {
|
||||
|
||||
class NoXapianIndexInZim: public exception {
|
||||
virtual const char* what() const throw() {
|
||||
return "There is no fulltext index in the zim file";
|
||||
}
|
||||
};
|
||||
|
||||
class XapianSearcher : public Searcher {
|
||||
|
||||
public:
|
||||
XapianSearcher(const string &xapianDirectoryPath);
|
||||
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd,
|
||||
const bool verbose=false);
|
||||
|
||||
protected:
|
||||
void closeIndex();
|
||||
void openIndex(const string &xapianDirectoryPath);
|
||||
|
||||
Xapian::Database readableDatabase;
|
||||
Xapian::Stem stemmer;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user