Merge pull request #61 from kiwix/code_format

Format all the code using clang-format.
This commit is contained in:
Matthieu Gautier 2017-07-11 17:24:19 +02:00 committed by GitHub
commit 80f6d0bf46
28 changed files with 2647 additions and 2190 deletions

12
.clang-format Normal file
View File

@ -0,0 +1,12 @@
BasedOnStyle: Google
BinPackArguments: false
BinPackParameters: false
BreakBeforeBinaryOperators: All
BreakBeforeBraces: Linux
DerivePointerAlignment: false
SpacesInContainerLiterals: false
Standard: Cpp11
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: false

36
format_code.sh Executable file
View File

@ -0,0 +1,36 @@
#!/usr/bin/bash
files=(
"include/library.h"
"include/common/stringTools.h"
"include/common/pathTools.h"
"include/common/otherTools.h"
"include/common/regexTools.h"
"include/common/networkTools.h"
"include/manager.h"
"include/reader.h"
"include/kiwix.h"
"include/xapianSearcher.h"
"include/searcher.h"
"src/library.cpp"
"src/android/kiwix.cpp"
"src/android/org/kiwix/kiwixlib/JNIKiwixBool.java"
"src/android/org/kiwix/kiwixlib/JNIKiwix.java"
"src/android/org/kiwix/kiwixlib/JNIKiwixString.java"
"src/android/org/kiwix/kiwixlib/JNIKiwixInt.java"
"src/searcher.cpp"
"src/common/pathTools.cpp"
"src/common/regexTools.cpp"
"src/common/otherTools.cpp"
"src/common/networkTools.cpp"
"src/common/stringTools.cpp"
"src/xapianSearcher.cpp"
"src/manager.cpp"
"src/reader.cpp"
)
for i in "${files[@]}"
do
echo $i
clang-format -i -style=file $i
done

View File

@ -24,25 +24,26 @@
#include <winsock2.h>
#include <ws2tcpip.h>
#else
#include <net/if.h>
#include <netdb.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <net/if.h>
#include <netdb.h>
#include <unistd.h>
#endif
#include <iostream>
#include <vector>
#include <string>
#include <map>
#include <string>
#include <vector>
namespace kiwix {
std::map<std::string, std::string> getNetworkInterfaces();
std::string getBestPublicIp();
namespace kiwix
{
std::map<std::string, std::string> getNetworkInterfaces();
std::string getBestPublicIp();
}
#endif

View File

@ -26,8 +26,9 @@
#include <unistd.h>
#endif
namespace kiwix {
void sleep(unsigned int milliseconds);
namespace kiwix
{
void sleep(unsigned int milliseconds);
}
#endif

View File

@ -20,18 +20,18 @@
#ifndef KIWIX_PATHTOOLS_H
#define KIWIX_PATHTOOLS_H
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fstream>
#include <ios>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <sstream>
#include <iostream>
#include <fstream>
#include <string.h>
#include <stdio.h>
#include <sys/types.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <ios>
#include <limits.h>
#ifdef _WIN32
#include <direct.h>
@ -41,20 +41,21 @@
using namespace std;
bool isRelativePath(const string &path);
bool isRelativePath(const string& path);
string computeAbsolutePath(const string path, const string relativePath);
string computeRelativePath(const string path, const string absolutePath);
string removeLastPathElement(const string path, const bool removePreSeparator = false,
const bool removePostSeparator = false);
string appendToDirectory(const string &directoryPath, const string &filename);
string removeLastPathElement(const string path,
const bool removePreSeparator = false,
const bool removePostSeparator = false);
string appendToDirectory(const string& directoryPath, const string& filename);
unsigned int getFileSize(const string &path);
string getFileSizeAsString(const string &path);
bool fileExists(const string &path);
bool makeDirectory(const string &path);
bool copyFile(const string &sourcePath, const string &destPath);
string getLastPathElement(const string &path);
unsigned int getFileSize(const string& path);
string getFileSizeAsString(const string& path);
bool fileExists(const string& path);
bool makeDirectory(const string& path);
bool copyFile(const string& sourcePath, const string& destPath);
string getLastPathElement(const string& path);
string getExecutablePath();
string getCurrentDirectory();
bool writeTextFile(const string &path, const string &content);
bool writeTextFile(const string& path, const string& content);
#endif

View File

@ -22,11 +22,15 @@
#include <unicode/regex.h>
#include <unicode/ucnv.h>
#include <string>
#include <map>
#include <string>
bool matchRegex(const std::string &content, const std::string &regex);
std::string replaceRegex(const std::string &content, const std::string &replacement, const std::string &regex);
std::string appendToFirstOccurence(const std::string &content, const std::string regex, const std::string &replacement);
bool matchRegex(const std::string& content, const std::string& regex);
std::string replaceRegex(const std::string& content,
const std::string& replacement,
const std::string& regex);
std::string appendToFirstOccurence(const std::string& content,
const std::string regex,
const std::string& replacement);
#endif

View File

@ -22,44 +22,46 @@
#include <unicode/unistr.h>
#include <iostream>
#include <vector>
#include <string>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include "pathTools.h"
namespace kiwix {
namespace kiwix
{
#ifndef __ANDROID__
std::string beautifyInteger(const unsigned int number);
std::string beautifyFileSize(const unsigned int number);
std::string urlEncode(const std::string &c);
void printStringInHexadecimal(const char *s);
void printStringInHexadecimal(UnicodeString s);
void stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr);
std::string encodeDiples(const std::string& str);
std::string beautifyInteger(const unsigned int number);
std::string beautifyFileSize(const unsigned int number);
std::string urlEncode(const std::string& c);
void printStringInHexadecimal(const char* s);
void printStringInHexadecimal(UnicodeString s);
void stringReplacement(std::string& str,
const std::string& oldStr,
const std::string& newStr);
std::string encodeDiples(const std::string& str);
#endif
std::string removeAccents(const std::string &text);
void loadICUExternalTables();
std::string urlDecode(const std::string &c);
std::string removeAccents(const std::string& text);
void loadICUExternalTables();
std::string urlDecode(const std::string& c);
std::vector<std::string> split(const std::string&, const std::string&);
std::vector<std::string> split(const char*, const char*);
std::vector<std::string> split(const std::string&, const char*);
std::vector<std::string> split(const char*, const std::string&);
std::vector<std::string> split(const std::string&, const std::string&);
std::vector<std::string> split(const char*, const char*);
std::vector<std::string> split(const std::string&, const char*);
std::vector<std::string> split(const char*, const std::string&);
std::string ucAll(const std::string &word);
std::string lcAll(const std::string &word);
std::string ucFirst(const std::string &word);
std::string lcFirst(const std::string &word);
std::string toTitle(const std::string &word);
std::string ucAll(const std::string& word);
std::string lcAll(const std::string& word);
std::string ucFirst(const std::string& word);
std::string lcFirst(const std::string& word);
std::string toTitle(const std::string& word);
std::string normalize(const std::string &word);
std::string normalize(const std::string& word);
}
#endif

View File

@ -22,5 +22,4 @@
#include "library.h"
#endif

View File

@ -22,86 +22,85 @@
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <string.h>
#include <vector>
#include <stack>
#include <string>
#include <vector>
#include "common/stringTools.h"
#include "common/regexTools.h"
#include "common/stringTools.h"
#define KIWIX_LIBRARY_VERSION "20110515"
using namespace std;
namespace kiwix {
namespace kiwix
{
enum supportedIndexType { UNKNOWN, XAPIAN };
enum supportedIndexType { UNKNOWN, XAPIAN };
class Book
{
public:
Book();
~Book();
class Book {
static bool sortByLastOpen(const Book& a, const Book& b);
static bool sortByTitle(const Book& a, const Book& b);
static bool sortBySize(const Book& a, const Book& b);
static bool sortByDate(const Book& a, const Book& b);
static bool sortByCreator(const Book& a, const Book& b);
static bool sortByPublisher(const Book& a, const Book& b);
static bool sortByLanguage(const Book& a, const Book& b);
string getHumanReadableIdFromPath();
public:
Book();
~Book();
string id;
string path;
string pathAbsolute;
string last;
string indexPath;
string indexPathAbsolute;
supportedIndexType indexType;
string title;
string description;
string language;
string creator;
string publisher;
string date;
string url;
string name;
string tags;
string origId;
string articleCount;
string mediaCount;
bool readOnly;
string size;
string favicon;
string faviconMimeType;
};
static bool sortByLastOpen(const Book &a, const Book &b);
static bool sortByTitle(const Book &a, const Book &b);
static bool sortBySize(const Book &a, const Book &b);
static bool sortByDate(const Book &a, const Book &b);
static bool sortByCreator(const Book &a, const Book &b);
static bool sortByPublisher(const Book &a, const Book &b);
static bool sortByLanguage(const Book &a, const Book &b);
string getHumanReadableIdFromPath();
class Library
{
public:
Library();
~Library();
string id;
string path;
string pathAbsolute;
string last;
string indexPath;
string indexPathAbsolute;
supportedIndexType indexType;
string title;
string description;
string language;
string creator;
string publisher;
string date;
string url;
string name;
string tags;
string origId;
string articleCount;
string mediaCount;
bool readOnly;
string size;
string favicon;
string faviconMimeType;
};
class Library {
public:
Library();
~Library();
string version;
bool addBook(const Book &book);
bool removeBookByIndex(const unsigned int bookIndex);
vector <kiwix::Book> books;
/*
* 'current' is the variable storing the current content/book id
* in the library. This is used to be able to load per default a
* content. As Kiwix may work with many library XML files, you may
* have "current" defined many time with different values. The
* last XML file read has the priority, Although we do not have an
* library object for each file, we want to be able to fallback to
* an 'old' current book if the one which should be load
* failed. That is the reason why we need a stack here
*/
stack<string> current;
};
string version;
bool addBook(const Book& book);
bool removeBookByIndex(const unsigned int bookIndex);
vector<kiwix::Book> books;
/*
* 'current' is the variable storing the current content/book id
* in the library. This is used to be able to load per default a
* content. As Kiwix may work with many library XML files, you may
* have "current" defined many time with different values. The
* last XML file read has the priority, Although we do not have an
* library object for each file, we want to be able to fallback to
* an 'old' current book if the one which should be load
* failed. That is the reason why we need a stack here
*/
stack<string> current;
};
}
#endif

View File

@ -20,73 +20,89 @@
#ifndef KIWIX_MANAGER_H
#define KIWIX_MANAGER_H
#include <string>
#include <sstream>
#include <time.h>
#include <sstream>
#include <string>
#include <pugixml.hpp>
#include "common/base64.h"
#include "common/regexTools.h"
#include "common/pathTools.h"
#include "common/regexTools.h"
#include "library.h"
#include "reader.h"
using namespace std;
namespace kiwix {
namespace kiwix
{
enum supportedListMode { LASTOPEN, REMOTE, LOCAL };
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
enum supportedListMode { LASTOPEN, REMOTE, LOCAL };
enum supportedListSortBy { TITLE, SIZE, DATE, CREATOR, PUBLISHER };
class Manager
{
public:
Manager();
~Manager();
class Manager {
bool readFile(const string path, const bool readOnly = true);
bool readFile(const string nativePath,
const string UTF8Path,
const bool readOnly = true);
bool readXml(const string xml,
const bool readOnly = true,
const string libraryPath = "");
bool writeFile(const string path);
bool removeBookByIndex(const unsigned int bookIndex);
bool removeBookById(const string id);
bool setCurrentBookId(const string id);
string getCurrentBookId();
bool setBookIndex(const string id,
const string path,
const supportedIndexType type);
bool setBookIndex(const string id, const string path);
bool setBookPath(const string id, const string path);
string addBookFromPathAndGetId(const string pathToOpen,
const string pathToSave = "",
const string url = "",
const bool checkMetaData = false);
bool addBookFromPath(const string pathToOpen,
const string pathToSave = "",
const string url = "",
const bool checkMetaData = false);
Library cloneLibrary();
bool getBookById(const string id, Book& book);
bool getCurrentBook(Book& book);
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
bool updateBookLastOpenDateById(const string id);
void removeBookPaths();
bool listBooks(const supportedListMode mode,
const supportedListSortBy sortBy,
const unsigned int maxSize,
const string language,
const string creator,
const string publisher,
const string search);
vector<string> getBooksLanguages();
vector<string> getBooksCreators();
vector<string> getBooksPublishers();
vector<string> getBooksIds();
public:
Manager();
~Manager();
string writableLibraryPath;
bool readFile(const string path, const bool readOnly = true);
bool readFile(const string nativePath, const string UTF8Path, const bool readOnly = true);
bool readXml(const string xml, const bool readOnly = true, const string libraryPath = "");
bool writeFile(const string path);
bool removeBookByIndex(const unsigned int bookIndex);
bool removeBookById(const string id);
bool setCurrentBookId(const string id);
string getCurrentBookId();
bool setBookIndex(const string id, const string path, const supportedIndexType type);
bool setBookIndex(const string id, const string path);
bool setBookPath(const string id, const string path);
string addBookFromPathAndGetId(const string pathToOpen, const string pathToSave = "", const string url = "",
const bool checkMetaData = false);
bool addBookFromPath(const string pathToOpen, const string pathToSave = "", const string url = "",
const bool checkMetaData = false);
Library cloneLibrary();
bool getBookById(const string id, Book &book);
bool getCurrentBook(Book &book);
unsigned int getBookCount(const bool localBooks, const bool remoteBooks);
bool updateBookLastOpenDateById(const string id);
void removeBookPaths();
bool listBooks(const supportedListMode mode, const supportedListSortBy sortBy, const unsigned int maxSize,
const string language, const string creator, const string publisher, const string search);
vector<string> getBooksLanguages();
vector<string> getBooksCreators();
vector<string> getBooksPublishers();
vector<string> getBooksIds();
vector<std::string> bookIdList;
string writableLibraryPath;
protected:
kiwix::Library library;
vector<std::string> bookIdList;
protected:
kiwix::Library library;
bool readBookFromPath(const string path, Book *book = NULL);
bool parseXmlDom(const pugi::xml_document &doc, const bool readOnly, const string libraryPath);
private:
void checkAndCleanBookPaths(Book &book, const string &libraryPath);
};
bool readBookFromPath(const string path, Book* book = NULL);
bool parseXmlDom(const pugi::xml_document& doc,
const bool readOnly,
const string libraryPath);
private:
void checkAndCleanBookPaths(Book& book, const string& libraryPath);
};
}
#endif

View File

@ -20,85 +20,105 @@
#ifndef KIWIX_READER_H
#define KIWIX_READER_H
#include <zim/zim.h>
#include <zim/file.h>
#include <zim/article.h>
#include <zim/fileiterator.h>
#include <stdio.h>
#include <string>
#include <zim/article.h>
#include <zim/file.h>
#include <zim/fileiterator.h>
#include <zim/zim.h>
#include <exception>
#include <sstream>
#include <map>
#include <sstream>
#include <string>
#include "common/pathTools.h"
#include "common/stringTools.h"
using namespace std;
namespace kiwix {
namespace kiwix
{
class Reader
{
public:
Reader(const string zimFilePath);
~Reader();
class Reader {
void reset();
unsigned int getArticleCount() const;
unsigned int getMediaCount() const;
unsigned int getGlobalCount() const;
string getZimFilePath() const;
string getId() const;
string getRandomPageUrl() const;
string getFirstPageUrl() const;
string getMainPageUrl() const;
bool getMetatag(const string& url, string& content) const;
string getTitle() const;
string getDescription() const;
string getLanguage() const;
string getName() const;
string getTags() const;
string getDate() const;
string getCreator() const;
string getPublisher() const;
string getOrigId() const;
bool getFavicon(string& content, string& mimeType) const;
bool getPageUrlFromTitle(const string& title, string& url) const;
bool getMimeTypeByUrl(const string& url, string& mimeType) const;
bool getContentByUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType) const;
bool getContentByEncodedUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const;
bool getContentByEncodedUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType) const;
bool getContentByDecodedUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType,
string& baseUrl) const;
bool getContentByDecodedUrl(const string& url,
string& content,
unsigned int& contentLength,
string& contentType) const;
bool searchSuggestions(const string& prefix,
unsigned int suggestionsCount,
const bool reset = true);
bool searchSuggestionsSmart(const string& prefix,
unsigned int suggestionsCount);
bool urlExists(const string& url) const;
bool hasFulltextIndex() const;
std::vector<std::string> getTitleVariants(const std::string& title) const;
bool getNextSuggestion(string& title);
bool getNextSuggestion(string& title, string& url);
bool canCheckIntegrity() const;
bool isCorrupted() const;
bool parseUrl(const string& url, char* ns, string& title) const;
unsigned int getFileSize() const;
zim::File* getZimFileHandler() const;
bool getArticleObjectByDecodedUrl(const string& url,
zim::Article& article) const;
public:
Reader(const string zimFilePath);
~Reader();
protected:
zim::File* zimFileHandler;
zim::size_type firstArticleOffset;
zim::size_type lastArticleOffset;
zim::size_type currentArticleOffset;
zim::size_type nsACount;
zim::size_type nsICount;
std::string zimFilePath;
void reset();
unsigned int getArticleCount() const;
unsigned int getMediaCount() const;
unsigned int getGlobalCount() const;
string getZimFilePath() const;
string getId() const;
string getRandomPageUrl() const;
string getFirstPageUrl() const;
string getMainPageUrl() const;
bool getMetatag(const string &url, string &content) const;
string getTitle() const;
string getDescription() const;
string getLanguage() const;
string getName() const;
string getTags() const;
string getDate() const;
string getCreator() const;
string getPublisher() const;
string getOrigId() const;
bool getFavicon(string &content, string &mimeType) const;
bool getPageUrlFromTitle(const string &title, string &url) const;
bool getMimeTypeByUrl(const string &url, string &mimeType) const;
bool getContentByUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const;
bool getContentByEncodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType, string &baseUrl) const;
bool getContentByDecodedUrl(const string &url, string &content, unsigned int &contentLength, string &contentType) const;
bool searchSuggestions(const string &prefix, unsigned int suggestionsCount, const bool reset = true);
bool searchSuggestionsSmart(const string &prefix, unsigned int suggestionsCount);
bool urlExists(const string &url) const;
bool hasFulltextIndex() const;
std::vector<std::string> getTitleVariants(const std::string &title) const;
bool getNextSuggestion(string &title);
bool getNextSuggestion(string &title, string &url);
bool canCheckIntegrity() const;
bool isCorrupted() const;
bool parseUrl(const string &url, char *ns, string &title) const;
unsigned int getFileSize() const;
zim::File* getZimFileHandler() const;
bool getArticleObjectByDecodedUrl(const string &url, zim::Article &article) const;
protected:
zim::File* zimFileHandler;
zim::size_type firstArticleOffset;
zim::size_type lastArticleOffset;
zim::size_type currentArticleOffset;
zim::size_type nsACount;
zim::size_type nsICount;
std::string zimFilePath;
std::vector< std::vector<std::string> > suggestions;
std::vector< std::vector<std::string> >::iterator suggestionsOffset;
private:
std::map<const std::string, unsigned int> parseCounterMetadata() const;
};
std::vector<std::vector<std::string>> suggestions;
std::vector<std::vector<std::string>>::iterator suggestionsOffset;
private:
std::map<const std::string, unsigned int> parseCounterMetadata() const;
};
}
#endif

View File

@ -22,73 +22,77 @@
#include <stdio.h>
#include <stdlib.h>
#include <string>
#include <unicode/putil.h>
#include <algorithm>
#include <vector>
#include <locale>
#include <cctype>
#include <locale>
#include <string>
#include <vector>
#include <vector>
#include "common/pathTools.h"
#include "common/stringTools.h"
#include <unicode/putil.h>
#include "kiwix_config.h"
using namespace std;
namespace kiwix {
class Reader;
class Result {
public:
virtual ~Result() {};
virtual std::string get_url() = 0;
virtual std::string get_title() = 0;
virtual int get_score() = 0;
virtual std::string get_snippet() = 0;
virtual int get_wordCount() = 0;
virtual int get_size() = 0;
};
namespace kiwix
{
class Reader;
class Result
{
public:
virtual ~Result(){};
virtual std::string get_url() = 0;
virtual std::string get_title() = 0;
virtual int get_score() = 0;
virtual std::string get_snippet() = 0;
virtual int get_wordCount() = 0;
virtual int get_size() = 0;
};
struct SearcherInternal;
class Searcher
{
public:
Searcher(const string& xapianDirectoryPath, Reader* reader);
~Searcher();
struct SearcherInternal;
class Searcher {
public:
Searcher(const string &xapianDirectoryPath, Reader* reader);
~Searcher();
void search(std::string &search, unsigned int resultStart,
unsigned int resultEnd, const bool verbose=false);
Result* getNextResult();
void restart_search();
unsigned int getEstimatedResultCount();
bool setProtocolPrefix(const std::string prefix);
bool setSearchProtocolPrefix(const std::string prefix);
void reset();
void setContentHumanReadableId(const string &contentHumanReadableId);
void search(std::string& search,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose = false);
Result* getNextResult();
void restart_search();
unsigned int getEstimatedResultCount();
bool setProtocolPrefix(const std::string prefix);
bool setSearchProtocolPrefix(const std::string prefix);
void reset();
void setContentHumanReadableId(const string& contentHumanReadableId);
#ifdef ENABLE_CTPP2
string getHtml();
string getHtml();
#endif
protected:
std::string beautifyInteger(const unsigned int number);
void closeIndex() ;
void searchInIndex(string &search, const unsigned int resultStart,
const unsigned int resultEnd, const bool verbose=false);
Reader* reader;
SearcherInternal* internal;
std::string searchPattern;
std::string protocolPrefix;
std::string searchProtocolPrefix;
std::string template_ct2;
unsigned int resultCountPerPage;
unsigned int estimatedResultCount;
unsigned int resultStart;
unsigned int resultEnd;
std::string contentHumanReadableId;
};
protected:
std::string beautifyInteger(const unsigned int number);
void closeIndex();
void searchInIndex(string& search,
const unsigned int resultStart,
const unsigned int resultEnd,
const bool verbose = false);
Reader* reader;
SearcherInternal* internal;
std::string searchPattern;
std::string protocolPrefix;
std::string searchProtocolPrefix;
std::string template_ct2;
unsigned int resultCountPerPage;
unsigned int estimatedResultCount;
unsigned int resultStart;
unsigned int resultEnd;
std::string contentHumanReadableId;
};
}
#endif

View File

@ -21,70 +21,76 @@
#define KIWIX_XAPIAN_SEARCHER_H
#include <xapian.h>
#include "searcher.h"
#include "reader.h"
#include "searcher.h"
#include <map>
#include <string>
using namespace std;
namespace kiwix {
namespace kiwix
{
class XapianSearcher;
class XapianSearcher;
class XapianResult : public Result
{
public:
XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator);
virtual ~XapianResult(){};
class XapianResult : public Result {
public:
XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator);
virtual ~XapianResult() {};
virtual std::string get_url();
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual int get_wordCount();
virtual int get_size();
virtual std::string get_url();
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual int get_wordCount();
virtual int get_size();
private:
XapianSearcher* searcher;
Xapian::MSetIterator iterator;
Xapian::Document document;
};
private:
XapianSearcher* searcher;
Xapian::MSetIterator iterator;
Xapian::Document document;
};
class NoXapianIndexInZim : public exception
{
virtual const char* what() const throw()
{
return "There is no fulltext index in the zim file";
}
};
class NoXapianIndexInZim: public exception {
virtual const char* what() const throw() {
return "There is no fulltext index in the zim file";
}
};
class XapianSearcher
{
friend class XapianResult;
class XapianSearcher {
friend class XapianResult;
public:
XapianSearcher(const string &xapianDirectoryPath, Reader* reader);
virtual ~XapianSearcher() {};
void searchInIndex(string &search, const unsigned int resultStart, const unsigned int resultEnd,
const bool verbose=false);
virtual Result* getNextResult();
void restart_search();
public:
XapianSearcher(const string& xapianDirectoryPath, Reader* reader);
virtual ~XapianSearcher(){};
void searchInIndex(string& search,
const unsigned int resultStart,
const unsigned int resultEnd,
const bool verbose = false);
virtual Result* getNextResult();
void restart_search();
Xapian::MSet results;
Xapian::MSet results;
protected:
void closeIndex();
void openIndex(const string &xapianDirectoryPath);
void setup_queryParser();
Reader* reader;
Xapian::Database readableDatabase;
std::string language;
std::string stopwords;
Xapian::QueryParser queryParser;
Xapian::Stem stemmer;
Xapian::SimpleStopper stopper;
Xapian::MSetIterator current_result;
std::map<std::string, int> valuesmap;
};
protected:
void closeIndex();
void openIndex(const string& xapianDirectoryPath);
void setup_queryParser();
Reader* reader;
Xapian::Database readableDatabase;
std::string language;
std::string stopwords;
Xapian::QueryParser queryParser;
Xapian::Stem stemmer;
Xapian::SimpleStopper stopper;
Xapian::MSetIterator current_result;
std::map<std::string, int> valuesmap;
};
}
#endif

View File

@ -7,80 +7,87 @@
#include <iostream>
#include <string>
#include "unicode/putil.h"
#include "common/base64.h"
#include "reader.h"
#include "searcher.h"
#include "common/base64.h"
#include "unicode/putil.h"
#include <android/log.h>
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "kiwix", __VA_ARGS__)
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, "kiwix", __VA_ARGS__)
#include <xapian.h>
#include <zim/zim.h>
#include <zim/file.h>
#include <zim/article.h>
#include <zim/error.h>
#include <zim/file.h>
#include <zim/zim.h>
/* global variables */
kiwix::Reader *reader = NULL;
kiwix::Searcher *searcher = NULL;
kiwix::Reader* reader = NULL;
kiwix::Searcher* searcher = NULL;
static pthread_mutex_t readerLock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t searcherLock = PTHREAD_MUTEX_INITIALIZER;
/* c2jni type conversion functions */
jboolean c2jni(const bool &val) {
jboolean c2jni(const bool& val)
{
return val ? JNI_TRUE : JNI_FALSE;
}
jstring c2jni(const std::string &val, JNIEnv *env) {
jstring c2jni(const std::string& val, JNIEnv* env)
{
return env->NewStringUTF(val.c_str());
}
jint c2jni(const int val) {
jint c2jni(const int val)
{
return (jint)val;
}
jint c2jni(const unsigned val) {
jint c2jni(const unsigned val)
{
return (unsigned)val;
}
/* jni2c type conversion functions */
bool jni2c(const jboolean &val) {
bool jni2c(const jboolean& val)
{
return val == JNI_TRUE;
}
std::string jni2c(const jstring &val, JNIEnv *env) {
std::string jni2c(const jstring& val, JNIEnv* env)
{
return std::string(env->GetStringUTFChars(val, 0));
}
int jni2c(const jint val) {
int jni2c(const jint val)
{
return (int)val;
}
/* Method to deal with variable passed by reference */
void setStringObjValue(const std::string &value, const jobject obj, JNIEnv *env) {
void setStringObjValue(const std::string& value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "Ljava/lang/String;");
env->SetObjectField(obj, objFid, c2jni(value, env));
}
void setIntObjValue(const int value, const jobject obj, JNIEnv *env) {
void setIntObjValue(const int value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "I");
env->SetIntField(obj, objFid, value);
}
void setBoolObjValue(const bool value, const jobject obj, JNIEnv *env) {
void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);
jfieldID objFid = env->GetFieldID(objClass, "value", "Z");
env->SetIntField(obj, objFid, c2jni(value));
}
/* Kiwix library functions */
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv *env, jobject obj) {
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv* env, jobject obj)
{
jstring url;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
@ -91,13 +98,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMainPage(JNIEnv *e
}
}
pthread_mutex_unlock(&readerLock);
return url;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv *env, jobject obj) {
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv* env,
jobject obj)
{
jstring id;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
@ -108,13 +117,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getId(JNIEnv *env, jo
}
}
pthread_mutex_unlock(&readerLock);
return id;
}
JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv *env, jobject obj) {
JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv* env,
jobject obj)
{
jint size;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
@ -125,13 +136,15 @@ JNIEXPORT jint JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFileSize(JNIEnv *env,
}
}
pthread_mutex_unlock(&readerLock);
return size;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv *env, jobject obj) {
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv* env, jobject obj)
{
jstring creator;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
@ -142,13 +155,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getCreator(JNIEnv *en
}
}
pthread_mutex_unlock(&readerLock);
return creator;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv *env, jobject obj) {
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv* env, jobject obj)
{
jstring publisher;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
@ -159,13 +174,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPublisher(JNIEnv *
}
}
pthread_mutex_unlock(&readerLock);
return publisher;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv *env, jobject obj) {
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv* env,
jobject obj)
{
jstring name;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
@ -176,33 +193,40 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getName(JNIEnv *env,
}
}
pthread_mutex_unlock(&readerLock);
return name;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getFavicon(JNIEnv *env, jobject obj) {
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getFavicon(JNIEnv* env, jobject obj)
{
jstring favicon;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
std::string cContent;
std::string cMime;
reader->getFavicon(cContent, cMime);
favicon = c2jni(base64_encode(reinterpret_cast<const unsigned char*>(cContent.c_str()), cContent.length()), env);
favicon
= c2jni(base64_encode(
reinterpret_cast<const unsigned char*>(cContent.c_str()),
cContent.length()),
env);
} catch (...) {
std::cerr << "Unable to get ZIM favicon" << std::endl;
}
}
pthread_mutex_unlock(&readerLock);
return favicon;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv *env, jobject obj) {
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv* env,
jobject obj)
{
jstring date;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
@ -213,13 +237,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDate(JNIEnv *env,
}
}
pthread_mutex_unlock(&readerLock);
return date;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv *env, jobject obj) {
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv* env, jobject obj)
{
jstring language;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
@ -230,13 +256,15 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getLanguage(JNIEnv *e
}
}
pthread_mutex_unlock(&readerLock);
return language;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType(JNIEnv *env, jobject obj, jstring url) {
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType(
JNIEnv* env, jobject obj, jstring url)
{
jstring mimeType;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
std::string cUrl = jni2c(url, env);
@ -249,17 +277,21 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getMimeType(JNIEnv *e
}
}
pthread_mutex_unlock(&readerLock);
return mimeType;
}
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv *env, jobject obj, jstring path) {
JNIEXPORT jboolean JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv* env, jobject obj, jstring path)
{
jboolean retVal = JNI_TRUE;
std::string cPath = jni2c(path, env);
pthread_mutex_lock(&readerLock);
try {
if (reader != NULL) delete reader;
if (reader != NULL) {
delete reader;
}
reader = new kiwix::Reader(cPath);
} catch (...) {
std::cerr << "Unable to load ZIM " << cPath << std::endl;
@ -271,8 +303,9 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadZIM(JNIEnv *env,
return retVal;
}
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(JNIEnv *env, jobject obj, jstring url, jobject mimeTypeObj, jobject sizeObj) {
JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(
JNIEnv* env, jobject obj, jstring url, jobject mimeTypeObj, jobject sizeObj)
{
/* Default values */
setStringObjValue("", mimeTypeObj, env);
setIntObjValue(0, sizeObj, env);
@ -289,7 +322,8 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(JNIEnv
try {
if (reader->getContentByUrl(cUrl, cData, cSize, cMimeType)) {
data = env->NewByteArray(cSize);
env->SetByteArrayRegion(data, 0, cSize, reinterpret_cast<const jbyte*>(cData.c_str()));
env->SetByteArrayRegion(
data, 0, cSize, reinterpret_cast<const jbyte*>(cData.c_str()));
setStringObjValue(cMimeType, mimeTypeObj, env);
setIntObjValue(cSize, sizeObj, env);
}
@ -298,12 +332,13 @@ JNIEXPORT jbyteArray JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getContent(JNIEnv
}
pthread_mutex_unlock(&readerLock);
}
return data;
}
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions
(JNIEnv *env, jobject obj, jstring prefix, jint count) {
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions(
JNIEnv* env, jobject obj, jstring prefix, jint count)
{
jboolean retVal = JNI_FALSE;
std::string cPrefix = jni2c(prefix, env);
unsigned int cCount = jni2c(count);
@ -316,15 +351,17 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_searchSuggestions
}
}
} catch (...) {
std::cerr << "Unable to search suggestions for pattern " << cPrefix << std::endl;
std::cerr << "Unable to search suggestions for pattern " << cPrefix
<< std::endl;
}
pthread_mutex_unlock(&readerLock);
return retVal;
}
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion
(JNIEnv *env, jobject obj, jobject titleObj) {
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion(
JNIEnv* env, jobject obj, jobject titleObj)
{
jboolean retVal = JNI_FALSE;
std::string cTitle;
@ -344,8 +381,9 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getNextSuggestion
return retVal;
}
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle
(JNIEnv *env, jobject obj, jstring title, jobject urlObj) {
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle(
JNIEnv* env, jobject obj, jstring title, jobject urlObj)
{
jboolean retVal = JNI_FALSE;
std::string cTitle = jni2c(title, env);
std::string cUrl;
@ -362,12 +400,13 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getPageUrlFromTitle
std::cerr << "Unable to get URL for title " << cTitle << std::endl;
}
pthread_mutex_unlock(&readerLock);
return retVal;
}
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle
(JNIEnv *env , jobject obj, jobject titleObj) {
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle(
JNIEnv* env, jobject obj, jobject titleObj)
{
jboolean retVal = JNI_FALSE;
std::string cTitle;
@ -384,12 +423,13 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getTitle
pthread_mutex_unlock(&readerLock);
return retVal;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv *env, jobject obj) {
JNIEXPORT jstring JNICALL
Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv* env, jobject obj)
{
jstring description;
pthread_mutex_lock(&readerLock);
if (reader != NULL) {
try {
@ -400,12 +440,13 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getDescription(JNIEnv
}
}
pthread_mutex_unlock(&readerLock);
return description;
}
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage
(JNIEnv *env, jobject obj, jobject urlObj) {
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage(
JNIEnv* env, jobject obj, jobject urlObj)
{
jboolean retVal = JNI_FALSE;
std::string cUrl;
@ -424,11 +465,12 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_getRandomPage
return retVal;
}
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory
(JNIEnv *env, jobject obj, jstring dirStr) {
JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory(
JNIEnv* env, jobject obj, jstring dirStr)
{
std::string cPath = jni2c(dirStr, env);
pthread_mutex_lock(&readerLock);
pthread_mutex_lock(&readerLock);
try {
u_setDataDirectory(cPath.c_str());
} catch (...) {
@ -437,14 +479,18 @@ JNIEXPORT void JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_setDataDirectory
pthread_mutex_unlock(&readerLock);
}
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JNIEnv *env, jobject obj, jstring path) {
JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(
JNIEnv* env, jobject obj, jstring path)
{
jboolean retVal = JNI_TRUE;
std::string cPath = jni2c(path, env);
pthread_mutex_lock(&searcherLock);
searcher = NULL;
try {
if (searcher != NULL) delete searcher;
if (searcher != NULL) {
delete searcher;
}
searcher = new kiwix::Searcher(cPath, reader);
} catch (...) {
searcher = NULL;
@ -456,22 +502,23 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(JN
return retVal;
}
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery
(JNIEnv *env, jclass obj, jstring query, jint count) {
JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery(
JNIEnv* env, jclass obj, jstring query, jint count)
{
std::string cQuery = jni2c(query, env);
unsigned int cCount = jni2c(count);
kiwix::Result *p_result;
kiwix::Result* p_result;
std::string result;
pthread_mutex_lock(&searcherLock);
try {
if (searcher != NULL) {
searcher->search(cQuery, 0, count);
while ( (p_result = searcher->getNextResult()) &&
!(p_result->get_title().empty()) &&
!(p_result->get_url().empty())) {
result += p_result->get_title() + "\n";
delete p_result;
while ((p_result = searcher->getNextResult())
&& !(p_result->get_title().empty())
&& !(p_result->get_url().empty())) {
result += p_result->get_title() + "\n";
delete p_result;
}
}
} catch (...) {
@ -481,5 +528,3 @@ JNIEXPORT jstring JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_indexedQuery
return env->NewStringUTF(result.c_str());
}

View File

@ -23,12 +23,9 @@ import org.kiwix.kiwixlib.JNIKiwixString;
import org.kiwix.kiwixlib.JNIKiwixBool;
import org.kiwix.kiwixlib.JNIKiwixInt;
public class JNIKiwix {
static {
System.loadLibrary("kiwix");
}
public class JNIKiwix
{
static { System.loadLibrary("kiwix"); }
public native String getMainPage();
public native String getId();
@ -39,8 +36,8 @@ public class JNIKiwix {
public native boolean loadZIM(String path);
public native boolean loadFulltextIndex(String path);
public native boolean loadFulltextIndex(String path);
public native byte[] getContent(String url, JNIKiwixString mimeType, JNIKiwixInt size);
public native boolean searchSuggestions(String prefix, int count);

View File

@ -19,7 +19,7 @@
package org.kiwix.kiwixlib;
public class JNIKiwixBool {
public class JNIKiwixBool
{
public boolean value;
}

View File

@ -19,8 +19,7 @@
package org.kiwix.kiwixlib;
public class JNIKiwixInt {
public class JNIKiwixInt
{
public int value;
}

View File

@ -19,7 +19,7 @@
package org.kiwix.kiwixlib;
public class JNIKiwixString {
public class JNIKiwixString
{
public String value;
}

View File

@ -19,44 +19,54 @@
#include <common/networkTools.h>
std::map<std::string, std::string> kiwix::getNetworkInterfaces() {
std::map<std::string, std::string> kiwix::getNetworkInterfaces()
{
std::map<std::string, std::string> interfaces;
#ifdef _WIN32
SOCKET sd = WSASocket(AF_INET, SOCK_DGRAM, 0, 0, 0, 0);
if (sd == SOCKET_ERROR) {
std::cerr << "Failed to get a socket. Error " << WSAGetLastError() <<
std::endl;
std::cerr << "Failed to get a socket. Error " << WSAGetLastError()
<< std::endl;
return interfaces;
}
INTERFACE_INFO InterfaceList[20];
unsigned long nBytesReturned;
if (WSAIoctl(sd, SIO_GET_INTERFACE_LIST, 0, 0, &InterfaceList,
sizeof(InterfaceList), &nBytesReturned, 0, 0) == SOCKET_ERROR) {
std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError() <<
std::endl;
if (WSAIoctl(sd,
SIO_GET_INTERFACE_LIST,
0,
0,
&InterfaceList,
sizeof(InterfaceList),
&nBytesReturned,
0,
0)
== SOCKET_ERROR) {
std::cerr << "Failed calling WSAIoctl: error " << WSAGetLastError()
<< std::endl;
return interfaces;
}
int nNumInterfaces = nBytesReturned / sizeof(INTERFACE_INFO);
for (int i = 0; i < nNumInterfaces; ++i) {
sockaddr_in *pAddress;
pAddress = (sockaddr_in *) & (InterfaceList[i].iiAddress);
sockaddr_in* pAddress;
pAddress = (sockaddr_in*)&(InterfaceList[i].iiAddress);
/* Add to the map */
std::string interfaceName = std::string(inet_ntoa(pAddress->sin_addr));
std::string interfaceIp = std::string(inet_ntoa(pAddress->sin_addr));
interfaces.insert(std::pair<std::string, std::string>(interfaceName, interfaceIp));
interfaces.insert(
std::pair<std::string, std::string>(interfaceName, interfaceIp));
}
#else
/* Get Network interfaces information */
char buf[16384];
struct ifconf ifconf;
int fd = socket(PF_INET, SOCK_DGRAM, 0); /* Only IPV4 */
ifconf.ifc_len=sizeof buf;
ifconf.ifc_buf=buf;
if(ioctl(fd, SIOCGIFCONF, &ifconf)!=0) {
ifconf.ifc_len = sizeof buf;
ifconf.ifc_buf = buf;
if (ioctl(fd, SIOCGIFCONF, &ifconf) != 0) {
perror("ioctl(SIOCGIFCONF)");
exit(EXIT_FAILURE);
}
@ -64,73 +74,86 @@ std::map<std::string, std::string> kiwix::getNetworkInterfaces() {
/* Go through each interface */
int i;
size_t len;
struct ifreq *ifreq;
struct ifreq* ifreq;
ifreq = ifconf.ifc_req;
for (i = 0; i < ifconf.ifc_len; ) {
for (i = 0; i < ifconf.ifc_len;) {
if (ifreq->ifr_addr.sa_family == AF_INET) {
/* Get the network interface ip */
char host[128] = { 0 };
const int error = getnameinfo(&(ifreq->ifr_addr), sizeof ifreq->ifr_addr,
host, sizeof host,
0, 0, NI_NUMERICHOST);
char host[128] = {0};
const int error = getnameinfo(&(ifreq->ifr_addr),
sizeof ifreq->ifr_addr,
host,
sizeof host,
0,
0,
NI_NUMERICHOST);
if (!error) {
std::string interfaceName = std::string(ifreq->ifr_name);
std::string interfaceIp = std::string(host);
/* Add to the map */
interfaces.insert(std::pair<std::string, std::string>(interfaceName, interfaceIp));
interfaces.insert(
std::pair<std::string, std::string>(interfaceName, interfaceIp));
} else {
perror("getnameinfo()");
}
}
/* some systems have ifr_addr.sa_len and adjust the length that
* way, but not mine. weird */
/* some systems have ifr_addr.sa_len and adjust the length that
* way, but not mine. weird */
#ifndef __linux__
len=IFNAMSIZ + ifreq->ifr_addr.sa_len;
len = IFNAMSIZ + ifreq->ifr_addr.sa_len;
#else
len=sizeof *ifreq;
len = sizeof *ifreq;
#endif
ifreq=(struct ifreq*)((char*)ifreq+len);
i+=len;
ifreq = (struct ifreq*)((char*)ifreq + len);
i += len;
}
#endif
return interfaces;
}
std::string kiwix::getBestPublicIp() {
std::string kiwix::getBestPublicIp()
{
std::map<std::string, std::string> interfaces = kiwix::getNetworkInterfaces();
#ifndef _WIN32
const char* const prioritizedNames[] =
{ "eth0", "eth1", "wlan0", "wlan1", "en0", "en1" };
const char* const prioritizedNames[]
= {"eth0", "eth1", "wlan0", "wlan1", "en0", "en1"};
const int count = (sizeof prioritizedNames) / (sizeof prioritizedNames[0]);
for (int i = 0; i < count; ++i) {
std::map<std::string, std::string>::const_iterator it =
interfaces.find(prioritizedNames[i]);
if (it != interfaces.end())
std::map<std::string, std::string>::const_iterator it
= interfaces.find(prioritizedNames[i]);
if (it != interfaces.end()) {
return it->second;
}
}
#endif
for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
iter != interfaces.end(); ++iter) {
iter != interfaces.end();
++iter) {
std::string interfaceIp = iter->second;
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168")
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "192.168") {
return interfaceIp;
}
}
for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
iter != interfaces.end(); ++iter) {
iter != interfaces.end();
++iter) {
std::string interfaceIp = iter->second;
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.")
if (interfaceIp.length() >= 7 && interfaceIp.substr(0, 7) == "172.16.") {
return interfaceIp;
}
}
for (std::map<std::string, std::string>::iterator iter = interfaces.begin();
iter != interfaces.end(); ++iter) {
iter != interfaces.end();
++iter) {
std::string interfaceIp = iter->second;
if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.")
if (interfaceIp.length() >= 3 && interfaceIp.substr(0, 3) == "10.") {
return interfaceIp;
}
}
return "127.0.0.1";

View File

@ -19,10 +19,11 @@
#include <common/otherTools.h>
void kiwix::sleep(unsigned int milliseconds) {
void kiwix::sleep(unsigned int milliseconds)
{
#ifdef _WIN32
Sleep(milliseconds);
Sleep(milliseconds);
#else
usleep(1000 * milliseconds);
usleep(1000 * milliseconds);
#endif
}

View File

@ -20,13 +20,13 @@
#include <common/pathTools.h>
#ifdef __APPLE__
#include <mach-o/dyld.h>
#include <limits.h>
#include <mach-o/dyld.h>
#elif _WIN32
#include <direct.h>
#include <windows.h>
#include "shlwapi.h"
#include <direct.h>
#define getcwd _getcwd // stupid MSFT "deprecation" warning
#define getcwd _getcwd // stupid MSFT "deprecation" warning
#endif
#ifdef _WIN32
@ -47,7 +47,8 @@
#define PATH_MAX 1024
#endif
bool isRelativePath(const string &path) {
bool isRelativePath(const string& path)
{
#ifdef _WIN32
return path.empty() || path.substr(1, 2) == ":\\" ? false : true;
#else
@ -55,19 +56,21 @@ bool isRelativePath(const string &path) {
#endif
}
string computeRelativePath(const string path, const string absolutePath) {
string computeRelativePath(const string path, const string absolutePath)
{
std::vector<std::string> pathParts = kiwix::split(path, SEPARATOR);
std::vector<std::string> absolutePathParts = kiwix::split(absolutePath, SEPARATOR);
std::vector<std::string> absolutePathParts
= kiwix::split(absolutePath, SEPARATOR);
unsigned int commonCount = 0;
while (commonCount < pathParts.size() &&
commonCount < absolutePathParts.size() &&
pathParts[commonCount] == absolutePathParts[commonCount]) {
while (commonCount < pathParts.size()
&& commonCount < absolutePathParts.size()
&& pathParts[commonCount] == absolutePathParts[commonCount]) {
if (!pathParts[commonCount].empty()) {
commonCount++;
}
}
string relativePath;
#ifdef _WIN32
/* On Windows you have a token more because the root is represented
@ -77,10 +80,10 @@ string computeRelativePath(const string path, const string absolutePath) {
}
#endif
for (unsigned int i = commonCount ; i < pathParts.size() ; i++) {
for (unsigned int i = commonCount; i < pathParts.size(); i++) {
relativePath += "../";
}
for (unsigned int i = commonCount ; i < absolutePathParts.size() ; i++) {
for (unsigned int i = commonCount; i < absolutePathParts.size(); i++) {
relativePath += absolutePathParts[i];
relativePath += i + 1 < absolutePathParts.size() ? "/" : "";
}
@ -89,11 +92,12 @@ string computeRelativePath(const string path, const string absolutePath) {
}
/* Warning: the relative path must be with slashes */
string computeAbsolutePath(const string path, const string relativePath) {
string computeAbsolutePath(const string path, const string relativePath)
{
string absolutePath;
if (path.empty()) {
char *path=NULL;
char* path = NULL;
size_t size = 0;
#ifdef _WIN32
@ -104,15 +108,17 @@ string computeAbsolutePath(const string path, const string relativePath) {
absolutePath = string(path) + SEPARATOR;
} else {
absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR ? path : path + SEPARATOR;
absolutePath = path.substr(path.length() - 1, 1) == SEPARATOR
? path
: path + SEPARATOR;
}
#if _WIN32
char *cRelativePath = _strdup(relativePath.c_str());
char* cRelativePath = _strdup(relativePath.c_str());
#else
char *cRelativePath = strdup(relativePath.c_str());
char* cRelativePath = strdup(relativePath.c_str());
#endif
char *token = strtok(cRelativePath, "/");
char* token = strtok(cRelativePath, "/");
while (token != NULL) {
if (string(token) == "..") {
@ -121,8 +127,9 @@ string computeAbsolutePath(const string path, const string relativePath) {
} else if (strcmp(token, ".") && strcmp(token, "")) {
absolutePath += string(token);
token = strtok(NULL, "/");
if (token != NULL)
absolutePath += SEPARATOR;
if (token != NULL) {
absolutePath += SEPARATOR;
}
} else {
token = strtok(NULL, "/");
}
@ -131,31 +138,38 @@ string computeAbsolutePath(const string path, const string relativePath) {
return absolutePath;
}
string removeLastPathElement(const string path, const bool removePreSeparator, const bool removePostSeparator) {
string removeLastPathElement(const string path,
const bool removePreSeparator,
const bool removePostSeparator)
{
string newPath = path;
size_t offset = newPath.find_last_of(SEPARATOR);
if (removePreSeparator &&
if (removePreSeparator &&
#ifndef _WIN32
offset != newPath.find_first_of(SEPARATOR) &&
offset != newPath.find_first_of(SEPARATOR) &&
#endif
offset == newPath.length()-1) {
offset == newPath.length() - 1) {
newPath = newPath.substr(0, offset);
offset = newPath.find_last_of(SEPARATOR);
}
newPath = removePostSeparator ? newPath.substr(0, offset) : newPath.substr(0, offset+1);
newPath = removePostSeparator ? newPath.substr(0, offset)
: newPath.substr(0, offset + 1);
return newPath;
}
string appendToDirectory(const string &directoryPath, const string &filename) {
string appendToDirectory(const string& directoryPath, const string& filename)
{
string newPath = directoryPath + SEPARATOR + filename;
return newPath;
}
string getLastPathElement(const string &path) {
string getLastPathElement(const string& path)
{
return path.substr(path.find_last_of(SEPARATOR) + 1);
}
unsigned int getFileSize(const string &path) {
unsigned int getFileSize(const string& path)
{
#ifdef _WIN32
struct _stat filestatus;
_stat(path.c_str(), &filestatus);
@ -167,12 +181,15 @@ unsigned int getFileSize(const string &path) {
return filestatus.st_size / 1024;
}
string getFileSizeAsString(const string &path) {
ostringstream convert; convert << getFileSize(path);
string getFileSizeAsString(const string& path)
{
ostringstream convert;
convert << getFileSize(path);
return convert.str();
}
bool fileExists(const string &path) {
bool fileExists(const string& path)
{
#ifdef _WIN32
return PathFileExists(path.c_str());
#else
@ -187,7 +204,8 @@ bool fileExists(const string &path) {
#endif
}
bool makeDirectory(const string &path) {
bool makeDirectory(const string& path)
{
#ifdef _WIN32
int status = _mkdir(path.c_str());
#else
@ -197,18 +215,19 @@ bool makeDirectory(const string &path) {
}
/* Try to create a link and if does not work then make a copy */
bool copyFile(const string &sourcePath, const string &destPath) {
bool copyFile(const string& sourcePath, const string& destPath)
{
try {
#ifndef _WIN32
if (link(sourcePath.c_str(), destPath.c_str()) != 0) {
#endif
std::ifstream infile(sourcePath.c_str(), std::ios_base::binary);
std::ofstream outfile(destPath.c_str(), std::ios_base::binary);
outfile << infile.rdbuf();
std::ifstream infile(sourcePath.c_str(), std::ios_base::binary);
std::ofstream outfile(destPath.c_str(), std::ios_base::binary);
outfile << infile.rdbuf();
#ifndef _WIN32
}
#endif
} catch (exception &e) {
} catch (exception& e) {
cerr << e.what() << endl;
return false;
}
@ -216,18 +235,19 @@ bool copyFile(const string &sourcePath, const string &destPath) {
return true;
}
string getExecutablePath() {
string getExecutablePath()
{
char binRootPath[PATH_MAX];
#ifdef _WIN32
GetModuleFileName( NULL, binRootPath, PATH_MAX);
GetModuleFileName(NULL, binRootPath, PATH_MAX);
return std::string(binRootPath);
#elif __APPLE__
uint32_t max = (uint32_t)PATH_MAX;
_NSGetExecutablePath(binRootPath, &max);
return std::string(binRootPath);
#else
ssize_t size = readlink("/proc/self/exe", binRootPath, PATH_MAX);
ssize_t size = readlink("/proc/self/exe", binRootPath, PATH_MAX);
if (size != -1) {
return std::string(binRootPath, size);
}
@ -236,7 +256,8 @@ string getExecutablePath() {
return "";
}
bool writeTextFile(const string &path, const string &content) {
bool writeTextFile(const string& path, const string& content)
{
std::ofstream file;
file.open(path.c_str());
file << content;
@ -244,8 +265,9 @@ bool writeTextFile(const string &path, const string &content) {
return true;
}
string getCurrentDirectory() {
char* a_cwd = getcwd(NULL,0);
string getCurrentDirectory()
{
char* a_cwd = getcwd(NULL, 0);
string s_cwd(a_cwd);
free(a_cwd);
return s_cwd;

View File

@ -21,10 +21,11 @@
std::map<std::string, RegexMatcher*> regexCache;
RegexMatcher *buildRegex(const std::string &regex) {
RegexMatcher *matcher;
RegexMatcher* buildRegex(const std::string& regex)
{
RegexMatcher* matcher;
std::map<std::string, RegexMatcher*>::iterator itr = regexCache.find(regex);
/* Regex is in cache */
if (itr != regexCache.end()) {
matcher = itr->second;
@ -42,22 +43,26 @@ RegexMatcher *buildRegex(const std::string &regex) {
}
/* todo */
void freeRegexCache() {
void freeRegexCache()
{
}
bool matchRegex(const std::string &content, const std::string &regex) {
bool matchRegex(const std::string& content, const std::string& regex)
{
ucnv_setDefaultName("UTF-8");
UnicodeString ucontent = UnicodeString(content.c_str());
RegexMatcher *matcher = buildRegex(regex);
RegexMatcher* matcher = buildRegex(regex);
matcher->reset(ucontent);
return matcher->find();
}
std::string replaceRegex(const std::string &content, const std::string &replacement, const std::string &regex) {
std::string replaceRegex(const std::string& content,
const std::string& replacement,
const std::string& regex)
{
ucnv_setDefaultName("UTF-8");
UnicodeString ucontent = UnicodeString(content.c_str());
UnicodeString ureplacement = UnicodeString(replacement.c_str());
RegexMatcher *matcher = buildRegex(regex);
RegexMatcher* matcher = buildRegex(regex);
matcher->reset(ucontent);
UErrorCode status = U_ZERO_ERROR;
UnicodeString uresult = matcher->replaceAll(ureplacement, status);
@ -66,16 +71,19 @@ std::string replaceRegex(const std::string &content, const std::string &replacem
return tmp;
}
std::string appendToFirstOccurence(const std::string &content, const std::string regex, const std::string &replacement) {
std::string appendToFirstOccurence(const std::string& content,
const std::string regex,
const std::string& replacement)
{
ucnv_setDefaultName("UTF-8");
UnicodeString ucontent = UnicodeString(content.c_str());
UnicodeString ureplacement = UnicodeString(replacement.c_str());
RegexMatcher *matcher = buildRegex(regex);
RegexMatcher* matcher = buildRegex(regex);
matcher->reset(ucontent);
if (matcher->find()) {
UErrorCode status = U_ZERO_ERROR;
ucontent.insert(matcher->end(status), ureplacement);
ucontent.insert(matcher->end(status), ureplacement);
std::string tmp;
ucontent.toUTF8String(tmp);
return tmp;
@ -83,4 +91,3 @@ std::string appendToFirstOccurence(const std::string &content, const std::strin
return content;
}

View File

@ -19,32 +19,36 @@
#include <common/stringTools.h>
#include <unicode/translit.h>
#include <unicode/normlzr.h>
#include <unicode/ustring.h>
#include <unicode/rep.h>
#include <unicode/uniset.h>
#include <unicode/translit.h>
#include <unicode/ucnv.h>
#include <unicode/uniset.h>
#include <unicode/ustring.h>
/* tell ICU where to find its dat file (tables) */
void kiwix::loadICUExternalTables() {
void kiwix::loadICUExternalTables()
{
#ifdef __APPLE__
std::string executablePath = getExecutablePath();
std::string executableDirectory = removeLastPathElement(executablePath);
std::string datPath = computeAbsolutePath(executableDirectory, "icudt49l.dat");
try {
u_setDataDirectory(datPath.c_str());
} catch (exception &e) {
std::cerr << e.what() << std::endl;
}
std::string executablePath = getExecutablePath();
std::string executableDirectory = removeLastPathElement(executablePath);
std::string datPath
= computeAbsolutePath(executableDirectory, "icudt49l.dat");
try {
u_setDataDirectory(datPath.c_str());
} catch (exception& e) {
std::cerr << e.what() << std::endl;
}
#endif
}
std::string kiwix::removeAccents(const std::string &text) {
std::string kiwix::removeAccents(const std::string& text)
{
loadICUExternalTables();
ucnv_setDefaultName("UTF-8");
UErrorCode status = U_ZERO_ERROR;
Transliterator *removeAccentsTrans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
Transliterator* removeAccentsTrans = Transliterator::createInstance(
"Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
UnicodeString ustring = UnicodeString(text.c_str());
removeAccentsTrans->transliterate(ustring);
delete removeAccentsTrans;
@ -56,7 +60,8 @@ std::string kiwix::removeAccents(const std::string &text) {
#ifndef __ANDROID__
/* Prepare integer for display */
std::string kiwix::beautifyInteger(const unsigned int number) {
std::string kiwix::beautifyInteger(const unsigned int number)
{
std::stringstream numberStream;
numberStream << number;
std::string numberString = numberStream.str();
@ -70,49 +75,58 @@ std::string kiwix::beautifyInteger(const unsigned int number) {
return numberString;
}
std::string kiwix::beautifyFileSize(const unsigned int number) {
if (number > 1024*1024) {
return kiwix::beautifyInteger(number/(1024*1024)) + " GB";
std::string kiwix::beautifyFileSize(const unsigned int number)
{
if (number > 1024 * 1024) {
return kiwix::beautifyInteger(number / (1024 * 1024)) + " GB";
} else {
return kiwix::beautifyInteger(number/1024 !=
0 ? number/1024 : 1) + " MB";
return kiwix::beautifyInteger(number / 1024 != 0 ? number / 1024 : 1)
+ " MB";
}
}
void kiwix::printStringInHexadecimal(UnicodeString s) {
void kiwix::printStringInHexadecimal(UnicodeString s)
{
std::cout << std::showbase << std::hex;
for (int i=0; i<s.length(); i++) {
for (int i = 0; i < s.length(); i++) {
char c = (char)((s.getTerminatedBuffer())[i]);
if (c & 0x80)
if (c & 0x80) {
std::cout << (c & 0xffff) << " ";
else
} else {
std::cout << c << " ";
}
}
std::cout << std::endl;
}
void kiwix::printStringInHexadecimal(const char *s) {
void kiwix::printStringInHexadecimal(const char* s)
{
std::cout << std::showbase << std::hex;
for (char const* pc = s; *pc; ++pc) {
if (*pc & 0x80)
if (*pc & 0x80) {
std::cout << (*pc & 0xffff);
else
} else {
std::cout << *pc;
}
std::cout << ' ';
}
std::cout << std::endl;
}
void kiwix::stringReplacement(std::string& str, const std::string& oldStr, const std::string& newStr) {
void kiwix::stringReplacement(std::string& str,
const std::string& oldStr,
const std::string& newStr)
{
size_t pos = 0;
while((pos = str.find(oldStr, pos)) != std::string::npos) {
while ((pos = str.find(oldStr, pos)) != std::string::npos) {
str.replace(pos, oldStr.length(), newStr);
pos += newStr.length();
}
}
/* Encode string to avoid XSS attacks */
std::string kiwix::encodeDiples(const std::string& str) {
std::string kiwix::encodeDiples(const std::string& str)
{
std::string result = str;
kiwix::stringReplacement(result, "<", "&lt;");
kiwix::stringReplacement(result, ">", "&gt;");
@ -120,58 +134,68 @@ std::string kiwix::encodeDiples(const std::string& str) {
}
// Urlencode
//based on javascript encodeURIComponent()
// based on javascript encodeURIComponent()
std::string char2hex(char dec) {
char dig1 = (dec&0xF0)>>4;
char dig2 = (dec&0x0F);
if ( 0<= dig1 && dig1<= 9) dig1+=48; //0,48inascii
if (10<= dig1 && dig1<=15) dig1+=97-10; //a,97inascii
if ( 0<= dig2 && dig2<= 9) dig2+=48;
if (10<= dig2 && dig2<=15) dig2+=97-10;
std::string char2hex(char dec)
{
char dig1 = (dec & 0xF0) >> 4;
char dig2 = (dec & 0x0F);
if (0 <= dig1 && dig1 <= 9) {
dig1 += 48; // 0,48inascii
}
if (10 <= dig1 && dig1 <= 15) {
dig1 += 97 - 10; // a,97inascii
}
if (0 <= dig2 && dig2 <= 9) {
dig2 += 48;
}
if (10 <= dig2 && dig2 <= 15) {
dig2 += 97 - 10;
}
std::string r;
r.append( &dig1, 1);
r.append( &dig2, 1);
r.append(&dig1, 1);
r.append(&dig2, 1);
return r;
}
std::string kiwix::urlEncode(const std::string &c) {
std::string escaped="";
std::string kiwix::urlEncode(const std::string& c)
{
std::string escaped = "";
int max = c.length();
for(int i=0; i<max; i++)
{
if ( (48 <= c[i] && c[i] <= 57) ||//0-9
(65 <= c[i] && c[i] <= 90) ||//abc...xyz
(97 <= c[i] && c[i] <= 122) || //ABC...XYZ
(c[i]=='~' || c[i]=='!' || c[i]=='*' || c[i]=='(' || c[i]==')' || c[i]=='\'')
)
{
escaped.append( &c[i], 1);
}
else
{
escaped.append("%");
escaped.append( char2hex(c[i]) );//converts char 255 to string "ff"
}
for (int i = 0; i < max; i++) {
if ((48 <= c[i] && c[i] <= 57) || // 0-9
(65 <= c[i] && c[i] <= 90)
|| // abc...xyz
(97 <= c[i] && c[i] <= 122)
|| // ABC...XYZ
(c[i] == '~' || c[i] == '!' || c[i] == '*' || c[i] == '(' || c[i] == ')'
|| c[i] == '\'')) {
escaped.append(&c[i], 1);
} else {
escaped.append("%");
escaped.append(char2hex(c[i])); // converts char 255 to string "ff"
}
}
return escaped;
}
#endif
static char charFromHex(std::string a) {
static char charFromHex(std::string a)
{
std::istringstream Blat(a);
int Z;
Blat >> std::hex >> Z;
return char (Z);
return char(Z);
}
std::string kiwix::urlDecode(const std::string &originalUrl) {
std::string kiwix::urlDecode(const std::string& originalUrl)
{
std::string url = originalUrl;
std::string::size_type pos = 0;
while ((pos = url.find('%', pos)) != std::string::npos &&
pos + 2 < url.length()) {
while ((pos = url.find('%', pos)) != std::string::npos
&& pos + 2 < url.length()) {
url.replace(pos, 3, 1, charFromHex(url.substr(pos + 1, 2)));
++pos;
}
@ -179,39 +203,43 @@ std::string kiwix::urlDecode(const std::string &originalUrl) {
}
/* Split string in a token array */
std::vector<std::string> kiwix::split(const std::string & str,
const std::string & delims=" *-")
std::vector<std::string> kiwix::split(const std::string& str,
const std::string& delims = " *-")
{
std::string::size_type lastPos = str.find_first_not_of(delims, 0);
std::string::size_type pos = str.find_first_of(delims, lastPos);
std::vector<std::string> tokens;
while (std::string::npos != pos || std::string::npos != lastPos)
{
tokens.push_back(str.substr(lastPos, pos - lastPos));
lastPos = str.find_first_not_of(delims, pos);
pos = str.find_first_of(delims, lastPos);
}
while (std::string::npos != pos || std::string::npos != lastPos) {
tokens.push_back(str.substr(lastPos, pos - lastPos));
lastPos = str.find_first_not_of(delims, pos);
pos = str.find_first_of(delims, lastPos);
}
return tokens;
}
std::vector<std::string> kiwix::split(const char* lhs, const char* rhs){
const std::string m1 (lhs), m2 (rhs);
std::vector<std::string> kiwix::split(const char* lhs, const char* rhs)
{
const std::string m1(lhs), m2(rhs);
return split(m1, m2);
}
std::vector<std::string> kiwix::split(const char* lhs, const std::string& rhs){
std::vector<std::string> kiwix::split(const char* lhs, const std::string& rhs)
{
return split(lhs, rhs.c_str());
}
std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs){
std::vector<std::string> kiwix::split(const std::string& lhs, const char* rhs)
{
return split(lhs.c_str(), rhs);
}
std::string kiwix::ucFirst (const std::string &word) {
if (word.empty())
std::string kiwix::ucFirst(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
@ -223,9 +251,11 @@ std::string kiwix::ucFirst (const std::string &word) {
return result;
}
std::string kiwix::ucAll (const std::string &word) {
if (word.empty())
std::string kiwix::ucAll(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
@ -235,9 +265,11 @@ std::string kiwix::ucAll (const std::string &word) {
return result;
}
std::string kiwix::lcFirst (const std::string &word) {
if (word.empty())
std::string kiwix::lcFirst(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
@ -249,9 +281,11 @@ std::string kiwix::lcFirst (const std::string &word) {
return result;
}
std::string kiwix::lcAll (const std::string &word) {
if (word.empty())
std::string kiwix::lcAll(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
@ -261,9 +295,11 @@ std::string kiwix::lcAll (const std::string &word) {
return result;
}
std::string kiwix::toTitle (const std::string &word) {
if (word.empty())
std::string kiwix::toTitle(const std::string& word)
{
if (word.empty()) {
return "";
}
std::string result;
@ -274,6 +310,7 @@ std::string kiwix::toTitle (const std::string &word) {
return result;
}
std::string kiwix::normalize (const std::string &word) {
std::string kiwix::normalize(const std::string& word)
{
return kiwix::lcAll(word);
}

View File

@ -19,125 +19,136 @@
#include "library.h"
namespace kiwix {
namespace kiwix
{
/* Constructor */
Book::Book() : readOnly(false)
{
}
/* Destructor */
Book::~Book()
{
}
/* Sort functions */
bool Book::sortByLastOpen(const kiwix::Book& a, const kiwix::Book& b)
{
return atoi(a.last.c_str()) > atoi(b.last.c_str());
}
/* Constructor */
Book::Book():
readOnly(false) {
}
/* Destructor */
Book::~Book() {
}
bool Book::sortByTitle(const kiwix::Book& a, const kiwix::Book& b)
{
return strcmp(a.title.c_str(), b.title.c_str()) < 0;
}
/* Sort functions */
bool Book::sortByLastOpen(const kiwix::Book &a, const kiwix::Book &b) {
return atoi(a.last.c_str()) > atoi(b.last.c_str());
}
bool Book::sortByDate(const kiwix::Book& a, const kiwix::Book& b)
{
return strcmp(a.date.c_str(), b.date.c_str()) > 0;
}
bool Book::sortByTitle(const kiwix::Book &a, const kiwix::Book &b) {
return strcmp(a.title.c_str(), b.title.c_str()) < 0;
}
bool Book::sortBySize(const kiwix::Book& a, const kiwix::Book& b)
{
return atoi(a.size.c_str()) < atoi(b.size.c_str());
}
bool Book::sortByDate(const kiwix::Book &a, const kiwix::Book &b) {
return strcmp(a.date.c_str(), b.date.c_str()) > 0;
}
bool Book::sortByPublisher(const kiwix::Book& a, const kiwix::Book& b)
{
return strcmp(a.publisher.c_str(), b.publisher.c_str()) < 0;
}
bool Book::sortBySize(const kiwix::Book &a, const kiwix::Book &b) {
return atoi(a.size.c_str()) < atoi(b.size.c_str());
}
bool Book::sortByCreator(const kiwix::Book& a, const kiwix::Book& b)
{
return strcmp(a.creator.c_str(), b.creator.c_str()) < 0;
}
bool Book::sortByPublisher(const kiwix::Book &a, const kiwix::Book &b) {
return strcmp(a.publisher.c_str(), b.publisher.c_str()) < 0;
}
bool Book::sortByLanguage(const kiwix::Book& a, const kiwix::Book& b)
{
return strcmp(a.language.c_str(), b.language.c_str()) < 0;
}
bool Book::sortByCreator(const kiwix::Book &a, const kiwix::Book &b) {
return strcmp(a.creator.c_str(), b.creator.c_str()) < 0;
}
bool Book::sortByLanguage(const kiwix::Book &a, const kiwix::Book &b) {
return strcmp(a.language.c_str(), b.language.c_str()) < 0;
}
std::string Book::getHumanReadableIdFromPath() {
std::string id = pathAbsolute;
if (!id.empty()) {
kiwix::removeAccents(id);
std::string Book::getHumanReadableIdFromPath()
{
std::string id = pathAbsolute;
if (!id.empty()) {
kiwix::removeAccents(id);
#ifdef _WIN32
id = replaceRegex(id, "", "^.*\\\\");
id = replaceRegex(id, "", "^.*\\\\");
#else
id = replaceRegex(id, "", "^.*/");
id = replaceRegex(id, "", "^.*/");
#endif
id = replaceRegex(id, "", "\\.zim[a-z]*$");
id = replaceRegex(id, "_", " ");
id = replaceRegex(id, "plus", "\\+");
}
return id;
id = replaceRegex(id, "", "\\.zim[a-z]*$");
id = replaceRegex(id, "_", " ");
id = replaceRegex(id, "plus", "\\+");
}
/* Constructor */
Library::Library():
version(KIWIX_LIBRARY_VERSION) {
}
/* Destructor */
Library::~Library() {
}
bool Library::addBook(const Book &book) {
/* Try to find it */
std::vector<kiwix::Book>::iterator itr;
for ( itr = this->books.begin(); itr != this->books.end(); ++itr ) {
if (itr->id == book.id) {
if (!itr->readOnly) {
itr->readOnly = book.readOnly;
if (itr->path.empty())
itr->path = book.path;
if (itr->pathAbsolute.empty())
itr->pathAbsolute = book.pathAbsolute;
if (itr->url.empty())
itr->url = book.url;
if (itr->tags.empty())
itr->tags = book.tags;
if (itr->name.empty())
itr->name = book.name;
if (itr->indexPath.empty()) {
itr->indexPath = book.indexPath;
itr->indexType = book.indexType;
}
if (itr->indexPathAbsolute.empty()) {
itr->indexPathAbsolute = book.indexPathAbsolute;
itr->indexType = book.indexType;
}
if (itr->faviconMimeType.empty()) {
itr->favicon = book.favicon;
itr->faviconMimeType = book.faviconMimeType;
}
}
return false;
}
}
/* otherwise */
this->books.push_back(book);
return true;
}
bool Library::removeBookByIndex(const unsigned int bookIndex) {
books.erase(books.begin()+bookIndex);
return true;
}
return id;
}
/* Constructor */
Library::Library() : version(KIWIX_LIBRARY_VERSION)
{
}
/* Destructor */
Library::~Library()
{
}
bool Library::addBook(const Book& book)
{
/* Try to find it */
std::vector<kiwix::Book>::iterator itr;
for (itr = this->books.begin(); itr != this->books.end(); ++itr) {
if (itr->id == book.id) {
if (!itr->readOnly) {
itr->readOnly = book.readOnly;
if (itr->path.empty()) {
itr->path = book.path;
}
if (itr->pathAbsolute.empty()) {
itr->pathAbsolute = book.pathAbsolute;
}
if (itr->url.empty()) {
itr->url = book.url;
}
if (itr->tags.empty()) {
itr->tags = book.tags;
}
if (itr->name.empty()) {
itr->name = book.name;
}
if (itr->indexPath.empty()) {
itr->indexPath = book.indexPath;
itr->indexType = book.indexType;
}
if (itr->indexPathAbsolute.empty()) {
itr->indexPathAbsolute = book.indexPathAbsolute;
itr->indexType = book.indexType;
}
if (itr->faviconMimeType.empty()) {
itr->favicon = book.favicon;
itr->faviconMimeType = book.faviconMimeType;
}
}
return false;
}
}
/* otherwise */
this->books.push_back(book);
return true;
}
bool Library::removeBookByIndex(const unsigned int bookIndex)
{
books.erase(books.begin() + bookIndex);
return true;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -18,9 +18,9 @@
*/
#include "searcher.h"
#include "xapianSearcher.h"
#include "reader.h"
#include "kiwixlib-resources.h"
#include "reader.h"
#include "xapianSearcher.h"
#include <zim/search.h>
@ -33,268 +33,287 @@
using namespace CTPP;
#endif
namespace kiwix
{
class _Result : public Result
{
public:
_Result(Searcher* searcher, zim::Search::iterator& iterator);
virtual ~_Result(){};
namespace kiwix {
virtual std::string get_url();
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual int get_wordCount();
virtual int get_size();
class _Result : public Result {
public:
_Result(Searcher* searcher, zim::Search::iterator& iterator);
virtual ~_Result() {};
private:
Searcher* searcher;
zim::Search::iterator iterator;
};
virtual std::string get_url();
virtual std::string get_title();
virtual int get_score();
virtual std::string get_snippet();
virtual int get_wordCount();
virtual int get_size();
struct SearcherInternal {
const zim::Search* _search;
XapianSearcher* _xapianSearcher;
zim::Search::iterator current_iterator;
private:
Searcher* searcher;
zim::Search::iterator iterator;
};
struct SearcherInternal {
const zim::Search *_search;
XapianSearcher *_xapianSearcher;
zim::Search::iterator current_iterator;
SearcherInternal() :
_search(NULL),
_xapianSearcher(NULL)
{}
~SearcherInternal() {
if ( _search != NULL )
delete _search;
if ( _xapianSearcher != NULL )
delete _xapianSearcher;
}
};
/* Constructor */
Searcher::Searcher(const string &xapianDirectoryPath, Reader* reader) :
reader(reader),
internal(new SearcherInternal()),
searchPattern(""),
protocolPrefix("zim://"),
searchProtocolPrefix("search://?"),
resultCountPerPage(0),
estimatedResultCount(0),
resultStart(0),
resultEnd(0)
SearcherInternal() : _search(NULL), _xapianSearcher(NULL) {}
~SearcherInternal()
{
template_ct2 = RESOURCE::results_ct2;
loadICUExternalTables();
if ( !reader || !reader->hasFulltextIndex() ) {
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
if (_search != NULL) {
delete _search;
}
if (_xapianSearcher != NULL) {
delete _xapianSearcher;
}
}
/* Destructor */
Searcher::~Searcher() {
delete internal;
};
/* Constructor */
Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
: reader(reader),
internal(new SearcherInternal()),
searchPattern(""),
protocolPrefix("zim://"),
searchProtocolPrefix("search://?"),
resultCountPerPage(0),
estimatedResultCount(0),
resultStart(0),
resultEnd(0)
{
template_ct2 = RESOURCE::results_ct2;
loadICUExternalTables();
if (!reader || !reader->hasFulltextIndex()) {
internal->_xapianSearcher = new XapianSearcher(xapianDirectoryPath, reader);
}
/* Search strings in the database */
void Searcher::search(std::string &search, unsigned int resultStart,
unsigned int resultEnd, const bool verbose) {
this->reset();
}
if (verbose == true) {
cout << "Performing query `" << search << "'" << endl;
}
/* Destructor */
Searcher::~Searcher()
{
delete internal;
}
/* Search strings in the database */
void Searcher::search(std::string& search,
unsigned int resultStart,
unsigned int resultEnd,
const bool verbose)
{
this->reset();
/* If resultEnd & resultStart inverted */
if (resultStart > resultEnd) {
resultEnd += resultStart;
resultStart = resultEnd - resultStart;
resultEnd -= resultStart;
}
/* Try to find results */
if (resultStart != resultEnd) {
/* Avoid big researches */
this->resultCountPerPage = resultEnd - resultStart;
if (this->resultCountPerPage > 70) {
resultEnd = resultStart + 70;
this->resultCountPerPage = 70;
}
/* Perform the search */
this->searchPattern = search;
this->resultStart = resultStart;
this->resultEnd = resultEnd;
string unaccentedSearch = removeAccents(search);
if ( internal->_xapianSearcher ) {
internal->_xapianSearcher->searchInIndex(unaccentedSearch, resultStart, resultEnd, verbose);
this->estimatedResultCount = internal->_xapianSearcher->results.get_matches_estimated();
} else {
internal->_search = this->reader->getZimFileHandler()->search(unaccentedSearch, resultStart, resultEnd);
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
}
return;
if (verbose == true) {
cout << "Performing query `" << search << "'" << endl;
}
void Searcher::restart_search() {
if ( internal->_xapianSearcher ) {
internal->_xapianSearcher->restart_search();
/* If resultEnd & resultStart inverted */
if (resultStart > resultEnd) {
resultEnd += resultStart;
resultStart = resultEnd - resultStart;
resultEnd -= resultStart;
}
/* Try to find results */
if (resultStart != resultEnd) {
/* Avoid big researches */
this->resultCountPerPage = resultEnd - resultStart;
if (this->resultCountPerPage > 70) {
resultEnd = resultStart + 70;
this->resultCountPerPage = 70;
}
/* Perform the search */
this->searchPattern = search;
this->resultStart = resultStart;
this->resultEnd = resultEnd;
string unaccentedSearch = removeAccents(search);
if (internal->_xapianSearcher) {
internal->_xapianSearcher->searchInIndex(
unaccentedSearch, resultStart, resultEnd, verbose);
this->estimatedResultCount
= internal->_xapianSearcher->results.get_matches_estimated();
} else {
internal->_search = this->reader->getZimFileHandler()->search(
unaccentedSearch, resultStart, resultEnd);
internal->current_iterator = internal->_search->begin();
this->estimatedResultCount = internal->_search->get_matches_estimated();
}
}
Result* Searcher::getNextResult() {
if ( internal->_xapianSearcher ) {
return internal->_xapianSearcher->getNextResult();
} else if (internal->current_iterator != internal->_search->end()) {
Result* result = new _Result(this, internal->current_iterator);
internal->current_iterator++;
return result;
}
return NULL;
return;
}
void Searcher::restart_search()
{
if (internal->_xapianSearcher) {
internal->_xapianSearcher->restart_search();
} else {
internal->current_iterator = internal->_search->begin();
}
}
/* Reset the results */
void Searcher::reset() {
this->estimatedResultCount = 0;
this->searchPattern = "";
return;
Result* Searcher::getNextResult()
{
if (internal->_xapianSearcher) {
return internal->_xapianSearcher->getNextResult();
} else if (internal->current_iterator != internal->_search->end()) {
Result* result = new _Result(this, internal->current_iterator);
internal->current_iterator++;
return result;
}
return NULL;
}
/* Return the result count estimation */
unsigned int Searcher::getEstimatedResultCount() {
return this->estimatedResultCount;
}
/* Reset the results */
void Searcher::reset()
{
this->estimatedResultCount = 0;
this->searchPattern = "";
return;
}
bool Searcher::setProtocolPrefix(const std::string prefix) {
this->protocolPrefix = prefix;
return true;
}
/* Return the result count estimation */
unsigned int Searcher::getEstimatedResultCount()
{
return this->estimatedResultCount;
}
bool Searcher::setSearchProtocolPrefix(const std::string prefix) {
this->searchProtocolPrefix = prefix;
return true;
}
bool Searcher::setProtocolPrefix(const std::string prefix)
{
this->protocolPrefix = prefix;
return true;
}
void Searcher::setContentHumanReadableId(const string &contentHumanReadableId) {
this->contentHumanReadableId = contentHumanReadableId;
}
bool Searcher::setSearchProtocolPrefix(const std::string prefix)
{
this->searchProtocolPrefix = prefix;
return true;
}
_Result::_Result(Searcher* searcher, zim::Search::iterator& iterator):
searcher(searcher),
iterator(iterator)
{
}
void Searcher::setContentHumanReadableId(const string& contentHumanReadableId)
{
this->contentHumanReadableId = contentHumanReadableId;
}
std::string _Result::get_url() {
return iterator.get_url();
}
std::string _Result::get_title() {
return iterator.get_title();
}
int _Result::get_score() {
return iterator.get_score();
}
std::string _Result::get_snippet() {
return iterator.get_snippet();
}
int _Result::get_size() {
return iterator.get_size();
}
int _Result::get_wordCount() {
return iterator.get_wordCount();
}
_Result::_Result(Searcher* searcher, zim::Search::iterator& iterator)
: searcher(searcher), iterator(iterator)
{
}
std::string _Result::get_url()
{
return iterator.get_url();
}
std::string _Result::get_title()
{
return iterator.get_title();
}
int _Result::get_score()
{
return iterator.get_score();
}
std::string _Result::get_snippet()
{
return iterator.get_snippet();
}
int _Result::get_size()
{
return iterator.get_size();
}
int _Result::get_wordCount()
{
return iterator.get_wordCount();
}
#ifdef ENABLE_CTPP2
string Searcher::getHtml() {
SimpleVM oSimpleVM;
string Searcher::getHtml()
{
SimpleVM oSimpleVM;
// Fill data
CDT oData;
CDT resultsCDT(CDT::ARRAY_VAL);
// Fill data
CDT oData;
CDT resultsCDT(CDT::ARRAY_VAL);
this->restart_search();
Result * p_result = NULL;
while ( (p_result = this->getNextResult()) ) {
CDT result;
result["title"] = p_result->get_title();
result["url"] = p_result->get_url();
result["snippet"] = p_result->get_snippet();
this->restart_search();
Result* p_result = NULL;
while ((p_result = this->getNextResult())) {
CDT result;
result["title"] = p_result->get_title();
result["url"] = p_result->get_url();
result["snippet"] = p_result->get_snippet();
if (p_result->get_size() >= 0)
result["size"] = kiwix::beautifyInteger(p_result->get_size());
if (p_result->get_wordCount() >= 0)
result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount());
resultsCDT.PushBack(result);
delete p_result;
if (p_result->get_size() >= 0) {
result["size"] = kiwix::beautifyInteger(p_result->get_size());
}
this->restart_search();
oData["results"] = resultsCDT;
// pages
CDT pagesCDT(CDT::ARRAY_VAL);
unsigned int pageStart = this->resultStart / this->resultCountPerPage >= 5 ? this->resultStart / this->resultCountPerPage - 4 : 0;
unsigned int pageCount = this->estimatedResultCount / this->resultCountPerPage + 1 - pageStart;
if (pageCount > 10)
pageCount = 10;
else if (pageCount == 1)
pageCount = 0;
for (unsigned int i=pageStart; i<pageStart+pageCount; i++) {
CDT page;
page["label"] = i + 1;
page["start"] = i * this->resultCountPerPage;
page["end"] = (i+1) * this->resultCountPerPage;
if (i * this->resultCountPerPage == this->resultStart)
page["selected"] = true;
pagesCDT.PushBack(page);
if (p_result->get_wordCount() >= 0) {
result["wordCount"] = kiwix::beautifyInteger(p_result->get_wordCount());
}
oData["pages"] = pagesCDT;
oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount);
oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern);
oData["searchPatternEncoded"] = urlEncode(this->searchPattern);
oData["resultStart"] = this->resultStart + 1;
oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount ? this->estimatedResultCount : this->resultEnd);
oData["resultRange"] = this->resultCountPerPage;
oData["resultLastPageStart"] = this->estimatedResultCount > this->resultCountPerPage ? this->estimatedResultCount - this->resultCountPerPage : 0;
oData["protocolPrefix"] = this->protocolPrefix;
oData["searchProtocolPrefix"] = this->searchProtocolPrefix;
oData["contentId"] = this->contentHumanReadableId;
VMStringLoader oLoader(template_ct2.c_str(), template_ct2.size());
FileLogger oLogger(stderr);
// DEBUG only (write output to stdout)
// oSimpleVM.Run(oData, oLoader, stdout, oLogger);
std::string sResult;
oSimpleVM.Run(oData, oLoader, sResult, oLogger);
return sResult;
resultsCDT.PushBack(result);
delete p_result;
}
this->restart_search();
oData["results"] = resultsCDT;
// pages
CDT pagesCDT(CDT::ARRAY_VAL);
unsigned int pageStart
= this->resultStart / this->resultCountPerPage >= 5
? this->resultStart / this->resultCountPerPage - 4
: 0;
unsigned int pageCount
= this->estimatedResultCount / this->resultCountPerPage + 1 - pageStart;
if (pageCount > 10) {
pageCount = 10;
} else if (pageCount == 1) {
pageCount = 0;
}
for (unsigned int i = pageStart; i < pageStart + pageCount; i++) {
CDT page;
page["label"] = i + 1;
page["start"] = i * this->resultCountPerPage;
page["end"] = (i + 1) * this->resultCountPerPage;
if (i * this->resultCountPerPage == this->resultStart) {
page["selected"] = true;
}
pagesCDT.PushBack(page);
}
oData["pages"] = pagesCDT;
oData["count"] = kiwix::beautifyInteger(this->estimatedResultCount);
oData["searchPattern"] = kiwix::encodeDiples(this->searchPattern);
oData["searchPatternEncoded"] = urlEncode(this->searchPattern);
oData["resultStart"] = this->resultStart + 1;
oData["resultEnd"] = (this->resultEnd > this->estimatedResultCount
? this->estimatedResultCount
: this->resultEnd);
oData["resultRange"] = this->resultCountPerPage;
oData["resultLastPageStart"]
= this->estimatedResultCount > this->resultCountPerPage
? this->estimatedResultCount - this->resultCountPerPage
: 0;
oData["protocolPrefix"] = this->protocolPrefix;
oData["searchProtocolPrefix"] = this->searchProtocolPrefix;
oData["contentId"] = this->contentHumanReadableId;
VMStringLoader oLoader(template_ct2.c_str(), template_ct2.size());
FileLogger oLogger(stderr);
// DEBUG only (write output to stdout)
// oSimpleVM.Run(oData, oLoader, stdout, oLogger);
std::string sResult;
oSimpleVM.Run(oData, oLoader, sResult, oLogger);
return sResult;
}
#endif
}

View File

@ -18,196 +18,204 @@
*/
#include "xapianSearcher.h"
#include "xapian/myhtmlparse.h"
#include <zim/zim.h>
#include <zim/file.h>
#include <sys/types.h>
#include <unicode/locid.h>
#include <unistd.h>
#include <zim/article.h>
#include <zim/error.h>
#include <sys/types.h>
#include <unistd.h>
#include <unicode/locid.h>
#include <zim/file.h>
#include <zim/zim.h>
#include "xapian/myhtmlparse.h"
#include <vector>
namespace kiwix {
std::map<std::string, int> read_valuesmap(const std::string &s) {
std::map<std::string, int> result;
std::vector<std::string> elems = split(s, ";");
for(std::vector<std::string>::iterator elem = elems.begin();
elem != elems.end();
elem++)
{
std::vector<std::string> tmp_elems = split(*elem, ":");
result.insert( std::pair<std::string, int>(tmp_elems[0], atoi(tmp_elems[1].c_str())) );
}
return result;
namespace kiwix
{
std::map<std::string, int> read_valuesmap(const std::string& s)
{
std::map<std::string, int> result;
std::vector<std::string> elems = split(s, ";");
for (std::vector<std::string>::iterator elem = elems.begin();
elem != elems.end();
elem++) {
std::vector<std::string> tmp_elems = split(*elem, ":");
result.insert(
std::pair<std::string, int>(tmp_elems[0], atoi(tmp_elems[1].c_str())));
}
return result;
}
/* Constructor */
XapianSearcher::XapianSearcher(const string &xapianDirectoryPath, Reader* reader)
/* Constructor */
XapianSearcher::XapianSearcher(const string& xapianDirectoryPath,
Reader* reader)
: reader(reader)
{
this->openIndex(xapianDirectoryPath);
}
{
this->openIndex(xapianDirectoryPath);
}
/* Open Xapian readable database */
void XapianSearcher::openIndex(const string &directoryPath) {
this->readableDatabase = Xapian::Database(directoryPath);
this->valuesmap = read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
this->language = this->readableDatabase.get_metadata("language");
this->stopwords = this->readableDatabase.get_metadata("stopwords");
setup_queryParser();
}
/* Close Xapian writable database */
void XapianSearcher::closeIndex() {
return;
}
/* Open Xapian readable database */
void XapianSearcher::openIndex(const string& directoryPath)
{
this->readableDatabase = Xapian::Database(directoryPath);
this->valuesmap
= read_valuesmap(this->readableDatabase.get_metadata("valuesmap"));
this->language = this->readableDatabase.get_metadata("language");
this->stopwords = this->readableDatabase.get_metadata("stopwords");
setup_queryParser();
}
void XapianSearcher::setup_queryParser()
{
queryParser.set_database(readableDatabase);
if ( ! language.empty() )
{
/* Build ICU Local object to retrieve ISO-639 language code (from
ISO-639-3) */
icu::Locale languageLocale(language.c_str());
/* Close Xapian writable database */
void XapianSearcher::closeIndex()
{
return;
}
void XapianSearcher::setup_queryParser()
{
queryParser.set_database(readableDatabase);
if (!language.empty()) {
/* Build ICU Local object to retrieve ISO-639 language code (from
ISO-639-3) */
icu::Locale languageLocale(language.c_str());
/* Configuring language base steemming */
try {
stemmer = Xapian::Stem(languageLocale.getLanguage());
queryParser.set_stemmer(stemmer);
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
} catch (...) {
std::cout << "No steemming for language '" << languageLocale.getLanguage() << "'" << std::endl;
}
}
if ( ! stopwords.empty() )
{
std::string stopWord;
std::istringstream file(this->stopwords);
while (std::getline(file, stopWord, '\n')) {
this->stopper.add(stopWord);
}
queryParser.set_stopper(&(this->stopper));
/* Configuring language base steemming */
try {
stemmer = Xapian::Stem(languageLocale.getLanguage());
queryParser.set_stemmer(stemmer);
queryParser.set_stemming_strategy(Xapian::QueryParser::STEM_ALL);
} catch (...) {
std::cout << "No steemming for language '" << languageLocale.getLanguage()
<< "'" << std::endl;
}
}
/* Search strings in the database */
void XapianSearcher::searchInIndex(string &search, const unsigned int resultStart,
const unsigned int resultEnd, const bool verbose) {
/* Create the query */
Xapian::Query query = queryParser.parse_query(search);
/* Create the enquire object */
Xapian::Enquire enquire(this->readableDatabase);
enquire.set_query(query);
/* Get the results */
this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
this->current_result = this->results.begin();
}
/* Get next result */
Result* XapianSearcher::getNextResult() {
if (this->current_result != this->results.end()) {
XapianResult* result = new XapianResult(this, this->current_result);
this->current_result++;
return result;
if (!stopwords.empty()) {
std::string stopWord;
std::istringstream file(this->stopwords);
while (std::getline(file, stopWord, '\n')) {
this->stopper.add(stopWord);
}
return NULL;
queryParser.set_stopper(&(this->stopper));
}
}
void XapianSearcher::restart_search() {
this->current_result = this->results.begin();
/* Search strings in the database */
void XapianSearcher::searchInIndex(string& search,
const unsigned int resultStart,
const unsigned int resultEnd,
const bool verbose)
{
/* Create the query */
Xapian::Query query = queryParser.parse_query(search);
/* Create the enquire object */
Xapian::Enquire enquire(this->readableDatabase);
enquire.set_query(query);
/* Get the results */
this->results = enquire.get_mset(resultStart, resultEnd - resultStart);
this->current_result = this->results.begin();
}
/* Get next result */
Result* XapianSearcher::getNextResult()
{
if (this->current_result != this->results.end()) {
XapianResult* result = new XapianResult(this, this->current_result);
this->current_result++;
return result;
}
return NULL;
}
XapianResult::XapianResult(XapianSearcher* searcher, Xapian::MSetIterator& iterator):
searcher(searcher),
iterator(iterator),
document(iterator.get_document())
{
void XapianSearcher::restart_search()
{
this->current_result = this->results.begin();
}
XapianResult::XapianResult(XapianSearcher* searcher,
Xapian::MSetIterator& iterator)
: searcher(searcher), iterator(iterator), document(iterator.get_document())
{
}
std::string XapianResult::get_url()
{
return document.get_data();
}
std::string XapianResult::get_title()
{
if (searcher->valuesmap.empty()) {
/* This is the old legacy version. Guess and try */
return document.get_value(0);
} else if (searcher->valuesmap.find("title") != searcher->valuesmap.end()) {
return document.get_value(searcher->valuesmap["title"]);
}
return "";
}
std::string XapianResult::get_url() {
return document.get_data();
int XapianResult::get_score()
{
return iterator.get_percent();
}
std::string XapianResult::get_snippet()
{
if (searcher->valuesmap.empty()) {
/* This is the old legacy version. Guess and try */
std::string stored_snippet = document.get_value(1);
if (!stored_snippet.empty()) {
return stored_snippet;
}
/* Let's continue here, and see if we can genenate one */
} else if (searcher->valuesmap.find("snippet") != searcher->valuesmap.end()) {
return document.get_value(searcher->valuesmap["snippet"]);
}
std::string XapianResult::get_title() {
if ( searcher->valuesmap.empty() )
{
/* This is the old legacy version. Guess and try */
return document.get_value(0);
}
else if ( searcher->valuesmap.find("title") != searcher->valuesmap.end() )
{
return document.get_value(searcher->valuesmap["title"]);
}
return "";
/* No reader, no snippet */
if (!searcher->reader) {
return "";
}
int XapianResult::get_score() {
return iterator.get_percent();
/* Get the content of the article to generate a snippet.
We parse it and use the html dump to avoid remove html tags in the
content and be able to nicely cut the text at random place. */
MyHtmlParser htmlParser;
std::string content;
unsigned int contentLength;
std::string contentType;
searcher->reader->getContentByUrl(
get_url(), content, contentLength, contentType);
try {
htmlParser.parse_html(content, "UTF-8", true);
} catch (...) {
}
return searcher->results.snippet(htmlParser.dump, 500);
}
std::string XapianResult::get_snippet() {
if ( searcher->valuesmap.empty() )
{
/* This is the old legacy version. Guess and try */
std::string stored_snippet = document.get_value(1);
if ( ! stored_snippet.empty() )
return stored_snippet;
/* Let's continue here, and see if we can genenate one */
}
else if ( searcher->valuesmap.find("snippet") != searcher->valuesmap.end() )
{
return document.get_value(searcher->valuesmap["snippet"]);
}
/* No reader, no snippet */
if ( ! searcher->reader )
return "";
/* Get the content of the article to generate a snippet.
We parse it and use the html dump to avoid remove html tags in the
content and be able to nicely cut the text at random place. */
MyHtmlParser htmlParser;
std::string content;
unsigned int contentLength;
std::string contentType;
searcher->reader->getContentByUrl(get_url(), content, contentLength, contentType);
try {
htmlParser.parse_html(content, "UTF-8", true);
} catch (...) {}
return searcher->results.snippet(htmlParser.dump, 500);
int XapianResult::get_size()
{
if (searcher->valuesmap.empty()) {
/* This is the old legacy version. Guess and try */
return document.get_value(2).empty() == true
? -1
: atoi(document.get_value(2).c_str());
} else if (searcher->valuesmap.find("size") != searcher->valuesmap.end()) {
return atoi(document.get_value(searcher->valuesmap["size"]).c_str());
}
/* The size is never used. Do we really want to get the content and
calculate the size ? */
return -1;
}
int XapianResult::get_size() {
if ( searcher->valuesmap.empty() )
{
/* This is the old legacy version. Guess and try */
return document.get_value(2).empty() == true ? -1 : atoi(document.get_value(2).c_str());
}
else if ( searcher->valuesmap.find("size") != searcher->valuesmap.end() )
{
return atoi(document.get_value(searcher->valuesmap["size"]).c_str());
}
/* The size is never used. Do we really want to get the content and
calculate the size ? */
return -1;
int XapianResult::get_wordCount()
{
if (searcher->valuesmap.empty()) {
/* This is the old legacy version. Guess and try */
return document.get_value(3).empty() == true
? -1
: atoi(document.get_value(3).c_str());
} else if (searcher->valuesmap.find("wordcount")
!= searcher->valuesmap.end()) {
return atoi(document.get_value(searcher->valuesmap["wordcount"]).c_str());
}
return -1;
}
int XapianResult::get_wordCount() {
if ( searcher->valuesmap.empty() )
{
/* This is the old legacy version. Guess and try */
return document.get_value(3).empty() == true ? -1 : atoi(document.get_value(3).c_str());
}
else if ( searcher->valuesmap.find("wordcount") != searcher->valuesmap.end() )
{
return atoi(document.get_value(searcher->valuesmap["wordcount"]).c_str());
}
return -1;
}
} // Kiwix namespace
} // Kiwix namespace