Merge pull request #66 from kiwix/multisearch

Multisearch
2017-07-18 16:07:46 +02:00 · 2017-07-18 16:07:46 +02:00 · 473b62c9b8
parent c56e1f0446 bc5f4f5de4
commit 473b62c9b8
6 changed files with 144 additions and 37 deletions
--- a/include/searcher.h
+++ b/include/searcher.h
@@ -46,17 +46,21 @@ class Result
  virtual std::string get_title() = 0;
  virtual int get_score() = 0;
  virtual std::string get_snippet() = 0;
+  virtual std::string get_content() = 0;
  virtual int get_wordCount() = 0;
  virtual int get_size() = 0;
+  virtual int get_readerIndex() = 0;
 };

 struct SearcherInternal;
 class Searcher
 {
 public:
+  Searcher();
  Searcher(const string& xapianDirectoryPath, Reader* reader);
  ~Searcher();

+  void add_reader(Reader* reader, const std::string& humanReaderName);
  void search(std::string& search,
              unsigned int resultStart,
              unsigned int resultEnd,
@@ -82,7 +86,8 @@ class Searcher
                     const unsigned int resultEnd,
                     const bool verbose = false);

-  Reader* reader;
+  std::vector<Reader*> readers;
+  std::vector<std::string> humanReaderNames;
  SearcherInternal* internal;
  std::string searchPattern;
  std::string protocolPrefix;
--- a/include/xapianSearcher.h
+++ b/include/xapianSearcher.h
@@ -43,8 +43,10 @@ class XapianResult : public Result
  virtual std::string get_title();
  virtual int get_score();
  virtual std::string get_snippet();
+  virtual std::string get_content();
  virtual int get_wordCount();
  virtual int get_size();
+  virtual int get_readerIndex() { return 0; };

 private:
  XapianSearcher* searcher;
--- a/src/android/kiwix.cpp
+++ b/src/android/kiwix.cpp
@@ -486,12 +486,19 @@ JNIEXPORT jboolean JNICALL Java_org_kiwix_kiwixlib_JNIKiwix_loadFulltextIndex(
  std::string cPath = jni2c(path, env);

  pthread_mutex_lock(&searcherLock);
-  searcher = NULL;
  try {
    if (searcher != NULL) {
      delete searcher;
    }
-    searcher = new kiwix::Searcher(cPath, reader);
+    if (!reader || !reader->hasFulltextIndex()) {
+      // Use old API (no embedded full text index).
+      searcher = new kiwix::Searcher(cPath, reader);
+    } else {
+      // Use the new API. We don't care about the human readable name as
+      // we don't use it (in android).
+      searcher = new kiwix::Searcher();
+      searcher->add_reader(reader, "");
+    }
  } catch (...) {
    searcher = NULL;
    retVal = JNI_FALSE;
--- a/src/searcher.cpp
+++ b/src/searcher.cpp
@@ -45,8 +45,10 @@ class _Result : public Result
  virtual std::string get_title();
  virtual int get_score();
  virtual std::string get_snippet();
+  virtual std::string get_content();
  virtual int get_wordCount();
  virtual int get_size();
+  virtual int get_readerIndex();

 private:
  Searcher* searcher;
@@ -72,8 +74,7 @@ struct SearcherInternal {

 /* Constructor */
 Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
-    : reader(reader),
-      internal(new SearcherInternal()),
+    : internal(new SearcherInternal()),
      searchPattern(""),
      protocolPrefix("zim://"),
      searchProtocolPrefix("search://?"),
@@ -89,11 +90,32 @@ Searcher::Searcher(const string& xapianDirectoryPath, Reader* reader)
  }
 }

+Searcher::Searcher()  // Constructor taking no index path and no reader; readers are registered afterwards through add_reader().
+    : internal(new SearcherInternal()),
+      searchPattern(""),
+      protocolPrefix("zim://"),
+      searchProtocolPrefix("search://?"),
+      resultCountPerPage(0),
+      estimatedResultCount(0),
+      resultStart(0),
+      resultEnd(0)
+{
+  template_ct2 = RESOURCE::results_ct2;  // NOTE(review): presumably the bundled results template later rendered by getHtml() - confirm
+  loadICUExternalTables();
+}
+
 /* Destructor */
 Searcher::~Searcher()
 {
  delete internal;
 }
+
+void Searcher::add_reader(Reader* reader, const std::string& humanReadableName)  // Registers one more reader together with its display name.
+{
+  this->readers.push_back(reader);  // stored as given: no null check is done here and ~Searcher() does not delete it
+  this->humanReaderNames.push_back(humanReadableName);  // pushed in the same call, so both vectors share the same index for a reader
+}
+
 /* Search strings in the database */
 void Searcher::search(std::string& search,
                      unsigned int resultStart,
@@ -133,8 +155,15 @@ void Searcher::search(std::string& search,
      this->estimatedResultCount
          = internal->_xapianSearcher->results.get_matches_estimated();
    } else {
-      internal->_search = this->reader->getZimFileHandler()->search(
-          unaccentedSearch, resultStart, resultEnd);
+      std::vector<const zim::File*> zims;
+      for (auto current = this->readers.begin(); current != this->readers.end();
+           current++) {
+        zims.push_back((*current)->getZimFileHandler());
+      }
+      zim::Search* search = new zim::Search(zims);
+      search->set_query(unaccentedSearch);
+      search->set_range(resultStart, resultEnd);
+      internal->_search = search;
      internal->current_iterator = internal->_search->begin();
      this->estimatedResultCount = internal->_search->get_matches_estimated();
    }
@@ -190,8 +219,16 @@ void Searcher::suggestions(std::string& search, const bool verbose)
     * We do not support that. */
    this->estimatedResultCount = 0;
  } else {
-    internal->_search = this->reader->getZimFileHandler()->suggestions(
-          unaccentedSearch, resultStart, resultEnd);
+    std::vector<const zim::File*> zims;
+    for (auto current = this->readers.begin(); current != this->readers.end();
+         current++) {
+      zims.push_back((*current)->getZimFileHandler());
+    }
+    zim::Search* search = new zim::Search(zims);
+    search->set_query(unaccentedSearch);
+    search->set_range(resultStart, resultEnd);
+    search->set_suggestion_mode(true);
+    internal->_search = search;
    internal->current_iterator = internal->_search->begin();
    this->estimatedResultCount = internal->_search->get_matches_estimated();
  }
@@ -241,6 +278,13 @@ std::string _Result::get_snippet()
 {
  return iterator.get_snippet();
 }
+std::string _Result::get_content()  // Returns the data of the entry the iterator points at, or an empty string.
+{
+  if (iterator->good()) {  // only read data when the pointed-to entry reports good()
+    return iterator->getData();
+  }
+  return "";  // fallback when the entry is not good()
+}
 int _Result::get_size()
 {
  return iterator.get_size();
@@ -249,6 +293,10 @@ int _Result::get_wordCount()
 {
  return iterator.get_wordCount();
 }
+int _Result::get_readerIndex()  // Forwards the iterator's file index; getHtml() uses this value to index humanReaderNames.
+{
+  return iterator.get_fileIndex();  // NOTE(review): presumably the position of the source ZIM in the list given to zim::Search - confirm
+}
 #ifdef ENABLE_CTPP2

 string Searcher::getHtml()
@@ -266,6 +314,7 @@ string Searcher::getHtml()
    result["title"] = p_result->get_title();
    result["url"] = p_result->get_url();
    result["snippet"] = p_result->get_snippet();
+    result["contentId"] = humanReaderNames[p_result->get_readerIndex()];

    if (p_result->get_size() >= 0) {
      result["size"] = kiwix::beautifyInteger(p_result->get_size());
--- a/src/xapianSearcher.cpp
+++ b/src/xapianSearcher.cpp
@@ -177,11 +177,10 @@ std::string XapianResult::get_snippet()
     We parse it and use the html dump to avoid remove html tags in the
     content and be able to nicely cut the text at random place. */
  MyHtmlParser htmlParser;
-  std::string content;
-  unsigned int contentLength;
-  std::string contentType;
-  searcher->reader->getContentByUrl(
-      get_url(), content, contentLength, contentType);
+  std::string content = get_content();
+  if (content.empty()) {
+    return content;
+  }
  try {
    htmlParser.parse_html(content, "UTF-8", true);
  } catch (...) {
@@ -189,6 +188,19 @@ std::string XapianResult::get_snippet()
  return searcher->results.snippet(htmlParser.dump, 500);
 }

+std::string XapianResult::get_content()  // Returns the content fetched for get_url() through the searcher's reader.
+{
+  if (!searcher->reader) {  // without a reader there is nothing to fetch from
+    return "";
+  }
+  std::string content;
+  unsigned int contentLength;  // passed to getContentByUrl() below but not used afterwards
+  std::string contentType;  // passed to getContentByUrl() below but not used afterwards
+  searcher->reader->getContentByUrl(
+      get_url(), content, contentLength, contentType);
+  return content;  // get_snippet() treats an empty string here as "no snippet"
+}
+
 int XapianResult::get_size()
 {
  if (searcher->valuesmap.empty()) {
--- a/static/results.tmpl
+++ b/static/results.tmpl
@@ -92,36 +92,68 @@

    </style>
    <title>Search: <TMPL_var searchPattern></title>
-</head>
-<body bgcolor="white">
+  </head>
+  <body bgcolor="white">
    <div class="header">
-        <TMPL_if results>Results <b><TMPL_var resultStart>-<TMPL_var resultEnd></b> of <b><TMPL_var count></b> for <b><TMPL_var searchPattern></b><TMPL_else>No result were found for <b><TMPL_var searchPattern></b></TMPL_if>
+      <TMPL_if results>
+        Results
+        <b>
+          <TMPL_var resultStart>-<TMPL_var resultEnd>
+        </b> of <b>
+          <TMPL_var count>
+        </b> for <b>
+          <TMPL_var searchPattern>
+        </b>
+      <TMPL_else>
+        No result were found for <b><TMPL_var searchPattern></b>
+      </TMPL_if>
    </div>

    <div class="results">
-        <ul>
+      <ul>
        <TMPL_foreach results as result>
-            <li><a href="<TMPL_var protocolPrefix><TMPL_var contentId>/<TMPL_var result.url>"><TMPL_var result.title></a>
-            <cite><TMPL_if result.snippet><TMPL_var result.snippet>...</TMPL_if></cite>
-            <TMPL_if wordCount><div class="informations"><TMPL_var wordCount> words</div></TMPL_if>
-            </li>
+          <li>
+            <a href="<TMPL_var protocolPrefix><TMPL_var result.contentId>/<TMPL_var result.url>">
+              <TMPL_var result.title>
+            </a>
+            <cite>
+              <TMPL_if result.snippet>
+                <TMPL_var result.snippet>...
+              </TMPL_if>
+            </cite>
+            <TMPL_if wordCount>
+              <div class="informations"><TMPL_var wordCount> words</div>
+            </TMPL_if>
+          </li>
        </TMPL_foreach>
-        </ul>
+      </ul>
    </div>

    <div class="footer">
-        <ul>
-            <TMPL_if (resultLastPageStart>0)>
-            <li><a href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=0&end=<TMPL_var resultRange>">◀</a></li>
-            </TMPL_if>
-            <TMPL_foreach pages as page>
-            <li><a <TMPL_if page.selected>class="selected"</TMPL_if> href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=<TMPL_var page.start>&end=<TMPL_var page.end>"><TMPL_var page.label></a></li>
-            </TMPL_foreach>
-            <TMPL_if (resultLastPageStart>0)>
-            <li><a href="<TMPL_var searchProtocolPrefix>content=<TMPL_var contentId>&pattern=<TMPL_var searchPatternEncoded>&start=<TMPL_var resultLastPageStart>&end=<TMPL_var (resultLastPageStart+resultRange)>">▶</a></li>
-            </TMPL_if>
-        </ul>
-     </div>
-
-</body>
+      <ul>
+        <TMPL_if (resultLastPageStart>0)>
+          <li>
+            <a href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=0&end=<TMPL_var resultRange>">
+              ◀
+            </a>
+          </li>
+        </TMPL_if>
+        <TMPL_foreach pages as page>
+          <li>
+            <a <TMPL_if page.selected>class="selected"</TMPL_if>
+               href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=<TMPL_var page.start>&end=<TMPL_var page.end>">
+              <TMPL_var page.label>
+            </a>
+          </li>
+        </TMPL_foreach>
+        <TMPL_if (resultLastPageStart>0)>
+          <li>
+            <a href="<TMPL_var searchProtocolPrefix>pattern=<TMPL_var searchPatternEncoded><TMPL_if contentId>&content=<TMPL_var contentId></TMPL_if>&start=<TMPL_var resultLastPageStart>&end=<TMPL_var (resultLastPageStart+resultRange)>">
+              ▶
+            </a>
+          </li>
+        </TMPL_if>
+      </ul>
+    </div>
+  </body>
 </html>