Merge pull request #429 from kiwix/open_zimfile_by_fd

JNI interface to opening ZIM archives (including embedded ones) by fd
This commit is contained in:
Kelson 2021-02-26 09:20:58 +01:00 committed by GitHub
commit d3f2e08b35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 344 additions and 59 deletions

View File

@ -111,7 +111,7 @@ class Entry
* The offset is the offset to read in the file. * The offset is the offset to read in the file.
* Return <"",0> if it is not possible to read directly. * Return <"",0> if it is not possible to read directly.
*/ */
std::pair<std::string, offset_type> getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); } zim::Item::DirectAccessInfo getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); }
/** /**
* Get the size of the entry. * Get the size of the entry.

View File

@ -55,7 +55,11 @@ class Reader
* unsplit path as if the file were not split * unsplit path as if the file were not split
* (.zim extension). * (.zim extension).
*/ */
Reader(const string zimFilePath); explicit Reader(const string zimFilePath);
#ifndef _WIN32
explicit Reader(int fd);
Reader(int fd, zim::offset_type offset, zim::size_type size);
#endif
~Reader() = default; ~Reader() = default;
/** /**

View File

@ -86,6 +86,24 @@ Reader::Reader(const string zimFilePath)
srand(time(nullptr)); srand(time(nullptr));
} }
#ifndef _WIN32
/* Build a reader on top of an archive opened from the file descriptor `fd`.
 * No file path is recorded for such a reader: zimFilePath is left empty. */
Reader::Reader(int fd)
  : zimArchive(new zim::Archive(fd)),
    zimFilePath("")
{
  /* seed the C random number generator from the current time */
  const auto seed = time(nullptr);
  srand(seed);
}
/* Build a reader on top of an archive that occupies `size` bytes starting at
 * byte `offset` of the file referred to by `fd`.
 * No file path is recorded for such a reader: zimFilePath is left empty. */
Reader::Reader(int fd, zim::offset_type offset, zim::size_type size)
  : zimArchive(new zim::Archive(fd, offset, size)),
    zimFilePath("")
{
  /* seed the C random number generator from the current time */
  const auto seed = time(nullptr);
  srand(seed);
}
#endif // #ifndef _WIN32
zim::Archive* Reader::getZimArchive() const zim::Archive* Reader::getZimArchive() const
{ {
return zimArchive.get(); return zimArchive.get();

View File

@ -45,6 +45,72 @@ JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReader(
} }
} }
namespace
{
/**
 * Extract the native file descriptor number stored in a
 * java.io.FileDescriptor object.
 *
 * @param fdObj  the java.io.FileDescriptor instance (may be null)
 * @param env    JNI environment of the calling thread
 * @return the descriptor number, or -1 when it cannot be obtained (null
 *         object, class lookup failure, or neither private field exists).
 *         In the failure cases no Java exception is left pending.
 *
 * NOTE(review): callers pass the result straight to kiwix::Reader; confirm
 * that a negative descriptor is rejected there with an exception.
 */
int jni2fd(const jobject& fdObj, JNIEnv* env)
{
  // GetIntField() must not be called with a null object.
  if ( fdObj == NULL )
    return -1;

  jclass class_fdesc = env->FindClass("java/io/FileDescriptor");
  if ( class_fdesc == NULL )
  {
    env->ExceptionClear();
    return -1;
  }

  jfieldID field_fd = env->GetFieldID(class_fdesc, "fd", "I");
  if ( field_fd == NULL )
  {
    env->ExceptionClear();
    // Under Android the (private) 'fd' field of java.io.FileDescriptor has been
    // renamed to 'descriptor'. See, for example,
    // https://android.googlesource.com/platform/libcore/+/refs/tags/android-8.1.0_r1/ojluni/src/main/java/java/io/FileDescriptor.java#55
    field_fd = env->GetFieldID(class_fdesc, "descriptor", "I");
  }

  if ( field_fd == NULL )
  {
    // Neither field name exists: drop the pending NoSuchFieldError instead of
    // calling GetIntField() with an invalid field ID.
    env->ExceptionClear();
    return -1;
  }

  return env->GetIntField(fdObj, field_fd);
}
} // unnamed namespace
/**
 * JNI entry point: create a native kiwix::Reader for the ZIM archive referred
 * to by the java.io.FileDescriptor `fdObj`.
 *
 * @return a handle (pointer stored in a jlong) wrapping the new reader, or 0
 *         if the reader could not be created. On Windows a
 *         java.lang.UnsupportedOperationException is raised and 0 is returned.
 */
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReaderByFD(
    JNIEnv* env, jobject obj, jobject fdObj)
{
#ifndef _WIN32
  int fd = jni2fd(fdObj, env);

  LOG("Attempting to create reader with fd: %d", fd);
  Lock l;
  try {
    kiwix::Reader* reader = new kiwix::Reader(fd);
    return reinterpret_cast<jlong>(new Handle<kiwix::Reader>(reader));
  } catch (const std::exception& e) {
    LOG("Error opening ZIM file");
    // Never use the exception text as the format string itself.
    LOG("%s", e.what());
    return 0;
  }
#else
  jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
  // If the lookup failed an exception is already pending; ThrowNew() must not
  // be called with a null class.
  if ( exception != NULL )
  {
    env->ThrowNew(exception, "org.kiwix.kiwixlib.JNIKiwixReader.getNativeReaderByFD() is not supported under Windows");
  }
  return 0;
#endif
}
/**
 * JNI entry point: create a native kiwix::Reader for a ZIM archive embedded
 * in another file, i.e. occupying `size` bytes starting at byte `offset` of
 * the file referred to by the java.io.FileDescriptor `fdObj`.
 *
 * @return a handle (pointer stored in a jlong) wrapping the new reader, or 0
 *         if the range is negative or the reader could not be created. On
 *         Windows a java.lang.UnsupportedOperationException is raised and 0
 *         is returned.
 */
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReaderEmbedded(
    JNIEnv* env, jobject obj, jobject fdObj, jlong offset, jlong size)
{
#ifndef _WIN32
  // jlong is signed; a negative value must not be handed to the reader
  // constructor, where it would be converted to another integer type.
  if ( offset < 0 || size < 0 )
  {
    LOG("Invalid embedded archive range: offset=%lld, size=%lld",
        static_cast<long long>(offset), static_cast<long long>(size));
    return 0;
  }

  int fd = jni2fd(fdObj, env);

  LOG("Attempting to create reader with fd: %d", fd);
  Lock l;
  try {
    kiwix::Reader* reader = new kiwix::Reader(fd, offset, size);
    return reinterpret_cast<jlong>(new Handle<kiwix::Reader>(reader));
  } catch (const std::exception& e) {
    LOG("Error opening ZIM file");
    // Never use the exception text as the format string itself.
    LOG("%s", e.what());
    return 0;
  }
#else
  jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
  // If the lookup failed an exception is already pending; ThrowNew() must not
  // be called with a null class.
  if ( exception != NULL )
  {
    env->ThrowNew(exception, "org.kiwix.kiwixlib.JNIKiwixReader.getNativeReaderEmbedded() is not supported under Windows");
  }
  return 0;
#endif
}
JNIEXPORT void JNICALL JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_dispose(JNIEnv* env, jobject obj) Java_org_kiwix_kiwixlib_JNIKiwixReader_dispose(JNIEnv* env, jobject obj)
{ {
@ -325,22 +391,22 @@ JNIEXPORT jobject JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation( Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation(
JNIEnv* env, jobject obj, jstring url) JNIEnv* env, jobject obj, jstring url)
{ {
jclass classPair = env->FindClass("org/kiwix/kiwixlib/Pair"); jclass daiClass = env->FindClass("org/kiwix/kiwixlib/DirectAccessInfo");
jmethodID midPairinit = env->GetMethodID(classPair, "<init>", "()V"); jmethodID daiInitMethod = env->GetMethodID(daiClass, "<init>", "()V");
jobject pair = env->NewObject(classPair, midPairinit); jobject dai = env->NewObject(daiClass, daiInitMethod);
setPairObjValue("", 0, pair, env); setDaiObjValue("", 0, dai, env);
std::string cUrl = jni2c(url, env); std::string cUrl = jni2c(url, env);
try { try {
auto entry = READER->getEntryFromEncodedPath(cUrl); auto entry = READER->getEntryFromEncodedPath(cUrl);
entry = entry.getFinalEntry(); entry = entry.getFinalEntry();
auto part_info = entry.getDirectAccessInfo(); auto part_info = entry.getDirectAccessInfo();
setPairObjValue(part_info.first, part_info.second, pair, env); setDaiObjValue(part_info.first, part_info.second, dai, env);
} catch (std::exception& e) { } catch (std::exception& e) {
LOG("Unable to get direct access info for url: %s", cUrl.c_str()); LOG("Unable to get direct access info for url: %s", cUrl.c_str());
LOG(e.what()); LOG(e.what());
} }
return pair; return dai;
} }
JNIEXPORT jboolean JNICALL JNIEXPORT jboolean JNICALL

View File

@ -12,7 +12,7 @@ java_sources = files([
'org/kiwix/kiwixlib/JNIKiwixString.java', 'org/kiwix/kiwixlib/JNIKiwixString.java',
'org/kiwix/kiwixlib/JNIKiwixBool.java', 'org/kiwix/kiwixlib/JNIKiwixBool.java',
'org/kiwix/kiwixlib/JNIKiwixException.java', 'org/kiwix/kiwixlib/JNIKiwixException.java',
'org/kiwix/kiwixlib/Pair.java' 'org/kiwix/kiwixlib/DirectAccessInfo.java'
]) ])
kiwix_jni = custom_target('jni', kiwix_jni = custom_target('jni',

View File

@ -19,7 +19,7 @@
package org.kiwix.kiwixlib; package org.kiwix.kiwixlib;
public class Pair public class DirectAccessInfo
{ {
public String filename; public String filename;
public long offset; public long offset;

View File

@ -24,7 +24,8 @@ import org.kiwix.kiwixlib.JNIKiwixException;
import org.kiwix.kiwixlib.JNIKiwixString; import org.kiwix.kiwixlib.JNIKiwixString;
import org.kiwix.kiwixlib.JNIKiwixInt; import org.kiwix.kiwixlib.JNIKiwixInt;
import org.kiwix.kiwixlib.JNIKiwixSearcher; import org.kiwix.kiwixlib.JNIKiwixSearcher;
import org.kiwix.kiwixlib.Pair; import org.kiwix.kiwixlib.DirectAccessInfo;
import java.io.FileDescriptor;
public class JNIKiwixReader public class JNIKiwixReader
{ {
@ -102,13 +103,13 @@ public class JNIKiwixReader
* the zim file (or zim part) and directly read the content from it (and so * the zim file (or zim part) and directly read the content from it (and so
* bypassing the libzim). * bypassing the libzim).
* *
* Return a `Pair` (filename, offset) where the content is located. * Return a `DirectAccessInfo` (filename, offset) where the content is located.
* *
* If the content cannot be directly accessed (content is compressed or zim * If the content cannot be directly accessed (content is compressed or zim
* file is cut in the middle of the content), the filename is an empty string * file is cut in the middle of the content), the filename is an empty string
* and offset is zero. * and offset is zero.
*/ */
public native Pair getDirectAccessInformation(String url); public native DirectAccessInfo getDirectAccessInformation(String url);
public native boolean searchSuggestions(String prefix, int count); public native boolean searchSuggestions(String prefix, int count);
@ -151,11 +152,31 @@ public class JNIKiwixReader
throw new JNIKiwixException("Cannot open zimfile "+filename); throw new JNIKiwixException("Cannot open zimfile "+filename);
} }
} }
/**
 * Open a ZIM archive through an already opened file descriptor.
 *
 * @param fd descriptor of the ZIM file
 * @throws JNIKiwixException if {@code fd} is null or the native reader
 *                           cannot be created
 */
public JNIKiwixReader(FileDescriptor fd) throws JNIKiwixException
{
  // Reject null here rather than handing it to native code.
  if (fd == null) {
    throw new JNIKiwixException("Cannot open zimfile by fd: file descriptor is null");
  }
  nativeHandle = getNativeReaderByFD(fd);
  if (nativeHandle == 0) {
    throw new JNIKiwixException("Cannot open zimfile by fd "+fd.toString());
  }
}
/**
 * Open a ZIM archive embedded in another file.
 *
 * @param fd     descriptor of the containing file
 * @param offset position of the first byte of the archive in that file
 * @param size   size of the archive in bytes
 * @throws JNIKiwixException if {@code fd} is null or the native reader
 *                           cannot be created
 */
public JNIKiwixReader(FileDescriptor fd, long offset, long size)
throws JNIKiwixException
{
  // Reject null here rather than handing it to native code.
  if (fd == null) {
    throw new JNIKiwixException("Cannot open embedded zimfile: file descriptor is null");
  }
  nativeHandle = getNativeReaderEmbedded(fd, offset, size);
  if (nativeHandle == 0) {
    throw new JNIKiwixException(String.format("Cannot open embedded zimfile (fd=%s, offset=%d, size=%d)", fd, offset, size));
  }
}
public JNIKiwixReader() { public JNIKiwixReader() {
} }
public native void dispose(); public native void dispose();
private native long getNativeReader(String filename); private native long getNativeReader(String filename);
private native long getNativeReaderByFD(FileDescriptor fd);
private native long getNativeReaderEmbedded(FileDescriptor fd, long offset, long size);
private long nativeHandle; private long nativeHandle;
} }

View File

@ -0,0 +1,19 @@
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:opds="http://opds-spec.org/2010/catalog">
<id>00000000-0000-0000-0000-000000000000</id>
<entry>
<title>Test ZIM file</title>
<id>urn:uuid:86c91e51-55bf-8882-464e-072aca37a3e8</id>
<icon>/meta?name=favicon&amp;content=small</icon>
<updated>2020-11-27:00::00:Z</updated>
<language>en</language>
<summary>This is a ZIM file used in libzim unit-tests</summary>
<tags>unit;test</tags>
<link type="text/html" href="/small" />
<author>
<name>Kiwix</name>
</author>
<link rel="http://opds-spec.org/acquisition/open-access" type="application/x-zim" href="http://localhost/small.zim" length="78982" />
<link rel="http://opds-spec.org/image/thumbnail" type="image/png" href="/meta?name=favicon&amp;content=small" />
</entry>
</feed>

View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash

# This script compiles and runs the unit test to test the java wrapper.
# This is not integrated in meson because ... this is not so easy.
#
# Arguments:
#   $1  path to kiwixlib.jar
#   $2  path to the directory containing libkiwix.so

# Print an error message on stderr and abort the script.
die()
{
  echo >&2 "!!! ERROR: $*"
  exit 1
}

KIWIX_LIB_JAR=$1
if [ -z "$KIWIX_LIB_JAR" ]
then
  die "You must give the path to the kiwixlib.jar as first argument"
fi

KIWIX_LIB_DIR=$2
if [ -z "$KIWIX_LIB_DIR" ]
then
  die "You must give the path to directory containing libkiwix.so as second argument"
fi

# Resolve everything to absolute paths before changing directory below.
KIWIX_LIB_JAR=$(readlink -f "$KIWIX_LIB_JAR")
KIWIX_LIB_DIR=$(readlink -f "$KIWIX_LIB_DIR")
TEST_SOURCE_DIR=$(dirname "$(readlink -f "$0")")

# The jars and test.java are referenced relative to the script's directory.
cd "$TEST_SOURCE_DIR" || die "Cannot change directory to $TEST_SOURCE_DIR"

javac -g -d . -s . -cp "junit-4.13.jar:$KIWIX_LIB_JAR" test.java \
  || die "Compilation failed"

java -Djava.library.path="$KIWIX_LIB_DIR" \
     -cp "junit-4.13.jar:hamcrest-core-1.3.jar:$KIWIX_LIB_JAR:." \
     org.junit.runner.JUnitCore test \
  || die "Unit test failed"

View File

@ -1,26 +0,0 @@
#!/usr/bin/bash
# This script compiles and runs the unit test of the java wrapper.
# This is not integrated in meson because ... this is not so easy.
# First argument: path to kiwixlib.jar (mandatory).
KIWIX_LIB_JAR=$1
if [ -z $KIWIX_LIB_JAR ]
then
echo "You must give the path to the kiwixlib.jar as first argument"
exit 1
fi
# Second argument: directory containing libkiwix.so (mandatory).
KIWIX_LIB_DIR=$2
if [ -z $KIWIX_LIB_DIR ]
then
echo "You must give the path to directory containing libkiwix.so as second argument"
exit 1
fi
# Directory of this script; the junit/hamcrest jars and test.java are looked up there.
TEST_SOURCE_DIR=$(dirname $(readlink -f $0))
# Compile the test; class files are written to the current directory (-d .).
javac -g -d . -s . -cp $TEST_SOURCE_DIR/junit-4.13.jar:$KIWIX_LIB_JAR $TEST_SOURCE_DIR/test.java
# Run the compiled test with JUnit, with java.library.path set to KIWIX_LIB_DIR.
java -Djava.library.path=$KIWIX_LIB_DIR -cp $TEST_SOURCE_DIR/junit-4.13.jar:$TEST_SOURCE_DIR/hamcrest-core-1.3.jar:$KIWIX_LIB_JAR:. org.junit.runner.JUnitCore test

View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash

# Regenerates the ZIM fixtures next to this script:
#   small.zim           built by zimwriterfs from the small_zimfile_data directory
#   small.zim.embedded  small.zim wrapped between the markers BEGINZIM and ENDZIM

# Print an error message on stderr and abort the script.
die()
{
  echo >&2 "!!! ERROR: $*"
  exit 1
}

# All paths below are relative to the script's directory; abort if it cannot be
# entered so that nothing is removed or created elsewhere.
cd "$(dirname "$0")" || die "Cannot change directory to $(dirname "$0")"

rm -f small.zim
zimwriterfs --withoutFTIndex \
            -w main.html \
            -f favicon.png \
            -l en \
            -t "Test ZIM file" \
            -d "N/A" \
            -c "N/A" \
            -p "N/A" \
            small_zimfile_data \
            small.zim \
&& echo 'small.zim was successfully created' \
|| die 'Failed to create small.zim'

# The 8-byte "BEGINZIM" prefix places the archive at offset 8 of the embedded file.
printf "BEGINZIM" > small.zim.embedded \
&& cat small.zim >> small.zim.embedded \
&& printf "ENDZIM" >> small.zim.embedded \
&& echo 'small.zim.embedded was successfully created' \
|| die 'Failed to create small.zim.embedded'

Binary file not shown.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

View File

@ -0,0 +1,11 @@
<html>
<head>
<meta charset="UTF-8">
<title>Test ZIM file</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
Test ZIM file
</body>
</html>

View File

@ -10,36 +10,143 @@ static {
System.loadLibrary("kiwix"); System.loadLibrary("kiwix");
} }
private static String getCatalogContent() private static byte[] getFileContent(String path)
throws IOException throws IOException
{ {
BufferedReader reader = new BufferedReader(new FileReader("catalog.xml")); File file = new File(path);
String line; DataInputStream in = new DataInputStream(
StringBuilder sb = new StringBuilder(); new BufferedInputStream(
while ((line = reader.readLine()) != null) new FileInputStream(file)));
{ byte[] data = new byte[(int)file.length()];
sb.append(line + "\n"); in.read(data);
} return data;
reader.close(); }
return sb.toString();
/**
 * Read exactly {@code size} bytes of the file {@code path}, starting at byte
 * {@code offset}.
 *
 * @throws IOException if the file cannot be read or ends before
 *                     {@code offset + size} bytes
 */
private static byte[] getFileContentPartial(String path, int offset, int size)
throws IOException
{
  File file = new File(path);
  // try-with-resources: the stream is closed on every exit path.
  try (DataInputStream in = new DataInputStream(
                              new BufferedInputStream(
                                new FileInputStream(file)))) {
    // skipBytes() may skip fewer bytes than requested (e.g. at end of file).
    if (in.skipBytes(offset) != offset) {
      throw new IOException("Cannot skip " + offset + " bytes in " + path);
    }
    byte[] data = new byte[size];
    // readFully() either fills the whole buffer or throws, unlike read().
    in.readFully(data);
    return data;
  }
}
private static String getTextFileContent(String path)
throws IOException
{
return new String(getFileContent(path));
} }
@Test @Test
public void testSome() public void testReader()
throws JNIKiwixException, IOException
{
JNIKiwixReader reader = new JNIKiwixReader("small.zim");
assertEquals("Test ZIM file", reader.getTitle());
assertEquals(45, reader.getFileSize()); // The file size is in KiB
assertEquals("A/main.html", reader.getMainPage());
String s = getTextFileContent("small_zimfile_data/main.html");
byte[] c = reader.getContent(new JNIKiwixString("A/main.html"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertEquals(s, new String(c));
byte[] faviconData = getFileContent("small_zimfile_data/favicon.png");
assertEquals(faviconData.length, reader.getArticleSize("I/favicon.png"));
c = reader.getContent(new JNIKiwixString("I/favicon.png"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertTrue(Arrays.equals(faviconData, c));
DirectAccessInfo dai = reader.getDirectAccessInformation("I/favicon.png");
assertNotEquals("", dai.filename);
c = getFileContentPartial(dai.filename, (int)dai.offset, faviconData.length);
assertTrue(Arrays.equals(faviconData, c));
}
/**
 * Opens small.zim through a file descriptor and checks its metadata, the
 * content of two entries and the direct access information of the favicon.
 */
@Test
public void testReaderByFd()
throws JNIKiwixException, IOException
{
  // try-with-resources: the descriptor is closed even if an assertion fails.
  try (FileInputStream fis = new FileInputStream("small.zim")) {
    JNIKiwixReader reader = new JNIKiwixReader(fis.getFD());
    try {
      assertEquals("Test ZIM file", reader.getTitle());
      assertEquals(45, reader.getFileSize()); // The file size is in KiB
      assertEquals("A/main.html", reader.getMainPage());
      String s = getTextFileContent("small_zimfile_data/main.html");
      byte[] c = reader.getContent(new JNIKiwixString("A/main.html"),
                                   new JNIKiwixString(),
                                   new JNIKiwixString(),
                                   new JNIKiwixInt());
      assertEquals(s, new String(c));

      byte[] faviconData = getFileContent("small_zimfile_data/favicon.png");
      assertEquals(faviconData.length, reader.getArticleSize("I/favicon.png"));
      c = reader.getContent(new JNIKiwixString("I/favicon.png"),
                            new JNIKiwixString(),
                            new JNIKiwixString(),
                            new JNIKiwixInt());
      assertTrue(Arrays.equals(faviconData, c));

      DirectAccessInfo dai = reader.getDirectAccessInformation("I/favicon.png");
      assertNotEquals("", dai.filename);
      c = getFileContentPartial(dai.filename, (int)dai.offset, faviconData.length);
      assertTrue(Arrays.equals(faviconData, c));
    } finally {
      // Release the native reader before the descriptor it was given is closed.
      reader.dispose();
    }
  }
}
/**
 * Opens the archive embedded in small.zim.embedded (at offset 8, with the
 * size of small.zim) and checks its metadata, the content of two entries and
 * the direct access information of the favicon.
 */
@Test
public void testReaderWithAnEmbeddedArchive()
throws JNIKiwixException, IOException
{
  File plainArchive = new File("small.zim");
  // try-with-resources: the descriptor is closed even if an assertion fails.
  try (FileInputStream fis = new FileInputStream("small.zim.embedded")) {
    JNIKiwixReader reader = new JNIKiwixReader(fis.getFD(), 8, plainArchive.length());
    try {
      assertEquals("Test ZIM file", reader.getTitle());
      assertEquals(45, reader.getFileSize()); // The file size is in KiB
      assertEquals("A/main.html", reader.getMainPage());
      String s = getTextFileContent("small_zimfile_data/main.html");
      byte[] c = reader.getContent(new JNIKiwixString("A/main.html"),
                                   new JNIKiwixString(),
                                   new JNIKiwixString(),
                                   new JNIKiwixInt());
      assertEquals(s, new String(c));

      byte[] faviconData = getFileContent("small_zimfile_data/favicon.png");
      assertEquals(faviconData.length, reader.getArticleSize("I/favicon.png"));
      c = reader.getContent(new JNIKiwixString("I/favicon.png"),
                            new JNIKiwixString(),
                            new JNIKiwixString(),
                            new JNIKiwixInt());
      assertTrue(Arrays.equals(faviconData, c));

      DirectAccessInfo dai = reader.getDirectAccessInformation("I/favicon.png");
      assertNotEquals("", dai.filename);
      c = getFileContentPartial(dai.filename, (int)dai.offset, faviconData.length);
      assertTrue(Arrays.equals(faviconData, c));
    } finally {
      // Release the native reader before the descriptor it was given is closed.
      reader.dispose();
    }
  }
}
@Test
public void testLibrary()
throws IOException throws IOException
{ {
Library lib = new Library(); Library lib = new Library();
Manager manager = new Manager(lib); Manager manager = new Manager(lib);
String content = getCatalogContent(); String content = getTextFileContent("catalog.xml");
manager.readOpds(content, "https://library.kiwix.org"); manager.readOpds(content, "http://localhost");
assertEquals(lib.getBookCount(true, true), 10); assertEquals(lib.getBookCount(true, true), 1);
String[] bookIds = lib.getBooksIds(); String[] bookIds = lib.getBooksIds();
assertEquals(bookIds.length, 10); assertEquals(bookIds.length, 1);
Book book = lib.getBookById(bookIds[0]); Book book = lib.getBookById(bookIds[0]);
assertEquals(book.getTitle(), "Wikisource"); assertEquals(book.getTitle(), "Test ZIM file");
assertEquals(book.getTags(), "wikisource;_category:wikisource;_pictures:no;_videos:no;_details:yes;_ftindex:yes"); assertEquals(book.getTags(), "unit;test");
assertEquals(book.getFaviconUrl(), "https://library.kiwix.org/meta?name=favicon&content=wikisource_fr_all_nopic_2020-01"); assertEquals(book.getFaviconUrl(), "http://localhost/meta?name=favicon&content=small");
assertEquals(book.getUrl(), "http://download.kiwix.org/zim/wikisource/wikisource_fr_all_nopic_2020-01.zim.meta4"); assertEquals(book.getUrl(), "http://localhost/small.zim");
} }
static static

View File

@ -246,7 +246,7 @@ inline void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env)
env->SetIntField(obj, objFid, c2jni(value, env)); env->SetIntField(obj, objFid, c2jni(value, env));
} }
inline void setPairObjValue(const std::string& filename, const long offset, inline void setDaiObjValue(const std::string& filename, const long offset,
const jobject obj, JNIEnv* env) const jobject obj, JNIEnv* env)
{ {
jclass objClass = env->GetObjectClass(obj); jclass objClass = env->GetObjectClass(obj);