Merge pull request #429 from kiwix/open_zimfile_by_fd

JNI interface to opening ZIM archives (including embedded ones) by fd
This commit is contained in:
Kelson 2021-02-26 09:20:58 +01:00 committed by GitHub
commit d3f2e08b35
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 344 additions and 59 deletions

View File

@ -111,7 +111,7 @@ class Entry
* The offset is the offset to read in the file.
* Return <"",0> if it is not possible to read directly.
*/
std::pair<std::string, offset_type> getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); }
zim::Item::DirectAccessInfo getDirectAccessInfo() const { return entry.getItem().getDirectAccessInformation(); }
/**
* Get the size of the entry.

View File

@ -55,7 +55,11 @@ class Reader
* unsplit path, as if the file were not split
* (.zim extension).
*/
Reader(const string zimFilePath);
explicit Reader(const string zimFilePath);
#ifndef _WIN32
explicit Reader(int fd);
Reader(int fd, zim::offset_type offset, zim::size_type size);
#endif
~Reader() = default;
/**

View File

@ -86,6 +86,24 @@ Reader::Reader(const string zimFilePath)
srand(time(nullptr));
}
#ifndef _WIN32
/**
 * Build a Reader on top of an already opened file descriptor.
 *
 * The archive is created directly from `fd`; since no path is involved,
 * zimFilePath is left empty.
 */
Reader::Reader(int fd)
  : zimArchive(new zim::Archive(fd)),
    zimFilePath("")
{
  // Seed the C pseudo-random generator with the current time.
  const time_t now = time(nullptr);
  srand(now);
}
/**
 * Build a Reader for a ZIM archive embedded inside a larger file.
 *
 * `fd`, `offset` and `size` are forwarded unchanged to zim::Archive;
 * since no path is involved, zimFilePath is left empty.
 */
Reader::Reader(int fd, zim::offset_type offset, zim::size_type size)
  : zimArchive(new zim::Archive(fd, offset, size)),
    zimFilePath("")
{
  // Seed the C pseudo-random generator with the current time.
  const time_t now = time(nullptr);
  srand(now);
}
#endif // #ifndef _WIN32
zim::Archive* Reader::getZimArchive() const
{
return zimArchive.get();

View File

@ -45,6 +45,72 @@ JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReader(
}
}
namespace
{
/**
 * Extract the integer file descriptor stored in a java.io.FileDescriptor.
 *
 * The value is read from the private int field of the Java object, which is
 * named 'fd' on standard JVMs and 'descriptor' on Android.
 *
 * @param fdObj the java.io.FileDescriptor object (may be null).
 * @param env   the JNI environment of the calling thread.
 * @return the integer held by the object, or -1 if it cannot be read
 *         (null object, class not found, or neither field present).
 *         On failure any pending Java exception raised by the lookup is
 *         cleared, so the caller only has to deal with the -1 value.
 */
int jni2fd(const jobject& fdObj, JNIEnv* env)
{
  // GetIntField() on a null object is undefined behaviour in JNI.
  if ( fdObj == NULL )
  {
    return -1;
  }

  jclass class_fdesc = env->FindClass("java/io/FileDescriptor");
  if ( class_fdesc == NULL )
  {
    env->ExceptionClear();
    return -1;
  }

  jfieldID field_fd = env->GetFieldID(class_fdesc, "fd", "I");
  if ( field_fd == NULL )
  {
    env->ExceptionClear();
    // Under Android the (private) 'fd' field of java.io.FileDescriptor has been
    // renamed to 'descriptor'. See, for example,
    // https://android.googlesource.com/platform/libcore/+/refs/tags/android-8.1.0_r1/ojluni/src/main/java/java/io/FileDescriptor.java#55
    field_fd = env->GetFieldID(class_fdesc, "descriptor", "I");
  }

  // Neither name exists: do not call GetIntField() with a NULL field id
  // (and a pending NoSuchFieldError).
  if ( field_fd == NULL )
  {
    env->ExceptionClear();
    return -1;
  }

  return env->GetIntField(fdObj, field_fd);
}
} // unnamed namespace
/**
 * JNI entry point backing JNIKiwixReader.getNativeReaderByFD().
 *
 * Creates a kiwix::Reader from the file descriptor wrapped by `fdObj` and
 * returns a pointer to a new Handle<kiwix::Reader>, encoded as a jlong.
 *
 * @return the native handle, or 0 if the reader could not be created.
 *         Under Windows the operation is not supported: a
 *         java.lang.UnsupportedOperationException is raised and 0 returned.
 */
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReaderByFD(
    JNIEnv* env, jobject obj, jobject fdObj)
{
#ifndef _WIN32
  int fd = jni2fd(fdObj, env);

  LOG("Attempting to create reader with fd: %d", fd);
  Lock l;
  try {
    kiwix::Reader* reader = new kiwix::Reader(fd);
    return reinterpret_cast<jlong>(new Handle<kiwix::Reader>(reader));
  } catch (const std::exception& e) {
    LOG("Error opening ZIM file");
    // Never use the exception text as the format string itself: a '%' in
    // the message would be interpreted as a conversion specifier.
    LOG("%s", e.what());
    return 0;
  }
#else
  jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
  // If the class lookup failed an exception is already pending; ThrowNew()
  // must not be called with a NULL class.
  if (exception != NULL) {
    env->ThrowNew(exception, "org.kiwix.kiwixlib.JNIKiwixReader.getNativeReaderByFD() is not supported under Windows");
  }
  return 0;
#endif
}
/**
 * JNI entry point backing JNIKiwixReader.getNativeReaderEmbedded().
 *
 * Creates a kiwix::Reader for a ZIM archive located at `offset` (and spanning
 * `size` bytes) inside the file referred to by `fdObj`, and returns a pointer
 * to a new Handle<kiwix::Reader>, encoded as a jlong.
 *
 * @return the native handle, or 0 if the arguments are invalid or the reader
 *         could not be created. Under Windows the operation is not supported:
 *         a java.lang.UnsupportedOperationException is raised and 0 returned.
 */
JNIEXPORT jlong JNICALL Java_org_kiwix_kiwixlib_JNIKiwixReader_getNativeReaderEmbedded(
    JNIEnv* env, jobject obj, jobject fdObj, jlong offset, jlong size)
{
#ifndef _WIN32
  int fd = jni2fd(fdObj, env);

  LOG("Attempting to create reader with fd: %d", fd);

  // jlong is signed; a negative value can never describe a valid region and
  // would presumably be converted to a huge unsigned quantity when passed on
  // as zim::offset_type / zim::size_type.
  if (offset < 0 || size < 0) {
    LOG("Error opening ZIM file");
    LOG("%s", "negative offset or size");
    return 0;
  }

  Lock l;
  try {
    kiwix::Reader* reader = new kiwix::Reader(fd, offset, size);
    return reinterpret_cast<jlong>(new Handle<kiwix::Reader>(reader));
  } catch (const std::exception& e) {
    LOG("Error opening ZIM file");
    // Never use the exception text as the format string itself: a '%' in
    // the message would be interpreted as a conversion specifier.
    LOG("%s", e.what());
    return 0;
  }
#else
  jclass exception = env->FindClass("java/lang/UnsupportedOperationException");
  // If the class lookup failed an exception is already pending; ThrowNew()
  // must not be called with a NULL class.
  if (exception != NULL) {
    env->ThrowNew(exception, "org.kiwix.kiwixlib.JNIKiwixReader.getNativeReaderEmbedded() is not supported under Windows");
  }
  return 0;
#endif
}
JNIEXPORT void JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_dispose(JNIEnv* env, jobject obj)
{
@ -325,22 +391,22 @@ JNIEXPORT jobject JNICALL
Java_org_kiwix_kiwixlib_JNIKiwixReader_getDirectAccessInformation(
JNIEnv* env, jobject obj, jstring url)
{
jclass classPair = env->FindClass("org/kiwix/kiwixlib/Pair");
jmethodID midPairinit = env->GetMethodID(classPair, "<init>", "()V");
jobject pair = env->NewObject(classPair, midPairinit);
setPairObjValue("", 0, pair, env);
jclass daiClass = env->FindClass("org/kiwix/kiwixlib/DirectAccessInfo");
jmethodID daiInitMethod = env->GetMethodID(daiClass, "<init>", "()V");
jobject dai = env->NewObject(daiClass, daiInitMethod);
setDaiObjValue("", 0, dai, env);
std::string cUrl = jni2c(url, env);
try {
auto entry = READER->getEntryFromEncodedPath(cUrl);
entry = entry.getFinalEntry();
auto part_info = entry.getDirectAccessInfo();
setPairObjValue(part_info.first, part_info.second, pair, env);
setDaiObjValue(part_info.first, part_info.second, dai, env);
} catch (std::exception& e) {
LOG("Unable to get direct access info for url: %s", cUrl.c_str());
LOG(e.what());
}
return pair;
return dai;
}
JNIEXPORT jboolean JNICALL

View File

@ -12,7 +12,7 @@ java_sources = files([
'org/kiwix/kiwixlib/JNIKiwixString.java',
'org/kiwix/kiwixlib/JNIKiwixBool.java',
'org/kiwix/kiwixlib/JNIKiwixException.java',
'org/kiwix/kiwixlib/Pair.java'
'org/kiwix/kiwixlib/DirectAccessInfo.java'
])
kiwix_jni = custom_target('jni',

View File

@ -19,7 +19,7 @@
package org.kiwix.kiwixlib;
public class Pair
public class DirectAccessInfo
{
public String filename;
public long offset;

View File

@ -24,7 +24,8 @@ import org.kiwix.kiwixlib.JNIKiwixException;
import org.kiwix.kiwixlib.JNIKiwixString;
import org.kiwix.kiwixlib.JNIKiwixInt;
import org.kiwix.kiwixlib.JNIKiwixSearcher;
import org.kiwix.kiwixlib.Pair;
import org.kiwix.kiwixlib.DirectAccessInfo;
import java.io.FileDescriptor;
public class JNIKiwixReader
{
@ -102,13 +103,13 @@ public class JNIKiwixReader
* the zim file (or zim part) and directly read the content from it (and so
* bypassing the libzim).
*
* Return a `Pair` (filename, offset) where the content is located.
* Return a `DirectAccessInfo` (filename, offset) where the content is located.
*
* If the content cannot be directly accessed (content is compressed or zim
* file is cut in the middle of the content), the filename is an empty string
* and offset is zero.
*/
public native Pair getDirectAccessInformation(String url);
public native DirectAccessInfo getDirectAccessInformation(String url);
public native boolean searchSuggestions(String prefix, int count);
@ -151,11 +152,31 @@ public class JNIKiwixReader
throw new JNIKiwixException("Cannot open zimfile "+filename);
}
}
/**
 * Open a ZIM archive through an already opened file descriptor.
 *
 * @param fd descriptor of the ZIM file; a null value is rejected without
 *           calling into native code.
 * @throws JNIKiwixException if the native reader cannot be created.
 */
public JNIKiwixReader(FileDescriptor fd) throws JNIKiwixException
{
  // A null descriptor must never reach the native layer.
  nativeHandle = (fd == null) ? 0 : getNativeReaderByFD(fd);
  if (nativeHandle == 0) {
    // String concatenation (unlike fd.toString()) is safe for a null fd.
    throw new JNIKiwixException("Cannot open zimfile by fd "+fd);
  }
}
public JNIKiwixReader(FileDescriptor fd, long offset, long size)
throws JNIKiwixException
{
nativeHandle = getNativeReaderEmbedded(fd, offset, size);
if (nativeHandle == 0) {
throw new JNIKiwixException(String.format("Cannot open embedded zimfile (fd=%s, offset=%d, size=%d)", fd, offset, size));
}
}
public JNIKiwixReader() {
}
public native void dispose();
private native long getNativeReader(String filename);
private native long getNativeReaderByFD(FileDescriptor fd);
private native long getNativeReaderEmbedded(FileDescriptor fd, long offset, long size);
private long nativeHandle;
}

View File

@ -0,0 +1,19 @@
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:opds="http://opds-spec.org/2010/catalog">
<id>00000000-0000-0000-0000-000000000000</id>
<entry>
<title>Test ZIM file</title>
<id>urn:uuid:86c91e51-55bf-8882-464e-072aca37a3e8</id>
<icon>/meta?name=favicon&amp;content=small</icon>
<updated>2020-11-27:00::00:Z</updated>
<language>en</language>
<summary>This is a ZIM file used in libzim unit-tests</summary>
<tags>unit;test</tags>
<link type="text/html" href="/small" />
<author>
<name>Kiwix</name>
</author>
<link rel="http://opds-spec.org/acquisition/open-access" type="application/x-zim" href="http://localhost/small.zim" length="78982" />
<link rel="http://opds-spec.org/image/thumbnail" type="image/png" href="/meta?name=favicon&amp;content=small" />
</entry>
</feed>

View File

@ -0,0 +1,37 @@
#!/usr/bin/env bash

# This script compiles and runs the unit tests of the java wrapper.
# It is not integrated in meson because ... this is not so easy.
#
# Arguments:
#   $1  path to kiwixlib.jar
#   $2  path to the directory containing libkiwix.so

# Print an error message on stderr and abort the script.
die()
{
  echo >&2 "!!! ERROR: $*"
  exit 1
}

KIWIX_LIB_JAR=$1
if [ -z "$KIWIX_LIB_JAR" ]
then
  die "You must give the path to the kiwixlib.jar as first argument"
fi

KIWIX_LIB_DIR=$2
if [ -z "$KIWIX_LIB_DIR" ]
then
  die "You must give the path to directory containing libkiwix.so as second argument"
fi

# Turn the (possibly relative) arguments into absolute paths *before*
# changing the working directory below.
KIWIX_LIB_JAR=$(readlink -f "$KIWIX_LIB_JAR")
KIWIX_LIB_DIR=$(readlink -f "$KIWIX_LIB_DIR")
TEST_SOURCE_DIR=$(dirname "$(readlink -f "$0")")

# The jars and test.java are referenced relative to the directory of this
# script, so nothing below may run from anywhere else.
cd "$TEST_SOURCE_DIR" || die "Cannot change directory to $TEST_SOURCE_DIR"

javac -g -d . -s . -cp "junit-4.13.jar:$KIWIX_LIB_JAR" test.java \
  || die "Compilation failed"

java -Djava.library.path="$KIWIX_LIB_DIR" \
     -cp "junit-4.13.jar:hamcrest-core-1.3.jar:$KIWIX_LIB_JAR:." \
     org.junit.runner.JUnitCore test \
  || die "Unit test failed"

View File

@ -1,26 +0,0 @@
#!/usr/bin/env bash

# This script compiles and runs the unit test of the java wrapper.
# It is not integrated in meson because ... this is not so easy.
#
# Arguments:
#   $1  path to kiwixlib.jar
#   $2  path to the directory containing libkiwix.so

KIWIX_LIB_JAR=$1
if [ -z "$KIWIX_LIB_JAR" ]
then
  echo "You must give the path to the kiwixlib.jar as first argument"
  exit 1
fi

KIWIX_LIB_DIR=$2
if [ -z "$KIWIX_LIB_DIR" ]
then
  echo "You must give the path to directory containing libkiwix.so as second argument"
  exit 1
fi

# Directory holding this script, the junit/hamcrest jars and test.java.
TEST_SOURCE_DIR=$(dirname "$(readlink -f "$0")")

# Every expansion is quoted so that paths containing spaces keep working.
javac -g -d . -s . -cp "$TEST_SOURCE_DIR/junit-4.13.jar:$KIWIX_LIB_JAR" "$TEST_SOURCE_DIR/test.java"

java -Djava.library.path="$KIWIX_LIB_DIR" -cp "$TEST_SOURCE_DIR/junit-4.13.jar:$TEST_SOURCE_DIR/hamcrest-core-1.3.jar:$KIWIX_LIB_JAR:." org.junit.runner.JUnitCore test

View File

@ -0,0 +1,28 @@
#!/usr/bin/env bash

# Regenerates the ZIM files used by the java wrapper unit tests:
#   small.zim           built by zimwriterfs from the small_zimfile_data directory
#   small.zim.embedded  small.zim wrapped between the markers "BEGINZIM"
#                       and "ENDZIM"

# Print an error message on stderr and abort the script.
die()
{
  echo >&2 "!!! ERROR: $*"
  exit 1
}

# All paths below are relative to the directory of this script; never go on
# (in particular never run 'rm') from an unexpected directory.
cd "$(dirname "$0")" || die "Cannot change directory to $(dirname "$0")"

rm -f small.zim

if zimwriterfs --withoutFTIndex \
               -w main.html \
               -f favicon.png \
               -l en \
               -t "Test ZIM file" \
               -d "N/A" \
               -c "N/A" \
               -p "N/A" \
               small_zimfile_data \
               small.zim
then
  echo 'small.zim was successfully created'
else
  die 'Failed to create small.zim'
fi

if printf "BEGINZIM" > small.zim.embedded \
   && cat small.zim >> small.zim.embedded \
   && printf "ENDZIM" >> small.zim.embedded
then
  echo 'small.zim.embedded was successfully created'
else
  die 'Failed to create small.zim.embedded'
fi

Binary file not shown.

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 KiB

View File

@ -0,0 +1,11 @@
<html>
<head>
<meta charset="UTF-8">
<title>Test ZIM file</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0">
</head>
<body>
Test ZIM file
</body>
</html>

View File

@ -10,36 +10,143 @@ static {
System.loadLibrary("kiwix");
}
private static String getCatalogContent()
private static byte[] getFileContent(String path)
throws IOException
{
BufferedReader reader = new BufferedReader(new FileReader("catalog.xml"));
String line;
StringBuilder sb = new StringBuilder();
while ((line = reader.readLine()) != null)
{
sb.append(line + "\n");
}
reader.close();
return sb.toString();
File file = new File(path);
DataInputStream in = new DataInputStream(
new BufferedInputStream(
new FileInputStream(file)));
byte[] data = new byte[(int)file.length()];
in.read(data);
return data;
}
/**
 * Read exactly `size` bytes of the file `path`, starting at byte `offset`.
 *
 * @param path   path of the file to read.
 * @param offset number of bytes to skip from the beginning of the file.
 * @param size   number of bytes to return.
 * @return a new array holding the requested bytes.
 * @throws IOException if the file cannot be read or is too short to provide
 *                     the requested range.
 */
private static byte[] getFileContentPartial(String path, int offset, int size)
throws IOException
{
  byte[] data = new byte[size];
  // try-with-resources guarantees that the stream is closed.
  try (DataInputStream in = new DataInputStream(
                              new BufferedInputStream(
                                new FileInputStream(path)))) {
    // skipBytes() may skip fewer bytes than requested, so loop until the
    // whole offset has been consumed.
    int remaining = offset;
    while (remaining > 0) {
      int skipped = in.skipBytes(remaining);
      if (skipped <= 0) {
        throw new java.io.EOFException(
            "Cannot skip " + offset + " bytes in " + path);
      }
      remaining -= skipped;
    }
    // Unlike read(), readFully() either fills the whole buffer or throws.
    in.readFully(data);
  }
  return data;
}
/**
 * Return the whole content of the file `path` as a String, decoding the
 * bytes with the platform default charset.
 */
private static String getTextFileContent(String path)
throws IOException
{
  final byte[] rawBytes = getFileContent(path);
  return new String(rawBytes);
}
@Test
public void testSome()
public void testReader()
throws JNIKiwixException, IOException
{
JNIKiwixReader reader = new JNIKiwixReader("small.zim");
assertEquals("Test ZIM file", reader.getTitle());
assertEquals(45, reader.getFileSize()); // The file size is in KiB
assertEquals("A/main.html", reader.getMainPage());
String s = getTextFileContent("small_zimfile_data/main.html");
byte[] c = reader.getContent(new JNIKiwixString("A/main.html"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertEquals(s, new String(c));
byte[] faviconData = getFileContent("small_zimfile_data/favicon.png");
assertEquals(faviconData.length, reader.getArticleSize("I/favicon.png"));
c = reader.getContent(new JNIKiwixString("I/favicon.png"),
new JNIKiwixString(),
new JNIKiwixString(),
new JNIKiwixInt());
assertTrue(Arrays.equals(faviconData, c));
DirectAccessInfo dai = reader.getDirectAccessInformation("I/favicon.png");
assertNotEquals("", dai.filename);
c = getFileContentPartial(dai.filename, (int)dai.offset, faviconData.length);
assertTrue(Arrays.equals(faviconData, c));
}
/**
 * Open small.zim through a file descriptor and check its metadata, the
 * content of two entries and the direct access information.
 */
@Test
public void testReaderByFd()
throws JNIKiwixException, IOException
{
  // The stream owns the descriptor: keep it open for the whole lifetime of
  // the reader and close it afterwards.
  try (FileInputStream fis = new FileInputStream("small.zim")) {
    JNIKiwixReader reader = new JNIKiwixReader(fis.getFD());
    try {
      assertEquals("Test ZIM file", reader.getTitle());
      assertEquals(45, reader.getFileSize()); // The file size is in KiB
      assertEquals("A/main.html", reader.getMainPage());

      String s = getTextFileContent("small_zimfile_data/main.html");
      byte[] c = reader.getContent(new JNIKiwixString("A/main.html"),
                                   new JNIKiwixString(),
                                   new JNIKiwixString(),
                                   new JNIKiwixInt());
      assertEquals(s, new String(c));

      byte[] faviconData = getFileContent("small_zimfile_data/favicon.png");
      assertEquals(faviconData.length, reader.getArticleSize("I/favicon.png"));
      c = reader.getContent(new JNIKiwixString("I/favicon.png"),
                            new JNIKiwixString(),
                            new JNIKiwixString(),
                            new JNIKiwixInt());
      assertTrue(Arrays.equals(faviconData, c));

      // The favicon must also be readable straight from the file reported
      // by the direct access information.
      DirectAccessInfo dai = reader.getDirectAccessInformation("I/favicon.png");
      assertNotEquals("", dai.filename);
      c = getFileContentPartial(dai.filename, (int)dai.offset, faviconData.length);
      assertTrue(Arrays.equals(faviconData, c));
    } finally {
      // Release the native reader even when an assertion fails.
      reader.dispose();
    }
  }
}
/**
 * Open the archive embedded in small.zim.embedded and check its metadata,
 * the content of two entries and the direct access information.
 */
@Test
public void testReaderWithAnEmbeddedArchive()
throws JNIKiwixException, IOException
{
  File plainArchive = new File("small.zim");
  // small.zim.embedded is expected to hold the plain archive right after
  // the 8-byte "BEGINZIM" marker.
  final long embeddedOffset = "BEGINZIM".length();

  // The stream owns the descriptor: keep it open for the whole lifetime of
  // the reader and close it afterwards.
  try (FileInputStream fis = new FileInputStream("small.zim.embedded")) {
    JNIKiwixReader reader = new JNIKiwixReader(fis.getFD(), embeddedOffset, plainArchive.length());
    try {
      assertEquals("Test ZIM file", reader.getTitle());
      assertEquals(45, reader.getFileSize()); // The file size is in KiB
      assertEquals("A/main.html", reader.getMainPage());

      String s = getTextFileContent("small_zimfile_data/main.html");
      byte[] c = reader.getContent(new JNIKiwixString("A/main.html"),
                                   new JNIKiwixString(),
                                   new JNIKiwixString(),
                                   new JNIKiwixInt());
      assertEquals(s, new String(c));

      byte[] faviconData = getFileContent("small_zimfile_data/favicon.png");
      assertEquals(faviconData.length, reader.getArticleSize("I/favicon.png"));
      c = reader.getContent(new JNIKiwixString("I/favicon.png"),
                            new JNIKiwixString(),
                            new JNIKiwixString(),
                            new JNIKiwixInt());
      assertTrue(Arrays.equals(faviconData, c));

      // The favicon must also be readable straight from the file reported
      // by the direct access information.
      DirectAccessInfo dai = reader.getDirectAccessInformation("I/favicon.png");
      assertNotEquals("", dai.filename);
      c = getFileContentPartial(dai.filename, (int)dai.offset, faviconData.length);
      assertTrue(Arrays.equals(faviconData, c));
    } finally {
      // Release the native reader even when an assertion fails.
      reader.dispose();
    }
  }
}
@Test
public void testLibrary()
throws IOException
{
Library lib = new Library();
Manager manager = new Manager(lib);
String content = getCatalogContent();
manager.readOpds(content, "https://library.kiwix.org");
assertEquals(lib.getBookCount(true, true), 10);
String content = getTextFileContent("catalog.xml");
manager.readOpds(content, "http://localhost");
assertEquals(lib.getBookCount(true, true), 1);
String[] bookIds = lib.getBooksIds();
assertEquals(bookIds.length, 10);
assertEquals(bookIds.length, 1);
Book book = lib.getBookById(bookIds[0]);
assertEquals(book.getTitle(), "Wikisource");
assertEquals(book.getTags(), "wikisource;_category:wikisource;_pictures:no;_videos:no;_details:yes;_ftindex:yes");
assertEquals(book.getFaviconUrl(), "https://library.kiwix.org/meta?name=favicon&content=wikisource_fr_all_nopic_2020-01");
assertEquals(book.getUrl(), "http://download.kiwix.org/zim/wikisource/wikisource_fr_all_nopic_2020-01.zim.meta4");
assertEquals(book.getTitle(), "Test ZIM file");
assertEquals(book.getTags(), "unit;test");
assertEquals(book.getFaviconUrl(), "http://localhost/meta?name=favicon&content=small");
assertEquals(book.getUrl(), "http://localhost/small.zim");
}
static

View File

@ -246,7 +246,7 @@ inline void setBoolObjValue(const bool value, const jobject obj, JNIEnv* env)
env->SetIntField(obj, objFid, c2jni(value, env));
}
inline void setPairObjValue(const std::string& filename, const long offset,
inline void setDaiObjValue(const std::string& filename, const long offset,
const jobject obj, JNIEnv* env)
{
jclass objClass = env->GetObjectClass(obj);