diff --git a/src/tools/concurrent_cache.h b/src/tools/concurrent_cache.h new file mode 100644 index 000000000..1b5175b2e --- /dev/null +++ b/src/tools/concurrent_cache.h @@ -0,0 +1,95 @@ +/* + * Copyright (C) 2021 Matthieu Gautier + * Copyright (C) 2020 Veloman Yunkan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied + * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and + * NON-INFRINGEMENT. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef ZIM_CONCURRENT_CACHE_H +#define ZIM_CONCURRENT_CACHE_H + +#include "lrucache.h" + +#include +#include + +namespace kiwix +{ + +/** + ConcurrentCache implements a concurrent thread-safe cache + + Compared to kiwix::lru_cache, each access operation is slightly more expensive. + However, different slots of the cache can be safely accessed concurrently + with minimal blocking. Concurrent access to the same element is also + safe, and, in case of a cache miss, will block until that element becomes + available. + */ +template +class ConcurrentCache +{ +private: // types + typedef std::shared_future ValuePlaceholder; + typedef lru_cache Impl; + +public: // types + explicit ConcurrentCache(size_t maxEntries) + : impl_(maxEntries) + {} + + // Gets the entry corresponding to the given key. If the entry is not in the + // cache, it is obtained by calling f() (without any arguments) and the + // result is put into the cache. 
+ // + // The cache as a whole is locked only for the duration of accessing + // the respective slot. If, in the case of the a cache miss, the generation + // of the missing element takes a long time, only attempts to access that + // element will block - the rest of the cache remains open to concurrent + // access. + template + Value getOrPut(const Key& key, F f) + { + std::promise valuePromise; + std::unique_lock l(lock_); + const auto x = impl_.getOrPut(key, valuePromise.get_future().share()); + l.unlock(); + if ( x.miss() ) { + try { + valuePromise.set_value(f()); + } catch (std::exception& e) { + drop(key); + throw; + } + } + + return x.value().get(); + } + + bool drop(const Key& key) + { + std::unique_lock l(lock_); + return impl_.drop(key); + } + +private: // data + Impl impl_; + std::mutex lock_; +}; + +} // namespace kiwix + +#endif // ZIM_CONCURRENT_CACHE_H + diff --git a/src/tools/lrucache.h b/src/tools/lrucache.h new file mode 100644 index 000000000..bd90c3128 --- /dev/null +++ b/src/tools/lrucache.h @@ -0,0 +1,160 @@ +/* + * Copyrigth (c) 2021, Matthieu Gautier + * Copyright (c) 2020, Veloman Yunkan + * Copyright (c) 2014, lamerman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of lamerman nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
/*
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * File:   lrucache.hpp
 * Author: Alexander Ponomarev
 *
 * Created on June 20, 2013, 5:09 PM
 */

#ifndef _LRUCACHE_HPP_INCLUDED_
#define _LRUCACHE_HPP_INCLUDED_

#include <map>
#include <list>
#include <cstddef>
#include <stdexcept>
#include <cassert>

namespace kiwix {

/**
 * A least-recently-used cache mapping key_t to value_t.
 *
 * Entries are kept in a list ordered from most recently used (front) to
 * least recently used (back); a map from key to list position provides the
 * lookup. When an insertion makes the cache exceed max_size entries, the
 * entry at the back of the list is evicted.
 *
 * The class performs no locking of its own.
 */
template<typename key_t, typename value_t>
class lru_cache {
public: // types
  typedef typename std::pair<key_t, value_t> key_value_pair_t;
  typedef typename std::list<key_value_pair_t>::iterator list_iterator_t;

  enum AccessStatus {
    HIT, // key was found in the cache
    PUT, // key was not in the cache but was created by the getOrPut() access
    MISS // key was not in the cache; get() access failed
  };

  // Outcome of a cache access: a status plus (unless the status is MISS)
  // a copy of the cached value.
  class AccessResult
  {
    const AccessStatus status_;
    const value_t val_;
  public:
    AccessResult(const value_t& val, AccessStatus status)
      : status_(status), val_(val)
    {}
    // A failed access: status MISS and a value-initialized placeholder.
    AccessResult() : status_(MISS), val_() {}

    bool hit() const { return status_ == HIT; }
    // Note: both PUT and MISS count as a miss.
    bool miss() const { return !hit(); }

    // Returns the value; throws std::range_error if the status is MISS.
    const value_t& value() const
    {
      if ( status_ == MISS )
        throw std::range_error("There is no such key in cache");
      return val_;
    }

    // Implicit conversion to the value; same contract as value().
    operator const value_t& () const { return value(); }
  };

public: // functions
  // Creates an empty cache holding at most max_size entries.
  explicit lru_cache(size_t max_size) :
    _max_size(max_size) {
  }

  // If 'key' is present in the cache, returns the associated value,
  // otherwise puts the given value into the cache (and returns it with
  // a status of a cache miss).
  AccessResult getOrPut(const key_t& key, const value_t& value) {
    auto it = _cache_items_map.find(key);
    if (it != _cache_items_map.end()) {
      // Mark as most recently used by moving the node to the front.
      _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
      return AccessResult(it->second->second, HIT);
    } else {
      putMissing(key, value);
      return AccessResult(value, PUT);
    }
  }

  // Inserts 'value' under 'key', or overwrites the existing value. Either
  // way the entry becomes the most recently used one.
  void put(const key_t& key, const value_t& value) {
    auto it = _cache_items_map.find(key);
    if (it != _cache_items_map.end()) {
      _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
      it->second->second = value;
    } else {
      putMissing(key, value);
    }
  }

  // Looks up 'key'. On success the entry becomes the most recently used one
  // and a HIT result is returned; otherwise a MISS result is returned.
  AccessResult get(const key_t& key) {
    auto it = _cache_items_map.find(key);
    if (it == _cache_items_map.end()) {
      return AccessResult();
    } else {
      _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second);
      return AccessResult(it->second->second, HIT);
    }
  }

  // Removes the entry for 'key'. Returns true if an entry was removed and
  // false if the key was not in the cache.
  bool drop(const key_t& key) {
    // A plain find() avoids using an exception (map::at) to report the
    // ordinary "key absent" case, and lets us erase via the iterator
    // instead of looking the key up a second time.
    const auto map_it = _cache_items_map.find(key);
    if (map_it == _cache_items_map.end()) {
      return false;
    }
    _cache_items_list.erase(map_it->second);
    _cache_items_map.erase(map_it);
    return true;
  }

  // Tells whether 'key' is in the cache, without touching the usage order.
  bool exists(const key_t& key) const {
    return _cache_items_map.find(key) != _cache_items_map.end();
  }

  // Number of entries currently in the cache.
  size_t size() const {
    return _cache_items_map.size();
  }

private: // functions
  // Inserts a key that must not be in the cache yet as the most recently
  // used entry, then evicts the least recently used entry if the cache has
  // grown beyond _max_size.
  void putMissing(const key_t& key, const value_t& value) {
    assert(_cache_items_map.find(key) == _cache_items_map.end());
    _cache_items_list.push_front(key_value_pair_t(key, value));
    _cache_items_map[key] = _cache_items_list.begin();
    if (_cache_items_map.size() > _max_size) {
      // Erase from the map first: the key is read from the list node that
      // pop_back() is about to destroy.
      _cache_items_map.erase(_cache_items_list.back().first);
      _cache_items_list.pop_back();
    }
  }

private: // data
  std::list<key_value_pair_t> _cache_items_list;   // front = most recently used
  std::map<key_t, list_iterator_t> _cache_items_map;
  size_t _max_size;
};

} // namespace kiwix

#endif /* _LRUCACHE_HPP_INCLUDED_ */