Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/shortterm cache #229

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion libursa/OnDiskDataset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ std::string OnDiskDataset::get_file_name(FileId fid) const {
QueryResult OnDiskDataset::query(const Query &query,
QueryCounters *counters) const {
std::set<PrimitiveQuery> seen;
std::map<std::vector<PrimitiveQuery>, SortedRun> string_cache;
return query.run(
[this, &seen](PrimitiveQuery primitive, QueryCounters *counters) {
std::optional<QueryOperation> operation;
Expand All @@ -90,7 +91,7 @@ QueryResult OnDiskDataset::query(const Query &query,
}
}
},
counters);
&string_cache, counters);
}

void OnDiskDataset::execute(const Query &query, ResultWriter *out,
Expand Down
27 changes: 24 additions & 3 deletions libursa/Query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ void Query::prefetch(int from_index, int howmany, bool only_last,

QueryResult Query::run(const QueryPrimitive &primitive,
const PrefetchFunc &prefetcher,
std::map<std::vector<PrimitiveQuery>, SortedRun> *cache,
QueryCounters *counters) const {
// Case: primitive query - reduces to AND with tokens from query plan.
if (type == QueryType::PRIMITIVE) {
Expand All @@ -244,23 +245,31 @@ QueryResult Query::run(const QueryPrimitive &primitive,

// Case: and. Short circuits when result is already empty.
if (type == QueryType::AND) {
auto cache_key = get_cache_key();
auto cached_it = cache->find(cache_key);
if (cached_it != cache->end()) {
return QueryResult(cached_it->second.clone());
}
auto result = QueryResult::everything();
for (int i = 0; i < queries.size(); i++) {
prefetch(i + 1, PRETECTH_RANGE, true, prefetcher);
const auto &query = queries[i];
result.do_and(query.run(primitive, prefetcher, counters),
result.do_and(query.run(primitive, prefetcher, cache, counters),
&counters->ands());
if (result.is_empty()) {
break;
}
}
if (!cache_key.empty() && !result.is_everything()) {
cache->emplace(std::move(cache_key), result.vector().clone());
}
return result;
}
// Case: or. Short circuits when result is already everything.
if (type == QueryType::OR) {
auto result = QueryResult::empty();
for (auto &query : queries) {
result.do_or(query.run(primitive, prefetcher, counters),
result.do_or(query.run(primitive, prefetcher, cache, counters),
&counters->ors());
if (result.is_everything()) {
break;
Expand All @@ -281,7 +290,8 @@ QueryResult Query::run(const QueryPrimitive &primitive,
int cutoff = count;
int nonempty_sources = queries.size();
for (const auto &query : queries) {
QueryResult next = query.run(primitive, prefetcher, counters);
QueryResult next =
query.run(primitive, prefetcher, cache, counters);
if (next.is_everything()) {
cutoff -= 1;
if (cutoff <= 0) {
Expand All @@ -302,3 +312,14 @@ QueryResult Query::run(const QueryPrimitive &primitive,
}
throw std::runtime_error("Unexpected query type");
}

std::vector<PrimitiveQuery> Query::get_cache_key() const {
std::vector<PrimitiveQuery> result;
for (const auto &query : queries) {
if (query.get_type() != QueryType::PRIMITIVE) {
return {};
}
result.push_back(query.as_ngram());
}
return result;
}
3 changes: 3 additions & 0 deletions libursa/Query.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,12 @@ class Query {

QueryResult run(const QueryPrimitive &primitive,
const PrefetchFunc &prefetch,
std::map<std::vector<PrimitiveQuery>, SortedRun> *cache,
QueryCounters *counters) const;
Query plan(const std::unordered_set<IndexType> &types_to_query) const;

std::vector<PrimitiveQuery> get_cache_key() const;

private:
void prefetch(int from_index, int howmany, bool only_last,
const PrefetchFunc &prefetch) const;
Expand Down
Loading