filterrb.cpp   [plain text]


// © 2018 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#include <iostream>
#include <stack>

#include "filterrb.h"
#include "errmsg.h"


const char* PathFilter::kEInclusionNames[] = {
    "INCLUDE",
    "PARTIAL",
    "EXCLUDE"
};


ResKeyPath::ResKeyPath() {}

ResKeyPath::ResKeyPath(const std::string& path, UErrorCode& status) {
    if (path.empty() || path[0] != '/') {
        std::cerr << "genrb error: path must start with /: " << path << std::endl;
        status = U_PARSE_ERROR;
        return;
    }
    if (path.length() == 1) {
        return;
    }
    size_t i;
    size_t j = 0;
    while (true) {
        i = j + 1;
        j = path.find('/', i);
        std::string key = path.substr(i, j - i);
        if (key.empty()) {
            std::cerr << "genrb error: empty subpaths and trailing slashes are not allowed: " << path << std::endl;
            status = U_PARSE_ERROR;
            return;
        }
        push(key);
        if (j == std::string::npos) {
            break;
        }
    }
}

void ResKeyPath::push(const std::string& key) {
    fPath.push_back(key);
}

void ResKeyPath::pop() {
    fPath.pop_back();
}

const std::list<std::string>& ResKeyPath::pieces() const {
    return fPath;
}

std::ostream& operator<<(std::ostream& out, const ResKeyPath& value) {
    if (value.pieces().empty()) {
        out << "/";
    } else for (auto& key : value.pieces()) {
        out << "/" << key;
    }
    return out;
}


PathFilter::~PathFilter() = default;


void SimpleRuleBasedPathFilter::addRule(const std::string& ruleLine, UErrorCode& status) {
    if (ruleLine.empty()) {
        std::cerr << "genrb error: empty filter rules are not allowed" << std::endl;
        status = U_PARSE_ERROR;
        return;
    }
    bool inclusionRule = false;
    if (ruleLine[0] == '+') {
        inclusionRule = true;
    } else if (ruleLine[0] != '-') {
        std::cerr << "genrb error: rules must start with + or -: " << ruleLine << std::endl;
        status = U_PARSE_ERROR;
        return;
    }
    ResKeyPath path(ruleLine.substr(1), status);
    addRule(path, inclusionRule, status);
}

void SimpleRuleBasedPathFilter::addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status) {
    if (U_FAILURE(status)) {
        return;
    }
    fRoot.applyRule(path, path.pieces().begin(), inclusionRule, status);
}

PathFilter::EInclusion SimpleRuleBasedPathFilter::match(const ResKeyPath& path) const {
    const Tree* node = &fRoot;

    // defaultResult "bubbles up" the nearest "definite" inclusion/exclusion rule
    EInclusion defaultResult = INCLUDE;
    if (node->fIncluded != PARTIAL) {
        // rules handled here: "+/" and "-/"
        defaultResult = node->fIncluded;
    }

    // isLeaf is whether the filter tree can provide no additional information
    // even if additional subpaths are added to the given key
    bool isLeaf = false;

    for (auto& key : path.pieces()) {
        auto child = node->fChildren.find(key);
        // Leaf case 1: input path descends outside the filter tree
        if (child == node->fChildren.end()) {
            if (node->fWildcard) {
                // A wildcard pattern is present; continue checking
                node = node->fWildcard.get();
            } else {
                isLeaf = true;
                break;
            }
        } else {
            node = &child->second;
        }
        if (node->fIncluded != PARTIAL) {
            defaultResult = node->fIncluded;
        }
    }

    // Leaf case 2: input path exactly matches a filter leaf
    if (node->isLeaf()) {
        isLeaf = true;
    }

    // Always return PARTIAL if we are not at a leaf
    if (!isLeaf) {
        return PARTIAL;
    }

    // If leaf node is PARTIAL, return the default
    if (node->fIncluded == PARTIAL) {
        return defaultResult;
    }

    return node->fIncluded;
}


SimpleRuleBasedPathFilter::Tree::Tree(const Tree& other)
        : fIncluded(other.fIncluded), fChildren(other.fChildren) {
    // Note: can't use the default copy assignment because of the std::unique_ptr
    if (other.fWildcard) {
        fWildcard.reset(new Tree(*other.fWildcard));
    }
}

bool SimpleRuleBasedPathFilter::Tree::isLeaf() const {
    return fChildren.empty() && !fWildcard;
}

void SimpleRuleBasedPathFilter::Tree::applyRule(
        const ResKeyPath& path,
        std::list<std::string>::const_iterator it,
        bool inclusionRule,
        UErrorCode& status) {

    // Base Case
    if (it == path.pieces().end()) {
        if (isVerbose() && (fIncluded != PARTIAL || !isLeaf())) {
            std::cout << "genrb info: rule on path " << path
                << " overrides previous rules" << std::endl;
        }
        fIncluded = inclusionRule ? INCLUDE : EXCLUDE;
        fChildren.clear();
        fWildcard.reset();
        return;
    }

    // Recursive Step
    auto& key = *it;
    if (key == "*") {
        // Case 1: Wildcard
        if (!fWildcard) {
            fWildcard.reset(new Tree());
        }
        // Apply the rule to fWildcard and also to all existing children.
        it++;
        fWildcard->applyRule(path, it, inclusionRule, status);
        for (auto& child : fChildren) {
            child.second.applyRule(path, it, inclusionRule, status);
        }
        it--;

    } else {
        // Case 2: Normal Key
        auto search = fChildren.find(key);
        if (search == fChildren.end()) {
            if (fWildcard) {
                // Deep-copy the existing wildcard tree into the new key
                search = fChildren.emplace(key, Tree(*fWildcard)).first;
            } else {
                search = fChildren.emplace(key, Tree()).first;
            }
        }
        it++;
        search->second.applyRule(path, it, inclusionRule, status);
        it--;
    }
}

void SimpleRuleBasedPathFilter::Tree::print(std::ostream& out, int32_t indent) const {
    for (int32_t i=0; i<indent; i++) out << "\t";
    out << "included: " << kEInclusionNames[fIncluded] << std::endl;
    for (auto& child : fChildren) {
        for (int32_t i=0; i<indent; i++) out << "\t";
        out << child.first << ": {" << std::endl;
        child.second.print(out, indent + 1);
        for (int32_t i=0; i<indent; i++) out << "\t";
        out << "}" << std::endl;
    }
    if (fWildcard) {
        for (int32_t i=0; i<indent; i++) out << "\t";
        out << "* {" << std::endl;
        fWildcard->print(out, indent + 1);
        for (int32_t i=0; i<indent; i++) out << "\t";
        out << "}" << std::endl;
    }
}

void SimpleRuleBasedPathFilter::print(std::ostream& out) const {
    out << "SimpleRuleBasedPathFilter {" << std::endl;
    fRoot.print(out, 1);
    out << "}" << std::endl;
}

std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value) {
    value.print(out);
    return out;
}