initial commit

This commit is contained in:
Sascha Kühl 2025-10-10 12:29:17 +02:00
commit 09e10f6e3b
19 changed files with 711 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.idea

8
CMakeLists.txt Normal file
View File

@ -0,0 +1,8 @@
# Top-level build script: fixes the C++ standard for the whole project and
# delegates the actual targets to src/.
cmake_minimum_required(VERSION 3.10)
# Only C++ sources are built, so do not probe for a C compiler.
project(rsql_parser LANGUAGES CXX)
# The sources rely on C++17 (nested namespace definitions, if-with-initializer).
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Ask for ISO C++ (-std=c++17) instead of the GNU dialect (-std=gnu++17).
set(CMAKE_CXX_EXTENSIONS OFF)
add_subdirectory(src)

21
README.md Normal file
View File

@ -0,0 +1,21 @@
# RSQL Parser (C++) with SQL WHERE Generator
This small example demonstrates:
- A lexer and parser for a subset of RSQL (supports `;` for AND, `,` for OR, and parentheses for grouping).
- `=in=` and `=out=` list operators.
- An AST implementing `accept()` for the Visitor pattern.
- A `SQLBuilderVisitor` that generates an SQL `WHERE` clause.
Build:
```bash
mkdir build && cd build
cmake ..
cmake --build .
```
Run (example):
```bash
# The executable is generated in the src/ subdirectory of the build tree.
./src/rsql_parser "(status=in=(OPEN,CLOSED);priority==HIGH),category=out=(internal,test)"
```
The program prints the AST (evaluator) and the generated SQL WHERE clause.

View File

@ -0,0 +1,50 @@
#ifndef RSQL_PARSER_BINARY_CONDITION_NODE_HPP
#define RSQL_PARSER_BINARY_CONDITION_NODE_HPP
#include "node.hpp"
#include <map>
#include <string>
namespace matador::rsql {
/// Comparison operators available in a single-value condition.
enum class binary_operator {
  EQUALS,
  NOT_EQUALS,
  GREATER_THAN,
  GREATER_THAN_OR_EQUAL,
  LESS_THAN,
  LESS_THAN_OR_EQUAL,
};

/// Maps every accepted operator spelling to its binary_operator.
///
/// The ordering comparisons each have a symbolic spelling (e.g. ">") and a
/// named spelling (e.g. "=gt="); both map to the same enumerator.
/// Declared `inline` so that all translation units including this header
/// share a single map instead of each getting a private copy.
inline const std::map<std::string, binary_operator> binary_operators = {
  { "==",   binary_operator::EQUALS },
  { "!=",   binary_operator::NOT_EQUALS },
  { ">",    binary_operator::GREATER_THAN },
  { "=gt=", binary_operator::GREATER_THAN },
  { ">=",   binary_operator::GREATER_THAN_OR_EQUAL },
  { "=ge=", binary_operator::GREATER_THAN_OR_EQUAL },
  { "<",    binary_operator::LESS_THAN },
  { "=lt=", binary_operator::LESS_THAN },
  { "<=",   binary_operator::LESS_THAN_OR_EQUAL },
  { "=le=", binary_operator::LESS_THAN_OR_EQUAL }
};
class node_visitor;
// AST leaf for a condition that compares one field with one value,
// i.e. "<field> <operator> <value>".
class binary_condition_node final : public node {
public:
// Creates the condition from its three parts; the strings are taken by value
// and moved into the node.
binary_condition_node(std::string field, binary_operator op, std::string value);
// Dispatches to visitor.visit(const binary_condition_node&).
void accept(node_visitor& visitor) const override;
// Name of the field on the left-hand side of the comparison.
const std::string& field() const;
// The comparison operator (despite the name this is the operator, not an operand).
binary_operator operand() const;
// The value on the right-hand side, as text.
const std::string& value() const;
private:
std::string field_;
binary_operator op_;
std::string value_;
};
};
#endif //RSQL_PARSER_BINARY_CONDITION_NODE_HPP

View File

@ -0,0 +1,38 @@
#ifndef RSQL_PARSER_COLLECTION_CONDITION_NODE_HPP
#define RSQL_PARSER_COLLECTION_CONDITION_NODE_HPP
#include "node.hpp"
#include <map>
#include <string>
#include <vector>
namespace matador::rsql {
/// Operators that compare a field against a list of values.
enum class collection_operator {
  IN,
  OUT
};

/// Maps the textual list operators to their collection_operator.
/// Declared `inline` so that all translation units including this header
/// share a single map instead of each getting a private copy.
inline const std::map<std::string, collection_operator> collection_operators = {
  { "=in=",  collection_operator::IN },
  { "=out=", collection_operator::OUT }
};
// AST leaf for a condition that compares one field with a list of values,
// i.e. "<field> <operator> (<value>, <value>, ...)".
class collection_condition_node final : public node {
public:
// Creates the condition; the field name and the value list are taken by value
// and moved into the node.
collection_condition_node(std::string field, collection_operator op, std::vector<std::string> value);
// Dispatches to visitor.visit(const collection_condition_node&).
void accept(node_visitor& visitor) const override;
// Name of the field being tested.
const std::string& field() const;
// The list operator (despite the name this is the operator, not an operand).
collection_operator operand() const;
// The values of the list in the order they were passed to the constructor.
const std::vector<std::string>& values() const;
private:
std::string field_;
collection_operator op_;
std::vector<std::string> value_;
};
}
#endif //RSQL_PARSER_COLLECTION_CONDITION_NODE_HPP

View File

@ -0,0 +1,13 @@
#ifndef RSQL_PARSER_LEXER_HPP
#define RSQL_PARSER_LEXER_HPP
#include "token.hpp"
#include <regex>
#include <string>
#include <vector>
namespace matador::rsql::lexer {
// Splits the RSQL expression `input` into a sequence of tokens, in input order.
// Returns an empty vector when `input` contains no tokens.
std::vector<token> tokenize(const std::string& input);
}
#endif //RSQL_PARSER_LEXER_HPP

View File

@ -0,0 +1,33 @@
#ifndef RSQL_PARSER_LOGICAL_NODE_HPP
#define RSQL_PARSER_LOGICAL_NODE_HPP
#include "node.hpp"
#include <string>
#include <vector>
#include <memory>
namespace matador::rsql {
// Logical connectives that combine sub-expressions.
enum class logical_operator {
AND,
OR,
};
class node_visitor;
// Inner AST node that joins its children with a logical operator.
class logical_node final : public node {
public:
// Creates a node for `op` without any children.
explicit logical_node(logical_operator op);
// Dispatches to visitor.visit(const logical_node&); it does not descend into
// the children itself, that is left to the visitor.
void accept(node_visitor& visitor) const override;
// The sub-expressions joined by this node, in insertion order.
const std::vector<std::shared_ptr<node>>& children() const;
// The logical operator (despite the name this is the operator, not an operand).
logical_operator operand() const;
private:
// The parser appends to children_ directly while it builds the tree.
friend class parser;
logical_operator op_; // written ";" (AND) or "," (OR) in the RSQL input
std::vector<std::shared_ptr<node>> children_;
};
}
#endif //RSQL_PARSER_LOGICAL_NODE_HPP

View File

@ -0,0 +1,12 @@
#ifndef RSQL_PARSER_NODE_HPP
#define RSQL_PARSER_NODE_HPP
namespace matador::rsql {
class node_visitor;
// Abstract base class of all AST nodes.
class node {
public:
// Virtual so that nodes can be destroyed through a pointer to the base class.
virtual ~node() = default;
// Visitor entry point: each concrete node passes itself to the matching
// visitor.visit() overload.
virtual void accept(node_visitor& visitor) const = 0;
};
}
#endif //RSQL_PARSER_NODE_HPP

View File

@ -0,0 +1,19 @@
#ifndef RSQL_PARSER_NODE_VISITOR_HPP
#define RSQL_PARSER_NODE_VISITOR_HPP
namespace matador::rsql {
class binary_condition_node;
class collection_condition_node;
class logical_node;
// Visitor interface for the AST: one visit() overload per concrete node type.
// Implementations are invoked through node::accept().
class node_visitor {
public:
virtual ~node_visitor() = default;
// Called for a "<field> <operator> <value>" condition.
virtual void visit(const binary_condition_node& node) = 0;
// Called for a "<field> <operator> (<values>)" condition.
virtual void visit(const collection_condition_node& node) = 0;
// Called for an AND/OR node; visiting its children is up to the implementation.
virtual void visit(const logical_node& node) = 0;
};
}
#endif //RSQL_PARSER_NODE_VISITOR_HPP

View File

@ -0,0 +1,39 @@
#ifndef RSQL_PARSER_PARSER_HPP
#define RSQL_PARSER_PARSER_HPP
#include "token.hpp"
#include "token_type.hpp"
#include <memory>
#include <vector>
namespace matador::rsql {
class node;
// Recursive-descent parser that turns a token sequence into an AST.
//
// Grammar implemented by the private functions:
//   or_expression  := and_expression ( "," and_expression )*
//   and_expression := primary ( ";" primary )*
//   primary        := "(" or_expression ")" | condition
class parser {
public:
// Takes ownership of the tokens to parse.
explicit parser(std::vector<token> t);
// Parses the whole token sequence and returns the root of the AST.
// Throws std::runtime_error when there are no tokens, when a grammar rule is
// violated, or when tokens are left over after the expression.
std::shared_ptr<node> parse();
private:
// OR level: and-expressions separated by LOGICAL_OR tokens.
std::shared_ptr<node> parse_or_expression();
// AND level: primaries separated by LOGICAL_AND tokens.
std::shared_ptr<node> parse_and_expression();
// Primary: parenthesised expression or a single condition.
std::shared_ptr<node> parse_primary();
// Condition: field, operator, then a value or a parenthesised value list.
std::shared_ptr<node> parse_condition();
// Consumes the current token and returns true if its type is one of `types`;
// otherwise leaves the position unchanged and returns false.
bool match(const std::vector<token_type>& types);
// The most recently consumed token; only valid after a successful match().
const token& previous() const;
private:
std::vector<token> tokens_;
// Index into tokens_ of the next token to consume (not a size, despite the name).
size_t current_size_ = 0;
};
}
#endif //RSQL_PARSER_PARSER_HPP

View File

@ -0,0 +1,14 @@
#ifndef RSQL_PARSER_TOKEN_HPP
#define RSQL_PARSER_TOKEN_HPP
#include "token_type.hpp"
#include <string>
namespace matador::rsql {
// A lexical token: its category plus the text it carries.
struct token {
// Category assigned by the lexer.
token_type type;
// Token text; for quoted values the surrounding quotes are already removed.
std::string value;
};
}
#endif //RSQL_PARSER_TOKEN_HPP

View File

@ -0,0 +1,17 @@
#ifndef RSQL_PARSER_TOKEN_TYPE_HPP
#define RSQL_PARSER_TOKEN_TYPE_HPP
namespace matador::rsql {
// Categories of lexical tokens.
enum class token_type {
// Name starting with a letter or underscore, followed by word characters.
IDENTIFIER,
// Comparison or list operator such as "==" or "=in=".
OPERATOR,
// Literal value: a quoted string or another bare literal.
VALUE,
// ";"
LOGICAL_AND,
// "," (also used as the separator inside value lists)
LOGICAL_OR,
// "("
OPEN_PAREN,
// ")"
CLOSE_PAREN,
// Token that fits none of the categories above.
UNKNOWN
};
}
#endif //RSQL_PARSER_TOKEN_TYPE_HPP

25
src/CMakeLists.txt Normal file
View File

@ -0,0 +1,25 @@
# Files of the matador-rsql library. The headers are listed next to the
# sources; they are not compiled on their own.
set(SOURCE
  ../include/matador/rsql/binary_condition_node.hpp
  ../include/matador/rsql/collection_condition_node.hpp
  ../include/matador/rsql/lexer.hpp
  ../include/matador/rsql/logical_node.hpp
  ../include/matador/rsql/node.hpp
  ../include/matador/rsql/node_visitor.hpp
  ../include/matador/rsql/parser.hpp
  ../include/matador/rsql/token.hpp
  ../include/matador/rsql/token_type.hpp
  rsql/binary_condition_node.cpp
  rsql/collection_condition_node.cpp
  rsql/lexer.cpp
  rsql/logical_node.cpp
  rsql/parser.cpp
)

# Static library with the lexer, the parser and the AST node classes.
add_library(matador-rsql STATIC ${SOURCE})
# PUBLIC: targets linking against the library get the include path as well.
target_include_directories(matador-rsql PUBLIC ../include)

# Command line demo program.
add_executable(rsql_parser main.cpp)
# The include directory is inherited through matador-rsql's PUBLIC usage
# requirements, so it is not repeated for the executable. PRIVATE because an
# executable has no consumers to propagate the dependency to.
target_link_libraries(rsql_parser PRIVATE matador-rsql)

198
src/main.cpp Normal file
View File

@ -0,0 +1,198 @@
#include "matador/rsql/binary_condition_node.hpp"
#include "matador/rsql/collection_condition_node.hpp"
#include "matador/rsql/lexer.hpp"
#include "matador/rsql/logical_node.hpp"
#include "matador/rsql/node_visitor.hpp"
#include "matador/rsql/parser.hpp"
#include <iostream>
// ---------------- Evaluator Visitor (prints AST) ----------------
/// Visitor that writes a plain-text dump of the AST to std::cout,
/// one line per node.
class Evaluator final : public matador::rsql::node_visitor {
public:
  /// Prints "Condition: <field> <operator> <value>".
  void visit(const matador::rsql::binary_condition_node& node) override {
    const std::string op_text = to_string(node.operand());
    std::cout << "Condition: " << node.field() << " " << op_text << " " << node.value() << "\n";
  }

  /// Prints "Condition: <field> <operator> (<v1>,<v2>,...)".
  void visit(const matador::rsql::collection_condition_node& node) override {
    const auto& items = node.values();
    std::cout << "Condition: " << node.field() << " " << to_string(node.operand()) << " ";
    std::cout << "(";
    // The separator is empty for the first item and "," for all later ones.
    const char* separator = "";
    for (const auto& item : items) {
      std::cout << separator << item;
      separator = ",";
    }
    std::cout << ")\n";
  }

  /// Prints the logical operator followed by all children, in order.
  void visit(const matador::rsql::logical_node& node) override {
    const bool is_and = node.operand() == matador::rsql::logical_operator::AND;
    std::cout << "Logical (" << (is_and ? "AND" : "OR") << ")\n";
    for (const auto& child : node.children()) {
      child->accept(*this);
    }
  }

private:
  /// Symbolic spelling of a comparison operator.
  static std::string to_string(const matador::rsql::binary_operator op) {
    using matador::rsql::binary_operator;
    static std::map<binary_operator, std::string> spellings {
      { binary_operator::EQUALS,                "==" },
      { binary_operator::NOT_EQUALS,            "!=" },
      { binary_operator::GREATER_THAN,          ">"  },
      { binary_operator::GREATER_THAN_OR_EQUAL, ">=" },
      { binary_operator::LESS_THAN,             "<"  },
      { binary_operator::LESS_THAN_OR_EQUAL,    "<=" },
    };
    return spellings.at(op);
  }

  /// Upper-case spelling of a list operator.
  static std::string to_string(const matador::rsql::collection_operator op) {
    using matador::rsql::collection_operator;
    static std::map<collection_operator, std::string> spellings {
      { collection_operator::IN,  "=IN="  },
      { collection_operator::OUT, "=OUT=" }
    };
    return spellings.at(op);
  }
};
// ---------------- SQLBuilderVisitor ----------------
/// Visitor that renders the AST as the expression of an SQL WHERE clause.
///
/// The SQL text is appended to `result`. Every condition and every logical
/// group is wrapped in its own pair of parentheses, so the output does not
/// depend on SQL operator precedence.
class SQLBuilderVisitor final : public matador::rsql::node_visitor {
public:
  /// The SQL generated so far; read it after the root node has accepted the visitor.
  std::string result;

  /// Appends "(<field> <sql-operator> <value>)".
  void visit(const matador::rsql::binary_condition_node& node) override {
    result += "(" + field(node.field()) + " " + binary_op_string(node.operand(), node.value()) + ")";
  }

  /// Appends "(<field> IN (...))" for =in= and "(<field> NOT IN (...))" for =out=.
  void visit(const matador::rsql::collection_condition_node& node) override {
    result += "(" + field(node.field()) + " " + collection_op_string(node.operand(), node.values()) + ")";
  }

  /// Appends "(<child> AND <child> ...)" or "(<child> OR <child> ...)".
  void visit(const matador::rsql::logical_node& node) override {
    // Each child is rendered by its own visitor so that its SQL can be joined
    // with the operator afterwards.
    std::vector<std::string> parts;
    parts.reserve(node.children().size());
    for (const auto& child : node.children()) {
      SQLBuilderVisitor sub;
      child->accept(sub);
      parts.push_back(sub.result);
    }

    const std::string operand = (node.operand() == matador::rsql::logical_operator::AND ? " AND " : " OR ");
    result += "(";
    for (std::size_t i = 0; i < parts.size(); ++i) {
      result += parts[i];
      if (i + 1 < parts.size()) {
        result += operand;
      }
    }
    result += ")";
  }

private:
  /// Escapes a string for use inside a single-quoted SQL literal by doubling
  /// every single quote.
  static std::string escape_sql(const std::string& s) {
    std::string out;
    out.reserve(s.size() + 2);
    for (const char c : s) {
      if (c == '\'') out.push_back('\''); // emit the quote twice
      out.push_back(c);
    }
    return out;
  }

  /// Returns true if `s` is an optional leading '-', followed by digits with at
  /// most one decimal point, and contains at least one digit.
  static bool is_number(const std::string& s) {
    if (s.empty()) {
      return false;
    }
    std::size_t i = 0;
    if (s[0] == '-') i = 1;
    bool seenDigit = false;
    bool seenDot = false;
    for (; i < s.size(); ++i) {
      // The cast avoids undefined behaviour for negative char values.
      if (std::isdigit(static_cast<unsigned char>(s[i]))) seenDigit = true;
      else if (s[i] == '.' && !seenDot) seenDot = true;
      else return false;
    }
    return seenDigit;
  }

  /// Renders a value as an SQL literal: numbers are emitted verbatim,
  /// everything else as an escaped, single-quoted string.
  static std::string quote(const std::string& str) {
    if (is_number(str)) {
      return str;
    }
    return std::string("'") + escape_sql(str) + std::string("'");
  }

  /// Renders a field name; currently the name is emitted unchanged.
  static std::string field(const std::string& f) {
    // Todo: add quoting rules
    return f;
  }

  /// Returns "<sql-operator> <literal>" for a single-value comparison.
  static std::string binary_op_string(const matador::rsql::binary_operator op, const std::string& val) {
    using namespace matador::rsql;
    switch (op) {
      case binary_operator::EQUALS:
        return "= " + quote(val);
      case binary_operator::NOT_EQUALS:
        return "<> " + quote(val);
      case binary_operator::GREATER_THAN:
        return "> " + quote(val);
      case binary_operator::GREATER_THAN_OR_EQUAL:
        return ">= " + quote(val);
      case binary_operator::LESS_THAN:
        return "< " + quote(val);
      case binary_operator::LESS_THAN_OR_EQUAL:
        return "<= " + quote(val);
      default:
        return "??";
    }
  }

  /// Returns "IN (<literals>)" for =in= and "NOT IN (<literals>)" for =out=.
  /// An empty list is rendered as "(NULL)" to keep the SQL syntactically valid.
  static std::string collection_op_string(const matador::rsql::collection_operator op, const std::vector<std::string>& list) {
    // =out= excludes the listed values, so it is the negated form.
    const bool negated = (op == matador::rsql::collection_operator::OUT);
    if (list.empty()) {
      // NOTE(review): under SQL three-valued logic "x NOT IN (NULL)" is never
      // true, so an empty =out= list matches no rows - confirm this is intended.
      return negated ? "NOT IN (NULL)" : "IN (NULL)";
    }

    std::string sql = negated ? "NOT IN (" : "IN (";
    for (std::size_t i = 0; i < list.size(); ++i) {
      sql += quote(list[i]);
      if (i + 1 < list.size()) sql += ", ";
    }
    sql += ")";
    return sql;
  }
};
// ---------------- Main / usage ----------------
int main(const int argc, char** argv) {
using namespace matador::rsql;
std::string input;
if (argc >= 2) {
input = argv[1];
} else {
// example default
input = "(status=in=(OPEN,CLOSED);priority==HIGH),category=out=(internal,test)";
std::cout << "No query provided, using default example:\n" << input << "\n\n";
}
try {
const auto tokens = lexer::tokenize(input);
parser p(tokens);
const auto ast = p.parse();
std::cout << "=== AST (Evaluator) ===\n";
Evaluator ev;
ast->accept(ev);
std::cout << "\n=== SQL WHERE clause ===\n";
SQLBuilderVisitor sql;
ast->accept(sql);
std::cout << sql.result << "\n";
} catch (const std::exception& ex) {
std::cerr << "Error: " << ex.what() << "\n";
return 2;
}
return 0;
}

View File

@ -0,0 +1,26 @@
#include "matador/rsql/binary_condition_node.hpp"
#include "matador/rsql/node_visitor.hpp"
namespace matador::rsql {

/// Stores the field name, the comparison operator and the comparison value.
binary_condition_node::binary_condition_node(std::string field, const binary_operator op, std::string value)
  : field_(std::move(field))
  , op_(op)
  , value_(std::move(value))
{}

/// Passes this node to the visitor's overload for binary conditions.
// Deliberately not `inline`: the function is declared without `inline` in the
// header and defined only in this translation unit, so an `inline` definition
// here would not match the declaration other translation units see.
void binary_condition_node::accept(node_visitor& visitor) const {
  visitor.visit(*this);
}

/// Name of the field on the left-hand side.
const std::string& binary_condition_node::field() const {
  return field_;
}

/// The comparison operator.
binary_operator binary_condition_node::operand() const {
  return op_;
}

/// The value on the right-hand side, as text.
const std::string& binary_condition_node::value() const {
  return value_;
}

}

View File

@ -0,0 +1,27 @@
#include "matador/rsql/collection_condition_node.hpp"
#include "matador/rsql/node_visitor.hpp"
namespace matador::rsql {
collection_condition_node::collection_condition_node(std::string field, const collection_operator op, std::vector<std::string> value)
: field_(std::move(field))
, op_(op)
, value_(std::move(value))
{}
void collection_condition_node::accept(node_visitor& visitor) const {
visitor.visit(*this);
}
const std::string& collection_condition_node::field() const {
return field_;
}
collection_operator collection_condition_node::operand() const {
return op_;
}
const std::vector<std::string>& collection_condition_node::values() const {
return value_;
}
}

38
src/rsql/lexer.cpp Normal file
View File

@ -0,0 +1,38 @@
#include "matador/rsql/lexer.hpp"
#include "matador/rsql/token_type.hpp"
namespace matador::rsql::lexer {
/// Splits an RSQL expression into tokens.
///
/// Whitespace between tokens is skipped. Quoted strings (single or double
/// quotes) become VALUE tokens without their quotes. A bare value ends at
/// whitespace, a quote, or any delimiter/operator character, so in
/// "age==18;name==x" the "18" is a token of its own. Named operators of the
/// form "=letters=" (for example =in=, =gt=) are returned as one OPERATOR
/// token. A character that fits no category is returned as an UNKNOWN token
/// rather than being dropped.
///
/// @param input the expression to tokenize
/// @return the tokens in input order; empty if `input` holds no tokens
std::vector<token> tokenize(const std::string& input) {
  // One capture group per token category. Alternatives are tried from left to
  // right, so the more specific patterns come first:
  //   1  operator: named (=in=, =out=, =gt=, ...) or symbolic (==, !=, >=, ...)
  //   2  quoted string
  //   3  "("    4  ")"    5  ";"    6  ","
  //   7  identifier
  //   8  bare value: run of characters without whitespace, quotes,
  //      delimiters or operator characters
  //   9  any other single non-whitespace character (e.g. an unmatched quote)
  // Built once; constructing a std::regex is expensive.
  static const std::regex token_pattern(
    R"rx((=[A-Za-z]+=|[=!><~]+)|('[^']*'|"[^"]*")|(\()|(\))|(;)|(,)|([A-Za-z_]\w*)|([^ \t\n\r()=!<>~;,'"]+)|([^ \t\n\r]))rx");

  std::vector<token> tokens;
  const std::sregex_iterator last;
  for (std::sregex_iterator it(input.begin(), input.end(), token_pattern); it != last; ++it) {
    const std::smatch& m = *it;
    if (m[1].matched) {
      tokens.push_back({token_type::OPERATOR, m[1].str()});
    } else if (m[2].matched) {
      // Strip the surrounding quotes; the pattern guarantees both are present.
      const std::string quoted = m[2].str();
      tokens.push_back({token_type::VALUE, quoted.substr(1, quoted.size() - 2)});
    } else if (m[3].matched) {
      tokens.push_back({token_type::OPEN_PAREN, m[3].str()});
    } else if (m[4].matched) {
      tokens.push_back({token_type::CLOSE_PAREN, m[4].str()});
    } else if (m[5].matched) {
      tokens.push_back({token_type::LOGICAL_AND, m[5].str()});
    } else if (m[6].matched) {
      tokens.push_back({token_type::LOGICAL_OR, m[6].str()});
    } else if (m[7].matched) {
      tokens.push_back({token_type::IDENTIFIER, m[7].str()});
    } else if (m[8].matched) {
      tokens.push_back({token_type::VALUE, m[8].str()});
    } else {
      tokens.push_back({token_type::UNKNOWN, m.str()});
    }
  }
  return tokens;
}
}

21
src/rsql/logical_node.cpp Normal file
View File

@ -0,0 +1,21 @@
#include "matador/rsql/logical_node.hpp"
#include "matador/rsql/node_visitor.hpp"
namespace matador::rsql {
logical_node::logical_node(const logical_operator op)
: op_(op) {}
void logical_node::accept( node_visitor& visitor ) const {
visitor.visit(*this);
}
logical_operator logical_node::operand() const {
return op_;
}
const std::vector<std::shared_ptr<node>>& logical_node::children() const {
return children_;
}
}

111
src/rsql/parser.cpp Normal file
View File

@ -0,0 +1,111 @@
#include "matador/rsql/parser.hpp"
#include "matador/rsql/binary_condition_node.hpp"
#include "matador/rsql/collection_condition_node.hpp"
#include "matador/rsql/logical_node.hpp"
#include <algorithm>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
namespace matador::rsql {
parser::parser( std::vector<token> t )
: tokens_(std::move(t)) {}
/// Parses the complete token sequence and returns the root of the AST.
/// Throws std::runtime_error if there are no tokens or if tokens remain after
/// the top-level expression.
std::shared_ptr<node> parser::parse() {
  const bool nothing_to_parse = tokens_.empty();
  if (nothing_to_parse) {
    throw std::runtime_error("Empty input");
  }

  auto root = parse_or_expression();

  // A well-formed query consumes every token.
  const bool all_consumed = (current_size_ == tokens_.size());
  if (!all_consumed) {
    const auto& leftover = tokens_[current_size_];
    throw std::runtime_error("Unexpected token after end: " + leftover.value);
  }
  return root;
}
/// Parses `and_expression ( "," and_expression )*`.
///
/// Each "," wraps the expression parsed so far and the next and-expression in
/// a new OR node, so a chain yields left-nested two-child nodes.
std::shared_ptr<node> parser::parse_or_expression() {
  auto expr = parse_and_expression();
  while (match({token_type::LOGICAL_OR})) {
    const auto logical = std::make_shared<logical_node>(logical_operator::OR);
    logical->children_.push_back(std::move(expr));
    logical->children_.push_back(parse_and_expression());
    expr = logical;
  }
  return expr;
}
/// Parses `primary ( ";" primary )*`.
///
/// Each ";" wraps the expression parsed so far and the next primary in a new
/// AND node, so a chain yields left-nested two-child nodes.
std::shared_ptr<node> parser::parse_and_expression() {
  auto expr = parse_primary();
  while (match({token_type::LOGICAL_AND})) {
    const auto logical = std::make_shared<logical_node>(logical_operator::AND);
    logical->children_.push_back(std::move(expr));
    logical->children_.push_back(parse_primary());
    expr = logical;
  }
  return expr;
}
/// Parses either a parenthesised expression or a single condition.
/// Throws std::runtime_error if an opening parenthesis is never closed.
std::shared_ptr<node> parser::parse_primary() {
  // No "(" ahead: this must be a plain condition.
  if (!match({token_type::OPEN_PAREN})) {
    return parse_condition();
  }

  auto grouped = parse_or_expression();
  if (match({token_type::CLOSE_PAREN})) {
    return grouped;
  }
  throw std::runtime_error("Missing closing parenthesis");
}
/// Parses one condition: `field operator value` or `field operator ( value, ... )`.
///
/// List operators (the keys of collection_operators) expect a parenthesised,
/// comma-separated list, which may be empty. All other operators must be a key
/// of binary_operators and are followed by a single value.
///
/// Throws std::runtime_error if the field, operator or value is missing, if a
/// list is malformed, or if the operator is not known.
std::shared_ptr<node> parser::parse_condition() {
  if (!match({token_type::IDENTIFIER})) {
    throw std::runtime_error("Expected field name");
  }
  std::string field = previous().value;

  if (!match({token_type::OPERATOR})) {
    throw std::runtime_error("Expected operator after field");
  }
  const std::string op = previous().value;

  if (const auto list_op = collection_operators.find(op); list_op != collection_operators.end()) {
    if (!match({token_type::OPEN_PAREN})) {
      throw std::runtime_error("Expected '(' after " + op);
    }
    std::vector<std::string> values;
    bool first = true;
    while (!match({token_type::CLOSE_PAREN})) {
      // The lexer reports "," as LOGICAL_OR; inside a list it is the separator.
      if (!first && !match({token_type::LOGICAL_OR})) {
        throw std::runtime_error("Expected ',' between list values");
      }
      if (!match({token_type::VALUE, token_type::IDENTIFIER})) {
        throw std::runtime_error("Expected value in list");
      }
      values.push_back(previous().value);
      first = false;
    }
    return std::make_shared<collection_condition_node>(std::move(field), list_op->second, std::move(values));
  }

  // Report an unknown operator with a readable message instead of letting
  // map::at() throw std::out_of_range.
  const auto binary_op = binary_operators.find(op);
  if (binary_op == binary_operators.end()) {
    throw std::runtime_error("Unknown operator: " + op);
  }
  if (!match({token_type::VALUE, token_type::IDENTIFIER})) {
    throw std::runtime_error("Expected value after operator");
  }
  return std::make_shared<binary_condition_node>(std::move(field), binary_op->second, previous().value);
}
bool parser::match( const std::vector<token_type>& types ) {
if (current_size_ < tokens_.size() &&
std::find(types.begin(), types.end(), tokens_[current_size_].type) != types.end()) {
++current_size_;
return true;
}
return false;
}
/// Returns the most recently consumed token.
/// Must only be called after match() has succeeded at least once.
const token& parser::previous() const {
  const auto last_consumed = current_size_ - 1;
  return tokens_[last_consumed];
}
}