Compare commits

...

No commits in common. "f35f574b2ae1140de4b2525f52bed9a9d8053164" and "617ca05b2d9b619e50c35418adef30abd8fdb7f4" have entirely different histories.

20 changed files with 712 additions and 11 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
.idea

8
CMakeLists.txt Normal file
View File

@ -0,0 +1,8 @@
# Top-level build script: sets the language standard and hands over to src/.
cmake_minimum_required(VERSION 3.10)
project(rsql_parser)
# Build everything as C++17 and treat the standard as a hard requirement.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Targets are declared by the CMakeLists.txt inside src/.
add_subdirectory(src)

View File

@ -1,9 +0,0 @@
MIT License
Copyright (c) 2025 sascha
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1,3 +1,23 @@
# rsql-parser # RSQL Parser (C++) with SQL WHERE Generator
A simple C++ RSQL parser. It uses the visitor pattern to allow using it for your custom needs. A simple C++ RSQL parser. It uses the visitor pattern to allow using it for your custom needs.
This small example demonstrates:
- A lexer + parser for a subset of RSQL (supports `;` AND, `,` OR, parentheses).
- `=in=` and `=out=` list operators.
- An AST implementing `accept()` for the Visitor pattern.
- A `SQLBuilderVisitor` that generates an SQL `WHERE` clause.
Build:
```bash
mkdir build && cd build
cmake ..
cmake --build .
```
Run (example):
```bash
./rsql_parser "(status=in=(OPEN,CLOSED);priority==HIGH),category=out=(internal,test)"
```
The program prints the AST (evaluator) and the generated SQL WHERE clause.

View File

@ -0,0 +1,50 @@
#ifndef RSQL_PARSER_BINARY_CONDITION_NODE_HPP
#define RSQL_PARSER_BINARY_CONDITION_NODE_HPP
#include "node.hpp"
#include <map>
#include <string>
namespace matador::rsql {
/// Comparison kinds usable in a single-value condition (`field <op> value`).
enum class binary_operator {
    EQUALS,
    NOT_EQUALS,
    GREATER_THAN,
    GREATER_THAN_OR_EQUAL,
    LESS_THAN,
    LESS_THAN_OR_EQUAL,
};

/// Maps the textual spelling of a comparison operator to its binary_operator.
/// The ordering comparisons have two spellings each: a symbolic one (`>`)
/// and a named alias of the form `=xx=` (`=gt=`).
static const std::map<std::string, binary_operator> binary_operators = {
    { "==",   binary_operator::EQUALS },
    { "!=",   binary_operator::NOT_EQUALS },
    { ">",    binary_operator::GREATER_THAN },
    { "=gt=", binary_operator::GREATER_THAN },  // was misspelled "=gt*", making the alias unreachable
    { ">=",   binary_operator::GREATER_THAN_OR_EQUAL },
    { "=ge=", binary_operator::GREATER_THAN_OR_EQUAL },
    { "<",    binary_operator::LESS_THAN },
    { "=lt=", binary_operator::LESS_THAN },
    { "<=",   binary_operator::LESS_THAN_OR_EQUAL },
    { "=le=", binary_operator::LESS_THAN_OR_EQUAL }
};
class node_visitor;
/// AST leaf describing one single-value comparison: a field name, a
/// binary_operator and the literal it is compared against.
class binary_condition_node final : public node {
public:
    /// Takes ownership of the field name and value; `op` selects the comparison.
    binary_condition_node(std::string field, binary_operator op, std::string value);

    /// Hands this node to the visitor's binary_condition_node overload.
    void accept(node_visitor& visitor) const override;

    /// Name of the field on the left-hand side.
    const std::string& field() const;

    /// The comparison operator (despite the accessor's name).
    binary_operator operand() const;

    /// Right-hand side literal, stored as text.
    const std::string& value() const;

private:
    std::string field_;
    binary_operator op_;
    std::string value_;
};
};
#endif //RSQL_PARSER_BINARY_CONDITION_NODE_HPP

View File

@ -0,0 +1,38 @@
#ifndef RSQL_PARSER_COLLECTION_CONDITION_NODE_HPP
#define RSQL_PARSER_COLLECTION_CONDITION_NODE_HPP
#include "node.hpp"
#include <map>
#include <string>
#include <vector>
namespace matador::rsql {
/// Membership tests usable in a list condition (`field <op> (v1,v2,...)`).
enum class collection_operator {
    IN,   // field value is one of the listed values
    OUT   // field value is none of the listed values
};

/// Maps the textual spelling of a list operator to its collection_operator.
static const std::map<std::string, collection_operator> collection_operators{
    { "=in=",  collection_operator::IN },
    { "=out=", collection_operator::OUT }
};
/// AST leaf describing one list condition: a field name, a
/// collection_operator and the list of literals to test against.
class collection_condition_node final : public node {
public:
    /// Takes ownership of the field name and value list; `op` selects IN or OUT.
    collection_condition_node(std::string field, collection_operator op, std::vector<std::string> value);

    /// Hands this node to the visitor's collection_condition_node overload.
    void accept(node_visitor& visitor) const override;

    /// Name of the field on the left-hand side.
    const std::string& field() const;

    /// The membership operator (despite the accessor's name).
    collection_operator operand() const;

    /// Listed literals in the order they were supplied; may be empty.
    const std::vector<std::string>& values() const;

private:
    std::string field_;
    collection_operator op_;
    std::vector<std::string> value_;
};
}
#endif //RSQL_PARSER_COLLECTION_CONDITION_NODE_HPP

View File

@ -0,0 +1,13 @@
#ifndef RSQL_PARSER_LEXER_HPP
#define RSQL_PARSER_LEXER_HPP
#include "token.hpp"
#include <regex>
#include <string>
#include <vector>
namespace matador::rsql::lexer {
/// Splits an RSQL expression into a flat token sequence.
/// ';' and ',' are reported as LOGICAL_AND / LOGICAL_OR, parentheses as
/// OPEN_PAREN / CLOSE_PAREN, and quoted strings as VALUE with the
/// surrounding quotes removed.
/// @param input RSQL text to tokenize.
/// @return Tokens in input order.
std::vector<token> tokenize(const std::string& input);
}
#endif //RSQL_PARSER_LEXER_HPP

View File

@ -0,0 +1,33 @@
#ifndef RSQL_PARSER_LOGICAL_NODE_HPP
#define RSQL_PARSER_LOGICAL_NODE_HPP
#include "node.hpp"
#include <string>
#include <vector>
#include <memory>
namespace matador::rsql {
/// How the children of a logical_node are combined.
enum class logical_operator {
    AND,  // all children must hold
    OR    // at least one child must hold
};
class node_visitor;
/// Inner AST node that joins its child expressions with AND or OR.
class logical_node final : public node {
public:
    /// Creates a node with no children that combines with `op`.
    explicit logical_node(logical_operator op);

    /// Hands this node to the visitor's logical_node overload.
    void accept(node_visitor& visitor) const override;

    /// Child expressions in insertion order.
    const std::vector<std::shared_ptr<node>>& children() const;

    /// The combining operator (despite the accessor's name).
    logical_operator operand() const;

private:
    // The parser appends to children_ directly while building the tree.
    friend class parser;

    logical_operator op_;  // ";" for AND, "," for OR
    std::vector<std::shared_ptr<node>> children_;
};
}
#endif //RSQL_PARSER_LOGICAL_NODE_HPP

View File

@ -0,0 +1,12 @@
#ifndef RSQL_PARSER_NODE_HPP
#define RSQL_PARSER_NODE_HPP
namespace matador::rsql {
class node_visitor;

/// Abstract base of every AST node; the only operation is visitor dispatch.
class node {
public:
    /// Virtual so that nodes can be destroyed through a base pointer.
    virtual ~node() = default;

    /// Implemented by each concrete node to pass itself to `visitor`.
    virtual void accept(node_visitor& visitor) const = 0;
};
}
#endif //RSQL_PARSER_NODE_HPP

View File

@ -0,0 +1,19 @@
#ifndef RSQL_PARSER_NODE_VISITOR_HPP
#define RSQL_PARSER_NODE_VISITOR_HPP
namespace matador::rsql {
class binary_condition_node;
class collection_condition_node;
class logical_node;

/// Visitor interface over the AST: one overload per concrete node type.
class node_visitor {
public:
    virtual ~node_visitor() = default;

    /// Called for a single-value comparison node.
    virtual void visit(const binary_condition_node& node) = 0;

    /// Called for a list (in/out) condition node.
    virtual void visit(const collection_condition_node& node) = 0;

    /// Called for an AND/OR node.
    virtual void visit(const logical_node& node) = 0;
};
}
#endif //RSQL_PARSER_NODE_VISITOR_HPP

View File

@ -0,0 +1,39 @@
#ifndef RSQL_PARSER_PARSER_HPP
#define RSQL_PARSER_PARSER_HPP
#include "token.hpp"
#include "token_type.hpp"
#include <memory>
#include <vector>
namespace matador::rsql {
class node;

/// Recursive-descent parser that turns a token sequence into an AST.
/// Precedence, loosest first: OR (','), AND (';'), primary.
class parser {
public:
    /// Takes ownership of the tokens to parse.
    explicit parser(std::vector<token> t);

    /// Parses the whole token sequence and returns the root of the AST.
    std::shared_ptr<node> parse();

private:
    /// OR level: and-expressions separated by LOGICAL_OR.
    std::shared_ptr<node> parse_or_expression();

    /// AND level: primaries separated by LOGICAL_AND.
    std::shared_ptr<node> parse_and_expression();

    /// Primary: a parenthesised expression or a single condition.
    std::shared_ptr<node> parse_primary();

    /// A single `field operator value` or `field operator (list)` condition.
    std::shared_ptr<node> parse_condition();

    /// Consumes the next token if its type is one of `types`.
    bool match(const std::vector<token_type>& types);

    /// The most recently consumed token.
    const token& previous() const;

    std::vector<token> tokens_;
    size_t current_size_ = 0;  // index of the next token to consume
};
}
#endif //RSQL_PARSER_PARSER_HPP

View File

@ -0,0 +1,14 @@
#ifndef RSQL_PARSER_TOKEN_HPP
#define RSQL_PARSER_TOKEN_HPP
#include "token_type.hpp"
#include <string>
namespace matador::rsql {
/// One lexical unit produced by the lexer.
struct token {
    token_type type;    // category of the token
    std::string value;  // token text (quoted values are stored without their quotes)
};
}
#endif //RSQL_PARSER_TOKEN_HPP

View File

@ -0,0 +1,17 @@
#ifndef RSQL_PARSER_TOKEN_TYPE_HPP
#define RSQL_PARSER_TOKEN_TYPE_HPP
namespace matador::rsql {
/// Categories a token can belong to.
enum class token_type {
    IDENTIFIER,   // bare word starting with a letter or underscore
    OPERATOR,     // comparison or list operator
    VALUE,        // quoted string or any other literal text
    LOGICAL_AND,  // ';'
    LOGICAL_OR,   // ','
    OPEN_PAREN,   // '('
    CLOSE_PAREN,  // ')'
    UNKNOWN
};
}
#endif //RSQL_PARSER_TOKEN_TYPE_HPP

25
src/CMakeLists.txt Normal file
View File

@ -0,0 +1,25 @@
# Files that make up the matador-rsql library: public headers followed by
# the implementation files.
SET(SOURCE
../include/matador/rsql/binary_condition_node.hpp
../include/matador/rsql/collection_condition_node.hpp
../include/matador/rsql/lexer.hpp
../include/matador/rsql/logical_node.hpp
../include/matador/rsql/node.hpp
../include/matador/rsql/node_visitor.hpp
../include/matador/rsql/parser.hpp
../include/matador/rsql/token.hpp
../include/matador/rsql/token_type.hpp
rsql/binary_condition_node.cpp
rsql/collection_condition_node.cpp
rsql/lexer.cpp
rsql/logical_node.cpp
rsql/parser.cpp
)
# Static library containing the lexer, the parser and the AST node classes.
add_library(matador-rsql STATIC ${SOURCE})
# PUBLIC: targets linking against matador-rsql also get ../include on their include path.
target_include_directories(matador-rsql PUBLIC ../include)
# Demo executable built from main.cpp and linked against the library.
add_executable(rsql_parser main.cpp)
target_include_directories(rsql_parser PUBLIC ../include)
target_link_libraries(rsql_parser matador-rsql)

198
src/main.cpp Normal file
View File

@ -0,0 +1,198 @@
#include "matador/rsql/binary_condition_node.hpp"
#include "matador/rsql/collection_condition_node.hpp"
#include "matador/rsql/lexer.hpp"
#include "matador/rsql/logical_node.hpp"
#include "matador/rsql/node_visitor.hpp"
#include "matador/rsql/parser.hpp"
#include <iostream>
// ---------------- Evaluator Visitor (prints AST) ----------------
/// Visitor that dumps the AST to std::cout, one line per node, in pre-order.
class Evaluator final : public matador::rsql::node_visitor {
public:
    /// Prints `Condition: <field> <op> <value>`.
    void visit(const matador::rsql::binary_condition_node& node) override {
        std::cout << "Condition: " << node.field();
        std::cout << " " << to_string(node.operand());
        std::cout << " " << node.value() << "\n";
    }

    /// Prints `Condition: <field> <op> (<v1>,<v2>,...)`.
    void visit( const matador::rsql::collection_condition_node& node ) override {
        std::cout << "Condition: " << node.field() << " " << to_string(node.operand()) << " ";
        std::cout << "(";
        bool need_separator = false;
        for (const auto& item : node.values()) {
            if (need_separator) {
                std::cout << ",";
            }
            std::cout << item;
            need_separator = true;
        }
        std::cout << ")\n";
    }

    /// Prints `Logical (AND|OR)` and then every child in order.
    void visit(const matador::rsql::logical_node& node) override {
        const bool is_and = node.operand() == matador::rsql::logical_operator::AND;
        std::cout << "Logical (" << (is_and ? "AND" : "OR") << ")\n";
        for (const auto& child : node.children()) {
            child->accept(*this);
        }
    }

private:
    /// Symbolic spelling of a comparison operator.
    static std::string to_string(const matador::rsql::binary_operator op) {
        using matador::rsql::binary_operator;
        static std::map<binary_operator, std::string> symbols {
            { binary_operator::EQUALS, "==" },
            { binary_operator::NOT_EQUALS, "!=" },
            { binary_operator::GREATER_THAN, ">" },
            { binary_operator::GREATER_THAN_OR_EQUAL, ">=" },
            { binary_operator::LESS_THAN, "<" },
            { binary_operator::LESS_THAN_OR_EQUAL, "<=" },
        };
        return symbols.at(op);
    }

    /// Upper-case spelling of a list operator.
    static std::string to_string(const matador::rsql::collection_operator op) {
        using matador::rsql::collection_operator;
        static std::map<collection_operator, std::string> symbols {
            { collection_operator::IN, "=IN=" },
            { collection_operator::OUT, "=OUT=" }
        };
        return symbols.at(op);
    }
};
// ---------------- SQLBuilderVisitor ----------------
/// Visitor that renders the AST as the body of an SQL WHERE clause.
/// Every condition and every logical group is wrapped in parentheses, so the
/// output never depends on SQL operator precedence.
class SQLBuilderVisitor final : public matador::rsql::node_visitor {
public:
    /// Generated SQL; each visit appends to it.
    std::string result;

    /// Appends `(<field> <op> <literal>)`.
    void visit(const matador::rsql::binary_condition_node& node) override {
        result += "(" + field(node.field()) + " " + binary_op_string(node.operand(), node.value()) + ")";
    }

    /// Appends `(<field> IN (...))` or `(<field> NOT IN (...))`.
    void visit( const matador::rsql::collection_condition_node& node ) override {
        result += "(" + field(node.field()) + " " + collection_op_string(node.operand(), node.values()) + ")";
    }

    /// Appends `(<child> AND <child> ...)` or the OR equivalent.
    void visit(const matador::rsql::logical_node& node) override {
        const char* const separator =
            (node.operand() == matador::rsql::logical_operator::AND ? " AND " : " OR ");
        result += "(";
        bool first = true;
        for (const auto& child : node.children()) {
            if (!first) {
                result += separator;
            }
            first = false;
            // Each visit only appends, so children can render straight into result.
            child->accept(*this);
        }
        result += ")";
    }

private:
    /// Doubles every single quote so the text is safe inside an SQL string literal.
    static std::string escape_sql(const std::string& s) {
        std::string out;
        out.reserve(s.size() + 2);
        for (const char c : s) {
            if (c == '\'') out.push_back('\''); // double single-quote
            out.push_back(c);
        }
        return out;
    }

    /// True for an optional leading '-', at least one digit and at most one '.'.
    static bool is_number(const std::string& s) {
        if (s.empty()) {
            return false;
        }
        size_t i = (s[0] == '-') ? 1 : 0;
        bool seen_digit = false;
        bool seen_dot = false;
        for (; i < s.size(); ++i) {
            const char c = s[i];
            // Explicit range check: ASCII-only and needs no <cctype>.
            if (c >= '0' && c <= '9') seen_digit = true;
            else if (c == '.' && !seen_dot) seen_dot = true;
            else return false;
        }
        return seen_digit;
    }

    /// Numbers are emitted verbatim; everything else becomes a quoted, escaped literal.
    static std::string quote(const std::string& str) {
        if (is_number(str)) {
            return str;
        }
        return std::string("'") + escape_sql(str) + std::string("'");
    }

    /// Renders a column name.
    static std::string field(const std::string& f) {
        // Todo: add quoting rules
        return f;
    }

    /// Renders `<sql-operator> <literal>` for a comparison.
    static std::string binary_op_string(const matador::rsql::binary_operator op, const std::string& val) {
        using namespace matador::rsql;
        switch (op) {
            case binary_operator::EQUALS:
                return "= " + quote(val);
            case binary_operator::NOT_EQUALS:
                return "<> " + quote(val);
            case binary_operator::GREATER_THAN:
                return "> " + quote(val);
            case binary_operator::GREATER_THAN_OR_EQUAL:
                return ">= " + quote(val);
            case binary_operator::LESS_THAN:
                return "< " + quote(val);
            case binary_operator::LESS_THAN_OR_EQUAL:
                return "<= " + quote(val);
            default:
                return "??";
        }
    }

    /// Renders `IN (...)` for =in= and `NOT IN (...)` for =out=.
    /// An empty list is rendered as `(NULL)` to keep the SQL syntactically valid.
    static std::string collection_op_string(const matador::rsql::collection_operator op, const std::vector<std::string>& list) {
        const bool negated = (op == matador::rsql::collection_operator::OUT);
        if (list.empty()) {
            // NOTE(review): in standard SQL `x NOT IN (NULL)` is never true, so an
            // empty =out= list filters out every row - confirm this is intended.
            return negated ? "NOT IN (NULL)" : "IN (NULL)";
        }
        // =out= must negate; the previous code had the two keywords swapped.
        std::string sql = negated ? "NOT IN (" : "IN (";
        for (size_t i = 0; i < list.size(); ++i) {
            sql += quote(list[i]);
            if (i + 1 < list.size()) sql += ", ";
        }
        sql += ")";
        return sql;
    }
};
// ---------------- Main / usage ----------------
int main(const int argc, char** argv) {
using namespace matador::rsql;
std::string input;
if (argc >= 2) {
input = argv[1];
} else {
// example default
input = "(status=in=(OPEN,CLOSED);priority==HIGH),category=out=(internal,test)";
std::cout << "No query provided, using default example:\n" << input << "\n\n";
}
try {
const auto tokens = lexer::tokenize(input);
parser p(tokens);
const auto ast = p.parse();
std::cout << "=== AST (Evaluator) ===\n";
Evaluator ev;
ast->accept(ev);
std::cout << "\n=== SQL WHERE clause ===\n";
SQLBuilderVisitor sql;
ast->accept(sql);
std::cout << sql.result << "\n";
} catch (const std::exception& ex) {
std::cerr << "Error: " << ex.what() << "\n";
return 2;
}
return 0;
}

View File

@ -0,0 +1,26 @@
#include "matador/rsql/binary_condition_node.hpp"
#include "matador/rsql/node_visitor.hpp"
namespace matador::rsql {

/// Stores the field name, the comparison operator and the right-hand value.
binary_condition_node::binary_condition_node(std::string field, const binary_operator op, std::string value)
    : field_(std::move(field))
    , op_(op)
    , value_(std::move(value))
{}

/// Double dispatch: selects the visitor overload for this concrete node type.
// Intentionally not `inline`: the header declares accept() as an ordinary
// member, so this out-of-line definition must be emitted for other
// translation units to link against.
void binary_condition_node::accept( node_visitor& visitor ) const {
    visitor.visit(*this);
}

/// Name of the field on the left-hand side.
const std::string& binary_condition_node::field() const {
    return field_;
}

/// The comparison operator.
binary_operator binary_condition_node::operand() const {
    return op_;
}

/// Right-hand side literal as text.
const std::string& binary_condition_node::value() const {
    return value_;
}

}

View File

@ -0,0 +1,27 @@
#include "matador/rsql/collection_condition_node.hpp"
#include "matador/rsql/node_visitor.hpp"
namespace matador::rsql {

/// Stores the field name, the membership operator and the value list.
collection_condition_node::collection_condition_node(std::string field,
                                                     const collection_operator op,
                                                     std::vector<std::string> value)
    : field_(std::move(field)),
      op_(op),
      value_(std::move(value)) {}

/// Double dispatch: selects the visitor overload for this concrete node type.
void collection_condition_node::accept(node_visitor& visitor) const {
    visitor.visit(*this);
}

/// Name of the field on the left-hand side.
const std::string& collection_condition_node::field() const {
    return field_;
}

/// The membership operator.
collection_operator collection_condition_node::operand() const {
    return op_;
}

/// Listed literals in the order they were supplied.
const std::vector<std::string>& collection_condition_node::values() const {
    return value_;
}

}

38
src/rsql/lexer.cpp Normal file
View File

@ -0,0 +1,38 @@
#include "matador/rsql/lexer.hpp"
#include "matador/rsql/token_type.hpp"
namespace matador::rsql::lexer {
/// Splits an RSQL expression into tokens.
///
/// Recognised, in priority order:
///   1. operators: named `=xx=` forms (=in=, =out=, =gt=, ...) or a run of `= ! > < ~`
///   2. single- or double-quoted strings (reported as VALUE without the quotes)
///   3. '('   4. ')'   5. ';' (AND) or ',' (OR)
///   6. identifiers: `[A-Za-z_]\w*`
///   7. any other run of characters up to the next whitespace or reserved
///      character (reported as VALUE, e.g. numbers and dates)
///
/// Whitespace between tokens is skipped.
/// @param input RSQL text to tokenize.
/// @return Tokens in input order.
std::vector<token> tokenize( const std::string& input ) {
    // Built once: constructing a std::regex is expensive and the pattern never changes.
    // The catch-all (group 7) stops at reserved characters; otherwise an unquoted
    // value such as the 5 in "a==5;b==6" would swallow the rest of the expression.
    static const std::regex token_regex(
        R"((=[A-Za-z]+=|[=!><~]+)|('[^']*'|"[^"]*")|(\()|(\))|(;|,)|([A-Za-z_]\w*)|([^ \t\n\r;,()=!<>~]+))");

    std::vector<token> tokens;
    const auto end = std::sregex_iterator();
    for (auto it = std::sregex_iterator(input.begin(), input.end(), token_regex); it != end; ++it) {
        const std::smatch& m = *it;
        const std::string text = m.str();
        // Exactly one capture group participates in each match; it identifies the token kind.
        if (m[1].matched) {
            tokens.push_back({token_type::OPERATOR, text});
        } else if (m[2].matched) {
            // Group 2 always includes both quotes, so size() >= 2 here.
            tokens.push_back({token_type::VALUE, text.substr(1, text.size() - 2)});
        } else if (m[3].matched) {
            tokens.push_back({token_type::OPEN_PAREN, text});
        } else if (m[4].matched) {
            tokens.push_back({token_type::CLOSE_PAREN, text});
        } else if (m[5].matched) {
            tokens.push_back({text == ";" ? token_type::LOGICAL_AND : token_type::LOGICAL_OR, text});
        } else if (m[6].matched) {
            tokens.push_back({token_type::IDENTIFIER, text});
        } else {
            tokens.push_back({token_type::VALUE, text});
        }
    }
    return tokens;
}
}

21
src/rsql/logical_node.cpp Normal file
View File

@ -0,0 +1,21 @@
#include "matador/rsql/logical_node.hpp"
#include "matador/rsql/node_visitor.hpp"
namespace matador::rsql {

/// Creates a logical node without children.
logical_node::logical_node(const logical_operator op)
    : op_(op)
{}

/// Double dispatch: selects the visitor overload for this concrete node type.
void logical_node::accept(node_visitor& visitor) const {
    visitor.visit(*this);
}

/// The combining operator (AND or OR).
logical_operator logical_node::operand() const {
    return op_;
}

/// Child expressions in insertion order.
const std::vector<std::shared_ptr<node>>& logical_node::children() const {
    return children_;
}

}

111
src/rsql/parser.cpp Normal file
View File

@ -0,0 +1,111 @@
#include "matador/rsql/parser.hpp"
#include "matador/rsql/binary_condition_node.hpp"
#include "matador/rsql/collection_condition_node.hpp"
#include "matador/rsql/logical_node.hpp"
#include <map>
#include <stdexcept>
namespace matador::rsql {
/// Takes ownership of the token sequence; parsing starts at the first token.
parser::parser(std::vector<token> t)
    : tokens_(std::move(t))
{}
/// Parses the complete token sequence.
/// @return Root of the resulting AST.
/// @throws std::runtime_error if there are no tokens or if tokens remain
///         after a complete expression has been parsed.
std::shared_ptr<node> parser::parse() {
    if (tokens_.empty()) {
        throw std::runtime_error("Empty input");
    }

    auto root = parse_or_expression();

    // Anything left over means the input continued after a complete expression.
    const bool everything_consumed = (current_size_ == tokens_.size());
    if (!everything_consumed) {
        throw std::runtime_error("Unexpected token after end: " + tokens_[current_size_].value);
    }
    return root;
}
/// OR level (lowest precedence): `and_expr (',' and_expr)*`.
/// Each additional operand wraps the tree built so far in a new two-child
/// OR node, giving a left-associative chain.
std::shared_ptr<node> parser::parse_or_expression() {
    auto node = parse_and_expression();
    while (match({token_type::LOGICAL_OR})) {
        const auto logical = std::make_shared<logical_node>(logical_operator::OR);
        logical->children_.push_back(node);
        logical->children_.push_back(parse_and_expression());
        node = logical;
    }
    return node;
}
/// AND level (binds tighter than OR): `primary (';' primary)*`.
/// Each additional operand wraps the tree built so far in a new two-child
/// AND node, giving a left-associative chain.
std::shared_ptr<node> parser::parse_and_expression() {
    auto node = parse_primary();
    while (match({token_type::LOGICAL_AND})) {
        const auto logical = std::make_shared<logical_node>(logical_operator::AND);
        logical->children_.push_back(node);
        logical->children_.push_back(parse_primary());
        node = logical;
    }
    return node;
}
/// Primary: either `'(' or_expr ')'` or a single condition.
/// @throws std::runtime_error if an opened parenthesis is not closed.
std::shared_ptr<node> parser::parse_primary() {
    const bool is_group = match({token_type::OPEN_PAREN});
    if (!is_group) {
        return parse_condition();
    }

    auto inner = parse_or_expression();
    if (match({token_type::CLOSE_PAREN})) {
        return inner;
    }
    throw std::runtime_error("Missing closing parenthesis");
}
/// Parses one condition: `field op value` or `field (=in=|=out=) '(' v1 ',' v2 ... ')'`.
/// @return A binary_condition_node or a collection_condition_node.
/// @throws std::runtime_error on any syntax error, including an operator that
///         is in neither operator table.
std::shared_ptr<node> parser::parse_condition() {
    if (!match({token_type::IDENTIFIER})) {
        throw std::runtime_error("Expected field name");
    }
    std::string field = previous().value;

    if (!match({token_type::OPERATOR})) {
        throw std::runtime_error("Expected operator after field");
    }
    const std::string op = previous().value;

    if (const auto list_op = collection_operators.find(op); list_op != collection_operators.end()) {
        if (!match({token_type::OPEN_PAREN})) {
            throw std::runtime_error("Expected '(' after " + op);
        }
        std::vector<std::string> values;
        while (!match({token_type::CLOSE_PAREN})) {
            // The lexer reports ',' as LOGICAL_OR; inside a list it is just a separator.
            if (!values.empty() && !match({token_type::LOGICAL_OR})) {
                throw std::runtime_error("Expected ',' between list values");
            }
            if (!match({token_type::VALUE, token_type::IDENTIFIER})) {
                throw std::runtime_error("Expected value in list");
            }
            values.push_back(previous().value);
        }
        return std::make_shared<collection_condition_node>(std::move(field), list_op->second, std::move(values));
    }

    if (!match({token_type::VALUE, token_type::IDENTIFIER})) {
        throw std::runtime_error("Expected value after operator");
    }

    // Report unknown operators like every other syntax error instead of letting
    // map::at() escape with a message-less std::out_of_range.
    const auto binary_op = binary_operators.find(op);
    if (binary_op == binary_operators.end()) {
        throw std::runtime_error("Unknown operator: " + op);
    }
    return std::make_shared<binary_condition_node>(std::move(field), binary_op->second, previous().value);
}
bool parser::match( const std::vector<token_type>& types ) {
if (current_size_ < tokens_.size() &&
std::find(types.begin(), types.end(), tokens_[current_size_].type) != types.end()) {
++current_size_;
return true;
}
return false;
}
/// Returns the token consumed by the most recent successful match().
/// Must only be called after at least one token has been consumed.
const token& parser::previous() const {
    const auto last_consumed = current_size_ - 1;
    return tokens_[last_consumed];
}
}