-
Notifications
You must be signed in to change notification settings - Fork 0
olap_analytics
makr-code edited this page Nov 30, 2025
·
1 revision
Status: ✅ Implementiert
Version: 1.0
Datum: 30. November 2025
ThemisDB unterstützt fortgeschrittene OLAP (Online Analytical Processing) Features für Business Intelligence und Datenanalyse.
-
Aggregations-Funktionen
- COUNT, SUM, AVG, MIN, MAX
- STDDEV, VARIANCE
- MEDIAN, PERCENTILE
- COUNT_DISTINCT
- FIRST, LAST
-
Grouping Operators
- Simple GROUP BY
- CUBE (alle Kombinationen)
- ROLLUP (hierarchisch)
- GROUPING SETS (benutzerdefiniert)
-
Window Functions
- PARTITION BY
- ORDER BY
- ROWS PRECEDING/FOLLOWING
-
Columnar Store
- Spaltenorientierte Speicherung
- Vektorisierte Aggregationen
- Column Statistics
-
Materialized Views
- Pre-computed Aggregations
- Manual/Periodic Refresh
- Incremental Updates (geplant)
#include "analytics/olap.h"
using namespace themis::analytics;
OLAPQuery query;
query.collection = "sales";
// Dimensionen
query.dimensions.push_back({"region", "", true});
query.dimensions.push_back({"product", "", true});
// Measures
query.measures.push_back({"total_sales", "amount", Measure::Function::Sum});
query.measures.push_back({"avg_sales", "amount", Measure::Function::Avg});
query.measures.push_back({"order_count", "id", Measure::Function::Count});
// Filter
Filter filter;
filter.field = "year";
filter.op = Filter::Operator::Eq;
filter.value = int64_t(2024);
query.filters.push_back(filter);
// Sortierung
query.sorts.push_back({"total_sales", false, false}); // DESC
// Pagination
query.limit = 100;
query.offset = 0;

OLAPEngine engine;
auto result = engine.execute(query);
std::cout << "Rows: " << result.total_rows << std::endl;
std::cout << "Execution time: " << result.execution_time_ms << " ms" << std::endl;
for (const auto& row : result.rows) {
auto region = std::get<std::string>(row.values.at("region"));
auto total = std::get<double>(row.values.at("total_sales"));
std::cout << region << ": " << total << std::endl;
}

CUBE generiert alle möglichen Gruppierungskombinationen:
OLAPQuery query;
query.collection = "sales";
query.grouping_mode = OLAPQuery::GroupingMode::Cube;
query.dimensions.push_back({"region", "", true});
query.dimensions.push_back({"product", "", true});
query.dimensions.push_back({"year", "", true});
query.measures.push_back({"total", "amount", Measure::Function::Sum});
// Generiert:
// - (region, product, year) - Detail
// - (region, product) - year aggregiert
// - (region, year) - product aggregiert
// - (product, year) - region aggregiert
// - (region) - product, year aggregiert
// - (product) - region, year aggregiert
// - (year) - region, product aggregiert
// - () - Grand Total
auto cells = engine.executeCube("sales", query.dimensions, query.measures);
for (const auto& cell : cells) {
std::cout << "Grouping ID: " << cell.grouping_id << std::endl;
for (const auto& [dim, value] : cell.dimensions) {
if (value) {
std::cout << " " << dim << ": " << *value << std::endl;
} else {
std::cout << " " << dim << ": (ALL)" << std::endl;
}
}
std::cout << " Total: " << cell.measures.at("total") << std::endl;
}

ROLLUP generiert hierarchische Aggregationen:
OLAPQuery query;
query.collection = "sales";
query.grouping_mode = OLAPQuery::GroupingMode::Rollup;
// Hierarchie: Jahr > Quartal > Monat
query.dimensions.push_back({"year", "", true});
query.dimensions.push_back({"quarter", "", true});
query.dimensions.push_back({"month", "", true});
query.measures.push_back({"total", "amount", Measure::Function::Sum});
// Generiert:
// - (year, quarter, month) - Detail
// - (year, quarter) - Monatssummen
// - (year) - Quartalssummen
// - () - Grand Total
auto rows = engine.executeRollup("sales", query.dimensions, query.measures);
for (const auto& row : rows) {
std::cout << "Level: " << row.level << std::endl;
// Level 0 = Detail, höhere Level = Subtotals
}

Benutzerdefinierte Gruppierungssätze:
OLAPQuery query;
query.collection = "sales";
query.grouping_mode = OLAPQuery::GroupingMode::GroupingSets;
query.dimensions.push_back({"region", "", true});
query.dimensions.push_back({"product", "", true});
query.dimensions.push_back({"year", "", true});
// Spezifische Kombinationen
query.grouping_sets.push_back({{"region", "product"}});
query.grouping_sets.push_back({{"region", "year"}});
query.grouping_sets.push_back({{"product"}});
auto result = engine.execute(query);

OLAPQuery::WindowSpec window;
window.name = "rolling_avg";
window.partition_by = {"region"};
window.order_by.push_back({"date", true, false}); // ASC
window.rows_preceding = 2; // 3-Tage gleitender Durchschnitt
window.rows_following = 0;
query.windows.push_back(window);
std::vector<Measure> measures;
measures.push_back({"avg_sales", "amount", Measure::Function::Avg});
std::vector<std::unordered_map<std::string, double>> data = {
{{"region", 1.0}, {"date", 1.0}, {"amount", 100.0}},
{{"region", 1.0}, {"date", 2.0}, {"amount", 150.0}},
{{"region", 1.0}, {"date", 3.0}, {"amount", 200.0}},
// ...
};
auto windowResults = engine.evaluateWindowFunctions(data, measures, window);

auto plan = engine.explain(query);
std::cout << "Estimated rows: " << plan.estimated_rows << std::endl;
std::cout << "Estimated cost: " << plan.estimated_cost << std::endl;
for (const auto& note : plan.optimization_notes) {
std::cout << "- " << note << std::endl;
}

ColumnarStore store;
// Spalten erstellen
store.createColumn("id", "string");
store.createColumn("region", "string");
store.createColumn("amount", "double");
// Daten einfügen
using Value = std::variant<std::nullptr_t, bool, int64_t, double, std::string>;
std::vector<std::unordered_map<std::string, Value>> rows = {
{{"id", std::string("1")}, {"region", std::string("North")}, {"amount", 100.0}},
{{"id", std::string("2")}, {"region", std::string("South")}, {"amount", 200.0}},
{{"id", std::string("3")}, {"region", std::string("North")}, {"amount", 150.0}}
};
store.appendRows(rows);
// Aggregationen
std::cout << "Sum: " << store.sum("amount") << std::endl;
std::cout << "Avg: " << store.avg("amount") << std::endl;
std::cout << "Min: " << store.min("amount") << std::endl;
std::cout << "Max: " << store.max("amount") << std::endl;
std::cout << "Count: " << store.count("amount") << std::endl;
std::cout << "Distinct: " << store.countDistinct("region") << std::endl;

std::vector<bool> mask = {true, false, true}; // Include rows 0 and 2
double sum = store.sumWhere("amount", mask); // 100 + 150 = 250

auto stats = store.getColumnStats("amount");
std::cout << "Rows: " << stats.row_count << std::endl;
std::cout << "Nulls: " << stats.null_count << std::endl;
std::cout << "Distinct: " << stats.distinct_count << std::endl;
std::cout << "Min: " << *stats.min_value << std::endl;
std::cout << "Max: " << *stats.max_value << std::endl;
std::cout << "Avg: " << stats.avg_value << std::endl;

MaterializedView::Definition def;
def.name = "sales_by_region";
def.source_collection = "sales";
def.dimensions.push_back({"region", "", true});
def.measures.push_back({"total_sales", "amount", Measure::Function::Sum});
def.measures.push_back({"order_count", "id", Measure::Function::Count});
def.refresh_mode = MaterializedView::Definition::RefreshMode::Periodic;
def.refresh_interval_seconds = 3600; // Stündlich
MaterializedView view(def);
view.refresh(); // Erste Aktualisierung

std::vector<Filter> filters;
Filter f;
f.field = "region";
f.op = Filter::Operator::Eq;
f.value = std::string("North");
filters.push_back(f);
auto result = view.query(filters, {}, 10);

if (view.isStale()) {
view.refresh();
}
std::cout << "Last refresh: "
<< std::chrono::system_clock::to_time_t(view.lastRefreshTime())
<< std::endl;
std::cout << "Row count: " << view.rowCount() << std::endl;

| Funktion | Beschreibung |
|---|---|
| COUNT | Anzahl der Werte |
| SUM | Summe aller Werte |
| AVG | Durchschnitt |
| MIN | Minimum |
| MAX | Maximum |
| STDDEV | Standardabweichung |
| VARIANCE | Varianz |
| MEDIAN | Median (50. Perzentil) |
| PERCENTILE | Beliebiges Perzentil |
| COUNT_DISTINCT | Anzahl eindeutiger Werte |
| FIRST | Erster Wert |
| LAST | Letzter Wert |
Measure m;
m.name = "p95_latency";
m.field = "latency";
m.function = Measure::Function::Percentile;
m.percentile_value = 95.0; // 95. Perzentil

| Operator | Beschreibung |
|---|---|
| Eq | Gleichheit (=) |
| Ne | Ungleichheit (!=) |
| Lt | Kleiner als (<) |
| Le | Kleiner gleich (<=) |
| Gt | Größer als (>) |
| Ge | Größer gleich (>=) |
| In | Enthält in Liste |
| NotIn | Nicht in Liste |
| Contains | String enthält |
| StartsWith | String beginnt mit |
| EndsWith | String endet mit |
| IsNull | Ist NULL |
| IsNotNull | Ist nicht NULL |
| Between | Zwischen zwei Werten |
- Filter auf indizierte Spalten verwenden
- Sortierung nach indizierten Spalten
- Häufig verwendete Aggregationen vorberechnen
- Refresh-Intervall an Aktualitätsanforderungen anpassen
- Für große Datasets verwenden
- Vektorisierte Operationen nutzen
- EXPLAIN zur Plananalyse
- Limit für Ergebnismengen
- Selektive Filter verwenden
- Keine echte Columnar-Persistenz (in-memory)
- Keine automatische View-Auswahl
- Keine parallele Aggregation (single-threaded)
- Persistente Columnar Storage
- Parallel Aggregation
- Automatic View Selection
- Incremental View Refresh
- Apache Arrow Integration
- GPU-beschleunigte Aggregation
Letzte Aktualisierung: 30. November 2025
Maintainer: ThemisDB Team
- AQL Overview
- AQL Syntax Reference
- EXPLAIN and PROFILE
- Hybrid Queries
- Pattern Matching
- Subquery Implementation
- Subquery Quick Reference
- Fulltext Release Notes
- Hybrid Search Design
- Fulltext Search API
- Content Search
- Pagination Benchmarks
- Stemming
- Hybrid Fusion API
- Performance Tuning
- Migration Guide
- Storage Overview
- RocksDB Layout
- Geo Schema
- Index Types
- Index Statistics
- Index Backup
- HNSW Persistence
- Vector Index
- Graph Index
- Secondary Index
- Security Overview
- RBAC and Authorization
- TLS Setup
- Certificate Pinning
- Encryption Strategy
- Column Encryption
- Key Management
- Key Rotation
- HSM Integration
- PKI Integration
- eIDAS Signatures
- PII Detection
- PII API
- Threat Model
- Hardening Guide
- Incident Response
- SBOM
- Enterprise Overview
- Scalability Features
- Scalability Strategy
- HTTP Client Pool
- Enterprise Build Guide
- Enterprise Ingestion
- Benchmarks Overview
- Compression Benchmarks
- Compression Strategy
- Memory Tuning
- Hardware Acceleration
- GPU Acceleration Plan
- CUDA Backend
- Vulkan Backend
- Multi-CPU Support
- TBB Integration
- Time Series
- Vector Operations
- Graph Features
- Temporal Graphs
- Path Constraints
- Recursive Queries
- Audit Logging
- Change Data Capture
- Transactions
- Semantic Cache
- Cursor Pagination
- Compliance Features
- GNN Embeddings
- Geo Overview
- Geo Architecture
- 3D Game Acceleration
- Geo Feature Tiering
- G3 Phase 2 Status
- G5 Implementation
- Integration Guide
- Content Architecture
- Content Pipeline
- Content Manager
- JSON Ingestion
- Content Ingestion
- Filesystem API
- Image Processor
- Geo Processor
- Policy Implementation
- Developer Guide
- Implementation Status
- Development Roadmap
- Build Strategy
- Build Acceleration
- Code Quality Guide
- AQL LET Implementation
- Audit API Implementation
- SAGA API Implementation
- PKI eIDAS
- WAL Archiving
- Architecture Overview
- Strategic Overview
- Ecosystem
- MVCC Design
- Base Entity
- Caching Strategy
- Caching Data Structures
- Docker Build
- Docker Status
- Multi-Arch CI/CD
- ARM Build Guide
- ARM Packages
- Raspberry Pi Tuning
- Packaging Guide
- Package Maintainers
- Roadmap
- Changelog
- Database Capabilities
- Implementation Summary
- Sachstandsbericht 2025
- Enterprise Final Report
- Test Report
- Build Success Report
- Integration Analysis
- Source Overview
- API Implementation
- Query Engine
- Storage Layer
- Security Implementation
- CDC Implementation
- Time Series
- Utils and Helpers
Updated: 2025-11-30