Pogosim
Loading...
Searching...
No Matches
data_logger.h
Go to the documentation of this file.
1
2#ifndef DATA_LOGGER_H
3#define DATA_LOGGER_H
4
5#include <arrow/api.h>
6#include <arrow/io/file.h>
7#include <arrow/ipc/api.h>
8#include <arrow/util/compression.h>
9#include <unordered_map>
10#include <variant>
11#include <vector>
12
25public:
31 DataLogger() = default;
32
40 explicit DataLogger(int64_t flush_row_count);
41
48 virtual ~DataLogger();
49
57 bool column_exists(const std::string& column_name);
58
67 bool column_value_already_set(const std::string& column_name);
68
80 void add_metadata(const std::string& key, const std::string& value);
81
95 void add_field(const std::string& name, std::shared_ptr<arrow::DataType> type, bool ignore_existing_name = false);
96
109 void open_file(const std::string& filename);
110
121 void set_value(const std::string& column_name, int64_t value);
122
133 void set_value(const std::string& column_name, int32_t value);
134
145 void set_value(const std::string& column_name, int16_t value);
146
157 void set_value(const std::string& column_name, int8_t value);
158
169 void set_value(const std::string& column_name, double value);
170
181 void set_value(const std::string& column_name, float value);
182
193 void set_value(const std::string& column_name, const std::string& value);
194
205 void set_value(const std::string& column_name, bool value);
206
217 void set_value_float16(const std::string& column_name, float value);
218
228 void save_row();
229
238 void flush();
239
240private:
241 // Store Arrow float16 values as their raw 16-bit representation
242 using half_float_t = arrow::NumericBuilder<arrow::HalfFloatType>::value_type;
243
245 std::vector<std::shared_ptr<arrow::Field>> fields_;
247 std::shared_ptr<arrow::Schema> schema_;
249 std::shared_ptr<arrow::io::OutputStream> outfile_;
251 std::shared_ptr<arrow::ipc::RecordBatchWriter> writer_;
253 std::vector<std::shared_ptr<arrow::ArrayBuilder>> builders_;
255 std::unordered_map<std::string, size_t> column_indices_;
257 std::unordered_map<std::string, std::variant<int64_t, int32_t, int16_t, int8_t, float, double, std::string, bool, half_float_t>> row_values_;
259 bool file_opened_ = false;
261 std::unordered_map<std::string, std::string> user_metadata_;
263 int64_t flush_row_count_ = 1024;
265 int64_t buffered_row_count_ = 0;
266
276 void check_column(const std::string& column_name);
277
283 void reset_row();
284
292 void initialize_builders();
293
301 void append_current_row_to_builders();
302
303 // Convert a float32 value to its raw 16-bit IEEE-754 half-precision bit pattern
304 static half_float_t float_to_half_bits(float f);
305};
306
307#endif // DATA_LOGGER_H
308
void flush()
Flushes all currently buffered rows to the Feather file.
Definition data_logger.cpp:254
bool column_value_already_set(const std::string &column_name)
Checks if the specified column value has been set for the current row.
Definition data_logger.cpp:293
void set_value_float16(const std::string &column_name, float value)
Sets the value for a specified column in the current row, storing the given float as a float16 (half-precision) value.
Definition data_logger.cpp:215
bool column_exists(const std::string &column_name)
Checks if the specified column exists.
Definition data_logger.cpp:289
void open_file(const std::string &filename)
Opens the output file for writing.
Definition data_logger.cpp:126
void save_row()
Saves the current row to the Feather file.
Definition data_logger.cpp:220
void add_metadata(const std::string &key, const std::string &value)
Adds arbitrary string metadata to be embedded in the Feather file.
Definition data_logger.cpp:98
void add_field(const std::string &name, std::shared_ptr< arrow::DataType > type, bool ignore_existing_name=false)
Adds a new field to the schema.
Definition data_logger.cpp:112
virtual ~DataLogger()
Destructor.
Definition data_logger.cpp:81
DataLogger()=default
Default constructor.
void set_value(const std::string &column_name, int64_t value)
Sets the value for a specified column in the current row.
Definition data_logger.cpp:175