Raw C API#

struct DFArrowArray#
#include <datafusion.h>

Same as the ArrowArray struct in the Arrow C data interface.

See also: https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowarray-structure

Public Members

int64_t length#
int64_t null_count#
int64_t offset#
int64_t n_buffers#
int64_t n_children#
const void **buffers#
struct DFArrowArray **children#
struct DFArrowArray *dictionary#
void (*release)(struct DFArrowArray *array)#
void *private_data#
struct DFArrowSchema#
#include <datafusion.h>

Same as the ArrowSchema struct in the Arrow C data interface.

See also: https://arrow.apache.org/docs/format/CDataInterface.html#the-arrowschema-structure

Public Members

const char *format#
const char *name#
const char *metadata#
int64_t flags#
int64_t n_children#
struct DFArrowSchema **children#
struct DFArrowSchema *dictionary#
void (*release)(struct DFArrowSchema *schema)#
void *private_data#
struct DFDataFrame#
#include <datafusion.h>

A struct that represents a data frame.

Execution results are returned as a data frame.

You need to free the data frame with df_data_frame_free() when it is no longer needed.

struct DFError#
#include <datafusion.h>

A struct that holds error information.

You can access the error information with df_error_get_code() and df_error_get_message().

You need to free the error information with df_error_free() when it is no longer needed.

struct DFParquetWriterProperties#
#include <datafusion.h>

A struct to customize how to write an Apache Parquet file.

You need to free this with df_parquet_writer_properties_free() when it is no longer needed.

struct DFSessionContext#
#include <datafusion.h>

The entry point of the DataFusion API.

You need to create a DFSessionContext to use the DataFusion API.

file datafusion.h
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

Defines

DATAFUSION_MAJOR#
DATAFUSION_MINOR#
DATAFUSION_PATCH#

Typedefs

typedef enum DFErrorCode DFErrorCode
typedef struct DFCSVReadOptions DFCSVReadOptions#
typedef struct DFDataFrame DFDataFrame
typedef struct DFError DFError
typedef struct DFParquetReadOptions DFParquetReadOptions#
typedef struct DFParquetWriterProperties DFParquetWriterProperties#
typedef struct DFSessionContext DFSessionContext
typedef struct DFArrowSchema DFArrowSchema
typedef struct DFArrowArray DFArrowArray

Enums

enum DFErrorCode#

Error category.

Values:

enumerator DF_ERROR_CODE_ARROW#
enumerator DF_ERROR_CODE_PARQUET#
enumerator DF_ERROR_CODE_AVRO#
enumerator DF_ERROR_CODE_OBJECT_STORE#
enumerator DF_ERROR_CODE_IO#
enumerator DF_ERROR_CODE_SQL#
enumerator DF_ERROR_CODE_NOT_IMPLEMENTED#
enumerator DF_ERROR_CODE_INTERNAL#
enumerator DF_ERROR_CODE_PLAN#
enumerator DF_ERROR_CODE_SCHEMA#
enumerator DF_ERROR_CODE_EXECUTION#
enumerator DF_ERROR_CODE_RESOURCES_EXHAUSTED#
enumerator DF_ERROR_CODE_EXTERNAL#
enumerator DF_ERROR_CODE_JIT#
enumerator DF_ERROR_CODE_CONTEXT#
enumerator DF_ERROR_CODE_SUBSTRAIT#

Functions

struct DFError *df_error_new(enum DFErrorCode code, const char *message)#
void df_error_free(struct DFError *_error)#

Free the given DFError.

Safety#

This function should not be called with an error that was not created by df_error_new().

This function should not be called multiple times for the same error.

Parameters:
  • _error – A DFError returned by df_*() functions.

const char *df_error_get_message(struct DFError *error)#

Get the message of this error.

Safety#

This function should not be called with an error that was not created by df_error_new().

This function should not be called with an error that has already been freed by df_error_free().

Parameters:
  • error – A DFError.

Returns:

The message of this error.

enum DFErrorCode df_error_get_code(struct DFError *error)#

Get the code of this error.

Safety#

This function should not be called with an error that was not created by df_error_new().

This function should not be called with an error that has already been freed by df_error_free().

Parameters:
  • error – A DFError.

Returns:

The code of this error.

struct DFParquetWriterProperties *df_parquet_writer_properties_new(void)#
void df_parquet_writer_properties_free(struct DFParquetWriterProperties *_properties)#
void df_parquet_writer_properties_set_max_row_group_size(struct DFParquetWriterProperties *properties, uintptr_t size)#
void df_data_frame_free(struct DFDataFrame *_data_frame)#

Free the given DFDataFrame.

Safety#

This function should not be called multiple times for the same data_frame.

Parameters:
  • _data_frame – The DFDataFrame to free.

void df_data_frame_show(struct DFDataFrame *data_frame, struct DFError **error)#

Print the contents of the given data frame to standard output.

Parameters:
  • data_frame – A DFDataFrame to be shown.

  • error – Return location for a DFError or NULL.

bool df_data_frame_write_parquet(struct DFDataFrame *data_frame, const char *path, const struct DFParquetWriterProperties *writer_properties, struct DFError **error)#

Write the contents of the given data frame in Apache Parquet format.

Parameters:
  • data_frame – A DFDataFrame to be written.

  • path – An output path.

  • writer_properties – Properties that control how Apache Parquet files are written.

  • error – Return location for a DFError or NULL.

int64_t df_data_frame_export(struct DFDataFrame *data_frame, struct DFArrowSchema **c_abi_schema_out, struct DFArrowArray ***c_abi_record_batches_out, struct DFError **error)#
struct DFSessionContext *df_session_context_new(void)#

Create a new DFSessionContext.

It should be freed with df_session_context_free() when it is no longer needed.

Returns:

A newly created DFSessionContext.

void df_session_context_free(struct DFSessionContext *_context)#

Free the given DFSessionContext.

Safety#

This function should not be called with a context that was not created by df_session_context_new().

This function should not be called multiple times for the same context.

Parameters:
  • _context – The DFSessionContext to free.

struct DFDataFrame *df_session_context_sql(struct DFSessionContext *context, const char *sql, struct DFError **error)#
bool df_session_context_deregister(struct DFSessionContext *context, const char *name, struct DFError **error)#
bool df_session_context_register_record_batches(struct DFSessionContext *context, const char *name, struct DFArrowSchema *c_abi_schema, struct DFArrowArray **c_abi_record_batches, size_t n_record_batches, struct DFError **error)#
struct DFCSVReadOptions *df_csv_read_options_new(void)#
void df_csv_read_options_free(struct DFCSVReadOptions *_options)#
void df_csv_read_options_set_has_header(struct DFCSVReadOptions *options, bool has_header)#
bool df_csv_read_options_get_has_header(struct DFCSVReadOptions *options)#
void df_csv_read_options_set_delimiter(struct DFCSVReadOptions *options, uint8_t delimiter)#
uint8_t df_csv_read_options_get_delimiter(struct DFCSVReadOptions *options)#
bool df_csv_read_options_set_schema(struct DFCSVReadOptions *options, struct DFArrowSchema *schema, struct DFError **error)#
struct DFArrowSchema *df_csv_read_options_get_schema(struct DFCSVReadOptions *options, struct DFError **error)#
void df_csv_read_options_set_schema_infer_max_records(struct DFCSVReadOptions *options, uintptr_t n)#
uintptr_t df_csv_read_options_get_schema_infer_max_records(struct DFCSVReadOptions *options)#
bool df_csv_read_options_set_file_extension(struct DFCSVReadOptions *options, const char *file_extension, struct DFError **error)#
char *df_csv_read_options_get_file_extension(struct DFCSVReadOptions *options)#
bool df_csv_read_options_set_table_partition_columns(struct DFCSVReadOptions *options, struct DFArrowSchema *schema, struct DFError **error)#
struct DFArrowSchema *df_csv_read_options_get_table_partition_columns(struct DFCSVReadOptions *options, struct DFError **error)#
bool df_session_context_register_csv(struct DFSessionContext *context, const char *name, const char *url, struct DFCSVReadOptions *options, struct DFError **error)#
struct DFParquetReadOptions *df_parquet_read_options_new(void)#
void df_parquet_read_options_free(struct DFParquetReadOptions *_options)#
bool df_parquet_read_options_set_file_extension(struct DFParquetReadOptions *options, const char *file_extension, struct DFError **error)#
char *df_parquet_read_options_get_file_extension(struct DFParquetReadOptions *options)#
bool df_parquet_read_options_set_table_partition_columns(struct DFParquetReadOptions *options, struct DFArrowSchema *schema, struct DFError **error)#
struct DFArrowSchema *df_parquet_read_options_get_table_partition_columns(struct DFParquetReadOptions *options, struct DFError **error)#
void df_parquet_read_options_set_pruning(struct DFParquetReadOptions *options, bool pruning)#
void df_parquet_read_options_unset_pruning(struct DFParquetReadOptions *options)#
bool df_parquet_read_options_is_set_pruning(struct DFParquetReadOptions *options)#
bool df_parquet_read_options_get_pruning(struct DFParquetReadOptions *options)#
bool df_session_context_register_parquet(struct DFSessionContext *context, const char *name, const char *url, struct DFParquetReadOptions *options, struct DFError **error)#