Skip to content
This repository was archived by the owner on May 10, 2024. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ set(THRIFT_VERSION "0.9.1")

# Brotli 0.5.2 does not install headers/libraries yet, but 0.6.0.dev does
set(BROTLI_VERSION "5db62dcc9d386579609540cdf8869e95ad334bbd")
set(ARROW_VERSION "4226adfbc6b3dff10b3fe7a6691b30bcc94140bd")
set(ARROW_VERSION "5439b71586f4b0f9a36544b9e2417ee6ad7b48e8")

# find boost headers and libs
set(Boost_DEBUG TRUE)
Expand Down
9 changes: 6 additions & 3 deletions src/parquet/arrow/arrow-reader-writer-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ TEST_F(TestInt96ParquetIO, ReadIntoTimestamp) {
// 2nd January 1970, 11:35min 145738543ns
Int96 day;
day.value[2] = 2440589l;
int64_t seconds = ((1 * 24 + 11) * 60 + 35) * 60;
int64_t seconds = (11 * 60 + 35) * 60;
*(reinterpret_cast<int64_t*>(&(day.value))) =
seconds * 1000l * 1000l * 1000l + 145738543;
// Compute the corresponding nanosecond timestamp
Expand Down Expand Up @@ -587,16 +587,19 @@ TEST_F(TestUInt32ParquetIO, Parquet_1_0_Compability) {
int64_data_ptr[i] = static_cast<int64_t>(uint32_data_ptr[i]);
}
}

const int32_t kOffset = 0;
ASSERT_OK(MakePrimitiveArray(std::make_shared<::arrow::Int64Type>(), values->length(),
int64_data, values->null_count(), values->null_bitmap(), &expected_values));
int64_data, values->null_bitmap(), values->null_count(), kOffset,
&expected_values));
this->ReadAndCheckSingleColumnTable(expected_values);
}

using TestStringParquetIO = TestParquetIO<::arrow::StringType>;

TEST_F(TestStringParquetIO, EmptyStringColumnRequiredWrite) {
std::shared_ptr<Array> values;
::arrow::StringBuilder builder(::arrow::default_memory_pool(), ::arrow::utf8());
::arrow::StringBuilder builder(::arrow::default_memory_pool());
for (size_t i = 0; i < SMALL_SIZE; i++) {
builder.Append("");
}
Expand Down
8 changes: 4 additions & 4 deletions src/parquet/arrow/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,7 @@ Status ColumnReader::Impl::WrapIntoListArray(const int16_t* def_levels,
auto list_type = std::make_shared<::arrow::ListType>(
std::make_shared<Field>("item", output->type(), nullable[j + 1]));
output = std::make_shared<::arrow::ListArray>(
list_type, list_lengths[j], offsets[j], output, null_counts[j], valid_bits[j]);
list_type, list_lengths[j], offsets[j], output, valid_bits[j], null_counts[j]);
}
*array = output;
}
Expand Down Expand Up @@ -667,7 +667,7 @@ Status ColumnReader::Impl::TypedReadBatch(int batch_size, std::shared_ptr<Array>
::arrow::BitUtil::CeilByte(valid_bits_idx_) / 8, false));
}
*out = std::make_shared<ArrayType<ArrowType>>(
field_->type, valid_bits_idx_, data_buffer_, null_count_, valid_bits_buffer_);
field_->type, valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
// Release the ownership as the Buffer is now part of a new Array
valid_bits_buffer_.reset();
} else {
Expand Down Expand Up @@ -741,7 +741,7 @@ Status ColumnReader::Impl::TypedReadBatch<::arrow::BooleanType, BooleanType>(
valid_bits_buffer_ = valid_bits_buffer;
}
*out = std::make_shared<BooleanArray>(
field_->type, valid_bits_idx_, data_buffer_, null_count_, valid_bits_buffer_);
field_->type, valid_bits_idx_, data_buffer_, valid_bits_buffer_, null_count_);
// Release the ownership
data_buffer_.reset();
valid_bits_buffer_.reset();
Expand Down Expand Up @@ -770,7 +770,7 @@ Status ColumnReader::Impl::ReadByteArrayBatch(
int16_t* rep_levels = reinterpret_cast<int16_t*>(rep_levels_buffer_.mutable_data());

int values_to_read = batch_size;
BuilderType builder(pool_, field_->type);
BuilderType builder(pool_);
while ((values_to_read > 0) && column_reader_) {
RETURN_NOT_OK(values_buffer_.Resize(values_to_read * sizeof(ByteArray), false));
auto reader = dynamic_cast<TypedColumnReader<ByteArrayType>*>(column_reader_.get());
Expand Down
21 changes: 10 additions & 11 deletions src/parquet/arrow/test-util.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ typename std::enable_if<is_arrow_float<ArrowType>::value, Status>::type NonNullA
size_t size, std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
::arrow::test::random_real<typename ArrowType::c_type>(size, 0, 0, 1, &values);
::arrow::NumericBuilder<ArrowType> builder(
::arrow::default_memory_pool(), std::make_shared<ArrowType>());
::arrow::NumericBuilder<ArrowType> builder(::arrow::default_memory_pool());
builder.Append(values.data(), values.size());
return builder.Finish(out);
}
Expand All @@ -58,6 +57,8 @@ typename std::enable_if<is_arrow_int<ArrowType>::value, Status>::type NonNullArr
size_t size, std::shared_ptr<Array>* out) {
std::vector<typename ArrowType::c_type> values;
::arrow::test::randint<typename ArrowType::c_type>(size, 0, 64, &values);

// Passing data type so this will work with TimestampType too
::arrow::NumericBuilder<ArrowType> builder(
::arrow::default_memory_pool(), std::make_shared<ArrowType>());
builder.Append(values.data(), values.size());
Expand All @@ -69,7 +70,7 @@ typename std::enable_if<
is_arrow_string<ArrowType>::value || is_arrow_binary<ArrowType>::value, Status>::type
NonNullArray(size_t size, std::shared_ptr<Array>* out) {
using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
BuilderType builder(::arrow::default_memory_pool(), std::make_shared<ArrowType>());
BuilderType builder(::arrow::default_memory_pool());
for (size_t i = 0; i < size; i++) {
builder.Append("test-string");
}
Expand All @@ -81,8 +82,7 @@ typename std::enable_if<is_arrow_bool<ArrowType>::value, Status>::type NonNullAr
size_t size, std::shared_ptr<Array>* out) {
std::vector<uint8_t> values;
::arrow::test::randint<uint8_t>(size, 0, 1, &values);
::arrow::BooleanBuilder builder(
::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>());
::arrow::BooleanBuilder builder(::arrow::default_memory_pool());
builder.Append(values.data(), values.size());
return builder.Finish(out);
}
Expand All @@ -100,8 +100,7 @@ typename std::enable_if<is_arrow_float<ArrowType>::value, Status>::type Nullable
valid_bytes[i * 2] = 0;
}

::arrow::NumericBuilder<ArrowType> builder(
::arrow::default_memory_pool(), std::make_shared<ArrowType>());
::arrow::NumericBuilder<ArrowType> builder(::arrow::default_memory_pool());
builder.Append(values.data(), values.size(), valid_bytes.data());
return builder.Finish(out);
}
Expand All @@ -121,6 +120,7 @@ typename std::enable_if<is_arrow_int<ArrowType>::value, Status>::type NullableAr
valid_bytes[i * 2] = 0;
}

// Passing data type so this will work with TimestampType too
::arrow::NumericBuilder<ArrowType> builder(
::arrow::default_memory_pool(), std::make_shared<ArrowType>());
builder.Append(values.data(), values.size(), valid_bytes.data());
Expand All @@ -140,7 +140,7 @@ NullableArray(
}

using BuilderType = typename ::arrow::TypeTraits<ArrowType>::BuilderType;
BuilderType builder(::arrow::default_memory_pool(), std::make_shared<ArrowType>());
BuilderType builder(::arrow::default_memory_pool());

const int kBufferSize = 10;
uint8_t buffer[kBufferSize];
Expand Down Expand Up @@ -171,8 +171,7 @@ typename std::enable_if<is_arrow_bool<ArrowType>::value, Status>::type NullableA
valid_bytes[i * 2] = 0;
}

::arrow::BooleanBuilder builder(
::arrow::default_memory_pool(), std::make_shared<::arrow::BooleanType>());
::arrow::BooleanBuilder builder(::arrow::default_memory_pool());
builder.Append(values.data(), values.size(), valid_bytes.data());
return builder.Finish(out);
}
Expand Down Expand Up @@ -211,7 +210,7 @@ Status MakeListArary(const std::shared_ptr<Array>& values, int64_t size,
auto value_field =
std::make_shared<::arrow::Field>("item", values->type(), nullable_values);
*out = std::make_shared<::arrow::ListArray>(
::arrow::list(value_field), size, offsets, values, null_count, null_bitmap);
::arrow::list(value_field), size, offsets, values, null_bitmap, null_count);

return Status::OK();
}
Expand Down
6 changes: 3 additions & 3 deletions src/parquet/arrow/writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ class LevelBuilder : public ::arrow::ArrayVisitor {
Status Visit(const ListArray& array) override {
valid_bitmaps_.push_back(array.null_bitmap_data());
null_counts_.push_back(array.null_count());
offsets_.push_back(array.raw_offsets());
offsets_.push_back(array.raw_value_offsets());

min_offset_idx_ = array.raw_offsets()[min_offset_idx_];
max_offset_idx_ = array.raw_offsets()[max_offset_idx_];
min_offset_idx_ = array.raw_value_offsets()[min_offset_idx_];
max_offset_idx_ = array.raw_value_offsets()[max_offset_idx_];

return array.values()->Accept(this);
}
Expand Down
6 changes: 3 additions & 3 deletions src/parquet/util/cpu-info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ void CpuInfo::Init() {
#else
#ifndef _SC_LEVEL1_DCACHE_SIZE
// Provide reasonable default values if no info
cache_sizes_[0] = 32 * 1024; // Level 1: 32k
cache_sizes_[1] = 256 * 1024; // Level 2: 256k
cache_sizes_[2] = 3072 * 1024; // Level 3: 3M
cache_sizes_[0] = 32 * 1024; // Level 1: 32k
cache_sizes_[1] = 256 * 1024; // Level 2: 256k
cache_sizes_[2] = 3072 * 1024; // Level 3: 3M
#else
// Call sysconf to query for the cache sizes
cache_sizes_[0] = sysconf(_SC_LEVEL1_DCACHE_SIZE);
Expand Down