From 04635154fcee5441e72ffa26f8de69c388b5721d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 19 Jan 2026 18:20:50 +0900 Subject: [PATCH] GH-48888: [Ruby] Add support for writing boolean array --- .../lib/arrow-format/buffer-alignable.rb | 35 +++++++++++++++++++ .../lib/arrow-format/record-batch.rb | 17 +++++---- .../lib/arrow-format/streaming-writer.rb | 26 ++++++++++---- .../red-arrow-format/lib/arrow-format/type.rb | 4 +++ .../red-arrow-format/red-arrow-format.gemspec | 2 +- ruby/red-arrow-format/test/test-writer.rb | 22 ++++++++++++ 6 files changed, 93 insertions(+), 13 deletions(-) create mode 100644 ruby/red-arrow-format/lib/arrow-format/buffer-alignable.rb diff --git a/ruby/red-arrow-format/lib/arrow-format/buffer-alignable.rb b/ruby/red-arrow-format/lib/arrow-format/buffer-alignable.rb new file mode 100644 index 00000000000..1ed806d715b --- /dev/null +++ b/ruby/red-arrow-format/lib/arrow-format/buffer-alignable.rb @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +require_relative "flat-buffers" + +module ArrowFormat + module BufferAlignable + include FlatBuffers::Alignable + + BUFFER_ALIGNMENT_SIZE = 64 + + private + def buffer_padding_size(buffer) + compute_padding_size(buffer.size, BUFFER_ALIGNMENT_SIZE) + end + + def aligned_buffer_size(buffer) + buffer.size + buffer_padding_size(buffer) + end + end +end diff --git a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb index e413b6f8e81..51e0583f0e9 100644 --- a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb +++ b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb @@ -14,8 +14,12 @@ # specific language governing permissions and limitations # under the License. +require_relative "buffer-alignable" + module ArrowFormat class RecordBatch + include BufferAlignable + attr_reader :schema attr_reader :n_rows attr_reader :columns @@ -44,15 +48,16 @@ def to_flat_buffers end offset = 0 fb_record_batch.buffers = all_buffers_enumerator.collect do |buffer| - buffer_flat_buffesr = FB::Buffer::Data.new - buffer_flat_buffesr.offset = offset + fb_buffer = FB::Buffer::Data.new + fb_buffer.offset = offset if buffer - offset += buffer.size - buffer_flat_buffesr.length = buffer.size + aligned_size = aligned_buffer_size(buffer) + offset += aligned_size + fb_buffer.length = aligned_size else - buffer_flat_buffesr.length = 0 + fb_buffer.length = 0 end - buffer_flat_buffesr + fb_buffer end # body_compression = FB::BodyCompression::Data.new # body_compression.codec = ... diff --git a/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb index 211c0bbdb58..a9e323b6751 100644 --- a/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb +++ b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb @@ -15,11 +15,11 @@ # specific language governing permissions and limitations # under the License. -require_relative "flat-buffers" +require_relative "buffer-alignable" module ArrowFormat class StreamingWriter - include FlatBuffers::Alignable + include BufferAlignable ALIGNMENT_SIZE = IO::Buffer.size_of(:u64) CONTINUATION = "\xFF\xFF\xFF\xFF".b.freeze @@ -40,7 +40,7 @@ def start(schema) def write_record_batch(record_batch) body_length = 0 record_batch.all_buffers_enumerator.each do |buffer| - body_length += buffer.size if buffer + body_length += aligned_buffer_size(buffer) if buffer end metadata = build_metadata(record_batch.to_flat_buffers, body_length) fb_block = FB::Block::Data.new @@ -53,7 +53,7 @@ def write_record_batch(record_batch) @fb_record_batch_blocks << fb_block write_message(metadata) do record_batch.all_buffers_enumerator.each do |buffer| - write_data(buffer) if buffer + write_buffer(buffer) if buffer end end end @@ -69,8 +69,22 @@ def finish private def write_data(data) - @output << data - @offset += data.bytesize + case data + when IO::Buffer + # TODO: We should use IO::Buffer#write to avoid needless copy. + # data.write(@output) + @output << data.get_string + @offset += data.size + else + @output << data + @offset += data.bytesize + end + end + + def write_buffer(buffer) + write_data(buffer) + padding_size = buffer_padding_size(buffer) + write_data(padding(padding_size)) if padding_size > 0 end def build_metadata(header, body_length=0) diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb index 4f71b39187b..b6db79327d3 100644 --- a/ruby/red-arrow-format/lib/arrow-format/type.rb +++ b/ruby/red-arrow-format/lib/arrow-format/type.rb @@ -52,6 +52,10 @@ def name def build_array(size, validity_buffer, values_buffer) BooleanArray.new(self, size, validity_buffer, values_buffer) end + + def to_flat_buffers + FB::Bool::Data.new + end end class NumberType < Type diff --git a/ruby/red-arrow-format/red-arrow-format.gemspec b/ruby/red-arrow-format/red-arrow-format.gemspec index 5e489f14ae5..bccb51dce3c 100644 --- a/ruby/red-arrow-format/red-arrow-format.gemspec +++ b/ruby/red-arrow-format/red-arrow-format.gemspec @@ -46,7 +46,7 @@ Gem::Specification.new do |spec| spec.files += Dir.glob("lib/**/*.rb") spec.files += Dir.glob("doc/text/*") - spec.add_runtime_dependency("red-flatbuffers", ">=0.0.4") + spec.add_runtime_dependency("red-flatbuffers", ">=0.0.5") github_url = "https://github.com/apache/arrow" spec.metadata = { diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb index 7acf068d9cf..82b27301e7c 100644 --- a/ruby/red-arrow-format/test/test-writer.rb +++ b/ruby/red-arrow-format/test/test-writer.rb @@ -20,14 +20,25 @@ def convert_type(red_arrow_type) case red_arrow_type when Arrow::NullDataType ArrowFormat::NullType.singleton + when Arrow::BooleanDataType + ArrowFormat::BooleanType.singleton end end + def convert_buffer(buffer) + return nil if buffer.nil? + IO::Buffer.for(buffer.data.to_s) + end + def convert_array(red_arrow_array) type = convert_type(red_arrow_array.value_data_type) case type when ArrowFormat::NullType type.build_array(red_arrow_array.size) + when ArrowFormat::BooleanType + type.build_array(red_arrow_array.size, + convert_buffer(red_arrow_array.null_bitmap), + convert_buffer(red_arrow_array.data_buffer)) end end @@ -44,6 +55,17 @@ def test_write @values) end end + + sub_test_case("Boolean") do + def build_array + Arrow::BooleanArray.new([true, nil, false]) + end + + def test_write + assert_equal([true, nil, false], + @values) + end + end end end end