Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format/buffer-alignable.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

require_relative "flat-buffers"

module ArrowFormat
module BufferAlignable
include FlatBuffers::Alignable

# Alignment boundary, in bytes, that buffer sizes are padded up to.
BUFFER_ALIGNMENT_SIZE = 64
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: What do you think about adding units like BUFFER_ALIGNMENT_BYTES or BUFFER_ALIGNMENT_BITS? (I didn't know the unit)

Copy link
Member Author

@kou kou Jan 19, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm. I don't think that we need it (at least for now). In general, alignment unit is bytes.


private
# Padding size for +buffer+, as computed by compute_padding_size from
# the buffer's size and BUFFER_ALIGNMENT_SIZE.
def buffer_padding_size(buffer)
  byte_size = buffer.size
  compute_padding_size(byte_size, BUFFER_ALIGNMENT_SIZE)
end

# Size of +buffer+ plus the padding reported by buffer_padding_size.
def aligned_buffer_size(buffer)
  byte_size = buffer.size
  byte_size + buffer_padding_size(buffer)
end
end
end
17 changes: 11 additions & 6 deletions ruby/red-arrow-format/lib/arrow-format/record-batch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@
# specific language governing permissions and limitations
# under the License.

require_relative "buffer-alignable"

module ArrowFormat
class RecordBatch
include BufferAlignable

attr_reader :schema
attr_reader :n_rows
attr_reader :columns
Expand Down Expand Up @@ -44,15 +48,16 @@ def to_flat_buffers
end
offset = 0
fb_record_batch.buffers = all_buffers_enumerator.collect do |buffer|
buffer_flat_buffesr = FB::Buffer::Data.new
buffer_flat_buffesr.offset = offset
fb_buffer = FB::Buffer::Data.new
fb_buffer.offset = offset
if buffer
offset += buffer.size
buffer_flat_buffesr.length = buffer.size
aligned_size = aligned_buffer_size(buffer)
offset += aligned_size
fb_buffer.length = aligned_size
else
buffer_flat_buffesr.length = 0
fb_buffer.length = 0
end
buffer_flat_buffesr
fb_buffer
end
# body_compression = FB::BodyCompression::Data.new
# body_compression.codec = ...
Expand Down
26 changes: 20 additions & 6 deletions ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
# specific language governing permissions and limitations
# under the License.

require_relative "flat-buffers"
require_relative "buffer-alignable"

module ArrowFormat
class StreamingWriter
include FlatBuffers::Alignable
include BufferAlignable

ALIGNMENT_SIZE = IO::Buffer.size_of(:u64)
CONTINUATION = "\xFF\xFF\xFF\xFF".b.freeze
Expand All @@ -40,7 +40,7 @@ def start(schema)
def write_record_batch(record_batch)
body_length = 0
record_batch.all_buffers_enumerator.each do |buffer|
body_length += buffer.size if buffer
body_length += aligned_buffer_size(buffer) if buffer
end
metadata = build_metadata(record_batch.to_flat_buffers, body_length)
fb_block = FB::Block::Data.new
Expand All @@ -53,7 +53,7 @@ def write_record_batch(record_batch)
@fb_record_batch_blocks << fb_block
write_message(metadata) do
record_batch.all_buffers_enumerator.each do |buffer|
write_data(buffer) if buffer
write_buffer(buffer) if buffer
end
end
end
Expand All @@ -69,8 +69,22 @@ def finish

private
def write_data(data)
@output << data
@offset += data.bytesize
case data
when IO::Buffer
# TODO: We should use IO::Buffer#write to avoid needless copy.
# data.write(@output)
@output << data.get_string
@offset += data.size
else
@output << data
@offset += data.bytesize
end
end

# Writes +buffer+ through write_data, then writes padding of
# buffer_padding_size(buffer) bytes when that size is greater than zero.
def write_buffer(buffer)
  write_data(buffer)

  padding_size = buffer_padding_size(buffer)
  # Nothing more to emit when no padding is required.
  return unless padding_size > 0

  write_data(padding(padding_size))
end

def build_metadata(header, body_length=0)
Expand Down
4 changes: 4 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format/type.rb
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ def name
# Creates a BooleanArray for this type from the given size, validity
# buffer and values buffer.
def build_array(size, validity_buffer, values_buffer)
BooleanArray.new(self, size, validity_buffer, values_buffer)
end

# Returns a new FB::Bool::Data instance; no fields are set on it here.
def to_flat_buffers
FB::Bool::Data.new
end
end

class NumberType < Type
Expand Down
2 changes: 1 addition & 1 deletion ruby/red-arrow-format/red-arrow-format.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Gem::Specification.new do |spec|
spec.files += Dir.glob("lib/**/*.rb")
spec.files += Dir.glob("doc/text/*")

spec.add_runtime_dependency("red-flatbuffers", ">=0.0.4")
spec.add_runtime_dependency("red-flatbuffers", ">=0.0.5")

github_url = "https://github.com/apache/arrow"
spec.metadata = {
Expand Down
22 changes: 22 additions & 0 deletions ruby/red-arrow-format/test/test-writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,25 @@ def convert_type(red_arrow_type)
case red_arrow_type
when Arrow::NullDataType
ArrowFormat::NullType.singleton
when Arrow::BooleanDataType
ArrowFormat::BooleanType.singleton
end
end

# Wraps the bytes of +buffer+ (taken from buffer.data.to_s) in an
# IO::Buffer. A nil +buffer+ yields nil.
def convert_buffer(buffer)
  if buffer.nil?
    nil
  else
    bytes = buffer.data.to_s
    IO::Buffer.for(bytes)
  end
end

# Converts +red_arrow_array+ into an array built by the matching
# ArrowFormat type. Only null and boolean types are handled here; any
# other type yields nil.
def convert_array(red_arrow_array)
  format_type = convert_type(red_arrow_array.value_data_type)
  case format_type
  when ArrowFormat::NullType
    format_type.build_array(red_arrow_array.size)
  when ArrowFormat::BooleanType
    length = red_arrow_array.size
    validity_buffer = convert_buffer(red_arrow_array.null_bitmap)
    values_buffer = convert_buffer(red_arrow_array.data_buffer)
    format_type.build_array(length, validity_buffer, values_buffer)
  end
end

Expand All @@ -44,6 +55,17 @@ def test_write
@values)
end
end

# Boolean array case: a true, a null and a false value.
sub_test_case("Boolean") do
# Source array for this case.
def build_array
Arrow::BooleanArray.new([true, nil, false])
end

# @values is populated outside this block -- presumably by shared setup
# that writes build_array's result and reads it back (TODO confirm).
# It must equal the original input values.
def test_write
assert_equal([true, nil, false],
@values)
end
end
end
end
end
Expand Down
Loading