diff --git a/ruby/red-arrow/lib/arrow/column-containable.rb b/ruby/red-arrow/lib/arrow/column-containable.rb index 32cdb7b372f..150c126b0ce 100644 --- a/ruby/red-arrow/lib/arrow/column-containable.rb +++ b/ruby/red-arrow/lib/arrow/column-containable.rb @@ -152,5 +152,70 @@ def [](selector) def column_names @column_names ||= columns.collect(&:name) end + + # Merges the columns of the given container or Hash into a new + # container: same-named columns are replaced in place, new ones are + # appended, and a Hash entry with a nil/false value removes its column. + # + # @param other [Hash, self] The columns to be merged. + # @raise [ArgumentError] If other is neither a Hash nor a kind of self.class. + # @return [self] A new instance of the receiver's class. + def merge(other) + added_columns = {} + removed_columns = {} + + case other + when Hash + other.each do |name, value| + name = name.to_s + if value + added_columns[name] = ensure_raw_column(name, value) + else + removed_columns[name] = true + end + end + when self.class + other.columns.each do |column| + name = column.name + added_columns[name] = ensure_raw_column(name, column) + end + else + message = "merge target must be Hash or #{self.class}: " + + "<#{other.inspect}>: #{inspect}" + raise ArgumentError, message + end + + new_columns = [] + + columns.each do |column| + column_name = column.name + new_column = added_columns.delete(column_name) + + if new_column + new_columns << new_column + next + end + + next if removed_columns.key?(column_name) + + new_columns << ensure_raw_column(column_name, column) + end + + added_columns.each_value do |new_column| + new_columns << new_column + end + + new_fields = [] + new_arrays = [] + + new_columns.each do |new_column| + new_fields << new_column[:field] + new_arrays << new_column[:data] + end + + merged = self.class.new(new_fields, new_arrays) + share_input(merged) + merged + end end end diff --git a/ruby/red-arrow/lib/arrow/record-batch.rb b/ruby/red-arrow/lib/arrow/record-batch.rb index 0a2ba359b3b..b765edc32e7 100644 --- a/ruby/red-arrow/lib/arrow/record-batch.rb +++ b/ruby/red-arrow/lib/arrow/record-batch.rb @@ -37,7 +37,12 @@ def new(*args) super(schema, n_rows, values) when 2 schema, data = args 
- RecordBatchBuilder.build(schema, data) + schema = Schema.new(schema) unless schema.is_a?(Schema) + if !data.empty? and data.all? {|array| array.is_a?(Arrow::Array)} + super(schema, data[0].size, data) + else + RecordBatchBuilder.build(schema, data) + end when 3 super else @@ -63,63 +68,6 @@ def to_table table end - def merge(other) - added_columns = {} - removed_columns = {} - - case other - when Hash - other.each do |name, value| - name = name.to_s - if value - added_columns[name] = ensure_raw_column(name, value) - else - removed_columns[name] = true - end - end - when RecordBatch - other.columns.each do |column| - name = column.name - added_columns[name] = ensure_raw_column(name, column) - end - else - message = "merge target must be Hash or Arrow::RecordBatch: " + - "<#{other.inspect}>: #{inspect}" - raise ArgumentError, message - end - - new_columns = [] - columns.each do |column| - column_name = column.name - new_column = added_columns.delete(column_name) - if new_column - new_columns << new_column - next - end - next if removed_columns.key?(column_name) - new_columns << ensure_raw_column(column_name, column) - end - - added_columns.each_value do |new_column| - new_columns << new_column - end - - new_fields = [] - new_arrays = [] - new_columns.each do |new_column| - new_fields << new_column[:field] - new_arrays << new_column[:data] - end - - record_batch = self.class.new( - Schema.new(new_fields), - n_rows, - new_arrays, - ) - share_input(record_batch) - record_batch - end - def respond_to_missing?(name, include_private) return true if find_column(name) super diff --git a/ruby/red-arrow/lib/arrow/table.rb b/ruby/red-arrow/lib/arrow/table.rb index 0ce5962fce6..45d54b71096 100644 --- a/ruby/red-arrow/lib/arrow/table.rb +++ b/ruby/red-arrow/lib/arrow/table.rb @@ -354,60 +354,6 @@ def slice(*args) sliced_table end - # TODO - # - # @return [Arrow::Table] - def merge(other) - added_columns = {} - removed_columns = {} - - case other - when Hash - other.each do 
|name, value| - name = name.to_s - if value - added_columns[name] = ensure_raw_column(name, value) - else - removed_columns[name] = true - end - end - when Table - added_columns = {} - other.columns.each do |column| - name = column.name - added_columns[name] = ensure_raw_column(name, column) - end - else - message = "merge target must be Hash or Arrow::Table: " + - "<#{other.inspect}>: #{inspect}" - raise ArgumentError, message - end - - new_columns = [] - columns.each do |column| - column_name = column.name - new_column = added_columns.delete(column_name) - if new_column - new_columns << new_column - next - end - next if removed_columns.key?(column_name) - new_columns << ensure_raw_column(column_name, column) - end - added_columns.each do |name, new_column| - new_columns << new_column - end - new_fields = [] - new_arrays = [] - new_columns.each do |new_column| - new_fields << new_column[:field] - new_arrays << new_column[:data] - end - table = self.class.new(new_fields, new_arrays) - share_input(table) - table - end - alias_method :remove_column_raw, :remove_column def remove_column(name_or_index) case name_or_index