avro-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From marti...@apache.org
Subject svn commit: r1725988 - in /avro/trunk: CHANGES.txt lang/ruby/lib/avro/io.rb lang/ruby/lib/avro/ipc.rb lang/ruby/lib/avro/schema.rb lang/ruby/test/test_io.rb
Date Thu, 21 Jan 2016 15:28:06 GMT
Author: martinkl
Date: Thu Jan 21 15:28:06 2016
New Revision: 1725988

URL: http://svn.apache.org/viewvc?rev=1725988&view=rev
Log:
AVRO-1783. Ruby: Ensure correct binary encoding for byte strings.

Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/ruby/lib/avro/io.rb
    avro/trunk/lang/ruby/lib/avro/ipc.rb
    avro/trunk/lang/ruby/lib/avro/schema.rb
    avro/trunk/lang/ruby/test/test_io.rb

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=1725988&r1=1725987&r2=1725988&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Thu Jan 21 15:28:06 2016
@@ -278,6 +278,9 @@ Avro 1.8.0 (15 December 2015)
 
     AVRO-1775. Ruby: Use test-unit gem for running tests. (martinkl)
 
+    AVRO-1783. Ruby: Ensure correct binary encoding for byte strings.
+    (martinkl)
+
 Avro 1.7.7 (23 July 2014)
 
   NEW FEATURES

Modified: avro/trunk/lang/ruby/lib/avro/io.rb
URL: http://svn.apache.org/viewvc/avro/trunk/lang/ruby/lib/avro/io.rb?rev=1725988&r1=1725987&r2=1725988&view=diff
==============================================================================
--- avro/trunk/lang/ruby/lib/avro/io.rb (original)
+++ avro/trunk/lang/ruby/lib/avro/io.rb Thu Jan 21 15:28:06 2016
@@ -209,7 +209,7 @@ module Avro
       # A string is encoded as a long followed by that many bytes of
       # UTF-8 encoded character data
       def write_string(datum)
-        # FIXME utf-8 encode this in 1.9
+        datum = datum.encode('utf-8') if datum.respond_to? :encode
         write_bytes(datum)
       end
 

Modified: avro/trunk/lang/ruby/lib/avro/ipc.rb
URL: http://svn.apache.org/viewvc/avro/trunk/lang/ruby/lib/avro/ipc.rb?rev=1725988&r1=1725987&r2=1725988&view=diff
==============================================================================
--- avro/trunk/lang/ruby/lib/avro/ipc.rb (original)
+++ avro/trunk/lang/ruby/lib/avro/ipc.rb Thu Jan 21 15:28:06 2016
@@ -100,7 +100,7 @@ module Avro::IPC
     def request(message_name, request_datum)
       # Writes a request message and reads a response or error message.
       # build handshake and call request
-      buffer_writer = StringIO.new('', 'w+')
+      buffer_writer = StringIO.new(''.force_encoding('BINARY'))
       buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
       write_handshake_request(buffer_encoder)
       write_call_request(message_name, request_datum, buffer_encoder)
@@ -244,7 +244,7 @@ module Avro::IPC
     # a response or error. Compare to 'handle()' in Thrift.
     def respond(call_request, transport=nil)
       buffer_decoder = Avro::IO::BinaryDecoder.new(StringIO.new(call_request))
-      buffer_writer = StringIO.new('', 'w+')
+      buffer_writer = StringIO.new(''.force_encoding('BINARY'))
       buffer_encoder = Avro::IO::BinaryEncoder.new(buffer_writer)
       error = nil
       response_metadata = {}
@@ -294,6 +294,7 @@ module Avro::IPC
         end
       rescue Avro::AvroError => e
         error = AvroRemoteException.new(e.to_s)
+        # TODO does the stuff written here ever get used?
         buffer_encoder = Avro::IO::BinaryEncoder.new(StringIO.new)
         META_WRITER.write(response_metadata, buffer_encoder)
         buffer_encoder.write_boolean(true)
@@ -393,7 +394,7 @@ module Avro::IPC
     def read_framed_message
       message = []
       loop do
-        buffer = StringIO.new
+        buffer = StringIO.new(''.force_encoding('BINARY'))
         buffer_length = read_buffer_length
         if buffer_length == 0
           return message.join
@@ -410,7 +411,7 @@ module Avro::IPC
     end
 
     def write_framed_message(message)
-      message_length = message.size
+      message_length = message.bytesize
       total_bytes_sent = 0
       while message_length - total_bytes_sent > 0
         if message_length - total_bytes_sent > BUFFER_SIZE
@@ -426,7 +427,7 @@ module Avro::IPC
     end
 
     def write_buffer(chunk)
-      buffer_length = chunk.size
+      buffer_length = chunk.bytesize
       write_buffer_length(buffer_length)
       total_bytes_sent = 0
       while total_bytes_sent < buffer_length
@@ -467,7 +468,7 @@ module Avro::IPC
     end
 
     def write_framed_message(message)
-      message_size = message.size
+      message_size = message.bytesize
       total_bytes_sent = 0
       while message_size - total_bytes_sent > 0
         if message_size - total_bytes_sent > BUFFER_SIZE
@@ -485,7 +486,7 @@ module Avro::IPC
 
     private
     def write_buffer(chunk)
-      buffer_size = chunk.size
+      buffer_size = chunk.bytesize
       write_buffer_size(buffer_size)
       writer << chunk
     end
@@ -505,13 +506,13 @@ module Avro::IPC
     def read_framed_message
       message = []
       loop do
-        buffer = ""
+        buffer = ''.force_encoding('BINARY')
         buffer_size = read_buffer_size
 
         return message.join if buffer_size == 0
 
-        while buffer.size < buffer_size
-          chunk = reader.read(buffer_size - buffer.size)
+        while buffer.bytesize < buffer_size
+          chunk = reader.read(buffer_size - buffer.bytesize)
           chunk_error?(chunk)
           buffer << chunk
         end
@@ -541,7 +542,7 @@ module Avro::IPC
     end
 
     def transceive(message)
-      writer = FramedWriter.new(StringIO.new)
+      writer = FramedWriter.new(StringIO.new(''.force_encoding('BINARY')))
       writer.write_framed_message(message)
       resp = @conn.post('/', writer.to_s, {'Content-Type' => 'avro/binary'})
       FramedReader.new(StringIO.new(resp.body)).read_framed_message

Modified: avro/trunk/lang/ruby/lib/avro/schema.rb
URL: http://svn.apache.org/viewvc/avro/trunk/lang/ruby/lib/avro/schema.rb?rev=1725988&r1=1725987&r2=1725988&view=diff
==============================================================================
--- avro/trunk/lang/ruby/lib/avro/schema.rb (original)
+++ avro/trunk/lang/ruby/lib/avro/schema.rb Thu Jan 21 15:28:06 2016
@@ -108,7 +108,7 @@ module Avro
       when :float, :double
         datum.is_a?(Float) || datum.is_a?(Fixnum) || datum.is_a?(Bignum)
       when :fixed
-        datum.is_a?(String) && datum.size == expected_schema.size
+        datum.is_a?(String) && datum.bytesize == expected_schema.size
       when :enum
         expected_schema.symbols.include? datum
       when :array

Modified: avro/trunk/lang/ruby/test/test_io.rb
URL: http://svn.apache.org/viewvc/avro/trunk/lang/ruby/test/test_io.rb?rev=1725988&r1=1725987&r2=1725988&view=diff
==============================================================================
--- avro/trunk/lang/ruby/test/test_io.rb (original)
+++ avro/trunk/lang/ruby/test/test_io.rb Thu Jan 21 15:28:06 2016
@@ -210,6 +210,51 @@ EOS
     end
   end
 
+  def test_utf8_string_encoding
+    [
+      "\xC3".force_encoding('ISO-8859-1'),
+      "\xC3\x83".force_encoding('UTF-8')
+    ].each do |value|
+      output = ''.force_encoding('BINARY')
+      encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
+      datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"string"'))
+      datum_writer.write(value, encoder)
+
+      assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
+    end
+  end
+
+  def test_bytes_encoding
+    [
+      "\xC3\x83".force_encoding('BINARY'),
+      "\xC3\x83".force_encoding('ISO-8859-1'),
+      "\xC3\x83".force_encoding('UTF-8')
+    ].each do |value|
+      output = ''.force_encoding('BINARY')
+      encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
+      datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse('"bytes"'))
+      datum_writer.write(value, encoder)
+
+      assert_equal "\x04\xc3\x83".force_encoding('BINARY'), output
+    end
+  end
+
+  def test_fixed_encoding
+    [
+      "\xC3\x83".force_encoding('BINARY'),
+      "\xC3\x83".force_encoding('ISO-8859-1'),
+      "\xC3\x83".force_encoding('UTF-8')
+    ].each do |value|
+      output = ''.force_encoding('BINARY')
+      encoder = Avro::IO::BinaryEncoder.new(StringIO.new(output))
+      schema = '{"type": "fixed", "name": "TwoBytes", "size": 2}'
+      datum_writer = Avro::IO::DatumWriter.new(Avro::Schema.parse(schema))
+      datum_writer.write(value, encoder)
+
+      assert_equal "\xc3\x83".force_encoding('BINARY'), output
+    end
+  end
+
   def test_skip_long
     for value_to_skip, hex_encoding in BINARY_INT_ENCODINGS
       value_to_read = 6253



Mime
View raw message