diff --git a/avro4s-core/src/main/scala/com/sksamuel/avro4s/encoders/strings.scala b/avro4s-core/src/main/scala/com/sksamuel/avro4s/encoders/strings.scala index 3142bf9d..b0507a61 100644 --- a/avro4s-core/src/main/scala/com/sksamuel/avro4s/encoders/strings.scala +++ b/avro4s-core/src/main/scala/com/sksamuel/avro4s/encoders/strings.scala @@ -7,6 +7,7 @@ import org.apache.avro.generic.GenericData import org.apache.avro.util.Utf8 import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets import java.util.UUID trait StringEncoders: @@ -37,13 +38,15 @@ object UTF8StringEncoder extends Encoder[String] : * An [[Encoder]] for Strings that encodes as [[ByteBuffer]]s. */ object ByteStringEncoder extends Encoder[String] : - override def encode(schema: Schema): String => Any = string => ByteBuffer.wrap(string.getBytes) + override def encode(schema: Schema): String => Any = string => + ByteBuffer.wrap(string.getBytes(StandardCharsets.UTF_8)) /** * An [[Encoder]] for Strings that encodes as [[GenericFixed]]s. */ object FixedStringEncoder extends Encoder[String] : override def encode(schema: Schema): String => Any = string => - if (string.getBytes.length > schema.getFixedSize) - throw new Avro4sEncodingException(s"Cannot write string with ${string.getBytes.length} bytes to fixed type of size ${schema.getFixedSize}") - GenericData.get.createFixed(null, ByteBuffer.allocate(schema.getFixedSize).put(string.getBytes).array, schema).asInstanceOf[GenericData.Fixed] \ No newline at end of file + val bytes = string.getBytes(StandardCharsets.UTF_8) + if (bytes.length > schema.getFixedSize) + throw new Avro4sEncodingException(s"Cannot write string with ${bytes.length} bytes to fixed type of size ${schema.getFixedSize}") + GenericData.get.createFixed(null, ByteBuffer.allocate(schema.getFixedSize).put(bytes).array, schema).asInstanceOf[GenericData.Fixed] diff --git a/avro4s-core/src/test/scala/com/sksamuel/avro4s/examples/ReadMeExamples.scala b/avro4s-core/src/test/scala/com/sksamuel/avro4s/examples/ReadMeExamples.scala index f8710e6c..35fca176 100644 --- a/avro4s-core/src/test/scala/com/sksamuel/avro4s/examples/ReadMeExamples.scala +++ b/avro4s-core/src/test/scala/com/sksamuel/avro4s/examples/ReadMeExamples.scala @@ -4,6 +4,8 @@ import com.sksamuel.avro4s.AvroSchema import org.scalatest.matchers.should.Matchers import org.scalatest.wordspec.AnyWordSpec +import java.nio.charset.StandardCharsets + /** * Tests created from README examples * @@ -51,7 +53,7 @@ class ReadMeExamples extends AnyWordSpec with Matchers { json shouldBe ("{\"name\":\"ennio morricone\",\"birthplace\":\"rome\",\"compositions\":[\"legend of 1900\",\"ecstasy of gold\"]}") - val in = new ByteArrayInputStream(json.getBytes("UTF-8")) + val in = new ByteArrayInputStream(json.getBytes(StandardCharsets.UTF_8)) val schema = AvroSchema[Composer] val input = AvroInputStream.json[Composer].from(in).build(schema) val result = input.iterator.toSeq diff --git a/avro4s-core/src/test/scala/com/sksamuel/avro4s/record/decoder/StringDecoderTest.scala b/avro4s-core/src/test/scala/com/sksamuel/avro4s/record/decoder/StringDecoderTest.scala index a0304334..19b05ea0 100644 --- a/avro4s-core/src/test/scala/com/sksamuel/avro4s/record/decoder/StringDecoderTest.scala +++ b/avro4s-core/src/test/scala/com/sksamuel/avro4s/record/decoder/StringDecoderTest.scala @@ -7,6 +7,7 @@ import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets class StringDecoderTest extends AnyFunSuite with Matchers { @@ -43,14 +44,14 @@ class StringDecoderTest extends AnyFunSuite with Matchers { test("decode from byte buffers to strings") { val schema = AvroSchema[FooString] val record = new GenericData.Record(schema) - record.put("str", ByteBuffer.wrap("hello".getBytes)) + record.put("str", ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8))) Decoder[FooString].decode(schema).apply(record) shouldBe FooString("hello") } test("decode from byte arrays to strings") { val schema = AvroSchema[FooString] val record = new GenericData.Record(schema) - record.put("str", "hello".getBytes) + record.put("str", "hello".getBytes(StandardCharsets.UTF_8)) Decoder[FooString].decode(schema).apply(record) shouldBe FooString("hello") } } diff --git a/avro4s-core/src/test/scala/com/sksamuel/avro4s/record/encoder/ByteArrayEncoderTest.scala b/avro4s-core/src/test/scala/com/sksamuel/avro4s/record/encoder/ByteArrayEncoderTest.scala index 5ffd147a..9faf3e5b 100644 --- a/avro4s-core/src/test/scala/com/sksamuel/avro4s/record/encoder/ByteArrayEncoderTest.scala +++ b/avro4s-core/src/test/scala/com/sksamuel/avro4s/record/encoder/ByteArrayEncoderTest.scala @@ -1,12 +1,14 @@ package com.sksamuel.avro4s.record.encoder -import java.nio.ByteBuffer import com.sksamuel.avro4s.{AvroSchema, Encoder, SchemaFor} import org.apache.avro.SchemaBuilder import org.apache.avro.generic.{GenericFixed, GenericRecord} import org.scalatest.funsuite.AnyFunSuite import org.scalatest.matchers.should.Matchers +import java.nio.ByteBuffer +import java.nio.charset.StandardCharsets + class ByteArrayEncoderTest extends AnyFunSuite with Matchers { test("encode byte arrays as BYTES type") { @@ -72,7 +74,7 @@ class ByteArrayEncoderTest extends AnyFunSuite with Matchers { val schema = SchemaBuilder.fixed("foo").size(7) val fixed = Encoder[Array[Byte]] .encode(schema) - .apply("hello".getBytes) + .apply("hello".getBytes(StandardCharsets.UTF_8)) .asInstanceOf[GenericFixed] fixed.bytes().toList shouldBe Seq(104, 101, 108, 108, 111, 0, 0) fixed.bytes().length shouldBe 7 @@ -82,7 +84,7 @@ class ByteArrayEncoderTest extends AnyFunSuite with Matchers { val schema = SchemaBuilder.fixed("foo").size(7) val fixed = Encoder[ByteBuffer] .encode(schema) - .apply(ByteBuffer.wrap("hello".getBytes)) + .apply(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8))) .asInstanceOf[GenericFixed] fixed.bytes().toList shouldBe Seq(104, 101, 108, 108, 111, 0, 0) fixed.bytes().length shouldBe 7 diff --git a/avro4s-core/src/test/scala/com/sksamuel/avro4s/streams/output/AvroDataOutputStreamCodecTest.scala b/avro4s-core/src/test/scala/com/sksamuel/avro4s/streams/output/AvroDataOutputStreamCodecTest.scala index 302485d2..c18edfc8 100644 --- a/avro4s-core/src/test/scala/com/sksamuel/avro4s/streams/output/AvroDataOutputStreamCodecTest.scala +++ b/avro4s-core/src/test/scala/com/sksamuel/avro4s/streams/output/AvroDataOutputStreamCodecTest.scala @@ -1,6 +1,8 @@ package com.sksamuel.avro4s.streams.output import java.io.ByteArrayOutputStream +import java.nio.charset.StandardCharsets + import com.sksamuel.avro4s.{AvroOutputStream, AvroSchema, Encoder} import org.apache.avro.file.CodecFactory import org.scalatest.matchers.should.Matchers @@ -18,8 +20,8 @@ class AvroDataOutputStreamCodecTest extends AnyWordSpec with Matchers { val output = AvroOutputStream.data[Composer](schema, Encoder[Composer]).to(baos).build() output.write(ennio) output.close() - new String(baos.toByteArray) should include("birthplace") - new String(baos.toByteArray) should include("compositions") + baos.toString(StandardCharsets.UTF_8.name()) should include("birthplace") + baos.toString(StandardCharsets.UTF_8.name()) should include("compositions") } "include deflate coded in metadata when serialized with deflate" in { @@ -27,7 +29,7 @@ class AvroDataOutputStreamCodecTest extends AnyWordSpec with Matchers { val output = AvroOutputStream.data[Composer](schema, Encoder[Composer]).to(baos).withCodec(CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL)).build() output.write(ennio) output.close() - new String(baos.toByteArray) should include("deflate") + baos.toString(StandardCharsets.UTF_8.name()) should include("deflate") } "include bzip2 coded in metadata when serialized with bzip2" in { @@ -35,7 +37,7 @@ class AvroDataOutputStreamCodecTest extends AnyWordSpec with Matchers { val output = AvroOutputStream.data[Composer](schema, Encoder[Composer]).to(baos).withCodec(CodecFactory.bzip2Codec).build() output.write(ennio) output.close() - new String(baos.toByteArray) should include("bzip2") + baos.toString(StandardCharsets.UTF_8.name()) should include("bzip2") } } }