From ebeb6977d6731388477bbf010b14d6321fe9f7bc Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Fri, 14 Aug 2015 11:02:40 -0700 Subject: [PATCH 1/3] fix serialization of UTF8String across different JVMs --- .../apache/spark/unsafe/types/UTF8String.java | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 667c00900f2c5..71d4c5cb67344 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -18,8 +18,7 @@ package org.apache.spark.unsafe.types; import javax.annotation.Nonnull; -import java.io.Serializable; -import java.io.UnsupportedEncodingException; +import java.io.*; import java.nio.ByteOrder; import java.util.Arrays; import java.util.Map; @@ -41,9 +40,9 @@ public final class UTF8String implements Comparable, Serializable { @Nonnull - private final Object base; - private final long offset; - private final int numBytes; + private Object base; + private long offset; + private int numBytes; public Object getBaseObject() { return base; } public long getBaseOffset() { return offset; } @@ -978,4 +977,20 @@ public UTF8String soundex() { } return UTF8String.fromBytes(sx); } + + private static final long serialVersionUID = 42L; + + private void writeObject(ObjectOutputStream out) throws IOException { + byte[] bytes = getBytes(); + out.writeInt(bytes.length); + out.write(bytes); + } + + private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { + offset = BYTE_ARRAY_OFFSET; + numBytes = in.readInt(); + base = new byte[numBytes]; + in.readFully((byte[]) base); + } + } From 4ba1ac0c76377d53c80b7996b54d79ddb2244550 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Fri, 14 Aug 2015 11:34:19 -0700 Subject: [PATCH 2/3] change to use Externalizable --- 
.../apache/spark/unsafe/types/UTF8String.java | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 71d4c5cb67344..12ad6a7c3746e 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -37,8 +37,9 @@ *

* Note: This is not designed for general use cases, should not be used outside SQL. */ -public final class UTF8String implements Comparable, Serializable { +public final class UTF8String implements Comparable, Externalizable { + // These are only updated by readExternal() @Nonnull private Object base; private long offset; @@ -126,6 +127,11 @@ protected UTF8String(Object base, long offset, int numBytes) { this.numBytes = numBytes; } + // for serialization + public UTF8String() { + this(null, 0, 0); + } + /** * Writes the content of this string into a memory address, identified by an object and an offset. * The target memory address must already been allocated, and have enough space to hold all the @@ -978,15 +984,17 @@ public UTF8String soundex() { return UTF8String.fromBytes(sx); } - private static final long serialVersionUID = 42L; - - private void writeObject(ObjectOutputStream out) throws IOException { + public void writeExternal(ObjectOutput out) throws IOException { byte[] bytes = getBytes(); out.writeInt(bytes.length); out.write(bytes); } - private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException { + static { + + } + + public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { offset = BYTE_ARRAY_OFFSET; numBytes = in.readInt(); base = new byte[numBytes]; From e677bc3a81f6f5c5e883017f879f3e58711cf9dc Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Fri, 14 Aug 2015 12:33:40 -0700 Subject: [PATCH 3/3] remove empty static initializer block from UTF8String --- .../main/java/org/apache/spark/unsafe/types/UTF8String.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 12ad6a7c3746e..cbcab958c05a9 100644 --- a/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -990,10 +990,6 @@ public void 
writeExternal(ObjectOutput out) throws IOException { out.write(bytes); } - static { - - } - public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { offset = BYTE_ARRAY_OFFSET; numBytes = in.readInt();