Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;

import java.math.BigDecimal;
import java.nio.ByteOrder;

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
Expand All @@ -43,6 +44,7 @@
*/
public final class Decimal256Vector extends BaseFixedWidthVector {
public static final byte TYPE_WIDTH = 32;
private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
private final FieldReader reader;

private final int precision;
Expand Down Expand Up @@ -197,7 +199,7 @@ public void set(int index, ArrowBuf buffer) {

/**
* Set the decimal element at given index to the provided array of bytes.
* Decimal256 is now implemented as Little Endian. This API allows the user
* Decimal256 is now implemented as Native Endian. This API allows the user
* to pass a decimal value in the form of byte array in BE byte order.
*
* <p>Consumers of Arrow code can use this API instead of first swapping
Expand All @@ -218,25 +220,38 @@ public void setBigEndian(int index, byte[] value) {
valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);

long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
// swap bytes to convert BE to LE
for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
}

if (length == TYPE_WIDTH) {
return;
}

if (length == 0) {
PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH, (byte) 0);
} else if (length < TYPE_WIDTH) {
// sign extend
final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
return;
}
if (LITTLE_ENDIAN) {
// swap bytes to convert BE to LE
for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
}

if (length == TYPE_WIDTH) {
return;
}

if (length < TYPE_WIDTH) {
// sign extend
final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
return;
}
} else {
throw new IllegalArgumentException(
"Invalid decimal value length. Valid length in [1 - 32], got " + length);
if (length <= TYPE_WIDTH) {
// copy data from value to outAddress
PlatformDependent.copyMemory(value, 0, outAddress + Decimal256Vector.TYPE_WIDTH - length, length);
// sign extend
final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad);
return;
}
}
throw new IllegalArgumentException(
"Invalid decimal value length. Valid length in [1 - 32], got " + length);
}

/**
Expand All @@ -255,7 +270,7 @@ public void set(int index, long start, ArrowBuf buffer) {
* Sets the element at given index using the buffer whose size maybe <= 32 bytes.
* @param index index to write the decimal to
* @param start start of value in the buffer
* @param buffer contains the decimal in little endian bytes
* @param buffer contains the decimal in native endian bytes
* @param length length of the value in the buffer
*/
public void setSafe(int index, long start, ArrowBuf buffer, int length) {
Expand All @@ -268,12 +283,22 @@ public void setSafe(int index, long start, ArrowBuf buffer, int length) {

long inAddress = buffer.memoryAddress() + start;
long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
PlatformDependent.copyMemory(inAddress, outAddress, length);
// sign extend
if (length < 32) {
byte msb = PlatformDependent.getByte(inAddress + length - 1);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
if (LITTLE_ENDIAN) {
PlatformDependent.copyMemory(inAddress, outAddress, length);
// sign extend
if (length < TYPE_WIDTH) {
byte msb = PlatformDependent.getByte(inAddress + length - 1);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
}
} else {
PlatformDependent.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length);
// sign extend
if (length < TYPE_WIDTH) {
byte msb = PlatformDependent.getByte(inAddress);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad);
}
}
}

Expand All @@ -296,16 +321,26 @@ public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length)
// not using buffer.getByte() to avoid boundary checks for every byte.
long inAddress = buffer.memoryAddress() + start;
long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
// swap bytes to convert BE to LE
for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
PlatformDependent.putByte(outAddress + byteIdx, val);
}
// sign extend
if (length < 32) {
byte msb = PlatformDependent.getByte(inAddress);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
if (LITTLE_ENDIAN) {
// swap bytes to convert BE to LE
for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
PlatformDependent.putByte(outAddress + byteIdx, val);
}
// sign extend
if (length < 32) {
byte msb = PlatformDependent.getByte(inAddress);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, Decimal256Vector.TYPE_WIDTH - length, pad);
}
} else {
PlatformDependent.copyMemory(inAddress, outAddress + Decimal256Vector.TYPE_WIDTH - length, length);
// sign extend
if (length < TYPE_WIDTH) {
byte msb = PlatformDependent.getByte(inAddress);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress, Decimal256Vector.TYPE_WIDTH - length, pad);
}
}
}

Expand All @@ -329,12 +364,7 @@ public void set(int index, BigDecimal value) {
*/
public void set(int index, long value) {
BitVectorHelper.setBit(validityBuffer, index);
final long addressOfValue = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
PlatformDependent.putLong(addressOfValue, value);
final long padValue = Long.signum(value) == -1 ? -1L : 0L;
PlatformDependent.putLong(addressOfValue + Long.BYTES, padValue);
PlatformDependent.putLong(addressOfValue + 2 * Long.BYTES, padValue);
PlatformDependent.putLong(addressOfValue + 3 * Long.BYTES, padValue);
DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
}

/**
Expand Down
103 changes: 69 additions & 34 deletions java/vector/src/main/java/org/apache/arrow/vector/DecimalVector.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;

import java.math.BigDecimal;
import java.nio.ByteOrder;

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
Expand All @@ -43,6 +44,7 @@
*/
public final class DecimalVector extends BaseFixedWidthVector {
public static final byte TYPE_WIDTH = 16;
private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN;
private final FieldReader reader;

private final int precision;
Expand Down Expand Up @@ -197,7 +199,7 @@ public void set(int index, ArrowBuf buffer) {

/**
* Set the decimal element at given index to the provided array of bytes.
* Decimal is now implemented as Little Endian. This API allows the user
* Decimal is now implemented as Native Endian. This API allows the user
* to pass a decimal value in the form of byte array in BE byte order.
*
* <p>Consumers of Arrow code can use this API instead of first swapping
Expand All @@ -218,25 +220,38 @@ public void setBigEndian(int index, byte[] value) {
valueBuffer.checkBytes((long) index * TYPE_WIDTH, (long) (index + 1) * TYPE_WIDTH);

long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
// swap bytes to convert BE to LE
for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
}

if (length == TYPE_WIDTH) {
return;
}

if (length == 0) {
PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH, (byte) 0);
} else if (length < TYPE_WIDTH) {
// sign extend
final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
return;
}
if (LITTLE_ENDIAN) {
// swap bytes to convert BE to LE
for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
PlatformDependent.putByte(outAddress + byteIdx, value[length - 1 - byteIdx]);
}

if (length == TYPE_WIDTH) {
return;
}

if (length < TYPE_WIDTH) {
// sign extend
final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
return;
}
} else {
throw new IllegalArgumentException(
"Invalid decimal value length. Valid length in [1 - 16], got " + length);
if (length <= TYPE_WIDTH) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not return if length == TYPE_WIDTH?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we add if (length == TYPE_WIDTH) return before line 244, no data is copied from value to outAddress. I will add a comment copy data from value to outAddress after line 244.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, I see above it is already set during the swap. I guess there is no harm in calling PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad) if length == TYPE_WIDTH

// copy data from value to outAddress
PlatformDependent.copyMemory(value, 0, outAddress + DecimalVector.TYPE_WIDTH - length, length);
// sign extend
final byte pad = (byte) (value[0] < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad);
return;
}
}
throw new IllegalArgumentException(
"Invalid decimal value length. Valid length in [1 - 16], got " + length);

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should probably be in a Preconditions.checkArgument, but we are not trying to change things in this PR so don't need to do that here

}

/**
Expand All @@ -255,7 +270,7 @@ public void set(int index, long start, ArrowBuf buffer) {
* Sets the element at given index using the buffer whose size maybe <= 16 bytes.
* @param index index to write the decimal to
* @param start start of value in the buffer
* @param buffer contains the decimal in little endian bytes
* @param buffer contains the decimal in native endian bytes
* @param length length of the value in the buffer
*/
public void setSafe(int index, long start, ArrowBuf buffer, int length) {
Expand All @@ -268,12 +283,22 @@ public void setSafe(int index, long start, ArrowBuf buffer, int length) {

long inAddress = buffer.memoryAddress() + start;
long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
PlatformDependent.copyMemory(inAddress, outAddress, length);
// sign extend
if (length < 16) {
byte msb = PlatformDependent.getByte(inAddress + length - 1);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
if (LITTLE_ENDIAN) {
PlatformDependent.copyMemory(inAddress, outAddress, length);
// sign extend
if (length < TYPE_WIDTH) {
byte msb = PlatformDependent.getByte(inAddress + length - 1);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
}
} else {
PlatformDependent.copyMemory(inAddress, outAddress + DecimalVector.TYPE_WIDTH - length, length);
// sign extend
if (length < TYPE_WIDTH) {
byte msb = PlatformDependent.getByte(inAddress);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad);
}
}
}

Expand All @@ -296,16 +321,26 @@ public void setBigEndianSafe(int index, long start, ArrowBuf buffer, int length)
// not using buffer.getByte() to avoid boundary checks for every byte.
long inAddress = buffer.memoryAddress() + start;
long outAddress = valueBuffer.memoryAddress() + (long) index * TYPE_WIDTH;
// swap bytes to convert BE to LE
for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
PlatformDependent.putByte(outAddress + byteIdx, val);
}
// sign extend
if (length < 16) {
byte msb = PlatformDependent.getByte(inAddress);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
if (LITTLE_ENDIAN) {
// swap bytes to convert BE to LE
for (int byteIdx = 0; byteIdx < length; ++byteIdx) {
byte val = PlatformDependent.getByte((inAddress + length - 1) - byteIdx);
PlatformDependent.putByte(outAddress + byteIdx, val);
}
// sign extend
if (length < TYPE_WIDTH) {
byte msb = PlatformDependent.getByte(inAddress);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress + length, DecimalVector.TYPE_WIDTH - length, pad);
}
} else {
PlatformDependent.copyMemory(inAddress, outAddress + DecimalVector.TYPE_WIDTH - length, length);
// sign extend
if (length < TYPE_WIDTH) {
byte msb = PlatformDependent.getByte(inAddress);
final byte pad = (byte) (msb < 0 ? 0xFF : 0x00);
PlatformDependent.setMemory(outAddress, DecimalVector.TYPE_WIDTH - length, pad);
}
}
}

Expand All @@ -329,7 +364,7 @@ public void set(int index, BigDecimal value) {
*/
public void set(int index, long value) {
BitVectorHelper.setBit(validityBuffer, index);
DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index);
DecimalUtility.writeLongToArrowBuf(value, valueBuffer, index, TYPE_WIDTH);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ public class MessageSerializer {
public static final int IPC_CONTINUATION_TOKEN = -1;

/**
* Convert an array of 4 bytes to a little endian i32 value.
* Convert an array of 4 bytes in little-endian to an native-endian i32 value.
*
* @param bytes byte array with minimum length of 4
* @return converted little endian 32-bit integer
* @param bytes byte array with minimum length of 4 in little-endian
* @return converted an native-endian 32-bit integer
*/
public static int bytesToInt(byte[] bytes) {
return ((bytes[3] & 255) << 24) +
Expand Down
Loading