Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions java/vector/src/main/codegen/data/ArrowTypes.tdd
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@
fields: [{name: "mode", type: short, valueType: UnionMode}, {name: "typeIds", type: "int[]"}],
complex: true
},
{
name: "Map",
fields: [{name: "keysSorted", type: boolean}],
complex: true
},
{
name: "Int",
fields: [{name: "bitWidth", type: int}, {name: "isSigned", type: boolean}],
Expand Down
14 changes: 14 additions & 0 deletions java/vector/src/main/codegen/templates/AbstractFieldWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,20 @@
*/
@SuppressWarnings("unused")
abstract class AbstractFieldWriter extends AbstractBaseWriter implements FieldWriter {

protected boolean addVectorAsNullable = true;

/**
 * Controls the nullability recorded in the FieldType of vectors this writer creates from now on.
 *
 * <p>The flag starts out as {@code true}, so vectors are created nullable unless a caller
 * switches it off. Passing {@code false} makes subsequently created vectors non-nullable.
 *
 * @param nullable {@code true} to create nullable vectors (the default), {@code false} to
 *     create non-nullable vectors
 */
public void setAddVectorAsNullable(boolean nullable) {
  this.addVectorAsNullable = nullable;
}
Comment thread
BryanCutler marked this conversation as resolved.

@Override
public void start() {
throw new IllegalStateException(String.format("You tried to start when you are using a ValueWriter of type %s.", this.getClass().getSimpleName()));
Expand Down
1 change: 1 addition & 0 deletions java/vector/src/main/codegen/templates/ComplexCopier.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ private static void writeValue(FieldReader reader, FieldWriter writer) {
switch (mt) {

case LIST:
case MAP:
if (reader.isSet()) {
writer.startList();
while (reader.next()) {
Expand Down
15 changes: 9 additions & 6 deletions java/vector/src/main/codegen/templates/StructWriters.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ public class ${mode}StructWriter extends AbstractFieldWriter {
list(child.getName());
break;
case UNION:
UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), FieldType.nullable(MinorType.UNION.getType()), UnionVector.class), getNullableStructWriterFactory());
FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.UNION.getType(), null, null);

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what are the nulls being passed through here?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

they are for DictionaryEncoding and metadata. If you want to do dictionary encoding, I don't think it is supported with the writers. If metadata is initialized to null, it creates an empty map.

UnionWriter writer = new UnionWriter(container.addOrGet(child.getName(), fieldType, UnionVector.class), getNullableStructWriterFactory());
fields.put(handleCase(child.getName()), writer);
break;
<#list vv.types as type><#list type.minor as minor>
Expand Down Expand Up @@ -122,7 +123,8 @@ public StructWriter struct(String name) {
FieldWriter writer = fields.get(finalName);
if(writer == null){
int vectorCount=container.size();
StructVector vector = container.addOrGet(name, FieldType.nullable(MinorType.STRUCT.getType()), StructVector.class);
FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.STRUCT.getType(), null, null);
StructVector vector = container.addOrGet(name, fieldType, StructVector.class);
writer = new PromotableWriter(vector, container, getNullableStructWriterFactory());
if(vectorCount != container.size()) {
writer.allocate();
Expand Down Expand Up @@ -166,7 +168,8 @@ public ListWriter list(String name) {
FieldWriter writer = fields.get(finalName);
int vectorCount = container.size();
if(writer == null) {
writer = new PromotableWriter(container.addOrGet(name, FieldType.nullable(MinorType.LIST.getType()), ListVector.class), container, getNullableStructWriterFactory());
FieldType fieldType = new FieldType(addVectorAsNullable, MinorType.LIST.getType(), null, null);
writer = new PromotableWriter(container.addOrGet(name, fieldType, ListVector.class), container, getNullableStructWriterFactory());
if (container.size() > vectorCount) {
writer.allocate();
}
Expand Down Expand Up @@ -232,8 +235,8 @@ public void end() {
if(writer == null) {
ValueVector vector;
ValueVector currentVector = container.getChild(name);
${vectName}Vector v = container.addOrGet(name,
FieldType.nullable(
${vectName}Vector v = container.addOrGet(name,
new FieldType(addVectorAsNullable,
<#if minor.typeParams??>
<#if minor.arrowTypeConstructorParams??>
<#assign constructorParams = minor.arrowTypeConstructorParams />
Expand All @@ -247,7 +250,7 @@ public void end() {
<#else>
MinorType.${upperName}.getType()
</#if>
),
,null, null),
${vectName}Vector.class);
writer = new PromotableWriter(v, container, getNullableStructWriterFactory());
vector = v;
Expand Down
4 changes: 2 additions & 2 deletions java/vector/src/main/codegen/templates/UnionListWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@
@SuppressWarnings("unused")
public class UnionListWriter extends AbstractFieldWriter {

private ListVector vector;
private PromotableWriter writer;
protected ListVector vector;
protected PromotableWriter writer;
private boolean inStruct = false;
private String structName;
private int lastIndex = 0;
Expand Down
195 changes: 195 additions & 0 deletions java/vector/src/main/codegen/templates/UnionMapWriter.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import io.netty.buffer.ArrowBuf;
import org.apache.arrow.vector.complex.writer.DecimalWriter;
import org.apache.arrow.vector.holders.DecimalHolder;

import java.lang.UnsupportedOperationException;
import java.math.BigDecimal;

<@pp.dropOutputFile />
<@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/UnionMapWriter.java" />


<#include "/@includes/license.ftl" />

package org.apache.arrow.vector.complex.impl;

<#include "/@includes/vv_imports.ftl" />

/*
* This class is generated using freemarker and the ${.template_name} template.
*/

/**
* <p>Writer for MapVectors. This extends UnionListWriter to simplify writing map entries to a list
* of struct elements, with "key" and "value" fields. The procedure for writing a map begins with
* {@link #startMap()} followed by {@link #startEntry()}. An entry is written by using the
* {@link #key()} writer to write the key, then the {@link #value()} writer to write a value. After
* writing the value, call {@link #endEntry()} to complete the entry. Each map can have 1 or more
* entries. When done writing entries, call {@link #endMap()} to complete the map.
*
* <p>NOTE: the MapVector can have NULL values by not writing to a position. If a map is started with
* {@link #startMap()}, then it must have a key written. The value of a map entry can be NULL by
* not using the {@link #value()} writer.
*
* <p>Example to write the following map to position 5 of a vector
* <pre>{@code
* // {
* // 1 -> 3,
* // 2 -> 4,
* // 3 -> NULL
* // }
*
* UnionMapWriter writer = ...
*
* writer.setPosition(5);
* writer.startMap();
* writer.startEntry();
* writer.key().integer().writeInt(1);
* writer.value().integer().writeInt(3);
* writer.endEntry();
* writer.startEntry();
* writer.key().integer().writeInt(2);
* writer.value().integer().writeInt(4);
* writer.endEntry();
* writer.startEntry();
* writer.key().integer().writeInt(3);
* writer.endEntry();
* writer.endMap();
* }</pre>
* </p>
*/
@SuppressWarnings("unused")
public class UnionMapWriter extends UnionListWriter {
Comment thread
BryanCutler marked this conversation as resolved.

/**
* Current mode for writing map entries, set by calling {@link #key()} or {@link #value()}
* and reset with a call to {@link #endEntry()}. With KEY mode, a struct writer with field
* named "key" is returned. With VALUE mode, a struct writer with field named "value" is
* returned. In OFF mode, the writer will behave like a standard UnionListWriter
*/
private enum MapWriteMode {
OFF,
Comment thread
BryanCutler marked this conversation as resolved.
KEY,
VALUE,
}

private MapWriteMode mode = MapWriteMode.OFF;
private StructWriter entryWriter;

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should the writer enforce uniqueness/sorted-ness (I suppose this would be difficult in the general case)?

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that was discussed somewhere else, and it was decided that it is up to the application to ensure these things. The keysSorted field is just used as a hint.

/**
 * Creates a writer on top of the given map vector.
 *
 * @param vector the MapVector that this writer writes into
 */
public UnionMapWriter(MapVector vector) {
  super(vector);
  // The mode is still OFF here, so struct() falls through to the parent implementation;
  // the writer it returns is kept and reused for every map entry.
  this.entryWriter = this.struct();
}

/**
 * Begins a new map, which is made up of 1 or more entries. Delegates to {@link #startList()}.
 */
public void startMap() {
  this.startList();
}

/**
 * Finishes the map that is currently being written. Delegates to {@link #endList()}.
 */
public void endMap() {
  this.endList();
}

/**
 * Opens a single map entry. Follow this call with the {@link #key()} and {@link #value()}
 * writers, then close the entry with {@link #endEntry()}.
 */
public void startEntry() {
  // Ask the underlying writer to create non-nullable vectors before the entry struct starts.
  this.writer.setAddVectorAsNullable(false);
  this.entryWriter.start();
}

/**
 * Closes the current map entry, switches the write mode back to OFF and restores the
 * underlying writer's flag so that it creates nullable vectors again.
 */
public void endEntry() {
  this.entryWriter.end();
  this.mode = MapWriteMode.OFF;
  this.writer.setAddVectorAsNullable(true);
}

/**
 * Switches this writer into KEY mode, so that the typed writer requested next targets the
 * "key" field of the current entry.
 *
 * @return this writer, now in KEY mode
 */
public UnionMapWriter key() {
  // Key vectors are requested as non-nullable from the underlying writer.
  this.writer.setAddVectorAsNullable(false);
  this.mode = MapWriteMode.KEY;
  return this;
}

/**
 * Switches this writer into VALUE mode, so that the typed writer requested next targets the
 * "value" field of the current entry.
 *
 * @return this writer, now in VALUE mode
 */
public UnionMapWriter value() {
  // Value vectors are requested as nullable from the underlying writer.
  this.writer.setAddVectorAsNullable(true);
  this.mode = MapWriteMode.VALUE;
  return this;
}

<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />
<#assign fields = minor.fields!type.fields />
<#assign uncappedName = name?uncap_first/>
<#if uncappedName == "int" ><#assign uncappedName = "integer" /></#if>
<#if !minor.typeParams?? >
/**
 * Returns the writer of this type that matches the current map write mode: in KEY mode the
 * entry writer's field named MapVector.KEY_NAME, in VALUE mode its field named
 * MapVector.VALUE_NAME, and in any other mode this writer itself.
 */
@Override
public ${name}Writer ${uncappedName}() {
switch (mode) {
case KEY:
return entryWriter.${uncappedName}(MapVector.KEY_NAME);
case VALUE:
return entryWriter.${uncappedName}(MapVector.VALUE_NAME);
default:
// Not inside key()/value(): this writer is returned unchanged.
return this;
}
}

</#if>
</#list></#list>
/**
 * Returns the decimal writer that matches the current map write mode: the entry's key field
 * in KEY mode, the entry's value field in VALUE mode, and this writer itself otherwise.
 */
@Override
public DecimalWriter decimal() {
  final String entryField;
  switch (mode) {
    case KEY:
      entryField = MapVector.KEY_NAME;
      break;
    case VALUE:
      entryField = MapVector.VALUE_NAME;
      break;
    default:
      // Not writing a key or a value: this writer is returned unchanged.
      return this;
  }
  return entryWriter.decimal(entryField);
}

/**
 * Returns the struct writer that matches the current map write mode: the entry's key field
 * in KEY mode, the entry's value field in VALUE mode, and the parent implementation's struct
 * writer otherwise.
 */
@Override
public StructWriter struct() {
  final String entryField;
  switch (mode) {
    case KEY:
      entryField = MapVector.KEY_NAME;
      break;
    case VALUE:
      entryField = MapVector.VALUE_NAME;
      break;
    default:
      // Not writing a key or a value: defer to the parent implementation.
      return super.struct();
  }
  return entryWriter.struct(entryField);
}

/**
 * Returns the list writer that matches the current map write mode: the entry's key field in
 * KEY mode, the entry's value field in VALUE mode, and the parent implementation's list
 * writer otherwise.
 */
@Override
public ListWriter list() {
  final String entryField;
  switch (mode) {
    case KEY:
      entryField = MapVector.KEY_NAME;
      break;
    case VALUE:
      entryField = MapVector.VALUE_NAME;
      break;
    default:
      // Not writing a key or a value: defer to the parent implementation.
      return super.list();
  }
  return entryWriter.list(entryField);
}
}
10 changes: 10 additions & 0 deletions java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.arrow.vector.types.pojo.ArrowType.FloatingPoint;
import org.apache.arrow.vector.types.pojo.ArrowType.Int;
import org.apache.arrow.vector.types.pojo.ArrowType.Interval;
import org.apache.arrow.vector.types.pojo.ArrowType.Map;
import org.apache.arrow.vector.types.pojo.ArrowType.Null;
import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
import org.apache.arrow.vector.types.pojo.ArrowType.Time;
Expand Down Expand Up @@ -115,6 +116,15 @@ public TypeLayout visit(FixedSizeList type) {
return new TypeLayout(vectors);
}

@Override
public TypeLayout visit(Map type) {
  // A Map is laid out with a validity buffer followed by an offset buffer.
  return new TypeLayout(asList(
      BufferLayout.validityVector(),
      BufferLayout.offsetBuffer()));
}

@Override
public TypeLayout visit(FloatingPoint type) {
int bitWidth;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ public static ListVector empty(String name, BufferAllocator allocator) {
}

protected ArrowBuf validityBuffer;
private UnionListReader reader;
protected UnionListReader reader;
private CallBack callBack;
private final FieldType fieldType;
private int validityAllocationSizeInBytes;
Expand All @@ -94,7 +94,7 @@ public ListVector(String name, BufferAllocator allocator, DictionaryEncoding dic
* Constructs a new instance.
*
* @param name The name of the instance.
* @param allocator The allocator to use to allocating/reallocating buffers.
* @param allocator The allocator to use for allocating/reallocating buffers.
* @param fieldType The type of this list.
* @param callBack A schema change callback.
*/
Expand Down
Loading