From d6bf52ae6bebd3eb6682c76dd7bc1b9a3fd67922 Mon Sep 17 00:00:00 2001 From: Simone Carlo Surace Date: Sat, 20 Apr 2024 20:17:29 +0200 Subject: [PATCH 1/4] Add Julia example --- http/get_simple/julia/Project.toml | 4 +++ http/get_simple/julia/client/README.md | 32 +++++++++++++++++++++ http/get_simple/julia/client/client.jl | 33 +++++++++++++++++++++ http/get_simple/julia/server/README.md | 32 +++++++++++++++++++++ http/get_simple/julia/server/server.jl | 40 ++++++++++++++++++++++++++ 5 files changed, 141 insertions(+) create mode 100644 http/get_simple/julia/Project.toml create mode 100644 http/get_simple/julia/client/README.md create mode 100644 http/get_simple/julia/client/client.jl create mode 100644 http/get_simple/julia/server/README.md create mode 100644 http/get_simple/julia/server/server.jl diff --git a/http/get_simple/julia/Project.toml b/http/get_simple/julia/Project.toml new file mode 100644 index 0000000..4f0811e --- /dev/null +++ b/http/get_simple/julia/Project.toml @@ -0,0 +1,4 @@ +[deps] +Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" +HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" +Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" diff --git a/http/get_simple/julia/client/README.md b/http/get_simple/julia/client/README.md new file mode 100644 index 0000000..3dbed30 --- /dev/null +++ b/http/get_simple/julia/client/README.md @@ -0,0 +1,32 @@ + + +# HTTP GET Arrow Data: Simple Julia Client Example + +This directory contains a minimal example of an HTTP client implemented in Julia. The client: +1. Sends an HTTP GET request to a server. +2. Receives an HTTP 200 response from the server, with the response body containing an Arrow IPC stream of record batches. +3. Adds the record batches to a list as they are received. + +To run this example, first start one of the server examples in the parent directory, then: + +```sh +julia --project=.. -e "using Pkg; Pkg.instantiate()" +julia --project=.. client.jl +``` diff --git a/http/get_simple/julia/client/client.jl b/http/get_simple/julia/client/client.jl new file mode 100644 index 0000000..1aa2073 --- /dev/null +++ b/http/get_simple/julia/client/client.jl @@ -0,0 +1,33 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +using Arrow, HTTP + +function get_batches() + res = HTTP.get("http://localhost:8008") + buffer = res.body + stream = Arrow.Stream(res.body) + batches = collect(stream) + + println("$(length(buffer)) bytes received") + println("$(length(batches)) record batches received") + + return batches +end + +execution_time = @elapsed get_batches() +println("$(execution_time) seconds elapsed") diff --git a/http/get_simple/julia/server/README.md b/http/get_simple/julia/server/README.md new file mode 100644 index 0000000..19abc3a --- /dev/null +++ b/http/get_simple/julia/server/README.md @@ -0,0 +1,32 @@ + + +# HTTP GET Arrow Data: Simple Julia Server Example + +This directory contains a minimal example of an HTTP server implemented in Julia. The server: +1. Creates a list of record batches and populates it with synthesized data. +2. Listens for HTTP GET requests from clients. +3. Upon receiving a request, sends an HTTP 200 response with the body containing an Arrow IPC stream of record batches. + +To run this example: + +```sh +julia --project=.. -e "using Pkg; Pkg.instantiate()" +julia --project=.. server.jl +``` diff --git a/http/get_simple/julia/server/server.jl b/http/get_simple/julia/server/server.jl new file mode 100644 index 0000000..3a9ba2d --- /dev/null +++ b/http/get_simple/julia/server/server.jl @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +using Arrow, HTTP, Tables + +function get_stream(::HTTP.Request) + total_records = 10_000_000 + batch_len = 4096 + stream = Tables.partitioner(Iterators.partition(1:total_records, batch_len)) do indices + nrows = length(indices) + return ( + a = rand(Int, nrows), + b = rand(Int, nrows), + c = rand(Int, nrows), + d = rand(Int, nrows) + ) + end + buffer = IOBuffer() + Arrow.write(buffer, stream) + return HTTP.Response(200, take!(buffer)) +end + +const ARROW_ROUTER = HTTP.Router() +HTTP.register!(ARROW_ROUTER, "GET", "/", get_stream) +println("Serving on localhost:8008...") +server = HTTP.serve!(ARROW_ROUTER, "127.0.0.1", 8008) From fa99ad02cf42ea1e0b1a20ba3c9cac3896a3f4d3 Mon Sep 17 00:00:00 2001 From: Simone Carlo Surace Date: Wed, 24 Apr 2024 22:25:03 +0200 Subject: [PATCH 2/4] Increase number of records --- http/get_simple/julia/server/server.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/http/get_simple/julia/server/server.jl b/http/get_simple/julia/server/server.jl index 3a9ba2d..8ca7e36 100644 --- a/http/get_simple/julia/server/server.jl +++ b/http/get_simple/julia/server/server.jl @@ -18,7 +18,7 @@ using Arrow, HTTP, Tables function get_stream(::HTTP.Request) - total_records = 10_000_000 + total_records = 100_000_000 batch_len = 4096 stream = Tables.partitioner(Iterators.partition(1:total_records, batch_len)) do indices nrows = length(indices) From b3ee8ae402a23ee19d3abff64733fdf4f0cc034b Mon Sep 17 00:00:00 2001 From: Simone Carlo Surace Date: Sun, 28 Apr 2024 17:03:21 +0200 Subject: [PATCH 3/4] Make server blocking --- http/get_simple/julia/server/server.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/http/get_simple/julia/server/server.jl b/http/get_simple/julia/server/server.jl index 8ca7e36..a4a40b0 100644 --- a/http/get_simple/julia/server/server.jl +++ b/http/get_simple/julia/server/server.jl @@ -34,7 +34,5 @@ function get_stream(::HTTP.Request) return HTTP.Response(200, take!(buffer)) end -const ARROW_ROUTER = HTTP.Router() -HTTP.register!(ARROW_ROUTER, "GET", "/", get_stream) println("Serving on localhost:8008...") -server = HTTP.serve!(ARROW_ROUTER, "127.0.0.1", 8008) +server = HTTP.serve(get_stream, "127.0.0.1", 8008) From 5f0a1a0de57d0b43884ec3b34594fb25f6bfb718 Mon Sep 17 00:00:00 2001 From: Simone Carlo Surace Date: Wed, 1 May 2024 12:04:58 +0200 Subject: [PATCH 4/4] Make columns nullable --- http/get_simple/julia/Project.toml | 1 + http/get_simple/julia/server/server.jl | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/http/get_simple/julia/Project.toml b/http/get_simple/julia/Project.toml index 4f0811e..0929efe 100644 --- a/http/get_simple/julia/Project.toml +++ b/http/get_simple/julia/Project.toml @@ -1,4 +1,5 @@ [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" diff --git a/http/get_simple/julia/server/server.jl b/http/get_simple/julia/server/server.jl index a4a40b0..8f56524 100644 --- a/http/get_simple/julia/server/server.jl +++ b/http/get_simple/julia/server/server.jl @@ -15,7 +15,13 @@ # specific language governing permissions and limitations # under the License. -using Arrow, HTTP, Tables +using Arrow, HTTP, Random, Tables + +function randint_nullable(n::Integer) + v = Vector{Union{Missing, Int}}(undef, n) + rand!(v, Int) + return v +end function get_stream(::HTTP.Request) total_records = 100_000_000 @@ -23,10 +29,10 @@ function get_stream(::HTTP.Request) stream = Tables.partitioner(Iterators.partition(1:total_records, batch_len)) do indices nrows = length(indices) return ( - a = rand(Int, nrows), - b = rand(Int, nrows), - c = rand(Int, nrows), - d = rand(Int, nrows) + a = randint_nullable(nrows), + b = randint_nullable(nrows), + c = randint_nullable(nrows), + d = randint_nullable(nrows) ) end buffer = IOBuffer()