From 91d8f0d0b59641bec5b20dde36ec3c674afdaa2e Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Mon, 29 Jun 2015 20:51:59 -0600 Subject: [PATCH 1/3] mmap overhaul --- base/datafmt.jl | 2 +- base/deprecated.jl | 86 +++++++++++ base/exports.jl | 4 +- base/libc.jl | 63 +------- base/mmap.jl | 312 +++++++++++++++++++++----------------- base/sharedarray.jl | 6 +- base/sysimg.jl | 3 +- doc/stdlib/io-network.rst | 33 ++-- test/file.jl | 78 ---------- test/mmap.jl | 284 ++++++++++++++++++++++++++++++++++ 10 files changed, 573 insertions(+), 298 deletions(-) create mode 100644 test/mmap.jl diff --git a/base/datafmt.jl b/base/datafmt.jl index 0b123ec705808..24f2b2218ea48 100644 --- a/base/datafmt.jl +++ b/base/datafmt.jl @@ -61,7 +61,7 @@ end function as_mmap(fname::AbstractString, fsz::Int64) open(fname) do io - mmap_array(UInt8, (Int(fsz),), io) + Mmap.mmap(io, Vector{UInt8}, (Int(fsz),)) end end diff --git a/base/deprecated.jl b/base/deprecated.jl index ccd6c1c9904af..6cc4c498ad81f 100644 --- a/base/deprecated.jl +++ b/base/deprecated.jl @@ -538,3 +538,89 @@ export UnionType const MathConst = Irrational export MathConst + +# 11280, mmap + +export msync +msync{T}(A::Array{T}) = msync(pointer(A), length(A)*sizeof(T)) +msync(B::BitArray) = msync(pointer(B.chunks), length(B.chunks)*sizeof(UInt64)) + +@unix_only begin + +function mmap(len::Integer, prot::Integer, flags::Integer, fd, offset::Integer) + depwarn("`mmap` is deprecated, use `mmap(io, Array{T,N}, dims, offset)` instead to return an mmapped-array", :mmap) + const pagesize::Int = ccall(:jl_getpagesize, Clong, ()) + # Check that none of the computations will overflow + if len < 0 + throw(ArgumentError("requested size must be ≥ 0, got $len")) + end + if len > typemax(Int)-pagesize + throw(ArgumentError("requested size must be ≤ $(typemax(Int)-pagesize), got $len")) + end + # Set the offset to a page boundary + offset_page::FileOffset = floor(Integer,offset/pagesize)*pagesize + len_page::Int = (offset-offset_page) + len + 
# Mmap the file + p = ccall(:jl_mmap, Ptr{Void}, (Ptr{Void}, Csize_t, Cint, Cint, Cint, FileOffset), C_NULL, len_page, prot, flags, fd, offset_page) + systemerror("memory mapping failed", reinterpret(Int,p) == -1) + # Also return a pointer that compensates for any adjustment in the offset + return p, Int(offset-offset_page) +end + +function munmap(p::Ptr,len::Integer) + depwarn("`munmap` is deprecated, `mmap` Arrays are automatically munmapped when finalized", :munmap) + systemerror("munmap", ccall(:munmap,Cint,(Ptr{Void},Int),p,len) != 0) +end + +const MS_ASYNC = 1 +const MS_INVALIDATE = 2 +const MS_SYNC = 4 +function msync(p::Ptr, len::Integer, flags::Integer=MS_SYNC) + depwarn("`msync` is deprecated, use `Mmap.sync!(array)` instead", :msync) + systemerror("msync", ccall(:msync, Cint, (Ptr{Void}, Csize_t, Cint), p, len, flags) != 0) +end +end + + +@windows_only begin +function munmap(viewhandle::Ptr, mmaphandle::Ptr) + depwarn("`munmap` is deprecated, `mmap` Arrays are automatically munmapped when finalized", :munmap) + status = ccall(:UnmapViewOfFile, stdcall, Cint, (Ptr{Void},), viewhandle)!=0 + status |= ccall(:CloseHandle, stdcall, Cint, (Ptr{Void},), mmaphandle)!=0 + if !status + error("could not unmap view: $(FormatMessage())") + end +end + +function msync(p::Ptr, len::Integer) + depwarn("`msync` is deprecated, use `Mmap.sync!(array)` instead", :msync) + status = ccall(:FlushViewOfFile, stdcall, Cint, (Ptr{Void}, Csize_t), p, len)!=0 + if !status + error("could not msync: $(FormatMessage())") + end +end + +end + +@unix_only @deprecate mmap_array{T,N}(::Type{T}, dims::NTuple{N,Integer}, s::IO, offset=position(s)) mmap(s, Array{T,N}, dims, offset) + +@windows_only begin +type SharedMemSpec + name :: AbstractString + readonly :: Bool + create :: Bool +end +export mmap_array +function mmap_array{T,N}(::Type{T}, dims::NTuple{N,Integer}, s::Union(IO,SharedMemSpec), offset::FileOffset) + depwarn("`mmap_array` is deprecated, use `mmap(io, Array{T,N}, dims, offset)` 
instead to return an mmapped-array", :mmap_array) + if isa(s,SharedMemSpec) + a = Mmap.AnonymousMmap(s.name, s.readonly, s.create) + else + a = s + end + return mmap(a, Array{T,N}, dims, offset) +end +end + +@deprecate mmap_bitarray{N}(::Type{Bool}, dims::NTuple{N,Integer}, s::IOStream, offset::FileOffset=position(s)) mmap(s, BitArray, dims, offset) +@deprecate mmap_bitarray{N}(dims::NTuple{N,Integer}, s::IOStream, offset=position(s)) mmap(s, BitArray, dims, offset) \ No newline at end of file diff --git a/base/exports.jl b/base/exports.jl index 2971b68c4194a..285134b6e5ac4 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -13,6 +13,7 @@ export Test, Libc, Libdl, + Mmap, LinAlg, BLAS, LAPACK, @@ -1142,9 +1143,6 @@ export listenany, ltoh, mark, - mmap_array, - mmap_bitarray, - msync, nb_available, ntoh, open, diff --git a/base/libc.jl b/base/libc.jl index 1bfe75a82cdfd..36619f5f94343 100644 --- a/base/libc.jl +++ b/base/libc.jl @@ -3,8 +3,7 @@ module Libc export FILE, TmStruct, strftime, strptime, getpid, gethostname, free, malloc, calloc, realloc, - errno, strerror, flush_cstdio, systemsleep, time, - MS_ASYNC, MS_INVALIDATE, MS_SYNC, mmap, munmap, msync + errno, strerror, flush_cstdio, systemsleep, time include("errno.jl") @@ -163,64 +162,4 @@ malloc(size::Integer) = ccall(:malloc, Ptr{Void}, (Csize_t,), size) realloc(p::Ptr, size::Integer) = ccall(:realloc, Ptr{Void}, (Ptr{Void}, Csize_t), p, size) calloc(num::Integer, size::Integer) = ccall(:calloc, Ptr{Void}, (Csize_t, Csize_t), num, size) -## mmap ## - -msync{T}(A::Array{T}) = msync(pointer(A), length(A)*sizeof(T)) - -msync(B::BitArray) = msync(pointer(B.chunks), length(B.chunks)*sizeof(UInt64)) - -@unix_only begin -# Low-level routines -# These are needed for things like MAP_ANONYMOUS -function mmap(len::Integer, prot::Integer, flags::Integer, fd, offset::Integer) - const pagesize::Int = ccall(:jl_getpagesize, Clong, ()) - # Check that none of the computations will overflow - if len < 0 - 
throw(ArgumentError("requested size must be ≥ 0, got $len")) - end - if len > typemax(Int)-pagesize - throw(ArgumentError("requested size must be ≤ $(typemax(Int)-pagesize), got $len")) - end - # Set the offset to a page boundary - offset_page::FileOffset = floor(Integer,offset/pagesize)*pagesize - len_page::Int = (offset-offset_page) + len - # Mmap the file - p = ccall(:jl_mmap, Ptr{Void}, (Ptr{Void}, Csize_t, Cint, Cint, Cint, FileOffset), C_NULL, len_page, prot, flags, fd, offset_page) - systemerror("memory mapping failed", reinterpret(Int,p) == -1) - # Also return a pointer that compensates for any adjustment in the offset - return p, Int(offset-offset_page) -end - -function munmap(p::Ptr,len::Integer) - systemerror("munmap", ccall(:munmap,Cint,(Ptr{Void},Int),p,len) != 0) -end - -const MS_ASYNC = 1 -const MS_INVALIDATE = 2 -const MS_SYNC = 4 -function msync(p::Ptr, len::Integer, flags::Integer) - systemerror("msync", ccall(:msync, Cint, (Ptr{Void}, Csize_t, Cint), p, len, flags) != 0) -end -msync(p::Ptr, len::Integer) = msync(p, len, MS_SYNC) -end - - -@windows_only begin -function munmap(viewhandle::Ptr, mmaphandle::Ptr) - status = ccall(:UnmapViewOfFile, stdcall, Cint, (Ptr{Void},), viewhandle)!=0 - status |= ccall(:CloseHandle, stdcall, Cint, (Ptr{Void},), mmaphandle)!=0 - if !status - error("could not unmap view: $(FormatMessage())") - end -end - -function msync(p::Ptr, len::Integer) - status = ccall(:FlushViewOfFile, stdcall, Cint, (Ptr{Void}, Csize_t), p, len)!=0 - if !status - error("could not msync: $(FormatMessage())") - end -end - -end - end # module diff --git a/base/mmap.jl b/base/mmap.jl index 3c65fd66ec75b..5e656c0d2e058 100644 --- a/base/mmap.jl +++ b/base/mmap.jl @@ -1,168 +1,175 @@ # This file is a part of Julia. License is MIT: http://julialang.org/license -### Generic interface ### +module Mmap -# Arrays -mmap_array{T,N}(::Type{T}, dims::NTuple{N,Integer}, s::IO) = mmap_array(T, dims, s, position(s)) +const PAGESIZE = Int(@unix ? 
ccall(:jl_getpagesize, Clong, ()) : ccall(:jl_getallocationgranularity, Clong, ())) -# BitArrays -mmap_bitarray{N}(::Type{Bool}, dims::NTuple{N,Integer}, s::IOStream, offset::FileOffset) = - mmap_bitarray(dims, s, offset) -mmap_bitarray{N}(::Type{Bool}, dims::NTuple{N,Integer}, s::IOStream) = mmap_bitarray(dims, s, position(s)) -mmap_bitarray{N}(dims::NTuple{N,Integer}, s::IOStream) = mmap_bitarray(dims, s, position(s)) +# for mmaps not backed by files +type Anonymous <: IO + name::AbstractString + readonly::Bool + create::Bool +end + +Anonymous() = Anonymous("",false,true) +Base.isopen(::Anonymous) = true +Base.isreadable(::Anonymous) = true +Base.iswritable(a::Anonymous) = !a.readonly -### UNIX implementation ### +const INVALID_HANDLE_VALUE = -1 +# const used for zeroed, anonymous memory; same value on Windows & Unix; say what?! +gethandle(io::Anonymous) = INVALID_HANDLE_VALUE +# platform-specific mmap utilities @unix_only begin -# Higher-level functions -# Before mapping, grow the file to sufficient size -# (Required if you're going to write to a new memory-mapped file) -# -# Note: a few mappable streams do not support lseek. When Julia -# supports structures in ccall, switch to fstat. 
-function mmap_grow(len::Integer, prot::Integer, flags::Integer, fd::Integer, offset::FileOffset) - const SEEK_SET::Cint = 0 - const SEEK_CUR::Cint = 1 - const SEEK_END::Cint = 2 - # Save current file position so we can restore it later - cpos = ccall(:jl_lseek, FileOffset, (Cint, FileOffset, Cint), fd, 0, SEEK_CUR) - systemerror("lseek", cpos < 0) - filelen = ccall(:jl_lseek, FileOffset, (Cint, FileOffset, Cint), fd, 0, SEEK_END) - systemerror("lseek", filelen < 0) - if (filelen < offset + len) - systemerror("pwrite", ccall(:jl_pwrite, Cssize_t, (Cint, Ptr{Void}, UInt, FileOffset), fd, Int8[0], 1, offset + len - 1) < 1) - end - cpos = ccall(:jl_lseek, FileOffset, (Cint, FileOffset, Cint), fd, cpos, SEEK_SET) - systemerror("lseek", cpos < 0) - return Libc.mmap(len, prot, flags, fd, offset) -end +const PROT_READ = convert(Cint,1) +const PROT_WRITE = convert(Cint,2) +const MAP_SHARED = convert(Cint,1) +const MAP_PRIVATE = convert(Cint,2) +const MAP_ANONYMOUS = convert(Cint, @osx? 0x1000 : 0x20) +const F_GETFL = convert(Cint,3) -# Determine a stream's read/write mode, and return prot & flags -# appropriate for mmap -# We could use isreadonly here, but it's worth checking that it's readable too -function mmap_stream_settings(s::IO) - const PROT_READ::Cint = 1 - const PROT_WRITE::Cint = 2 - const MAP_SHARED::Cint = 1 - const F_GETFL::Cint = 3 - mode = ccall(:fcntl,Cint,(Cint,Cint),fd(s),F_GETFL) - systemerror("fcntl F_GETFL", mode == -1) - mode = mode & 3 - if mode == 0 - prot = PROT_READ - elseif mode == 1 - prot = PROT_WRITE - else +gethandle(io::IO) = fd(io) + +# Determine a stream's read/write mode, and return prot & flags appropriate for mmap +function settings(s::Int, shared::Bool) + flags = shared ? MAP_SHARED : MAP_PRIVATE + if s == INVALID_HANDLE_VALUE + flags |= MAP_ANONYMOUS prot = PROT_READ | PROT_WRITE + else + mode = ccall(:fcntl,Cint,(Cint,Cint),s,F_GETFL) + systemerror("fcntl F_GETFL", mode == -1) + mode = mode & 3 + prot = mode == 0 ? 
PROT_READ : mode == 1 ? PROT_WRITE : PROT_READ | PROT_WRITE + if prot & PROT_READ == 0 + throw(ArgumentError("mmap requires read permissions on the file (choose r+)")) + end end - if prot & PROT_READ == 0 - throw(ArgumentError("mmap requires read permissions on the file (choose r+)")) - end - flags = MAP_SHARED return prot, flags, (prot & PROT_WRITE) > 0 end -# Mmapped-array constructor -function mmap_array{T,N}(::Type{T}, dims::NTuple{N,Integer}, s::IO, offset::FileOffset; grow::Bool=true) - prot, flags, iswrite = mmap_stream_settings(s) - len = prod(dims)*sizeof(T) - if len > typemax(Int) - throw(ArgumentError("file is too large to memory-map on this platform")) - end - if iswrite && grow - pmap, delta = mmap_grow(len, prot, flags, fd(s), offset) - else - pmap, delta = Libc.mmap(len, prot, flags, fd(s), offset) +# Before mapping, grow the file to sufficient size +# Note: a few mappable streams do not support lseek. When Julia +# supports structures in ccall, switch to fstat. +grow!(::Anonymous,o::Integer,l::Integer) = return +function grow!(io::IO, offset::Integer, len::Integer) + pos = position(io) + filelen = filesize(io) + if filelen < offset + len + write(io, Base.zeros(UInt8,(offset + len) - filelen)) + flush(io) end - A = pointer_to_array(convert(Ptr{T}, UInt(pmap)+delta), dims) - finalizer(A,x->Libc.munmap(pmap,len+delta)) - return A + seek(io, pos) + return end +end # @unix_only -end +@windows_only begin -### Windows implementation ### +const PAGE_READONLY = UInt32(0x02) +const PAGE_READWRITE = UInt32(0x04) +const PAGE_WRITECOPY = UInt32(0x08) +const PAGE_EXECUTE_READ = UInt32(0x20) +const PAGE_EXECUTE_READWRITE = UInt32(0x40) +const PAGE_EXECUTE_WRITECOPY = UInt32(0x80) +const FILE_MAP_COPY = UInt32(0x01) +const FILE_MAP_WRITE = UInt32(0x02) +const FILE_MAP_READ = UInt32(0x04) +const FILE_MAP_EXECUTE = UInt32(0x20) -@windows_only type SharedMemSpec - name :: AbstractString - readonly :: Bool - create :: Bool +function gethandle(io::IO) + handle = 
Base._get_osfhandle(RawFD(fd(io))).handle + systemerror("could not get handle for file to map: $(Base.FormatMessage())", handle == -1) + return Int(handle) end -@windows_only begin -# Mmapped-array constructor -function mmap_array{T,N}(::Type{T}, dims::NTuple{N,Integer}, s::Union{IO,SharedMemSpec}, offset::FileOffset) - if isa(s,IO) - hdl = _get_osfhandle(RawFD(fd(s))).handle - if Int(hdl) == -1 - error("could not get handle for file to map: $(FormatMessage())") - end - name = Ptr{Cwchar_t}(C_NULL) - ro = isreadonly(s) - create = true - else - # shared memory - hdl = -1 - name = utf16(s.name) - ro = s.readonly - create = s.create - end - len = prod(dims)*sizeof(T) - const granularity::Int = ccall(:jl_getallocationgranularity, Clong, ()) - if len < 0 - throw(ArgumentError("requested size must be ≥ 0, got $len")) - end - if len > typemax(Int)-granularity - throw(ArgumentError("file is too large ot memory-map on this platform")) - end - # Set the offset to a page boundary - offset_page::FileOffset = div(offset, granularity)*granularity - szfile = convert(Csize_t, len + offset) - szarray = szfile - convert(Csize_t, offset_page) - access = ro ? 4 : 2 - if create - flprotect = ro ? 
0x02 : 0x04 - mmaphandle = ccall(:CreateFileMappingW, stdcall, Ptr{Void}, (Cptrdiff_t, Ptr{Void}, Cint, Cint, Cint, Cwstring), - hdl, C_NULL, flprotect, szfile>>32, szfile&typemax(UInt32), name) - else - mmaphandle = ccall(:OpenFileMappingW, stdcall, Ptr{Void}, (Cint, Cint, Cwstring), - access, true, name) - end - if mmaphandle == C_NULL - error("could not create file mapping: $(FormatMessage())") - end - viewhandle = ccall(:MapViewOfFile, stdcall, Ptr{Void}, (Ptr{Void}, Cint, Cint, Cint, Csize_t), - mmaphandle, access, offset_page>>32, offset_page&typemax(UInt32), szarray) - if viewhandle == C_NULL - error("could not create mapping view: $(FormatMessage())") - end - A = pointer_to_array(convert(Ptr{T}, viewhandle+offset-offset_page), dims) - finalizer(A, x->Libc.munmap(viewhandle, mmaphandle)) +settings(sh::Anonymous) = utf16(sh.name), sh.readonly, sh.create +settings(io::IO) = convert(Ptr{Cwchar_t},C_NULL), isreadonly(io), true +end # @windows_only + +# core impelementation of mmap +function mmap{T,N}(io::IO, + ::Type{Array{T,N}}=Vector{UInt8}, + dims::NTuple{N,Integer}=(div(filesize(io)-position(io),sizeof(T)),), + offset::Integer=position(io); grow::Bool=true, shared::Bool=true) + # check inputs + isopen(io) || throw(ArgumentError("$io must be open to mmap")) + isbits(T) || throw(ArgumentError("unable to mmap $T; must satisfy isbits(T) == true")) + + len = prod(dims) * sizeof(T) + len > 0 || throw(ArgumentError("requested size must be > 0, got $len")) + len < typemax(Int) - PAGESIZE || throw(ArgumentError("requested size must be < $(typemax(Int)-PAGESIZE), got $len")) + + offset >= 0 || throw(ArgumentError("requested offset must be ≥ 0, got $offset")) + + # shift `offset` to start of page boundary + offset_page::FileOffset = div(offset, PAGESIZE) * PAGESIZE + # add (offset - offset_page) to `len` to get total length of memory-mapped region + mmaplen = (offset - offset_page) + len + + file_desc = gethandle(io) + # platform-specific mmapping + @unix_only begin + 
prot, flags, iswrite = settings(file_desc, shared) + iswrite && grow && grow!(io, offset, len) + # mmap the file + ptr = ccall(:jl_mmap, Ptr{Void}, (Ptr{Void}, Csize_t, Cint, Cint, Cint, FileOffset), C_NULL, mmaplen, prot, flags, file_desc, offset_page) + systemerror("memory mapping failed", reinterpret(Int,ptr) == -1) + end # @unix_only + + @windows_only begin + name, readonly, create = settings(io) + szfile = convert(Csize_t, len + offset) + readonly && szfile > filesize(io) && throw(ArgumentError("unable to increase file size to $szfile due to read-only permissions")) + handle = create ? ccall(:CreateFileMappingW, stdcall, Ptr{Void}, (Cptrdiff_t, Ptr{Void}, Cint, Cint, Cint, Cwstring), + file_desc, C_NULL, readonly ? PAGE_READONLY : PAGE_READWRITE, szfile >> 32, szfile & typemax(UInt32), name) : + ccall(:OpenFileMappingW, stdcall, Ptr{Void}, (Cint, Cint, Cwstring), + readonly ? FILE_MAP_READ : FILE_MAP_WRITE, true, name) + handle == C_NULL && error("could not create file mapping: $(Base.FormatMessage())") + ptr = ccall(:MapViewOfFile, stdcall, Ptr{Void}, (Ptr{Void}, Cint, Cint, Cint, Csize_t), + handle, readonly ? 
FILE_MAP_READ : FILE_MAP_WRITE, offset_page >> 32, offset_page & typemax(UInt32), (offset - offset_page) + len) + ptr == C_NULL && error("could not create mapping view: $(Base.FormatMessage())") + end # @windows_only + # convert mmapped region to Julia Array at `ptr + (offset - offset_page)` since file was mapped at offset_page + A = pointer_to_array(convert(Ptr{T}, UInt(ptr) + UInt(offset - offset_page)), dims) + @unix_only finalizer(A, x -> systemerror("munmap", ccall(:munmap,Cint,(Ptr{Void},Int),ptr,mmaplen) != 0)) + @windows_only finalizer(A, x -> begin + status = ccall(:UnmapViewOfFile, stdcall, Cint, (Ptr{Void},), ptr)!=0 + status |= ccall(:CloseHandle, stdcall, Cint, (Ptr{Void},), handle)!=0 + status || error("could not unmap view: $(Base.FormatMessage())") + end) return A end -end +mmap{T<:Array,N}(file::AbstractString, + ::Type{T}=Vector{UInt8}, + dims::NTuple{N,Integer}=(div(filesize(file),sizeof(eltype(T))),), + offset::Integer=Int64(0); grow::Bool=true, shared::Bool=true) = + open(io->mmap(io, T, dims, offset; grow=grow, shared=shared), file, isfile(file) ? 
"r+" : "w+")::Array{eltype(T),N} -# Mmapped-bitarray constructor -function mmap_bitarray{N}(dims::NTuple{N,Integer}, s::IOStream, offset::FileOffset) - iswrite = !isreadonly(s) - n = 1 - for (i, d) in enumerate(dims) - if d < 0 - throw(ArgumentError("dimension size must be ≥ 0, got $d size for dimension $i")) - end - n *= d - end - nc = num_bit_chunks(n) - if nc > typemax(Int) - throw(ArgumentError("file is too large to memory-map on this platform")) - end - chunks = mmap_array(UInt64, (nc,), s, offset) - if iswrite - chunks[end] &= _msk_end(n) +# using a length argument instead of dims +mmap{T<:Array}(io::IO, ::Type{T}, len::Integer, offset::Integer=position(io); grow::Bool=true, shared::Bool=true) = + mmap(io, T, (len,), offset; grow=grow, shared=shared) +mmap{T<:Array}(file::AbstractString, ::Type{T}, len::Integer, offset::Integer=Int64(0); grow::Bool=true, shared::Bool=true) = + open(io->mmap(io, T, (len,), offset; grow=grow, shared=shared), file, isfile(file) ? "r+" : "w+")::Vector{eltype(T)} + +# constructors for non-file-backed (anonymous) mmaps +mmap{T<:Array,N}(::Type{T}, dims::NTuple{N,Integer}; shared::Bool=true) = mmap(Anonymous(), T, dims, Int64(0); shared=shared) +mmap{T<:Array}(::Type{T}, i::Integer...; shared::Bool=true) = mmap(Anonymous(), T, convert(Tuple{Vararg{Int}},i), Int64(0); shared=shared) + +function mmap{T<:BitArray,N}(io::IOStream, + ::Type{T}, + dims::NTuple{N,Integer}, + offset::FileOffset=position(io); grow::Bool=true, shared::Bool=true) + n = prod(dims) + nc = Base.num_bit_chunks(n) + chunks = mmap(io, Vector{UInt64}, (nc,), offset; grow=grow, shared=shared) + if !isreadonly(io) + chunks[end] &= Base._msk_end(n) else - if chunks[end] != chunks[end] & _msk_end(n) + if chunks[end] != chunks[end] & Base._msk_end(n) throw(ArgumentError("the given file does not contain a valid BitArray of size $(join(dims, 'x')) (open with \"r+\" mode to override)")) end end @@ -174,3 +181,30 @@ function mmap_bitarray{N}(dims::NTuple{N,Integer}, 
s::IOStream, offset::FileOffs end return B end + +mmap{T<:BitArray,N}(file::AbstractString, ::Type{T}, dims::NTuple{N,Integer}, offset::Integer=Int64(0); grow::Bool=true, shared::Bool=true) = + open(io->mmap(io, T, dims, offset; grow=grow, shared=shared), file, isfile(file) ? "r+" : "w+")::BitArray{N} + +# using a length argument instead of dims +mmap{T<:BitArray}(io::IO, ::Type{T}, len::Integer, offset::Integer=position(io); grow::Bool=true, shared::Bool=true) = + mmap(io, T, (len,), offset; grow=grow, shared=shared) +mmap{T<:BitArray}(file::AbstractString, ::Type{T}, len::Integer, offset::Integer=Int64(0); grow::Bool=true, shared::Bool=true) = + open(io->mmap(io, T, (len,), offset; grow=grow, shared=shared), file, isfile(file) ? "r+" : "w+")::BitVector + +# constructors for non-file-backed (anonymous) mmaps +mmap{T<:BitArray,N}(::Type{T}, dims::NTuple{N,Integer}; shared::Bool=true) = mmap(Anonymous(), T, dims, Int64(0); shared=shared) +mmap{T<:BitArray}(::Type{T}, i::Integer...; shared::Bool=true) = mmap(Anonymous(), T, convert(Tuple{Vararg{Int}},i), Int64(0); shared=shared) + +# msync flags for unix +const MS_ASYNC = 1 +const MS_INVALIDATE = 2 +const MS_SYNC = 4 + +function sync!{T}(m::Array{T}, flags::Integer=MS_SYNC) + @unix_only systemerror("msync", ccall(:msync, Cint, (Ptr{Void}, Csize_t, Cint), pointer(m), length(m)*sizeof(T), flags) != 0) + @windows_only systemerror("could not FlushViewOfFile: $(Base.FormatMessage())", + ccall(:FlushViewOfFile, stdcall, Cint, (Ptr{Void}, Csize_t), pointer(m), length(m)) == 0) +end +sync!(B::BitArray, flags::Integer=MS_SYNC) = sync!(B.chunks, flags) + +end # module diff --git a/base/sharedarray.jl b/base/sharedarray.jl index a73a3db243a63..33e4cc3a934a6 100644 --- a/base/sharedarray.jl +++ b/base/sharedarray.jl @@ -392,7 +392,7 @@ function _shm_mmap_array(T, dims, shm_seg_name, mode) systemerror("ftruncate() failed for shm segment " * shm_seg_name, rc != 0) end - mmap_array(T, dims, s, zero(FileOffset), grow=false) + 
Mmap.mmap(s, Array{T,length(dims)}, dims, zero(FileOffset); grow=false) end shm_unlink(shm_seg_name) = ccall(:shm_unlink, Cint, (Cstring,), shm_seg_name) @@ -405,8 +405,8 @@ end # @unix_only function _shm_mmap_array(T, dims, shm_seg_name, mode) readonly = !((mode & JL_O_RDWR) == JL_O_RDWR) create = (mode & JL_O_CREAT) == JL_O_CREAT - s = SharedMemSpec(shm_seg_name, readonly, create) - mmap_array(T, dims, s, zero(FileOffset)) + s = Mmap.Anonymous(shm_seg_name, readonly, create) + Mmap.mmap(s, Array{T,length(dims)}, dims, zero(FileOffset)) end # no-op in windows diff --git a/base/sysimg.jl b/base/sysimg.jl index e573a3705f707..db3bc2f1ccff0 100644 --- a/base/sysimg.jl +++ b/base/sysimg.jl @@ -110,7 +110,7 @@ include("iostream.jl") # system & environment include("libc.jl") -using .Libc: getpid, gethostname, time, msync +using .Libc: getpid, gethostname, time include("libdl.jl") using .Libdl: DL_LOAD_PATH include("env.jl") @@ -221,6 +221,7 @@ include("poll.jl") # memory-mapped and shared arrays include("mmap.jl") +import .Mmap include("sharedarray.jl") # utilities - timing, help, edit diff --git a/doc/stdlib/io-network.rst b/doc/stdlib/io-network.rst index e7c32ec862adf..52d6507843263 100644 --- a/doc/stdlib/io-network.rst +++ b/doc/stdlib/io-network.rst @@ -607,22 +607,33 @@ stack with: Memory-mapped I/O ----------------- -.. function:: mmap_array(type, dims, stream, [offset]) +.. function:: Mmap.Anonymous(name, readonly, create) + + Create an ``IO``-like object for creating zeroed-out mmapped-memory that is not tied to a file for use in ``Mmap.mmap``. Used by ``SharedArray`` for creating shared memory arrays. + +.. function:: Mmap.mmap(io::Union(IOStream,AbstractString,Mmap.Anonymous)[, type::Type{Array{T,N}}, dims, offset]; grow::Bool=true, shared::Bool=true) + Mmap.mmap(type::Type{Array{T,N}}, dims) Create an ``Array`` whose values are linked to a file, using memory-mapping. 
This provides a convenient way of working with data too large to fit in the computer's memory. - The type determines how the bytes of the array are interpreted. Note that the file must be stored in binary format, and no format conversions are possible (this is a limitation of operating systems, not Julia). + The type is an ``Array{T,N}`` with a bits-type element of ``T`` and dimension ``N`` that determines how the bytes of the array are interpreted. Note that the file must be stored in binary format, and no format conversions are possible (this is a limitation of operating systems, not Julia). + + ``dims`` is a tuple or single ``Integer`` specifying the size or length of the array. + + The file is passed via the stream argument, either as an open ``IOStream`` or filename string. When you initialize the stream, use ``"r"`` for a "read-only" array, and ``"w+"`` to create a new array used to write values to disk. + + If no ``type`` argument is specified, the default is ``Vector{UInt8}``. - ``dims`` is a tuple specifying the size of the array. + Optionally, you can specify an offset (in bytes) if, for example, you want to skip over a header in the file. The default value for the offset is the current stream position for an ``IOStream``. - The file is passed via the stream argument. When you initialize the stream, use ``"r"`` for a "read-only" array, and ``"w+"`` to create a new array used to write values to disk. + The ``grow`` keyword argument specifies whether the disk file should be grown to accommodate the requested size of array (if the total file size is < requested array size). Write privileges are required to grow the file. - Optionally, you can specify an offset (in bytes) if, for example, you want to skip over a header in the file. The default value for the offset is the current stream position. + The ``shared`` keyword argument specifies whether the resulting ``Array`` and changes made to it will be visible to other processes mapping the same file. 
For example, the following code:: # Create a file for mmapping - # (you could alternatively use mmap_array to do this step, too) + # (you could alternatively use mmap to do this step, too) A = rand(1:20, 5, 30) s = open("/tmp/mmap.bin", "w+") # We'll write the dimensions of the array as the first two Ints in the file @@ -636,21 +647,21 @@ Memory-mapped I/O s = open("/tmp/mmap.bin") # default is read-only m = read(s, Int) n = read(s, Int) - A2 = mmap_array(Int, (m,n), s) + A2 = Mmap.mmap(s, Matrix{Int}, (m,n)) creates a ``m``-by-``n`` ``Matrix{Int}``, linked to the file associated with stream ``s``. A more portable file would need to encode the word size---32 bit or 64 bit---and endianness information in the header. In practice, consider encoding binary data using standard formats like HDF5 (which can be used with memory-mapping). -.. function:: mmap_bitarray([type,] dims, stream, [offset]) +.. function:: Mmap.mmap(io, BitArray, [dims, offset]) - Create a ``BitArray`` whose values are linked to a file, using memory-mapping; it has the same purpose, works in the same way, and has the same arguments, as :func:`mmap_array`, but the byte representation is different. The ``type`` parameter is optional, and must be ``Bool`` if given. + Create a ``BitArray`` whose values are linked to a file, using memory-mapping; it has the same purpose, works in the same way, and has the same arguments, as :func:`mmap`, but the byte representation is different. - **Example**: ``B = mmap_bitarray((25,30000), s)`` + **Example**: ``B = Mmap.mmap(s, BitArray, (25,30000))`` This would create a 25-by-30000 ``BitArray``, linked to the file associated with stream ``s``. -.. function:: msync(array) +.. function:: Mmap.sync!(array) Forces synchronization between the in-memory version of a memory-mapped ``Array`` or ``BitArray`` and the on-disk version. 
diff --git a/test/file.jl b/test/file.jl index ec89ee7583c3c..ed2b0eaff85ce 100644 --- a/test/file.jl +++ b/test/file.jl @@ -187,84 +187,6 @@ test_monitor(2) test_monitor_wait(0.1) test_monitor_wait_poll(0.5) -########## -# mmap # -########## - -s = open(file, "w") -write(s, "Hello World\n") -close(s) -s = open(file, "r") -@test isreadonly(s) == true -c = mmap_array(UInt8, (11,), s) -@test c == "Hello World".data -c = mmap_array(UInt8, (UInt16(11),), s) -@test c == "Hello World".data -@test_throws ArgumentError mmap_array(UInt8, (Int16(-11),), s) -@test_throws ArgumentError mmap_array(UInt8, (typemax(UInt),), s) -close(s) -s = open(file, "r+") -@test isreadonly(s) == false -c = mmap_array(UInt8, (11,), s) -c[5] = UInt8('x') -Libc.msync(c) -close(s) -s = open(file, "r") -# FIXME: Disable for now because of memory corruption. See #11691 -# c = mmap_array(UInt8, (11,), s) -# @test_throws ReadOnlyMemoryError c[5] = UInt8('x') -str = readline(s) -close(s) -@test startswith(str, "Hellx World") -c=nothing; gc(); gc(); # cause munmap finalizer to run & free resources - -s = open(file, "w") -write(s, [0xffffffffffffffff, - 0xffffffffffffffff, - 0xffffffffffffffff, - 0x000000001fffffff]) -close(s) -s = open(file, "r") -@test isreadonly(s) -b = mmap_bitarray((17,13), s) -@test b == trues(17,13) -@test_throws ArgumentError mmap_bitarray((7,3), s) -close(s) -s = open(file, "r+") -b = mmap_bitarray((17,19), s) -rand!(b) -Libc.msync(b) -b0 = copy(b) -close(s) -s = open(file, "r") -@test isreadonly(s) -b = mmap_bitarray((17,19), s) -@test b == b0 -close(s) -b=nothing; b0=nothing; gc(); gc(); # cause munmap finalizer to run & free resources - -# mmap with an offset -A = rand(1:20, 500, 300) -fname = tempname() -s = open(fname, "w+") -write(s, size(A,1)) -write(s, size(A,2)) -write(s, A) -close(s) -s = open(fname) -m = read(s, Int) -n = read(s, Int) -A2 = mmap_array(Int, (m,n), s) -@test A == A2 -seek(s, 0) -A3 = mmap_array(Int, (m,n), s, convert(FileOffset,2*sizeof(Int))) -@test A 
== A3 -A4 = mmap_array(Int, (m,150), s, convert(FileOffset,(2+150*m)*sizeof(Int))) -@test A[:, 151:end] == A4 -close(s) -A2=nothing; A3=nothing; A4=nothing; gc(); gc(); # cause munmap finalizer to run & free resources -rm(fname) - ############## # mark/reset # ############## diff --git a/test/mmap.jl b/test/mmap.jl new file mode 100644 index 0000000000000..6c35bb44fb0ec --- /dev/null +++ b/test/mmap.jl @@ -0,0 +1,284 @@ +# This file is a part of Julia. License is MIT: http://julialang.org/license + +file = tempname() +s = open(file, "w") do f + write(f, "Hello World\n") +end +t = "Hello World".data +@test Mmap.mmap(file, Array{UInt8,3}, (11,1,1)) == reshape(t,(11,1,1)) +gc() +@test Mmap.mmap(file, Array{UInt8,3}, (1,11,1)) == reshape(t,(1,11,1)) +gc() +@test Mmap.mmap(file, Array{UInt8,3}, (1,1,11)) == reshape(t,(1,1,11)) +gc() +@test_throws ArgumentError Mmap.mmap(file, Array{UInt8,3}, (11,0,1)) # 0-dimension results in len=0 +@test Mmap.mmap(file, Vector{UInt8}, (11,)) == t +gc() +@test Mmap.mmap(file, Array{UInt8,2}, (1,11)) == t' +gc() +@test_throws ArgumentError Mmap.mmap(file, Array{UInt8,2}, (0,12)) +m = Mmap.mmap(file, Array{UInt8,3}, (1,2,1)) +@test m == reshape("He".data,(1,2,1)) +m=nothing; gc() + +# constructors +@test length(Mmap.mmap(file)) == 12 +@test length(Mmap.mmap(file, Vector{Int8})) == 12 +@test length(Mmap.mmap(file, Matrix{Int8}, (12,1))) == 12 +@test length(Mmap.mmap(file, Matrix{Int8}, (12,1), 0)) == 12 +@test length(Mmap.mmap(file, Matrix{Int8}, (12,1), 0; grow=false)) == 12 +@test length(Mmap.mmap(file, Matrix{Int8}, (12,1), 0; shared=false)) == 12 +@test length(Mmap.mmap(file, Vector{Int8}, 12)) == 12 +@test length(Mmap.mmap(file, Vector{Int8}, 12, 0)) == 12 +@test length(Mmap.mmap(file, Vector{Int8}, 12, 0; grow=false)) == 12 +@test length(Mmap.mmap(file, Vector{Int8}, 12, 0; shared=false)) == 12 +s = open(file) +@test length(Mmap.mmap(s)) == 12 +@test length(Mmap.mmap(s, Vector{Int8})) == 12 +@test length(Mmap.mmap(s, Matrix{Int8}, 
(12,1))) == 12 +@test length(Mmap.mmap(s, Matrix{Int8}, (12,1), 0)) == 12 +@test length(Mmap.mmap(s, Matrix{Int8}, (12,1), 0; grow=false)) == 12 +@test length(Mmap.mmap(s, Matrix{Int8}, (12,1), 0; shared=false)) == 12 +@test length(Mmap.mmap(s, Vector{Int8}, 12)) == 12 +@test length(Mmap.mmap(s, Vector{Int8}, 12, 0)) == 12 +@test length(Mmap.mmap(s, Vector{Int8}, 12, 0; grow=false)) == 12 +@test length(Mmap.mmap(s, Vector{Int8}, 12, 0; shared=false)) == 12 +close(s) +@test_throws ErrorException Mmap.mmap(file, Vector{Ref}) # must be bit-type +gc() + +s = open(f->f,file,"w") +@test_throws ArgumentError Mmap.mmap(file) # requested len=0 on empty file +@test_throws ArgumentError Mmap.mmap(file,Vector{UInt8},0) +m = Mmap.mmap(file,Vector{UInt8},12) +m[:] = "Hello World\n".data +Mmap.sync!(m) +m=nothing; gc() +@test open(readall,file) == "Hello World\n" + +s = open(file, "r") +close(s) +@test_throws Base.UVError Mmap.mmap(s) # closed IOStream +@test_throws ArgumentError Mmap.mmap(s,Vector{UInt8},12,0) # closed IOStream +@test_throws SystemError Mmap.mmap("") + +# negative length +@test_throws ArgumentError Mmap.mmap(file, Vector{UInt8}, -1) +# negative offset +@test_throws ArgumentError Mmap.mmap(file, Vector{UInt8}, 1, -1) + +for i = 0x01:0x0c + @test length(Mmap.mmap(file, Vector{UInt8}, i)) == Int(i) +end +gc() + +sz = filesize(file) +m = Mmap.mmap(file, Vector{UInt8}, sz+1) +@test length(m) == sz+1 # test growing +@test m[end] == 0x00 +m=nothing; gc() +sz = filesize(file) +m = Mmap.mmap(file, Vector{UInt8}, 1, sz) +@test length(m) == 1 +@test m[1] == 0x00 +m=nothing; gc() +sz = filesize(file) +# test where offset is actually greater than the size of the file; file is grown with zeroed bytes +m = Mmap.mmap(file, Vector{UInt8}, 1, sz+1) +@test length(m) == 1 +@test m[1] == 0x00 +m=nothing; gc() + +# Uncomment once #11351 is resolved +# s = open(file, "r") +# m = Mmap.mmap(s) +# @test_throws ReadOnlyMemoryError m[5] = UInt8('x') # tries to setindex! 
on read-only array +# m=nothing; gc() + +s = open(file, "w") do f + write(f, "Hello World\n") +end + +s = open(file, "r") +m = Mmap.mmap(s) +close(s) +m=nothing; gc() +m = Mmap.mmap(file) +s = open(file, "r+") +c = Mmap.mmap(s) +d = Mmap.mmap(s) +c[1] = UInt8('J') +Mmap.sync!(c) +close(s) +@test m[1] == UInt8('J') +@test d[1] == UInt8('J') +m=nothing; c=nothing; d=nothing; gc() + +s = open(file, "w") do f + write(f, "Hello World\n") +end + +s = open(file, "r") +@test isreadonly(s) == true +c = Mmap.mmap(s, Vector{UInt8}, (11,)) +@test c == "Hello World".data +c=nothing; gc() +c = Mmap.mmap(s, Vector{UInt8}, (UInt16(11),)) +@test c == "Hello World".data +c=nothing; gc() +@test_throws ArgumentError Mmap.mmap(s, Vector{UInt8}, (Int16(-11),)) +@test_throws ArgumentError Mmap.mmap(s, Vector{UInt8}, (typemax(UInt),)) +close(s) +s = open(file, "r+") +@test isreadonly(s) == false +c = Mmap.mmap(s, Vector{UInt8}, (11,)) +c[5] = UInt8('x') +Mmap.sync!(c) +close(s) +s = open(file, "r") +str = readline(s) +close(s) +@test startswith(str, "Hellx World") +c=nothing; gc() + +c = Mmap.mmap(file) +@test c == "Hellx World\n".data +c=nothing; gc() +c = Mmap.mmap(file, Vector{UInt8}, 3) +@test c == "Hel".data +c=nothing; gc() +s = open(file, "r") +c = Mmap.mmap(s, Vector{UInt8}, 6) +@test c == "Hellx ".data +close(s) +c=nothing; gc() +c = Mmap.mmap(file, Vector{UInt8}, 5, 6) +@test c == "World".data +c=nothing; gc() + +s = open(file, "w") +write(s, "Hello World\n") +close(s) + +# test indexing and bounds checking of an array returned by Mmap.mmap +m = Mmap.mmap(file) +t = "Hello World\n" +for i = 1:12 + @test m[i] == t.data[i] +end +@test_throws BoundsError m[13] +m=nothing; gc() + +m = Mmap.mmap(file,Vector{UInt8},6) +@test m[1] == "H".data[1] +@test m[2] == "e".data[1] +@test m[3] == "l".data[1] +@test m[4] == "l".data[1] +@test m[5] == "o".data[1] +@test m[6] == " ".data[1] +@test_throws BoundsError m[7] +m=nothing; gc() + +m = Mmap.mmap(file,Vector{UInt8},2,6) +@test m[1] == 
"W".data[1] +@test m[2] == "o".data[1] +@test_throws 
BoundsError m[3] +finalize(m); gc() + +s = open(file, "w") +write(s, [0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0x000000001fffffff]) +close(s) +s = open(file, "r") +@test isreadonly(s) +b = Mmap.mmap(s, BitArray, (17,13)) +@test b == trues(17,13) +@test_throws ArgumentError Mmap.mmap(s, BitArray, (7,3)) +close(s) +s = open(file, "r+") +b = Mmap.mmap(s, BitArray, (17,19)) +rand!(b) +Mmap.sync!(b) +b0 = copy(b) +close(s) +s = open(file, "r") +@test isreadonly(s) +b = Mmap.mmap(s, BitArray, (17,19)) +@test b == b0 +close(s) +finalize(b); finalize(b0) +b = nothing; b0 = nothing +gc() + +open(file,"w") do f + write(f,UInt64(1)) + write(f,UInt8(1)) +end +@test filesize(file) == 9 +m = Mmap.mmap(file, BitArray, (72,)) +@test length(m) == 72 + +rm(file) + +# Mmap.mmap with an offset +A = rand(1:20, 500, 300) +fname = tempname() +s = open(fname, "w+") +write(s, size(A,1)) +write(s, size(A,2)) +write(s, A) +close(s) +s = open(fname) +m = read(s, Int) +n = read(s, Int) +A2 = Mmap.mmap(s, Matrix{Int}, (m,n)) +@test A == A2 +seek(s, 0) +A3 = Mmap.mmap(s, Matrix{Int}, (m,n), convert(FileOffset,2*sizeof(Int))) +@test A == A3 +A4 = Mmap.mmap(s, Matrix{Int}, (m,150), convert(FileOffset,(2+150*m)*sizeof(Int))) +@test A[:, 151:end] == A4 +close(s) +finalize(A2); finalize(A3); finalize(A4) +gc() +rm(fname) + +# Mmap.Anonymous +m = Mmap.Anonymous() +@test m.name == "" +@test !m.readonly +@test m.create +@test isopen(m) +@test isreadable(m) +@test iswritable(m) + +m = Mmap.mmap(Vector{UInt8}, 12) +@test length(m) == 12 +@test all(m .== 0x00) +@test m[1] === 0x00 +@test m[end] === 0x00 +m[1] = 0x0a +Mmap.sync!(m) +@test m[1] === 0x0a +m = Mmap.mmap(Vector{UInt8}, 12; shared=false) # NOTE(review): result is reassigned on the next line without being tested +m = Mmap.mmap(Vector{Int}, 12) +@test length(m) == 12 +@test all(m .== 0) +@test m[1] === 0 +@test m[end] === 0 +m = Mmap.mmap(Vector{Float64}, 12) +@test length(m) == 12 +@test all(m .== 0.0) +m = 
Mmap.mmap(Matrix{Int8}, (12,12)) +@test size(m) == (12,12) +@test all(m == zeros(Int8, 
(12,12))) # NOTE(review): `m == zeros(...)` is already a single Bool, unlike the `.==` checks above; confirm intent +@test sizeof(m) == prod((12,12)) +n = similar(m) +@test size(n) == (12,12) +n = similar(m, (2,2)) +@test size(n) == (2,2) +n = similar(m, 12) +@test length(n) == 12 +@test size(n) == (12,) From 1c1e16832ddee42e250965d47eb86ed7e2f56361 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Mon, 29 Jun 2015 21:27:34 -0600 Subject: [PATCH 2/3] Make sure to add new test/mmap.jl file to test suite --- test/choosetests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/choosetests.jl b/test/choosetests.jl index 72310a5cdaa0e..866138a421b6b 100644 --- a/test/choosetests.jl +++ b/test/choosetests.jl @@ -22,7 +22,7 @@ function choosetests(choices = []) "bitarray", "copy", "math", "fastmath", "functional", "operators", "path", "ccall", "unicode", "bigint", "sorting", "statistics", "spawn", "backtrace", - "priorityqueue", "file", "version", "resolve", + "priorityqueue", "file", "mmap", "version", "resolve", "pollfd", "mpfr", "broadcast", "complex", "socket", "floatapprox", "readdlm", "reflection", "regex", "float16", "combinatorics", "sysinfo", "rounding", "ranges", "mod2pi", From 877c62818d03c98e01c58b462137a933ccf9f618 Mon Sep 17 00:00:00 2001 From: Jacob Quinn Date: Tue, 30 Jun 2015 20:21:40 -0600 Subject: [PATCH 3/3] Fix failing windows test for mmap.jl --- test/mmap.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mmap.jl b/test/mmap.jl index 6c35bb44fb0ec..2149f4025fec0 100644 --- a/test/mmap.jl +++ b/test/mmap.jl @@ -219,7 +219,7 @@ end @test filesize(file) == 9 m = Mmap.mmap(file, BitArray, (72,)) @test length(m) == 72 - +finalize(m); m = nothing; gc() rm(file) # Mmap.mmap with an offset