Fixed #28300 -- Allowed GDALRasters to use the vsimem filesystem.

Thanks Tim Graham for the review and edits.
This commit is contained in:
Daniel Wiesmann 2017-06-16 17:09:05 +01:00 committed by Tim Graham
parent f6800a081a
commit 6f44f714c9
6 changed files with 244 additions and 16 deletions

View File

@ -2,7 +2,7 @@
This module houses the ctypes function prototypes for GDAL DataSource (raster)
related data structures.
"""
from ctypes import POINTER, c_char_p, c_double, c_int, c_void_p
from ctypes import POINTER, c_bool, c_char_p, c_double, c_int, c_void_p
from functools import partial
from django.contrib.gis.gdal.libgdal import GDAL_VERSION, std_call
@ -102,3 +102,9 @@ auto_create_warped_vrt = voidptr_output(
std_call('GDALAutoCreateWarpedVRT'),
[c_void_p, c_char_p, c_char_p, c_int, c_double, c_void_p]
)
# ctypes prototypes for GDAL's VSI (virtual filesystem) C functions, used to
# back GDAL raster files with in-memory buffers.
# http://gdal.org/cpl__vsi_8h.html
#
# NOTE(review): GDAL's cpl_vsi.h appears to declare the buffer lengths used
# below as vsi_l_offset (a 64-bit type) rather than int, so c_int may be too
# narrow -- confirm against the GDAL headers in use.
#
# VSIFileFromMemBuffer is called with (file path, buffer, buffer length,
# take-ownership flag).
create_vsi_file_from_mem_buffer = voidptr_output(std_call('VSIFileFromMemBuffer'), [c_char_p, c_void_p, c_int, c_int])
# VSIGetMemFileBuffer is called with (file path, pointer to an integer that
# receives the buffer length, delete-on-read flag).
get_mem_buffer_from_vsi_file = voidptr_output(std_call('VSIGetMemFileBuffer'), [c_char_p, POINTER(c_int), c_bool])
# VSIUnlink is called with the path of the file to remove.
unlink_vsi_file = int_output(std_call('VSIUnlink'), [c_char_p])

View File

@ -43,3 +43,13 @@ GDAL_RESAMPLE_ALGORITHMS = {
'Average': 5,
'Mode': 6,
}
# Fixed base path for buffer-based GDAL in-memory files. A raster whose name
# starts with this prefix is treated as stored in the vsimem filesystem.
VSI_FILESYSTEM_BASE_PATH = '/vsimem/'
# Should the memory file system take ownership of the buffer, freeing it when
# the file is deleted? No: GDALRaster keeps its own reference to the ctypes
# buffer, and GDALRaster.__del__() unlinks the vsimem file.
VSI_TAKE_BUFFER_OWNERSHIP = False
# Should a VSI file be removed when retrieving its buffer? This value is
# passed by GDALRaster.vsi_buffer when it fetches the file's contents.
VSI_DELETE_BUFFER_ON_READ = False

View File

@ -1,13 +1,20 @@
import json
import os
from ctypes import addressof, byref, c_char_p, c_double, c_void_p
import sys
import uuid
from ctypes import (
addressof, byref, c_buffer, c_char_p, c_double, c_int, c_void_p, string_at,
)
from django.contrib.gis.gdal.driver import Driver
from django.contrib.gis.gdal.error import GDALException
from django.contrib.gis.gdal.prototypes import raster as capi
from django.contrib.gis.gdal.raster.band import BandList
from django.contrib.gis.gdal.raster.base import GDALRasterBase
from django.contrib.gis.gdal.raster.const import GDAL_RESAMPLE_ALGORITHMS
from django.contrib.gis.gdal.raster.const import (
GDAL_RESAMPLE_ALGORITHMS, VSI_DELETE_BUFFER_ON_READ,
VSI_FILESYSTEM_BASE_PATH, VSI_TAKE_BUFFER_OWNERSHIP,
)
from django.contrib.gis.gdal.srs import SpatialReference, SRSException
from django.contrib.gis.geometry.regex import json_regex
from django.utils.encoding import force_bytes, force_text
@ -66,13 +73,36 @@ class GDALRaster(GDALRasterBase):
# If input is a valid file path, try setting file as source.
if isinstance(ds_input, str):
if not os.path.exists(ds_input):
raise GDALException('Unable to read raster source input "{}"'.format(ds_input))
try:
# GDALOpen will auto-detect the data source type.
self._ptr = capi.open_ds(force_bytes(ds_input), self._write)
except GDALException as err:
raise GDALException('Could not open the datasource at "{}" ({}).'.format(ds_input, err))
elif isinstance(ds_input, bytes):
# Create a new raster in write mode.
self._write = 1
# Get size of buffer.
size = sys.getsizeof(ds_input)
# Pass data to ctypes, keeping a reference to the ctypes object so
# that the vsimem file remains available until the GDALRaster is
# deleted.
self._ds_input = c_buffer(ds_input)
# Create random name to reference in vsimem filesystem.
vsi_path = os.path.join(VSI_FILESYSTEM_BASE_PATH, str(uuid.uuid4()))
# Create vsimem file from buffer.
capi.create_vsi_file_from_mem_buffer(
force_bytes(vsi_path),
byref(self._ds_input),
size,
VSI_TAKE_BUFFER_OWNERSHIP,
)
# Open the new vsimem file as a GDALRaster.
try:
self._ptr = capi.open_ds(force_bytes(vsi_path), self._write)
except GDALException:
# Remove the broken file from the VSI filesystem.
capi.unlink_vsi_file(force_bytes(vsi_path))
raise GDALException('Failed creating VSI raster from the input buffer.')
elif isinstance(ds_input, dict):
# A new raster needs to be created in write mode
self._write = 1
@ -151,6 +181,12 @@ class GDALRaster(GDALRasterBase):
else:
raise GDALException('Invalid data source input type: "{}".'.format(type(ds_input)))
def __del__(self):
# Destructor: clean up the vsimem file backing this raster, if any.
if self.is_vsi_based:
# Remove the temporary file from the VSI in-memory filesystem.
capi.unlink_vsi_file(force_bytes(self.name))
# Let the parent class run its own destructor logic.
super().__del__()
def __str__(self):
return self.name
@ -172,6 +208,25 @@ class GDALRaster(GDALRasterBase):
raise GDALException('Raster needs to be opened in write mode to change values.')
capi.flush_ds(self._ptr)
@property
def vsi_buffer(self):
# Return the contents of this raster's vsimem file as bytes, or None when
# the raster is not stored in GDAL's virtual filesystem.
if not self.is_vsi_based:
return None
# Prepare an integer that will contain the buffer length.
out_length = c_int()
# Get the data using the vsi file name.
dat = capi.get_mem_buffer_from_vsi_file(
force_bytes(self.name),
byref(out_length),
VSI_DELETE_BUFFER_ON_READ,
)
# Copy out_length bytes, starting at the returned pointer, into a bytes
# object.
return string_at(dat, out_length.value)
@cached_property
def is_vsi_based(self):
# True when the raster's name starts with the vsimem base path.
# Decorated with cached_property, so presumably evaluated once per
# instance -- verify against django.utils.functional.
return self.name.startswith(VSI_FILESYSTEM_BASE_PATH)
@property
def name(self):
"""

View File

@ -1104,16 +1104,27 @@ blue.
.. class:: GDALRaster(ds_input, write=False)
The constructor for ``GDALRaster`` accepts two parameters. The first parameter
defines the raster source, it is either a path to a file or spatial data with
values defining the properties of a new raster (such as size and name). If the
input is a file path, the second parameter specifies if the raster should
be opened with write access. If the input is raw data, the parameters ``width``,
``height``, and ``srid`` are required. The following example shows how rasters
can be created from different input sources (using the sample data from the
GeoDjango tests, see also the :ref:`gdal_sample_data` section). For a
detailed description of how to create rasters using dictionary input, see
the :ref:`gdal-raster-ds-input` section.
The constructor for ``GDALRaster`` accepts two parameters. The first
parameter defines the raster source, and the second parameter defines if a
raster should be opened in write mode. For newly-created rasters, the second
parameter is ignored and the new raster is always created in write mode.
The first parameter can take three forms: a string representing a file
path, a dictionary with values defining a new raster, or a bytes object
representing a raster file.
If the input is a file path, the raster is opened from there. If the input
is raw data in a dictionary, the parameters ``width``, ``height``, and
``srid`` are required. If the input is a bytes object, it will be opened
using a GDAL virtual filesystem.
For a detailed description of how to create rasters using dictionary input,
see :ref:`gdal-raster-ds-input`. For a detailed description of how to
create rasters in the virtual filesystem, see :ref:`gdal-raster-vsimem`.
The following example shows how rasters can be created from different input
sources (using the sample data from the GeoDjango tests; see also the
:ref:`gdal_sample_data` section).
>>> from django.contrib.gis.gdal import GDALRaster
>>> rst = GDALRaster('/path/to/your/raster.tif', write=False)
@ -1143,6 +1154,13 @@ blue.
[5, 2, 3, 5],
[5, 2, 3, 5],
[5, 5, 5, 5]], dtype=uint8)
>>> rst_file = open('/path/to/your/raster.tif', 'rb')
>>> rst_bytes = rst_file.read()
>>> rst = GDALRaster(rst_bytes)
>>> rst.is_vsi_based
True
>>> rst.name # Stored in a random path in the vsimem filesystem.
'/vsimem/da300bdb-129d-49a8-b336-e410a9428dad'
.. versionchanged:: 1.11
@ -1153,6 +1171,12 @@ blue.
the :meth:`GDALBand.data()<django.contrib.gis.gdal.GDALBand.data>`
method.
.. versionchanged:: 2.0
Added the ability to read and write rasters in GDAL's memory-based
virtual filesystem. ``GDALRaster`` objects can now be converted to and
from binary data in-memory.
.. attribute:: name
The name of the source which is equivalent to the input file path or the name
@ -1425,6 +1449,20 @@ blue.
>>> rst.metadata
{'DEFAULT': {'VERSION': '2.0'}}
.. attribute:: vsi_buffer
.. versionadded:: 2.0
A ``bytes`` representation of this raster. Returns ``None`` for rasters
that are not stored in GDAL's virtual filesystem.
.. attribute:: is_vsi_based
.. versionadded:: 2.0
A boolean indicating if this raster is stored in GDAL's virtual
filesystem.
``GDALBand``
------------
@ -1639,7 +1677,9 @@ Key Default Usage
.. object:: name
String representing the name of the raster. When creating a file-based
raster, this parameter must be the file path for the new raster.
raster, this parameter must be the file path for the new raster. If the
name starts with ``/vsimem/``, the raster is created in GDAL's virtual
filesystem.
.. object:: datatype
@ -1731,6 +1771,56 @@ Key Default Usage
``offset`` ``(0, 0)`` Passed to the :meth:`~GDALBand.data` method
================ ================================= ======================================================
.. _gdal-raster-vsimem:
Using GDAL's Virtual Filesystem
-------------------------------
GDAL has an internal memory-based filesystem, which allows treating blocks of
memory as files. It can be used to read and write :class:`GDALRaster` objects
to and from binary file buffers.
This is useful in web contexts where rasters might be obtained as a buffer
from remote storage or returned from a view without being written to disk.
:class:`GDALRaster` objects are created in the virtual filesystem when a
``bytes`` object is provided as input, or when the file path starts with
``/vsimem/``.
Input provided as ``bytes`` has to be a full binary representation of a file.
For instance::
# Read a raster as a bytes object from a remote source.
>>> from urllib.request import urlopen
>>> dat = urlopen('http://example.com/raster.tif').read()
# Instantiate a raster from the bytes object.
>>> rst = GDALRaster(dat)
# The name starts with /vsimem/, indicating that the raster lives in the
# virtual filesystem.
>>> rst.name
'/vsimem/da300bdb-129d-49a8-b336-e410a9428dad'
To create a new virtual file-based raster from scratch, use the ``ds_input``
dictionary representation and provide a ``name`` argument that starts with
``/vsimem/`` (for details of the dictionary representation, see
:ref:`gdal-raster-ds-input`). For virtual file-based rasters, the
:attr:`~GDALRaster.vsi_buffer` attribute returns the ``bytes`` representation
of the raster.
Here's how to create a raster and return it as a file in an
:class:`~django.http.HttpResponse`::
>>> from django.http import HttpResponse
>>> rst = GDALRaster({
... 'name': '/vsimem/temporarymemfile',
... 'driver': 'tif',
... 'width': 6, 'height': 6, 'srid': 3086,
... 'origin': [500000, 400000],
... 'scale': [100, -100],
... 'bands': [{'data': range(36), 'nodata_value': 99}]
... })
>>> HttpResponse(rst.vsi_buffer, 'image/tiff')
Settings
========

View File

@ -86,6 +86,10 @@ Minor features
* Allowed passing driver-specific creation options to
:class:`~django.contrib.gis.gdal.GDALRaster` objects using ``papsz_options``.
* Allowed creating :class:`~django.contrib.gis.gdal.GDALRaster` objects in
GDAL's internal virtual filesystem. Rasters can now be :ref:`created from and
converted to binary data <gdal-raster-vsimem>` in-memory.
:mod:`django.contrib.messages`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

View File

@ -155,6 +155,69 @@ class GDALRasterTests(SimpleTestCase):
else:
self.assertEqual(restored_raster.bands[0].data(), self.rs.bands[0].data())
def test_vsi_raster_creation(self):
# A raster instantiated from the raw bytes of a file exposes the same
# band data as self.rs (presumably the raster opened from self.rs_path
# during test setup -- confirm).
# Open a raster as a file object.
with open(self.rs_path, 'rb') as dat:
# Instantiate a raster from the file binary buffer.
vsimem = GDALRaster(dat.read())
# The data of the in-memory file is equal to the source file.
result = vsimem.bands[0].data()
target = self.rs.bands[0].data()
# When numpy is available, convert both results to plain lists before
# comparing them.
if numpy:
result = result.flatten().tolist()
target = target.flatten().tolist()
self.assertEqual(result, target)
def test_vsi_raster_deletion(self):
# Deleting the GDALRaster objects that reference a vsimem file removes
# the file, so it can no longer be opened by path.
path = '/vsimem/raster.tif'
# Create a vsi-based raster from scratch.
vsimem = GDALRaster({
'name': path,
'driver': 'tif',
'width': 4,
'height': 4,
'srid': 4326,
'bands': [{
'data': range(16),
}],
})
# The virtual file exists.
rst = GDALRaster(path)
self.assertEqual(rst.width, 4)
# Delete GDALRaster.
del vsimem
del rst
# The virtual file has been removed.
msg = 'Could not open the datasource at "/vsimem/raster.tif"'
with self.assertRaisesMessage(GDALException, msg):
GDALRaster(path)
def test_vsi_invalid_buffer_error(self):
# Bytes that are not a valid raster file raise a GDALException with a
# dedicated message.
msg = 'Failed creating VSI raster from the input buffer.'
with self.assertRaisesMessage(GDALException, msg):
GDALRaster(b'not-a-raster-buffer')
def test_vsi_buffer_property(self):
# vsi_buffer returns bytes that can be fed back into GDALRaster, and is
# None for rasters that are not vsi based.
# Create a vsi-based raster from scratch.
rast = GDALRaster({
'name': '/vsimem/raster.tif',
'driver': 'tif',
'width': 4,
'height': 4,
'srid': 4326,
'bands': [{
'data': range(16),
}],
})
# Do a round trip from raster to buffer to raster.
result = GDALRaster(rast.vsi_buffer).bands[0].data()
# When numpy is available, convert the result to a plain list before
# comparing it.
if numpy:
result = result.flatten().tolist()
# The band data survives the round trip unchanged.
self.assertEqual(result, list(range(16)))
# The vsi buffer is None for rasters that are not vsi based.
self.assertIsNone(self.rs.vsi_buffer)
def test_offset_size_and_shape_on_raster_creation(self):
rast = GDALRaster({
'datatype': 1,