From 6f44f714c92d2966dca390ebd3054e5fb0bb0c80 Mon Sep 17 00:00:00 2001 From: Daniel Wiesmann Date: Fri, 16 Jun 2017 17:09:05 +0100 Subject: [PATCH] Fixed #28300 -- Allowed GDALRasters to use the vsimem filesystem. Thanks Tim Graham for the review and edits. --- django/contrib/gis/gdal/prototypes/raster.py | 8 +- django/contrib/gis/gdal/raster/const.py | 10 ++ django/contrib/gis/gdal/raster/source.py | 63 ++++++++++- docs/ref/contrib/gis/gdal.txt | 112 +++++++++++++++++-- docs/releases/2.0.txt | 4 + tests/gis_tests/gdal_tests/test_raster.py | 63 +++++++++++ 6 files changed, 244 insertions(+), 16 deletions(-) diff --git a/django/contrib/gis/gdal/prototypes/raster.py b/django/contrib/gis/gdal/prototypes/raster.py index 7e95cbc9a4..72702ab178 100644 --- a/django/contrib/gis/gdal/prototypes/raster.py +++ b/django/contrib/gis/gdal/prototypes/raster.py @@ -2,7 +2,7 @@ This module houses the ctypes function prototypes for GDAL DataSource (raster) related data structures. """ -from ctypes import POINTER, c_char_p, c_double, c_int, c_void_p +from ctypes import POINTER, c_bool, c_char_p, c_double, c_int, c_void_p from functools import partial from django.contrib.gis.gdal.libgdal import GDAL_VERSION, std_call @@ -102,3 +102,9 @@ auto_create_warped_vrt = voidptr_output( std_call('GDALAutoCreateWarpedVRT'), [c_void_p, c_char_p, c_char_p, c_int, c_double, c_void_p] ) + +# Create VSI gdal raster files from in-memory buffers. 
+# http://gdal.org/cpl__vsi_8h.html +create_vsi_file_from_mem_buffer = voidptr_output(std_call('VSIFileFromMemBuffer'), [c_char_p, c_void_p, c_int, c_int]) +get_mem_buffer_from_vsi_file = voidptr_output(std_call('VSIGetMemFileBuffer'), [c_char_p, POINTER(c_int), c_bool]) +unlink_vsi_file = int_output(std_call('VSIUnlink'), [c_char_p]) diff --git a/django/contrib/gis/gdal/raster/const.py b/django/contrib/gis/gdal/raster/const.py index 38f19e294f..8b2656d33f 100644 --- a/django/contrib/gis/gdal/raster/const.py +++ b/django/contrib/gis/gdal/raster/const.py @@ -43,3 +43,13 @@ GDAL_RESAMPLE_ALGORITHMS = { 'Average': 5, 'Mode': 6, } + +# Fixed base path for buffer-based GDAL in-memory files. +VSI_FILESYSTEM_BASE_PATH = '/vsimem/' + +# Should the memory file system take ownership of the buffer, freeing it when +# the file is deleted? (No, GDALRaster.__del__() will delete the buffer.) +VSI_TAKE_BUFFER_OWNERSHIP = False + +# Should a VSI file be removed when retrieving its buffer? +VSI_DELETE_BUFFER_ON_READ = False diff --git a/django/contrib/gis/gdal/raster/source.py b/django/contrib/gis/gdal/raster/source.py index e871fe4cf5..358bb44b8f 100644 --- a/django/contrib/gis/gdal/raster/source.py +++ b/django/contrib/gis/gdal/raster/source.py @@ -1,13 +1,20 @@ import json import os -from ctypes import addressof, byref, c_char_p, c_double, c_void_p +import sys +import uuid +from ctypes import ( + addressof, byref, c_buffer, c_char_p, c_double, c_int, c_void_p, string_at, +) from django.contrib.gis.gdal.driver import Driver from django.contrib.gis.gdal.error import GDALException from django.contrib.gis.gdal.prototypes import raster as capi from django.contrib.gis.gdal.raster.band import BandList from django.contrib.gis.gdal.raster.base import GDALRasterBase -from django.contrib.gis.gdal.raster.const import GDAL_RESAMPLE_ALGORITHMS +from django.contrib.gis.gdal.raster.const import ( + GDAL_RESAMPLE_ALGORITHMS, VSI_DELETE_BUFFER_ON_READ, + VSI_FILESYSTEM_BASE_PATH, 
VSI_TAKE_BUFFER_OWNERSHIP, +) from django.contrib.gis.gdal.srs import SpatialReference, SRSException from django.contrib.gis.geometry.regex import json_regex from django.utils.encoding import force_bytes, force_text @@ -66,13 +73,36 @@ class GDALRaster(GDALRasterBase): # If input is a valid file path, try setting file as source. if isinstance(ds_input, str): - if not os.path.exists(ds_input): - raise GDALException('Unable to read raster source input "{}"'.format(ds_input)) try: # GDALOpen will auto-detect the data source type. self._ptr = capi.open_ds(force_bytes(ds_input), self._write) except GDALException as err: raise GDALException('Could not open the datasource at "{}" ({}).'.format(ds_input, err)) + elif isinstance(ds_input, bytes): + # Create a new raster in write mode. + self._write = 1 + # Get size of buffer. + size = sys.getsizeof(ds_input) + # Pass data to ctypes, keeping a reference to the ctypes object so + # that the vsimem file remains available until the GDALRaster is + # deleted. + self._ds_input = c_buffer(ds_input) + # Create random name to reference in vsimem filesystem. + vsi_path = os.path.join(VSI_FILESYSTEM_BASE_PATH, str(uuid.uuid4())) + # Create vsimem file from buffer. + capi.create_vsi_file_from_mem_buffer( + force_bytes(vsi_path), + byref(self._ds_input), + size, + VSI_TAKE_BUFFER_OWNERSHIP, + ) + # Open the new vsimem file as a GDALRaster. + try: + self._ptr = capi.open_ds(force_bytes(vsi_path), self._write) + except GDALException: + # Remove the broken file from the VSI filesystem. 
+ capi.unlink_vsi_file(force_bytes(vsi_path)) + raise GDALException('Failed creating VSI raster from the input buffer.') elif isinstance(ds_input, dict): # A new raster needs to be created in write mode self._write = 1 @@ -151,6 +181,12 @@ class GDALRaster(GDALRasterBase): else: raise GDALException('Invalid data source input type: "{}".'.format(type(ds_input))) + def __del__(self): + if self.is_vsi_based: + # Remove the temporary file from the VSI in-memory filesystem. + capi.unlink_vsi_file(force_bytes(self.name)) + super().__del__() + def __str__(self): return self.name @@ -172,6 +208,25 @@ class GDALRaster(GDALRasterBase): raise GDALException('Raster needs to be opened in write mode to change values.') capi.flush_ds(self._ptr) + @property + def vsi_buffer(self): + if not self.is_vsi_based: + return None + # Prepare an integer that will contain the buffer length. + out_length = c_int() + # Get the data using the vsi file name. + dat = capi.get_mem_buffer_from_vsi_file( + force_bytes(self.name), + byref(out_length), + VSI_DELETE_BUFFER_ON_READ, + ) + # Read the full buffer pointer. + return string_at(dat, out_length.value) + + @cached_property + def is_vsi_based(self): + return self.name.startswith(VSI_FILESYSTEM_BASE_PATH) + @property def name(self): """ diff --git a/docs/ref/contrib/gis/gdal.txt b/docs/ref/contrib/gis/gdal.txt index 892f045fe6..c2d0345be8 100644 --- a/docs/ref/contrib/gis/gdal.txt +++ b/docs/ref/contrib/gis/gdal.txt @@ -1104,16 +1104,27 @@ blue. .. class:: GDALRaster(ds_input, write=False) - The constructor for ``GDALRaster`` accepts two parameters. The first parameter - defines the raster source, it is either a path to a file or spatial data with - values defining the properties of a new raster (such as size and name). If the - input is a file path, the second parameter specifies if the raster should - be opened with write access. If the input is raw data, the parameters ``width``, - ``height``, and ``srid`` are required. 
The following example shows how rasters - can be created from different input sources (using the sample data from the - GeoDjango tests, see also the :ref:`gdal_sample_data` section). For a - detailed description of how to create rasters using dictionary input, see - the :ref:`gdal-raster-ds-input` section. + The constructor for ``GDALRaster`` accepts two parameters. The first + parameter defines the raster source, and the second parameter defines if a + raster should be opened in write mode. For newly-created rasters, the second + parameter is ignored and the new raster is always created in write mode. + + The first parameter can take three forms: a string representing a file + path, a dictionary with values defining a new raster, or a bytes object + representing a raster file. + + If the input is a file path, the raster is opened from there. If the input + is raw data in a dictionary, the parameters ``width``, ``height``, and + ``srid`` are required. If the input is a bytes object, it will be opened + using a GDAL virtual filesystem. + + For a detailed description of how to create rasters using dictionary input, + see :ref:`gdal-raster-ds-input`. For a detailed description of how to + create rasters in the virtual filesystem, see :ref:`gdal-raster-vsimem`. + + The following example shows how rasters can be created from different input + sources (using the sample data from the GeoDjango tests; see also the + :ref:`gdal_sample_data` section). >>> from django.contrib.gis.gdal import GDALRaster >>> rst = GDALRaster('/path/to/your/raster.tif', write=False) @@ -1143,6 +1154,13 @@ blue. [5, 2, 3, 5], [5, 2, 3, 5], [5, 5, 5, 5]], dtype=uint8) + >>> rst_file = open('/path/to/your/raster.tif', 'rb') + >>> rst_bytes = rst_file.read() + >>> rst = GDALRaster(rst_bytes) + >>> rst.is_vsi_based + True + >>> rst.name # Stored in a random path in the vsimem filesystem. + '/vsimem/da300bdb-129d-49a8-b336-e410a9428dad' .. versionchanged:: 1.11 @@ -1153,6 +1171,12 @@ blue. 
the :meth:`GDALBand.data()` method. + .. versionchanged:: 2.0 + + Added the ability to read and write rasters in GDAL's memory-based + virtual filesystem. ``GDALRaster`` objects can now be converted to and + from binary data in-memory. + .. attribute:: name The name of the source which is equivalent to the input file path or the name @@ -1425,6 +1449,20 @@ blue. >>> rst.metadata {'DEFAULT': {'VERSION': '2.0'}} + .. attribute:: vsi_buffer + + .. versionadded:: 2.0 + + A ``bytes`` representation of this raster. Returns ``None`` for rasters + that are not stored in GDAL's virtual filesystem. + + .. attribute:: is_vsi_based + + .. versionadded:: 2.0 + + A boolean indicating if this raster is stored in GDAL's virtual + filesystem. + ``GDALBand`` ------------ @@ -1639,7 +1677,9 @@ Key Default Usage .. object:: name String representing the name of the raster. When creating a file-based - raster, this parameter must be the file path for the new raster. + raster, this parameter must be the file path for the new raster. If the + name starts with ``/vsimem/``, the raster is created in GDAL's virtual + filesystem. .. object:: datatype @@ -1731,6 +1771,56 @@ Key Default Usage ``offset`` ``(0, 0)`` Passed to the :meth:`~GDALBand.data` method ================ ================================= ====================================================== +.. _gdal-raster-vsimem: + +Using GDAL's Virtual Filesystem +------------------------------- + +GDAL has an internal memory-based filesystem, which allows treating blocks of +memory as files. It can be used to read and write :class:`GDALRaster` objects +to and from binary file buffers. + +This is useful in web contexts where rasters might be obtained as a buffer +from a remote storage or returned from a view without being written to disk. + +:class:`GDALRaster` objects are created in the virtual filesystem when a +``bytes`` object is provided as input, or when the file path starts with +``/vsimem/``. 
+ +Input provided as ``bytes`` has to be a full binary representation of a file. +For instance:: + + # Read a raster as a file object from a remote source. + >>> from urllib.request import urlopen + >>> dat = urlopen('http://example.com/raster.tif').read() + # Instantiate a raster from the bytes object. + >>> rst = GDALRaster(dat) + # The name starts with /vsimem/, indicating that the raster lives in the + # virtual filesystem. + >>> rst.name + '/vsimem/da300bdb-129d-49a8-b336-e410a9428dad' + +To create a new virtual file-based raster from scratch, use the ``ds_input`` +dictionary representation and provide a ``name`` argument that starts with +``/vsimem/`` (for details of the dictionary representation, see +:ref:`gdal-raster-ds-input`). For virtual file-based rasters, the +:attr:`~GDALRaster.vsi_buffer` attribute returns the ``bytes`` representation +of the raster. + +Here's how to create a raster and return it as a file in an +:class:`~django.http.HttpResponse`:: + + >>> from django.http import HttpResponse + >>> rst = GDALRaster({ + ... 'name': '/vsimem/temporarymemfile', + ... 'driver': 'tif', + ... 'width': 6, 'height': 6, 'srid': 3086, + ... 'origin': [500000, 400000], + ... 'scale': [100, -100], + ... 'bands': [{'data': range(36), 'nodata_value': 99}] + ... }) + >>> HttpResponse(rst.vsi_buffer, 'image/tiff') + Settings ======== diff --git a/docs/releases/2.0.txt b/docs/releases/2.0.txt index dae5dab9fe..6af7545199 100644 --- a/docs/releases/2.0.txt +++ b/docs/releases/2.0.txt @@ -86,6 +86,10 @@ Minor features * Allowed passing driver-specific creation options to :class:`~django.contrib.gis.gdal.GDALRaster` objects using ``papsz_options``. +* Allowed creating :class:`~django.contrib.gis.gdal.GDALRaster` objects in + GDAL's internal virtual filesystem. Rasters can now be :ref:`created from and + converted to binary data <gdal-raster-vsimem>` in-memory. 
+ :mod:`django.contrib.messages` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/tests/gis_tests/gdal_tests/test_raster.py b/tests/gis_tests/gdal_tests/test_raster.py index 756b7a2726..9534681d44 100644 --- a/tests/gis_tests/gdal_tests/test_raster.py +++ b/tests/gis_tests/gdal_tests/test_raster.py @@ -155,6 +155,69 @@ class GDALRasterTests(SimpleTestCase): else: self.assertEqual(restored_raster.bands[0].data(), self.rs.bands[0].data()) + def test_vsi_raster_creation(self): + # Open a raster as a file object. + with open(self.rs_path, 'rb') as dat: + # Instantiate a raster from the file binary buffer. + vsimem = GDALRaster(dat.read()) + # The data of the in-memory file is equal to the source file. + result = vsimem.bands[0].data() + target = self.rs.bands[0].data() + if numpy: + result = result.flatten().tolist() + target = target.flatten().tolist() + self.assertEqual(result, target) + + def test_vsi_raster_deletion(self): + path = '/vsimem/raster.tif' + # Create a vsi-based raster from scratch. + vsimem = GDALRaster({ + 'name': path, + 'driver': 'tif', + 'width': 4, + 'height': 4, + 'srid': 4326, + 'bands': [{ + 'data': range(16), + }], + }) + # The virtual file exists. + rst = GDALRaster(path) + self.assertEqual(rst.width, 4) + # Delete GDALRaster. + del vsimem + del rst + # The virtual file has been removed. + msg = 'Could not open the datasource at "/vsimem/raster.tif"' + with self.assertRaisesMessage(GDALException, msg): + GDALRaster(path) + + def test_vsi_invalid_buffer_error(self): + msg = 'Failed creating VSI raster from the input buffer.' + with self.assertRaisesMessage(GDALException, msg): + GDALRaster(b'not-a-raster-buffer') + + def test_vsi_buffer_property(self): + # Create a vsi-based raster from scratch. + rast = GDALRaster({ + 'name': '/vsimem/raster.tif', + 'driver': 'tif', + 'width': 4, + 'height': 4, + 'srid': 4326, + 'bands': [{ + 'data': range(16), + }], + }) + # Do a round trip from raster to buffer to raster. 
+ result = GDALRaster(rast.vsi_buffer).bands[0].data() + if numpy: + result = result.flatten().tolist() + # Band data is equal to nodata value except on input block of ones. + self.assertEqual(result, list(range(16))) + # The vsi buffer is None for rasters that are not vsi based. + self.assertIsNone(self.rs.vsi_buffer) + def test_offset_size_and_shape_on_raster_creation(self): rast = GDALRaster({ 'datatype': 1,