Ticket #2002: 0001-ticket-2002-support-subdataset-levels-crs-filtering-.patch
File 0001-ticket-2002-support-subdataset-levels-crs-filtering-.patch, 13.2 KB (added by , 6 years ago) |
---|
-
applications/wcst_import/recipes_custom/sentinel2/recipe.py
From 220033de28112882eab07d0b89839388a3872e19 Mon Sep 17 00:00:00 2001 From: Dimitar Misev <misev@rasdaman.com> Date: Mon, 25 Feb 2019 08:37:29 +0100 Subject: [PATCH] ticket:2002 - support subdataset / levels / crs filtering in sentinel2 recipe --- .../recipes_custom/sentinel2/recipe.py | 70 +++++++++++--- applications/wcst_import/session.py | 8 ++ doc/main/05_geo-services-guide.rst | 93 ++++++++++++++++++- 3 files changed, 152 insertions(+), 19 deletions(-) diff --git a/applications/wcst_import/recipes_custom/sentinel2/recipe.py b/applications/wcst_import/recipes_custom/sentinel2/recipe.py index 46e5dea8..64627d2e 100644
a b from master.evaluator.evaluator_slice_factory import EvaluatorSliceFactory 26 26 from master.importer.importer import Importer 27 27 from master.importer.multi_importer import MultiImporter 28 28 from master.error.runtime_exception import RuntimeException 29 from master.error.validate_exception import RecipeValidationException 29 30 from master.evaluator.sentence_evaluator import SentenceEvaluator 30 31 from master.evaluator.expression_evaluator_factory import ExpressionEvaluatorFactory 31 32 from master.helper.user_band import UserBand … … class Recipe(GeneralCoverageRecipe): 48 49 # supported product levels 49 50 LVL_L1C = 'L1C' 50 51 LVL_L2A = 'L2A' 52 LEVELS = [LVL_L1C, LVL_L2A] 53 51 54 # resolutions in a single Sentinel 2 dataset; TCI (True Color Image) is 10m 52 55 RES_10m = '10m' 53 56 RES_20m = '20m' 54 57 RES_60m = '60m' 55 58 RES_TCI = 'TCI' 56 57 RES_DICT = {RES_10m: [1, 10, -10], RES_20m: [1, 20, -20], RES_60m: [1, 60, -60], RES_TCI: [1, 10, -10]} 59 # resolution (subdataset name) -> actual resolution numbers 60 RES_DICT = {RES_10m: [1, 10, -10], 61 RES_20m: [1, 20, -20], 62 RES_60m: [1, 60, -60], 63 RES_TCI: [1, 10, -10]} 64 # list of subdatasets to import 65 SUBDATASETS = [RES_10m, RES_20m, RES_60m, RES_TCI] 58 66 59 67 # variables that can be used to template the coverage id 60 68 VAR_CRS_CODE = '${crsCode}' … … class Recipe(GeneralCoverageRecipe): 98 106 RES_TCI: BANDS_L1C[RES_TCI], 99 107 } 100 108 BANDS = { LVL_L1C: BANDS_L1C, LVL_L2A: BANDS_L2A } 101 # number of subdatasets in a Sentinel 2 dataset102 SUBDATASETS = 4103 109 DEFAULT_CRS = "OGC/0/AnsiDate@EPSG/0/${crsCode}" 104 110 DEFAULT_IMPORT_ORDER = GdalToCoverageConverter.IMPORT_ORDER_ASCENDING 105 111 … … class Recipe(GeneralCoverageRecipe): 110 116 def __init__(self, session): 111 117 super(Recipe, self).__init__(session) 112 118 self._init_options() 113 # subdatasets have a specific path scheme and prepending "file://" interferes with it114 # TODO: however uncommenting the below causes 
another error:115 # The URL provided in the coverageRef parameter is malformed.116 # ConfigManager.root_url = ""117 119 118 120 def validate(self): 119 121 super(Recipe, self).validate() 122 if len(self.resolutions) == 0: 123 raise RecipeValidationException("No resolutions to import provided.") 124 for res in self.resolutions: 125 if res not in self.SUBDATASETS: 126 raise RecipeValidationException("Invalid resolution '" + str(res) + 127 "' provided, expected a subset of " + str(self.SUBDATASETS)) 128 for lvl in self.levels: 129 if lvl not in self.LEVELS: 130 raise RecipeValidationException("Invalid level '" + str(lvl) + 131 "' provided, expected a subset of " + str(self.LEVELS)) 120 132 121 133 def describe(self): 122 134 log.info("The recipe has been validated and is ready to run.") … … class Recipe(GeneralCoverageRecipe): 152 164 153 165 def _init_options(self): 154 166 self._init_coverage_options() 167 self._init_input_options() 155 168 self.coverage_id = self.session.get_coverage_id() 156 169 self.import_order = self._set_option(self.options, 'import_order', self.DEFAULT_IMPORT_ORDER) 157 170 self.wms_import = self._set_option(self.options, 'wms_import', False) … … class Recipe(GeneralCoverageRecipe): 163 176 self.crs = self._set_option(covopts, 'crs', self.DEFAULT_CRS) 164 177 self._set_option(covopts, 'slicer', {}) 165 178 self._init_slicer_options(covopts) 179 180 def _init_input_options(self): 181 # specify a subset of resolutions to ingest 182 inputopts = self.session.get_input() 183 self.resolutions = self._set_option(inputopts, 'resolutions', None) 184 if self.resolutions is None: 185 self.resolutions = self._set_option(inputopts, 'subdatasets', None) 186 if self.resolutions is None: 187 self.resolutions = self.SUBDATASETS 188 # allow to ingest data with only particular crss 189 self.crss = self._set_option(inputopts, 'crss', []) 190 # ingest data if it's the specified levels 191 self.levels = self._set_option(inputopts, 'levels', []) 166 192 167 193 def 
_init_slicer_options(self, covopts): 168 194 sliceropts = covopts['slicer'] 169 195 self._set_option(sliceropts, 'type', 'gdal') 170 196 self._set_option(sliceropts, 'pixelIsPoint', False) 171 if 'axes' not in sliceropts: 172 self._init_axes_options(sliceropts) 197 axesopts = self._init_axes_options() 198 if 'axes' in sliceropts: 199 for axis in sliceropts['axes']: 200 if axis not in axesopts: 201 raise RecipeValidationException("Invalid axis '" + axis + "', expected one of ansi/E/N.") 202 for k in sliceropts['axes'][axis]: 203 axesopts[axis][k] = sliceropts['axes'][axis][k] 204 sliceropts['axes'] = axesopts 173 205 174 def _init_axes_options(self , sliceropts):175 sliceropts['axes'] ={206 def _init_axes_options(self): 207 return { 176 208 'ansi': { 177 209 "min": "datetime(regex_extract('${file:path}', '.*?/S2[^_]+_MSI[^_]+_([\\d]+)T[\\d]+_', 1), 'YYYYMMDD')", 178 210 "gridOrder": 0, … … class Recipe(GeneralCoverageRecipe): 230 262 gdal_ds.close() 231 263 232 264 level = self._get_level(f.get_filepath()) 265 if len(self.levels) > 0 and level not in self.levels: 266 # skip file, as it's not in the list of levels provided in the ingredients file 267 log.debug("Skipping " + level + " data") 268 continue 233 269 crs_code = "" 234 270 235 271 evaluator_slice = None 236 272 237 for res in [self.RES_10m, self.RES_20m, self.RES_60m, self.RES_TCI]:273 for res in self.resolutions: 238 274 subds_file = self._get_subdataset_file(subdatasets, res) 239 275 crs_code = self._get_crs_code(subds_file.get_filepath(), crs_code) 276 if len(self.crss) > 0 and crs_code not in self.crss: 277 # skip CRS, it's not in the list of CRSs provided in the ingredients file 278 log.debug("Skipping data with CRS " + crs_code) 279 continue 240 280 cov_id = self._get_coverage_id(self.coverage_id, crs_code, level, res) 241 281 conv = self._get_convertor(convertors, cov_id, crs_code, level, res) 242 282 … … class Recipe(GeneralCoverageRecipe): 263 303 264 304 def _get_subdatasets(self, gdal_ds, f): 265 
305 subdatasets = gdal_ds.get_subdatasets() 266 if len(subdatasets) != self.SUBDATASETS:306 if len(subdatasets) != len(self.SUBDATASETS): 267 307 raise RuntimeException("Cannot handle Sentinel 2 file " + f.get_filepath() + 268 308 ": GDAL reported " + str(len(subdatasets)) + 269 " subdatasets, expected " + str( self.SUBDATASETS) + ".")309 " subdatasets, expected " + str(len(self.SUBDATASETS)) + ".") 270 310 return [name for (name, _) in subdatasets] 271 311 272 312 def _get_subdataset_file(self, subdatasets, res): -
applications/wcst_import/session.py
diff --git a/applications/wcst_import/session.py b/applications/wcst_import/session.py index 2cc053d8..a26d8d09 100644
a b class Session: 53 53 self.files = self.parse_input(inp['paths'] if 'paths' in inp else []) 54 54 self.coverage_id = inp['coverage_id'] if 'coverage_id' in inp else None 55 55 self.recipe = recipe 56 self.input = inp 56 57 self.wcs_service = config['service_url'] if "service_url" in config else None 57 58 if "tmp_directory" in config: 58 59 self.tmp_directory = config['tmp_directory'] … … class Session: 258 259 """ 259 260 return self.recipe 260 261 262 def get_input(self): 263 """ 264 Returns the input section of the ingredients 265 :rtype dict[str,str] 266 """ 267 return self.input 268 261 269 def get_coverage_id(self): 262 270 """ 263 271 Returns the coverage id for this session -
doc/main/05_geo-services-guide.rst
diff --git a/doc/main/05_geo-services-guide.rst b/doc/main/05_geo-services-guide.rst index eae156db..0e8f0c31 100644
a b As of now, these recipes are provided: 1604 1604 * :ref:`Irregular timeseries <data-import-recipe-irregular-timeseries>` 1605 1605 * :ref:`General coverage <data-import-recipe-general-coverage>` 1606 1606 * :ref:`Import from external WCS <data-import-recipe-wcs_extract>` 1607 * Specialized recipes 1608 1609 - :ref:`Sentinel 2 <data-import-recipe-sentinel2>` 1607 1610 1608 1611 For each one of these there is an ingredients example under the 1609 1612 `ingredients/ <http://rasdaman.org/browser/applications/wcst_import/ingredients>`_ … … petascope. Parameters are explained below. 2560 2563 { 2561 2564 "config": { 2562 2565 "service_url": "http://localhost:8080/rasdaman/ows", 2563 "tmp_directory": "/tmp/",2564 2566 "default_crs": "http://localhost:8080/def/crs/EPSG/0/4326", 2565 "mock": false, 2566 "automated": true, 2567 "track_files": false 2567 "automated": true 2568 2568 }, 2569 2569 "input": { 2570 2570 "coverage_id": "test_wcs_extract" … … petascope. Parameters are explained below. 2588 2588 } 2589 2589 2590 2590 2591 .. _data-import-recipe-sentinel2: 2592 2593 Import Sentinel 2 data 2594 ^^^^^^^^^^^^^^^^^^^^^^ 2595 2596 This is a convenience recipe for importing Sentinel 2 data in particular. It 2597 relies on support for Sentinel 2 in `more recent GDAL versions 2598 <https://gdal.org/frmt_sentinel2.html>`__. Importing zipped Sentinel 2 is also 2599 possible and automatically handled. 2600 2601 Below is an example: 2602 2603 .. 
code-block:: json 2604 2605 { 2606 "config": { 2607 "service_url": "http://localhost:8080/rasdaman/ows", 2608 "automated": true 2609 }, 2610 "input": { 2611 "coverage_id": "S2_${crsCode}_${resolution}_${level}", 2612 "paths": [ "S2*.zip" ], 2613 // Optional filtering settings 2614 "resolutions": ["10m", "20m", "60m", "TCI"], 2615 "levels": ["L1C", "L2A"], 2616 "crss": ["32757"] // remove or leave empty to ingest any CRS 2617 }, 2618 "recipe": { 2619 "name": "sentinel2", 2620 "options": { 2621 "coverage": { 2622 "metadata": { 2623 "type": "xml", 2624 "global": { 2625 "Title": "'Sentinel-2 data served by rasdaman'" 2626 } 2627 } 2628 }, 2629 "tiling": "ALIGNED [0:0, 0:1999, 0:1999] TILE SIZE 32000000", 2630 "wms_import": true 2631 } 2632 } 2633 } 2634 2635 The recipe extends :ref:`general_coverage <data-import-recipe-general-coverage>` so 2636 the ``"recipe"`` section has the same structure. However, a lot of information 2637 is automatically filled in by the recipe now, so the ingredients file is much 2638 simpler as the example above shows. 2639 2640 The other obvious difference is that the ``"coverage_id"`` is templated with 2641 several variables enclosed in ``${`` and ``}`` which are automatically replaced 2642 to generate the actual coverage name during import: 2643 2644 - ``crsCode`` - the CRS EPSG code of the imported files, e.g. ``32757`` for 2645 WGS 84 / UTM zone 57S. 2646 2647 - ``resolution`` - Sentinel 2 products bundle several subdatasets of different 2648 resolutions: ``10m``, ``20m``, ``60m``, and ``TCI`` (True Color Image, also 2649 10m as it is derived from the B02, B03, and B04 10m bands). 2650 2651 - ``level`` - ``L1C`` or ``L2A`` 2652 2653 If the files collected by ``"paths"`` are varying in any of these parameters, 2654 the corresponding variables must appear somewhere in the ``"coverage_id"``. 2655 Otherwise, the ingestion will either fail or result in invalid coverages. E.g.
2656 if all data is level ``L1C`` with CRS ``32757``, but still different 2657 resolutions, the ``"coverage_id"`` could be ``"MyCoverage_${resolution}"``; 2658 the other variables can still be specified though, so 2659 ``"MyCoverage_${resolution}_${crsCode}"`` is valid as well. 2660 2661 In addition, the data to be ingested can be optionally filtered with the 2662 following options in the ``"input"`` section: 2663 2664 - ``resolutions`` - specify a subset of resolutions to ingest from the data, 2665 e.g. only the "10m" subdataset; if not specified, data of all supported 2666 resolutions will be ingested. 2667 2668 - ``levels`` - specify a subset of levels to ingest, so that files of other 2669 levels will be fully skipped; if not specified, data of all supported levels 2670 will be ingested. 2671 2672 - ``crss`` - specify a subset of CRSs to ingest; if not specified or empty, 2673 data of any CRS will be ingested. 2674 2675 2591 2676 .. _wms-image-pyramids: 2592 2677 2593 2678 Image pyramids