""""Raster data import
Written by geodienste.ch
Created on 24.04.2025
Last modified on 28.05.2025

This script uploads raster data to S3 storage and starts an import of the data into geodienste.ch.

Requirements
------------

Python >= 3.6 and modules 'boto3' and 'requests' are required. The modules can be installed with the 
following commands:

pip install boto3
pip install requests

Setup
-----

Before the script can be used, the information between

--->>> START: TO BE FILLED OUT BY USER <<<---

and

--->>> END: TO BE FILLED OUT BY USER <<<---

in the code below needs to be filled out.

The script can then be run with the following command:

python3 rasterdata_import.py -t luftbild -p /path/to/rasterdata -e production

Note that the name of the python command can vary (e.g. python or other).

Usage
-----

python3 rasterdata_import.py [-h] -t {luftbild,klimakarte_physiologisch_aequivalente_temperatur} -p PATH [-f FILE_FILTER] -e {integration,production} [-u] [-r] [-d] [-l {warning,info,debug}]

Run

python3 rasterdata_import.py -h

to see a list of all arguments.

The flag -d starts a dry run, i.e. no data is actually uploaded and no import is started.
Use it to check if the provided arguments are valid and to show all files that would be 
uploaded and processed.

The flag -u can be used to only upload the files to S3 storage without starting an import.
The import can then be started later manually, e.g. with cURL:

curl -u username:password -F topic=luftbild "https://www.geodienste.ch/data_agg/raster/import"

Examples
--------

Upload files and start import:
python3 rasterdata_import.py -t luftbild -p /path/to/rasterdata -e production -l debug

Upload files with filter and start import (quote the filter so that the shell does not expand it):
python3 rasterdata_import.py -t luftbild -p /path/to/rasterdata -f "ABC12*.tif" -e production

Dry run:
python3 rasterdata_import.py -t luftbild -p /path/to/rasterdata -e production -d

Only upload files:
python3 rasterdata_import.py -t luftbild -p /path/to/rasterdata -e integration -u -l info

"""
import argparse
import boto3
import logging
from os import listdir
from os.path import basename, isfile, join
import requests
from glob import glob
import sys

# --->>> START: TO BE FILLED OUT BY USER <<<---

# Code of canton (e.g. "lu", "zg"). Use all lowercase letters.
# The code is appended to "geodienste-" to form the name of the S3 bucket.
CANTON = ""

# Access and secret key for S3 bucket. Used to upload files to S3 storage.
S3_ACCESS_KEY = ""
S3_SECRET_KEY = ""

# Username and password of geodienste account. Used for HTTP basic authentication
# in the API call that starts the import of the raster data.
# This information is not needed when starting the script with flag --dry_run or --upload_only.
GEODIENSTE_USERNAME = ""
GEODIENSTE_PASSWORD = ""

# List of supported topics, offered as the valid choices of the --topic argument.
# Extend this list when geodienste.ch supports new topics.
SUPPORTED_TOPICS = ["luftbild", "klimakarte_physiologisch_aequivalente_temperatur"]

# --->>> END: TO BE FILLED OUT BY USER <<<---


class ArgumentParser():
    """Command line interface of the raster data import script.

    Wraps an ``argparse.ArgumentParser`` that is configured with all
    arguments understood by the script.
    """

    def __init__(self):
        """Create the underlying parser and register all arguments."""
        # Result of the most recent call to parse(); None until then.
        self.args = None

        self.argument_parser = argparse.ArgumentParser(
            description="Upload and import raster data")

        self.argument_parser.add_argument(
            "-t",
            "--topic",
            required=True,
            choices=SUPPORTED_TOPICS,
            help="Topic to process"
        )

        self.argument_parser.add_argument(
            "-p",
            "--path",
            required=True,
            help="Absolute path to the folder containing the files to upload"
        )

        self.argument_parser.add_argument(
            "-f",
            "--file_filter",
            help="Filter to select a subset of the files inside path. \
                Use patterns according to the rules used by the Unix shell, e.g. ABC123*.tif. \
                If not specified, all files with extensions .tif, .tiff, .TIF or .TIFF are selected."
        )

        self.argument_parser.add_argument(
            "-e",
            "--environment",
            required=True,
            choices=["integration", "production"],
            help="The environment in which the upload / import will take place"
        )

        self.argument_parser.add_argument(
            "-u",
            "--upload_only",
            action="store_true",
            help="Only upload files, no import is started"
        )

        self.argument_parser.add_argument(
            "-r",
            "--retry",
            action="store_true",
            help="Set this flag if an import fails and you want to start another import to generate only the remaining data \
                that the failed import failed to generate. This can drastically reduce the run time of the import. \
                The import fails if this flag is used and no existing data is found."
        )

        self.argument_parser.add_argument(
            "-d",
            "--dry_run",
            action="store_true",
            help="Set this flag to test the script. No files are actually uploaded and no import is started."
        )

        self.argument_parser.add_argument(
            "-l",
            "--loglevel",
            choices=["warning", "info", "debug"],
            default="warning",
            help="Level of logging (default: warning)"
        )

    def parse(self, argv=None):
        """Parse the arguments and return the resulting namespace.

        Args:
            argv: Optional list of argument strings to parse. If None
                (default), the arguments of the running process are parsed.

        Returns:
            The ``argparse.Namespace`` with the parsed arguments. It is also
            stored in ``self.args``.
        """
        self.args = self.argument_parser.parse_args(argv)
        return self.args


class DerivedArguments():
    """Values derived from the parsed command line arguments.

    Translates the textual ``loglevel`` and ``environment`` arguments into the
    ``logging`` level constant and the URL of the import API.
    """

    # Level constants of the logging module per --loglevel choice.
    _LOGLEVELS = {
        "warning": logging.WARNING,
        "info": logging.INFO,
        "debug": logging.DEBUG,
    }

    # URL of the import API per --environment choice.
    _IMPORT_URLS = {
        "integration": "https://integration.geodienste.ch/data_agg/raster/import",
        "production": "https://www.geodienste.ch/data_agg/raster/import",
    }

    def __init__(self, args):
        """Derive log level and import URL from the parsed arguments.

        Args:
            args: Namespace with the attributes ``loglevel`` and
                ``environment``.

        Raises:
            ValueError: If ``args.environment`` is not a known environment.
        """
        # Unknown log levels fall back to WARNING.
        self.loglevel = self._LOGLEVELS.get(args.loglevel, logging.WARNING)

        # Fail early with a clear message instead of leaving import_url unset.
        if args.environment not in self._IMPORT_URLS:
            raise ValueError("Unknown environment: " + repr(args.environment))
        self.import_url = self._IMPORT_URLS[args.environment]

    def get_loglevel(self):
        """Return the level constant of the logging module."""
        return self.loglevel

    def get_import_url(self):
        """Return the URL of the import API of the selected environment."""
        return self.import_url


class Printer():
    """Console output of the raster data import script.

    All status and progress messages are written to stdout with ``print``.
    """

    def __init__(self, args):
        """Store the parsed command line arguments used by the messages.

        Args:
            args: Namespace with the attributes topic, path, file_filter,
                environment, upload_only, retry, dry_run and loglevel.
        """
        self.args = args

    def print_start_summary(self):
        """Print all arguments and the mode the script runs in."""
        args = self.args

        print("Starting raster data import with arguments:")
        print(f"topic = {args.topic}")
        print(f"path = {args.path}")
        print(f"file_filter = {args.file_filter if args.file_filter else 'all TIFs'}")
        print(f"environment = {args.environment}")
        print(f"upload_only = {args.upload_only}")
        print(f"retry = {args.retry}")
        print(f"dry_run = {args.dry_run}")
        print(f"loglevel = {args.loglevel}")

        # A dry run takes precedence over upload only.
        if args.dry_run:
            print("Dry run: no data will be uploaded and no import will be started.")
        elif args.upload_only:
            print("Data will only be uploaded. No import will be started.")
        else:
            print("Data will first be uploaded and then imported.")

    def print_end_summary(self):
        """Print the final success message for the mode the script ran in."""
        if self.args.dry_run:
            print("Successfully finished")
        elif self.args.upload_only:
            print("Successfully finished: raster data uploaded")
        else:
            print("Successfully finished: raster data uploaded and import started")

    def print_file_paths(self, file_paths):
        """Print the paths of all files selected for the upload.

        Args:
            file_paths: Iterable with the paths of the files.
        """
        print("The following files have been found and will be uploaded:")
        for file_path in file_paths:
            print(file_path)

    def print_file_upload(self, file_path, i, n_files):
        """Print the progress message for a single file.

        Args:
            file_path: Path of the file being uploaded.
            i: Position of the file in the upload, starting at 1.
            n_files: Total number of files to upload.
        """
        print(f"Uploading file {file_path} ({i}/{n_files})")

    def print_start_upload(self):
        """Print that the upload starts (with a hint in case of a dry run)."""
        if self.args.dry_run:
            print("Start uploading (dry run, no files are actually uploaded) ...")
        else:
            print("Start uploading ...")

    def print_end_upload(self):
        """Print that the upload has finished."""
        print("All files successfully uploaded")

    def print_start_import(self):
        """Print whether the import is started or skipped."""
        if self.args.dry_run or self.args.upload_only:
            print("Skipping import: dry run or upload only")
        else:
            print("Starting import ...")

    def print_end_import(self, json_reponse):
        """Print the response of the import request.

        Args:
            json_reponse: Decoded JSON body returned by the import API.
        """
        print(f"Import started with response: {json_reponse}")

    def print_request_import(self, import_url):
        """Print the data of the import request that is about to be sent.

        The password is masked so that the credential does not end up in the
        console output or in log files the output is redirected to.

        Args:
            import_url: URL of the import API.
        """
        masked_password = "********" if GEODIENSTE_PASSWORD else "(not set)"

        print(f"Requesting import at {import_url} with data:")
        print(f"topic = {self.args.topic}")
        print(f"username = {GEODIENSTE_USERNAME}")
        print(f"password = {masked_password}")
        print(f"retry = {self.args.retry}")

    def print_error(self, exception):
        """Print the error that aborted the script.

        Args:
            exception: The exception that was raised.
        """
        print(f"Raster data import aborted with an error: {exception}")


def _find_file_paths(path, file_filter):
    """Return the paths of the files to upload.

    Args:
        path: Folder that contains the raster data.
        file_filter: Unix shell style pattern that selects a subset of the
            files inside path. If empty or None, all files with extensions
            .tif, .tiff, .TIF or .TIFF are selected.

    Returns:
        List with the paths of the regular files that match the selection.
    """
    if file_filter:
        patterns = [file_filter]
    elif sys.platform.startswith('win32'):
        # Windows is case-insensitive
        patterns = ["*.tif", "*.tiff"]
    else:
        # Unix platforms are case-sensitive
        patterns = ["*.tif", "*.tiff", "*.TIF", "*.TIFF"]

    file_paths = []
    for pattern in patterns:
        for match in glob(join(path, pattern)):
            # A pattern can also match folders, which cannot be uploaded.
            if isfile(match):
                file_paths.append(match)
    return file_paths


def main():
    """Upload the selected raster data to S3 storage and start the import.

    The command line arguments decide which files are uploaded, to which
    environment, and whether the import is started afterwards. With a dry run
    nothing is uploaded and no import is started.

    Any error is printed and terminates the script with exit status 1, so that
    calling shells and schedulers can detect the failure.
    """
    bucket_name = "geodienste-" + CANTON

    args = ArgumentParser().parse()
    derived_arguments = DerivedArguments(args)
    import_url = derived_arguments.get_import_url()
    printer = Printer(args)

    logging.basicConfig(level=derived_arguments.get_loglevel())

    try:
        file_paths = _find_file_paths(args.path, args.file_filter)
        n_files = len(file_paths)

        printer.print_start_summary()
        printer.print_file_paths(file_paths)
        if not file_paths:
            logging.warning("No files found in %s that match the selection", args.path)
        printer.print_start_upload()

        s3 = boto3.resource(
            "s3",
            "ch-dk-2",
            aws_access_key_id=S3_ACCESS_KEY,
            aws_secret_access_key=S3_SECRET_KEY,
            endpoint_url="https://sos-ch-dk-2.exo.io",
            use_ssl=True,
            verify=True)

        bucket = s3.Bucket(bucket_name)
        for position, file_path in enumerate(file_paths, start=1):
            printer.print_file_upload(file_path, position, n_files)
            # Files are stored as <environment>/<topic>/<file name>.
            key = args.environment + "/" + args.topic + "/" + basename(file_path)
            if not args.dry_run:
                bucket.upload_file(file_path, Key=key)

        printer.print_end_upload()
        printer.print_start_import()

        if not args.dry_run and not args.upload_only:
            printer.print_request_import(import_url)
            response = requests.post(
                import_url,
                data={"topic": args.topic, "retry": args.retry},
                auth=requests.auth.HTTPBasicAuth(GEODIENSTE_USERNAME, GEODIENSTE_PASSWORD))
            # Do not report a rejected request (e.g. wrong credentials) as a started import.
            if not response.ok:
                raise RuntimeError(
                    "Import request failed with HTTP status "
                    + str(response.status_code) + ": " + response.text)
            printer.print_end_import(response.json())

        printer.print_end_summary()

    except Exception as ex:
        printer.print_error(ex)
        # The traceback is only of interest when debugging.
        logging.debug("Traceback of the error:", exc_info=True)
        sys.exit(1)


# Run the import only when the file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
