# -*- coding: utf-8 -*-
"""
Class definition for NutsTools. Can be used in your own Python code to retrieve the NUTS data from the EU website
and convert postal codes into NUTS codes.
Examples:
In order to download the EU NUTS data to the default location, create the NutsData object::
from nutstools.postalnuts import NutsData
nuts_data = NutsData()
In case the NUTS data was downloaded before, only the nuts_data object is created, but the data will not be
downloaded again.
As a next step, you can use the NutsPostalCode class to import the NUTS-data and convert postal codes to NUTS::
nuts = NutsPostalCode(nuts_data.nuts_codes_file)
post_code = "2612AB"
nuts_code = nuts.one_postal2nuts(postal_code=post_code)
print(f"Postal code {post_code} has nuts code {nuts_code}")
The returned *nuts_code* is a string with the NUTS code, so the output looks like:
.. code-block:: text
Postal code 2612AB has nuts code NL333
You can also convert a list or a series of postal codes like this::
postal_codes = [
"8277 AM",
"2871 KA",
]
all_codes = nuts.postal2nuts(postal_codes=postal_codes)
print(all_codes)
The returned *all_codes* is a Series which looks like:
.. code-block:: text
8277AM NL211
2871KA NL33B
9408BJ NL131
3076KA NL33C
3068LM NL33C
7543GV NL213
4181DG NL224
"""
import logging
from pathlib import Path
import re
import appdirs
import pandas as pd
import requests
# requests_kerberos_proxy is an optional dependency: when it is missing the module falls back
# to a plain requests session (see NutsData.download_nuts_codes).
try:
    import requests_kerberos_proxy
except ImportError:
    requests_kerberos_proxy = None
else:
    try:
        from requests_kerberos_proxy.util import get_session
    except ImportError as err:
        # The package is installed but does not expose the helper we need; keep the original
        # ImportError attached as the cause so the underlying reason stays visible.
        raise ImportError(
            "Module 'requests_kerberos_proxy' was found but 'get_session' could not be imported"
        ) from err
import yaml
from .nutsdata import (
COUNTRY_CODES,
DEFAULT_YEAR,
DEFAULT_COUNTRY,
NUTS_YEARS,
NUTS_DATA,
NUTS_CODE_DEFAULT_DIRECTORY,
NUTS_CODE_DEFAULT_SETTINGS_FILE_NAME,
)
from ._typings import SeriesLike, PathLike
_logger = logging.getLogger(__name__)
[docs]
class NutsPostalCode:
    """
    Lookup table to convert postal codes into NUTS codes

    The table is read from a semicolon-separated file. The first column is taken as the NUTS codes and the
    second column as the postal codes. Single quotes and all white space are removed from both columns, after
    which the NUTS codes are stored in a Series indexed by the postal codes.

    Args:
        file_name (Path|str): The nuts input file holding all the nuts codes. Can be either a pathlib Path or a
            string. A file with the suffix *.zip* is read as a zip archive.

    Attributes:
        file_name (Path): Path of the file containing the nuts codes
        nuts_data (Series): The NUTS codes loaded from the file, indexed by the cleaned postal codes
        nuts_key (str): Name of column containing the NUTS codes. Equal to the first column of the input data file
        postal_codes_key (str): Name of the column containing the postal codes. Equal to the second column of the
            input data file
    """

    # NUTS levels accepted by the conversion methods
    _VALID_LEVELS = (0, 1, 2, 3)

    def __init__(self, file_name: PathLike):
        """
        The constructor to initialize the object
        """
        self.file_name = Path(file_name)
        _logger.info(f"Reading data {file_name}")

        compression = "zip" if self.file_name.suffix == ".zip" else None

        # Read everything as text: postal codes and NUTS codes are identifiers, never numbers, and the
        # string clean-up below requires string columns.
        table = pd.read_csv(
            self.file_name.as_posix(), sep=";", compression=compression, dtype=str
        )

        self.nuts_key = table.columns[0]
        self.postal_codes_key = table.columns[1]

        # Only the two columns that are kept need cleaning: drop the single quotes and all white space.
        for column_name in (self.nuts_key, self.postal_codes_key):
            table[column_name] = table[column_name].str.replace(
                r"['\s]", "", regex=True
            )

        self.nuts_data = table.set_index(self.postal_codes_key, drop=True)[
            self.nuts_key
        ]
        _logger.debug("Done")

    @classmethod
    def _check_level(cls, level):
        """Validate the NUTS level and return it as a plain int."""
        if level not in cls._VALID_LEVELS:
            raise ValueError("Level of nuts codes must be in range 0..3")
        return int(level)

    @staticmethod
    def _tail_pattern(level: int) -> str:
        """Return the regular expression matching the trailing characters to strip for *level*."""
        # one character is removed per level below 3: '.$', '..$' or '...$'
        return "." * (3 - level) + "$"

    def postal2nuts(self, postal_codes: SeriesLike, level: int = 3):
        """
        Convert the series or list of postal codes to a series of nuts code at level

        Args:
            postal_codes (Series, Index, list or other sequence of str): Postal codes to be converted to NUTS
                codes. White space is removed and the codes are converted to upper case before the lookup.
            level (int, optional): Level of the nuts codes. Either, 0, 1, 2 or 3. Default is 3

        Returns:
            Series: The converted NUTS codes. The postal codes are put on the index. Postal codes which are not
            found get a missing value. For levels below 3, the series is named *NUTS<level>*.

        Raises:
            ValueError: If *level* is not 0, 1, 2 or 3
        """
        level = self._check_level(level)

        if not isinstance(postal_codes, (pd.Series, pd.Index)):
            # turn lists, tuples, arrays, ... into a Series; object dtype keeps the .str accessor
            # available for an empty input as well
            postal_codes = pd.Series(postal_codes, dtype=object)

        # remove all white space and force to upper
        postal_codes = postal_codes.str.replace(r"\s", "", regex=True).str.upper()

        nuts_codes = self.nuts_data.reindex(postal_codes)

        # in case a nuts level lower than 3 is given, remove the last digits
        if level < 3:
            nuts_codes = nuts_codes.str.replace(
                self._tail_pattern(level), "", regex=True
            )
            nuts_codes = nuts_codes.rename(f"NUTS{level}")

        return nuts_codes

    def one_postal2nuts(self, postal_code: str, level: int = 3):
        """
        Return the NUTS code for a single postal code

        Args:
            postal_code (str): The postal code to retrieve the data for. White space is removed and the code is
                converted to upper case before the lookup.
            level (int, optional): The nuts level. Either, 0, 1, 2 or 3. Default = 3

        Returns:
            str or None: The nuts code belonging to the postal code, or None (after logging a warning) if the
            postal code is not in the table

        Raises:
            ValueError: If *level* is not 0, 1, 2 or 3
            AttributeError: If *postal_code* is not a string
        """
        level = self._check_level(level)

        try:
            # split/join removes every kind of white space, in line with postal2nuts
            postal_code = "".join(postal_code.split()).upper()
        except AttributeError as err:
            raise AttributeError(
                f"Postal code {postal_code} is not a string. Please check your input"
            ) from err

        try:
            nuts_code = self.nuts_data.loc[postal_code]
        except KeyError:
            _logger.warning(f"Could not find NUTS code for postal code {postal_code}")
            return None

        # in case a nuts level lower than 3 is given, remove the last digits
        if level < 3:
            nuts_code = re.sub(self._tail_pattern(level), "", nuts_code)

        return nuts_code
[docs]
class NutsData:
    """
    Class to hold all the references to NUTS data

    On construction the settings file is written (if missing or if *update_settings* is set) and read back, the
    download URL and cache file name are derived from the settings, the NUTS file is downloaded when it is not
    cached yet (or when *force_download* is set) and finally the file is loaded into *nuts_data*.

    Args:
        year (str, optional): Year of the NUTS data. Default is *2021*.
        country (str, optional ): Two-letter code of the country to use for the NUTS data. Defaults to *NL*.
        nuts_file_name (Path|str, optional): Name of an already available file with nuts data. It is only used
            if the file exists; otherwise the file in the cache directory is used.
        nuts_code_directory (Path|str, optional): Name of the directory where the NUTS data is stored.
            Defaults to *None*, in which case the NUTS data will be stored to the default location
            (see *directory* attribute).
            If an alternative location is passed to this argument, the *directory* attribute is set to this
            location.
        update_settings (bool, optional): If true, the settings file is updated with the new options passed to this
            class. The defaults can also be altered in the settings file itself.
        force_download (bool, optional): If true, the NUTS data is downloaded even if the file is already present.

    Attributes:
        directory (Path): Location of the configuration settings file. Default is *nutstools* in either
            *C:\\\\Users\\\\username\\\\AppData\\\\Local* (Windows) or */home/username/.local/share* (linux)
        cache_directory (Path): directory where downloaded data is stored for reuse. Defaults to *Cache* relative
            *directory*.
        settings_file_name (Path): Name of the settings file to store the default settings. Default is
            *nutstools_settings.yml* located in *directory*.
            This file is created the first run and read every next run. Altering the values in this file alters the
            default behaviour. The default behaviour can also be overwritten by using the update_settings command
            line argument
        settings (dict): The settings as read from *settings_file_name*
        url (str): The URL to the NUTS data at the EU website.
        year (str): The year for which the NUTS data is retrieved. Default is *2021* (current latest version), but
            can be altered in the settings file.
        country (str): Two-letter code to set the country for which we want to download the NUTS data. Default is
            *NL*. Can be altered using the *country* command line option combined with *update_settings* in order
            to force to rewrite the settings file
        nuts_codes_file (Path): The filename to the NUTS data downloaded from the EU website
        nuts_data (DataFrame): The Dataframe where the NUTS data is stored after reading the *nuts_codes_file*

    Raises:
        KeyError: If the year or the country from the settings is not available
        FileNotFoundError: If the NUTS data could not be downloaded and no cached file is available
    """

    # Maximum number of seconds to wait on the server while downloading; without a timeout a
    # stalled connection would block forever.
    DOWNLOAD_TIMEOUT = 60

    def __init__(
        self,
        year: str = None,
        country: str = None,
        nuts_file_name: PathLike = None,
        nuts_code_directory: str = None,
        update_settings: bool = False,
        force_download: bool = False,
    ):
        if nuts_code_directory is None:
            self.directory = Path(
                appdirs.user_config_dir(NUTS_CODE_DEFAULT_DIRECTORY)
            ).parent
        else:
            self.directory = Path(nuts_code_directory)

        self.cache_directory = self.directory / Path("Cache")
        self.directory.mkdir(exist_ok=True, parents=True)
        self.cache_directory.mkdir(exist_ok=True, parents=True)

        self.settings_file_name = self.directory / Path(
            NUTS_CODE_DEFAULT_SETTINGS_FILE_NAME
        )

        self.url = None
        self.year = year if year is not None else DEFAULT_YEAR
        self.country = country if country is not None else DEFAULT_COUNTRY

        # placeholder; the real location is set by impose_nuts_settings below
        self.nuts_codes_file: Path = Path(".")

        default_settings = dict(
            DEFAULT_YEAR=self.year,
            DEFAULT_COUNTRY=self.country,
            NUTS_CODE_DEFAULT_DIRECTORY=self.directory.as_posix(),
            COUNTRY_CODES=COUNTRY_CODES,
            NUTS_YEARS=NUTS_YEARS,
            NUTS_DATA=NUTS_DATA,
        )

        if not self.settings_file_name.exists() or update_settings:
            _logger.info(f"Writing default settings to {self.settings_file_name}")
            with open(self.settings_file_name, "w") as stream:
                yaml.dump(default_settings, stream)

        # The settings file is leading: year and country are always taken from the file, so the
        # arguments passed in only have effect when the file was (re)written above.
        _logger.info(f"Reading settings from {self.settings_file_name}")
        with open(self.settings_file_name) as stream:
            self.settings = yaml.safe_load(stream)

        self.impose_nuts_settings()

        if nuts_file_name is not None:
            nuts_file_name = Path(nuts_file_name)
            if nuts_file_name.exists():
                self.nuts_codes_file = nuts_file_name

        if not self.nuts_codes_file.exists() or force_download:
            downloaded = self.download_nuts_codes()
            # A failed forced download may still fall back on a previously cached file; without
            # any file there is nothing to read, so fail with a clear message.
            if not downloaded and not self.nuts_codes_file.exists():
                raise FileNotFoundError(
                    f"Could not download NUTS data from {self.url} and file "
                    f"{self.nuts_codes_file} is not available"
                )
        else:
            _logger.info(f"File {self.nuts_codes_file} already downloaded!")

        # zip archives are read explicitly as zip; for all other files pandas infers the compression
        compression = "zip" if self.nuts_codes_file.suffix == ".zip" else "infer"
        self.nuts_data = pd.read_csv(
            self.nuts_codes_file, sep=";", compression=compression
        )

    def impose_nuts_settings(self):
        """
        Set year, country, url and nuts_codes_file based on the settings read from the settings file

        Raises:
            KeyError: If the year is not present in the NUTS data definitions or if the country is not available
                for that year
        """
        self.year = self.settings["DEFAULT_YEAR"]
        self.country = self.settings["DEFAULT_COUNTRY"]

        try:
            nuts_year_prop = NUTS_DATA[self.year]
        except KeyError as nuts_err:
            _logger.warning(nuts_err)
            raise KeyError(
                f"Year {self.year} not available. Please pick another one"
            ) from nuts_err

        self.url = nuts_year_prop["url"]
        nuts_files = nuts_year_prop["files"]

        try:
            remote_file_name = nuts_files[self.country]
        except KeyError as remote_err:
            _logger.warning(remote_err)
            raise KeyError(
                f"Country {self.country} not available. Please pick another one"
            ) from remote_err

        # the remote file name is appended to the base url and reused as name of the cached file
        self.url = "/".join([self.url, remote_file_name])
        self.nuts_codes_file = self.cache_directory / Path(remote_file_name)

    def download_nuts_codes(self):
        """
        Download the NUTS data from the EU website

        Notes
        -----
        * Open a session, either via kerberos and a proxy or via a normal request session
        * The request is aborted if the server does not respond within *DOWNLOAD_TIMEOUT* seconds

        Returns:
            bool: True for success, False for failed download.
        """
        if requests_kerberos_proxy is not None:
            # NOTE(review): assumes get_session() returns a requests-compatible session whose
            # get() accepts a timeout argument -- confirm against requests_kerberos_proxy
            session = get_session()
        else:
            _logger.debug("Trying to connection using plain requests")
            session = requests.Session()

        _logger.debug(f"Requesting {self.url}")
        response = session.get(self.url, timeout=self.DOWNLOAD_TIMEOUT)

        if not response.ok:
            _logger.warning(f"Cannot find data set: {self.url}")
            return False

        _logger.debug(f"Url exists : {self.url}.")
        _logger.info(f"Downloading data from : {self.url}.")
        with open(self.nuts_codes_file, "wb") as stream:
            stream.write(response.content)
        _logger.info("Success!")
        return True