remove eu and add eurostat
This commit is contained in:
parent
c82e19e011
commit
721a0359fa
2069
CEDA/macroecon/eu.py
2069
CEDA/macroecon/eu.py
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,94 @@
|
|||
import re
|
||||
import requests
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from fake_useragent import UserAgent
|
||||
|
||||
class EurostatData(object):
    """
    Small client for the Eurostat bulk download listing.

    It can read the table of contents, search it by title, download a
    dataset as a DataFrame and download a code dictionary.

    for more information: https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=BulkDownload_Guidelines.pdf
    """

    # Names given to the three comma-separated dimension codes found in the
    # first column of a dataset file.
    _DIMENSIONS = ("unit", "na_item", "geo")

    # Optional sign, digits, optional decimal part. The dot is escaped so it
    # only matches a decimal point, and plain integers such as "7" match too.
    _NUMBER_PATTERN = r"(-?\d+(?:\.\d+)?)"

    def __init__(self, language:str="en"):
        """
        :param language: language code inserted into the table-of-contents
            and dictionary URLs.
        """
        self.language = language
        self.url = "https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/"
        self.toc_url = "https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=table_of_contents_{}.txt".format(language)

    def toc(self) -> pd.DataFrame:
        """
        Read the tab-separated table of contents from ``self.toc_url``.

        the return value includes 8 columns:
        'title'
        'code'
        'type'
        'last update of data'
        'last table structure change'
        'data start'
        'data end'
        'values'
        """
        return pd.read_csv(self.toc_url, sep="\t")

    def search_toc(self, query:str=None) -> pd.DataFrame:
        """
        Search the "title" column of the table of contents.

        :param query: pattern handed to ``Series.str.contains`` (so it is
            interpreted as a regular expression).
        :return: the matching rows, re-indexed from 0.
        :raises ValueError: if ``query`` is None.
        """
        # Validate before downloading anything, and raise (the previous code
        # returned the exception object to the caller instead).
        if query is None:
            raise ValueError("query is invalid.")
        toc = self.toc()
        # na=False: rows without a title simply do not match; a NaN in the
        # mask would make the boolean indexing fail.
        matches = toc["title"].str.contains(query, na=False)
        return toc[matches].reset_index(drop=True)

    def download_data(self, datasetcode:str=None, geo:str=None, unit:str=None):
        """
        Download one dataset and return it as a DataFrame.

        The first column of the file is split on "," into the columns
        'unit', 'na_item' and 'geo' (the file is assumed to have exactly
        these three dimensions); every remaining column is converted to
        float.

        :param datasetcode: dataset code used to build the download URL.
        :param geo: if given, keep only rows whose 'geo' equals this value.
        :param unit: if given, keep only rows whose 'unit' equals this value.
        :return: DataFrame with 'unit', 'na_item', 'geo' followed by the
            value columns; the original row index is preserved.
        :raises ValueError: if ``datasetcode`` is None or the first column
            does not split into exactly three codes.
        """
        if datasetcode is None:
            raise ValueError("datasetcode is required.")

        url = self.url + "BulkDownloadListing?sort=1&file=data%2F" + datasetcode + ".tsv.gz"
        raw = pd.read_csv(url, sep="\t", compression="gzip")

        key_column = raw.columns[0]
        if raw.empty:
            # Nothing to split; still return the documented column layout.
            keys = pd.DataFrame(columns=list(self._DIMENSIONS), index=raw.index)
        else:
            keys = raw[key_column].str.split(",", expand=True)
            if keys.shape[1] != len(self._DIMENSIONS):
                raise ValueError(
                    "expected {} dimensions in the first column, found {}.".format(
                        len(self._DIMENSIONS), keys.shape[1]
                    )
                )
            keys.columns = list(self._DIMENSIONS)

        # Dimension columns first, then the value columns in file order.
        data = keys.join(raw.drop(columns=[key_column]))

        # Each filter is independent, so a unit-only request really filters
        # on 'unit' (it used to compare 'geo' against None and return nothing).
        if geo is not None:
            data = data.loc[data["geo"] == geo]
        if unit is not None:
            data = data.loc[data["unit"] == unit]

        # Work on a copy so the column assignments below never write into a
        # slice of the unfiltered frame.
        data = data.copy()

        # Value columns start right after the dimension columns; starting one
        # position later left the first value column unconverted.
        for column in data.columns[len(self._DIMENSIONS):]:
            data[column] = self._to_float(data[column])
        return data

    @classmethod
    def _to_float(cls, column:pd.Series) -> pd.Series:
        """Return ``column`` as floats, extracting the number from text cells."""
        # Columns that pandas already parsed as numbers have no .str accessor.
        if pd.api.types.is_numeric_dtype(column):
            return column.astype("float")
        # Text cells: keep the first number, drop whatever follows it
        # (presumably status flags); cells without a number become NaN.
        numbers = column.astype(str).str.extract(cls._NUMBER_PATTERN, expand=False)
        return numbers.astype("float")

    def download_dic(self, category:str=None):
        """
        Download the code dictionary for one category.

        :param category: dictionary name, e.g. "geo". A trailing "." or
            ".dic" is tolerated so existing callers keep working.
        :return: DataFrame read from the tab-separated dictionary file.
        :raises ValueError: if ``category`` is None.
        """
        if category is None:
            raise ValueError("category is required.")

        # Normalise to the bare name, then append the ".dic" extension (the
        # dot used to be missing from the generated file name).
        name = category[:-len(".dic")] if category.endswith(".dic") else category
        name = name.rstrip(".")
        url = self.url + "BulkDownloadListing?sort=1&file=dic%2F{}".format(self.language) + "%2F" + name + ".dic"
        return pd.read_csv(url, sep="\t")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # EurostatData.__init__ only accepts `language`; passing the extra
    # `version` keyword raised TypeError as soon as the script was run.
    eu = EurostatData(language="en")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
1130
CEDA/macroecon/us.py
1130
CEDA/macroecon/us.py
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue