CEDApy/CEDA/economic/OECD.py

151 lines
5.4 KiB
Python

import requests
import xmltodict
import pandas as pd
url = {
"OECD":"https://stats.oecd.org/SDMX-JSON/data/",
"OECD-Key": "https://stats.oecd.org/RestSDMX/sdmx.ashx/GetKeyFamily/all",
"OECD-Schema": "http://stats.oecd.org/restsdmx/sdmx.ashx/GetSchema/"
}
class OECDData(object):
def __init__(self, language:str="en"):
self.language = language
def toc(self) -> pd.DataFrame:
tmp_url = url["OECD-Key"]
r = requests.get(tmp_url)
xpars = xmltodict.parse(r.text)
KeyFamily = xpars['message:Structure']['message:KeyFamilies']['KeyFamily']
FamilyID, FamilyName = [], []
for key in KeyFamily:
key_id = key["@id"]
key_name = key["Name"]
if isinstance(key_name, list):
if self.language == "en":
key_name = key_name[0]["#text"]
else:
key_name = key_name[1]["#text"]
elif isinstance(key_name, dict):
key_name = key_name["#text"]
FamilyID.append(key_id)
FamilyName.append(key_name)
toc = pd.DataFrame({"FamilyID":FamilyID, "FamilyName":FamilyName})
return toc
def search_toc(self, query:str=None) ->pd.DataFrame:
toc = self.toc()
if query == None:
return ValueError("rex is invalid.")
else:
return toc[toc["FamilyName"].str.contains(query)].reset_index(drop=True)
def tos(self, dataset:str=None) -> dict:
if dataset == None:
return ValueError("ID is missing")
else:
tmp_url = url["OECD-Schema"] + dataset
r = requests.get(tmp_url, timeout=10)
xpars = xmltodict.parse(r.text)
location = xpars['xs:schema']['xs:simpleType'][0]["xs:restriction"]["xs:enumeration"]
transact = xpars['xs:schema']['xs:simpleType'][1]["xs:restriction"]["xs:enumeration"]
measures = xpars['xs:schema']['xs:simpleType'][2]["xs:restriction"]["xs:enumeration"]
frequencies = xpars['xs:schema']['xs:simpleType'][3]["xs:restriction"]["xs:enumeration"]
code, fullname, transaction, fulltransaction = [], [], [], []
measure_list, full_measure, frequency_list, full_frequency = [], [], [], []
for loc in location:
locate = loc["@value"]
name = loc["xs:annotation"]["xs:documentation"]
if isinstance(name, list):
if self.language == "en":
name = name[0]["#text"]
else:
name = name[1]["#text"]
elif isinstance(name, dict):
name = name["#text"]
code.append(locate)
fullname.append(name)
for tran in transact:
trans = tran["@value"]
fulltrans = tran["xs:annotation"]["xs:documentation"]
if isinstance(fulltrans, list):
if self.language == "en":
fulltrans = fulltrans[0]["#text"]
else:
fulltrans = fulltrans[1]["#text"]
elif isinstance(fulltrans, dict):
fulltrans = fulltrans["#text"]
transaction.append(trans)
fulltransaction.append(fulltrans)
for measure in measures:
meas = measure["@value"]
full_meas = measure["xs:annotation"]["xs:documentation"]
if isinstance(full_meas, list):
if self.language == "en":
full_meas = full_meas[0]["#text"]
else:
full_meas = full_meas[1]["#text"]
elif isinstance(full_meas, dict):
full_meas = full_meas["#text"]
measure_list.append(meas)
full_measure.append(full_meas)
for frequency in frequencies:
freq = frequency["@value"]
full_freq = frequency["xs:annotation"]["xs:documentation"]
if isinstance(full_freq, list):
if self.language == "en":
full_freq = full_freq[0]["#text"]
else:
full_freq = full_freq[1]["#text"]
elif isinstance(full_freq, dict):
full_freq = full_freq["#text"]
frequency_list.append(freq)
full_frequency.append(full_freq)
data = {
"code":code,
"fullname": fullname,
"transaction_code": transaction,
"transaction": fulltransaction,
"measurement_code": measure_list,
"measurement": full_measure,
"frequency code": frequency_list,
"frequency": full_frequency
}
return data
def download_data(self, dataset:str=None, query:str=None):
tmp_url = url["OECD"] + "{}/".format(dataset) + query + "/all"
r = requests.get(tmp_url)
data =r.json()
return data
if __name__ == "__main__":
oecd = OECDData()
oecd_toc = oecd.toc()
oecd_tos = oecd.tos(dataset="QNA")
data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q")