diff --git a/CEDA/economic/BOJData.py b/CEDA/economic/BOJ.py similarity index 100% rename from CEDA/economic/BOJData.py rename to CEDA/economic/BOJ.py diff --git a/CEDA/economic/ECB.py b/CEDA/economic/ECB.py new file mode 100644 index 0000000..ddb436e --- /dev/null +++ b/CEDA/economic/ECB.py @@ -0,0 +1,33 @@ +import requests +import pandas as pd +from bs4 import BeautifulSoup + +url = { + "ECB": "https://sdw.ecb.europa.eu/", + "ECB-API": "http://sdw-wsrest.ecb.europa.eu/service/data/" +} + +class ECBData(object): + def __init__(self) -> None: + pass + + def toc(self): + r = requests.get(url["ECB"] + "browse.do?node=9689727") + dataset_list = BeautifulSoup(r.text, "html.parser").find_all("div", {"id": "currentMaximizeNode0"}) + uls = dataset_list[0].find_all("ul") + lis = [li for ul in uls for li in ul.find_all("li")] + li_text = [li.text.strip() for li in lis] + name, metadata = [], [] + for i in range(0, len(li_text)): + name.append(li_text[i].split("-")[0]) + metadata.append(li_text[i].split("-")[1]) + + li_urls = [url["ECB"] + li.a.get("href") for li in lis] + toc = pd.DataFrame({"name": name, "metadata":metadata, "url":li_urls}) + return toc + + + def download_data(self, datasetname:str=None): + tmp_url = url["ECB-API"] + "{}?format=csvdata".format(datasetname) + data = pd.read_csv(tmp_url) + return data \ No newline at end of file diff --git a/CEDA/economic/ECBData.py b/CEDA/economic/ECBData.py deleted file mode 100644 index 832421a..0000000 --- a/CEDA/economic/ECBData.py +++ /dev/null @@ -1,13 +0,0 @@ -import pandas as pd - -url = { - "ECB": "http://sdw-wsrest.ecb.europa.eu/service/data/" -} - -class ECBData(object): - def __init__(self) -> None: - pass - def get_data(dataset:str=None): - tmp_url = url["ECB"] + "{}?format=csvdata".format(dataset) - data = pd.read_csv(tmp_url) - return data \ No newline at end of file diff --git a/CEDA/economic/EPU.py b/CEDA/economic/EPU.py index 3a220d2..cee8bd5 100644 --- a/CEDA/economic/EPU.py +++ b/CEDA/economic/EPU.py @@ -9,21 +9,20 @@ url = { "EPU": "https://www.policyuncertainty.com/" } +def country_list(): + country_list = ["Global", "USA", "Australia", "Belgium", + "Brazil", "Canada", "Chile", "China", + "Colombia", "Croatia", "Denmark", "France", + "Germany", "Greece", "HKSAR", "MACAUSAR", + "India", "Ireland", "Italy", "Japan", + "Korea", "Mexico", "Netherlands", "Pakistan", + "Russia", "Singapore", "Spain", "Sweden", "UK"] + annotations = "Disambiguation: the word 'Korea' in here stands for 'South Korea'" + return country_list, annotations class EPUData(object): def __init__(self, country:str=None): self.country = country - - def country_list(self): - country_list = ["Global", "USA", "Australia", "Belgium", - "Brazil", "Canada", "Chile", "China", - "Colombia", "Croatia", "Denmark", "France", - "Germany", "Greece", "HKSAR", "MACAUSAR", - "India", "Ireland", "Italy", "Japan", - "Korea", "Mexico", "Netherlands", "Pakistan", - "Russia", "Singapore", "Spain", "Sweden", "UK"] - annotations = "Disambiguation: the word 'Korea' in here stands for 'South Korea'" - return country_list, annotations def download(self): if self.country == "China": diff --git a/CEDA/economic/EurostatData.py b/CEDA/economic/Eurostat.py similarity index 84% rename from CEDA/economic/EurostatData.py rename to CEDA/economic/Eurostat.py index 440d2c2..8c8eb91 100644 --- a/CEDA/economic/EurostatData.py +++ b/CEDA/economic/Eurostat.py @@ -50,24 +50,24 @@ class EurostatData(object): if geo != None and unit != None: data = data.loc[(data["geo"] == geo) & (data["unit"] == unit)] for i in range(4, len(list(data.columns))): - data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float") + data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float") return data elif geo != None and unit == None: data = data.loc[(data["geo"] == geo)] for i in range(4, len(list(data.columns))): - data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float") + data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float") return data elif geo == None and unit != None: data = data.loc[(data["geo"] == geo)] for i in range(4, len(list(data.columns))): - data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float") + data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float") return data elif geo == None and unit == None: for i in range(4, len(list(data.columns))): - data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float") + data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float") return data def download_dic(self, category:str=None): @@ -77,7 +77,7 @@ class EurostatData(object): if __name__ == "__main__": - eu = EurostatData(language="en", version=2.1) + eu = EurostatData(language="en") diff --git a/CEDA/economic/FredData.py b/CEDA/economic/Fred.py similarity index 62% rename from CEDA/economic/FredData.py rename to CEDA/economic/Fred.py index 04f6bb2..1ff4520 100644 --- a/CEDA/economic/FredData.py +++ b/CEDA/economic/Fred.py @@ -18,7 +18,8 @@ ssl._create_default_https_context = ssl._create_unverified_context url = { "fred_econ": "https://fred.stlouisfed.org/graph/fredgraph.csv?", "fred_series": "https://fred.stlouisfed.org/series/", - "philfed": "https://www.philadelphiafed.org/surveys-and-data/real-time-data-research/", + "philfed": + "https://www.philadelphiafed.org/surveys-and-data/real-time-data-research/", "chicagofed": "https://www.chicagofed.org/~/media/publications/", "OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/" } @@ -41,12 +42,29 @@ def get_metadata(id: str = None) -> dict: tmp_url = url["fred_series"] + id r = requests.get(tmp_url) metadata = { - "name": (" ".join(BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "page-title"})[0].span.text.split())), - "id": id, - "update_time": datetime.strftime(dparser.parse(BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "pull-left meta-col"})[0].find_all('span')[3].text, fuzzy=True), format="%Y-%m-%d"), - "units": BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "pull-left meta-col"})[1].find_all('span')[0].text.split(" ")[0], - "frequency": BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "pull-left meta-col"})[2].find_all('span')[0].text.split(" ")[1].split(" ")[1], - "tags": get_tag(id) + "name": (" ".join( + BeautifulSoup(r.text, "html.parser").find_all( + 'div', {"class": "page-title"})[0].span.text.split())), + "id": + id, + "update_time": + datetime.strftime(dparser.parse( + BeautifulSoup(r.text, "html.parser").find_all( + 'div', + {"class": "pull-left meta-col"})[0].find_all('span')[3].text, + fuzzy=True), + format="%Y-%m-%d"), + "units": + BeautifulSoup(r.text, "html.parser").find_all( + 'div', {"class": "pull-left meta-col" + })[1].find_all('span')[0].text.split(" ")[0], + "frequency": + BeautifulSoup(r.text, + "html.parser").find_all('div', + {"class": "pull-left meta-col"}) + [2].find_all('span')[0].text.split(" ")[1].split(" ")[1], + "tags": + get_tag(id) } return metadata @@ -54,17 +72,20 @@ def get_metadata(id: str = None) -> dict: def date_transform(df, format_origin, format_after): return_list = [] for i in range(0, len(df)): - return_list.append(datetime.strptime( - df[i], format_origin).strftime(format_after)) + return_list.append( + datetime.strptime(df[i], format_origin).strftime(format_after)) return return_list class FredData(object): + def __init__(self, country: str = "usa"): self.country = country - __annotations__ = {"name": "Main Economic Indicators", - "url": "https://fred.stlouisfed.org/tags/series?t=mei"} + __annotations__ = { + "name": "Main Economic Indicators", + "url": "https://fred.stlouisfed.org/tags/series?t=mei" + } def get_id(self, url: str) -> list: id_list = [] @@ -93,6 +114,20 @@ class FredData(object): id_list = list(set(id_list)) return id_list + def toc(self): + sid = self.extract_id() + name = [] + for i in range(0, len(sid)): + name.append(get_metadata(id=sid[i])["name"]) + time.sleep(2) + + toc = pd.DataFrame({"name": name, "id": sid}) + return toc + + def download_data(self, sid: str = None): + data = pd.read_csv(url["fred_econ"] + "id={}".format(sid)) + return data + if __name__ == "__main__": usa = FredData(country="usa") diff --git a/CEDA/economic/NBSCData.py b/CEDA/economic/NBSC.py similarity index 100% rename from CEDA/economic/NBSCData.py rename to CEDA/economic/NBSC.py diff --git a/CEDA/economic/XHData.py b/CEDA/economic/XinHua.py similarity index 100% rename from CEDA/economic/XHData.py rename to CEDA/economic/XinHua.py diff --git a/example/economic.md b/example/economic.md new file mode 100644 index 0000000..1317db7 --- /dev/null +++ b/example/economic.md @@ -0,0 +1,79 @@ +## Fred + +For the ST.Louis Fred data, we mainly focus on the ["Main Economic Indicators" Series](https://fred.stlouisfed.org/tags/series?t=mei) + +```python +from CEDA.economic.Fred import * +usa = FredData(country="usa") +usa_toc = usa.toc() +data = usa.download_data(sid="LFAC24FEUSM647N") +``` + +## Eurostat + +```python +from CEDA.economic.Eurostat import * +eurostat = EurostatData(language="en") +eurostat_toc = eurostat.toc() +GDP_related = eurostat.search_toc(query="GDP") +nama_10_gdp = eurostat.download_data(datasetcode="nama_10_gdp") +tet00004 = eurostat.download_data(datasetcode="tet00004") +``` + +## ECB + +```python +from CEDA.economic.ECB import * +ecb = ECBData() +ecb_toc = ecb.toc() +AME = ecb.download_data(datasetname="AME") +``` + +## OECD + +```python +from CEDA.economic.OECD import * +oecd = OECDData() +oecd_toc = oecd.toc() +oecd_tos = oecd.tos(dataset="QNA") +data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q") +``` + +## NBSC + +```python +from CEDA.economic.NBSC import * +nbsc = NBSCData(language="en") +nbsc_nodes = nbsc.tree_generation() +nbsc_toc = nbsc.toc(nodes=nbsc_nodes) +nbsc_toc[nbsc_toc["name"].str.contains("GDP")] +A0203 = nbsc.download_data(nid="A0203") +``` + +## Xinhua +```python +from CEDA.economic.XinHua import * +xhdata = XHData() +toc = xhdata.toc() +data = xhdata.download_data(iid=12006) # GDP +``` + +## BOJ + +```python +from CEDA.economic.BOJ import * +boj = BOJData() +boj_toc = boj.toc() +survey = boj.download_data("Survey") +``` + +## EPU + +```python +from CEDA.economic.EPU import * +country_list, annotations = country_list() +can_epu = EPUData(country="Canada") +mainland_china_epu = EPUData(country="China") +can_data, can_reference = can_epu.download() +mainland_china_data, cn_reference = mainland_china_epu.download() +``` \ No newline at end of file diff --git a/example/market.md b/example/market.md new file mode 100644 index 0000000..e69de29