add example
This commit is contained in:
parent
c584206693
commit
0a2df84927
|
@ -0,0 +1,33 @@
|
||||||
|
import requests
|
||||||
|
import pandas as pd
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
# Base URLs used by the ECB module.
url = {
    # Web front end of the ECB Statistical Data Warehouse; scraped for the
    # dataset table of contents.
    "ECB": "https://sdw.ecb.europa.eu/",
    # REST data endpoint; dataset codes are appended to this prefix.
    # Uses HTTPS, like the entry above, so data is not fetched in plaintext.
    "ECB-API": "https://sdw-wsrest.ecb.europa.eu/service/data/",
}
|
||||||
|
|
||||||
|
class ECBData(object):
    """Small client for the ECB Statistical Data Warehouse.

    ``toc`` scrapes the list of datasets from the warehouse web page and
    ``download_data`` fetches one dataset as CSV from the REST endpoint.
    Both rely on the module-level ``url`` mapping.
    """

    def __init__(self) -> None:
        pass

    @staticmethod
    def _split_entry(text: str) -> tuple:
        """Split a ``"<name> - <description>"`` entry into ``(name, description)``.

        Only the first hyphen separates the two parts, so hyphens inside the
        description are kept. An entry without a hyphen yields an empty
        description instead of raising. Both parts are stripped of
        surrounding whitespace.
        """
        name, _, metadata = text.partition("-")
        return name.strip(), metadata.strip()

    def toc(self):
        """Return the table of contents scraped from the warehouse browse page.

        Returns:
            pandas.DataFrame with one row per list item and the columns
            ``name``, ``metadata`` and ``url`` (absolute link of the item).

        Raises:
            requests.HTTPError: if the page request returns an error status.
            IndexError: if the expected dataset container is not in the page.
        """
        r = requests.get(url["ECB"] + "browse.do?node=9689727")
        r.raise_for_status()

        containers = BeautifulSoup(r.text, "html.parser").find_all(
            "div", {"id": "currentMaximizeNode0"})
        if not containers:
            raise IndexError(
                "dataset container 'currentMaximizeNode0' not found in page")

        names, metadata, links = [], [], []
        for ul in containers[0].find_all("ul"):
            for li in ul.find_all("li"):
                # Items without a link cannot be resolved to a dataset page;
                # skip them so the three columns stay aligned.
                anchor = li.a
                href = anchor.get("href") if anchor is not None else None
                if href is None:
                    continue
                entry_name, entry_metadata = self._split_entry(li.text.strip())
                names.append(entry_name)
                metadata.append(entry_metadata)
                links.append(url["ECB"] + href)

        return pd.DataFrame(
            {"name": names, "metadata": metadata, "url": links})

    def download_data(self, datasetname: str = None):
        """Download one dataset as CSV and return it as a DataFrame.

        Args:
            datasetname: dataset code appended to the REST endpoint,
                e.g. ``"AME"``.

        Returns:
            pandas.DataFrame parsed from the CSV response.

        Raises:
            ValueError: if ``datasetname`` is missing or empty; otherwise the
                literal text ``None`` would be sent as the dataset code.
        """
        if not datasetname:
            raise ValueError(
                "datasetname must be a non-empty dataset code, e.g. 'AME'")
        tmp_url = url["ECB-API"] + "{}?format=csvdata".format(datasetname)
        return pd.read_csv(tmp_url)
|
|
@ -1,13 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
url = {
|
|
||||||
"ECB": "http://sdw-wsrest.ecb.europa.eu/service/data/"
|
|
||||||
}
|
|
||||||
|
|
||||||
class ECBData(object):
|
|
||||||
def __init__(self) -> None:
|
|
||||||
pass
|
|
||||||
def get_data(dataset:str=None):
|
|
||||||
tmp_url = url["ECB"] + "{}?format=csvdata".format(dataset)
|
|
||||||
data = pd.read_csv(tmp_url)
|
|
||||||
return data
|
|
|
@ -9,12 +9,7 @@ url = {
|
||||||
"EPU": "https://www.policyuncertainty.com/"
|
"EPU": "https://www.policyuncertainty.com/"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def country_list():
|
||||||
class EPUData(object):
|
|
||||||
def __init__(self, country:str=None):
|
|
||||||
self.country = country
|
|
||||||
|
|
||||||
def country_list(self):
|
|
||||||
country_list = ["Global", "USA", "Australia", "Belgium",
|
country_list = ["Global", "USA", "Australia", "Belgium",
|
||||||
"Brazil", "Canada", "Chile", "China",
|
"Brazil", "Canada", "Chile", "China",
|
||||||
"Colombia", "Croatia", "Denmark", "France",
|
"Colombia", "Croatia", "Denmark", "France",
|
||||||
|
@ -25,6 +20,10 @@ class EPUData(object):
|
||||||
annotations = "Disambiguation: the word 'Korea' in here stands for 'South Korea'"
|
annotations = "Disambiguation: the word 'Korea' in here stands for 'South Korea'"
|
||||||
return country_list, annotations
|
return country_list, annotations
|
||||||
|
|
||||||
|
class EPUData(object):
|
||||||
|
def __init__(self, country:str=None):
|
||||||
|
self.country = country
|
||||||
|
|
||||||
def download(self):
|
def download(self):
|
||||||
if self.country == "China":
|
if self.country == "China":
|
||||||
r = requests.get(url["EPU-China"])
|
r = requests.get(url["EPU-China"])
|
||||||
|
|
|
@ -50,24 +50,24 @@ class EurostatData(object):
|
||||||
if geo != None and unit != None:
|
if geo != None and unit != None:
|
||||||
data = data.loc[(data["geo"] == geo) & (data["unit"] == unit)]
|
data = data.loc[(data["geo"] == geo) & (data["unit"] == unit)]
|
||||||
for i in range(4, len(list(data.columns))):
|
for i in range(4, len(list(data.columns))):
|
||||||
data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float")
|
data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
elif geo != None and unit == None:
|
elif geo != None and unit == None:
|
||||||
data = data.loc[(data["geo"] == geo)]
|
data = data.loc[(data["geo"] == geo)]
|
||||||
for i in range(4, len(list(data.columns))):
|
for i in range(4, len(list(data.columns))):
|
||||||
data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float")
|
data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
elif geo == None and unit != None:
|
elif geo == None and unit != None:
|
||||||
data = data.loc[(data["geo"] == geo)]
|
data = data.loc[(data["geo"] == geo)]
|
||||||
for i in range(4, len(list(data.columns))):
|
for i in range(4, len(list(data.columns))):
|
||||||
data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float")
|
data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
elif geo == None and unit == None:
|
elif geo == None and unit == None:
|
||||||
for i in range(4, len(list(data.columns))):
|
for i in range(4, len(list(data.columns))):
|
||||||
data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float")
|
data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float")
|
||||||
return data
|
return data
|
||||||
|
|
||||||
def download_dic(self, category:str=None):
|
def download_dic(self, category:str=None):
|
||||||
|
@ -77,7 +77,7 @@ class EurostatData(object):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
eu = EurostatData(language="en", version=2.1)
|
eu = EurostatData(language="en")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,8 @@ ssl._create_default_https_context = ssl._create_unverified_context
|
||||||
url = {
|
url = {
|
||||||
"fred_econ": "https://fred.stlouisfed.org/graph/fredgraph.csv?",
|
"fred_econ": "https://fred.stlouisfed.org/graph/fredgraph.csv?",
|
||||||
"fred_series": "https://fred.stlouisfed.org/series/",
|
"fred_series": "https://fred.stlouisfed.org/series/",
|
||||||
"philfed": "https://www.philadelphiafed.org/surveys-and-data/real-time-data-research/",
|
"philfed":
|
||||||
|
"https://www.philadelphiafed.org/surveys-and-data/real-time-data-research/",
|
||||||
"chicagofed": "https://www.chicagofed.org/~/media/publications/",
|
"chicagofed": "https://www.chicagofed.org/~/media/publications/",
|
||||||
"OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/"
|
"OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/"
|
||||||
}
|
}
|
||||||
|
@ -41,12 +42,29 @@ def get_metadata(id: str = None) -> dict:
|
||||||
tmp_url = url["fred_series"] + id
|
tmp_url = url["fred_series"] + id
|
||||||
r = requests.get(tmp_url)
|
r = requests.get(tmp_url)
|
||||||
metadata = {
|
metadata = {
|
||||||
"name": (" ".join(BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "page-title"})[0].span.text.split())),
|
"name": (" ".join(
|
||||||
"id": id,
|
BeautifulSoup(r.text, "html.parser").find_all(
|
||||||
"update_time": datetime.strftime(dparser.parse(BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "pull-left meta-col"})[0].find_all('span')[3].text, fuzzy=True), format="%Y-%m-%d"),
|
'div', {"class": "page-title"})[0].span.text.split())),
|
||||||
"units": BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "pull-left meta-col"})[1].find_all('span')[0].text.split(" ")[0],
|
"id":
|
||||||
"frequency": BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "pull-left meta-col"})[2].find_all('span')[0].text.split(" ")[1].split(" ")[1],
|
id,
|
||||||
"tags": get_tag(id)
|
"update_time":
|
||||||
|
datetime.strftime(dparser.parse(
|
||||||
|
BeautifulSoup(r.text, "html.parser").find_all(
|
||||||
|
'div',
|
||||||
|
{"class": "pull-left meta-col"})[0].find_all('span')[3].text,
|
||||||
|
fuzzy=True),
|
||||||
|
format="%Y-%m-%d"),
|
||||||
|
"units":
|
||||||
|
BeautifulSoup(r.text, "html.parser").find_all(
|
||||||
|
'div', {"class": "pull-left meta-col"
|
||||||
|
})[1].find_all('span')[0].text.split(" ")[0],
|
||||||
|
"frequency":
|
||||||
|
BeautifulSoup(r.text,
|
||||||
|
"html.parser").find_all('div',
|
||||||
|
{"class": "pull-left meta-col"})
|
||||||
|
[2].find_all('span')[0].text.split(" ")[1].split(" ")[1],
|
||||||
|
"tags":
|
||||||
|
get_tag(id)
|
||||||
}
|
}
|
||||||
return metadata
|
return metadata
|
||||||
|
|
||||||
|
@ -54,17 +72,20 @@ def get_metadata(id: str = None) -> dict:
|
||||||
def date_transform(df, format_origin, format_after):
    """Re-format every date string in ``df`` from one layout to another.

    Args:
        df: iterable of date strings (for example a list or a pandas Series).
        format_origin: ``strptime`` format the input strings are written in.
        format_after: ``strftime`` format to render each date in.

    Returns:
        list of str: the re-formatted dates, in input order.

    Raises:
        ValueError: if a value does not match ``format_origin``.
    """
    # Iterate the values directly instead of indexing with range(len(df)):
    # ``df[i]`` is a label lookup on a pandas Series and fails when the index
    # is not 0..n-1 (for example after filtering rows).
    return [
        datetime.strptime(value, format_origin).strftime(format_after)
        for value in df
    ]
|
||||||
|
|
||||||
|
|
||||||
class FredData(object):
|
class FredData(object):
|
||||||
|
|
||||||
def __init__(self, country: str = "usa"):
|
def __init__(self, country: str = "usa"):
|
||||||
self.country = country
|
self.country = country
|
||||||
|
|
||||||
__annotations__ = {"name": "Main Economic Indicators",
|
__annotations__ = {
|
||||||
"url": "https://fred.stlouisfed.org/tags/series?t=mei"}
|
"name": "Main Economic Indicators",
|
||||||
|
"url": "https://fred.stlouisfed.org/tags/series?t=mei"
|
||||||
|
}
|
||||||
|
|
||||||
def get_id(self, url: str) -> list:
|
def get_id(self, url: str) -> list:
|
||||||
id_list = []
|
id_list = []
|
||||||
|
@ -93,6 +114,20 @@ class FredData(object):
|
||||||
id_list = list(set(id_list))
|
id_list = list(set(id_list))
|
||||||
return id_list
|
return id_list
|
||||||
|
|
||||||
|
def toc(self):
    """Build a table of contents of series names and ids.

    Takes the ids from ``self.extract_id()``, looks up each one with
    ``get_metadata`` and keeps its ``"name"`` entry.

    Returns:
        pandas.DataFrame with the columns ``name`` and ``id``.
    """
    series_ids = self.extract_id()
    titles = []
    # NOTE(review): assumes extract_id() returns a plain list, so iterating
    # visits the same items as positional indexing - confirm.
    for series_id in series_ids:
        titles.append(get_metadata(id=series_id)["name"])
        # Pause after every metadata lookup, which issues a page request.
        time.sleep(2)

    return pd.DataFrame({"name": titles, "id": series_ids})
|
||||||
|
|
||||||
|
def download_data(self, sid: str = None):
    """Download one series as CSV and return it as a DataFrame.

    Args:
        sid: series id placed in the ``id=`` query parameter,
            e.g. ``"LFAC24FEUSM647N"``.

    Returns:
        pandas.DataFrame parsed from the CSV response.

    Raises:
        ValueError: if ``sid`` is missing or empty; otherwise the literal
            text ``None`` would be requested as the series id.
    """
    if not sid:
        raise ValueError("sid must be a non-empty series id")
    return pd.read_csv(url["fred_econ"] + "id={}".format(sid))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
usa = FredData(country="usa")
|
usa = FredData(country="usa")
|
|
@ -0,0 +1,79 @@
|
||||||
|
## Fred
|
||||||
|
|
||||||
|
For the St. Louis Fed FRED data, we mainly focus on the ["Main Economic Indicators" series](https://fred.stlouisfed.org/tags/series?t=mei).
|
||||||
|
|
||||||
|
```python
|
||||||
|
from CEDA.economic.Fred import *
|
||||||
|
usa = FredData(country="usa")
|
||||||
|
usa_toc = usa.toc()
|
||||||
|
data = usa.download_data(sid="LFAC24FEUSM647N")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Eurostat
|
||||||
|
|
||||||
|
```python
|
||||||
|
from CEDA.economic.Eurostat import *
|
||||||
|
eurostat = EurostatData(language="en")
|
||||||
|
eurostat_toc = eurostat.toc()
|
||||||
|
GDP_related = eurostat.search_toc(query="GDP")
|
||||||
|
nama_10_gdp = eurostat.download_data(datasetcode="nama_10_gdp")
|
||||||
|
tet00004 = eurostat.download_data(datasetcode="tet00004")
|
||||||
|
```
|
||||||
|
|
||||||
|
## ECB
|
||||||
|
|
||||||
|
```python
|
||||||
|
from CEDA.economic.ECB import *
|
||||||
|
ecb = ECBData()
|
||||||
|
ecb_toc = ecb.toc()
|
||||||
|
AME = ecb.download_data(datasetname="AME")
|
||||||
|
```
|
||||||
|
|
||||||
|
## OECD
|
||||||
|
|
||||||
|
```python
|
||||||
|
from CEDA.economic.OECD import *
|
||||||
|
oecd = OECDData()
|
||||||
|
oecd_toc = oecd.toc()
|
||||||
|
oecd_tos = oecd.tos(dataset="QNA")
|
||||||
|
data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q")
|
||||||
|
```
|
||||||
|
|
||||||
|
## NBSC
|
||||||
|
|
||||||
|
```python
|
||||||
|
from CEDA.economic.NBSC import *
|
||||||
|
nbsc = NBSCData(language="en")
|
||||||
|
nbsc_nodes = nbsc.tree_generation()
|
||||||
|
nbsc_toc = nbsc.toc(nodes=nbsc_nodes)
|
||||||
|
nbsc_toc[nbsc_toc["name"].str.contains("GDP")]
|
||||||
|
A0203 = nbsc.download_data(nid="A0203")
|
||||||
|
```
|
||||||
|
|
||||||
|
## Xinhua
|
||||||
|
```python
|
||||||
|
from CEDA.economic.XinHua import *
|
||||||
|
xhdata = XHData()
|
||||||
|
toc = xhdata.toc()
|
||||||
|
data = xhdata.download_data(iid=12006) # GDP
|
||||||
|
```
|
||||||
|
|
||||||
|
## BOJ
|
||||||
|
|
||||||
|
```python
|
||||||
|
from CEDA.economic.BOJ import *
|
||||||
|
boj = BOJData()
|
||||||
|
boj_toc = boj.toc()
|
||||||
|
survey = boj.download_data("Survey")
|
||||||
|
```
|
||||||
|
|
||||||
|
## EPU
|
||||||
|
|
||||||
|
```python
|
||||||
|
from CEDA.economic.EPU import *
|
||||||
|
country_list, annotations = country_list()
|
||||||
|
can_epu = EPUData(country="Canada")
|
||||||
|
mainland_china_epu = EPUData(country="China")
|
||||||
|
can_data, can_reference = can_epu.download()
|
||||||
|
mainland_china_data, cn_reference = mainland_china_epu.download()
|
||||||
|
```
|
Loading…
Reference in New Issue