add example
This commit is contained in:
parent
c584206693
commit
0a2df84927
|
@ -0,0 +1,33 @@
|
|||
import requests
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Base endpoints for the ECB Statistical Data Warehouse (SDW):
#   "ECB"     -> HTML browse pages (scraped by ECBData.toc)
#   "ECB-API" -> REST endpoint that serves CSV downloads
url = {
    "ECB": "https://sdw.ecb.europa.eu/",
    "ECB-API": "http://sdw-wsrest.ecb.europa.eu/service/data/",
}
|
||||
|
||||
class ECBData(object):
    """Client for the ECB Statistical Data Warehouse (SDW).

    Relies on the module-level ``url`` mapping for both the HTML browse
    pages ("ECB") and the REST CSV endpoint ("ECB-API").
    """

    def __init__(self) -> None:
        # Stateless client: nothing to configure.
        pass

    def toc(self):
        """Scrape the SDW browse page into a table of contents.

        Returns:
            pandas.DataFrame with columns ``name``, ``metadata`` and
            ``url`` — one row per dataset listed on the browse page.
        """
        response = requests.get(url["ECB"] + "browse.do?node=9689727")
        soup = BeautifulSoup(response.text, "html.parser")
        container = soup.find_all("div", {"id": "currentMaximizeNode0"})
        items = [li
                 for ul in container[0].find_all("ul")
                 for li in ul.find_all("li")]
        names, metadata = [], []
        for item in items:
            # Each entry reads "<name>-<metadata>"; keep the first two
            # hyphen-separated pieces, as the original split("-") did.
            text = item.text.strip()
            names.append(text.split("-")[0])
            metadata.append(text.split("-")[1])
        links = [url["ECB"] + item.a.get("href") for item in items]
        return pd.DataFrame({"name": names, "metadata": metadata, "url": links})

    def download_data(self, datasetname: str = None):
        """Download the dataset *datasetname* as CSV via the REST API."""
        endpoint = url["ECB-API"] + "{}?format=csvdata".format(datasetname)
        return pd.read_csv(endpoint)
|
|
@ -1,13 +0,0 @@
|
|||
import pandas as pd
|
||||
|
||||
# Single endpoint used by the legacy client: the SDW REST CSV API.
url = {
    "ECB": "http://sdw-wsrest.ecb.europa.eu/service/data/",
}
|
||||
|
||||
class ECBData(object):
    """Legacy ECB client: downloads one SDW dataset as CSV."""

    def __init__(self) -> None:
        # Stateless client.
        pass

    def get_data(self, dataset: str = None):
        """Download *dataset* (an SDW dataset code) into a DataFrame.

        Fix: the original signature omitted ``self``, so calling the
        method on an instance bound the instance itself to ``dataset``
        and silently ignored the real argument.
        """
        tmp_url = url["ECB"] + "{}?format=csvdata".format(dataset)
        data = pd.read_csv(tmp_url)
        return data
|
|
@ -9,22 +9,21 @@ url = {
|
|||
"EPU": "https://www.policyuncertainty.com/"
|
||||
}
|
||||
|
||||
def country_list():
    """Return the supported EPU country names and a disambiguation note.

    Returns:
        tuple: (list of country-name strings, annotation string).
    """
    countries = ("Global USA Australia Belgium Brazil Canada Chile China "
                 "Colombia Croatia Denmark France Germany Greece HKSAR "
                 "MACAUSAR India Ireland Italy Japan Korea Mexico "
                 "Netherlands Pakistan Russia Singapore Spain Sweden "
                 "UK").split()
    note = "Disambiguation: the word 'Korea' in here stands for 'South Korea'"
    return countries, note
|
||||
|
||||
class EPUData(object):
|
||||
def __init__(self, country:str=None):
|
||||
self.country = country
|
||||
|
||||
def country_list(self):
|
||||
country_list = ["Global", "USA", "Australia", "Belgium",
|
||||
"Brazil", "Canada", "Chile", "China",
|
||||
"Colombia", "Croatia", "Denmark", "France",
|
||||
"Germany", "Greece", "HKSAR", "MACAUSAR",
|
||||
"India", "Ireland", "Italy", "Japan",
|
||||
"Korea", "Mexico", "Netherlands", "Pakistan",
|
||||
"Russia", "Singapore", "Spain", "Sweden", "UK"]
|
||||
annotations = "Disambiguation: the word 'Korea' in here stands for 'South Korea'"
|
||||
return country_list, annotations
|
||||
|
||||
def download(self):
|
||||
if self.country == "China":
|
||||
r = requests.get(url["EPU-China"])
|
||||
|
|
|
@ -50,24 +50,24 @@ class EurostatData(object):
|
|||
if geo != None and unit != None:
|
||||
data = data.loc[(data["geo"] == geo) & (data["unit"] == unit)]
|
||||
for i in range(4, len(list(data.columns))):
|
||||
data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float")
|
||||
data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float")
|
||||
return data
|
||||
|
||||
elif geo != None and unit == None:
|
||||
data = data.loc[(data["geo"] == geo)]
|
||||
for i in range(4, len(list(data.columns))):
|
||||
data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float")
|
||||
data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float")
|
||||
return data
|
||||
|
||||
elif geo == None and unit != None:
|
||||
data = data.loc[(data["geo"] == geo)]
|
||||
for i in range(4, len(list(data.columns))):
|
||||
data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float")
|
||||
data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float")
|
||||
return data
|
||||
|
||||
elif geo == None and unit == None:
|
||||
for i in range(4, len(list(data.columns))):
|
||||
data[data.columns[i]] = data[data.columns[i]].str.extract(r'(\d+.\d+)').astype("float")
|
||||
data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+.\d+)').astype("float")
|
||||
return data
|
||||
|
||||
def download_dic(self, category:str=None):
|
||||
|
@ -77,7 +77,7 @@ class EurostatData(object):
|
|||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: build an English-language Eurostat client.
    # The stale duplicate call passing the removed `version=2.1`
    # keyword is dropped; only the current constructor form remains.
    eu = EurostatData(language="en")
|
@ -18,7 +18,8 @@ ssl._create_default_https_context = ssl._create_unverified_context
|
|||
# Base endpoints for the US/OECD macro data sources used by this module.
# Fix: the "philfed" key appeared twice (duplicate dict key, last-wins);
# keep a single entry.
url = {
    "fred_econ": "https://fred.stlouisfed.org/graph/fredgraph.csv?",
    "fred_series": "https://fred.stlouisfed.org/series/",
    "philfed": "https://www.philadelphiafed.org/surveys-and-data/real-time-data-research/",
    "chicagofed": "https://www.chicagofed.org/~/media/publications/",
    "OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/"
}
|
||||
|
@ -41,12 +42,29 @@ def get_metadata(id: str = None) -> dict:
|
|||
tmp_url = url["fred_series"] + id
|
||||
r = requests.get(tmp_url)
|
||||
metadata = {
|
||||
"name": (" ".join(BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "page-title"})[0].span.text.split())),
|
||||
"id": id,
|
||||
"update_time": datetime.strftime(dparser.parse(BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "pull-left meta-col"})[0].find_all('span')[3].text, fuzzy=True), format="%Y-%m-%d"),
|
||||
"units": BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "pull-left meta-col"})[1].find_all('span')[0].text.split(" ")[0],
|
||||
"frequency": BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "pull-left meta-col"})[2].find_all('span')[0].text.split(" ")[1].split(" ")[1],
|
||||
"tags": get_tag(id)
|
||||
"name": (" ".join(
|
||||
BeautifulSoup(r.text, "html.parser").find_all(
|
||||
'div', {"class": "page-title"})[0].span.text.split())),
|
||||
"id":
|
||||
id,
|
||||
"update_time":
|
||||
datetime.strftime(dparser.parse(
|
||||
BeautifulSoup(r.text, "html.parser").find_all(
|
||||
'div',
|
||||
{"class": "pull-left meta-col"})[0].find_all('span')[3].text,
|
||||
fuzzy=True),
|
||||
format="%Y-%m-%d"),
|
||||
"units":
|
||||
BeautifulSoup(r.text, "html.parser").find_all(
|
||||
'div', {"class": "pull-left meta-col"
|
||||
})[1].find_all('span')[0].text.split(" ")[0],
|
||||
"frequency":
|
||||
BeautifulSoup(r.text,
|
||||
"html.parser").find_all('div',
|
||||
{"class": "pull-left meta-col"})
|
||||
[2].find_all('span')[0].text.split(" ")[1].split(" ")[1],
|
||||
"tags":
|
||||
get_tag(id)
|
||||
}
|
||||
return metadata
|
||||
|
||||
|
@ -54,17 +72,20 @@ def get_metadata(id: str = None) -> dict:
|
|||
def date_transform(df, format_origin, format_after):
    """Re-format every date string in *df*.

    Args:
        df: indexable sequence of date strings (list, Series, ...).
        format_origin: ``strptime`` format the input strings are in.
        format_after: ``strftime`` format for the output strings.

    Returns:
        list[str]: the re-formatted date strings, in input order.
    """
    # One conversion per element (the flattened source appended twice);
    # a comprehension replaces the index-based loop.
    return [
        datetime.strptime(value, format_origin).strftime(format_after)
        for value in df
    ]
|
||||
|
||||
|
||||
class FredData(object):
|
||||
|
||||
def __init__(self, country: str = "usa"):
|
||||
self.country = country
|
||||
|
||||
# Human-readable name and landing page of the series collection this
# class targets. The flattened source carried two redundant copies of
# this assignment; keep a single one.
__annotations__ = {
    "name": "Main Economic Indicators",
    "url": "https://fred.stlouisfed.org/tags/series?t=mei"
}
|
||||
|
||||
def get_id(self, url: str) -> list:
|
||||
id_list = []
|
||||
|
@ -93,6 +114,20 @@ class FredData(object):
|
|||
id_list = list(set(id_list))
|
||||
return id_list
|
||||
|
||||
def toc(self):
    """Build a name/id table of contents for the configured series.

    Calls ``get_metadata`` once per series id returned by
    ``self.extract_id()``, sleeping 2 s between requests to throttle
    the scraping of the FRED site.

    Returns:
        pandas.DataFrame with columns ``name`` and ``id``.
    """
    series_ids = self.extract_id()
    names = []
    for series_id in series_ids:
        names.append(get_metadata(id=series_id)["name"])
        time.sleep(2)  # be polite between HTTP requests
    return pd.DataFrame({"name": names, "id": series_ids})
|
||||
|
||||
def download_data(self, sid: str = None):
    """Download one FRED series (id *sid*) as CSV into a DataFrame."""
    csv_url = url["fred_econ"] + "id={}".format(sid)
    return pd.read_csv(csv_url)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: instantiate a client for US data.
    usa = FredData(country="usa")
|
|
@ -0,0 +1,79 @@
|
|||
## Fred
|
||||
|
||||
For the St. Louis Fed (FRED) data, we mainly focus on the ["Main Economic Indicators" series](https://fred.stlouisfed.org/tags/series?t=mei)
|
||||
|
||||
```python
|
||||
from CEDA.economic.Fred import *
|
||||
usa = FredData(country="usa")
|
||||
usa_toc = usa.toc()
|
||||
data = usa.download_data(sid="LFAC24FEUSM647N")
|
||||
```
|
||||
|
||||
## Eurostat
|
||||
|
||||
```python
|
||||
from CEDA.economic.Eurostat import *
|
||||
eurostat = EurostatData(language="en")
|
||||
eurostat_toc = eurostat.toc()
|
||||
GDP_related = eurostat.search_toc(query="GDP")
|
||||
nama_10_gdp = eurostat.download_data(datasetcode="nama_10_gdp")
|
||||
tet00004 = eurostat.download_data(datasetcode="tet00004")
|
||||
```
|
||||
|
||||
## ECB
|
||||
|
||||
```python
|
||||
from CEDA.economic.ECB import *
|
||||
ecb = ECBData()
|
||||
ecb_toc = ecb.toc()
|
||||
AME = ecb.download_data(datasetname="AME")
|
||||
```
|
||||
|
||||
## OECD
|
||||
|
||||
```python
|
||||
from CEDA.economic.OECD import *
|
||||
oecd = OECDData()
|
||||
oecd_toc = oecd.toc()
|
||||
oecd_tos = oecd.tos(dataset="QNA")
|
||||
data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q")
|
||||
```
|
||||
|
||||
## NBSC
|
||||
|
||||
```python
|
||||
from CEDA.economic.NBSC import *
|
||||
nbsc = NBSCData(language="en")
|
||||
nbsc_nodes = nbsc.tree_generation()
|
||||
nbsc_toc = nbsc.toc(nodes=nbsc_nodes)
|
||||
nbsc_toc[nbsc_toc["name"].str.contains("GDP")]
|
||||
A0203 = nbsc.download_data(nid="A0203")
|
||||
```
|
||||
|
||||
## Xinhua
|
||||
```python
|
||||
from CEDA.economic.XinHua import *
|
||||
xhdata = XHData()
|
||||
toc = xhdata.toc()
|
||||
data = xhdata.download_data(iid=12006) # GDP
|
||||
```
|
||||
|
||||
## BOJ
|
||||
|
||||
```python
|
||||
from CEDA.economic.BOJ import *
|
||||
boj = BOJData()
|
||||
boj_toc = boj.toc()
|
||||
survey = boj.download_data("Survey")
|
||||
```
|
||||
|
||||
## EPU
|
||||
|
||||
```python
|
||||
from CEDA.economic.EPU import *
|
||||
countries, annotations = country_list()
|
||||
can_epu = EPUData(country="Canada")
|
||||
mainland_china_epu = EPUData(country="China")
|
||||
can_data, can_reference = can_epu.download()
|
||||
mainland_china_data, cn_reference = mainland_china_epu.download()
|
||||
```
|
Loading…
Reference in New Issue