commit
b3b8c60934
|
@@ -5,6 +5,9 @@
|
|||
### Linux ###
|
||||
*~
|
||||
|
||||
### HOW TO MAKE ###
|
||||
HOWTO.md
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
|
|
|
@@ -0,0 +1,3 @@
|
|||
{
|
||||
"python.formatting.provider": "yapf"
|
||||
}
|
|
@@ -11,5 +11,5 @@ if sys.version_info < (3, 6):
|
|||
del sys
|
||||
|
||||
from CEDA import *
|
||||
from CEDA import macroecon
|
||||
from CEDA import economic
|
||||
from CEDA import market
|
|
@@ -0,0 +1,52 @@
|
|||
import requests
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
url = {
|
||||
"BOJ": "https://www.stat-search.boj.or.jp"
|
||||
}
|
||||
|
||||
class BOJData(object):
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
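# scrape the BOJ main statistics index page into a DataFrame of series titles and urls
|
||||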
def toc(self):
|
||||
tmp_url = url["BOJ"] + "/index_en.html"
|
||||
r = requests.get(tmp_url)
|
||||
main_statistics_table = BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "clearfix"})[1]
|
||||
uls = main_statistics_table.find_all("ul")
|
||||
lis = [li for ul in uls for li in ul.find_all("li", {"class": "icoSimpleRightArrowForMainTime-series mainTimeSeriesName"})]
|
||||
li_text = [li.text.strip() for li in lis]
|
||||
li_urls = [url["BOJ"] + li.a.get("href") for li in lis]
|
||||
toc = pd.DataFrame({"title": li_text, "url":li_urls})
|
||||
return toc
|
||||
|
||||
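# fetch one statistics page and parse its first html table into a DataFrame
|
||||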
def _download(self, down_url:str=None):
|
||||
r = requests.get(down_url)
|
||||
table = BeautifulSoup(r.text, "html.parser").find_all("table")
|
||||
data = pd.read_html(str(table))[0]
|
||||
header = ["time"] + list(data.loc[0][1:])
|
||||
data.columns = header
|
||||
data = data[1:]
|
||||
return data
|
||||
|
||||
def download_data(self, query:str=None):
|
||||
toc = self.toc()
|
||||
if query is None:
|
||||
raise ValueError("query is missing.")
|
||||
else:
|
||||
data = toc[toc["title"].str.contains(query)].reset_index(drop=True)
|
||||
if data.empty:
|
||||
return ValueError("No related dataset, check the query again")
|
||||
else:
|
||||
output = []
|
||||
for i in range(0, len(data)):
|
||||
output.append(self._download(down_url=data.loc[i]["url"]))
|
||||
|
||||
return output
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@@ -0,0 +1,33 @@
|
|||
import requests
|
||||
import pandas as pd
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
url = {
|
||||
"ECB": "https://sdw.ecb.europa.eu/",
|
||||
"ECB-API": "http://sdw-wsrest.ecb.europa.eu/service/data/"
|
||||
}
|
||||
|
||||
class ECBData(object):
|
||||
def __init__(self) -> None:
|
||||
pass
|
||||
|
||||
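# scrape the dataset tree under SDW browse node 9689727 into a name/metadata/url table
|
||||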
def toc(self):
|
||||
r = requests.get(url["ECB"] + "browse.do?node=9689727")
|
||||
dataset_list = BeautifulSoup(r.text, "html.parser").find_all("div", {"id": "currentMaximizeNode0"})
|
||||
uls = dataset_list[0].find_all("ul")
|
||||
lis = [li for ul in uls for li in ul.find_all("li")]
|
||||
li_text = [li.text.strip() for li in lis]
|
||||
name, metadata = [], []
|
||||
for i in range(0, len(li_text)):
|
||||
name.append(li_text[i].split("-")[0])
|
||||
metadata.append(li_text[i].split("-")[1])
|
||||
|
||||
li_urls = [url["ECB"] + li.a.get("href") for li in lis]
|
||||
toc = pd.DataFrame({"name": name, "metadata":metadata, "url":li_urls})
|
||||
return toc
|
||||
|
||||
|
||||
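# the SDW REST API returns the whole dataset as CSV when format=csvdata is requested
|
||||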
def download_data(self, datasetname:str=None):
|
||||
tmp_url = url["ECB-API"] + "{}?format=csvdata".format(datasetname)
|
||||
data = pd.read_csv(tmp_url)
|
||||
return data
|
|
@@ -0,0 +1,82 @@
|
|||
from lxml import html
|
||||
import requests
|
||||
import pandas as pd
|
||||
|
||||
url = {
|
||||
"EPU-China": "https://economicpolicyuncertaintyinchina.weebly.com",
|
||||
"EPU-HKSAR": "https://economicpolicyuncertaintyinchina.weebly.com/epu-in-hong-kong.html",
|
||||
"EPU-MACAUSAR": "https://economicpolicyuncertaintyinchina.weebly.com/epu-in-macao.html",
|
||||
"EPU": "https://www.policyuncertainty.com/"
|
||||
}
|
||||
|
||||
def country_list():
|
||||
country_list = ["Global", "USA", "Australia", "Belgium",
|
||||
"Brazil", "Canada", "Chile", "China",
|
||||
"Colombia", "Croatia", "Denmark", "France",
|
||||
"Germany", "Greece", "HKSAR", "MACAUSAR",
|
||||
"India", "Ireland", "Italy", "Japan",
|
||||
"Korea", "Mexico", "Netherlands", "Pakistan",
|
||||
"Russia", "Singapore", "Spain", "Sweden", "UK"]
|
||||
annotations = "Disambiguation: the word 'Korea' in here stands for 'South Korea'"
|
||||
return country_list, annotations
|
||||
|
||||
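# see country_list() above for the supported country names
|
||||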
class EPUData(object):
|
||||
def __init__(self, country:str=None):
|
||||
self.country = country
|
||||
|
||||
def download(self):
|
||||
# the China, HKSAR and MACAUSAR pages are hosted on the same weebly site and use
|
||||
# relative links, so they share url["EPU-China"] as the base; every other country
|
||||
# page lives under policyuncertainty.com
|
||||
if self.country == "China":
|
||||
page, base = url["EPU-China"], url["EPU-China"]
|
||||
elif self.country == "HKSAR":
|
||||
page, base = url["EPU-HKSAR"], url["EPU-China"]
|
||||
elif self.country == "MACAUSAR":
|
||||
page, base = url["EPU-MACAUSAR"], url["EPU-China"]
|
||||
else:
|
||||
page, base = url["EPU"] + self.country.lower() + "_monthly.html", url["EPU"]
|
||||
r = requests.get(page)
|
||||
webpage = html.fromstring(r.content)
|
||||
urls = pd.Series(webpage.xpath("//a/@href"))
|
||||
urls_data = [base + i for i in urls[urls.str.contains("xlsx")]]
|
||||
urls_cite = [base + i for i in urls[urls.str.contains("pdf")]]
|
||||
output_data = [pd.read_excel(i) for i in urls_data]
|
||||
return {"data": output_data, "reference": urls_cite}
|
|
@@ -0,0 +1,93 @@
|
|||
import pandas as pd
|
||||
|
||||
class EurostatData(object):
|
||||
|
||||
"""
|
||||
for more information: https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=BulkDownload_Guidelines.pdf
|
||||
"""
|
||||
|
||||
def __init__(self, language:str="en"):
|
||||
self.language = language
|
||||
self.url = "https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/"
|
||||
self.toc_url = "https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=table_of_contents_{}.txt".format(language)
|
||||
|
||||
__annotations__ = {"name": "eurostat",
|
||||
"url": "https://ec.europa.eu/eurostat"}
|
||||
|
||||
def toc(self) -> pd.DataFrame:
|
||||
"""
|
||||
the return value includes 8 columns:
|
||||
'title'
|
||||
'code'
|
||||
'type'
|
||||
'last update of data'
|
||||
'last table structure change'
|
||||
'data start'
|
||||
'data end'
|
||||
'values'
|
||||
"""
|
||||
toc = pd.read_csv(self.toc_url, sep="\t")
|
||||
return toc
|
||||
|
||||
def search_toc(self, query:str=None):
|
||||
"""
|
||||
fuzzy search in the "title"
|
||||
"""
|
||||
toc = self.toc()
|
||||
if query is None:
|
||||
raise ValueError("query is missing.")
|
||||
else:
|
||||
return toc[toc["title"].str.contains(query)].reset_index(drop=True)
|
||||
|
||||
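# the bulk download arrives as gzipped TSV whose first column packs the dimensions; it is split below into unit, na_item and geo
|
||||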
def download_data(self, datasetcode:str=None, geo:str=None, unit:str=None):
|
||||
url = self.url + "BulkDownloadListing?sort=1&file=data%2F" + datasetcode + ".tsv.gz"
|
||||
data = pd.read_csv(url, sep = "\t", compression="gzip")
|
||||
data = data.drop(data.columns[0], axis=1).join(data[data.columns[0]].str.split(",", expand=True))
|
||||
columns_list = list(data.columns)[:-3] + ["unit", "na_item", "geo"]
|
||||
data.columns = columns_list
|
||||
columns_list = columns_list[-3:] + columns_list[:-3]
|
||||
data = data[columns_list]
|
||||
if geo is not None:
|
||||
data = data.loc[data["geo"] == geo]
|
||||
if unit is not None:
|
||||
data = data.loc[data["unit"] == unit]
|
||||
# extract the numeric part of each value cell (eurostat appends flags such as "p" or "e")
|
||||
for i in range(4, len(list(data.columns))):
|
||||
data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+\.\d+)').astype("float")
|
||||
return data
|
||||
|
||||
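# fetch the code-list dictionary for a category (e.g. "geo", "unit") in the configured language
|
||||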
def download_dic(self, category:str=None):
|
||||
url = self.url + "BulkDownloadListing?sort=1&file=dic%2F{}".format(self.language) + "%2F" + category + "dic"
|
||||
return pd.read_csv(url, sep="\t")
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
eu = EurostatData(language="en")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@@ -0,0 +1,140 @@
|
|||
import io
|
||||
import os
|
||||
import ssl
|
||||
import time
|
||||
import json
|
||||
import tqdm
|
||||
import requests
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from bs4 import BeautifulSoup
|
||||
import dateutil.parser as dparser
|
||||
from fake_useragent import UserAgent
|
||||
|
||||
ssl._create_default_https_context = ssl._create_unverified_context
|
||||
|
||||
# Main Economic Indicators: https://alfred.stlouisfed.org/release?rid=205
|
||||
url = {
|
||||
"fred_econ": "https://fred.stlouisfed.org/graph/fredgraph.csv?",
|
||||
"fred_series": "https://fred.stlouisfed.org/series/",
|
||||
"philfed":
|
||||
"https://www.philadelphiafed.org/surveys-and-data/real-time-data-research/",
|
||||
"chicagofed": "https://www.chicagofed.org/~/media/publications/",
|
||||
"OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/"
|
||||
}
|
||||
|
||||
|
||||
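# collect the tag-cloud entries from a FRED series page
|
||||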
def get_tag(id: str) -> list:
|
||||
tmp_url = url["fred_series"] + id
|
||||
r = requests.get(tmp_url)
|
||||
tags = []
|
||||
tags_series = BeautifulSoup(r.text, "html.parser").find_all(
|
||||
"div", {"class": "series-tag-cloud"})
|
||||
for i in tqdm.tqdm(range(0, len(tags_series))):
|
||||
subtable = tags_series[i].find_all("a")
|
||||
for j in tqdm.tqdm(range(0, len(subtable)), leave=False):
|
||||
tags.append((" ".join(subtable[j].text.split())))
|
||||
return tags
|
||||
|
||||
|
||||
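# scrape series name, update time, units, frequency and tags from a FRED series page
|
||||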
def get_metadata(id: str = None) -> dict:
|
||||
tmp_url = url["fred_series"] + id
|
||||
r = requests.get(tmp_url)
|
||||
metadata = {
|
||||
"name": (" ".join(
|
||||
BeautifulSoup(r.text, "html.parser").find_all(
|
||||
'div', {"class": "page-title"})[0].span.text.split())),
|
||||
"id":
|
||||
id,
|
||||
"update_time":
|
||||
datetime.strftime(dparser.parse(
|
||||
BeautifulSoup(r.text, "html.parser").find_all(
|
||||
'div',
|
||||
{"class": "pull-left meta-col"})[0].find_all('span')[3].text,
|
||||
fuzzy=True),
|
||||
format="%Y-%m-%d"),
|
||||
"units":
|
||||
BeautifulSoup(r.text, "html.parser").find_all(
|
||||
'div', {"class": "pull-left meta-col"
|
||||
})[1].find_all('span')[0].text.split(" ")[0],
|
||||
"frequency":
|
||||
BeautifulSoup(r.text,
|
||||
"html.parser").find_all('div',
|
||||
{"class": "pull-left meta-col"})
|
||||
[2].find_all('span')[0].text.split(" ")[1].split(" ")[1],
|
||||
"tags":
|
||||
get_tag(id)
|
||||
}
|
||||
return metadata
|
||||
|
||||
|
||||
def date_transform(df, format_origin, format_after):
|
||||
return_list = []
|
||||
for i in range(0, len(df)):
|
||||
return_list.append(
|
||||
datetime.strptime(df[i], format_origin).strftime(format_after))
|
||||
return return_list
|
||||
|
||||
|
||||
class FredData(object):
|
||||
|
||||
def __init__(self, country: str = "usa"):
|
||||
self.country = country
|
||||
|
||||
__annotations__ = {
|
||||
"name": "Main Economic Indicators",
|
||||
"url": "https://fred.stlouisfed.org/tags/series?t=mei"
|
||||
}
|
||||
|
||||
def get_id(self, url: str) -> list:
|
||||
id_list = []
|
||||
r = requests.get(url)
|
||||
table = BeautifulSoup(r.text, "html.parser").find_all("table")
|
||||
for i in range(0, len(table)):
|
||||
subtable = table[i].find_all("a")
|
||||
for j in range(0, len(subtable)):
|
||||
id_list.append(subtable[j].get("href").split("/")[-1])
|
||||
return id_list
|
||||
|
||||
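# walk the paginated MEI tag listing for this country; after page 20, stop once the page reports "No series"
|
||||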
def extract_id(self):
|
||||
id_list = []
|
||||
for i in tqdm.tqdm(range(1, 100)):
|
||||
tmp_url = "https://fred.stlouisfed.org/tags/series?ob=pv&od=desc&t=mei%3B{}&pageID={}".format(
|
||||
self.country, str(i))
|
||||
id_list.append(self.get_id(tmp_url))
|
||||
if i > 20:
|
||||
r = requests.get(tmp_url)
|
||||
if "No series" in r.text:
|
||||
break
|
||||
else:
|
||||
continue
|
||||
|
||||
id_list = [item for sublist in id_list for item in sublist]
|
||||
id_list = list(set(id_list))
|
||||
return id_list
|
||||
|
||||
def toc(self):
|
||||
sid = self.extract_id()
|
||||
name = []
|
||||
for i in range(0, len(sid)):
|
||||
name.append(get_metadata(id=sid[i])["name"])
|
||||
time.sleep(2)
|
||||
|
||||
toc = pd.DataFrame({"name": name, "id": sid})
|
||||
return toc
|
||||
|
||||
def download_data(self, sid: str = None):
|
||||
data = pd.read_csv(url["fred_econ"] + "id={}".format(sid))
|
||||
return data
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
usa = FredData(country="usa")
|
||||
usa_list = usa.extract_id()
|
||||
china = FredData(country="china")
|
||||
china_list = china.extract_id()
|
||||
japan = FredData(country="japan")
|
||||
japan_list = japan.extract_id()
|
||||
eu = FredData(country="eu")
|
||||
eu_list = eu.extract_id()
|
|
@@ -0,0 +1,125 @@
|
|||
"""
|
||||
NBSC's api information:
|
||||
|
||||
url: "https://data.stats.gov.cn/english/easyquery.htm"
|
||||
params: id=zb&dbcode=hgnd&wdcode=ab&m=getTree
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import pickle
|
||||
import random
|
||||
import requests
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from fake_useragent import UserAgent
|
||||
from requests.packages.urllib3.exceptions import InsecureRequestWarning
|
||||
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
|
||||
|
||||
|
||||
class NBSCData(object):
|
||||
def __init__(self, language:str="en"):
|
||||
|
||||
self.dbcode = []
|
||||
self.nid = []
|
||||
self.pid = []
|
||||
self.name = []
|
||||
self.wdcode = []
|
||||
|
||||
if language == "cn":
|
||||
self.url = "https://data.stats.gov.cn/easyquery.htm"
|
||||
self.BASE_DIR = os.path.dirname(__file__)
|
||||
self.__TREE_PATH__ = os.path.join(self.BASE_DIR, "NBSCTree", "data.pkl")
|
||||
elif language == "en":
|
||||
self.url = "https://data.stats.gov.cn/english/easyquery.htm"
|
||||
self.BASE_DIR = os.path.dirname(__file__)
|
||||
self.__TREE_PATH__ = os.path.join(self.BASE_DIR, "NBSCTree", "data_en.pkl")
|
||||
|
||||
def generate_header(self):
|
||||
ua = UserAgent()
|
||||
header = {'User-Agent':str(ua.chrome)}
|
||||
return header
|
||||
|
||||
|
||||
def tree_generation(self, rid="zb", mode:str="fast"):
|
||||
"""
|
||||
inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html
|
||||
"""
|
||||
parent = []
|
||||
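# fetch the child nodes of rid, then recurse into parent nodes to build the full category tree
|
||||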
r = requests.post("{}?id={}&dbcode=hgnd&wdcode=zb&m=getTree".format(self.url, rid), headers=self.generate_header(), verify=False)
|
||||
data = r.json()
|
||||
|
||||
for i in range(0, len(data)):
|
||||
node = data[i]
|
||||
print("[+] Downloading {} ...".format(node["name"]))
|
||||
if node["isParent"]:
|
||||
parent.append(node["id"])
|
||||
node["children"] = self.tree_generation(rid=node["id"])
|
||||
if i % 100 == 0:
|
||||
print("[-] Due to the web scraping policy, sleep for 2 seconds")
|
||||
time.sleep(1)
|
||||
if mode == "slow":
|
||||
if i % 1000 == 0:
|
||||
print("[-] Due to the web scraping policy, sleep for 10 seconds")
|
||||
time.sleep(10)
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def toc(self, nodes):
|
||||
"""
|
||||
inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html
|
||||
"""
|
||||
for i in range(0, len(nodes)):
|
||||
node = nodes[i]
|
||||
if node["isParent"]:
|
||||
self.toc(node["children"])
|
||||
else:
|
||||
self.dbcode.append(node["dbcode"])
|
||||
self.nid.append(node["id"])
|
||||
self.name.append(node["name"])
|
||||
self.pid.append(node["pid"])
|
||||
self.wdcode.append(node["wdcode"])
|
||||
|
||||
data = pd.DataFrame({"dbcode":self.dbcode, "nid":self.nid,
|
||||
"name":self.name, "pid":self.pid, "wdcode":self.wdcode})
|
||||
return data
|
||||
|
||||
def download_data(self, nid:str=None, sj="1978-"):
|
||||
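# easyquery params: zb = indicator code, sj = time range (e.g. "1978-" means 1978 onwards); dbcode "hgnd" selects the annual national database
|
||||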
params = {
|
||||
"dbcode": "hgnd",
|
||||
"rowcode": "zb",
|
||||
"m": "QueryData",
|
||||
"colcode": "sj",
|
||||
"wds": "[]",
|
||||
"dfwds": '[{"wdcode":"zb","valuecode":"'
|
||||
+ nid
|
||||
+ '"},{"wdcode":"sj","valuecode":"'
|
||||
+ sj
|
||||
+ '"}]',
|
||||
}
|
||||
r = requests.get(self.url, params=params, verify=False, headers=self.generate_header())
|
||||
if r.ok:
|
||||
data = r.json()["returndata"]["datanodes"]
|
||||
date, value = [], []
|
||||
for i in range(0, len(data)):
|
||||
date.append(data[i]["wds"][1]["valuecode"])
|
||||
value.append(data[i]["data"]["data"])
|
||||
|
||||
output = pd.DataFrame({"date":date, "value":value})
|
||||
return output
|
||||
|
||||
if __name__ == "__main__":
|
||||
nbsc = NBSCData(language="en")
|
||||
nodes = nbsc.tree_generation()
|
||||
toc = nbsc.toc(nodes=nodes)
|
||||
toc[toc["name"].str.contains("GDP")]
|
||||
data = nbsc.download_data(nid="A0203")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@@ -0,0 +1,150 @@
|
|||
import requests
|
||||
import xmltodict
|
||||
import pandas as pd
|
||||
|
||||
url = {
|
||||
"OECD":"https://stats.oecd.org/SDMX-JSON/data/",
|
||||
"OECD-Key": "https://stats.oecd.org/RestSDMX/sdmx.ashx/GetKeyFamily/all",
|
||||
"OECD-Schema": "http://stats.oecd.org/restsdmx/sdmx.ashx/GetSchema/"
|
||||
}
|
||||
|
||||
class OECDData(object):
|
||||
def __init__(self, language:str="en"):
|
||||
self.language = language
|
||||
|
||||
def toc(self) -> pd.DataFrame:
|
||||
tmp_url = url["OECD-Key"]
|
||||
r = requests.get(tmp_url)
|
||||
xpars = xmltodict.parse(r.text)
|
||||
KeyFamily = xpars['message:Structure']['message:KeyFamilies']['KeyFamily']
|
||||
FamilyID, FamilyName = [], []
|
||||
|
||||
for key in KeyFamily:
|
||||
key_id = key["@id"]
|
||||
key_name = key["Name"]
|
||||
if isinstance(key_name, list):
|
||||
if self.language == "en":
|
||||
key_name = key_name[0]["#text"]
|
||||
else:
|
||||
key_name = key_name[1]["#text"]
|
||||
elif isinstance(key_name, dict):
|
||||
key_name = key_name["#text"]
|
||||
FamilyID.append(key_id)
|
||||
FamilyName.append(key_name)
|
||||
|
||||
toc = pd.DataFrame({"FamilyID":FamilyID, "FamilyName":FamilyName})
|
||||
return toc
|
||||
|
||||
|
||||
def search_toc(self, query:str=None) ->pd.DataFrame:
|
||||
toc = self.toc()
|
||||
if query is None:
|
||||
raise ValueError("query is missing.")
|
||||
else:
|
||||
return toc[toc["FamilyName"].str.contains(query)].reset_index(drop=True)
|
||||
|
||||
def tos(self, dataset:str=None) -> dict:
|
||||
if dataset is None:
|
||||
raise ValueError("dataset ID is missing")
|
||||
else:
|
||||
tmp_url = url["OECD-Schema"] + dataset
|
||||
r = requests.get(tmp_url, timeout=10)
|
||||
xpars = xmltodict.parse(r.text)
|
||||
location = xpars['xs:schema']['xs:simpleType'][0]["xs:restriction"]["xs:enumeration"]
|
||||
transact = xpars['xs:schema']['xs:simpleType'][1]["xs:restriction"]["xs:enumeration"]
|
||||
measures = xpars['xs:schema']['xs:simpleType'][2]["xs:restriction"]["xs:enumeration"]
|
||||
frequencies = xpars['xs:schema']['xs:simpleType'][3]["xs:restriction"]["xs:enumeration"]
|
||||
|
||||
code, fullname, transaction, fulltransaction = [], [], [], []
|
||||
measure_list, full_measure, frequency_list, full_frequency = [], [], [], []
|
||||

|
||||
def pick_text(node):
|
||||
# a schema documentation entry is a list (one item per language), a dict, or a plain string
|
||||
if isinstance(node, list):
|
||||
return node[0]["#text"] if self.language == "en" else node[1]["#text"]
|
||||
if isinstance(node, dict):
|
||||
return node["#text"]
|
||||
return node
|
||||

|
||||
for loc in location:
|
||||
code.append(loc["@value"])
|
||||
fullname.append(pick_text(loc["xs:annotation"]["xs:documentation"]))
|
||||

|
||||
for tran in transact:
|
||||
transaction.append(tran["@value"])
|
||||
fulltransaction.append(pick_text(tran["xs:annotation"]["xs:documentation"]))
|
||||

|
||||
for measure in measures:
|
||||
measure_list.append(measure["@value"])
|
||||
full_measure.append(pick_text(measure["xs:annotation"]["xs:documentation"]))
|
||||

|
||||
for frequency in frequencies:
|
||||
frequency_list.append(frequency["@value"])
|
||||
full_frequency.append(pick_text(frequency["xs:annotation"]["xs:documentation"]))
|
||||
|
||||
data = {
|
||||
"code":code,
|
||||
"fullname": fullname,
|
||||
"transaction_code": transaction,
|
||||
"transaction": fulltransaction,
|
||||
"measurement_code": measure_list,
|
||||
"measurement": full_measure,
|
||||
"frequency code": frequency_list,
|
||||
"frequency": full_frequency
|
||||
}
|
||||
|
||||
return data
|
||||
|
||||
def download_data(self, dataset:str=None, query:str=None):
|
||||
tmp_url = url["OECD"] + "{}/".format(dataset) + query + "/all"
|
||||
r = requests.get(tmp_url)
|
||||
data = r.json()
|
||||
return data
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
oecd = OECDData()
|
||||
oecd_toc = oecd.toc()
|
||||
oecd_tos = oecd.tos(dataset="QNA")
|
||||
data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@@ -0,0 +1,54 @@
|
|||
import ast
|
||||
import json
|
||||
import requests
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from pygtrans import Translate
|
||||
|
||||
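# translate Chinese chart titles to English via pygtrans (Google Translate)
|
||||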
def translate(text:str=None):
|
||||
client = Translate()
|
||||
text = client.translate(text, target="en")
|
||||
return text.translatedText
|
||||
|
||||
url = {
|
||||
"CNFIN": "https://api.cnfin.com/roll/charts/"
|
||||
}
|
||||
|
||||
class XHData(object):
|
||||
def __init__(self):
|
||||
pass
|
||||
|
||||
|
||||
def toc(self):
|
||||
urls, tid, titles, titles_en = [], [], [], []
|
||||
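# probe chart ids 12005-12099 and keep only the ids that return chart data
|
||||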
for i in tqdm(range(12005, 12100)):
|
||||
url = "https://api.cnfin.com/roll/charts/getContent?ids={}".format(i)
|
||||
r = requests.get(tmp_url)
|
||||
if r.ok:
|
||||
data = r.json()
|
||||
if data["data"] == "图表数据不存在":
|
||||
pass
|
||||
else:
|
||||
urls.append(tmp_url)
|
||||
tid.append(i)
|
||||
title = json.loads(data["data"]["list"][0]["modelCode"])["title"]["text"]
|
||||
titles.append(title)
|
||||
titles_en.append(translate(text=title))
|
||||
|
||||
return pd.DataFrame({"urls":urls, "id":tid, "title_zh":titles, "title_en":titles_en})
|
||||
|
||||
def download_data(self, iid:int=None):
|
||||
tmp_url = url["CNFIN"] + "getContent?ids={}".format(iid)
|
||||
r = requests.get(tmp_url)
|
||||
if r.ok:
|
||||
raw_data = r.json()
|
||||
data = pd.DataFrame(ast.literal_eval(raw_data["data"]["list"][0]["content"]))
|
||||
data.columns = ["date", "data"]
|
||||
return data
|
||||
else:
|
||||
return ValueError("Something went wrong, try again later")
|
||||
|
||||
if __name__ == "__main__":
|
||||
xhdata = XHData()
|
||||
toc = xhdata.toc()
|
||||
data = xhdata.download_data(iid=12006) # GDP
|
1309 CEDA/macroecon/cn.py (file diff suppressed because it is too large)
2069 CEDA/macroecon/eu.py (file diff suppressed because it is too large)
1107 CEDA/macroecon/us.py (file diff suppressed because it is too large)
|
@@ -9,7 +9,7 @@ from urllib.parse import quote, urlencode
|
|||
from fake_useragent import UserAgent
|
||||
|
||||
url = {
|
||||
"dukascopy": "http://data.deluxelau.com/forex/api/v1.0/getdata?"
|
||||
"dukascopy": "https://data.deluxelau.com/api/v1.0/finance/getdata?"
|
||||
}
|
||||
|
||||
#?instrument=usdcnh&startdate=2014-01-01&enddate=2014-12-31&timeframe=d1&pricetype=ask&utc=0&volume=false&flat=false
|
||||
|
@@ -34,12 +34,12 @@ def dukascopy(
|
|||
"utc": "{}".format(utc),
|
||||
"pricetype": "{}".format(pricetype),
|
||||
"volume": "{}".format(str(volume).lower()),
|
||||
"flat": "{}".format(str(flat).lower())
|
||||
"flat": "{}".format(str(flat).lower()),
|
||||
"token": "token=6dc8797f-aa4b-4b8c-b137-cfe9a9ace5a1"
|
||||
|
||||
}
|
||||
r = requests.get(tmp_url, params=request_params, headers=request_header)
|
||||
data_text = r.text
|
||||
output_file = demjson.decode(data_text)
|
||||
output_file = r.json()
|
||||
return pd.json_normalize(output_file)
|
||||
|
||||
# example:
|
||||
|
@@ -47,7 +47,7 @@ def dukascopy(
|
|||
df = dukascopy(instrument = "usdcnh",
|
||||
startdate = "2014-01-01",
|
||||
enddate = "2020-01-01",
|
||||
timeframe = "h1",
|
||||
timeframe = "m1",
|
||||
pricetype = "bid",
|
||||
utc = 0,
|
||||
volume = False,
|
||||
|
|
59 README.md
|
@@ -1,14 +1,45 @@
|
|||
# Centralized Economic Data API
|
||||
|
||||
![py_version](https://img.shields.io/badge/python-3.6+-brightgreen)
|
||||
[![PyPI Version](https://img.shields.io/pypi/v/CEDApy.svg)](https://pypi.org/project/CEDApy)
|
||||
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5229506.svg)](https://doi.org/10.5281/zenodo.5229506)
|
||||
|
||||
## Introduction
|
||||
|
||||
This is an economic data collection library, available in both `python` and `R`
|
||||
* "Centralized" means all-in-one, "all data" you need in one library
|
||||
* "Economic data" means economic data :)
|
||||
|
||||
* `python` version: [https://github.com/TerenceLiu98/CEDApy](https://github.com/TerenceLiu98/CEDApy)
|
||||
* `R` version: [https://github.com/TerenceLiu98/CEDAr](https://github.com/TerenceLiu98/CEDAr) *Not Started Yet*
|
||||
|
||||
### Economic Data
|
||||
|
||||
We have included multiple APIs from central banks and national statistics departments across countries/regions:
|
||||
|
||||
* North America:
|
||||
- [x] `FredData` for [FRED (Federal Reserve Economic Data)](https://fred.stlouisfed.org/)
|
||||
|
||||
* Europe:
|
||||
- [x] `ECBData` for [European Central Bank](https://www.ecb.europa.eu/home/html/index.en.html)
|
||||
- [x] `EurostatData` for [European Statistics](https://ec.europa.eu/eurostat)
|
||||
|
||||
* Asia:
|
||||
- [x] `NBSCData` for [National Bureau of Statistics of China](http://www.stats.gov.cn/english/)
|
||||
- [x] `XHData` for [Xinhua](https://www.cnfin.com/data/macro-data/index.html)
|
||||
- [x] `BOJData` for [Bank of Japan](https://www.boj.or.jp/en/index.htm/)
|
||||
|
||||
### Market Data
|
||||
|
||||
We have two APIs for accessing market data:
|
||||
|
||||
- [x] `marketwatch` for [MarketWatch](https://www.marketwatch.com/)
|
||||
- [x] `dukascopy` for [Dukascopy Historical Data](https://www.dukascopy.com/swiss/english/marketwatch/historical/)
|
||||
|
||||
*Recommendations are welcome! Tell us what data you need and we may put it on the to-do list :)*
|
||||
|
||||
### Other
|
||||
|
||||
We also collect some interesting data which may be useful in your research or project:
|
||||
- [x] `EPU` for [Economic Policy Uncertainty](https://www.policyuncertainty.com/) and [Economic Policy Uncertainty in China](https://economicpolicyuncertaintyinchina.weebly.com/)
|
||||
|
||||
## Installation
|
||||
|
||||
|
@@ -31,15 +62,21 @@ Please check [Wiki](https://github.com/TerenceLiu98/CEDApy/wiki)
|
|||
|
||||
## Acknowledgement
|
||||
|
||||
* Thanks [akshare](https://github.com/jindaxiang/akshare/)
|
||||
* Thanks [EastMoney](https://www.eastmoney.com)
|
||||
* Thanks [St.Louis Federal Reserve Bank](https://fred.stlouisfed.org/)
|
||||
* Thanks [Chicago Federal Reserve Bank](https://www.chicagofed.org/)
|
||||
* Thanks [Philadelphia Federal Reserve Bank](https://www.philadelphiafed.org/)
|
||||
* Thanks [eurostat Economic Indicators](https://ec.europa.eu/eurostat/cache/infographs/economy/desktop/index.html)
|
||||
* Thanks [Europen Central Bank](https://www.ecb.europa.eu)
|
||||
* Thanks [MarketWatch](https://www.marketwatch.com/)
|
||||
* Thanks [Dukascopy](https://www.dukascopy.bank/swiss)
|
||||
* [St. Louis Federal Reserve Bank](https://fred.stlouisfed.org/), [Chicago Federal Reserve Bank](https://www.chicagofed.org/), [Philadelphia Federal Reserve Bank](https://www.philadelphiafed.org/)
|
||||
* [eurostat Economic Indicators](https://ec.europa.eu/eurostat/cache/infographs/economy/desktop/index.html)
|
||||
* [European Central Bank](https://www.ecb.europa.eu)
|
||||
* [National Bureau of Statistics of China](http://www.stats.gov.cn/english/)
|
||||
* [Bank of Japan](https://www.boj.or.jp/en/index.htm/)
|
||||
* [MarketWatch](https://www.marketwatch.com/)
|
||||
* [Dukascopy](https://www.dukascopy.bank/swiss)
|
||||
|
||||
## Other Interesting Project
|
||||
|
||||
Here is a list of related packages and tools that may help you find the data you want:
|
||||
|
||||
* [akshare](https://github.com/jindaxiang/akshare/) - an elegant and simple financial data interface library for Python, built for human beings
|
||||
* [tushare](https://github.com/waditu/tushare) - a utility for crawling historical data of China stocks
|
||||
* [investpy](https://github.com/alvarobartt/investpy) - Financial Data Extraction from Investing.com with Python
|
||||
|
||||
## If you want to cite...
|
||||
|
||||
|
|
|
@@ -0,0 +1,79 @@
|
|||
## Fred
|
||||
|
||||
For the St. Louis FRED data, we mainly focus on the ["Main Economic Indicators" series](https://fred.stlouisfed.org/tags/series?t=mei)
|
||||
|
||||
```python
|
||||
from CEDA.economic.Fred import *
|
||||
usa = FredData(country="usa")
|
||||
usa_toc = usa.toc()
|
||||
data = usa.download_data(sid="LFAC24FEUSM647N")
|
||||
```
|
||||
|
||||
## Eurostat
|
||||
|
||||
```python
|
||||
from CEDA.economic.Eurostat import *
|
||||
eurostat = EurostatData(language="en")
|
||||
eurostat_toc = eurostat.toc()
|
||||
GDP_related = eurostat.search_toc(query="GDP")
|
||||
nama_10_gdp = eurostat.download_data(datasetcode="nama_10_gdp")
|
||||
tet00004 = eurostat.download_data(datasetcode="tet00004")
|
||||
```
|
||||
|
||||
## ECB
|
||||
|
||||
```python
|
||||
from CEDA.economic.ECB import *
|
||||
ecb = ECBData()
|
||||
ecb_toc = ecb.toc()
|
||||
AME = ecb.download_data(datasetname="AME")
|
||||
```
|
||||
|
||||
## OECD
|
||||
|
||||
```python
|
||||
from CEDA.economic.OECD import *
|
||||
oecd = OECDData()
|
||||
oecd_toc = oecd.toc()
|
||||
oecd_tos = oecd.tos(dataset="QNA")
|
||||
data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q")
|
||||
```
|
||||
|
||||
## NBSC
|
||||
|
||||
```python
|
||||
from CEDA.economic.NBSC import *
|
||||
nbsc = NBSCData(language="en")
|
||||
nbsc_nodes = nbsc.tree_generation()
|
||||
nbsc_toc = nbsc.toc(nodes=nbsc_nodes)
|
||||
nbsc_toc[nbsc_toc["name"].str.contains("GDP")]
|
||||
A0203 = nbsc.download_data(nid="A0203")
|
||||
```
|
||||
|
||||
## Xinhua
|
||||
```python
|
||||
from CEDA.economic.XinHua import *
|
||||
xhdata = XHData()
|
||||
toc = xhdata.toc()
|
||||
data = xhdata.download_data(iid=12006) # GDP
|
||||
```
|
||||
|
||||
## BOJ
|
||||
|
||||
```python
|
||||
from CEDA.economic.BOJ import *
|
||||
boj = BOJData()
|
||||
boj_toc = boj.toc()
|
||||
survey = boj.download_data("Survey")
|
||||
```
|
||||
|
||||
## EPU
|
||||
|
||||
```python
|
||||
from CEDA.economic.EPU import *
|
||||
countries, annotations = country_list()
|
||||
can_epu = EPUData(country="Canada")
|
||||
mainland_china_epu = EPUData(country="China")
|
||||
can = can_epu.download()
|
||||
can_data, can_reference = can["data"], can["reference"]
|
||||
mainland_china = mainland_china_epu.download()
|
||||
mainland_china_data, cn_reference = mainland_china["data"], mainland_china["reference"]
|
||||
```
|
|
@@ -1,6 +1,6 @@
|
|||
certifi==2020.12.5
|
||||
chardet==4.0.0
|
||||
demjson==2.2.4
|
||||
demjson
|
||||
fake-useragent==0.1.11
|
||||
idna==2.10
|
||||
numpy==1.20.3
|
||||
|
@@ -11,3 +11,4 @@ requests==2.25.1
|
|||
six==1.16.0
|
||||
urllib3==1.26.5
|
||||
wincertstore==0.2
|
||||
beautifulsoup4
|
||||
|
|