Merge pull request #6 from TerenceLiu98/dev

Dev merge to Master
This commit is contained in:
TerenceLau 2022-01-29 12:05:03 +08:00 committed by GitHub
commit b3b8c60934
20 changed files with 871 additions and 4504 deletions

3
.gitignore vendored

@@ -5,6 +5,9 @@
### Linux ###
*~
### HOW TO MAKE ###
HOWTO.md
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*

3
.vscode/settings.json vendored Normal file

@@ -0,0 +1,3 @@
{
    "python.formatting.provider": "yapf"
}


@@ -11,5 +11,5 @@ if sys.version_info < (3, 6):
 del sys
 from CEDA import *
-from CEDA import macroecon
+from CEDA import economic
+from CEDA import market

52
CEDA/economic/BOJ.py Normal file

@@ -0,0 +1,52 @@
import requests
import pandas as pd
from bs4 import BeautifulSoup

url = {
    "BOJ": "https://www.stat-search.boj.or.jp"
}

class BOJData(object):
    def __init__(self) -> None:
        pass

    def toc(self):
        # scrape the list of main time-series from the BOJ statistics portal
        tmp_url = url["BOJ"] + "/index_en.html"
        r = requests.get(tmp_url)
        main_statistics_table = BeautifulSoup(r.text, "html.parser").find_all('div', {"class": "clearfix"})[1]
        uls = main_statistics_table.find_all("ul")
        lis = [li for ul in uls for li in ul.find_all("li", {"class": "icoSimpleRightArrowForMainTime-series mainTimeSeriesName"})]
        li_text = [li.text.strip() for li in lis]
        li_urls = [url["BOJ"] + li.a.get("href") for li in lis]
        toc = pd.DataFrame({"title": li_text, "url": li_urls})
        return toc

    def _download(self, down_url: str = None):
        # parse the first HTML table on the dataset page into a DataFrame,
        # using its first row as the header
        r = requests.get(down_url)
        table = BeautifulSoup(r.text, "html.parser").find_all("table")
        data = pd.read_html(str(table))[0]
        header = ["time"] + list(data.loc[0][1:])
        data.columns = header
        data = data[1:]
        return data

    def download_data(self, query: str = None):
        toc = self.toc()
        if query is None:
            raise ValueError("query is invalid.")
        data = toc[toc["title"].str.contains(query)].reset_index(drop=True)
        if data.empty:
            raise ValueError("No related dataset, check the query again")
        return [self._download(down_url=data.loc[i]["url"]) for i in range(len(data))]
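# A minimal usage sketch (mirrors example/economic.md in this PR); download_data
# returns a list of DataFrames whose titles match the query:
if __name__ == "__main__":
    boj = BOJData()
    boj_toc = boj.toc()
    survey = boj.download_data(query="Survey")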

33
CEDA/economic/ECB.py Normal file

@@ -0,0 +1,33 @@
import requests
import pandas as pd
from bs4 import BeautifulSoup

url = {
    "ECB": "https://sdw.ecb.europa.eu/",
    "ECB-API": "http://sdw-wsrest.ecb.europa.eu/service/data/"
}

class ECBData(object):
    def __init__(self) -> None:
        pass

    def toc(self):
        # scrape the dataset list from the ECB Statistical Data Warehouse
        r = requests.get(url["ECB"] + "browse.do?node=9689727")
        dataset_list = BeautifulSoup(r.text, "html.parser").find_all("div", {"id": "currentMaximizeNode0"})
        uls = dataset_list[0].find_all("ul")
        lis = [li for ul in uls for li in ul.find_all("li")]
        li_text = [li.text.strip() for li in lis]
        name, metadata = [], []
        for text in li_text:
            # split only on the first "-" so names containing hyphens stay intact
            parts = text.split("-", 1)
            name.append(parts[0].strip())
            metadata.append(parts[1].strip() if len(parts) > 1 else "")
        li_urls = [url["ECB"] + li.a.get("href") for li in lis]
        toc = pd.DataFrame({"name": name, "metadata": metadata, "url": li_urls})
        return toc

    def download_data(self, datasetname: str = None):
        # the SDW REST API returns the whole dataset as CSV
        tmp_url = url["ECB-API"] + "{}?format=csvdata".format(datasetname)
        data = pd.read_csv(tmp_url)
        return data
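# A minimal usage sketch (mirrors example/economic.md in this PR; "AME" is the
# dataset code used there):
if __name__ == "__main__":
    ecb = ECBData()
    ecb_toc = ecb.toc()
    AME = ecb.download_data(datasetname="AME")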

82
CEDA/economic/EPU.py Normal file

@@ -0,0 +1,82 @@
from lxml import html
import requests
import pandas as pd

url = {
    "EPU-China": "https://economicpolicyuncertaintyinchina.weebly.com",
    "EPU-HKSAR": "https://economicpolicyuncertaintyinchina.weebly.com/epu-in-hong-kong.html",
    "EPU-MACAUSAR": "https://economicpolicyuncertaintyinchina.weebly.com/epu-in-macao.html",
    "EPU": "https://www.policyuncertainty.com/"
}

def country_list():
    country_list = ["Global", "USA", "Australia", "Belgium",
                    "Brazil", "Canada", "Chile", "China",
                    "Colombia", "Croatia", "Denmark", "France",
                    "Germany", "Greece", "HKSAR", "MACAUSAR",
                    "India", "Ireland", "Italy", "Japan",
                    "Korea", "Mexico", "Netherlands", "Pakistan",
                    "Russia", "Singapore", "Spain", "Sweden", "UK"]
    annotations = "Disambiguation: 'Korea' here stands for 'South Korea'"
    return country_list, annotations

class EPUData(object):
    def __init__(self, country: str = None):
        self.country = country

    def _scrape(self, page_url: str, base_url: str):
        # collect the xlsx data links and pdf citation links on one EPU page
        r = requests.get(page_url)
        webpage = html.fromstring(r.content)
        urls = pd.Series(webpage.xpath("//a/@href"))
        urls_data = [base_url + i for i in urls[urls.str.contains("xlsx")]]
        urls_cite = [base_url + i for i in urls[urls.str.contains("pdf")]]
        output_data = [pd.read_excel(u) for u in urls_data]
        return {"data": output_data, "reference": urls_cite}

    def download(self):
        # the China, HKSAR and Macau indices are hosted on the weebly site;
        # other countries live under policyuncertainty.com as <country>_monthly.html
        if self.country == "China":
            return self._scrape(url["EPU-China"], url["EPU-China"])
        elif self.country == "HKSAR":
            return self._scrape(url["EPU-HKSAR"], url["EPU-China"])
        elif self.country == "MACAUSAR":
            return self._scrape(url["EPU-MACAUSAR"], url["EPU-China"])
        else:
            return self._scrape(url["EPU"] + self.country.lower() + "_monthly.html", url["EPU"])

93
CEDA/economic/Eurostat.py Normal file

@@ -0,0 +1,93 @@
import pandas as pd

class EurostatData(object):
    """
    for more information: https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=BulkDownload_Guidelines.pdf
    """
    def __init__(self, language: str = "en"):
        self.language = language
        self.url = "https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/"
        self.toc_url = "https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?sort=1&file=table_of_contents_{}.txt".format(language)

    __annotations__ = {"name": "eurostat",
                       "url": "https://ec.europa.eu/eurostat"}

    def toc(self) -> pd.DataFrame:
        """
        the return value includes 8 columns:
        'title', 'code', 'type', 'last update of data',
        'last table structure change', 'data start', 'data end', 'values'
        """
        toc = pd.read_csv(self.toc_url, sep="\t")
        return toc

    def search_toc(self, query: str = None):
        """
        fuzzy search in the "title" column
        """
        toc = self.toc()
        if query is None:
            raise ValueError("query is invalid.")
        return toc[toc["title"].str.contains(query)].reset_index(drop=True)

    def download_data(self, datasetcode: str = None, geo: str = None, unit: str = None):
        url = self.url + "BulkDownloadListing?sort=1&file=data%2F" + datasetcode + ".tsv.gz"
        data = pd.read_csv(url, sep="\t", compression="gzip")
        # the first column packs "unit,na_item,geo" into one field; split it out
        data = data.drop(data.columns[0], axis=1).join(data[data.columns[0]].str.split(",", expand=True))
        columns_list = list(data.columns)[:-3] + ["unit", "na_item", "geo"]
        data.columns = columns_list
        columns_list = columns_list[-3:] + columns_list[:-3]
        data = data[columns_list]
        if geo is not None:
            data = data.loc[data["geo"] == geo]
        if unit is not None:
            data = data.loc[data["unit"] == unit]
        # the remaining columns are time periods: strip footnote flags and cast to float
        for i in range(3, len(list(data.columns))):
            data[data.columns[i]] = data[data.columns[i]].astype(str).str.extract(r'(\d+\.\d+)').astype("float")
        return data

    def download_dic(self, category: str = None):
        # dictionary files live under dic/<language>/<category>.dic
        url = self.url + "BulkDownloadListing?sort=1&file=dic%2F{}".format(self.language) + "%2F" + category + ".dic"
        return pd.read_csv(url, sep="\t")

if __name__ == "__main__":
    eu = EurostatData(language="en")
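    # a short usage sketch (mirrors example/economic.md in this PR):
    eu_toc = eu.toc()
    gdp_related = eu.search_toc(query="GDP")
    nama_10_gdp = eu.download_data(datasetcode="nama_10_gdp")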

140
CEDA/economic/Fred.py Normal file

@@ -0,0 +1,140 @@
import io
import os
import ssl
import time
import json
import tqdm
import requests
import numpy as np
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup
import dateutil.parser as dparser
from fake_useragent import UserAgent

ssl._create_default_https_context = ssl._create_unverified_context

# Main Economic Indicators: https://alfred.stlouisfed.org/release?rid=205
url = {
    "fred_econ": "https://fred.stlouisfed.org/graph/fredgraph.csv?",
    "fred_series": "https://fred.stlouisfed.org/series/",
    "philfed": "https://www.philadelphiafed.org/surveys-and-data/real-time-data-research/",
    "chicagofed": "https://www.chicagofed.org/~/media/publications/",
    "OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/"
}

def get_tag(id: str) -> list:
    # collect the tag cloud of a series page
    tmp_url = url["fred_series"] + id
    r = requests.get(tmp_url)
    tags = []
    tags_series = BeautifulSoup(r.text, "html.parser").find_all(
        "div", {"class": "series-tag-cloud"})
    for i in tqdm.tqdm(range(0, len(tags_series))):
        subtable = tags_series[i].find_all("a")
        for j in tqdm.tqdm(range(0, len(subtable)), leave=False):
            tags.append(" ".join(subtable[j].text.split()))
    return tags

def get_metadata(id: str = None) -> dict:
    tmp_url = url["fred_series"] + id
    r = requests.get(tmp_url)
    # parse the page once instead of re-parsing it for every field
    soup = BeautifulSoup(r.text, "html.parser")
    meta_cols = soup.find_all('div', {"class": "pull-left meta-col"})
    metadata = {
        "name": " ".join(soup.find_all('div', {"class": "page-title"})[0].span.text.split()),
        "id": id,
        "update_time": dparser.parse(meta_cols[0].find_all('span')[3].text,
                                     fuzzy=True).strftime("%Y-%m-%d"),
        "units": meta_cols[1].find_all('span')[0].text.split(" ")[0],
        "frequency": meta_cols[2].find_all('span')[0].text.split(" ")[1].split(" ")[1],
        "tags": get_tag(id)
    }
    return metadata

def date_transform(df, format_origin, format_after):
    return_list = []
    for i in range(0, len(df)):
        return_list.append(
            datetime.strptime(df[i], format_origin).strftime(format_after))
    return return_list

class FredData(object):
    def __init__(self, country: str = "usa"):
        self.country = country

    __annotations__ = {
        "name": "Main Economic Indicators",
        "url": "https://fred.stlouisfed.org/tags/series?t=mei"
    }

    def get_id(self, page_url: str) -> list:
        id_list = []
        r = requests.get(page_url)
        table = BeautifulSoup(r.text, "html.parser").find_all("table")
        for i in range(0, len(table)):
            subtable = table[i].find_all("a")
            for j in range(0, len(subtable)):
                id_list.append(subtable[j].get("href").split("/")[-1])
        return id_list

    def extract_id(self):
        id_list = []
        # walk the paginated series list; after page 20, stop once a page reports "No series"
        for i in tqdm.tqdm(range(1, 100)):
            tmp_url = "https://fred.stlouisfed.org/tags/series?ob=pv&od=desc&t=mei%3B{}&pageID={}".format(
                self.country, str(i))
            id_list.append(self.get_id(tmp_url))
            if i > 20:
                r = requests.get(tmp_url)
                if "No series" in r.text:
                    break
                else:
                    continue
        id_list = [item for sublist in id_list for item in sublist]
        id_list = list(set(id_list))
        return id_list

    def toc(self):
        sid = self.extract_id()
        name = []
        for i in range(0, len(sid)):
            name.append(get_metadata(id=sid[i])["name"])
            time.sleep(2)
        toc = pd.DataFrame({"name": name, "id": sid})
        return toc

    def download_data(self, sid: str = None):
        data = pd.read_csv(url["fred_econ"] + "id={}".format(sid))
        return data

if __name__ == "__main__":
    usa = FredData(country="usa")
    usa_list = usa.extract_id()
    china = FredData(country="china")
    china_list = china.extract_id()
    japan = FredData(country="japan")
    japan_list = japan.extract_id()
    eu = FredData(country="eu")
    eu_list = eu.extract_id()
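    # download one series by its FRED series id (example id taken from example/economic.md):
    data = usa.download_data(sid="LFAC24FEUSM647N")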

125
CEDA/economic/NBSC.py Normal file

@@ -0,0 +1,125 @@
"""
NBSC's api information:
url: "https://data.stats.gov.cn/english/easyquery.htm"
params: id=zb&dbcode=hgnd&wdcode=ab&m=getTree
"""
import os
import time
import pickle
import random
import requests
import pandas as pd
from tqdm import tqdm
from fake_useragent import UserAgent
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
class NBSCData(object):
def __init__(self, language:str="en"):
self.dbcode = []
self.nid = []
self.pid = []
self.name = []
self.wdcode= []
if language == "cn":
self.url = "https://data.stats.gov.cn/easyquery.htm"
self.BASE_DIR = os.path.dirname(__file__)
self.__TREE_PATH__ = os.path.join(self.BASE_DIR, "NBSCTree", "data.pkl")
elif language == "en":
self.url = "https://data.stats.gov.cn/english/easyquery.htm"
self.BASE_DIR = os.path.dirname(__file__)
self.__TREE_PATH__ = os.path.join(self.BASE_DIR, "NBSCTree", "data_en.pkl")
def generate_header(self):
ua = UserAgent()
header = {'User-Agent':str(ua.chrome)}
return header
def tree_generation(self, rid="zb", mode:str="fast"):
"""
inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html
"""
parent = []
r = requests.post("{}?id={}&dbcode=hgnd&wdcode=zb&m=getTree".format(self.url, rid), headers=self.generate_header(), verify=False)
data = r.json()
for i in range(0, len(data)):
node = data[i]
print("[+] Downloading {} ...".format(node["name"]))
if node["isParent"]:
parent.append(node["id"])
node["children"] = self.tree_generation(rid=node["id"])
if i % 100 == 0:
print("[-] Due to the web scraping policy, sleep for 2 seconds")
time.sleep(1)
if mode == "slow":
if i % 1000 == 0:
print("[-] Due to the web scraping policy, sleep for 10 seconds")
time.sleep(10)
return data
def toc(self, nodes):
"""
inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html
"""
for i in range(0, len(nodes)):
node = nodes[i]
if node["isParent"]:
self.toc(node["children"])
else:
self.dbcode.append(node["dbcode"])
self.nid.append(node["id"])
self.name.append(node["name"])
self.pid.append(node["pid"])
self.wdcode.append(node["wdcode"])
data = pd.DataFrame({"dbcode":self.dbcode, "nid":self.nid,
"name":self.name, "pid":self.pid, "wdcode":self.wdcode})
return data
def download_data(self, nid:str=None, sj="1978-"):
params = {
"dbcode": "hgnd",
"rowcode": "zb",
"m": "QueryData",
"colcode": "sj",
"wds": "[]",
"dfwds": '[{"wdcode":"zb","valuecode":"'
+ nid
+ '"},{"wdcode":"sj","valuecode":"'
+ sj
+ '"}]',
}
r = requests.get(self.url, params=params, verify=False, headers=self.generate_header())
if r.ok:
data = r.json()["returndata"]["datanodes"]
date, value = [], []
for i in range(0, len(data)):
date.append(data[i]["wds"][1]["valuecode"])
value.append(data[i]["data"]["data"])
output = pd.DataFrame({"date":date, "value":value})
return output
if __name__ == "__main__":
nbsc = NBSCData(language="en")
nodes = nbsc.tree_generation()
toc = nbsc.toc(nodes=nodes)
toc[toc["name"].str.contains("GDP")]
data = nbsc.download_data(nid="A0203")

150
CEDA/economic/OECD.py Normal file

@@ -0,0 +1,150 @@
import requests
import xmltodict
import pandas as pd

url = {
    "OECD": "https://stats.oecd.org/SDMX-JSON/data/",
    "OECD-Key": "https://stats.oecd.org/RestSDMX/sdmx.ashx/GetKeyFamily/all",
    "OECD-Schema": "http://stats.oecd.org/restsdmx/sdmx.ashx/GetSchema/"
}

class OECDData(object):
    def __init__(self, language: str = "en"):
        self.language = language

    def _localized_text(self, name):
        # SDMX name fields may carry one entry per language; pick the requested one
        if isinstance(name, list):
            return name[0]["#text"] if self.language == "en" else name[1]["#text"]
        elif isinstance(name, dict):
            return name["#text"]
        return name

    def toc(self) -> pd.DataFrame:
        tmp_url = url["OECD-Key"]
        r = requests.get(tmp_url)
        xpars = xmltodict.parse(r.text)
        KeyFamily = xpars['message:Structure']['message:KeyFamilies']['KeyFamily']
        FamilyID, FamilyName = [], []
        for key in KeyFamily:
            FamilyID.append(key["@id"])
            FamilyName.append(self._localized_text(key["Name"]))
        toc = pd.DataFrame({"FamilyID": FamilyID, "FamilyName": FamilyName})
        return toc

    def search_toc(self, query: str = None) -> pd.DataFrame:
        toc = self.toc()
        if query is None:
            raise ValueError("query is invalid.")
        return toc[toc["FamilyName"].str.contains(query)].reset_index(drop=True)

    def tos(self, dataset: str = None) -> dict:
        if dataset is None:
            raise ValueError("dataset ID is missing")
        tmp_url = url["OECD-Schema"] + dataset
        r = requests.get(tmp_url, timeout=10)
        xpars = xmltodict.parse(r.text)
        # the schema exposes four enumerations: location, transaction, measure, frequency
        simple_types = xpars['xs:schema']['xs:simpleType']

        def unpack(enumeration):
            codes, names = [], []
            for item in enumeration:
                codes.append(item["@value"])
                names.append(self._localized_text(item["xs:annotation"]["xs:documentation"]))
            return codes, names

        code, fullname = unpack(simple_types[0]["xs:restriction"]["xs:enumeration"])
        transaction, fulltransaction = unpack(simple_types[1]["xs:restriction"]["xs:enumeration"])
        measure_list, full_measure = unpack(simple_types[2]["xs:restriction"]["xs:enumeration"])
        frequency_list, full_frequency = unpack(simple_types[3]["xs:restriction"]["xs:enumeration"])
        data = {
            "code": code,
            "fullname": fullname,
            "transaction_code": transaction,
            "transaction": fulltransaction,
            "measurement_code": measure_list,
            "measurement": full_measure,
            "frequency code": frequency_list,
            "frequency": full_frequency
        }
        return data

    def download_data(self, dataset: str = None, query: str = None):
        tmp_url = url["OECD"] + "{}/".format(dataset) + query + "/all"
        r = requests.get(tmp_url)
        data = r.json()
        return data

if __name__ == "__main__":
    oecd = OECDData()
    oecd_toc = oecd.toc()
    oecd_tos = oecd.tos(dataset="QNA")
    data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q")

54
CEDA/economic/XinHua.py Normal file

@@ -0,0 +1,54 @@
import ast
import json
import requests
import pandas as pd
from tqdm import tqdm
from pygtrans import Translate

def translate(text: str = None):
    client = Translate()
    text = client.translate(text, target="en")
    return text.translatedText

url = {
    "CNFIN": "https://api.cnfin.com/roll/charts/"
}

class XHData(object):
    def __init__(self):
        pass

    def toc(self):
        urls, tid, titles, titles_en = [], [], [], []
        # probe a range of chart IDs and keep the ones that exist
        for i in tqdm(range(12005, 12100)):
            tmp_url = url["CNFIN"] + "getContent?ids={}".format(i)
            r = requests.get(tmp_url)
            if r.ok:
                data = r.json()
                if data["data"] == "图表数据不存在":  # "chart data does not exist"
                    pass
                else:
                    urls.append(tmp_url)
                    tid.append(i)
                    title = json.loads(data["data"]["list"][0]["modelCode"])["title"]["text"]
                    titles.append(title)
                    titles_en.append(translate(text=title))
        return pd.DataFrame({"urls": urls, "id": tid, "title_zh": titles, "title_en": titles_en})

    def download_data(self, iid: int = None):
        tmp_url = url["CNFIN"] + "getContent?ids={}".format(iid)
        r = requests.get(tmp_url)
        if r.ok:
            raw_data = r.json()
            data = pd.DataFrame(ast.literal_eval(raw_data["data"]["list"][0]["content"]))
            data.columns = ["date", "data"]
            return data
        else:
            raise ValueError("Something went wrong, try again later")

if __name__ == "__main__":
    xhdata = XHData()
    toc = xhdata.toc()
    data = xhdata.download_data(iid=12006)  # GDP

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -9,7 +9,7 @@ from urllib.parse import quote, urlencode
 from fake_useragent import UserAgent
 url = {
-    "dukascopy": "http://data.deluxelau.com/forex/api/v1.0/getdata?"
+    "dukascopy": "https://data.deluxelau.com/api/v1.0/finance/getdata?"
 }
 #?instrument=usdcnh&startdate=2014-01-01&enddate=2014-12-31&timeframe=d1&pricetype=ask&utc=0&volume=false&flat=false
@@ -34,12 +34,12 @@ def dukascopy(
         "utc": "{}".format(utc),
         "pricetype": "{}".format(pricetype),
         "volume": "{}".format(str(volume).lower()),
-        "flat": "{}".format(str(flat).lower())
+        "flat": "{}".format(str(flat).lower()),
+        "token": "token=6dc8797f-aa4b-4b8c-b137-cfe9a9ace5a1"
     }
     r = requests.get(tmp_url, params=request_params, headers=request_header)
     data_text = r.text
-    output_file = demjson.decode(data_text)
+    output_file = r.json()
     return pd.json_normalize(output_file)
# example:
@@ -47,7 +47,7 @@ def dukascopy(
 df = dukascopy(instrument = "usdcnh",
                startdate = "2014-01-01",
                enddate = "2020-01-01",
-               timeframe = "h1",
+               timeframe = "m1",
                pricetype = "bid",
                utc = 0,
                volume = False,


@@ -1,14 +1,45 @@
# Centralized Economic Data API
![py_version](https://img.shields.io/badge/python-3.6+-brightgreen)
[![PyPI Version](https://img.shields.io/pypi/v/CEDApy.svg)](https://pypi.org/project/CEDApy)
[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.5229506.svg)](https://doi.org/10.5281/zenodo.5229506)
## Introduction
This is an economic data collection library, with both `python` and `R`
* "Centralized" means all-in-one, "all data" you need in one library
* "Economic data" means economic data :)
* `python` version: [https://github.com/TerenceLiu98/CEDApy](https://github.com/TerenceLiu98/CEDApy)
* `R` version: [https://github.com/TerenceLiu98/CEDAr](https://github.com/TerenceLiu98/CEDAr) *Not Started Yet*
### Economic Data
We have included APIs from multiple central banks and national statistics departments across countries/regions:
* North America:
- [x] `FredData` for [FRED (Federal Reserve Economic Data), St. Louis Fed](https://fred.stlouisfed.org/)
* Europe:
- [x] `ECBData` for [European Central Bank](https://www.ecb.europa.eu/home/html/index.en.html)
- [x] `EurostatData` for [European Statistics](https://ec.europa.eu/eurostat)
* Asia:
- [x] `NBSCData` for [National Bureau of Statistics of China](http://www.stats.gov.cn/english/)
- [x] `XHData` for [Xinhua](https://www.cnfin.com/data/macro-data/index.html)
- [x] `BOJData` for [Bank of Japan](https://www.boj.or.jp/en/index.htm/)
### Market Data
We provide two APIs for accessing market data:
- [x] `marketwatch` for [MarketWatch](https://www.marketwatch.com/)
- [x] `dukascopy` for [Dukascopy Historical Data](https://www.dukascopy.com/swiss/english/marketwatch/historical/)
*Recommendations are welcome! Tell us what data you need and we may put it into the to-do list :)*
### Other
We also collect some interesting data which may be useful in your research or projects
- [x] `EPU` for [Economic Policy Uncertainty](https://www.policyuncertainty.com/) and [Economic Policy Uncertainty in China](https://economicpolicyuncertaintyinchina.weebly.com/)
## Installation
@@ -31,15 +62,21 @@ Please check [Wiki](https://github.com/TerenceLiu98/CEDApy/wiki)
## Acknowledgement
* Thanks [akshare](https://github.com/jindaxiang/akshare/)
* Thanks [EastMoney](https://www.eastmoney.com)
-* Thanks [St.Louis Federal Reserve Bank](https://fred.stlouisfed.org/)
-* Thanks [Chicago Federal Reserve Bank](https://www.chicagofed.org/)
-* Thanks [Philadelphia Federal Reserve Bank](https://www.philadelphiafed.org/)
-* Thanks [eurostat Economic Indicators](https://ec.europa.eu/eurostat/cache/infographs/economy/desktop/index.html)
-* Thanks [Europen Central Bank](https://www.ecb.europa.eu)
-* Thanks [MarketWatch](https://www.marketwatch.com/)
-* Thansk [Dukascopy](https://www.dukascopy.bank/swiss)
+* [St.Louis Federal Reserve Bank](https://fred.stlouisfed.org/), [Chicago Federal Reserve Bank](https://www.chicagofed.org/), [Philadelphia Federal Reserve Bank](https://www.philadelphiafed.org/)
+* [eurostat Economic Indicators](https://ec.europa.eu/eurostat/cache/infographs/economy/desktop/index.html)
+* [European Central Bank](https://www.ecb.europa.eu)
+* [National Bureau of Statistics of China](http://www.stats.gov.cn/english/)
+* [Bank of Japan](https://www.boj.or.jp/en/index.htm/)
+* [MarketWatch](https://www.marketwatch.com/)
+* [Dukascopy](https://www.dukascopy.bank/swiss)
## Other Interesting Projects
Here is a list of related packages and tools that may help you find the data you want:
* [akshare](https://github.com/jindaxiang/akshare/) - an elegant and simple financial data interface library for Python, built for human beings
* [tushare](https://github.com/waditu/tushare) - a utility for crawling historical data of China stocks
* [investpy](https://github.com/alvarobartt/investpy) - Financial Data Extraction from Investing.com with Python
## If you want to cite...
@@ -54,4 +91,4 @@ Please check [Wiki](https://github.com/TerenceLiu98/CEDApy/wiki)
doi = {10.5281/zenodo.5229506},
url = {https://doi.org/10.5281/zenodo.5229506}
}
```

79
example/economic.md Normal file

@@ -0,0 +1,79 @@
## Fred
For the St. Louis FRED data, we mainly focus on the ["Main Economic Indicators" series](https://fred.stlouisfed.org/tags/series?t=mei)
```python
from CEDA.economic.Fred import *
usa = FredData(country="usa")
usa_toc = usa.toc()
data = usa.download_data(sid="LFAC24FEUSM647N")
```
## Eurostat
```python
from CEDA.economic.Eurostat import *
eurostat = EurostatData(language="en")
eurostat_toc = eurostat.toc()
GDP_related = eurostat.search_toc(query="GDP")
nama_10_gdp = eurostat.download_data(datasetcode="nama_10_gdp")
tet00004 = eurostat.download_data(datasetcode="tet00004")
```
## ECB
```python
from CEDA.economic.ECB import *
ecb = ECBData()
ecb_toc = ecb.toc()
AME = ecb.download_data(datasetname="AME")
```
## OECD
```python
from CEDA.economic.OECD import *
oecd = OECDData()
oecd_toc = oecd.toc()
oecd_tos = oecd.tos(dataset="QNA")
data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q")
```
## NBSC
```python
from CEDA.economic.NBSC import *
nbsc = NBSCData(language="en")
nbsc_nodes = nbsc.tree_generation()
nbsc_toc = nbsc.toc(nodes=nbsc_nodes)
nbsc_toc[nbsc_toc["name"].str.contains("GDP")]
A0203 = nbsc.download_data(nid="A0203")
```
## Xinhua
```python
from CEDA.economic.XinHua import *
xhdata = XHData()
toc = xhdata.toc()
data = xhdata.download_data(iid=12006) # GDP
```
## BOJ
```python
from CEDA.economic.BOJ import *
boj = BOJData()
boj_toc = boj.toc()
survey = boj.download_data("Survey")
```
## EPU
```python
from CEDA.economic.EPU import *
countries, annotations = country_list()
can_epu = EPUData(country="Canada")
mainland_china_epu = EPUData(country="China")
# download() returns {"data": [...], "reference": [...]}
can_result = can_epu.download()
can_data, can_reference = can_result["data"], can_result["reference"]
cn_result = mainland_china_epu.download()
cn_data, cn_reference = cn_result["data"], cn_result["reference"]
```

0
example/market.md Normal file


@@ -1,6 +1,6 @@
certifi==2020.12.5
chardet==4.0.0
-demjson==2.2.4
+demjson
fake-useragent==0.1.11
idna==2.10
numpy==1.20.3
@@ -11,3 +11,4 @@ requests==2.25.1
six==1.16.0
urllib3==1.26.5
wincertstore==0.2
beautifulsoup4==4.10.0