This commit is contained in:
TerenceLiu98 2022-01-13 00:42:04 +08:00
parent 14f21f6a8e
commit c82e19e011
6 changed files with 186 additions and 54 deletions

3
.gitignore vendored
View File

@ -5,6 +5,9 @@
### Linux ### ### Linux ###
*~ *~
### HOW TO MAKE ###
HOWTO.md
# temporary files which can be created if a process still has a handle open of a deleted file # temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden* .fuse_hidden*

105
CEDA/macroecon/FredData.py Normal file
View File

@ -0,0 +1,105 @@
import io
import os
import ssl
import time
import json
import tqdm
import requests
import numpy as np
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup
import dateutil.parser as dparser
from fake_useragent import UserAgent
ssl._create_default_https_context = ssl._create_unverified_context
# Main Economic Indicators: https://alfred.stlouisfed.org/release?rid=205
# Base endpoints for every remote data provider this module scrapes.
url = {
    # CSV download endpoint for FRED graph data (query string is appended).
    "fred_econ": "https://fred.stlouisfed.org/graph/fredgraph.csv?",
    # HTML page for a single FRED series (series id is appended).
    "fred_series": "https://fred.stlouisfed.org/series/",
    # Philadelphia Fed real-time data research landing page.
    "philfed": "https://www.philadelphiafed.org/surveys-and-data/real-time-data-research/",
    # Chicago Fed publications root (CSV/XLSX files live under it).
    "chicagofed": "https://www.chicagofed.org/~/media/publications/",
    # OECD SDMX-JSON API for DP_LIVE indicators.
    "OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/"
}
def get_tag(id: str) -> list:
    """Scrape the tag cloud of a FRED series page and return the tag names.

    Fetches ``url["fred_series"] + id`` and collects the text of every
    anchor inside each ``div.series-tag-cloud``, with runs of whitespace
    collapsed to single spaces.
    """
    page = requests.get(url["fred_series"] + id)
    soup = BeautifulSoup(page.text, "html.parser")
    clouds = soup.find_all("div", {"class": "series-tag-cloud"})
    collected = []
    for cloud in tqdm.tqdm(clouds):
        for anchor in tqdm.tqdm(cloud.find_all("a"), leave=False):
            collected.append(" ".join(anchor.text.split()))
    return collected
def get_metadata(id: str = None) -> dict:
    """Scrape descriptive metadata for one FRED series page.

    Parameters
    ----------
    id : str
        FRED series identifier (e.g. ``"GDP"``); appended to the series URL.

    Returns
    -------
    dict
        Keys: ``name``, ``id``, ``update_time`` (``YYYY-MM-DD``), ``units``,
        ``frequency``, ``tags`` (via :func:`get_tag`, which refetches the page).
    """
    tmp_url = url["fred_series"] + id
    r = requests.get(tmp_url)
    # Parse the HTML once and reuse it; the original re-parsed the full page
    # with BeautifulSoup for every single field.
    soup = BeautifulSoup(r.text, "html.parser")
    meta_cols = soup.find_all('div', {"class": "pull-left meta-col"})
    metadata = {
        "name": " ".join(soup.find_all('div', {"class": "page-title"})[0].span.text.split()),
        "id": id,
        "update_time": datetime.strftime(
            dparser.parse(meta_cols[0].find_all('span')[3].text, fuzzy=True),
            format="%Y-%m-%d"),
        "units": meta_cols[1].find_all('span')[0].text.split(" ")[0],
        # NOTE(review): a token produced by split(" ")[1] contains no spaces,
        # so the second .split(" ")[1] looks like it must raise IndexError —
        # confirm against the live page markup before relying on this field.
        "frequency": meta_cols[2].find_all('span')[0].text.split(" ")[1].split(" ")[1],
        "tags": get_tag(id)
    }
    return metadata
def date_transform(df, format_origin, format_after):
    """Re-format every date string in *df* from one strftime pattern to another.

    Parameters
    ----------
    df : sequence of str
        Date strings, each parseable with *format_origin*.
    format_origin, format_after : str
        ``strptime``/``strftime`` patterns for input and output respectively.

    Returns
    -------
    list of str
        The dates rendered in *format_after*, in the original order.
    """
    return [
        datetime.strptime(value, format_origin).strftime(format_after)
        for value in df
    ]
class FredData(object):
    """Scrape FRED "Main Economic Indicators" series ids for one country.

    Listing pages: https://fred.stlouisfed.org/tags/series?t=mei
    """

    # Descriptive metadata about the scraped source.
    # NOTE(review): the original bound this dict to a throwaway local named
    # __annotations__ inside __init__, which had no effect at all; it is kept
    # here as a class attribute purely as documentation.
    SOURCE_INFO = {"name": "Main Economic Indicators",
                   "url": "https://fred.stlouisfed.org/tags/series?t=mei"}

    def __init__(self, country: str = "usa"):
        # Country slug used in the FRED tag query (e.g. "usa", "china", "eu").
        self.country = country

    @staticmethod
    def _ids_from_html(html: str) -> list:
        """Pull every series id out of the result tables of one listing page."""
        id_list = []
        for table in BeautifulSoup(html, "html.parser").find_all("table"):
            for anchor in table.find_all("a"):
                # hrefs look like /series/<ID>; the last path segment is the id.
                id_list.append(anchor.get("href").split("/")[-1])
        return id_list

    def get_id(self, url: str) -> list:
        """Fetch *url* and return the FRED series ids found on that page."""
        r = requests.get(url)
        return self._ids_from_html(r.text)

    def extract_id(self):
        """Walk the paged MEI tag listing and return deduplicated series ids.

        Iterates pages 1..99 and, past page 20, stops as soon as a page
        reports "No series".
        """
        id_list = []
        for i in tqdm.tqdm(range(1, 100)):
            tmp_url = "https://fred.stlouisfed.org/tags/series?ob=pv&od=desc&t=mei%3B{}&pageID={}".format(
                self.country, str(i))
            # Fetch each page exactly once; the original issued a second GET
            # of the same URL just to evaluate the stop condition, doubling
            # the HTTP traffic.
            r = requests.get(tmp_url)
            id_list.append(self._ids_from_html(r.text))
            if i > 20 and "No series" in r.text:
                break
        id_list = [item for sublist in id_list for item in sublist]
        return list(set(id_list))
if __name__ == "__main__":
    # Smoke test: collect Main Economic Indicator series ids for several
    # economies. Each extract_id() call issues many HTTP requests to
    # fred.stlouisfed.org, so this is network-bound and slow.
    usa = FredData(country="usa")
    usa_list = usa.extract_id()
    china = FredData(country="china")
    china_list = china.extract_id()
    japan = FredData(country="japan")
    japan_list = japan.extract_id()
    eu = FredData(country="eu")
    eu_list = eu.extract_id()

View File

@ -1,14 +1,17 @@
import pandas as pd
import numpy as np
import requests
from fake_useragent import UserAgent
import io import io
import os import os
import ssl
import time import time
import json import json
import demjson import requests
import numpy as np
import pandas as pd
from datetime import datetime from datetime import datetime
import ssl from bs4 import BeautifulSoup
from multiprocessing import Pool
import dateutil.parser as dparser
from fake_useragent import UserAgent
ssl._create_default_https_context = ssl._create_unverified_context ssl._create_default_https_context = ssl._create_unverified_context
# Main Economic Indicators: https://alfred.stlouisfed.org/release?rid=205 # Main Economic Indicators: https://alfred.stlouisfed.org/release?rid=205
@ -19,13 +22,16 @@ url = {
"OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/" "OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/"
} }
def date_transform(df, format_origin, format_after): def date_transform(df, format_origin, format_after):
return_list = [] return_list = []
for i in range(0, len(df)): for i in range(0, len(df)):
return_list.append(datetime.strptime(df[i], format_origin).strftime(format_after)) return_list.append(datetime.strptime(
df[i], format_origin).strftime(format_after))
return return_list return return_list
def gdp_quarterly(startdate="1947-01-01", enddate="2021-01-01"): def gdp_quarterly(startdate="1947-01-01", enddate="2021-01-01"):
""" """
Full Name: Gross Domestic Product Full Name: Gross Domestic Product
@ -44,7 +50,7 @@ def gdp_quarterly(startdate="1947-01-01", enddate="2021-01-01"):
data_text = r.content data_text = r.content
df = pd.read_csv(io.StringIO(data_text.decode('utf-8'))) df = pd.read_csv(io.StringIO(data_text.decode('utf-8')))
df.columns = ["Date", "GDP"] df.columns = ["Date", "GDP"]
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
df["GDP"] = df["GDP"].astype(float) df["GDP"] = df["GDP"].astype(float)
return df return df
@ -107,10 +113,11 @@ def payems_monthly(startdate="1939-01-01", enddate="2021-01-01"):
data_text = r.content data_text = r.content
df = pd.read_csv(io.StringIO(data_text.decode('utf-8'))) df = pd.read_csv(io.StringIO(data_text.decode('utf-8')))
df.columns = ["Date", "Payems"] df.columns = ["Date", "Payems"]
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
df["Payems"] = df["Payems"].astype(float) df["Payems"] = df["Payems"].astype(float)
return df return df
def ppi(): def ppi():
tmp_url = url["fred_econ"] + "bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=968&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=PPIACO,PCUOMFGOMFG&scale=left,left&cosd=1913-01-01,1984-12-01&coed=2021-04-01,2021-04-01&line_color=%234572a7,%23aa4643&link_values=false,false&line_style=solid,solid&mark_type=none,none&mw=3,3&lw=2,2&ost=-99999,-99999&oet=99999,99999&mma=0,0&fml=a,a&fq=Monthly,Monthly&fam=avg,avg&fgst=lin,lin&fgsnd=2020-02-01,2020-02-01&line_index=1,2&transformation=lin,lin&vintage_date=2021-06-10,2021-06-10&revision_date=2021-06-10,2021-06-10&nd=1913-01-01,1984-12-01" tmp_url = url["fred_econ"] + "bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=968&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=PPIACO,PCUOMFGOMFG&scale=left,left&cosd=1913-01-01,1984-12-01&coed=2021-04-01,2021-04-01&line_color=%234572a7,%23aa4643&link_values=false,false&line_style=solid,solid&mark_type=none,none&mw=3,3&lw=2,2&ost=-99999,-99999&oet=99999,99999&mma=0,0&fml=a,a&fq=Monthly,Monthly&fam=avg,avg&fgst=lin,lin&fgsnd=2020-02-01,2020-02-01&line_index=1,2&transformation=lin,lin&vintage_date=2021-06-10,2021-06-10&revision_date=2021-06-10,2021-06-10&nd=1913-01-01,1984-12-01"
ua = UserAgent(verify_ssl=False) ua = UserAgent(verify_ssl=False)
@ -124,18 +131,20 @@ def ppi():
"PPIACO": "Producer Price Index by Commodity: All Commodities", "PPIACO": "Producer Price Index by Commodity: All Commodities",
"PCUOMFGOMFG": "Producer Price Index by Industry: Total Manufacturing Industries" "PCUOMFGOMFG": "Producer Price Index by Industry: Total Manufacturing Industries"
} }
df.replace(".", np.nan, inplace = True) df.replace(".", np.nan, inplace=True)
df.columns = ["Date", "PPI_C", "PPI_I"] df.columns = ["Date", "PPI_C", "PPI_I"]
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
df[["PPI_C", "PPI_I"]] = df[["PPI_C", "PPI_I"]].astype(float) df[["PPI_C", "PPI_I"]] = df[["PPI_C", "PPI_I"]].astype(float)
return df return df
def pmi(): def pmi():
t = time.time() t = time.time()
res = requests.get( res = requests.get(
f"https://cdn.jin10.com/dc/reports/dc_usa_ism_pmi_all.js?v={str(int(round(t * 1000))), str(int(round(t * 1000)) + 90)}" f"https://cdn.jin10.com/dc/reports/dc_usa_ism_pmi_all.js?v={str(int(round(t * 1000))), str(int(round(t * 1000)) + 90)}"
) )
json_data = json.loads(res.text[res.text.find("{"): res.text.rfind("}") + 1]) json_data = json.loads(
res.text[res.text.find("{"): res.text.rfind("}") + 1])
date_list = [item["date"] for item in json_data["list"]] date_list = [item["date"] for item in json_data["list"]]
value_list = [item["datas"]["美国ISM制造业PMI报告"] for item in json_data["list"]] value_list = [item["datas"]["美国ISM制造业PMI报告"] for item in json_data["list"]]
value_df = pd.DataFrame(value_list) value_df = pd.DataFrame(value_list)
@ -180,16 +189,18 @@ def pmi():
temp_df.name = "usa_ism_pmi" temp_df.name = "usa_ism_pmi"
temp_df = temp_df.astype("float") temp_df = temp_df.astype("float")
PMI_I = pd.DataFrame() PMI_I = pd.DataFrame()
PMI_I["Date"] = pd.to_datetime(temp_df.index, format = "%Y-%m-%d") PMI_I["Date"] = pd.to_datetime(temp_df.index, format="%Y-%m-%d")
PMI_I["ISM_PMI_I"] = np.array(temp_df).astype(float) PMI_I["ISM_PMI_I"] = np.array(temp_df).astype(float)
t = time.time() t = time.time()
res = requests.get( res = requests.get(
f"https://cdn.jin10.com/dc/reports/dc_usa_ism_non_pmi_all.js?v={str(int(round(t * 1000))), str(int(round(t * 1000)) + 90)}" f"https://cdn.jin10.com/dc/reports/dc_usa_ism_non_pmi_all.js?v={str(int(round(t * 1000))), str(int(round(t * 1000)) + 90)}"
) )
json_data = json.loads(res.text[res.text.find("{"): res.text.rfind("}") + 1]) json_data = json.loads(
res.text[res.text.find("{"): res.text.rfind("}") + 1])
date_list = [item["date"] for item in json_data["list"]] date_list = [item["date"] for item in json_data["list"]]
value_list = [item["datas"]["美国ISM非制造业PMI报告"] for item in json_data["list"]] value_list = [item["datas"]["美国ISM非制造业PMI报告"]
for item in json_data["list"]]
value_df = pd.DataFrame(value_list) value_df = pd.DataFrame(value_list)
value_df.columns = json_data["kinds"] value_df.columns = json_data["kinds"]
value_df.index = pd.to_datetime(date_list) value_df.index = pd.to_datetime(date_list)
@ -232,9 +243,9 @@ def pmi():
temp_df.name = "usa_ism_non_pmi" temp_df.name = "usa_ism_non_pmi"
temp_df = temp_df.astype("float") temp_df = temp_df.astype("float")
PMI_NI = pd.DataFrame() PMI_NI = pd.DataFrame()
PMI_NI["Date"] = pd.to_datetime(temp_df.index, format = "%Y-%m-%d") PMI_NI["Date"] = pd.to_datetime(temp_df.index, format="%Y-%m-%d")
PMI_NI["ISM_PMI_NI"] = np.array(temp_df).astype(float) PMI_NI["ISM_PMI_NI"] = np.array(temp_df).astype(float)
PMI = pd.merge_asof(PMI_I, PMI_NI, on = "Date") PMI = pd.merge_asof(PMI_I, PMI_NI, on="Date")
return PMI return PMI
@ -410,8 +421,9 @@ def cpi(startdate="1960-01-01", enddate="2021-01-01"):
direction="backward") direction="backward")
df = pd.merge_asof(df, df_annually, on="DATE", direction="backward") df = pd.merge_asof(df, df_annually, on="DATE", direction="backward")
df.columns = ["Date", "CPI_Monthly", "CPI_Quarterly", "CPI_Annually"] df.columns = ["Date", "CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
df[["CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]] = df[["CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]].astype(float) df[["CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]] = df[[
"CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]].astype(float)
return df return df
@ -676,7 +688,7 @@ def cci(startdate="1955-01-01", enddate="2021-01-01"):
data_text = r.content data_text = r.content
df = pd.read_csv(io.StringIO(data_text.decode('utf-8'))) df = pd.read_csv(io.StringIO(data_text.decode('utf-8')))
df.columns = ["Date", "CCI_Monthly"] df.columns = ["Date", "CCI_Monthly"]
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
return df return df
@ -698,7 +710,7 @@ def bci(startdate="1955-01-01", enddate="2021-01-01"):
data_text = r.content data_text = r.content
df = pd.read_csv(io.StringIO(data_text.decode('utf-8'))) df = pd.read_csv(io.StringIO(data_text.decode('utf-8')))
df.columns = ["Date", "BCI_Annually"] df.columns = ["Date", "BCI_Annually"]
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
return df return df
@ -969,7 +981,8 @@ def inflation_nowcasting():
for i in range(0, len(tmp_df)): for i in range(0, len(tmp_df)):
date = tmp_df['chart'][i]['subcaption'][:4] + "/" + \ date = tmp_df['chart'][i]['subcaption'][:4] + "/" + \
pd.DataFrame(tmp_df["dataset"][i][0]['data'])['tooltext'].str.extract(r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])\b")[0] + "/" + \ pd.DataFrame(tmp_df["dataset"][i][0]['data'])['tooltext'].str.extract(r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])\b")[0] + "/" + \
pd.DataFrame(tmp_df["dataset"][i][0]['data'])['tooltext'].str.extract(r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])\b")[1] pd.DataFrame(tmp_df["dataset"][i][0]['data'])['tooltext'].str.extract(
r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])\b")[1]
CPI_I = pd.DataFrame( CPI_I = pd.DataFrame(
(pd.DataFrame(tmp_df["dataset"][i])['data'])[0])["value"] (pd.DataFrame(tmp_df["dataset"][i])['data'])[0])["value"]
C_CPI_I = pd.DataFrame( C_CPI_I = pd.DataFrame(
@ -1006,7 +1019,7 @@ def bbki():
tmp_url = url["chicagofed"] + "bbki/bbki-monthly-data-series-csv.csv" tmp_url = url["chicagofed"] + "bbki/bbki-monthly-data-series-csv.csv"
df = pd.read_csv(tmp_url) df = pd.read_csv(tmp_url)
df["Date"] = date_transform(df["Date"], "%m/%d/%Y", "%Y-%m-%d") df["Date"] = date_transform(df["Date"], "%m/%d/%Y", "%Y-%m-%d")
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
return df return df
@ -1014,7 +1027,7 @@ def cfnai():
tmp_url = url["chicagofed"] + "cfnai/cfnai-data-series-csv.csv" tmp_url = url["chicagofed"] + "cfnai/cfnai-data-series-csv.csv"
df = pd.read_csv(tmp_url) df = pd.read_csv(tmp_url)
df["Date"] = date_transform(df["Date"], "%Y/%m", "%Y-%m-%d") df["Date"] = date_transform(df["Date"], "%Y/%m", "%Y-%m-%d")
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
return df return df
@ -1022,7 +1035,7 @@ def cfsbc():
tmp_url = url["chicagofed"] + "cfsbc/cfsbc-data-xlsx.xlsx" tmp_url = url["chicagofed"] + "cfsbc/cfsbc-data-xlsx.xlsx"
df = pd.read_excel(tmp_url) df = pd.read_excel(tmp_url)
df["Date"] = date_transform(df["Date"], "%Y-%m", "%Y-%m-%d") df["Date"] = date_transform(df["Date"], "%Y-%m", "%Y-%m-%d")
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
return df return df
@ -1031,10 +1044,11 @@ def nfci():
df = pd.read_csv(tmp_url) df = pd.read_csv(tmp_url)
df.columns = ["Date", "NFCI", "Risk", "Credit", "Leverage"] df.columns = ["Date", "NFCI", "Risk", "Credit", "Leverage"]
df["Date"] = date_transform(df["Date"], "%Y/%m/%d", "%Y-%m-%d") df["Date"] = date_transform(df["Date"], "%Y/%m/%d", "%Y-%m-%d")
df["Date"] = pd.to_datetime(df["Date"], format = "%Y-%m-%d") df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
return df return df
def Leading_Indicators_OECD(startdate = "1950-01", enddate = "2021-05"):
def Leading_Indicators_OECD(startdate="1950-01", enddate="2021-05"):
# CLI # CLI
tmp_url = url["OECD"] + "USA.CLI.AMPLITUD.LTRENDIDX.M/OECD" tmp_url = url["OECD"] + "USA.CLI.AMPLITUD.LTRENDIDX.M/OECD"
ua = UserAgent(verify_ssl=False) ua = UserAgent(verify_ssl=False)
@ -1047,13 +1061,14 @@ def Leading_Indicators_OECD(startdate = "1950-01", enddate = "2021-05"):
"endPeriod": "{}".format(enddate) "endPeriod": "{}".format(enddate)
} }
request_header = {"User-Agent": ua.random} request_header = {"User-Agent": ua.random}
r = requests.get(tmp_url, params = request_params, headers=request_header) r = requests.get(tmp_url, params=request_params, headers=request_header)
data_text = r.content data_text = r.content
df_cli = pd.read_csv(io.StringIO(data_text.decode('utf-8')))[["TIME", "Value"]] df_cli = pd.read_csv(io.StringIO(
data_text.decode('utf-8')))[["TIME", "Value"]]
df_cli.columns = ["Date", "US_OECD_CLI"] df_cli.columns = ["Date", "US_OECD_CLI"]
df_cli["Date"] = pd.to_datetime(df_cli["Date"], format = "%Y-%m") df_cli["Date"] = pd.to_datetime(df_cli["Date"], format="%Y-%m")
df_cli["US_OECD_CLI"] = df_cli["US_OECD_CLI"].astype(float) df_cli["US_OECD_CLI"] = df_cli["US_OECD_CLI"].astype(float)
#BCI # BCI
tmp_url = url["OECD"] + "USA.BCI.AMPLITUD.LTRENDIDX.M/OECD" tmp_url = url["OECD"] + "USA.BCI.AMPLITUD.LTRENDIDX.M/OECD"
ua = UserAgent(verify_ssl=False) ua = UserAgent(verify_ssl=False)
request_params = { request_params = {
@ -1065,11 +1080,12 @@ def Leading_Indicators_OECD(startdate = "1950-01", enddate = "2021-05"):
"endPeriod": "{}".format(enddate) "endPeriod": "{}".format(enddate)
} }
request_header = {"User-Agent": ua.random} request_header = {"User-Agent": ua.random}
r = requests.get(tmp_url, params = request_params, headers=request_header) r = requests.get(tmp_url, params=request_params, headers=request_header)
data_text = r.content data_text = r.content
df_bci = pd.read_csv(io.StringIO(data_text.decode('utf-8')))[["TIME", "Value"]] df_bci = pd.read_csv(io.StringIO(
data_text.decode('utf-8')))[["TIME", "Value"]]
df_bci.columns = ["Date", "US_OECD_BCI"] df_bci.columns = ["Date", "US_OECD_BCI"]
df_bci["Date"] = pd.to_datetime(df_bci["Date"], format = "%Y-%m") df_bci["Date"] = pd.to_datetime(df_bci["Date"], format="%Y-%m")
df_bci["US_OECD_BCI"] = df_bci["US_OECD_BCI"].astype(float) df_bci["US_OECD_BCI"] = df_bci["US_OECD_BCI"].astype(float)
# CCI # CCI
tmp_url = url["OECD"] + "USA.CCI.AMPLITUD.LTRENDIDX.M/OECD" tmp_url = url["OECD"] + "USA.CCI.AMPLITUD.LTRENDIDX.M/OECD"
@ -1083,25 +1099,32 @@ def Leading_Indicators_OECD(startdate = "1950-01", enddate = "2021-05"):
"endPeriod": "{}".format(enddate) "endPeriod": "{}".format(enddate)
} }
request_header = {"User-Agent": ua.random} request_header = {"User-Agent": ua.random}
r = requests.get(tmp_url, params = request_params, headers=request_header) r = requests.get(tmp_url, params=request_params, headers=request_header)
data_text = r.content data_text = r.content
df_cci = pd.read_csv(io.StringIO(data_text.decode('utf-8')))[["TIME", "Value"]] df_cci = pd.read_csv(io.StringIO(
data_text.decode('utf-8')))[["TIME", "Value"]]
df_cci.columns = ["Date", "US_OECD_CCI"] df_cci.columns = ["Date", "US_OECD_CCI"]
df_cci["Date"] = pd.to_datetime(df_cci["Date"], format = "%Y-%m") df_cci["Date"] = pd.to_datetime(df_cci["Date"], format="%Y-%m")
df_cci["US_OECD_CCI"] = df_cci["US_OECD_CCI"].astype(float) df_cci["US_OECD_CCI"] = df_cci["US_OECD_CCI"].astype(float)
df = pd.merge_asof(df_cli, df_bci, on = "Date") df = pd.merge_asof(df_cli, df_bci, on="Date")
df = pd.merge_asof(df, df_cci, on = "Date") df = pd.merge_asof(df, df_cci, on="Date")
return df return df
def US_EPU_Monthly(): def US_EPU_Monthly():
df = pd.read_excel("https://www.policyuncertainty.com/media/US_Policy_Uncertainty_Data.xlsx")[:-1] df = pd.read_excel(
df['Date']=pd.to_datetime(df['Year'].apply(str).str.cat(df['Month'].apply(int).apply(str),sep='-'), format='%Y-%m') "https://www.policyuncertainty.com/media/US_Policy_Uncertainty_Data.xlsx")[:-1]
df['Date'] = pd.to_datetime(df['Year'].apply(str).str.cat(
df['Month'].apply(int).apply(str), sep='-'), format='%Y-%m')
df = df[["Date", "Three_Component_Index", "News_Based_Policy_Uncert_Index"]] df = df[["Date", "Three_Component_Index", "News_Based_Policy_Uncert_Index"]]
return df return df
def US_EPU_Daily(): def US_EPU_Daily():
df = pd.read_csv("https://www.policyuncertainty.com/media/All_Daily_Policy_Data.csv")[:-1] df = pd.read_csv(
df['Date']=pd.to_datetime(df['year'].apply(str).str.cat(df['month'].apply(str),sep='-').apply(str).str.cat(df['day'].apply(str),sep='-'), format='%Y-%m-%d') "https://www.policyuncertainty.com/media/All_Daily_Policy_Data.csv")[:-1]
df['Date'] = pd.to_datetime(df['year'].apply(str).str.cat(df['month'].apply(
str), sep='-').apply(str).str.cat(df['day'].apply(str), sep='-'), format='%Y-%m-%d')
df = df.drop(["year", "month", "day"], axis=1) df = df.drop(["year", "month", "day"], axis=1)
return df return df

View File

@ -9,7 +9,7 @@ from urllib.parse import quote, urlencode
from fake_useragent import UserAgent from fake_useragent import UserAgent
url = { url = {
"dukascopy": "http://data.deluxelau.com/forex/api/v1.0/getdata?" "dukascopy": "https://data.deluxelau.com/api/v1.0/finance/getdata?"
} }
#?instrument=usdcnh&startdate=2014-01-01&enddate=2014-12-31&timeframe=d1&pricetype=ask&utc=0&volume=false&flat=false #?instrument=usdcnh&startdate=2014-01-01&enddate=2014-12-31&timeframe=d1&pricetype=ask&utc=0&volume=false&flat=false
@ -34,12 +34,12 @@ def dukascopy(
"utc": "{}".format(utc), "utc": "{}".format(utc),
"pricetype": "{}".format(pricetype), "pricetype": "{}".format(pricetype),
"volume": "{}".format(str(volume).lower()), "volume": "{}".format(str(volume).lower()),
"flat": "{}".format(str(flat).lower()) "flat": "{}".format(str(flat).lower()),
"token": "token=6dc8797f-aa4b-4b8c-b137-cfe9a9ace5a1"
} }
r = requests.get(tmp_url, params=request_params, headers=request_header) r = requests.get(tmp_url, params=request_params, headers=request_header)
data_text = r.text output_file = r.json()
output_file = demjson.decode(data_text)
return pd.json_normalize(output_file) return pd.json_normalize(output_file)
# example: # example:
@ -47,7 +47,7 @@ def dukascopy(
df = dukascopy(instrument = "usdcnh", df = dukascopy(instrument = "usdcnh",
startdate = "2014-01-01", startdate = "2014-01-01",
enddate = "2020-01-01", enddate = "2020-01-01",
timeframe = "h1", timeframe = "m1",
pricetype = "bid", pricetype = "bid",
utc = 0, utc = 0,
volume = False, volume = False,

View File

@ -1,6 +1,6 @@
certifi==2020.12.5 certifi==2020.12.5
chardet==4.0.0 chardet==4.0.0
demjson==2.2.4 demjson
fake-useragent==0.1.11 fake-useragent==0.1.11
idna==2.10 idna==2.10
numpy==1.20.3 numpy==1.20.3
@ -11,3 +11,4 @@ requests==2.25.1
six==1.16.0 six==1.16.0
urllib3==1.26.5 urllib3==1.26.5
wincertstore==0.2 wincertstore==0.2
beautifulsoup4==4-4.10.1