Commit c82e19e011 (parent 14f21f6a8e)
TerenceLiu98, 2022-01-13 00:42:04 +08:00
6 changed files with 186 additions and 54 deletions

.gitignore vendored (3 additions)

@@ -5,6 +5,9 @@
### Linux ###
*~
### HOW TO MAKE ###
HOWTO.md
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*

CEDA/macroecon/FredData.py (new file, 105 additions)

@@ -0,0 +1,105 @@
import io
import os
import ssl
import time
import json
import tqdm
import requests
import numpy as np
import pandas as pd
from datetime import datetime
from bs4 import BeautifulSoup
import dateutil.parser as dparser
from fake_useragent import UserAgent
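# NOTE: the next line disables TLS certificate verification for all HTTPS requests in this module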
ssl._create_default_https_context = ssl._create_unverified_context
# Main Economic Indicators: https://alfred.stlouisfed.org/release?rid=205
url = {
"fred_econ": "https://fred.stlouisfed.org/graph/fredgraph.csv?",
"fred_series": "https://fred.stlouisfed.org/series/",
"philfed": "https://www.philadelphiafed.org/surveys-and-data/real-time-data-research/",
"chicagofed": "https://www.chicagofed.org/~/media/publications/",
"OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/"
}
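
# Scrape the tag cloud on a FRED series page and return its tag names.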
def get_tag(id: str) -> list:
tmp_url = url["fred_series"] + id
r = requests.get(tmp_url)
tags = []
tags_series = BeautifulSoup(r.text, "html.parser").find_all(
"div", {"class": "series-tag-cloud"})
for i in tqdm.tqdm(range(0, len(tags_series))):
subtable = tags_series[i].find_all("a")
for j in tqdm.tqdm(range(0, len(subtable)), leave=False):
tags.append((" ".join(subtable[j].text.split())))
return tags
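
# Assemble metadata (name, last update, units, frequency, tags) for one series id.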
def get_metadata(id: str) -> dict:
    tmp_url = url["fred_series"] + id
    r = requests.get(tmp_url)
    soup = BeautifulSoup(r.text, "html.parser")
    meta_cols = soup.find_all("div", {"class": "pull-left meta-col"})
    metadata = {
        "name": " ".join(soup.find_all("div", {"class": "page-title"})[0].span.text.split()),
        "id": id,
        "update_time": datetime.strftime(
            dparser.parse(meta_cols[0].find_all("span")[3].text, fuzzy=True), "%Y-%m-%d"),
        "units": meta_cols[1].find_all("span")[0].text.split(" ")[0],
        # assumes the span reads like "Frequency: Monthly"; the original chained
        # split(" ")[1].split(" ")[1] always raised IndexError on a single-word token
        "frequency": meta_cols[2].find_all("span")[0].text.split(" ")[1],
        "tags": get_tag(id)
    }
    return metadata
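
# Convert a list of date strings from one strftime format to another.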
def date_transform(df, format_origin, format_after):
return_list = []
for i in range(0, len(df)):
return_list.append(datetime.strptime(
df[i], format_origin).strftime(format_after))
return return_list
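
# Crawler for the series ids under FRED's "Main Economic Indicators" (MEI) tag, per country.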
class FredData(object):
def __init__(self, country: str = "usa"):
self.country = country
__annotations__ = {"name": "Main Economic Indicators",
"url": "https://fred.stlouisfed.org/tags/series?t=mei"}
def get_id(self, url: str) -> list:
id_list = []
r = requests.get(url)
table = BeautifulSoup(r.text, "html.parser").find_all("table")
for i in range(0, len(table)):
subtable = table[i].find_all("a")
for j in range(0, len(subtable)):
id_list.append(subtable[j].get("href").split("/")[-1])
return id_list
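
    # Walk the paginated per-country MEI listing and return de-duplicated series ids.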
def extract_id(self):
id_list = []
for i in tqdm.tqdm(range(1, 100)):
tmp_url = "https://fred.stlouisfed.org/tags/series?ob=pv&od=desc&t=mei%3B{}&pageID={}".format(
self.country, str(i))
id_list.append(self.get_id(tmp_url))
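            # beyond page 20, probe each page and stop at the first one with no series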
if i > 20:
r = requests.get(tmp_url)
if "No series" in r.text:
break
else:
continue
id_list = [item for sublist in id_list for item in sublist]
id_list = list(set(id_list))
return id_list
if __name__ == "__main__":
usa = FredData(country="usa")
usa_list = usa.extract_id()
china = FredData(country="china")
china_list = china.extract_id()
japan = FredData(country="japan")
japan_list = japan.extract_id()
eu = FredData(country="eu")
eu_list = eu.extract_id()

View File

@@ -1,14 +1,17 @@
import pandas as pd
import numpy as np
import requests
from fake_useragent import UserAgent
import io
import os
import ssl
import time
import json
import demjson
import requests
import numpy as np
import pandas as pd
from datetime import datetime
import ssl
from bs4 import BeautifulSoup
from multiprocessing import Pool
import dateutil.parser as dparser
from fake_useragent import UserAgent
ssl._create_default_https_context = ssl._create_unverified_context
# Main Economic Indicators: https://alfred.stlouisfed.org/release?rid=205
@@ -19,13 +22,16 @@ url = {
"OECD": "https://stats.oecd.org/sdmx-json/data/DP_LIVE/"
}
def date_transform(df, format_origin, format_after):
return_list = []
for i in range(0, len(df)):
return_list.append(datetime.strptime(df[i], format_origin).strftime(format_after))
return_list.append(datetime.strptime(
df[i], format_origin).strftime(format_after))
return return_list
def gdp_quarterly(startdate="1947-01-01", enddate="2021-01-01"):
"""
Full Name: Gross Domestic Product
@@ -111,6 +117,7 @@ def payems_monthly(startdate="1939-01-01", enddate="2021-01-01"):
df["Payems"] = df["Payems"].astype(float)
return df
def ppi():
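    # FRED graph CSV export covering PPIACO (PPI: All Commodities) and
    # PCUOMFGOMFG (PPI: Total Manufacturing Industries)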
tmp_url = url["fred_econ"] + "bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=968&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=PPIACO,PCUOMFGOMFG&scale=left,left&cosd=1913-01-01,1984-12-01&coed=2021-04-01,2021-04-01&line_color=%234572a7,%23aa4643&link_values=false,false&line_style=solid,solid&mark_type=none,none&mw=3,3&lw=2,2&ost=-99999,-99999&oet=99999,99999&mma=0,0&fml=a,a&fq=Monthly,Monthly&fam=avg,avg&fgst=lin,lin&fgsnd=2020-02-01,2020-02-01&line_index=1,2&transformation=lin,lin&vintage_date=2021-06-10,2021-06-10&revision_date=2021-06-10,2021-06-10&nd=1913-01-01,1984-12-01"
ua = UserAgent(verify_ssl=False)
@@ -130,12 +137,14 @@ def ppi():
df[["PPI_C", "PPI_I"]] = df[["PPI_C", "PPI_I"]].astype(float)
return df
def pmi():
t = time.time()
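    # millisecond timestamp used as a cache-busting query parameter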
    res = requests.get(
        f"https://cdn.jin10.com/dc/reports/dc_usa_ism_pmi_all.js?v={str(int(round(t * 1000)))}"
    )
json_data = json.loads(res.text[res.text.find("{"): res.text.rfind("}") + 1])
json_data = json.loads(
res.text[res.text.find("{"): res.text.rfind("}") + 1])
date_list = [item["date"] for item in json_data["list"]]
value_list = [item["datas"]["美国ISM制造业PMI报告"] for item in json_data["list"]]
value_df = pd.DataFrame(value_list)
@@ -187,9 +196,11 @@ def pmi():
    res = requests.get(
        f"https://cdn.jin10.com/dc/reports/dc_usa_ism_non_pmi_all.js?v={str(int(round(t * 1000)))}"
    )
json_data = json.loads(res.text[res.text.find("{"): res.text.rfind("}") + 1])
json_data = json.loads(
res.text[res.text.find("{"): res.text.rfind("}") + 1])
date_list = [item["date"] for item in json_data["list"]]
value_list = [item["datas"]["美国ISM非制造业PMI报告"] for item in json_data["list"]]
value_list = [item["datas"]["美国ISM非制造业PMI报告"]
for item in json_data["list"]]
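    # "美国ISM非制造业PMI报告" = "US ISM Non-Manufacturing PMI report"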
value_df = pd.DataFrame(value_list)
value_df.columns = json_data["kinds"]
value_df.index = pd.to_datetime(date_list)
@@ -411,7 +422,8 @@ def cpi(startdate="1960-01-01", enddate="2021-01-01"):
df = pd.merge_asof(df, df_annually, on="DATE", direction="backward")
df.columns = ["Date", "CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]
df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
df[["CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]] = df[["CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]].astype(float)
df[["CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]] = df[[
"CPI_Monthly", "CPI_Quarterly", "CPI_Annually"]].astype(float)
return df
@@ -969,7 +981,8 @@ def inflation_nowcasting():
for i in range(0, len(tmp_df)):
date = tmp_df['chart'][i]['subcaption'][:4] + "/" + \
pd.DataFrame(tmp_df["dataset"][i][0]['data'])['tooltext'].str.extract(r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])\b")[0] + "/" + \
pd.DataFrame(tmp_df["dataset"][i][0]['data'])['tooltext'].str.extract(r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])\b")[1]
pd.DataFrame(tmp_df["dataset"][i][0]['data'])['tooltext'].str.extract(
r"\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])\b")[1]
CPI_I = pd.DataFrame(
(pd.DataFrame(tmp_df["dataset"][i])['data'])[0])["value"]
C_CPI_I = pd.DataFrame(
@@ -1034,6 +1047,7 @@ def nfci():
df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d")
return df
def Leading_Indicators_OECD(startdate="1950-01", enddate="2021-05"):
# CLI
tmp_url = url["OECD"] + "USA.CLI.AMPLITUD.LTRENDIDX.M/OECD"
@@ -1049,7 +1063,8 @@ def Leading_Indicators_OECD(startdate = "1950-01", enddate = "2021-05"):
request_header = {"User-Agent": ua.random}
r = requests.get(tmp_url, params=request_params, headers=request_header)
data_text = r.content
df_cli = pd.read_csv(io.StringIO(data_text.decode('utf-8')))[["TIME", "Value"]]
df_cli = pd.read_csv(io.StringIO(
data_text.decode('utf-8')))[["TIME", "Value"]]
df_cli.columns = ["Date", "US_OECD_CLI"]
df_cli["Date"] = pd.to_datetime(df_cli["Date"], format="%Y-%m")
df_cli["US_OECD_CLI"] = df_cli["US_OECD_CLI"].astype(float)
@@ -1067,7 +1082,8 @@ def Leading_Indicators_OECD(startdate = "1950-01", enddate = "2021-05"):
request_header = {"User-Agent": ua.random}
r = requests.get(tmp_url, params=request_params, headers=request_header)
data_text = r.content
df_bci = pd.read_csv(io.StringIO(data_text.decode('utf-8')))[["TIME", "Value"]]
df_bci = pd.read_csv(io.StringIO(
data_text.decode('utf-8')))[["TIME", "Value"]]
df_bci.columns = ["Date", "US_OECD_BCI"]
df_bci["Date"] = pd.to_datetime(df_bci["Date"], format="%Y-%m")
df_bci["US_OECD_BCI"] = df_bci["US_OECD_BCI"].astype(float)
@@ -1085,7 +1101,8 @@ def Leading_Indicators_OECD(startdate = "1950-01", enddate = "2021-05"):
request_header = {"User-Agent": ua.random}
r = requests.get(tmp_url, params=request_params, headers=request_header)
data_text = r.content
df_cci = pd.read_csv(io.StringIO(data_text.decode('utf-8')))[["TIME", "Value"]]
df_cci = pd.read_csv(io.StringIO(
data_text.decode('utf-8')))[["TIME", "Value"]]
df_cci.columns = ["Date", "US_OECD_CCI"]
df_cci["Date"] = pd.to_datetime(df_cci["Date"], format="%Y-%m")
df_cci["US_OECD_CCI"] = df_cci["US_OECD_CCI"].astype(float)
@@ -1094,14 +1111,20 @@ def Leading_Indicators_OECD(startdate = "1950-01", enddate = "2021-05"):
return df
def US_EPU_Monthly():
df = pd.read_excel("https://www.policyuncertainty.com/media/US_Policy_Uncertainty_Data.xlsx")[:-1]
df['Date']=pd.to_datetime(df['Year'].apply(str).str.cat(df['Month'].apply(int).apply(str),sep='-'), format='%Y-%m')
df = pd.read_excel(
"https://www.policyuncertainty.com/media/US_Policy_Uncertainty_Data.xlsx")[:-1]
df['Date'] = pd.to_datetime(df['Year'].apply(str).str.cat(
df['Month'].apply(int).apply(str), sep='-'), format='%Y-%m')
df = df[["Date", "Three_Component_Index", "News_Based_Policy_Uncert_Index"]]
return df
def US_EPU_Daily():
df = pd.read_csv("https://www.policyuncertainty.com/media/All_Daily_Policy_Data.csv")[:-1]
df['Date']=pd.to_datetime(df['year'].apply(str).str.cat(df['month'].apply(str),sep='-').apply(str).str.cat(df['day'].apply(str),sep='-'), format='%Y-%m-%d')
df = pd.read_csv(
"https://www.policyuncertainty.com/media/All_Daily_Policy_Data.csv")[:-1]
df['Date'] = pd.to_datetime(df['year'].apply(str).str.cat(df['month'].apply(
str), sep='-').apply(str).str.cat(df['day'].apply(str), sep='-'), format='%Y-%m-%d')
df = df.drop(["year", "month", "day"], axis=1)
return df

View File

@@ -9,7 +9,7 @@ from urllib.parse import quote, urlencode
from fake_useragent import UserAgent
url = {
"dukascopy": "http://data.deluxelau.com/forex/api/v1.0/getdata?"
"dukascopy": "https://data.deluxelau.com/api/v1.0/finance/getdata?"
}
#?instrument=usdcnh&startdate=2014-01-01&enddate=2014-12-31&timeframe=d1&pricetype=ask&utc=0&volume=false&flat=false
@@ -34,12 +34,12 @@ def dukascopy(
"utc": "{}".format(utc),
"pricetype": "{}".format(pricetype),
"volume": "{}".format(str(volume).lower()),
"flat": "{}".format(str(flat).lower())
"flat": "{}".format(str(flat).lower()),
"token": "token=6dc8797f-aa4b-4b8c-b137-cfe9a9ace5a1"
}
r = requests.get(tmp_url, params=request_params, headers=request_header)
data_text = r.text
output_file = demjson.decode(data_text)
output_file = r.json()
return pd.json_normalize(output_file)
# example:
@@ -47,7 +47,7 @@ def dukascopy(
df = dukascopy(instrument = "usdcnh",
startdate = "2014-01-01",
enddate = "2020-01-01",
timeframe = "h1",
timeframe = "m1",
pricetype = "bid",
utc = 0,
volume = False,

requirements.txt

@@ -1,6 +1,6 @@
certifi==2020.12.5
chardet==4.0.0
demjson==2.2.4
demjson
fake-useragent==0.1.11
idna==2.10
numpy==1.20.3
@@ -11,3 +11,4 @@ requests==2.25.1
six==1.16.0
urllib3==1.26.5
wincertstore==0.2
beautifulsoup4==4.10.1