CEDApy/CEDA/market/marketwatch.py

261 lines
9.0 KiB
Python

import io
import requests
import pandas as pd
from datetime import datetime
from urllib.parse import quote, urlencode
from fake_useragent import UserAgent
url = {
"moneywatch": "https://www.marketwatch.com/investing/"
}
def forex(instrument="eurusd", startdate="2019-01-01", enddate="2021-01-01"):
startdate = datetime.strptime(startdate, "%Y-%m-%d").strftime("%m/%d/%y")
enddate = datetime.strptime(enddate, "%Y-%m-%d").strftime("%m/%d/%y")
df = pd.DataFrame()
def _FX(instrument="eurusd", startdate="01/01/2020", enddate="01/01/2021"):
"""
https://www.marketwatch.com/investing/
"""
tmp_url = url["moneywatch"] + \
"currency/{}/downloaddatapartial".format(instrument)
ua = UserAgent(verify_ssl=False)
request_header = {"User-Agent": ua.random}
request_params = urlencode({
"startdate": r"{}".format(startdate),
"enddate": r"{}".format(enddate),
"daterange": "d30",
"frequency": "p1d",
"csvdownload": "true",
"downloadpartial": "false",
"newdates": "false"}, quote_via=quote)
r = requests.get(tmp_url, params=request_params.replace(
"%2F", "/").replace("%20", " ").replace("%3A", ":"), headers=request_header)
data_text = r.content
df = pd.read_csv(io.StringIO(data_text.decode('utf-8')))
Date = []
for i in range(0, len(df)):
Date.append(datetime.strptime(df["Date"][i], "%m/%d/%Y"))
df["Date"] = Date
return df
for i in range(int(startdate[6:10]), int(enddate[6:10])):
if i == int(startdate[6:10]):
tmp_startdate = startdate
else:
tmp_startdate = "01/01/" + str(i) + " 00:00:00"
if (i + 1) == int(enddate[6:10]):
tmp_enddate = enddate
else:
tmp_enddate = "01/01/" + str(i + 1) + " 00:00:00"
tmp_df = _FX(
instrument=instrument,
startdate=tmp_startdate,
enddate=tmp_enddate)
if i == int(startdate[6:10]):
df = tmp_df
else:
df = pd.concat([tmp_df, df], axis=0)
df = df.reset_index(drop=True)
return df
def index(instrument="vix", startdate="2019-01-01", enddate="2021-01-01", countrycode=None):
startdate = datetime.strptime(startdate, "%Y-%m-%d").strftime("%m/%d/%y")
enddate = datetime.strptime(enddate, "%Y-%m-%d").strftime("%m/%d/%y")
df = pd.DataFrame()
def _index(instrument="vix", startdate="01/01/2020", enddate="01/01/2021", countrycode=None):
"""
https://www.marketwatch.com/investing/
"""
tmp_url = url["moneywatch"] + \
"index/{}/downloaddatapartial".format(instrument)
ua = UserAgent(verify_ssl=False)
request_header = {"User-Agent": ua.random}
if countrycode == None:
request_params = urlencode({
"startdate": r"{}".format(startdate),
"enddate": r"{}".format(enddate),
"daterange": "d30",
"frequency": "p1d",
"csvdownload": "true",
"downloadpartial": "false",
"newdates": "false"}, quote_via=quote)
elif countrycode != None:
request_params = urlencode({
"startdate": r"{}".format(startdate),
"enddate": r"{}".format(enddate),
"daterange": "d30",
"frequency": "p1d",
"csvdownload": "true",
"downloadpartial": "false",
"newdates": "false",
"countrycode": "{}".format(countrycode)}, quote_via=quote)
r = requests.get(tmp_url, params=request_params.replace("%2F", "/").replace("%20", " ").replace("%3A", ":"), \
headers=request_header)
data_text = r.content
df = pd.read_csv(io.StringIO(data_text.decode('utf-8')))
Date = []
for i in range(0, len(df)):
Date.append(datetime.strptime(df["Date"][i], "%m/%d/%Y"))
df["Date"] = Date
return df
for i in range(int(startdate[6:10]), int(enddate[6:10])):
if i == int(startdate[6:10]):
tmp_startdate = startdate
else:
tmp_startdate = "01/01/" + str(i) + " 00:00:00"
if (i + 1) == int(enddate[6:10]):
tmp_enddate = enddate
else:
tmp_enddate = "01/01/" + str(i + 1) + " 00:00:00"
tmp_df = _index(
instrument=instrument,
startdate=tmp_startdate,
enddate=tmp_enddate, countrycode=countrycode)
if i == int(startdate[6:10]):
df = tmp_df
else:
df = pd.concat([tmp_df, df], axis=0)
df = df.reset_index(drop=True)
return df
def crypto(instrument="btcusd", startdate="2019-01-01", enddate="2021-01-01"):
startdate = datetime.strptime(startdate, "%Y-%m-%d").strftime("%m/%d/%y")
enddate = datetime.strptime(enddate, "%Y-%m-%d").strftime("%m/%d/%y")
df = pd.DataFrame()
def _crypto(
instrument="btcusd",
startdate="01/01/2020",
enddate="01/01/2021"):
"""
https://www.marketwatch.com/investing/
"""
tmp_url = url["moneywatch"] + \
"cryptocurrency/{}/downloaddatapartial".format(instrument)
ua = UserAgent(verify_ssl=False)
request_header = {"User-Agent": ua.random}
request_params = urlencode({
"startdate": r"{}".format(startdate),
"enddate": r"{}".format(enddate),
"daterange": "d30",
"frequency": "p1d",
"csvdownload": "true",
"downloadpartial": "false",
"newdates": "false"}, quote_via=quote)
r = requests.get(tmp_url, params=request_params.replace(
"%2F", "/").replace("%20", " ").replace("%3A", ":"), headers=request_header)
data_text = r.content
df = pd.read_csv(io.StringIO(data_text.decode('utf-8')))
Date = []
for i in range(0, len(df)):
Date.append(datetime.strptime(df["Date"][i], "%m/%d/%Y"))
df["Date"] = Date
return df
for i in range(int(startdate[6:10]), int(enddate[6:10])):
if i == int(startdate[6:10]):
tmp_startdate = startdate
else:
tmp_startdate = "01/01/" + str(i) + " 00:00:00"
if (i + 1) == int(enddate[6:10]):
tmp_enddate = enddate
else:
tmp_enddate = "01/01/" + str(i + 1) + " 00:00:00"
tmp_df = _crypto(
instrument=instrument,
startdate=tmp_startdate,
enddate=tmp_enddate)
if i == int(startdate[6:10]):
df = tmp_df
else:
df = pd.concat([tmp_df, df], axis=0)
df = df.reset_index(drop=True)
return df
def stock(
countrycode="cn",
instrument="601988",
startdate="2019-01-01",
enddate="2021-01-01"):
startdate = datetime.strptime(startdate, "%Y-%m-%d").strftime("%m/%d/%y")
enddate = datetime.strptime(enddate, "%Y-%m-%d").strftime("%m/%d/%y")
df = pd.DataFrame()
def _stock(
countrycode="cn",
instrument="601988",
startdate="01/01/2020",
enddate="01/01/2021"):
"""
https://www.marketwatch.com/investing/
"""
tmp_url = url["moneywatch"] + \
"stock/{}/downloaddatapartial".format(instrument)
ua = UserAgent(verify_ssl=False)
request_header = {"User-Agent": ua.random}
request_params = urlencode({
"startdate": r"{}".format(startdate),
"enddate": r"{}".format(enddate),
"daterange": "d30",
"frequency": "p1d",
"csvdownload": "true",
"downloadpartial": "false",
"newdates": "false",
"countrycode": "{}".format(countrycode)}, quote_via=quote)
r = requests.get(tmp_url, params=request_params.replace(
"%2F", "/").replace("%20", " ").replace("%3A", ":"), headers=request_header)
data_text = r.content
df = pd.read_csv(io.StringIO(data_text.decode('utf-8')))
Date = []
for i in range(0, len(df)):
Date.append(datetime.strptime(df["Date"][i], "%m/%d/%Y"))
df["Date"] = Date
return df
for i in range(int(startdate[6:10]), int(enddate[6:10])):
if i == int(startdate[6:10]):
tmp_startdate = startdate
else:
tmp_startdate = "01/01/" + str(i) + " 00:00:00"
if (i + 1) == int(enddate[6:10]):
tmp_enddate = enddate
else:
tmp_enddate = "01/01/" + str(i + 1) + " 00:00:00"
if countrycode == "us":
countrycode = ""
tmp_df = _stock(
countrycode=countrycode,
instrument=instrument,
startdate=tmp_startdate,
enddate=tmp_enddate)
if i == int(startdate[6:10]):
df = tmp_df
else:
df = pd.concat([tmp_df, df], axis=0)
df = df.reset_index(drop=True)
return df
if __name__ == "__main__":
data = FX()