2021-06-03 14:09:23 +00:00
|
|
|
import re
|
|
|
|
import io
|
2021-05-27 12:33:56 +00:00
|
|
|
import requests
|
|
|
|
import demjson
|
|
|
|
import pandas as pd
|
2021-06-03 14:09:23 +00:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
from datetime import datetime
|
|
|
|
from urllib.parse import quote, urlencode
|
2021-05-27 12:33:56 +00:00
|
|
|
from fake_useragent import UserAgent
|
|
|
|
|
|
|
|
# Base URLs of the data providers, keyed by the short name the download
# functions in this module look up.
url = {
    "moneywatch": "https://www.marketwatch.com/investing/",
}
|
|
|
|
|
2021-06-07 06:55:01 +00:00
|
|
|
def forex(instrument="eurusd", startdate="2019-01-01", enddate="2021-01-01"):
    """Download the price history of a currency pair from MarketWatch.

    The requested range is fetched in pieces that never extend past the
    next 1 January, and the pieces are joined into one frame.

    Parameters
    ----------
    instrument : str
        Symbol placed in the ``currency/<instrument>/downloaddatapartial``
        URL path, e.g. ``"eurusd"``.
    startdate, enddate : str
        Range bounds in ``YYYY-MM-DD`` form.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV rows with ``Date`` converted to datetimes, later
        chunks first. Empty when ``startdate`` is after ``enddate``.

    Raises
    ------
    ValueError
        If a bound is not in ``YYYY-MM-DD`` form.
    requests.HTTPError
        If the server answers a chunk request with an error status.
    """
    start = datetime.strptime(startdate, "%Y-%m-%d")
    end = datetime.strptime(enddate, "%Y-%m-%d")
    if start > end:
        return pd.DataFrame()

    ua = UserAgent(verify_ssl=False)
    # Four-digit years, same layout as the site's own download links.
    stamp = "%m/%d/%Y %H:%M:%S"

    def _FX(instrument, startdate, enddate):
        """Fetch one date window and return it with a parsed ``Date`` column."""
        tmp_url = url["moneywatch"] + "currency/{}/downloaddatapartial".format(instrument)
        # "/", " " and ":" are left unescaped in the query string.
        request_params = urlencode(
            {
                "startdate": startdate,
                "enddate": enddate,
                "daterange": "d30",
                "frequency": "p1d",
                "csvdownload": "true",
                "downloadpartial": "false",
                "newdates": "false",
            },
            quote_via=quote,
            safe="/ :",
        )
        r = requests.get(tmp_url, params=request_params, headers={"User-Agent": ua.random})
        # Fail on an HTTP error instead of parsing an error page as CSV.
        r.raise_for_status()
        chunk = pd.read_csv(io.StringIO(r.content.decode("utf-8")))
        chunk["Date"] = pd.to_datetime(chunk["Date"], format="%m/%d/%Y")
        return chunk

    frames = []
    chunk_start = start
    while True:
        # Cap every request at the next 1 January so no chunk spans more
        # than one calendar year, including the last one.
        chunk_end = min(end, datetime(chunk_start.year + 1, 1, 1))
        frames.append(_FX(instrument, chunk_start.strftime(stamp), chunk_end.strftime(stamp)))
        if chunk_end >= end:
            break
        chunk_start = chunk_end

    # Later chunks go first, as in the original concatenation order.
    df = pd.concat(frames[::-1], axis=0)
    # Adjacent chunks share their boundary day; drop a row repeated verbatim.
    df = df.drop_duplicates()
    return df.reset_index(drop=True)
|
|
|
|
|
2021-06-07 07:28:29 +00:00
|
|
|
def index(instrument="vix", startdate="2019-01-01", enddate="2021-01-01"):
    """Download the price history of a market index from MarketWatch.

    The requested range is fetched in pieces that never extend past the
    next 1 January, and the pieces are joined into one frame.

    Parameters
    ----------
    instrument : str
        Symbol placed in the ``index/<instrument>/downloaddatapartial``
        URL path, e.g. ``"vix"``.
    startdate, enddate : str
        Range bounds in ``YYYY-MM-DD`` form.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV rows with ``Date`` converted to datetimes, later
        chunks first. Empty when ``startdate`` is after ``enddate``.

    Raises
    ------
    ValueError
        If a bound is not in ``YYYY-MM-DD`` form.
    requests.HTTPError
        If the server answers a chunk request with an error status.
    """
    start = datetime.strptime(startdate, "%Y-%m-%d")
    end = datetime.strptime(enddate, "%Y-%m-%d")
    if start > end:
        return pd.DataFrame()

    ua = UserAgent(verify_ssl=False)
    # Four-digit years, same layout as the site's own download links.
    stamp = "%m/%d/%Y %H:%M:%S"

    def _index(instrument, startdate, enddate):
        """Fetch one date window and return it with a parsed ``Date`` column."""
        tmp_url = url["moneywatch"] + "index/{}/downloaddatapartial".format(instrument)
        # "/", " " and ":" are left unescaped in the query string.
        request_params = urlencode(
            {
                "startdate": startdate,
                "enddate": enddate,
                "daterange": "d30",
                "frequency": "p1d",
                "csvdownload": "true",
                "downloadpartial": "false",
                "newdates": "false",
            },
            quote_via=quote,
            safe="/ :",
        )
        r = requests.get(tmp_url, params=request_params, headers={"User-Agent": ua.random})
        # Fail on an HTTP error instead of parsing an error page as CSV.
        r.raise_for_status()
        chunk = pd.read_csv(io.StringIO(r.content.decode("utf-8")))
        chunk["Date"] = pd.to_datetime(chunk["Date"], format="%m/%d/%Y")
        return chunk

    frames = []
    chunk_start = start
    while True:
        # Cap every request at the next 1 January so no chunk spans more
        # than one calendar year, including the last one.
        chunk_end = min(end, datetime(chunk_start.year + 1, 1, 1))
        frames.append(_index(instrument, chunk_start.strftime(stamp), chunk_end.strftime(stamp)))
        if chunk_end >= end:
            break
        chunk_start = chunk_end

    # Later chunks go first, as in the original concatenation order.
    df = pd.concat(frames[::-1], axis=0)
    # Adjacent chunks share their boundary day; drop a row repeated verbatim.
    df = df.drop_duplicates()
    return df.reset_index(drop=True)
|
|
|
|
|
|
|
|
def crypto(instrument="btcusd", startdate="2019-01-01", enddate="2021-01-01"):
    """Download the price history of a cryptocurrency pair from MarketWatch.

    The requested range is fetched in pieces that never extend past the
    next 1 January, and the pieces are joined into one frame.

    Parameters
    ----------
    instrument : str
        Symbol placed in the
        ``cryptocurrency/<instrument>/downloaddatapartial`` URL path,
        e.g. ``"btcusd"``.
    startdate, enddate : str
        Range bounds in ``YYYY-MM-DD`` form.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV rows with ``Date`` converted to datetimes, later
        chunks first. Empty when ``startdate`` is after ``enddate``.

    Raises
    ------
    ValueError
        If a bound is not in ``YYYY-MM-DD`` form.
    requests.HTTPError
        If the server answers a chunk request with an error status.
    """
    start = datetime.strptime(startdate, "%Y-%m-%d")
    end = datetime.strptime(enddate, "%Y-%m-%d")
    if start > end:
        return pd.DataFrame()

    ua = UserAgent(verify_ssl=False)
    # Four-digit years, same layout as the site's own download links.
    stamp = "%m/%d/%Y %H:%M:%S"

    def _crypto(instrument, startdate, enddate):
        """Fetch one date window and return it with a parsed ``Date`` column."""
        tmp_url = url["moneywatch"] + "cryptocurrency/{}/downloaddatapartial".format(instrument)
        # "/", " " and ":" are left unescaped in the query string.
        request_params = urlencode(
            {
                "startdate": startdate,
                "enddate": enddate,
                "daterange": "d30",
                "frequency": "p1d",
                "csvdownload": "true",
                "downloadpartial": "false",
                "newdates": "false",
            },
            quote_via=quote,
            safe="/ :",
        )
        r = requests.get(tmp_url, params=request_params, headers={"User-Agent": ua.random})
        # Fail on an HTTP error instead of parsing an error page as CSV.
        r.raise_for_status()
        chunk = pd.read_csv(io.StringIO(r.content.decode("utf-8")))
        chunk["Date"] = pd.to_datetime(chunk["Date"], format="%m/%d/%Y")
        return chunk

    frames = []
    chunk_start = start
    while True:
        # Cap every request at the next 1 January so no chunk spans more
        # than one calendar year, including the last one.
        chunk_end = min(end, datetime(chunk_start.year + 1, 1, 1))
        frames.append(_crypto(instrument, chunk_start.strftime(stamp), chunk_end.strftime(stamp)))
        if chunk_end >= end:
            break
        chunk_start = chunk_end

    # Later chunks go first, as in the original concatenation order.
    df = pd.concat(frames[::-1], axis=0)
    # Adjacent chunks share their boundary day; drop a row repeated verbatim.
    df = df.drop_duplicates()
    return df.reset_index(drop=True)
|
2021-06-03 14:09:23 +00:00
|
|
|
|
2021-06-07 07:28:29 +00:00
|
|
|
def stock(countrycode="cn", instrument="601988", startdate="2019-01-01", enddate="2021-01-01"):
    """Download the price history of a stock from MarketWatch.

    The requested range is fetched in pieces that never extend past the
    next 1 January, and the pieces are joined into one frame.

    Parameters
    ----------
    countrycode : str
        Value sent as the ``countrycode`` query parameter. ``"us"`` is
        sent as an empty string.
    instrument : str
        Symbol placed in the ``stock/<instrument>/downloaddatapartial``
        URL path, e.g. ``"601988"``.
    startdate, enddate : str
        Range bounds in ``YYYY-MM-DD`` form.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV rows with ``Date`` converted to datetimes, later
        chunks first. Empty when ``startdate`` is after ``enddate``.

    Raises
    ------
    ValueError
        If a bound is not in ``YYYY-MM-DD`` form.
    requests.HTTPError
        If the server answers a chunk request with an error status.
    """
    start = datetime.strptime(startdate, "%Y-%m-%d")
    end = datetime.strptime(enddate, "%Y-%m-%d")
    if start > end:
        return pd.DataFrame()

    # US listings are requested with an empty country code.
    if countrycode == "us":
        countrycode = ""

    ua = UserAgent(verify_ssl=False)
    # Four-digit years, same layout as the site's own download links.
    stamp = "%m/%d/%Y %H:%M:%S"

    def _stock(countrycode, instrument, startdate, enddate):
        """Fetch one date window and return it with a parsed ``Date`` column."""
        tmp_url = url["moneywatch"] + "stock/{}/downloaddatapartial".format(instrument)
        # "/", " " and ":" are left unescaped in the query string.
        request_params = urlencode(
            {
                "startdate": startdate,
                "enddate": enddate,
                "daterange": "d30",
                "frequency": "p1d",
                "csvdownload": "true",
                "downloadpartial": "false",
                "newdates": "false",
                "countrycode": "{}".format(countrycode),
            },
            quote_via=quote,
            safe="/ :",
        )
        r = requests.get(tmp_url, params=request_params, headers={"User-Agent": ua.random})
        # Fail on an HTTP error instead of parsing an error page as CSV.
        r.raise_for_status()
        chunk = pd.read_csv(io.StringIO(r.content.decode("utf-8")))
        chunk["Date"] = pd.to_datetime(chunk["Date"], format="%m/%d/%Y")
        return chunk

    frames = []
    chunk_start = start
    while True:
        # Cap every request at the next 1 January so no chunk spans more
        # than one calendar year, including the last one.
        chunk_end = min(end, datetime(chunk_start.year + 1, 1, 1))
        frames.append(
            _stock(countrycode, instrument, chunk_start.strftime(stamp), chunk_end.strftime(stamp))
        )
        if chunk_end >= end:
            break
        chunk_start = chunk_end

    # Later chunks go first, as in the original concatenation order.
    df = pd.concat(frames[::-1], axis=0)
    # Adjacent chunks share their boundary day; drop a row repeated verbatim.
    df = df.drop_duplicates()
    return df.reset_index(drop=True)
|
2021-06-07 04:53:37 +00:00
|
|
|
|
2021-06-07 07:28:29 +00:00
|
|
|
#https://www.marketwatch.com/investing/stock/googl/downloaddatapartial?startdate=05/05/2021 00:00:00&enddate=06/04/2021 23:59:59&daterange=d30&frequency=p1d&csvdownload=true&downloadpartial=false&newdates=false
|
2021-06-07 04:53:37 +00:00
|
|
|
|
2021-06-07 05:51:51 +00:00
|
|
|
if __name__ == "__main__":
    # Manual smoke test: download the default EUR/USD range.
    # The original called FX(), which is not defined at module level.
    data = forex()
|
|
|
|
|