2022-01-27 16:09:06 +00:00
|
|
|
import ast
|
2022-01-28 04:12:34 +00:00
|
|
|
import json
|
2022-01-27 16:09:06 +00:00
|
|
|
import requests
|
|
|
|
import pandas as pd
|
2022-01-28 04:12:34 +00:00
|
|
|
from tqdm import tqdm
|
|
|
|
from pygtrans import Translate
|
|
|
|
|
|
|
|
def translate(text:str=None):
    """Translate *text* into English via pygtrans.

    A new ``Translate`` client is created on every call, the text is sent
    with ``target="en"``, and the ``translatedText`` attribute of the
    response is returned.
    """
    response = Translate().translate(text, target="en")
    return response.translatedText
|
2022-01-27 16:09:06 +00:00
|
|
|
|
|
|
|
# Base URL prefixes keyed by a short source name; request paths such as
# ``getContent?ids=<id>`` are appended to these when building a request.
url = dict(CNFIN="https://api.cnfin.com/roll/charts/")
|
|
|
|
|
|
|
|
class XHData(object):
    """Client for chart data served by the CNFIN ``getContent`` endpoint.

    The ``country`` passed at construction selects which hard-coded range of
    chart ids :meth:`toc` scans. :meth:`download_data` works for any chart id
    regardless of ``country``.
    """

    # Half-open ``(start, stop)`` chart-id ranges scanned by ``toc``, keyed by
    # the accepted ``country`` values.
    _ID_RANGES = {
        "CN": (12005, 12100),
        "USA": (6361, 6394),
        "UK": (6539, 6566),
        "Japan": (6394, 6425),
    }

    # Value the API places in ``data`` when the requested chart id is unknown
    # (Chinese for "chart data does not exist").
    _MISSING_CHART = "图表数据不存在"

    def __init__(self, country:str=None):
        """Remember *country*; no request is made at construction time."""
        self.country = country

    @staticmethod
    def _content_url(iid):
        """Return the ``getContent`` request URL for chart id *iid*."""
        return url["CNFIN"] + "getContent?ids={}".format(iid)

    def _fetch_toc_entry(self, chart_id):
        """Return ``(request_url, title)`` for *chart_id*, or ``None``.

        ``None`` is returned when the HTTP response is not OK or when the API
        reports that the chart does not exist.
        """
        request_url = self._content_url(chart_id)
        r = requests.get(request_url)
        if not r.ok:
            return None
        payload = r.json()["data"]
        if payload == self._MISSING_CHART:
            return None
        # ``modelCode`` is itself a JSON document embedded as a string; the
        # chart title is stored under ``title.text`` inside it.
        title = json.loads(payload["list"][0]["modelCode"])["title"]["text"]
        return request_url, title

    def toc(self):
        """Build a table of contents for the charts of ``self.country``.

        Every chart id in the country's range is requested (with a tqdm
        progress bar); ids that fail or do not exist are skipped. Each title
        is also translated to English with the module-level ``translate``.

        Returns:
            A ``pandas.DataFrame`` with columns ``urls``, ``id``,
            ``title_zh`` and ``title_en``. The frame is empty when
            ``self.country`` is not one of ``"CN"``, ``"USA"``, ``"UK"`` or
            ``"Japan"``.
        """
        urls, tid, titles, titles_en = [], [], [], []

        id_range = self._ID_RANGES.get(self.country)
        if id_range is not None:
            for chart_id in tqdm(range(*id_range)):
                entry = self._fetch_toc_entry(chart_id)
                if entry is None:
                    continue
                request_url, title = entry
                title_en = translate(text=title)
                # Append to all four columns together so they can never end
                # up with different lengths.
                urls.append(request_url)
                tid.append(chart_id)
                titles.append(title)
                titles_en.append(title_en)

        return pd.DataFrame({"urls":urls, "id":tid, "title_zh":titles, "title_en":titles_en})

    def download_data(self, iid:int=None):
        """Download the series of chart *iid* as a two-column DataFrame.

        Args:
            iid: Chart id, as listed in the ``id`` column of :meth:`toc`.

        Returns:
            A ``pandas.DataFrame`` whose columns are renamed to ``date`` and
            ``data``.

        Raises:
            ValueError: If the HTTP response status is not OK.
        """
        r = requests.get(self._content_url(iid))
        if not r.ok:
            # The error must be raised, not returned: a returned exception
            # object would be mistaken by callers for the resulting frame.
            raise ValueError("Something went wrong, try again later")

        raw_data = r.json()
        # ``content`` arrives as a string holding a Python-style literal;
        # ``ast.literal_eval`` parses it without executing arbitrary code.
        # NOTE(review): presumably a list of [date, value] pairs -- confirm.
        data = pd.DataFrame(ast.literal_eval(raw_data["data"]["list"][0]["content"]))
        data.columns = ["date", "data"]
        return data
|
|
|
|
|
2022-01-28 04:12:34 +00:00
|
|
|
if __name__ == "__main__":
    # Demo run against the live API: list the charts available for China,
    # then download one of them.
    client = XHData(country="CN")
    catalogue = client.toc()
    gdp_series = client.download_data(iid=12006) # GDP
|