225 lines
6.1 KiB
Python
225 lines
6.1 KiB
Python
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
import re
|
||
|
import demjson
|
||
|
import requests
|
||
|
from fake_useragent import UserAgent
|
||
|
|
||
|
# Base endpoints keyed by data provider; the fetch functions in this module
# look up the "eastmoney" entry.
url = dict(
    eastmoney="http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx",
)
|
||
|
|
||
|
def cn_gdp_quarter() -> pd.DataFrame:
    """
    Fetch the dataset requested with ``mkt=20`` from the Eastmoney endpoint
    (quarterly GDP, going by the function name) and return it as a table.

    Column suffixes:
        ABS: absolute value (per 100 million CNY)
        YoY: year on year growth

    Returns:
        A DataFrame with one row per record in the response. Every cell is
        the raw string obtained by splitting the record on commas; no
        numeric or date conversion is performed.
    """
    column_names = [
        "Date",
        "Absolute_Value",
        "YoY",
        "Primary_Industry_ABS",
        "Primary_Industry_YoY",
        "Secondary_Industry_ABS",
        "Secondary_Industry_YoY",
        "Tertiary_Industry_ABS",
        "Tertiary_Industry_YoY",
    ]
    query = {
        "cb": "datatable7519513",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "200",
        "mkt": "20",
        "_": "1622020352668"
    }
    # A randomly chosen browser User-Agent is sent with the request.
    headers = {"User-Agent": UserAgent().random}

    body = requests.get(url["eastmoney"], params=query, headers=headers).text

    # Keep the text from the first "{" up to, but excluding, the final
    # character (presumably the closing parenthesis of the callback wrapper).
    # The "js" template asks for unquoted keys, which is presumably why the
    # lenient demjson decoder is used here.
    json_start = body.find("{")
    parsed = demjson.decode(body[json_start:-1])

    table = pd.DataFrame([record.split(",") for record in parsed["data"]])
    table.columns = column_names
    return table
|
||
|
|
||
|
def cn_cpi_monthly() -> pd.DataFrame:
    """
    Fetch the dataset requested with ``mkt=19`` from the Eastmoney endpoint
    (monthly CPI, going by the function name) and return it as a table.

    Column suffixes:
        Accum: Accumulation
        YoY: year on year growth
        MoM: month on month growth

    Returns:
        A DataFrame with one row per record in the response. Every cell is
        the raw string obtained by splitting the record on commas.
    """
    # NOTE(review): "Notion" is presumably a typo for "Nation(al)"; the labels
    # are left unchanged because callers may select columns by these names.
    column_names = [
        "Date",
        "Notion_Monthly",
        "Notion_YoY",
        "Notion_MoM",
        "Notion_Accum",
        "Urban_Monthly",
        "Urban_YoY",
        "Urban_MoM",
        "Urban_Accum",
        "Rural_Monthly",
        "Rural_YoY",
        "Rural_MoM",
        "Rural_Accum",
    ]
    query = {
        "cb": "datatable2790750",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "200",
        "mkt": "19",
        "_": "1622020352668"
    }
    # A randomly chosen browser User-Agent is sent with the request.
    headers = {"User-Agent": UserAgent().random}

    body = requests.get(url["eastmoney"], params=query, headers=headers).text

    # Keep the text from the first "{" up to, but excluding, the final
    # character (presumably the closing parenthesis of the callback wrapper).
    json_start = body.find("{")
    parsed = demjson.decode(body[json_start:-1])

    table = pd.DataFrame([record.split(",") for record in parsed["data"]])
    table.columns = column_names
    return table
|
||
|
|
||
|
def cn_pmi_monthly() -> pd.DataFrame:
    """
    Fetch the dataset requested with ``mkt=21`` from the Eastmoney endpoint
    (monthly PMI, going by the function name) and return it as a table.

    Column prefixes:
        Man: manufacturing
        Non-Man: Non-manufacturing

    Returns:
        A DataFrame with one row per record in the response. Every cell is
        the raw string obtained by splitting the record on commas.
    """
    column_names = [
        "Date",
        "Man_Industry_Index",
        "Man_Index_YoY",
        "Non-Man_Industry_Index",
        "Non-Man_Index_YoY",
    ]
    # NOTE(review): this request sends "p": "2" while the sibling fetchers in
    # this module send "p": "1" -- confirm that this is intentional.
    query = {
        "cb": "datatable4515395",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "2",
        "ps": "200",
        "mkt": "21",
        "_": "162202151821"
    }
    # A randomly chosen browser User-Agent is sent with the request.
    headers = {"User-Agent": UserAgent().random}

    body = requests.get(url["eastmoney"], params=query, headers=headers).text

    # Keep the text from the first "{" up to, but excluding, the final
    # character (presumably the closing parenthesis of the callback wrapper).
    json_start = body.find("{")
    parsed = demjson.decode(body[json_start:-1])

    table = pd.DataFrame([record.split(",") for record in parsed["data"]])
    table.columns = column_names
    return table
|
||
|
|
||
|
def cn_fai_monthly() -> pd.DataFrame:  # fixed asset investment
    """
    Fetch the dataset requested with ``mkt=12`` from the Eastmoney endpoint
    (monthly fixed asset investment, going by the function name) and return
    it as a table.

    Column abbreviations:
        YoY: year on year growth
        MoM: month on month growth
        Accum: Accumulation

    Returns:
        A DataFrame with the columns ``Date``, ``Current_Month``, ``YoY``,
        ``MoM`` and ``Current_Year_Accum``, one row per record in the
        response. Every cell is the raw string obtained by splitting the
        record on commas.
    """
    request_header = {"User-Agent": UserAgent().random}
    request_params = {
        "cb": "datatable607120",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "200",
        "mkt": "12",
        "_": "1622021790947"
    }
    r = requests.get(
        url["eastmoney"], params=request_params, headers=request_header
    )
    data_text = r.text
    # Keep the text from the first "{" up to, but excluding, the final
    # character (presumably the closing parenthesis of the callback wrapper).
    data_json = demjson.decode(data_text[data_text.find("{"):-1])
    df = pd.DataFrame([item.split(",") for item in data_json["data"]])
    df.columns = [
        "Date",
        "Current_Month",
        "YoY",
        "MoM",
        "Current_Year_Accum",
    ]
    return df
|
||
|
|
||
|
def cn_hi_old_monthly() -> pd.DataFrame:  # house index old version (2008-2010)
    """
    Fetch the dataset requested with ``mkt=10`` from the Eastmoney endpoint
    (the old-version house index, going by the function name) and return it
    as a table.

    Column abbreviations:
        HPI: Housing Prosperity Index
        LDAI: Land Development Area Index
        SPI: Sales Price Index
        YoY: year on year growth

    Returns:
        A DataFrame with one row per record in the response. Every cell is
        the raw string obtained by splitting the record on commas.
    """
    request_header = {"User-Agent": UserAgent().random}
    request_params = {
        "cb": "datatable1895714",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "200",
        "mkt": "10",
        "_": "1622022794457"
    }
    r = requests.get(
        url["eastmoney"], params=request_params, headers=request_header
    )
    data_text = r.text
    # Keep the text from the first "{" up to, but excluding, the final
    # character (presumably the closing parenthesis of the callback wrapper).
    data_json = demjson.decode(data_text[data_text.find("{"):-1])
    df = pd.DataFrame([item.split(",") for item in data_json["data"]])
    df.columns = [
        "Date",
        "Housing_Prosperity_Index",
        "HPI_YoY",
        "Land_Development_Area_Index",
        "LDAI_YoY",
        "Sales_Price_Index",
        "SPI_YoY",
    ]
    return df
|
||
|
|
||
|
def cn_hi_mew_monthly() -> pd.DataFrame:  # house index, new version
    """
    Fetch the dataset requested with ``sty=XFJLB`` / ``mkt=19`` from the
    Eastmoney endpoint (the new-version house index, going by the function
    name) and return it as a table.

    Related chart endpoint noted by the original author:
    http://data.eastmoney.com/dataapi/cjsj/getnewhousechartdata?mkt=1&stat=1&city1=%E5%8C%97%E4%BA%AC&city2=%E9%95%BF%E6%98%A5

    Column abbreviations:
        HPI: Housing Prosperity Index
        LDAI: Land Development Area Index
        SPI: Sales Price Index
        YoY: year on year growth

    Returns:
        A DataFrame with one row per record in the response. Every cell is
        the raw string obtained by splitting the record on commas.
    """
    request_header = {"User-Agent": UserAgent().random}
    request_params = {
        "cb": "datatable6451982",
        "type": "GJZB",
        "sty": "XFJLB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "2000",
        "mkt": "19",
        "pageNo": "1",
        "pageNum": "1",
        "_": "1603023435552",
    }
    r = requests.get(
        url["eastmoney"], params=request_params, headers=request_header
    )
    data_text = r.text
    # Keep the text from the first "{" up to, but excluding, the final
    # character (presumably the closing parenthesis of the callback wrapper).
    data_json = demjson.decode(data_text[data_text.find("{"):-1])
    # The frame must be bound to ``df``: the original bound it to ``data`` and
    # then used the undefined name ``df``, raising NameError on every call.
    df = pd.DataFrame([item.split(",") for item in data_json["data"]])
    # NOTE(review): these labels are identical to the ones used by
    # cn_hi_old_monthly although this request uses a different "sty";
    # confirm that they match the fields actually returned.
    df.columns = [
        "Date",
        "Housing_Prosperity_Index",
        "HPI_YoY",
        "Land_Development_Area_Index",
        "LDAI_YoY",
        "Sales_Price_Index",
        "SPI_YoY",
    ]
    return df
|
||
|
|
||
|
"""
|
||
|
if __name__ == "__main__":
|
||
|
"""
|