# Reconstructed from a git format-patch e-mail whose line breaks had been
# collapsed.
#   commit : 582ccb9d9b46847cb870f786286583ef68b0cba3
#   author : TerenceLiu98
#   date   : Wed, 26 May 2021 21:57:29 +0800
#   subject: [PATCH] update
# The patch created three files:
#   CEDA/__init__.py          -> from CEDA.economic.macro import (cn_gdp_quarter)
#   CEDA/economic/__init__.py -> header comments only (coding: utf-8,
#                                time: 05/25/2021 UTC+8, author: terencelau,
#                                email: t_lau@uicstat.com)
#   CEDA/economic/macro.py    -> the module below
"""Download Chinese macro-economic tables from the eastmoney data interface.

Every public ``cn_*`` function performs one HTTP GET against
``url["eastmoney"]`` and returns the rows of the response as a
``pandas.DataFrame``.  Cell values are the raw comma-separated fields of each
row, i.e. strings; no numeric or date conversion is applied.
"""
import pandas as pd
import numpy as np
import re
import demjson
import requests
from fake_useragent import UserAgent

url = {
    "eastmoney": "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx"
}

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` can block forever on a stalled connection.
_REQUEST_TIMEOUT = 30


def _fetch_eastmoney_table(request_params, columns):
    """Fetch one table from the eastmoney endpoint and label its columns.

    Args:
        request_params: Query-string parameters sent with the GET request.
        columns: Column labels for the resulting frame, in field order.

    Returns:
        A ``pandas.DataFrame`` with one row per entry of the response's
        ``data`` list.  Each entry is split on ``","``, so all cells are
        strings.  An empty ``data`` list yields an empty frame that still
        carries ``columns``.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the number of fields per row does not match
            ``columns``.
        Exception: Whatever ``demjson.decode`` raises when the payload
            cannot be parsed.
    """
    request_header = {"User-Agent": UserAgent().random}
    r = requests.get(
        url["eastmoney"],
        params=request_params,
        headers=request_header,
        timeout=_REQUEST_TIMEOUT,
    )
    data_text = r.text
    # The ``js`` template sent by the callers wraps the object literal in
    # parentheses and leaves its keys unquoted (presumably why demjson is
    # used rather than the strict ``json`` module).  Keep only the text from
    # the first "{" to the last "}" so that any trailing ")" / ";" /
    # whitespace is ignored.
    payload = data_text[data_text.find("{") : data_text.rfind("}") + 1]
    data_json = demjson.decode(payload)
    rows = [item.split(",") for item in data_json["data"]]
    # Passing ``columns`` to the constructor (instead of assigning
    # ``df.columns`` afterwards) keeps an empty result from raising a
    # length-mismatch error.
    return pd.DataFrame(rows, columns=list(columns))


def cn_gdp_quarter() -> pd.DataFrame:
    """Return the GDP table (request parameter ``mkt=20``).

    Column name abbreviations:
        ABS: absolute value (per 100 million CNY)
        YoY: year on year growth
    """
    request_params = {
        "cb": "datatable7519513",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "200",
        "mkt": "20",
        "_": "1622020352668",
    }
    columns = [
        "Date",
        "Absolute_Value",
        "YoY",
        "Primary_Industry_ABS",
        "Primary_Industry_YoY",
        "Secondary_Industry_ABS",
        "Secondary_Industry_YoY",
        "Tertiary_Industry_ABS",
        "Tertiary_Industry_YoY",
    ]
    # TODO: filtering by a start/end date was sketched here in the original
    # code but never wired up (there are no such parameters yet).
    return _fetch_eastmoney_table(request_params, columns)


def cn_cpi_monthly() -> pd.DataFrame:
    """Return the CPI table (request parameter ``mkt=19``).

    Column name abbreviations:
        Accum: Accumulation
        YoY: year on year growth
        MoM: month on month growth
    """
    request_params = {
        "cb": "datatable2790750",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "200",
        "mkt": "19",
        "_": "1622020352668",
    }
    # NOTE(review): "Notion_*" looks like a typo for "Nation(al)_*"; the
    # labels are kept unchanged because callers may index by them.
    columns = [
        "Date",
        "Notion_Monthly",
        "Notion_YoY",
        "Notion_MoM",
        "Notion_Accum",
        "Urban_Monthly",
        "Urban_YoY",
        "Urban_MoM",
        "Urban_Accum",
        "Rural_Monthly",
        "Rural_YoY",
        "Rural_MoM",
        "Rural_Accum",
    ]
    return _fetch_eastmoney_table(request_params, columns)


def cn_pmi_monthly() -> pd.DataFrame:
    """Return the PMI table (request parameter ``mkt=21``).

    Column name abbreviations:
        Man: manufacturing
        Non-Man: Non-manufacturing
    """
    request_params = {
        "cb": "datatable4515395",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        # NOTE(review): every other function in this module requests page
        # "1"; confirm that page "2" is intended here.
        "p": "2",
        "ps": "200",
        "mkt": "21",
        "_": "162202151821",
    }
    columns = [
        "Date",
        "Man_Industry_Index",
        "Man_Index_YoY",
        "Non-Man_Industry_Index",
        "Non-Man_Index_YoY",
    ]
    return _fetch_eastmoney_table(request_params, columns)


def cn_fai_monthly() -> pd.DataFrame:
    """Return the fixed asset investment table (request parameter ``mkt=12``).

    Column name abbreviations:
        YoY: year on year growth
        MoM: month on month growth
        Accum: Accumulation
    """
    request_params = {
        "cb": "datatable607120",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "200",
        "mkt": "12",
        "_": "1622021790947",
    }
    columns = [
        "Date",
        "Current_Month",
        "YoY",
        "MoM",
        "Current_Year_Accum",
    ]
    return _fetch_eastmoney_table(request_params, columns)


def cn_hi_old_monthly() -> pd.DataFrame:
    """Return the old-version house index table (request parameter ``mkt=10``).

    The original author's note describes this as the "old version
    (2008-2010)" of the house index.

    Column name abbreviations:
        HPI: Housing Prosperity Index
        LDAI: Land Development Area Index
        SPI: Sales Price Index
        YoY: year on year growth
    """
    request_params = {
        "cb": "datatable1895714",
        "type": "GJZB",
        "sty": "ZGZB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "200",
        "mkt": "10",
        "_": "1622022794457",
    }
    columns = [
        "Date",
        "Housing_Prosperity_Index",
        "HPI_YoY",
        "Land_Development_Area_Index",
        "LDAI_YoY",
        "Sales_Price_Index",
        "SPI_YoY",
    ]
    return _fetch_eastmoney_table(request_params, columns)


def cn_hi_mew_monthly() -> pd.DataFrame:
    """Return a house index table using the ``sty=XFJLB`` request.

    NOTE(review): this function looks unfinished.  Its name is presumably a
    typo for ``cn_hi_new_monthly`` (kept for compatibility), the column
    labels are identical to those of ``cn_hi_old_monthly``, and the request
    parameters have not been confirmed to return seven fields per row; a
    mismatch raises ``ValueError``.  The original author also recorded this
    alternative endpoint for the new house index:
    http://data.eastmoney.com/dataapi/cjsj/getnewhousechartdata?mkt=1&stat=1&city1=%E5%8C%97%E4%BA%AC&city2=%E9%95%BF%E6%98%A5

    Column name abbreviations:
        HPI: Housing Prosperity Index
        LDAI: Land Development Area Index
        SPI: Sales Price Index
        YoY: year on year growth
    """
    request_params = {
        "cb": "datatable6451982",
        "type": "GJZB",
        "sty": "XFJLB",
        "js": "({data:[(x)],pages:(pc)})",
        "p": "1",
        "ps": "2000",
        "mkt": "19",
        "pageNo": "1",
        "pageNum": "1",
        "_": "1603023435552",
    }
    columns = [
        "Date",
        "Housing_Prosperity_Index",
        "HPI_YoY",
        "Land_Development_Area_Index",
        "LDAI_YoY",
        "Sales_Price_Index",
        "SPI_YoY",
    ]
    # The original bound the frame to ``data`` and then used the undefined
    # name ``df``, so every call raised NameError; the shared helper builds
    # and returns the frame directly.
    return _fetch_eastmoney_table(request_params, columns)