diff --git a/CEDA/MacroEcon/cn.py b/CEDA/MacroEcon/cn.py index f18b960..367f396 100644 --- a/CEDA/MacroEcon/cn.py +++ b/CEDA/MacroEcon/cn.py @@ -11,10 +11,12 @@ url = { "eastmoney": "http://datainterface.eastmoney.com/EM_DataCenter/JS.aspx" } + def gdp_quarterly(): """ ABS: absolute value (per 100 million CNY) YoY: year on year growth + Data source: http://data.eastmoney.com/cjsj/gdp.html """ ua = UserAgent() request_header = {"User-Agent": ua.random} @@ -29,28 +31,40 @@ def gdp_quarterly(): "mkt": "20", "_": "1622020352668" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Absolute_Value", - "YoY", + "YoY_Rate", "Primary_Industry_ABS", - "Primary_Industry_YoY", + "Primary_Industry_YoY_Rate", "Secondary_Industry_ABS", - "Secondary_Industry_YoY", + "Secondary_Industry_YoY_Rate", "Tertiary_Industry_ABS", - "Tertiary_Industry_YoY", + "Tertiary_Industry_YoY_Rate", ] - #df[(df['Date'] >= startdate) & (df['Date'] <= enddate)] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df["Absolute_Value"] = df["Absolute_Value"].astype(float) + df["Secondary_Industry_ABS"] = df["Secondary_Industry_ABS"].astype(float) + df["Tertiary_Industry_ABS"] = df["Tertiary_Industry_ABS"].astype(float) + df["Absolute_Value"] = df["Absolute_Value"].astype(float) + df["YoY_Rate"] = df["YoY_Rate"].astype(float) / 100 + df["Secondary_Industry_YoY_Rate"] = df["Secondary_Industry_YoY_Rate"].astype( + float) / 100 + df["Tertiary_Industry_YoY_Rate"] = df["Tertiary_Industry_YoY_Rate"].astype( + float) / 100 return df + def ppi_monthly(): """ ABS: absolute value (per 100 million CNY) YoY: year on year growth + Accum: Accumulation + Data source: 
http://data.eastmoney.com/cjsj/ppi.html """ ua = UserAgent() request_header = {"User-Agent": ua.random} @@ -65,24 +79,30 @@ def ppi_monthly(): "mkt": "22", "_": "1622047940401" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Current_Month", - "Current_Month_YoY", + "Current_Month_YoY_Rate", "Current_Month_Accum" ] - #df[(df['Date'] >= startdate) & (df['Date'] <= enddate)] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df["Current_Month"] = df["Current_Month"].astype(float) + df["Current_Month_YoY_Rate"] = df["Current_Month_YoY_Rate"].astype( + float) / 100 + df["Current_Month_Accum"] = df["Current_Month_Accum"].astype(float) return df + def cpi_monthly(): """ Accum: Accumulation YoY: year on year growth MoM: month on month growth + Data source: http://data.eastmoney.com/cjsj/cpi.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -98,31 +118,56 @@ def cpi_monthly(): "mkt": "19", "_": "1622020352668" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Notion_Monthly", - "Notion_YoY", - "Notion_MoM", + "Notion_YoY_Rate", + "Notion_MoM_Rate", "Notion_Accum", "Urban_Monthly", - "Urban_YoY", - "Urban_MoM", + "Urban_YoY_Rate", + "Urban_MoM_Rate", "Urban_Accum", "Rural_Monthly", - "Rural_YoY", - "Rural_MoM", + "Rural_YoY_Rate", + "Rural_MoM_Rate", "Rural_Accum", ] + 
df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Notion_Monthly", + "Notion_Accum", + "Urban_Monthly", + "Urban_Accum", + "Rural_Monthly", + "Rural_Accum"]] = df[["Notion_Monthly", + "Notion_Accum", + "Urban_Monthly", + "Urban_Accum", + "Rural_Monthly", + "Rural_Accum"]].astype(float) + df[["Notion_YoY_Rate", + "Notion_MoM_Rate", + "Urban_YoY_Rate", + "Urban_MoM_Rate", + "Rural_YoY_Rate", + "Rural_MoM_Rate"]] = df[["Notion_YoY_Rate", + "Notion_MoM_Rate", + "Urban_YoY_Rate", + "Urban_MoM_Rate", + "Rural_YoY_Rate", + "Rural_MoM_Rate"]].astype(float) / 100 return df + def pmi_monthly(): """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: http://data.eastmoney.com/cjsj/pmi.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -138,23 +183,30 @@ def pmi_monthly(): "mkt": "21", "_": "162202151821" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) - temp_df = pd.DataFrame([item.split(",") for item in data_json["data"]]) - temp_df.columns = [ + data_json = demjson.decode(data_text[data_text.find("{"): -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ "Date", "Man_Industry_Index", - "Man_Index_YoY", + "Man_Index_YoY_Rate", "Non-Man_Industry_Index", - "Non-Man_Index_YoY", + "Non-Man_Index_YoY_Rate", ] - return temp_df + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Man_Industry_Index", "Non-Man_Industry_Index"]] = \ + df[["Man_Industry_Index", "Non-Man_Industry_Index"]].astype(float) + df[["Man_Index_YoY_Rate", "Non-Man_Index_YoY_Rate"]] = \ + df[["Man_Index_YoY_Rate", "Non-Man_Index_YoY_Rate"]].astype(float) / 100 + return df -def fai_monthly(): # fix asset investment + +def fai_monthly(): # fix asset investment """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: 
http://data.eastmoney.com/cjsj/gdzctz.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -170,23 +222,30 @@ def fai_monthly(): # fix asset investment "mkt": "12", "_": "1622021790947" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Current_Month", - "YoY", - "MoM", + "YoY_Rate", + "MoM_Rate", "Current_Year_Accum" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Current_Month", "Current_Year_Accum"]] = \ + df[["Current_Month", "Current_Year_Accum"]].astype(float) + df[["YoY_Rate", "MoM_Rate"]] = \ + df[["YoY_Rate", "MoM_Rate"]].astype(float) / 100 return df -def hi_old_monthly(): # house index old version (2008-2010) + +def hi_old_monthly(): # house index old version (2008-2010) """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: http://data.eastmoney.com/cjsj/house.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -202,65 +261,122 @@ def hi_old_monthly(): # house index old version (2008-2010) "mkt": "10", "_": "1622022794457" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Housing_Prosperity_Index", - "HPI_YoY", + "HPI_YoY_Rate", "Land_Development_Area_Index", - "LDAI_YoY", + "LDAI_YoY_Rate", "Sales_Price_Index", - "SPI_YoY" + "SPI_YoY_Rate" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Housing_Prosperity_Index", 
+ "Land_Development_Area_Index", + "Sales_Price_Index"]] = df[["Housing_Prosperity_Index", + "Land_Development_Area_Index", + "Sales_Price_Index"]].astype(float) + df[["HPI_YoY_Rate", "LDAI_YoY_Rate", "SPI_YoY_Rate"]] = \ + df[["HPI_YoY_Rate", "LDAI_YoY_Rate", "SPI_YoY_Rate"]].astype(float) / 100 return df # mkt=1&stat=2&city1=%E5%B9%BF%E5%B7%9E&city2=%E4%B8%8A%E6%B5%B7 -def hi_new_monthly(city1:str, city2:str): # newly built commercial housing & second-hand commercial housing + +# newly built commercial housing & second-hand commercial housing +def hi_new_monthly(city1: str, city2: str): """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: http://data.eastmoney.com/cjsj/newhouse.html """ tmp_url = "http://data.eastmoney.com/dataapi/cjsj/getnewhousechartdata?" ua = UserAgent() request_header = {"User-Agent": ua.random} - request_params_nbch = { + request_params_nbch_MoM = { "mkt": "1", "stat": "2", "city1": "{}".format(city1), "city2": "{}".format(city2) } - request_params_shch = { + request_params_shch_MoM = { "mkt": "1", "stat": "3", "city1": "{}".format(city1), "city2": "{}".format(city2) } - r_nbch = requests.get(tmp_url, params = request_params_nbch, headers = request_header) - r_shch = requests.get(tmp_url, params = request_params_shch, headers = request_header) - data_text_nbch = r_nbch.text - data_text_shch = r_shch.text - data_json_nbch = demjson.decode(data_text_nbch) - data_json_shch = demjson.decode(data_text_shch) - date_nbch = data_json_nbch['chart']['series']['value'] - data1_nbch = data_json_nbch['chart']['graphs']['graph'][0]['value'] - data2_nbch = data_json_nbch['chart']['graphs']['graph'][1]['value'] - data1_shch = data_json_shch['chart']['graphs']['graph'][0]['value'] - data2_shch = data_json_shch['chart']['graphs']['graph'][1]['value'] - df = pd.DataFrame({"Date": date_nbch, - "City1":data1_nbch, - "City2":data2_nbch, - "City1":data1_shch, - "City2":data2_shch}) + r_nbch_MoM = requests.get( + tmp_url, + 
params=request_params_nbch_MoM, + headers=request_header) + r_shch_MoM = requests.get( + tmp_url, + params=request_params_shch_MoM, + headers=request_header) + data_text_nbch_MoM = r_nbch_MoM.text + data_text_shch_MoM = r_shch_MoM.text + data_json_nbch_MoM = demjson.decode(data_text_nbch_MoM) + data_json_shch_MoM = demjson.decode(data_text_shch_MoM) + date_nbch = data_json_nbch_MoM['chart']['series']['value'] + data1_nbch_MoM = data_json_nbch_MoM['chart']['graphs']['graph'][0]['value'] + data2_nbch_MoM = data_json_nbch_MoM['chart']['graphs']['graph'][1]['value'] + data1_shch_MoM = data_json_shch_MoM['chart']['graphs']['graph'][0]['value'] + data2_shch_MoM = data_json_shch_MoM['chart']['graphs']['graph'][1]['value'] + df_MoM = pd.DataFrame({"Date": date_nbch, + "City1_nbch_MoM": data1_nbch_MoM, + "City1_shch_MoM": data1_shch_MoM, + "City2_nbch_MoM": data2_nbch_MoM, + "City2_shch_MoM": data2_shch_MoM}) + df_MoM["Date"] = pd.to_datetime(df_MoM["Date"], format="%m/%d/%Y") + + request_params_nbch_YoY = { + "mkt": "2", + "stat": "2", + "city1": "{}".format(city1), + "city2": "{}".format(city2) + } + request_params_shch_YoY = { + "mkt": "2", + "stat": "3", + "city1": "{}".format(city1), + "city2": "{}".format(city2) + } + r_nbch_YoY = requests.get( + tmp_url, + params=request_params_nbch_YoY, + headers=request_header) + r_shch_YoY = requests.get( + tmp_url, + params=request_params_shch_YoY, + headers=request_header) + data_text_nbch_YoY = r_nbch_YoY.text + data_text_shch_YoY = r_shch_YoY.text + data_json_nbch_YoY = demjson.decode(data_text_nbch_YoY) + data_json_shch_YoY = demjson.decode(data_text_shch_YoY) + date_nbch = data_json_nbch_YoY['chart']['series']['value'] + data1_nbch_YoY = data_json_nbch_YoY['chart']['graphs']['graph'][0]['value'] + data2_nbch_YoY = data_json_nbch_YoY['chart']['graphs']['graph'][1]['value'] + data1_shch_YoY = data_json_shch_YoY['chart']['graphs']['graph'][0]['value'] + data2_shch_YoY = data_json_shch_YoY['chart']['graphs']['graph'][1]['value'] 
+ df_YoY = pd.DataFrame({"Date": date_nbch, + "City1_nbch_YoY": data1_nbch_YoY, + "City1_shch_YoY": data1_shch_YoY, + "City2_nbch_YoY": data2_nbch_YoY, + "City2_shch_YoY": data2_shch_YoY}) + df_YoY["Date"] = pd.to_datetime(df_YoY["Date"], format="%m/%d/%Y") + df = df_YoY.merge(df_MoM, on="Date") return df -def ci_eei_monthly(): # Climate Index & Entrepreneur Expectation Index + +def ci_eei_monthly(): # Climate Index & Entrepreneur Expectation Index """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: http://data.eastmoney.com/cjsj/qyjqzs.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -276,25 +392,33 @@ def ci_eei_monthly(): # Climate Index & Entrepreneur Expectation Index "mkt": "8", "_": "1622041485306" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Climate_Index", - "CI_YoY", - "CI_MoM", + "CI_YoY_Rate", + "CI_MoM_Rate", "Entrepreneur_Expectation_Index", - "EEI_YoY", - "EEI_MoM" + "EEI_YoY_Rate", + "EEI_MoM_Rate" ] + df.replace('', np.nan, inplace=True) + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Climate_Index", "Entrepreneur_Expectation_Index"]] = \ + df[["Climate_Index", "Entrepreneur_Expectation_Index"]].astype(float) + df[["CI_YoY_Rate", "CI_MoM_Rate", "EEI_YoY_Rate", "EEI_MoM_Rate"]] = df[[ + "CI_YoY_Rate", "CI_MoM_Rate", "EEI_YoY_Rate", "EEI_MoM_Rate"]].astype(float) / 100 return df -def ig_monthly(): # Industry Growth + +def ig_monthly(): # Industry Growth """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: http://data.eastmoney.com/cjsj/gyzjz.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -310,21 +434,26 @@ def ig_monthly(): # Industry Growth 
"mkt": "0", "_": "1622042259898" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", - "IG_YoY", - "IG_Accum", + "IG_YoY_Rate", + "IG_Accum_Rate", ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["IG_YoY_Rate", "IG_Accum_Rate"]] = \ + df[["IG_YoY_Rate", "IG_Accum_Rate"]].astype(float) / 100 return df -def cgpi_monthly(): # Corporate Goods Price Index + +def cgpi_monthly(): # Corporate Goods Price Index """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: http://data.eastmoney.com/cjsj/qyspjg.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -340,31 +469,52 @@ def cgpi_monthly(): # Corporate Goods Price Index "mkt": "9", "_": "1622042652353" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "General_Index", - "General_Index_YoY", - "Total_Index_MoM", + "General_Index_YoY_Rate", + "Total_Index_MoM_Rate", "Agricultural_Product", - "Agricultural_Product_YoY", - "Agricultural_PRoduct_MoM", + "Agricultural_Product_YoY_Rate", + "Agricultural_Product_MoM_Rate", "Mineral_Product", - "Mineral_Product_YoY", - "Mineral_Product_MoM", + "Mineral_Product_YoY_Rate", + "Mineral_Product_MoM_Rate", "Coal_Oil_Electricity", - "Coal_Oil_Electricity_YoY", - "Coal_Oil_Electricity_MoM" + "Coal_Oil_Electricity_YoY_Rate", + "Coal_Oil_Electricity_MoM_Rate" ] + df["Date"] = 
pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["General_Index", + "Agricultural_Product", + "Mineral_Product", + "Coal_Oil_Electricity"]] = df[["General_Index", + "Agricultural_Product", + "Mineral_Product", + "Coal_Oil_Electricity"]].astype(float) + df[["General_Index_YoY_Rate", + "Total_Index_MoM_Rate", + "Agricultural_Product_YoY_Rate", + "Agricultural_Product_MoM_Rate", + "Coal_Oil_Electricity_YoY_Rate", + "Coal_Oil_Electricity_MoM_Rate"]] = df[["General_Index_YoY_Rate", + "Total_Index_MoM_Rate", + "Agricultural_Product_YoY_Rate", + "Agricultural_Product_MoM_Rate", + "Coal_Oil_Electricity_YoY_Rate", + "Coal_Oil_Electricity_MoM_Rate"]].astype(float) / 100 return df -def cci_csi_cei_monthly(): # Consumer Confidence Index & Consumer Satisfaction Index & Consumer Expectation Index + +def cci_csi_cei_monthly(): # Consumer Confidence Index & Consumer Satisfaction Index & Consumer Expectation Index """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: http://data.eastmoney.com/cjsj/xfzxx.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -380,28 +530,39 @@ def cci_csi_cei_monthly(): # Consumer Confidence Index & Consumer Satisfaction I "mkt": "4", "_": "1622043704818" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "CCI", - "CCI_YoY", - "CCI_MoM", + "CCI_YoY_Rate", + "CCI_MoM_Rate", "CSI", - "CSI_YoY", - "CSI_MoM", + "CSI_YoY_Rate", + "CSI_MoM_Rate", "CEI", - "CEI_YoY", - "CEI_MoM" + "CEI_YoY_Rate", + "CEI_MoM_Rate" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["CCI", "CSI", "CEI"]] = \ + df[["CCI", "CSI", "CEI"]].astype(float) + df[["CCI_YoY_Rate", "CCI_MoM_Rate", + "CSI_YoY_Rate", 
"CSI_MoM_Rate", + "CEI_YoY_Rate", "CEI_MoM_Rate"]] = \ + df[["CCI_YoY_Rate", "CCI_MoM_Rate", + "CSI_YoY_Rate", "CSI_MoM_Rate", + "CEI_YoY_Rate", "CEI_MoM_Rate"]].astype(float) / 100 return df -def trscg_monthly(): # Total Retail Sales of Consumer Goods + +def trscg_monthly(): # Total Retail Sales of Consumer Goods """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: http://data.eastmoney.com/cjsj/xfp.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -417,24 +578,32 @@ def trscg_monthly(): # Total Retail Sales of Consumer Goods "mkt": "5", "_": "1622044011316" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Current_Month", - "TRSCG_YoY", - "TRSCG_MoM", + "TRSCG_YoY_Rate", + "TRSCG_MoM_Rate", "TRSCG_Accum", - "TRSCG_Accum_YoY" + "TRSCG_Accum_YoY_Rate" ] + df.replace("", np.nan, inplace=True) + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Current_Month", "TRSCG_Accum"]] = \ + df[["Current_Month", "TRSCG_Accum"]].astype(float) + df[["TRSCG_YoY_Rate", "TRSCG_MoM_Rate", "TRSCG_Accum_YoY_Rate"]] = df[[ + "TRSCG_YoY_Rate", "TRSCG_MoM_Rate", "TRSCG_Accum_YoY_Rate"]].astype(float) / 100 return df -def ms_monthly(): # monetary Supply + +def ms_monthly(): # monetary Supply """ Man: manufacturing Non-Man: Non-manufacturing + Data Source: http://data.eastmoney.com/cjsj/hbgyl.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -450,27 +619,35 @@ def ms_monthly(): # monetary Supply "mkt": "11", "_": "1622044292103" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - 
data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "M2", - "M2_YoY", - "M2_MoM", + "M2_YoY_Rate", + "M2_MoM_Rate", "M1", - "M1_YoY", - "M1_MoM", + "M1_YoY_Rate", + "M1_MoM_Rate", "M0", - "M0_YoY", - "M0_MoM" + "M0_YoY_Rate", + "M0_MoM_Rate" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["M0", "M1", "M2"]] = \ + df[["M0", "M1", "M2"]].astype(float) + df[["M0_YoY_Rate", "M1_YoY_Rate", "M2_YoY_Rate", + "M0_MoM_Rate", "M1_MoM_Rate", "M2_MoM_Rate"]] = \ + df[["M0_YoY_Rate", "M1_YoY_Rate", "M2_YoY_Rate", + "M0_MoM_Rate", "M1_MoM_Rate", "M2_MoM_Rate"]].astype(float) / 100 return df -def ie_monthly(): # Import & Export - """ +def ie_monthly(): # Import & Export + """ + Data Source: http://data.eastmoney.com/cjsj/hgjck.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -486,29 +663,45 @@ def ie_monthly(): # Import & Export "mkt": "1", "_": "1622044292103" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Current_Month_Export", - "Current_Month_Export_YoY", - "Current_Month_Export_MoM", + "Current_Month_Export_YoY_Rate", + "Current_Month_Export_MoM_Rate", "Current_Month_Import", - "Current_Month_Import_YoY", - "Current_Month_Import_MoM", + "Current_Month_Import_YoY_Rate", + "Current_Month_Import_MoM_Rate", "Accumulation_Export", - "Accumulation_Export_YoY", + "Accumulation_Export_YoY_Rate", "Accumulation_Import", - "Accumulation_Import_YoY" + "Accumulation_Import_YoY_Rate" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + 
df[["Current_Month_Export", "Current_Month_Import", + "Accumulation_Export", "Accumulation_Import"]] = \ + df[["Current_Month_Export", "Current_Month_Import", + "Accumulation_Export", "Accumulation_Import"]].astype(float) + df[["Current_Month_Export_YoY_Rate", + "Current_Month_Export_MoM_Rate", + "Current_Month_Import_YoY_Rate", + "Current_Month_Import_MoM_Rate", + "Accumulation_Export_YoY_Rate", + "Accumulation_Export_MoM_Rate"]] = df[["Current_Month_Export_YoY_Rate", + "Current_Month_Export_MoM_Rate", + "Current_Month_Import_YoY_Rate", + "Current_Month_Import_MoM_Rate", + "Accumulation_Export_YoY_Rate", + "Accumulation_Export_MoM_Rate"]].astype(float) / 100 return df -def stock_monthly(): # Import & Export +def stock_monthly(): # Import & Export """ -&type=GJZB&sty=ZGZB&js=(%5B(x)%5D)&p=1&ps=200&mkt=2&_=1622084599456 + Data Source: http://data.eastmoney.com/cjsj/gpjytj.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -524,9 +717,9 @@ def stock_monthly(): # Import & Export "mkt": "2", "_": "1622084599456" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("(")+1:-1]) + data_json = demjson.decode(data_text[data_text.find("(") + 1:-1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", @@ -543,11 +736,15 @@ def stock_monthly(): # Import & Export "SH_lowest", "SZ_lowest" ] + df.replace("", np.nan, inplace=True) + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[list(df.columns[1:])] = df[list(df.columns[1:])].astype(float) return df -def fgr_monthly(): # Forex and Gold Reserve - """ +def fgr_monthly(): # Forex and Gold Reserve + """ + Data Source: http://data.eastmoney.com/cjsj/gpjytj.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -563,24 +760,34 @@ def fgr_monthly(): # Forex and Gold Reserve "mkt": "16", "_": 
"1622044863548" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Forex", - "Forex_YoY", - "Forex_MoM", + "Forex_YoY_Rate", + "Forex_MoM_Rate", "Gold", - "Gold_YoY", - "Gold_MoM" + "Gold_YoY_Rate", + "Gold_MoM_Rate" ] + df.replace("", np.nan, inplace=True) + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Forex", "Gold"]] = \ + df["Forex", "Gold"].astype(float) + df[["Forex_YoY_Rate", "Gold_YoY_Rate", + "Forex_MoM_Rate", "Gold_MoM_Rate"]] = \ + df["Forex_YoY_Rate", "Gold_YoY_Rate", + "Forex_MoM_Rate", "Gold_MoM_Rate"].astype(float) / 100 return df -#TODO: SPECIAL CASE -def ctsf_monthly(): # Client Transaction Settlement Funds - """ +# TODO: SPECIAL CASE + +def ctsf_monthly(): # Client Transaction Settlement Funds + """ + http://data.eastmoney.com/cjsj/banktransfer.html """ tmp_url = "http://data.eastmoney.com/dataapi/cjsj/getbanktransferdata?" 
ua = UserAgent() @@ -589,16 +796,22 @@ def ctsf_monthly(): # Client Transaction Settlement Funds "p": "1", "ps": "200" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text data_json = demjson.decode(data_text[data_text.find("["):-11]) df = pd.DataFrame(data_json) + df.replace("", np.nan, inplace=True) + df["StartDate"] = pd.to_datetime(df["StartDate"], format="%Y-%m-%d") + df["EndDate"] = pd.to_datetime(df["EndDate"], format="%Y-%m-%d") + df[list(df.columns)[2:]] = df[list(df.columns)[2:]].astype(float) return df # TODO: SPECIAL CASE -def sao_monthly(): # Stock Account Overview - """ + +def sao_monthly(): # Stock Account Overview + """ + http://data.eastmoney.com/cjsj/gpkhsj.html """ tmp_url = "http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?" ua = UserAgent() @@ -606,7 +819,7 @@ def sao_monthly(): # Stock Account Overview request_params = { "callback": "datatable4006236", "type": "GPKHData", - "js" : "({data:[(x)],pages:(pc)})", + "js": "({data:[(x)],pages:(pc)})", "st": "SDATE", "sr": "-1", "token": "894050c76af8597a853f5b408b759f5d", @@ -614,15 +827,15 @@ def sao_monthly(): # Stock Account Overview "ps": "2000", "_": "1622079339035" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{")+6 : -14]) + data_json = demjson.decode(data_text[data_text.find("{") + 6: -14]) df = pd.DataFrame(data_json[0]) df.columns = [ "Date", "New_Investor", - "New_Investor_MoM", - "New_Investor_YoY", + "New_Investor_MoM_Rate", + "New_Investor_YoY_Rate", "Active_Investor", "Active_Investor_A_Share", "Active_Investor_B_share", @@ -631,12 +844,18 @@ def sao_monthly(): # Stock Account Overview "SHSZ_Market_Capitalization", "SHSZ_Average_Capitalization" ] - df.Date = 
pd.to_datetime(df.Date, format = "%Y年%m月") + df.replace("-", np.nan, inplace=True) + df.Date = pd.to_datetime(df.Date, format="%Y年%m月") + df[list(df.columns[~df.columns.isin(["Date", "New_Investor_MoM_Rate", "New_Investor_YoY_Rate"])])] = df[list( + df.columns[~df.columns.isin(["Date", "New_Investor_MoM_Rate", "New_Investor_YoY_Rate"])])].astype(float) + df[["New_Investor_MoM_Rate", "New_Investor_YoY_Rate"]] = \ + df[["New_Investor_MoM_Rate", "New_Investor_YoY_Rate"]].astype(float) / 100 return df -def fdi_monthly(): # Foreign Direct Investment - """ +def fdi_monthly(): # Foreign Direct Investment + """ + http://data.eastmoney.com/cjsj/fdi.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -652,23 +871,30 @@ def fdi_monthly(): # Foreign Direct Investment "mkt": "15", "_": "1622044863548" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Current_Month", - "YoY", - "MoM", + "YoY_Rate", + "MoM_Rate", "Accumulation", - "Accum_YoY" + "Accum_YoY_Rate" ] + df.replace("", np.nan, inplace=True) + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Current_Month", "Accumulation"]] = \ + df[["Current_Month", "Accumulation"]].astype(float) + df[["YoY_Rate", "MoM_Rate", "Accum_YoY_Rate"]] = \ + df[["YoY_Rate", "MoM_Rate", "Accum_YoY_Rate"]].astype(float) / 100 return df -def gr_monthly(): # Government Revenue - """ +def gr_monthly(): # Government Revenue + """ + http://data.eastmoney.com/cjsj/czsr.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -684,23 +910,29 @@ def gr_monthly(): # Government Revenue "mkt": "14", "_": "1622080618625" } - r = requests.get(tmp_url, params = request_params, headers = 
request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Current_Month", - "YoY", - "MoM", + "YoY_Rate", + "MoM_Rate", "Accumulation", - "Accum_YoY" + "Accum_YoY_Rate" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Current_Month", "Accumulation"]] = \ + df[["Current_Month", "Accumulation"]].astype(float) + df[["YoY_Rate", "MoM_rate", "Accum_YoY_Rate"]] = \ + df[["YoY_Rate", "MoM_rate", "Accum_YoY_Rate"]].astype(float) / 100 return df -def ti_monthly(): # Tax Income - """ +def ti_monthly(): # Tax Income + """ + http://data.eastmoney.com/cjsj/qgsssr.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -716,22 +948,27 @@ def ti_monthly(): # Tax Income "mkt": "3", "_": "1622080669713" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Current_Month", - "YoY", - "MoM" + "YoY_Rate", + "MoM_Rate" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Current_Month"]] = \ + df[["Current_Month"]].astype(float) + df[["YoY_Rate", "MoM_rate"]] = \ + df[["YoY_Rate", "MoM_rate"]].astype(float) / 100 return df -def nl_monthly(): # New Loan +def nl_monthly(): # New Loan """ - + http://data.eastmoney.com/cjsj/xzxd.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -747,23 +984,29 @@ def nl_monthly(): # New Loan "mkt": "7", "_": "1622080800162" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r 
= requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Current_Month", - "YoY", - "MoM", + "YoY_Rate", + "MoM_Rate", "Accumulation", - "Accum_YoY" + "Accum_YoY_Rate" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Current_Month", "Accumulation"]] = \ + df[["Current_Month", "Accumulation"]].astype(float) + df[["YoY_Rate", "MoM_Rate", "Accum_YoY_Rate"]] =\ + df[["YoY_Rate", "MoM_Rate", "Accum_YoY_Rate"]].astype(float) / 100 return df -def dfclc_monthly(): # Deposit of Foreign Currency and Local Currency - """ +def dfclc_monthly(): # Deposit of Foreign Currency and Local Currency + """ + http://data.eastmoney.com/cjsj/wbck.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -779,22 +1022,28 @@ def dfclc_monthly(): # Deposit of Foreign Currency and Local Currency "mkt": "18", "_": "1622081057370" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", "Current_Month", - "YoY", - "MoM", + "YoY_Rate", + "MoM_Rate", "Accumulation" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Current_Month", "Accumulation"]] = \ + df[["Current_Month", "Accumulation"]].astype(float) + df[["YoY_Rate", "MoM_Rate"]] = \ + df[["YoY_Rate", "MoM_Rate"]].astype(float) / 100 return df -def fl_monthly(): # Forex Loan - """ +def fl_monthly(): # Forex Loan + """ + http://data.eastmoney.com/cjsj/whxd.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -810,9 
+1059,9 @@ def fl_monthly(): # Forex Loan "mkt": "17", "_": "1622081336038" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", @@ -821,11 +1070,17 @@ def fl_monthly(): # Forex Loan "MoM", "Accumulation" ] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") + df[["Current_Month", "Accumulation"]] = \ + df[["Current_Month", "Accumulation"]].astype(float) + df = df.rename(columns={"YoY": "YoY_Rate", "MoM": "MoM_Rate"}) + df[["YoY_Rate", "MoM_Rate"]] = df[["YoY_Rate", "MoM_Rate"]].astype(float) / 100 return df -def drr_monthly(): # Deposit Reserve Ratio - """ +def drr_monthly(): # Deposit Reserve Ratio + """ + http://data.eastmoney.com/cjsj/ckzbj.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -841,9 +1096,9 @@ def drr_monthly(): # Deposit Reserve Ratio "mkt": "23", "_": "1622081448882" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Announcement Date", @@ -858,11 +1113,27 @@ def drr_monthly(): # Deposit Reserve Ratio "SHIndex_Rate", "SZIndex_Rate" ] + df["Announcement Date"] = pd.to_datetime( + df["Announcement Date"], format="%Y-%m-%d") + df["Effective Date"] = pd.to_datetime( + df["Effective Date"], format="%Y-%m-%d") + df[["Large_Financial_institution_Before", + "Large_Financial_institution_After", + "Large_Financial_institution_Adj_Rate", + "S&M_Financial_institution_Before", + "S&M_Financial_institution_After", +
"S&M_Financial_institution_Adj_Rate"]] = df[["Large_Financial_institution_Before", + "Large_Financial_institution_After", + "Large_Financial_institution_Adj_Rate", + "S&M_Financial_institution_Before", + "S&M_Financial_institution_After", + "S&M_Financial_institution_Adj_Rate"]].astype(float) / 100 return df -def interest_monthly(): # Interest - """ +def interest_monthly(): # Interest + """ + http://data.eastmoney.com/cjsj/yhll.html """ tmp_url = url["eastmoney"] ua = UserAgent() @@ -878,9 +1149,9 @@ def interest_monthly(): # Interest "mkt": "13", "_": "1622081956464" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -1]) + data_json = demjson.decode(data_text[data_text.find("{"): -1]) df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Announcement Date", @@ -906,11 +1177,15 @@ def interest_monthly(): # Interest "SHIndex_Rate", "SZIndex_Rate" ]] + df[list(df.columns)] = df[list(df.columns)].astype(float) / 100 return df -#TODO: SPECIAL CASE -def gdc_daily(): # gasoline, Diesel and Crude Oil +# TODO: SPECIAL CASE + + +def gdc_daily(): # gasoline, Diesel and Crude Oil """ + http://data.eastmoney.com/cjsj/oil_default.html """ tmp_url = "http://datacenter-web.eastmoney.com/api/data/get?" 
ua = UserAgent() @@ -925,16 +1200,18 @@ def gdc_daily(): # gasoline, Diesel and Crude Oil "p": "1", "ps": "50000", "source": "WEB", - "_":"1622082348722" + "_": "1622082348722" } - r = requests.get(tmp_url, params = request_params, headers = request_header) + r = requests.get(tmp_url, params=request_params, headers=request_header) data_text = r.text - data_json = demjson.decode(data_text[data_text.find("{") : -2]) + data_json = demjson.decode(data_text[data_text.find("{"): -2]) df = pd.DataFrame(data_json["result"]["data"]) df.columns = ["Crude_Oil", "Date", "Gasoline", "Diesel"] df = df[["Date", "Gasoline", "Diesel", "Crude_Oil"]] + df["Date"] = pd.to_datetime(df["Date"], format="%Y-%m-%d") return df + """ if __name__ == "__main__": -""" \ No newline at end of file +""" diff --git a/setup.py b/setup.py index 3833b47..6fcc136 100644 --- a/setup.py +++ b/setup.py @@ -15,6 +15,17 @@ setup( url = "https://github.com/TerenceLiu98/CEDApy", packages = find_packages(), license = "MIT", + install_requires=[ + "numpy>=1.15.4", + "pandas>=0.25", + "requests>=2.22.0", + "demjson>=2.2.4", + "pillow>=6.2.0", + "xlrd==1.2.0", + "tqdm>=4.43.0", + "tabulate>=0.8.6", + "fake_useragent" + ], classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License",