From 4032023d8ca65b19ee34509e5c902f940ddd6be4 Mon Sep 17 00:00:00 2001
From: TerenceLiu98
Date: Thu, 27 May 2021 00:57:20 +0800
Subject: [PATCH] add functions

---
 .gitignore             | 207 +++++++++++++++++++++
 CEDA/__init__.py       |   3 +-
 CEDA/economic/macro.py | 396 +++++++++++++++++++++++++++++++++++++++--
 README.md              |   5 +
 4 files changed, 594 insertions(+), 17 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e885d1d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,207 @@
+
+# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,linux,windows
+# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,linux,windows
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+pytestdebug.log
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+doc/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#poetry.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+# .env
+.env/
+.venv/
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+pythonenv*
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# operating system-related files
+# file properties cache/storage on macOS
+*.DS_Store
+# thumbnail cache on Windows
+Thumbs.db
+
+# profiling data
+.prof
+
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,linux,windows
\ No newline at end of file
diff --git a/CEDA/__init__.py b/CEDA/__init__.py
index f9641c7..7e90110 100644
--- a/CEDA/__init__.py
+++ b/CEDA/__init__.py
@@ -1,3 +1,4 @@
 from CEDA.economic.macro import (
-    cn_gdp_quarter
+    cn_gdp_quarter,
+    cn_ig_monthly
 )
\ No newline at end of file
diff --git a/CEDA/economic/macro.py b/CEDA/economic/macro.py
index ccc8588..5d91175 100644
--- a/CEDA/economic/macro.py
+++ b/CEDA/economic/macro.py
@@ -5,6 +5,8 @@
 import demjson
 import requests
 from fake_useragent import UserAgent
+# TODO: add more detailed comments for each indicator
+
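+# NOTE: most fetchers in this module query the eastmoney "GJZB" endpoint below with a
+# table-specific "mkt" id, strip the JSONP wrapper and decode it with demjson, and
+# split the comma-separated rows into a pandas DataFrame.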
"({data:[(x)],pages:(pc)})", "p": "1", - "ps": "2000", - "mkt": "19", - "pageNo": "1", - "pageNum": "1", - "_": "1603023435552", + "ps": "200", + "mkt": "8", + "_": "1622041485306" } r = requests.get(tmp_url, params = request_params, headers = request_header) data_text = r.text data_json = demjson.decode(data_text[data_text.find("{") : -1]) - data = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", - "Housing_Prosperity_Index", - "HPI_YoY", - "Land_Development_Area_Index", - "LDAI_YoY", - "Sales_Price_Index", - "SPI_YoY" + "Climate_Index", + "CI_YoY", + "CI_MoM", + "Entrepreneur_Expectation_Index", + "EEI_YoY", + "EEI_MoM" ] return df +def cn_ig_monthly(): # Industry Growth + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable4577327", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "0", + "_": "1622042259898" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "IG_YoY", + "IG_Accum", + ] + return df + +def cn_cgpi_monthly(): # Corporate Goods Price Index + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable7184534", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "9", + "_": "1622042652353" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "General_Index", + "General_Index_YoY", + "Total_Index_MoM", + "Agricultural_Product", + "Agricultural_Product_YoY", + "Agricultural_PRoduct_MoM", + "Mineral_Product", + "Mineral_Product_YoY", + "Mineral_Product_MoM", + "Coal_Oil_Electricity", + "Coal_Oil_Electricity_YoY", + "Coal_Oil_Electricity_MoM" + ] + return df + +def cn_cci_csi_cei_monthly(): # Consumer Confidence Index & Consumer Satisfaction Index & Consumer Expectation Index + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable1243218", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "4", + "_": "1622043704818" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "CCI", + "CCI_YoY", + "CCI_MoM", + "CSI", + "CSI_YoY", + "CSI_MoM", + "CEI", + "CEI_YoY", + "CEI_MoM" + ] + return df + +def cn_trscg_monthly(): # Total Retail Sales of Consumer Goods + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = 
{"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable3665821", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "5", + "_": "1622044011316" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "Current_Month", + "TRSCG_YoY", + "TRSCG_MoM", + "TRSCG_Accum", + "TRSCG_Accum_YoY" + ] + return df + +def cn_ms_monthly(): # monetary Supply + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable3818891", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "11", + "_": "1622044292103" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "M2", + "M2_YoY", + "M2_MoM", + "M1", + "M1_YoY", + "M1_MoM", + "M0", + "M0_YoY", + "M0_MoM" + ] + return df + +def cn_ie_monthly(): # Import & Export + """ + + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable3818891", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "1", + "_": "1622044292103" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "Current_Month_Export", + "Current_Month_Export_YoY", + "Current_Month_Export_MoM", + "Current_Month_Import", + "Current_Month_Import_YoY", + "Current_Month_Import_MoM", + "Accumulation_Export", + "Accumulation_Export_YoY", + "Accumulation_Import", + "Accumulation_Import_YoY", + ] + return df + + +def cn_ie_monthly(): # Import & Export + """ + + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable3818891", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "1", + "_": "1622044292103" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "Current_Month_Export", + "Current_Month_Export_YoY", + "Current_Month_Export_MoM", + "Current_Month_Import", + "Current_Month_Import_YoY", + "Current_Month_Import_MoM", + "Accumulation_Export", + "Accumulation_Export_YoY", + "Accumulation_Import", + "Accumulation_Import_YoY", + ] + return df + +def cn_fgr_monthly(): # Forex and Gold Reserve + """ + + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "atatable6260802", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", 
+def cn_fgr_monthly(): # Forex and Gold Reserve
+    """
+    Forex: foreign exchange reserves; Gold: gold reserves
+    YoY: year-on-year growth; MoM: month-on-month growth
+    """
+    tmp_url = url["eastmoney"]
+    ua = UserAgent()
+    request_header = {"User-Agent": ua.random}
+    request_params = {
+        "cb": "datatable6260802",
+        "type": "GJZB",
+        "sty": "ZGZB",
+        "js": "({data:[(x)],pages:(pc)})",
+        "p": "1",
+        "ps": "200",
+        "mkt": "16",
+        "_": "1622044863548"
+    }
+    r = requests.get(tmp_url, params = request_params, headers = request_header)
+    data_text = r.text
+    data_json = demjson.decode(data_text[data_text.find("{") : -1])
+    df = pd.DataFrame([item.split(",") for item in data_json["data"]])
+    df.columns = [
+        "Date",
+        "Forex",
+        "Forex_YoY",
+        "Forex_MoM",
+        "Gold",
+        "Gold_YoY",
+        "Gold_MoM"
+    ]
+    return df
+
+def cn_ctsf_monthly(): # Client Transaction Settlement Funds
+    """
+    CTSF: Client Transaction Settlement Funds
+    """
+    tmp_url = "http://data.eastmoney.com/dataapi/cjsj/getbanktransferdata?"
+    ua = UserAgent()
+    request_header = {"User-Agent": ua.random}
+    request_params = {
+        "p": "1",
+        "ps": "200"
+    }
+    r = requests.get(tmp_url, params = request_params, headers = request_header)
+    data_text = r.text
+    data_json = demjson.decode(data_text[data_text.find("["):-11])
+    df = pd.DataFrame(data_json)
+    return df
+
+# TODO: needs help (two tables are still missing)
+def cn_sao_monthly(): # Stock Account Overview
+    """
+    SAO: Stock Account Overview (new and active investors, SH index close)
+    """
+    tmp_url = "http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?"
+    ua = UserAgent()
+    request_header = {"User-Agent": ua.random}
+    request_params = {
+        "callback": "jQuery1123014377091065513636_1622046865705",
+        "type": "GPKHData",
+        "st": "HdDate",
+        "sr": "-1",
+        "sty": "Chart",
+        "token": "894050c76af8597a853f5b408b759f5d",
+        "ps": "2000",
+        "_": "1622046865706"
+    }
+    r = requests.get(tmp_url, params = request_params, headers = request_header)
+    data_text = r.text
+    data_json = demjson.decode(data_text[data_text.find("(")+1:-1])
+    df = pd.DataFrame(data_json)
+    df.columns = [
+        "Date",
+        "New_Investor",
+        "Active_Investor",
+        "SHIndexClose"
+    ]
+    df.Date = pd.to_datetime(df.Date, format = "%Y年%m月")
+    return df
+
+
 """
 if __name__ == "__main__":
 """
\ No newline at end of file
diff --git a/README.md b/README.md
index 3777f38..3d93aff 100644
--- a/README.md
+++ b/README.md
@@ -3,3 +3,8 @@
 ## Introduction
 
 This is a data collecting project, with both `python` and `R`
+
+
+## Acknowledgement
+
+* Thanks to [akshare](https://github.com/jindaxiang/akshare/)
\ No newline at end of file
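
A minimal usage sketch for the fetchers added above (it assumes pandas, requests, demjson and fake_useragent are installed, that the eastmoney endpoints are reachable, and that the date and numeric fields arrive as strings, which is an assumption about the endpoint's payload; function and column names are taken from CEDA/economic/macro.py in this patch):

    import pandas as pd
    from CEDA.economic.macro import cn_ppi_monthly, cn_ms_monthly

    # pull two of the new monthly series
    ppi = cn_ppi_monthly()   # Producer Price Index table
    ms = cn_ms_monthly()     # M0/M1/M2 money-supply table

    # the endpoint returns strings, so parse dates and numbers before analysis
    for df in (ppi, ms):
        df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
        df[df.columns[1:]] = df[df.columns[1:]].apply(pd.to_numeric, errors="coerce")

    # join on the reporting date and restrict to a sample window,
    # mirroring the commented-out date filter in macro.py
    merged = ppi.merge(ms, on="Date", how="inner")
    recent = merged[(merged["Date"] >= "2019-01-01") & (merged["Date"] <= "2021-04-30")]
    print(recent[["Date", "Current_Month_YoY", "M2_YoY"]].head())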