From 4032023d8ca65b19ee34509e5c902f940ddd6be4 Mon Sep 17 00:00:00 2001
From: TerenceLiu98
Date: Thu, 27 May 2021 00:57:20 +0800
Subject: [PATCH] add functions

---
 .gitignore             | 207 +++++++++++++++++++++
 CEDA/__init__.py       |   3 +-
 CEDA/economic/macro.py | 396 +++++++++++++++++++++++++++++++++++++++--
 README.md              |   5 +
 4 files changed, 594 insertions(+), 17 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e885d1d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,207 @@
+
+# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,linux,windows
+# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,linux,windows
+
+### Linux ###
+*~
+
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+
+# KDE directory preferences
+.directory
+
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+pytestdebug.log
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+doc/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#poetry.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+# .env
+.env/
+.venv/
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+pythonenv*
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# operating system-related files
+# file properties cache/storage on macOS
+*.DS_Store
+# thumbnail cache on Windows
+Thumbs.db
+
+# profiling data
+.prof
+
+
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+
+# Dump file
+*.stackdump
+
+# Folder config file
+[Dd]esktop.ini
+
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+
+# Windows shortcuts
+*.lnk
+
+# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,linux,windows
\ No newline at end of file
diff --git a/CEDA/__init__.py b/CEDA/__init__.py
index f9641c7..7e90110 100644
--- a/CEDA/__init__.py
+++ b/CEDA/__init__.py
@@ -1,3 +1,4 @@
 from CEDA.economic.macro import (
-    cn_gdp_quarter
+    cn_gdp_quarter,
+    cn_ig_monthly
 )
\ No newline at end of file
diff --git a/CEDA/economic/macro.py b/CEDA/economic/macro.py
index ccc8588..5d91175 100644
--- a/CEDA/economic/macro.py
+++ b/CEDA/economic/macro.py
@@ -5,6 +5,8 @@
 import demjson
 import requests
 from fake_useragent import UserAgent
+# TODO: add more detailed comments for each indicator
+
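+# NOTE: most fetchers in this module query the eastmoney "GJZB" endpoint below with a
+# table-specific "mkt" id, strip the JSONP wrapper and decode it with demjson, and
+# split the comma-separated rows into a pandas DataFrame.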
"({data:[(x)],pages:(pc)})", "p": "1", - "ps": "2000", - "mkt": "19", - "pageNo": "1", - "pageNum": "1", - "_": "1603023435552", + "ps": "200", + "mkt": "8", + "_": "1622041485306" } r = requests.get(tmp_url, params = request_params, headers = request_header) data_text = r.text data_json = demjson.decode(data_text[data_text.find("{") : -1]) - data = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) df.columns = [ "Date", - "Housing_Prosperity_Index", - "HPI_YoY", - "Land_Development_Area_Index", - "LDAI_YoY", - "Sales_Price_Index", - "SPI_YoY" + "Climate_Index", + "CI_YoY", + "CI_MoM", + "Entrepreneur_Expectation_Index", + "EEI_YoY", + "EEI_MoM" ] return df +def cn_ig_monthly(): # Industry Growth + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable4577327", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "0", + "_": "1622042259898" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "IG_YoY", + "IG_Accum", + ] + return df + +def cn_cgpi_monthly(): # Corporate Goods Price Index + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable7184534", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "9", + "_": "1622042652353" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "General_Index", + "General_Index_YoY", + "Total_Index_MoM", + "Agricultural_Product", + "Agricultural_Product_YoY", + "Agricultural_PRoduct_MoM", + "Mineral_Product", + "Mineral_Product_YoY", + "Mineral_Product_MoM", + "Coal_Oil_Electricity", + "Coal_Oil_Electricity_YoY", + "Coal_Oil_Electricity_MoM" + ] + return df + +def cn_cci_csi_cei_monthly(): # Consumer Confidence Index & Consumer Satisfaction Index & Consumer Expectation Index + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable1243218", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "4", + "_": "1622043704818" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "CCI", + "CCI_YoY", + "CCI_MoM", + "CSI", + "CSI_YoY", + "CSI_MoM", + "CEI", + "CEI_YoY", + "CEI_MoM" + ] + return df + +def cn_trscg_monthly(): # Total Retail Sales of Consumer Goods + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = 
{"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable3665821", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "5", + "_": "1622044011316" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "Current_Month", + "TRSCG_YoY", + "TRSCG_MoM", + "TRSCG_Accum", + "TRSCG_Accum_YoY" + ] + return df + +def cn_ms_monthly(): # monetary Supply + """ + Man: manufacturing + Non-Man: Non-manufacturing + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable3818891", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "11", + "_": "1622044292103" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "M2", + "M2_YoY", + "M2_MoM", + "M1", + "M1_YoY", + "M1_MoM", + "M0", + "M0_YoY", + "M0_MoM" + ] + return df + +def cn_ie_monthly(): # Import & Export + """ + + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable3818891", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "1", + "_": "1622044292103" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "Current_Month_Export", + "Current_Month_Export_YoY", + "Current_Month_Export_MoM", + "Current_Month_Import", + "Current_Month_Import_YoY", + "Current_Month_Import_MoM", + "Accumulation_Export", + "Accumulation_Export_YoY", + "Accumulation_Import", + "Accumulation_Import_YoY", + ] + return df + + +def cn_ie_monthly(): # Import & Export + """ + + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "datatable3818891", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", + "p": "1", + "ps": "200", + "mkt": "1", + "_": "1622044292103" + } + r = requests.get(tmp_url, params = request_params, headers = request_header) + data_text = r.text + data_json = demjson.decode(data_text[data_text.find("{") : -1]) + df = pd.DataFrame([item.split(",") for item in data_json["data"]]) + df.columns = [ + "Date", + "Current_Month_Export", + "Current_Month_Export_YoY", + "Current_Month_Export_MoM", + "Current_Month_Import", + "Current_Month_Import_YoY", + "Current_Month_Import_MoM", + "Accumulation_Export", + "Accumulation_Export_YoY", + "Accumulation_Import", + "Accumulation_Import_YoY", + ] + return df + +def cn_fgr_monthly(): # Forex and Gold Reserve + """ + + """ + tmp_url = url["eastmoney"] + ua = UserAgent() + request_header = {"User-Agent": ua.random} + tmp_url = url["eastmoney"] + request_params = { + "cb": "atatable6260802", + "type": "GJZB", + "sty": "ZGZB", + "js": "({data:[(x)],pages:(pc)})", 
+def cn_fgr_monthly(): # Forex and Gold Reserve
+    """
+    Forex: foreign exchange reserves; Gold: gold reserves
+    YoY: year-on-year growth; MoM: month-on-month growth
+    """
+    tmp_url = url["eastmoney"]
+    ua = UserAgent()
+    request_header = {"User-Agent": ua.random}
+    request_params = {
+        "cb": "datatable6260802",
+        "type": "GJZB",
+        "sty": "ZGZB",
+        "js": "({data:[(x)],pages:(pc)})",
+        "p": "1",
+        "ps": "200",
+        "mkt": "16",
+        "_": "1622044863548"
+    }
+    r = requests.get(tmp_url, params = request_params, headers = request_header)
+    data_text = r.text
+    data_json = demjson.decode(data_text[data_text.find("{") : -1])
+    df = pd.DataFrame([item.split(",") for item in data_json["data"]])
+    df.columns = [
+        "Date",
+        "Forex",
+        "Forex_YoY",
+        "Forex_MoM",
+        "Gold",
+        "Gold_YoY",
+        "Gold_MoM"
+    ]
+    return df
+
+def cn_ctsf_monthly(): # Client Transaction Settlement Funds
+    """
+    CTSF: Client Transaction Settlement Funds
+    """
+    tmp_url = "http://data.eastmoney.com/dataapi/cjsj/getbanktransferdata?"
+    ua = UserAgent()
+    request_header = {"User-Agent": ua.random}
+    request_params = {
+        "p": "1",
+        "ps": "200"
+    }
+    r = requests.get(tmp_url, params = request_params, headers = request_header)
+    data_text = r.text
+    data_json = demjson.decode(data_text[data_text.find("["):-11])
+    df = pd.DataFrame(data_json)
+    return df
+
+# TODO: needs help (two tables are still missing)
+def cn_sao_monthly(): # Stock Account Overview
+    """
+    SAO: Stock Account Overview (new and active investors, SH index close)
+    """
+    tmp_url = "http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?"
+    ua = UserAgent()
+    request_header = {"User-Agent": ua.random}
+    request_params = {
+        "callback": "jQuery1123014377091065513636_1622046865705",
+        "type": "GPKHData",
+        "st": "HdDate",
+        "sr": "-1",
+        "sty": "Chart",
+        "token": "894050c76af8597a853f5b408b759f5d",
+        "ps": "2000",
+        "_": "1622046865706"
+    }
+    r = requests.get(tmp_url, params = request_params, headers = request_header)
+    data_text = r.text
+    data_json = demjson.decode(data_text[data_text.find("(")+1:-1])
+    df = pd.DataFrame(data_json)
+    df.columns = [
+        "Date",
+        "New_Investor",
+        "Active_Investor",
+        "SHIndexClose"
+    ]
+    df.Date = pd.to_datetime(df.Date, format = "%Y年%m月")
+    return df
+
+
 """
 if __name__ == "__main__":
 """
\ No newline at end of file
diff --git a/README.md b/README.md
index 3777f38..3d93aff 100644
--- a/README.md
+++ b/README.md
@@ -3,3 +3,8 @@
 ## Introduction
 
 This is a data collecting project, with both `python` and `R`
+
+
+## Acknowledgement
+
+* Thanks to [akshare](https://github.com/jindaxiang/akshare/)
\ No newline at end of file
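
A minimal usage sketch for the fetchers added above (it assumes pandas, requests, demjson and fake_useragent are installed, that the eastmoney endpoints are reachable, and that the date and numeric fields arrive as strings, which is an assumption about the endpoint's payload; function and column names are taken from CEDA/economic/macro.py in this patch):

    import pandas as pd
    from CEDA.economic.macro import cn_ppi_monthly, cn_ms_monthly

    # pull two of the new monthly series
    ppi = cn_ppi_monthly()   # Producer Price Index table
    ms = cn_ms_monthly()     # M0/M1/M2 money-supply table

    # the endpoint returns strings, so parse dates and numbers before analysis
    for df in (ppi, ms):
        df["Date"] = pd.to_datetime(df["Date"], errors="coerce")
        df[df.columns[1:]] = df[df.columns[1:]].apply(pd.to_numeric, errors="coerce")

    # join on the reporting date and restrict to a sample window,
    # mirroring the commented-out date filter in macro.py
    merged = ppi.merge(ms, on="Date", how="inner")
    recent = merged[(merged["Date"] >= "2019-01-01") & (merged["Date"] <= "2021-04-30")]
    print(recent[["Date", "Current_Month_YoY", "M2_YoY"]].head())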