This commit is contained in:
TerenceLau 2022-04-14 05:48:32 +00:00 committed by GitHub
parent cc362bedfb
commit fb65525981
4 changed files with 22 additions and 28 deletions

View File

@ -1,7 +1,7 @@
import sys import sys
import os import os
__version__ = "0.3.5" __version__ = "0.3.6"
__author__ = "Terence Lau" __author__ = "Terence Lau"

View File

@ -18,7 +18,7 @@ requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
class NBSCData(object): class NBSCData(object):
def __init__(self, language:str="en"): def __init__(self, language:str="en", period:str="monthly"):
self.dbcode = [] self.dbcode = []
self.nid = [] self.nid = []
@ -26,6 +26,7 @@ class NBSCData(object):
self.name = [] self.name = []
self.wdcode= [] self.wdcode= []
if language == "cn": if language == "cn":
self.url = "https://data.stats.gov.cn/easyquery.htm" self.url = "https://data.stats.gov.cn/easyquery.htm"
self.BASE_DIR = os.path.dirname(__file__) self.BASE_DIR = os.path.dirname(__file__)
@ -35,6 +36,13 @@ class NBSCData(object):
self.BASE_DIR = os.path.dirname(__file__) self.BASE_DIR = os.path.dirname(__file__)
self.__TREE_PATH__ = os.path.join(self.BASE_DIR, "NBSCTree", "data_en.pkl") self.__TREE_PATH__ = os.path.join(self.BASE_DIR, "NBSCTree", "data_en.pkl")
# Translate the requested period into the database code that the getTree and
# QueryData requests read from ``self.dbcode_query``. Every branch must set
# that same attribute: ``self.dbcode`` is a separate list initialised earlier
# in ``__init__`` and must not be overwritten here.
if period == "monthly":
    self.dbcode_query = "hgyd"
elif period == "quarterly":
    self.dbcode_query = "hgjd"
elif period == "annual":
    self.dbcode_query = "hgnd"
def generate_header(self): def generate_header(self):
ua = UserAgent() ua = UserAgent()
header = {'User-Agent':str(ua.chrome)} header = {'User-Agent':str(ua.chrome)}
@ -46,7 +54,7 @@ class NBSCData(object):
inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html
""" """
parent = [] parent = []
r = requests.post("{}?id={}&dbcode=hgnd&wdcode=zb&m=getTree".format(self.url, rid), headers=self.generate_header(), verify=False) r = requests.post("{}?id={}&dbcode={}&wdcode=zb&m=getTree".format(self.url, rid, self.dbcode_query), headers=self.generate_header(), verify=False)
data = r.json() data = r.json()
for i in range(0, len(data)): for i in range(0, len(data)):
@ -70,7 +78,7 @@ class NBSCData(object):
""" """
inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html
""" """
for i in range(0, len(nodes)): for i in tqdm(range(0, len(nodes))):
node = nodes[i] node = nodes[i]
if node["isParent"]: if node["isParent"]:
self.toc(node["children"]) self.toc(node["children"])
@ -87,24 +95,18 @@ class NBSCData(object):
def download_data(self, nid:str=None, sj="1978-", period:str="monthly"): def download_data(self, nid:str=None, sj="1978-"):
if period == "monthly":
dbcode="hgyd"
elif period == "quarterly":
dbcode="hgjd"
elif period == "annual":
dbcode="hgnd"
params = { params = {
"m": "QueryData", "m": "QueryData",
"dbcode": dbcode, "dbcode": self.dbcode_query,
"rowcode": "zb", "rowcode": "zb",
"colcode": "sj", "colcode": "sj",
"wds": "[]", "wds": "[]",
"dfwds": '[{"wdcode":"zb","valuecode":"' "dfwds": '[{"wdcode":"zb","valuecode":"'
+ nid + nid
+ '"},{"wdcode":"sj","valuecode":"' + '"},{"wdcode":"sj","valuecode":"'
+ sj
+ '"}]', + '"}]',
"sj": sj "sj": sj
} }
@ -117,19 +119,12 @@ class NBSCData(object):
value.append(data[i]["data"]["data"]) value.append(data[i]["data"]["data"])
output = pd.DataFrame({"date":date, "value":value}) output = pd.DataFrame({"date":date, "value":value})
output = output.drop_duplicates("date", keep="first")
return output return output
if __name__ == "__main__": if __name__ == "__main__":
nbsc = NBSCData(language="en") nbsc = NBSCData(language="en", period="annual")
nodes = nbsc.tree_generation() nodes = nbsc.tree_generation()
toc = nbsc.toc(nodes=nodes) toc = nbsc.toc(nodes=nodes)
toc[toc["name"].str.contains("GDP")] toc[toc["name"].str.contains("GDP")]
data = nbsc.download_data(nid="A0203") data = nbsc.download_data(nid="A0203")

View File

@ -43,11 +43,10 @@ data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q")
```python ```python
from CEDA.economic.NBSC import * from CEDA.economic.NBSC import *
nbsc = NBSCData(language="en") nbsc = NBSCData(language="en", period="monthly")
nbsc_nodes = nbsc.tree_generation() nbsc_nodes = nbsc.tree_generation()
nbsc_toc = nbsc.toc(nodes=nbsc_nodes) nbsc_toc = nbsc.toc(nodes=nbsc_nodes)
nbsc_toc[nbsc_toc["name"].str.contains("GDP")] A010301 = nbsc.download_data(nid="A010301")
A0203 = nbsc.download_data(nid="A0203")
``` ```
## Xinhua ## Xinhua

View File

@ -2,7 +2,7 @@ from setuptools import setup, find_packages
import os import os
setup( setup(
name = "CEDApy", name = "CEDApy",
version = "0.3.5", version = "0.3.6",
keywords = "quantitative economic data", keywords = "quantitative economic data",
long_description = open( long_description = open(
os.path.join( os.path.join(
@ -13,7 +13,7 @@ setup(
author = "TerenceCKLau", author = "TerenceCKLau",
author_email = "terenceliu1012@outlook.com", author_email = "terenceliu1012@outlook.com",
url = "https://github.com/TerenceLiu98/CEDApy", url = "https://github.com/TerenceLiu98/CEDApy",
packages = (exclude=["test", "example"]), packages = find_packages(exclude=["test", "example"]),
install_requires=[ install_requires=[
"matplotlib>=3.1.1", "matplotlib>=3.1.1",
"numpy>=1.15.4", "numpy>=1.15.4",