From fb6552598189996dd2996dbeb6aa0c1b44e553e9 Mon Sep 17 00:00:00 2001 From: TerenceLau Date: Thu, 14 Apr 2022 05:48:32 +0000 Subject: [PATCH] v0.3.6 --- CEDA/__init__.py | 2 +- CEDA/economic/NBSC.py | 39 +++++++++++++++++---------------------- example/economic.md | 5 ++--- setup.py | 4 ++-- 4 files changed, 22 insertions(+), 28 deletions(-) diff --git a/CEDA/__init__.py b/CEDA/__init__.py index ceb8158..b9d6132 100644 --- a/CEDA/__init__.py +++ b/CEDA/__init__.py @@ -1,7 +1,7 @@ import sys import os -__version__ = "0.3.5" +__version__ = "0.3.6" __author__ = "Terence Lau" diff --git a/CEDA/economic/NBSC.py b/CEDA/economic/NBSC.py index 822aca3..65dc8bc 100644 --- a/CEDA/economic/NBSC.py +++ b/CEDA/economic/NBSC.py @@ -18,7 +18,7 @@ requests.packages.urllib3.disable_warnings(InsecureRequestWarning) class NBSCData(object): - def __init__(self, language:str="en"): + def __init__(self, language:str="en", period:str="monthly"): self.dbcode = [] self.nid = [] @@ -26,6 +26,7 @@ class NBSCData(object): self.name = [] self.wdcode= [] + if language == "cn": self.url = "https://data.stats.gov.cn/easyquery.htm" self.BASE_DIR = os.path.dirname(__file__) @@ -34,6 +35,13 @@ class NBSCData(object): self.url = "https://data.stats.gov.cn/english/easyquery.htm" self.BASE_DIR = os.path.dirname(__file__) self.__TREE_PATH__ = os.path.join(self.BASE_DIR, "NBSCTree", "data_en.pkl") + + if period == "monthly": + self.dbcode_query = "hgyd" + elif period == "quarterly": + self.dbcode_query = "hgjd" + elif period == "annual": + self.dbcode_query = "hgnd" def generate_header(self): ua = UserAgent() @@ -46,7 +54,7 @@ class NBSCData(object): inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html """ parent = [] - r = requests.post("{}?id={}&dbcode=hgnd&wdcode=zb&m=getTree".format(self.url, rid), headers=self.generate_header(), verify=False) + r = requests.post("{}?id={}&dbcode={}&wdcode=zb&m=getTree".format(self.url, rid, self.dbcode_query), headers=self.generate_header(), verify=False) data = 
r.json() for i in range(0, len(data)): @@ -70,7 +78,7 @@ class NBSCData(object): """ inspired by a blog: https://www.cnblogs.com/wang_yb/p/14636575.html """ - for i in range(0, len(nodes)): + for i in tqdm(range(0, len(nodes))): node = nodes[i] if node["isParent"]: self.toc(node["children"]) @@ -87,24 +95,18 @@ class NBSCData(object): - def download_data(self, nid:str=None, sj="1978-", period:str="monthly"): - - if period == "monthly": - dbcode="hgyd" - elif period == "quarterly": - dbcode="hgjd" - elif period == "annual": - dbcode="hgnd" + def download_data(self, nid:str=None, sj="1978-"): params = { "m": "QueryData", - "dbcode": dbcode, + "dbcode": self.dbcode_query, "rowcode": "zb", "colcode": "sj", "wds": "[]", "dfwds": '[{"wdcode":"zb","valuecode":"' + nid + '"},{"wdcode":"sj","valuecode":"' + + sj + '"}]', "sj": sj } @@ -117,19 +119,12 @@ class NBSCData(object): value.append(data[i]["data"]["data"]) output = pd.DataFrame({"date":date, "value":value}) + output = output.drop_duplicates("date", keep="first") return output if __name__ == "__main__": - nbsc = NBSCData(language="en") + nbsc = NBSCData(language="en", period="annual") nodes = nbsc.tree_generation() toc = nbsc.toc(nodes=nodes) toc[toc["name"].str.contains("GDP")] - data = nbsc.download_data(nid="A0203") - - - - - - - - \ No newline at end of file + data = nbsc.download_data(nid="A0203") \ No newline at end of file diff --git a/example/economic.md b/example/economic.md index 1317db7..d4eb267 100644 --- a/example/economic.md +++ b/example/economic.md @@ -43,11 +43,10 @@ data = oecd.download_data(dataset="QNA", query="QNA/CAN.B1_GE.CQRSA.Q") ```python from CEDA.economic.NBSC import * -nbsc = NBSCData(language="en") +nbsc = NBSCData(language="en", period="monthly") nbsc_nodes = nbsc.tree_generation() nbsc_toc = nbsc.toc(nodes=nbsc_nodes) -nbsc_toc[nbsc_toc["name"].str.contains("GDP")] -A0203 = nbsc.download_data(nid="A0203") +A010301 = nbsc.download_data(nid="A010301") ``` ## Xinhua diff --git a/setup.py 
b/setup.py index a65445b..44a1da5 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages import os setup( name = "CEDApy", - version = "0.3.5", + version = "0.3.6", keywords = "quantitative economic data", long_description = open( os.path.join( @@ -13,7 +13,7 @@ setup( author = "TerenceCKLau", author_email = "terenceliu1012@outlook.com", url = "https://github.com/TerenceLiu98/CEDApy", - packages = (exclude=["test", "example"]), + packages = find_packages(exclude=["test", "example"]), install_requires=[ "matplotlib>=3.1.1", "numpy>=1.15.4",