From 8ad645a3924b4fb5cb0930dd48f855a8d28342a6 Mon Sep 17 00:00:00 2001 From: Linlang Date: Mon, 28 Jul 2025 15:26:25 +0800 Subject: [PATCH 1/9] fix the bug where the US SYMBOLS URL fails --- scripts/data_collector/utils.py | 13 +++--------- scripts/data_collector/yahoo/collector.py | 24 +++++++++++------------ 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/scripts/data_collector/utils.py b/scripts/data_collector/utils.py index f25b1ec7a2..dd57fbb0aa 100644 --- a/scripts/data_collector/utils.py +++ b/scripts/data_collector/utils.py @@ -13,6 +13,7 @@ from pathlib import Path from typing import Iterable, Tuple, List +import akshare as ak import numpy as np import pandas as pd from loguru import logger @@ -301,16 +302,8 @@ def get_us_stock_symbols(qlib_data_path: [str, Path] = None) -> list: @deco_retry def _get_eastmoney(): - url = "http://4.push2.eastmoney.com/api/qt/clist/get?pn=1&pz=10000&fs=m:105,m:106,m:107&fields=f12" - resp = requests.get(url, timeout=None) - if resp.status_code != 200: - raise ValueError("request error") - - try: - _symbols = [_v["f12"].replace("_", "-P") for _v in resp.json()["data"]["diff"].values()] - except Exception as e: - logger.warning(f"request error: {e}") - raise + df = ak.get_us_stock_name() + _symbols = df["symbol"].to_list() if len(_symbols) < 8000: raise ValueError("request error") diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index d2fa0b06f7..843243e810 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -2,45 +2,45 @@ # Licensed under the MIT License. 
import abc -import sys import copy -import time import datetime import importlib -from abc import ABC import multiprocessing +import sys +import time +from abc import ABC from pathlib import Path from typing import Iterable import fire -import requests import numpy as np import pandas as pd +import requests +from dateutil.tz import tzlocal from loguru import logger from yahooquery import Ticker -from dateutil.tz import tzlocal import qlib +from qlib.constant import REG_CN as REGION_CN from qlib.data import D from qlib.tests.data import GetData -from qlib.utils import code_to_fname, fname_to_code, exists_qlib_data -from qlib.constant import REG_CN as REGION_CN +from qlib.utils import code_to_fname, exists_qlib_data, fname_to_code CUR_DIR = Path(__file__).resolve().parent sys.path.append(str(CUR_DIR.parent.parent)) -from dump_bin import DumpDataUpdate from data_collector.base import BaseCollector, BaseNormalize, BaseRun, Normalize from data_collector.utils import ( + calc_adjusted_price, deco_retry, + generate_minutes_calendar_from_daily, + get_br_stock_symbols, get_calendar_list, get_hs_stock_symbols, - get_us_stock_symbols, get_in_stock_symbols, - get_br_stock_symbols, - generate_minutes_calendar_from_daily, - calc_adjusted_price, + get_us_stock_symbols, ) +from dump_bin import DumpDataUpdate INDEX_BENCH_URL = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.{index_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0&beg={begin}&end={end}" From f78d6ac9643ed4a8b876d1d121f42570f44e75cf Mon Sep 17 00:00:00 2001 From: Linlang Date: Mon, 28 Jul 2025 15:34:58 +0800 Subject: [PATCH 2/9] recover code --- pyproject.toml | 1 + scripts/data_collector/yahoo/collector.py | 24 +++++++++++------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b902edfe29..4fd012b9e6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ package = [ test = [ 
"yahooquery", "baostock", + "akshare", ] analysis = [ "plotly", diff --git a/scripts/data_collector/yahoo/collector.py b/scripts/data_collector/yahoo/collector.py index 843243e810..d2fa0b06f7 100644 --- a/scripts/data_collector/yahoo/collector.py +++ b/scripts/data_collector/yahoo/collector.py @@ -2,45 +2,45 @@ # Licensed under the MIT License. import abc +import sys import copy +import time import datetime import importlib -import multiprocessing -import sys -import time from abc import ABC +import multiprocessing from pathlib import Path from typing import Iterable import fire +import requests import numpy as np import pandas as pd -import requests -from dateutil.tz import tzlocal from loguru import logger from yahooquery import Ticker +from dateutil.tz import tzlocal import qlib -from qlib.constant import REG_CN as REGION_CN from qlib.data import D from qlib.tests.data import GetData -from qlib.utils import code_to_fname, exists_qlib_data, fname_to_code +from qlib.utils import code_to_fname, fname_to_code, exists_qlib_data +from qlib.constant import REG_CN as REGION_CN CUR_DIR = Path(__file__).resolve().parent sys.path.append(str(CUR_DIR.parent.parent)) +from dump_bin import DumpDataUpdate from data_collector.base import BaseCollector, BaseNormalize, BaseRun, Normalize from data_collector.utils import ( - calc_adjusted_price, deco_retry, - generate_minutes_calendar_from_daily, - get_br_stock_symbols, get_calendar_list, get_hs_stock_symbols, - get_in_stock_symbols, get_us_stock_symbols, + get_in_stock_symbols, + get_br_stock_symbols, + generate_minutes_calendar_from_daily, + calc_adjusted_price, ) -from dump_bin import DumpDataUpdate INDEX_BENCH_URL = "http://push2his.eastmoney.com/api/qt/stock/kline/get?secid=1.{index_code}&fields1=f1%2Cf2%2Cf3%2Cf4%2Cf5&fields2=f51%2Cf52%2Cf53%2Cf54%2Cf55%2Cf56%2Cf57%2Cf58&klt=101&fqt=0&beg={begin}&end={end}" From 24c3fc8021e9bf945a44ddf17e663c59bdf2b83c Mon Sep 17 00:00:00 2001 From: Linlang Date: Tue, 29 Jul 2025 15:16:31 
+0800 Subject: [PATCH 3/9] fix package dependence error --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 4fd012b9e6..ad429eb916 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,6 +91,7 @@ package = [ test = [ "yahooquery", "baostock", + "aiohttp>=3.11.13", "akshare", ] analysis = [ From 6e1a9069a908e6b7fa3e3b72872a65e93910507d Mon Sep 17 00:00:00 2001 From: Linlang Date: Tue, 29 Jul 2025 15:22:55 +0800 Subject: [PATCH 4/9] fix package dependence error --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ad429eb916..e6781f3888 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,7 +91,7 @@ package = [ test = [ "yahooquery", "baostock", - "aiohttp>=3.11.13", + "aiohttp>=3.11.15", "akshare", ] analysis = [ From 7ae4144c607c656f9b39c875b3d65ac42aef5f65 Mon Sep 17 00:00:00 2001 From: Linlang Date: Wed, 27 Aug 2025 16:13:29 +0800 Subject: [PATCH 5/9] fix package dependence error --- pyproject.toml | 2 -- scripts/data_collector/yahoo/requirements.txt | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e6781f3888..b902edfe29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -91,8 +91,6 @@ package = [ test = [ "yahooquery", "baostock", - "aiohttp>=3.11.15", - "akshare", ] analysis = [ "plotly", diff --git a/scripts/data_collector/yahoo/requirements.txt b/scripts/data_collector/yahoo/requirements.txt index 1a58eda1f6..59f0d29101 100644 --- a/scripts/data_collector/yahoo/requirements.txt +++ b/scripts/data_collector/yahoo/requirements.txt @@ -9,4 +9,5 @@ yahooquery joblib beautifulsoup4 bs4 -soupsieve \ No newline at end of file +soupsieve +akshare \ No newline at end of file From 87ae6c67dc6f97e6e06365ec0e0cc04a09078d06 Mon Sep 17 00:00:00 2001 From: Linlang Date: Wed, 27 Aug 2025 16:32:17 +0800 Subject: [PATCH 6/9] fix package dependence error --- pyproject.toml | 1 + 1 file 
changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index fe48d090c0..cfcf0d6f97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,6 +96,7 @@ package = [ test = [ "yahooquery", "baostock", + "akshare", ] analysis = [ "plotly", From 2b0afeba7d50c28e7af0c04d7d5c4133131eaeb8 Mon Sep 17 00:00:00 2001 From: Linlang Date: Wed, 27 Aug 2025 19:02:57 +0800 Subject: [PATCH 7/9] fix package dependence error --- pyproject.toml | 1 - scripts/data_collector/utils.py | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index cfcf0d6f97..fe48d090c0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,6 @@ package = [ test = [ "yahooquery", "baostock", - "akshare", ] analysis = [ "plotly", diff --git a/scripts/data_collector/utils.py b/scripts/data_collector/utils.py index dd57fbb0aa..ca55500573 100644 --- a/scripts/data_collector/utils.py +++ b/scripts/data_collector/utils.py @@ -13,7 +13,6 @@ from pathlib import Path from typing import Iterable, Tuple, List -import akshare as ak import numpy as np import pandas as pd from loguru import logger @@ -298,6 +297,8 @@ def get_us_stock_symbols(qlib_data_path: [str, Path] = None) -> list: ------- stock symbols """ + import akshare as ak + global _US_SYMBOLS # pylint: disable=W0603 @deco_retry From 87b12c629a4caa3efeb2a1794bd3c4be17e77ff6 Mon Sep 17 00:00:00 2001 From: Linlang Date: Wed, 27 Aug 2025 19:09:52 +0800 Subject: [PATCH 8/9] format with black --- scripts/data_collector/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/data_collector/utils.py b/scripts/data_collector/utils.py index ca55500573..bc3fd921de 100644 --- a/scripts/data_collector/utils.py +++ b/scripts/data_collector/utils.py @@ -298,7 +298,7 @@ def get_us_stock_symbols(qlib_data_path: [str, Path] = None) -> list: stock symbols """ import akshare as ak - + global _US_SYMBOLS # pylint: disable=W0603 @deco_retry From 75600eb4282f9cfa60bc9b49adfd2a3e02b8d919 
Mon Sep 17 00:00:00 2001 From: Linlang Date: Wed, 27 Aug 2025 19:17:18 +0800 Subject: [PATCH 9/9] disable pylint error --- scripts/data_collector/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/data_collector/utils.py b/scripts/data_collector/utils.py index bc3fd921de..a38520ad9b 100644 --- a/scripts/data_collector/utils.py +++ b/scripts/data_collector/utils.py @@ -297,7 +297,7 @@ def get_us_stock_symbols(qlib_data_path: [str, Path] = None) -> list: ------- stock symbols """ - import akshare as ak + import akshare as ak # pylint: disable=C0415 global _US_SYMBOLS # pylint: disable=W0603