1111from qlib .utils .time import Freq
1212from qlib .utils .resam import resam_calendar
1313from qlib .config import C
14+ from qlib .data .cache import H
1415from qlib .log import get_module_logger
1516from qlib .data .storage import CalendarStorage , InstrumentStorage , FeatureStorage , CalVT , InstKT , InstVT
1617
@@ -33,15 +34,15 @@ def support_freq(self) -> List[str]:
3334 if hasattr (self , _v ):
3435 return getattr (self , _v )
3536 if len (self .provider_uri ) == 1 and C .DEFAULT_FREQ in self .provider_uri :
36- freq = filter (
37+ freq_l = filter (
3738 lambda _freq : not _freq .endswith ("_future" ),
3839 map (lambda x : x .stem , self .dpm .get_data_uri (C .DEFAULT_FREQ ).joinpath ("calendars" ).glob ("*.txt" )),
3940 )
4041 else :
41- freq = self .provider_uri .keys ()
42- freq = list (freq )
43- setattr (self , _v , freq )
44- return freq
42+ freq_l = self .provider_uri .keys ()
43+ freq_l = [ Freq (freq ) for freq in freq_l ]
44+ setattr (self , _v , freq_l )
45+ return freq_l
4546
4647 @property
4748 def uri (self ) -> Path :
@@ -65,15 +66,28 @@ def __init__(self, freq: str, future: bool, provider_uri: dict, **kwargs):
6566 super (FileCalendarStorage , self ).__init__ (freq , future , ** kwargs )
6667 self .future = future
6768 self .provider_uri = C .DataPathManager .format_provider_uri (provider_uri )
68- self .resample_freq = None
69+ self .enable_read_cache = True # TODO: make it configurable
6970
7071 @property
7172 def file_name (self ) -> str :
72- return f"{ self .use_freq } _future.txt" if self .future else f"{ self .use_freq } .txt" .lower ()
73+ return f"{ self ._freq_file } _future.txt" if self .future else f"{ self ._freq_file } .txt" .lower ()
7374
7475 @property
75- def use_freq (self ) -> str :
76- return self .freq if self .resample_freq is None else self .resample_freq
76+ def _freq_file (self ) -> str :
77+ """the freq to read from file"""
78+ if not hasattr (self , "_freq_file_cache" ):
79+ freq = Freq (self .freq )
80+ if freq not in self .support_freq :
81+ # NOTE: uri
82+ # 1. If `uri` does not exist
83+ # - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
84+ # - Read data from `min_uri` and resample to `freq`
85+
86+ freq = Freq .get_recent_freq (freq , self .support_freq )
87+ if freq is None :
88+ raise ValueError (f"can't find a freq from { self .support_freq } that can resample to { self .freq } !" )
89+ self ._freq_file_cache = freq
90+ return self ._freq_file_cache
7791
7892 def _read_calendar (self , skip_rows : int = 0 , n_rows : int = None ) -> List [CalVT ]:
7993 if not self .uri .exists ():
@@ -90,25 +104,21 @@ def _write_calendar(self, values: Iterable[CalVT], mode: str = "wb"):
90104
91105 @property
92106 def uri (self ) -> Path :
93- freq = self .freq
94- if freq not in self .support_freq :
95- # NOTE: uri
96- # 1. If `uri` does not exist
97- # - Get the `min_uri` of the closest `freq` under the same "directory" as the `uri`
98- # - Read data from `min_uri` and resample to `freq`
99-
100- freq = Freq .get_recent_freq (freq , self .support_freq )
101- if freq is None :
102- raise ValueError (f"can't find a freq from { self .support_freq } that can resample to { self .freq } !" )
103- self .resample_freq = freq
104- return self .dpm .get_data_uri (self .use_freq ).joinpath (f"{ self .storage_name } s" , self .file_name )
107+ return self .dpm .get_data_uri (self ._freq_file ).joinpath (f"{ self .storage_name } s" , self .file_name )
105108
106109 @property
107110 def data (self ) -> List [CalVT ]:
108111 self .check ()
109- _calendar = self ._read_calendar ()
110- if self .resample_freq is not None :
111- _calendar = resam_calendar (np .array (list (map (pd .Timestamp , _calendar ))), self .resample_freq , self .freq )
112+ # If cache is enabled, then return cache directly
113+ if self .enable_read_cache :
114+ key = "orig_file" + str (self .uri )
115+ if not key in H ["c" ]:
116+ H ["c" ][key ] = self ._read_calendar ()
117+ _calendar = H ["c" ][key ]
118+ else :
119+ _calendar = self ._read_calendar ()
120+ if Freq (self ._freq_file ) != Freq (self .freq ):
121+ _calendar = resam_calendar (np .array (list (map (pd .Timestamp , _calendar ))), self ._freq_file , self .freq )
112122 return _calendar
113123
114124 def _get_storage_freq (self ) -> List [str ]:
0 commit comments