Skip to content

Commit 3ee4674

Browse files
authored
Merge pull request ranaroussi#1302 from ranaroussi/dev
dev -> main
2 parents eacfbc4 + 5d9a91d commit 3ee4674

File tree

8 files changed

+114
-30
lines changed

8 files changed

+114
-30
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ msft.capital_gains
8484

8585
# show share count
8686
msft.shares
87+
msft.get_shares_full()
8788

8889
# show financials:
8990
# - income statement
@@ -213,8 +214,7 @@ data = yf.download( # or pdr.get_data_yahoo(...
213214
interval = "5d",
214215

215216
# Whether to ignore timezone when aligning ticker data from
216-
# different timezones. Default is True. False may be useful for
217-
# minute/hourly data.
217+
# different timezones. Default is False.
218218
ignore_tz = False,
219219

220220
# group by ticker (to access via data['SPY'])

tests/ticker.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def test_badTicker(self):
6565
dat.splits
6666
dat.actions
6767
dat.shares
68+
dat.get_shares_full()
6869
dat.info
6970
dat.calendar
7071
dat.recommendations
@@ -100,6 +101,7 @@ def test_goodTicker(self):
100101
dat.splits
101102
dat.actions
102103
dat.shares
104+
dat.get_shares_full()
103105
dat.info
104106
dat.calendar
105107
dat.recommendations
@@ -653,6 +655,11 @@ def test_shares(self):
653655
self.assertIsInstance(data, pd.DataFrame, "data has wrong type")
654656
self.assertFalse(data.empty, "data is empty")
655657

658+
def test_shares_full(self):
    """The full share-count history should come back as a non-empty Series."""
    shares = self.ticker.get_shares_full()
    self.assertIsInstance(shares, pd.Series, "data has wrong type")
    self.assertFalse(shares.empty, "data is empty")
662+
656663
def test_info(self):
657664
data = self.ticker.info
658665
self.assertIsInstance(data, dict, "data has wrong type")

yfinance/base.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
from .scrapers.fundamentals import Fundamentals
4141
from .scrapers.holders import Holders
4242
from .scrapers.quote import Quote
43+
import json as _json
4344

4445
_BASE_URL_ = 'https://query2.finance.yahoo.com'
4546
_SCRAPE_URL_ = 'https://finance.yahoo.com/quote'
@@ -1118,6 +1119,59 @@ def get_shares(self, proxy=None, as_dict=False):
11181119
return data.to_dict()
11191120
return data
11201121

1122+
def get_shares_full(self, start=None, end=None, proxy=None):
    """Fetch this ticker's share-count time series from Yahoo.

    Args:
        start: Start of the requested range, in any form accepted by
            ``utils._parse_user_dt``. Defaults to 548 days (~18 months)
            before ``end``. Floored to the start of its day.
        end: End of the requested range, same accepted forms. Defaults to
            the current time. Rounded up to the next day boundary.
        proxy: Optional proxy, forwarded to both the timezone lookup and
            the web request.

    Returns:
        A ``pandas.Series`` of share counts sorted by its datetime index,
        or ``None`` (after printing a message) when the date range is
        invalid, the request fails, or the response holds no share counts.
    """
    # Process dates
    tz = self._get_ticker_tz(debug_mode=False, proxy=proxy, timeout=10)
    if end is None:
        end = _pd.Timestamp.now("UTC").tz_convert(tz)
    else:
        # The parsed value is epoch seconds: interpret it as a UTC instant
        # (independent of the machine's local timezone), then convert.
        end_ts = utils._parse_user_dt(end, tz)
        end = _pd.Timestamp(end_ts, unit="s", tz="UTC").tz_convert(tz)
    if start is None:
        start = end - _pd.Timedelta(days=548)  # 18 months
    else:
        start_ts = utils._parse_user_dt(start, tz)
        start = _pd.Timestamp(start_ts, unit="s", tz="UTC").tz_convert(tz)
    if start >= end:
        print("ERROR: start date must be before end")
        return None
    start = start.floor("D")
    end = end.ceil("D")

    # Fetch
    ts_url_base = "https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{0}?symbol={0}".format(self.ticker)
    shares_url = ts_url_base + "&period1={}&period2={}".format(int(start.timestamp()), int(end.timestamp()))
    try:
        json_str = self._data.cache_get(url=shares_url, proxy=proxy).text
        json_data = _json.loads(json_str)
    except Exception:
        print(f"{self.ticker}: Yahoo web request for share count failed")
        return None
    try:
        fail = json_data["finance"]["error"]["code"] == "Bad Request"
    except (KeyError, TypeError):
        # No error section in the payload: treat as success.
        fail = False
    if fail:
        print(f"{self.ticker}: Yahoo web request for share count failed")
        return None

    # An empty or unexpectedly shaped result is reported the same way as a
    # result that simply lacks the share-count field.
    try:
        shares_data = json_data["timeseries"]["result"]
        has_shares = "shares_out" in shares_data[0]
    except (KeyError, IndexError, TypeError):
        has_shares = False
    if not has_shares:
        print(f"{self.ticker}: Yahoo did not return share count in date range {start} -> {end}")
        return None
    try:
        df = _pd.Series(shares_data[0]["shares_out"], index=_pd.to_datetime(shares_data[0]["timestamp"], unit="s"))
    except Exception as e:
        print(f"{self.ticker}: Failed to parse shares count data: {e}")
        return None

    # NOTE(review): the timestamps are parsed as epoch seconds and then
    # labelled with the ticker timezone without conversion - confirm this
    # is intended rather than tz_localize("UTC").tz_convert(tz).
    df.index = df.index.tz_localize(tz)
    df = df.sort_index()
    return df
1174+
11211175
def get_isin(self, proxy=None) -> Optional[str]:
11221176
# *** experimental ***
11231177
if self._isin is not None:
@@ -1254,8 +1308,8 @@ def get_earnings_dates(self, limit=12, proxy=None) -> Optional[pd.DataFrame]:
12541308
dates[cn] = _pd.to_datetime(dates[cn], format="%b %d, %Y, %I %p")
12551309
# - instead of attempting decoding of ambiguous timezone abbreviation, just use 'info':
12561310
self._quote.proxy = proxy
1257-
dates[cn] = dates[cn].dt.tz_localize(
1258-
tz=self._quote.info["exchangeTimezoneName"])
1311+
tz = self._get_ticker_tz(debug_mode=False, proxy=proxy, timeout=30)
1312+
dates[cn] = dates[cn].dt.tz_localize(tz)
12591313

12601314
dates = dates.set_index("Earnings Date")
12611315

yfinance/data.py

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,33 @@ def wrapped(*args, **kwargs):
4646
return wrapped
4747

4848

49-
def decrypt_cryptojs_aes(data):
49+
def decrypt_cryptojs_aes_stores(data):
5050
encrypted_stores = data['context']['dispatcher']['stores']
51-
_cs = data["_cs"]
52-
_cr = data["_cr"]
5351

54-
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
55-
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
52+
if "_cs" in data and "_cr" in data:
53+
_cs = data["_cs"]
54+
_cr = data["_cr"]
55+
_cr = b"".join(int.to_bytes(i, length=4, byteorder="big", signed=True) for i in json.loads(_cr)["words"])
56+
password = hashlib.pbkdf2_hmac("sha1", _cs.encode("utf8"), _cr, 1, dklen=32).hex()
57+
else:
58+
# Currently assume one extra key in dict, which is password. Print error if
59+
# more extra keys detected.
60+
new_keys = [k for k in data.keys() if k not in ["context", "plugins"]]
61+
l = len(new_keys)
62+
if l == 0:
63+
return None
64+
elif l == 1 and isinstance(data[new_keys[0]], str):
65+
password_key = new_keys[0]
66+
else:
67+
msg = "Yahoo has again changed data format, yfinance now unsure which key(s) is for decryption:"
68+
k = new_keys[0]
69+
k_str = k if len(k) < 32 else k[:32-3]+"..."
70+
msg += f" '{k_str}'->{type(data[k])}"
71+
for i in range(1, len(new_keys)):
72+
msg += f" , '{k_str}'->{type(data[k])}"
73+
raise Exception(msg)
74+
password_key = new_keys[0]
75+
password = data[password_key]
5676

5777
encrypted_stores = b64decode(encrypted_stores)
5878
assert encrypted_stores[0:8] == b"Salted__"
@@ -98,7 +118,10 @@ def EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="m
98118
key, iv = key_iv[:keySize], key_iv[keySize:final_length]
99119
return key, iv
100120

101-
key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
121+
try:
122+
key, iv = EVPKDF(password, salt, keySize=32, ivSize=16, iterations=1, hashAlgorithm="md5")
123+
except:
124+
raise Exception("yfinance failed to decrypt Yahoo data response")
102125

103126
if usePycryptodome:
104127
cipher = AES.new(key, AES.MODE_CBC, iv=iv)
@@ -176,15 +199,16 @@ def get_json_data_stores(self, sub_page: str = None, proxy=None) -> dict:
176199

177200
data = json.loads(json_str)
178201

179-
if "_cs" in data and "_cr" in data:
180-
data = decrypt_cryptojs_aes(data)
181-
182-
if "context" in data and "dispatcher" in data["context"]:
183-
# Keep old code, just in case
184-
data = data['context']['dispatcher']['stores']
202+
stores = decrypt_cryptojs_aes_stores(data)
203+
if stores is None:
204+
# Maybe Yahoo returned old format, not encrypted
205+
if "context" in data and "dispatcher" in data["context"]:
206+
stores = data['context']['dispatcher']['stores']
207+
if stores is None:
208+
raise Exception(f"{self.ticker}: Failed to extract data stores from web request")
185209

186210
# return data
187-
new_data = json.dumps(data).replace('{}', 'null')
211+
new_data = json.dumps(stores).replace('{}', 'null')
188212
new_data = re.sub(
189213
r'{[\'|\"]raw[\'|\"]:(.*?),(.*?)}', r'\1', new_data)
190214

yfinance/multi.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from . import shared
3030

3131

32-
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=True,
32+
def download(tickers, start=None, end=None, actions=False, threads=True, ignore_tz=False,
3333
group_by='column', auto_adjust=False, back_adjust=False, repair=False, keepna=False,
3434
progress=True, period="max", show_errors=True, interval="1d", prepost=False,
3535
proxy=None, rounding=False, timeout=10):
@@ -68,7 +68,7 @@ def download(tickers, start=None, end=None, actions=False, threads=True, ignore_
6868
How many threads to use for mass downloading. Default is True
6969
ignore_tz: bool
7070
When combining from different timezones, ignore that part of datetime.
71-
Default is True
71+
Default is False
7272
proxy: str
7373
Optional. Proxy server URL scheme. Default is None
7474
rounding: bool

yfinance/scrapers/fundamentals.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -195,7 +195,7 @@ def get_financials_time_series(self, timescale, keys: list, proxy=None) -> pd.Da
195195
url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
196196
# Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
197197
start_dt = datetime.datetime(2016, 12, 31)
198-
end = (datetime.datetime.now() + datetime.timedelta(days=366))
198+
end = pd.Timestamp.utcnow().ceil("D")
199199
url += "&period1={}&period2={}".format(int(start_dt.timestamp()), int(end.timestamp()))
200200

201201
# Step 3: fetch and reshape data

yfinance/scrapers/quote.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,11 @@ def _scrape_complementary(self, proxy):
194194
for k in keys:
195195
url += "&type=" + k
196196
# Request 6 months of data
197-
url += "&period1={}".format(
198-
int((datetime.datetime.now() - datetime.timedelta(days=365 // 2)).timestamp()))
199-
url += "&period2={}".format(int((datetime.datetime.now() + datetime.timedelta(days=1)).timestamp()))
197+
start = pd.Timestamp.utcnow().floor("D") - datetime.timedelta(days=365 // 2)
198+
start = int(start.timestamp())
199+
end = pd.Timestamp.utcnow().ceil("D")
200+
end = int(end.timestamp())
201+
url += f"&period1={start}&period2={end}"
200202

201203
json_str = self._data.cache_get(url=url, proxy=proxy).text
202204
json_data = json.loads(json_str)

yfinance/utils.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -607,7 +607,7 @@ def _reindex_events(df, new_index, data_col_name):
607607
if interval.endswith('m') or interval.endswith('h') or interval == "1d":
608608
# Update: is possible with daily data when dividend very recent
609609
f_missing = ~df_sub.index.isin(df.index)
610-
df_sub_missing = df_sub[f_missing]
610+
df_sub_missing = df_sub[f_missing].copy()
611611
keys = {"Adj Open", "Open", "Adj High", "High", "Adj Low", "Low", "Adj Close",
612612
"Close"}.intersection(df.columns)
613613
df_sub_missing[list(keys)] = _np.nan
@@ -743,8 +743,10 @@ class _TzCache:
743743
"""Simple sqlite file cache of ticker->timezone"""
744744

745745
def __init__(self):
746-
self._tz_db = None
747746
self._setup_cache_folder()
747+
# Must init db here, where is thread-safe
748+
self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
749+
self._migrate_cache_tkr_tz()
748750

749751
def _setup_cache_folder(self):
750752
if not _os.path.isdir(self._db_dir):
@@ -776,11 +778,6 @@ def _db_dir(self):
776778

777779
@property
778780
def tz_db(self):
779-
# lazy init
780-
if self._tz_db is None:
781-
self._tz_db = _KVStore(_os.path.join(self._db_dir, "tkr-tz.db"))
782-
self._migrate_cache_tkr_tz()
783-
784781
return self._tz_db
785782

786783
def _migrate_cache_tkr_tz(self):

0 commit comments

Comments
 (0)