Skip to content

Commit 697d61b

Browse files
author
Martin Benes
committed
Fixed deprecated pandas calls; fixed handling of newly added columns
1 parent 421ae5e commit 697d61b

File tree

8 files changed

+218
-162
lines changed

8 files changed

+218
-162
lines changed

covid19dh/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"""Unified data hub for a better understanding of COVID-19.
33
44
For more information check README.md.
5-
5+
66
Reference: https://covid19datahub.io/
77
Todo:
88
* caching
@@ -13,5 +13,5 @@
1313

1414
try:
1515
__version__ = pkg_resources.get_distribution("covid19dh").version
16-
except:
16+
except Exception:
1717
__version__ = None

covid19dh/_cache.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11

22
# ======== data cache =========
3-
_cache = {} # data
3+
_cache = {} # data
4+
45

56
def _construct_cache_id(level, dt, raw, vintage):
67
cache_id = f"{level}"
@@ -10,32 +11,41 @@ def _construct_cache_id(level, dt, raw, vintage):
1011
cache_id += dt.strftime("%Y-%m-%d")
1112
return cache_id
1213

14+
1315
def read_cache(level, dt, raw, vintage):
1416
cache_id = _construct_cache_id(level=level, dt=dt, raw=raw, vintage=vintage)
1517
try:
1618
return _cache[cache_id]
17-
except:
19+
except Exception:
1820
return None
21+
1922
def write_cache(x, level, dt, raw, vintage):
2023
cache_id = _construct_cache_id(level=level, dt=dt, raw=raw, vintage=vintage)
2124
_cache[cache_id] = x
2225

26+
2327
# ========= src cache ==========
24-
_cache_src = {} # src
28+
_cache_src = {} # src
29+
30+
2531
def _construct_src_cache_id(dt, vintage):
2632
cache_id = "src"
2733
if vintage:
2834
cache_id += dt.strftime("%Y-%m-%d")
2935
return cache_id
30-
36+
37+
3138
def read_src_cache(dt, vintage):
3239
cache_id = _construct_src_cache_id(dt=dt, vintage=vintage)
3340
try:
3441
return _cache_src[cache_id]
35-
except:
42+
except Exception:
3643
return None
44+
45+
3746
def write_src_cache(src, dt, vintage):
3847
cache_id = _construct_src_cache_id(dt=dt, vintage=vintage)
3948
_cache_src[cache_id] = src
40-
41-
__all__ = ["read_cache", "write_cache", "read_src_cache", "write_src_cache"]
49+
50+
51+
__all__ = ["read_cache", "write_cache", "read_src_cache", "write_src_cache"]

covid19dh/_cite.py

Lines changed: 42 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,29 @@
77
import pandas as pd
88
import requests
99

10+
1011
def get_sources():
1112
url = 'https://storage.covid19datahub.io/src.csv'
1213
response = requests.get(url) # headers={'User-Agent': 'Mozilla/5.0'}
1314
return pd.read_csv( StringIO(response.text))
1415

16+
1517
def sources_to_citations(sources):
1618
# shorten URL
1719
sources.url = sources.url.apply(
1820
lambda u: re.sub(
1921
r"(http://|https://|www\\.)([^/]+)(.*)",
2022
r"\1\2/",
21-
u )
23+
u)
2224
)
2325
# remove duplicit
24-
unique_references = sources.groupby(["title","author","institution","url","textVersion","bibtype"])
25-
26+
unique_references = sources.groupby(["title","author","institution","url","textVersion","bibtype"])
27+
2628
# format
2729
citations = []
28-
for n,g in unique_references:
30+
for n, g in unique_references:
2931
for i in range(1):
30-
(title,author,institution,url,textVersion,bibtype) = n
32+
(title, author, institution, url, textVersion, bibtype) = n
3133
year = g.year.max()
3234

3335
if textVersion:
@@ -55,53 +57,57 @@ def sources_to_citations(sources):
5557
else:
5658
post += "."
5759
citation = f"{pre} ({year}), {post}"
58-
60+
5961
citations.append(citation)
6062
return citations
6163

62-
def cite(x, verbose = True, sources = None):
64+
65+
def cite(x: pd.DataFrame, verbose: bool = True, sources: bool = None):
6366
# all sources if missing
6467
if sources is None:
6568
sources = get_sources()
66-
69+
6770
# per iso
6871
references = pd.DataFrame(data=None, columns=sources.columns)
69-
for iso,country in x.groupby(["iso_alpha_3"]):
72+
for (iso,), country in x.groupby(["iso_alpha_3"]):
7073
# levels
7174
level = country.administrative_area_level.unique()[0]
7275
# empty attributes
7376
empty_params = country.apply(lambda c: c.isnull().all() | (c == 0).all())
7477
params = x.columns[~empty_params]
75-
78+
7679
# filter
7780
src = sources[
78-
(sources.administrative_area_level == level) & # level
81+
(sources.administrative_area_level == level) & # level
7982
(sources.iso_alpha_3 == iso) & # iso
8083
sources.data_type.isin(params) # data type
8184
]
8285
# fallback for missing
8386
missing = set(params) - set(src.data_type.unique())
8487
if missing:
85-
src = src.append(sources[
86-
sources.data_type.isin(missing) & # data type
87-
sources.iso_alpha_3.isnull() & # empty ISO
88-
sources.administrative_area_level.isnull() # empty level
88+
src = pd.concat([
89+
src,
90+
sources[
91+
sources.data_type.isin(missing) & # data type
92+
sources.iso_alpha_3.isnull() & # empty ISO
93+
sources.administrative_area_level.isnull() # empty level
94+
]
8995
])
90-
96+
9197
# set iso,level
9298
src.iso_alpha_3 = iso
9399
src.administrative_area_level = level
94-
100+
95101
# join
96-
references = references.append(src)
97-
98-
references.drop_duplicates(inplace = True)
99-
102+
references = pd.concat([references, src])
103+
104+
references.drop_duplicates(inplace=True)
105+
100106
return references
101-
102-
103-
104-
107+
108+
109+
110+
105111
# ===
106112
# hash data stats
107113
params = set(x.columns)
@@ -112,36 +118,37 @@ def cite(x, verbose = True, sources = None):
112118
sources["iso_alpha_3"].isin(isos) &
113119
sources["data_type"].isin(params) ]
114120
sources = sources.fillna("")
115-
121+
116122
# filter
117123
def is_source_used(ref):
118124
# data type not present
119125
if not ref['data_type'] in params: return False
120126
# fallbacks
121127
if not ref['iso_alpha_3'] or not ref['administrative_area_level']: return True
122-
128+
123129
# check both equal
124130
return ((x.iso_alpha_3 == ref.iso_alpha_3) & (x.administrative_area_level == ref.administrative_area_level)).any()
125-
131+
126132
sources = sources[sources.apply(is_source_used, axis=1)]
127-
133+
128134
# drop fallback
129135
for p in params:
130136
non_fallback = (sources.data_type == p) & (sources.iso_alpha_3 != '')
131137
no_data = (x[p].isnull() | (x[p] == 0))
132138
fallback = (sources.data_type == p) & (sources.iso_alpha_3 == '')
133139
if non_fallback.any() or no_data.all():
134140
sources.drop(fallback.index[fallback].tolist(), inplace=True)
135-
141+
136142
#citations = sources_to_citations(sources)
137-
143+
138144
#if verbose:
139145
# print("\033[1mData References:\033[0m\n", end="")
140146
# for ref in citations:
141147
# print("\t" + ref, end="\n\n")
142148
# print("\033[33mTo hide the data sources use 'verbose = False'.\033[0m")
143-
144-
sources.replace(r'^\s*$', math.nan, regex=True, inplace=True)
149+
150+
sources.replace(r'^\s*$', math.nan, regex=True, inplace=True)
145151
return sources
146-
147-
__all__ = ["cite","get_sources"]
152+
153+
154+
__all__ = ["cite", "get_sources"]

0 commit comments

Comments
 (0)