Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions code/vocab_funcs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from rdflib import Graph, Namespace
from rdflib.compare import graph_diff
from rdflib.term import Literal, URIRef, BNode
from rdflib.term import BNode, Literal
from vocab_management import *


Expand Down Expand Up @@ -239,7 +237,7 @@ def construct_source(item, data, namespace, header):
label = label.replace('(', '', 1)
if url.endswith(')'):
url = url[::-1].replace(')', '', 1)[::-1] # reverse string
node = BNode()
node = BNode(hash_id(f'{label}-{url}', 'web-', 16, 16))
triples.append((node, RDF.type, SCHEMA.WebPage))
triples.append((node, SCHEMA.name, Literal(label)))
triples.append((node, SCHEMA.url, Literal(url)))
Expand Down
39 changes: 30 additions & 9 deletions code/vocab_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
'''Data and configurations for vocabulary management'''

import csv
from rdflib import Namespace, BNode, Literal

import hashlib
import logging
import re
import unicodedata

from rdflib import BNode, Literal, Namespace

logging.basicConfig(
level=logging.DEBUG, format='%(levelname)s - %(funcName)s :: %(lineno)d - %(message)s')
DEBUG = logging.debug
Expand Down Expand Up @@ -109,7 +113,7 @@

# Root folder to import RDF files from
IMPORT_PATH = f'../{DPV_VERSION}'
# Root folder to export HTML filese to
# Root folder to export HTML files to
EXPORT_PATH = f'../{DPV_VERSION}'
# Root folder where Jinja2 templates are stored
TEMPLATE_PATH = './jinja2_resources'
Expand Down Expand Up @@ -2679,6 +2683,20 @@ def generate_authors_affiliations(authors):
return authors


def hash_id(input_text: str, output_prefix: str, output_slug_len:int, output_hash_len: int) -> str:
"""Takes an input text and generates a hash-based identifier, with a prefix if provided.
Example:
>>> generate_hash_id("Unabhängige Landeszentrum", "org-", 5, 8)
'org-Unabhäng-41451ff1'
"""
input_text = unicodedata.normalize("NFKC", input_text.strip())
slug = re.sub(r"[^a-zA-Z0-9\u00C0-\u00FF-]", "", input_text, flags=re.UNICODE)[:output_slug_len]
# add version to make ID stable only wihtin the version
input_text = f"{DPV_VERSION}{input_text}"
output_hash = hashlib.sha256(input_text.encode()).hexdigest()[:output_hash_len]
return f"{output_prefix}{slug}-{output_hash}"


def _person_slugify():
'''person is a string, slugify means make it IRI compatible'
- e.g. 'Harshvardhan J. Pandit' should be 'HarshvardhanJPandit'
Expand All @@ -2692,11 +2710,16 @@ def _helper(person_name):
nonlocal people
person_name = person_name.strip()
person = person_name.replace(',','').replace('.','').replace(' ','')
# if person_name.startswith('n')
if person in people:
return people[person]
bnode_person = BNode()
bnode_org = BNode()

org_name = generate_author_affiliation(person_name)
if not org_name:
raise Exception(f"{person_name} org is empty!")

bnode_person = BNode(hash_id(f"{person_name}{org_name}", "person-", 8, 8))
bnode_org = BNode( hash_id(org_name, "org-", 8, 8))

triples = []
triples.append((bnode_person, RDF.type, FOAF.Person))
triples.append((bnode_person, RDF.type, DCT.Agent))
Expand All @@ -2706,9 +2729,7 @@ def _helper(person_name):
if generate_author_website(person_name):
triples.append((bnode_person, FOAF.homepage, Literal(generate_author_website(person_name))))
triples.append((bnode_org, RDF.type, FOAF.Organization))
triples.append((bnode_org, FOAF.name, Literal(generate_author_affiliation(person_name))))
if not generate_author_affiliation(person_name):
raise Exception(f"{person_name} org is empty!")
triples.append((bnode_org, FOAF.name, Literal(org_name)))
triples.append((bnode_person, ORG.memberOf, bnode_org))
people[person] = {
'person': bnode_person,
Expand Down