# Source code for renku.core.models.provenance.agents

# -*- coding: utf-8 -*-
#
# Copyright 2018-2021- Swiss Data Science Center (SDSC)
# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
# Eidgenössische Technische Hochschule Zürich (ETHZ).
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Represent provenance agents."""

import re
import uuid
from urllib.parse import quote

from calamus.schema import JsonLDSchema
from marshmallow import EXCLUDE

from renku.core.models.calamus import StringList, fields, prov, rdfs, schema, wfprov
from renku.core.models.git import get_user_info
from renku.core.utils.urls import get_host
from renku.version import __version__, version_url


class Person:
    """Represent a person.

    Two persons compare equal (and hash alike) when both their ``id`` and
    their ``full_identity`` match.
    """

    def __init__(
        self,
        *,
        affiliation: str = None,
        alternate_name: str = None,
        email: str = None,
        id: str = None,
        label: str = None,
        name: str,
    ):
        """Create a person.

        All arguments are keyword-only. ``label`` falls back to ``name`` when
        it is not given, and a missing or improperly set ``id`` is regenerated
        with :meth:`generate_id`.

        :raises ValueError: if ``email`` is given but is not a valid address.
        """
        self.validate_email(email)

        self.affiliation: str = affiliation
        self.alternate_name: str = alternate_name
        self.email: str = email
        self.id: str = id
        self.label: str = label or name
        self.name: str = name

        # handle the case where ids were improperly set
        if self.id == "mailto:None" or not self.id or self.id.startswith("_:"):
            self.id = Person.generate_id(self.email, self.full_identity, hostname=get_host(client=None))

    def __eq__(self, other):
        """Compare by ``id`` and ``full_identity``."""
        if self is other:
            return True
        if not isinstance(other, Person):
            return False
        return self.id == other.id and self.full_identity == other.full_identity

    def __hash__(self):
        """Hash on the same fields that ``__eq__`` compares."""
        return hash((self.id, self.full_identity))

    @staticmethod
    def generate_id(email, full_identity, hostname):
        """Generate identifier for Person.

        Returns ``mailto:<email>`` when an email is available. Otherwise the
        URL-quoted ``full_identity`` (or a random UUID when that is empty too)
        is appended to ``https://<hostname>/persons/``.
        """
        if email:
            return f"mailto:{email}"

        id = full_identity or str(uuid.uuid4())
        # quote every reserved character, including "/", so the id stays a single path segment
        id = quote(id, safe="")

        # TODO: Remove hostname part once migrating to new metadata
        return f"https://{hostname}/persons/{id}"

    @staticmethod
    def validate_email(email):
        """Check that the email is valid.

        A falsy value (``None`` or an empty string) is accepted as "no email".

        :raises ValueError: if ``email`` is not a string or does not look like
            ``local@domain.tld``.
        """
        if not email:
            return
        if not isinstance(email, str) or not re.match(r"[^@]+@[^@]+\.[^@]+", email):
            raise ValueError("Email address is invalid.")

    @classmethod
    def from_commit(cls, commit):
        """Create an instance from a Git commit."""
        return cls(name=commit.author.name, email=commit.author.email)

    @property
    def short_name(self):
        """Gives full name in short form.

        Every name part but the last one is reduced to its initial, e.g.
        ``"John Ronald Tolkien"`` becomes ``"J.R.Tolkien"``. A name with fewer
        than two parts (a single word, an empty or whitespace-only string, or
        ``None``) is returned unchanged.
        """
        names = self.name.split() if self.name else []
        # Nothing to abbreviate; this also guards against indexing into an
        # empty list when the name is missing or blank.
        if len(names) <= 1:
            return self.name

        *given_names, last_name = names
        initials = ".".join(given[0] for given in given_names)
        return "{0}.{1}".format(initials, last_name)

    @property
    def full_identity(self):
        """Return name, email, and affiliation.

        The result has the form ``Name <email> [affiliation]``; the email and
        affiliation parts are left out when they are not set.
        """
        email = f" <{self.email}>" if self.email else ""
        affiliation = f" [{self.affiliation}]" if self.affiliation else ""
        return f"{self.name}{email}{affiliation}"

    @classmethod
    def from_git(cls, git):
        """Create an instance from a Git repo."""
        name, email = get_user_info(git)
        return cls(email=email, name=name)

    @classmethod
    def from_string(cls, string):
        """Create an instance from a 'Name <email>' string.

        An optional affiliation in square brackets may follow, i.e.
        ``Name <email> [affiliation]``. Name and affiliation are stripped of
        surrounding whitespace; an empty affiliation becomes ``None``.
        """
        regex_pattern = r"([^<>\[\]]*)" r"(?:<{1}\s*(\S+@\S+\.\S+){0,1}\s*>{1}){0,1}\s*" r"(?:\[{1}(.*)\]{1}){0,1}"
        name, email, affiliation = re.search(regex_pattern, string).groups()
        if name:
            name = name.strip()
        if affiliation:
            affiliation = affiliation.strip()
        affiliation = affiliation or None

        return cls(affiliation=affiliation, email=email, name=name)

    @classmethod
    def from_dict(cls, data):
        """Create an instance from a dictionary.

        The dictionary keys are passed to the constructor as keyword arguments.
        """
        return cls(**data)

    @classmethod
    def from_jsonld(cls, data):
        """Create an instance from JSON-LD data.

        An existing instance is returned as is; a dictionary is loaded through
        ``PersonSchema``.

        :raises ValueError: if ``data`` is neither an instance nor a dictionary.
        """
        if isinstance(data, cls):
            return data
        if not isinstance(data, dict):
            raise ValueError(data)

        return PersonSchema().load(data)
class PersonSchema(JsonLDSchema):
    """Person schema.

    JSON-LD schema (calamus ``JsonLDSchema``) that maps ``Person`` attributes
    to RDF properties. Every field except ``id`` defaults to ``None`` when it
    is missing from the loaded data.
    """

    class Meta:
        """Meta class."""

        # RDF types attached to a person node
        rdf_type = [prov.Person, schema.Person]
        # class associated with this schema
        model = Person
        # marshmallow's EXCLUDE policy for keys the schema does not declare
        unknown = EXCLUDE

    affiliation = StringList(schema.affiliation, missing=None)
    alternate_name = StringList(schema.alternateName, missing=None)
    email = fields.String(schema.email, missing=None)
    id = fields.Id()
    label = StringList(rdfs.label, missing=None)
    name = StringList(schema.name, missing=None)
class SoftwareAgent:
    """Represent executed software.

    An agent is identified by its ``id`` and ``label``; both take part in
    equality and hashing.
    """

    def __init__(self, *, id: str, label: str):
        """Store the keyword-only ``id`` and ``label`` of the agent."""
        self.id: str = id
        self.label: str = label

    def __eq__(self, other):
        """Agents are equal when they are the same object or share id and label."""
        if self is other:
            return True
        return isinstance(other, SoftwareAgent) and self.id == other.id and self.label == other.label

    def __hash__(self):
        """Hash on the same fields that ``__eq__`` compares."""
        key = (self.id, self.label)
        return hash(key)

    @classmethod
    def from_commit(cls, commit):
        """Create an instance from a Git commit.

        When the commit's author and committer differ, the committer becomes
        the agent (its name as label, its email as id); otherwise the author
        is returned as a ``Person``.
        """
        # FIXME: This method can return a Person object but SoftwareAgent is not its super class
        # The author is built first, before the committer check, as in the original flow.
        person = Person.from_commit(commit)

        committer_differs = commit.author != commit.committer
        if not committer_differs:
            return person

        committer = commit.committer
        return cls(label=committer.name, id=committer.email)
# set up the default agent renku_agent = SoftwareAgent(label="renku {0}".format(__version__), id=version_url) class SoftwareAgentSchema(JsonLDSchema): """SoftwareAgent schema.""" class Meta: """Meta class.""" rdf_type = [prov.SoftwareAgent, wfprov.WorkflowEngine] model = SoftwareAgent unknown = EXCLUDE label = fields.String(rdfs.label) id = fields.Id()