Skip to content

Tabular Tags

nyckel.TabularTagsFunction

Example:

from nyckel import Credentials, TabularTagsFunction, TabularTagsSample, TagsAnnotation, TabularFunctionField

# Placeholder credentials; substitute your own client id and secret.
credentials = Credentials(client_id="...", client_secret="...")

# Create a new tabular tags function named "NewsTopics".
func = TabularTagsFunction.create("NewsTopics", credentials)
# Fields must exist before samples are posted: create_samples raises if a sample uses an unknown field name.
func.create_fields([
    TabularFunctionField(type="Text", name="Title"),
    TabularFunctionField(type="Text", name="Abstract")
])
# Each sample maps field names to values and may carry several tags.
# Labels named in the annotations are created automatically when missing.
func.create_samples([
    TabularTagsSample(data={"Title": "New restaurant in SOHO", "Abstract": "This is the best..."}, annotation=[TagsAnnotation("Food"), TagsAnnotation("Reviews")]),
    TabularTagsSample(data={"Title": "Belly-up still going strong", "Abstract": "The Belly-Up tavern in Solana..."}, annotation=[TagsAnnotation("Music"), TagsAnnotation("Reviews")]),
    TabularTagsSample(data={"Title": "Carbonara at its best", "Abstract": "Here is how to make the best..."}, annotation=[TagsAnnotation("Food")]),
    TabularTagsSample(data={"Title": "New album out!", "Abstract": "Taylor swift just released ..."}, annotation=[TagsAnnotation("Music")]),
])

# invoke takes a list of {field name: value} dicts.
predictions = func.invoke([{"Title": "Swedish meatballs: the best recipe", "Abstract": "This age-old Swedish classic ..."}])
Source code in src/nyckel/functions/tags/tabular_tags.py
class TabularTagsFunction(TabularTagsFunctionInterface):
    """
    Tags function over tabular samples, i.e. samples whose data is a dict keyed by field name.

    Example:

    ```py

    from nyckel import Credentials, TabularTagsFunction, TabularTagsSample, TagsAnnotation, TabularFunctionField

    credentials = Credentials(client_id="...", client_secret="...")

    func = TabularTagsFunction.create("NewsTopics", credentials)
    func.create_fields([
        TabularFunctionField(type="Text", name="Title"),
        TabularFunctionField(type="Text", name="Abstract")
    ])
    func.create_samples([
        TabularTagsSample(data={"Title": "New restaurant in SOHO", "Abstract": "This is the best..."}, annotation=[TagsAnnotation("Food"), TagsAnnotation("Reviews")]),
        TabularTagsSample(data={"Title": "Belly-up still going strong", "Abstract": "The Belly-Up tavern in Solana..."}, annotation=[TagsAnnotation("Music"), TagsAnnotation("Reviews")]),
        TabularTagsSample(data={"Title": "Carbonara at its best", "Abstract": "Here is how to make the best..."}, annotation=[TagsAnnotation("Food")]),
        TabularTagsSample(data={"Title": "New album out!", "Abstract": "Taylor swift just released ..."}, annotation=[TagsAnnotation("Music")]),
    ])

    predictions = func.invoke([{"Title": "Swedish meatballs: the best recipe", "Abstract": "This age-old Swedish classic ..."}])
    ```
    """

    def __init__(self, function_id: NyckelId, credentials: Credentials):
        """Set up the API handlers for `function_id` and check that its input modality is tabular.

        Raises:
            AssertionError: If the function's input modality is not "Tabular".
        """
        self._function_id = function_id

        self._function_handler = TagsFunctionHandler(function_id, credentials)
        self._label_handler = ClassificationLabelHandler(function_id, credentials)
        self._url_handler = TagsFunctionURLHandler(function_id, credentials.server_url)
        self._sample_handler = TagsSampleHandler(function_id, credentials)
        self._field_handler = TabularFieldHandler(function_id, credentials)

        # Raised explicitly instead of using `assert` so the check is not stripped under `python -O`.
        # AssertionError is kept so callers that already catch it keep working.
        input_modality = self._function_handler.get_input_modality()
        if input_modality != "Tabular":
            raise AssertionError(
                f"Function {function_id} has input modality {input_modality!r}; expected 'Tabular'."
            )

    @property
    def function_id(self) -> NyckelId:
        """The function id this object was constructed with."""
        return self._function_id

    @property
    def sample_count(self) -> int:
        """Sample count as reported by the function handler."""
        return self._function_handler.sample_count

    @property
    def label_count(self) -> int:
        """Label count as reported by the function handler."""
        return self._function_handler.label_count

    @property
    def name(self) -> str:
        """Function name as reported by the function handler."""
        return self._function_handler.get_name()

    @classmethod
    def create(cls, name: str, credentials: Credentials) -> "TabularTagsFunction":
        """Create a function called `name` with "Tabular" input through the tags function factory."""
        return tags_function_factory.TagsFunctionFactory().create(name, "Tabular", credentials)  # type:ignore

    def delete(self) -> None:
        """Delegate deletion to the function handler."""
        self._function_handler.delete()

    def invoke(self, sample_data_list: List[TabularSampleData]) -> List[TagsPrediction]:
        """Invoke the function on `sample_data_list`.

        The image-field transformer handed to the sample handler is keyed by field *name*,
        in contrast to `create_samples`, which uses field ids.
        """
        return self._sample_handler.invoke(sample_data_list, self._get_image_field_transformer("name"))

    def has_trained_model(self) -> bool:
        """Return the function handler's `is_trained` flag."""
        return self._function_handler.is_trained

    def create_labels(self, labels: Sequence[Union[ClassificationLabel, str]]) -> List[NyckelId]:
        """Create labels; plain strings are wrapped as `ClassificationLabel(name=...)` first."""
        typed_labels = [
            label if isinstance(label, ClassificationLabel) else ClassificationLabel(name=label) for label in labels
        ]
        return self._label_handler.create_labels(typed_labels)

    def list_labels(self) -> List[ClassificationLabel]:
        """List labels via the label handler, passing it the current `label_count`."""
        return self._label_handler.list_labels(self.label_count)

    def read_label(self, label_id: NyckelId) -> ClassificationLabel:
        """Read the label with id `label_id` via the label handler."""
        return self._label_handler.read_label(label_id)

    def update_label(self, label: ClassificationLabel) -> ClassificationLabel:
        """Update `label` via the label handler and return the handler's result."""
        return self._label_handler.update_label(label)

    def delete_labels(self, label_ids: List[NyckelId]) -> None:
        """Delete the labels with the given ids via the label handler."""
        self._label_handler.delete_labels(label_ids)

    def create_fields(self, fields: List[TabularFunctionField]) -> List[NyckelId]:
        """Create `fields` via the field handler and return the handler's result."""
        return self._field_handler.create_fields(fields)

    def list_fields(self) -> List[TabularFunctionField]:
        """List the function's fields via the field handler."""
        return self._field_handler.list_fields()

    def read_field(self, field_id: NyckelId) -> TabularFunctionField:
        """Read the field with id `field_id` via the field handler."""
        return self._field_handler.read_field(field_id)

    def delete_field(self, field_id: NyckelId) -> None:
        """Delete the field with id `field_id` via the field handler."""
        return self._field_handler.delete_field(field_id)

    def create_samples(self, samples: Sequence[Union[TabularTagsSample, TabularSampleData]]) -> List[NyckelId]:
        """Post `samples`, creating any labels they reference that do not exist yet.

        Args:
            samples: `TabularTagsSample` objects or plain data dicts keyed by field name.

        Returns:
            Whatever the sample handler returns for the created samples; `[]` for empty input.

        Raises:
            ValueError: If an entry is neither a `TabularTagsSample` nor a dict.
            AssertionError: If a sample uses a field name that has not been created.
        """
        if len(samples) == 0:
            return []

        typed_samples = self._wrangle_post_samples_input(samples)
        typed_samples = self._strip_label_names(typed_samples)
        self._assert_fields_created(typed_samples)
        self._create_labels_as_needed(typed_samples)

        # For large tabular functions, the POST samples API does not support field names. So we need to switch to IDs.
        typed_samples = self._switch_field_names_to_field_ids(typed_samples)
        return self._sample_handler.create_samples(typed_samples, self._get_image_field_transformer())

    def _wrangle_post_samples_input(
        self, samples: Sequence[Union[TabularTagsSample, TabularSampleData]]
    ) -> List[TabularTagsSample]:
        """Normalize the input to `TabularTagsSample` objects, wrapping bare dicts as `data`."""
        typed_samples: List[TabularTagsSample] = []
        for sample in samples:
            if isinstance(sample, TabularTagsSample):
                typed_samples.append(sample)
            elif isinstance(sample, dict):
                typed_samples.append(TabularTagsSample(data=sample))
            else:
                raise ValueError(f"Sample {sample} has invalid type: {type(sample)}")
        return typed_samples

    def _strip_label_names(self, samples: List[TabularTagsSample]) -> List[TabularTagsSample]:
        """Return samples whose annotation label names have surrounding whitespace removed.

        Annotated samples and their annotation entries are shallow-copied before being
        changed, so the caller's objects are left untouched (the same guarantee
        `_switch_field_names_to_field_ids` gives for sample data).
        """
        stripped_samples: List[TabularTagsSample] = []
        for sample in samples:
            if sample.annotation:
                sample = copy.copy(sample)
                stripped_annotation = []
                for entry in sample.annotation:
                    entry = copy.copy(entry)
                    entry.label_name = entry.label_name.strip()
                    stripped_annotation.append(entry)
                sample.annotation = stripped_annotation
            stripped_samples.append(sample)
        return stripped_samples

    def _assert_fields_created(self, samples: List[TabularTagsSample]) -> None:
        """Raise AssertionError if any sample uses a field name that does not exist on the function."""
        existing_fields = self.list_fields()
        existing_field_names = {field.name for field in existing_fields}
        new_field_names = {field_name for sample in samples for field_name in sample.data.keys()}
        missing_field_names = new_field_names - existing_field_names
        # Explicit raise (not `assert`) so the validation also runs under `python -O`.
        if missing_field_names:
            raise AssertionError(f"Fields not created: {missing_field_names=}. Please create fields first.")

    def _create_labels_as_needed(self, samples: List[TabularTagsSample]) -> None:
        """Create every label named in the samples' annotations that does not exist yet."""
        existing_labels = self._label_handler.list_labels(None)
        existing_label_names = {label.name for label in existing_labels}
        new_label_names = {
            annotation.label_name for sample in samples if sample.annotation for annotation in sample.annotation
        }
        # Sorted so labels are created in a deterministic order rather than set-iteration order.
        missing_label_names = sorted(new_label_names - existing_label_names)
        missing_labels = [ClassificationLabel(name=label_name) for label_name in missing_label_names]
        if len(missing_labels) > 0:
            self._label_handler.create_labels(missing_labels)

    def _get_image_field_transformer(self, field_identifier: str = "id") -> Callable:
        """Return a transformer for the function's image field, or the identity if there is none.

        Args:
            field_identifier: "id" to key the transformer by field id, "name" to key it by field name.

        Raises:
            ValueError: If `field_identifier` is neither "id" nor "name".
            AssertionError: If keyed by id and the image field has no id.
        """
        if field_identifier not in ("id", "name"):
            raise ValueError(f"field_identifier must be 'id' or 'name', got {field_identifier!r}")

        for field in self.list_fields():
            if field.type != "Image":
                continue
            # There is only one image field (max) per function, so the first match is the only one.
            if field_identifier == "name":
                return ImageFieldTransformer(field.name)
            if field.id is None:
                raise AssertionError(f"Image field {field.name!r} has no id.")
            return ImageFieldTransformer(field.id)
        return lambda x: x

    def _switch_field_names_to_field_ids(self, samples: List[TabularTagsSample]) -> List[TabularTagsSample]:
        """Return deep copies of `samples` whose data dicts are keyed by field id instead of field name."""
        samples = copy.deepcopy(samples)  # Deep-copy so we don't modify the callers input.
        fields = self.list_fields()
        field_id_by_name = {field.name: field.id for field in fields}
        for sample in samples:
            # Build a fresh dict: re-keying in place could overwrite a not-yet-visited
            # entry if some field id happened to equal another field's name.
            sample.data = {
                field_id_by_name[field_name]: field_value for field_name, field_value in sample.data.items()
            }  # type: ignore
        return samples

    def list_samples(self) -> List[TabularTagsSample]:
        """List samples, translating label ids and field ids back to their names."""
        samples_dict_list = self._sample_handler.list_samples(self.sample_count)
        labels = self._label_handler.list_labels(None)
        fields = self.list_fields()

        label_name_by_id = {label.id: label.name for label in labels}
        field_name_by_id = {field.id: field.name for field in fields}  # type: ignore

        return [self._sample_from_dict(entry, label_name_by_id, field_name_by_id) for entry in samples_dict_list]  # type: ignore # noqa: E501

    def _sample_from_dict(
        self, sample_dict: Dict, label_name_by_id: Dict[str, str], field_name_by_id: Dict[str, str]
    ) -> TabularTagsSample:
        """Build a `TabularTagsSample` from a raw sample dict.

        Reads the keys "id", "data" and, when present, "externalId", "annotation" and "prediction".
        Field ids and label ids are passed through `strip_nyckel_prefix` before being looked up.
        """
        tabular_data_body = {
            field_name_by_id[strip_nyckel_prefix(field_id)]: field_data
            for field_id, field_data in sample_dict["data"].items()
        }

        if "annotation" in sample_dict:
            annotation = [
                TagsAnnotation(
                    label_name=label_name_by_id[strip_nyckel_prefix(entry["labelId"])],
                    present=entry["present"],
                )
                for entry in sample_dict["annotation"]
            ]
        else:
            annotation = None

        if "prediction" in sample_dict:
            # TODO: Note that we filter out predictions that are not in the label list.
            # This is a temporary fix since these should not be there in the first place.

            prediction = [
                ClassificationPrediction(
                    confidence=entry["confidence"],
                    label_name=label_name_by_id[strip_nyckel_prefix(entry["labelId"])],
                )
                for entry in sample_dict["prediction"]
                if strip_nyckel_prefix(entry["labelId"]) in label_name_by_id
            ]
        else:
            prediction = None

        return TabularTagsSample(
            id=strip_nyckel_prefix(sample_dict["id"]),
            data=tabular_data_body,
            external_id=sample_dict["externalId"] if "externalId" in sample_dict else None,
            annotation=annotation,
            prediction=prediction,
        )

    def read_sample(self, sample_id: NyckelId) -> TabularTagsSample:
        """Read one sample, translating label ids and field ids back to their names."""
        sample_as_dict = self._sample_handler.read_sample(sample_id)

        labels = self._label_handler.list_labels(None)
        fields = self.list_fields()

        label_name_by_id = {label.id: label.name for label in labels}
        field_name_by_id = {field.id: field.name for field in fields}  # type: ignore

        return self._sample_from_dict(sample_as_dict, label_name_by_id, field_name_by_id)  # type: ignore

    def update_annotation(self, sample: TabularTagsSample) -> None:
        """Forward `sample` to the sample handler's `update_annotation`."""
        self._sample_handler.update_annotation(sample)

    def delete_samples(self, sample_ids: List[NyckelId]) -> None:
        """Delete the samples with the given ids via the sample handler."""
        self._sample_handler.delete_samples(sample_ids)

function_id property

function_id: NyckelId

sample_count property

sample_count: int

label_count property

label_count: int

name property

name: str

__init__

__init__(function_id: NyckelId, credentials: Credentials)
Source code in src/nyckel/functions/tags/tabular_tags.py
def __init__(self, function_id: NyckelId, credentials: Credentials):
    """Set up the API handlers for `function_id` and check that its input modality is tabular.

    Raises:
        AssertionError: If the function's input modality is not "Tabular".
    """
    self._function_id = function_id

    self._function_handler = TagsFunctionHandler(function_id, credentials)
    self._label_handler = ClassificationLabelHandler(function_id, credentials)
    self._url_handler = TagsFunctionURLHandler(function_id, credentials.server_url)
    self._sample_handler = TagsSampleHandler(function_id, credentials)
    self._field_handler = TabularFieldHandler(function_id, credentials)

    # Raised explicitly instead of using `assert` so the check is not stripped under `python -O`.
    # AssertionError is kept so callers that already catch it keep working.
    input_modality = self._function_handler.get_input_modality()
    if input_modality != "Tabular":
        raise AssertionError(
            f"Function {function_id} has input modality {input_modality!r}; expected 'Tabular'."
        )

create classmethod

create(name: str, credentials: Credentials) -> TabularTagsFunction
Source code in src/nyckel/functions/tags/tabular_tags.py
@classmethod
def create(cls, name: str, credentials: Credentials) -> "TabularTagsFunction":
    """Create a function called `name` with "Tabular" input through the tags function factory."""
    factory = tags_function_factory.TagsFunctionFactory()
    return factory.create(name, "Tabular", credentials)  # type:ignore

delete

delete() -> None
Source code in src/nyckel/functions/tags/tabular_tags.py
def delete(self) -> None:
    """Delegate deletion to the function handler."""
    handler = self._function_handler
    handler.delete()

invoke

invoke(sample_data_list: List[TabularSampleData]) -> List[TagsPrediction]
Source code in src/nyckel/functions/tags/tabular_tags.py
def invoke(self, sample_data_list: List[TabularSampleData]) -> List[TagsPrediction]:
    """Invoke the function on `sample_data_list`, using an image-field transformer keyed by field name."""
    name_keyed_transformer = self._get_image_field_transformer("name")
    return self._sample_handler.invoke(sample_data_list, name_keyed_transformer)

has_trained_model

has_trained_model() -> bool
Source code in src/nyckel/functions/tags/tabular_tags.py
def has_trained_model(self) -> bool:
    """Return the function handler's `is_trained` flag."""
    handler = self._function_handler
    return handler.is_trained

create_labels

create_labels(labels: Sequence[Union[ClassificationLabel, str]]) -> List[NyckelId]
Source code in src/nyckel/functions/tags/tabular_tags.py
def create_labels(self, labels: Sequence[Union[ClassificationLabel, str]]) -> List[NyckelId]:
    """Create labels; entries that are not `ClassificationLabel` are wrapped as `ClassificationLabel(name=...)`."""
    typed_labels = []
    for candidate in labels:
        if not isinstance(candidate, ClassificationLabel):
            candidate = ClassificationLabel(name=candidate)
        typed_labels.append(candidate)
    return self._label_handler.create_labels(typed_labels)

list_labels

list_labels() -> List[ClassificationLabel]
Source code in src/nyckel/functions/tags/tabular_tags.py
def list_labels(self) -> List[ClassificationLabel]:
    """List labels via the label handler, passing it the current `label_count`."""
    current_label_count = self.label_count
    return self._label_handler.list_labels(current_label_count)

read_label

read_label(label_id: NyckelId) -> ClassificationLabel
Source code in src/nyckel/functions/tags/tabular_tags.py
def read_label(self, label_id: NyckelId) -> ClassificationLabel:
    """Read the label with id `label_id` via the label handler."""
    label = self._label_handler.read_label(label_id)
    return label

update_label

update_label(label: ClassificationLabel) -> ClassificationLabel
Source code in src/nyckel/functions/tags/tabular_tags.py
def update_label(self, label: ClassificationLabel) -> ClassificationLabel:
    """Update `label` via the label handler and return the handler's result."""
    updated_label = self._label_handler.update_label(label)
    return updated_label

delete_labels

delete_labels(label_ids: List[NyckelId]) -> None
Source code in src/nyckel/functions/tags/tabular_tags.py
def delete_labels(self, label_ids: List[NyckelId]) -> None:
    """Delete the labels with the given ids via the label handler."""
    handler = self._label_handler
    handler.delete_labels(label_ids)

create_fields

create_fields(fields: List[TabularFunctionField]) -> List[NyckelId]
Source code in src/nyckel/functions/tags/tabular_tags.py
def create_fields(self, fields: List[TabularFunctionField]) -> List[NyckelId]:
    """Create `fields` via the field handler and return the handler's result."""
    created = self._field_handler.create_fields(fields)
    return created

list_fields

list_fields() -> List[TabularFunctionField]
Source code in src/nyckel/functions/tags/tabular_tags.py
def list_fields(self) -> List[TabularFunctionField]:
    """List the function's fields via the field handler."""
    fields = self._field_handler.list_fields()
    return fields

read_field

read_field(field_id: NyckelId) -> TabularFunctionField
Source code in src/nyckel/functions/tags/tabular_tags.py
def read_field(self, field_id: NyckelId) -> TabularFunctionField:
    """Read the field with id `field_id` via the field handler."""
    field = self._field_handler.read_field(field_id)
    return field

delete_field

delete_field(field_id: NyckelId) -> None
Source code in src/nyckel/functions/tags/tabular_tags.py
def delete_field(self, field_id: NyckelId) -> None:
    """Delete the field with id `field_id` via the field handler, returning the handler's result."""
    outcome = self._field_handler.delete_field(field_id)
    return outcome

create_samples

create_samples(samples: Sequence[Union[TabularTagsSample, TabularSampleData]]) -> List[NyckelId]
Source code in src/nyckel/functions/tags/tabular_tags.py
def create_samples(self, samples: Sequence[Union[TabularTagsSample, TabularSampleData]]) -> List[NyckelId]:
    """Post `samples`, creating any labels they reference that do not exist yet.

    Empty input short-circuits to `[]`. Otherwise the samples are normalized to
    `TabularTagsSample`, label names are stripped, field names are checked against
    the existing fields, and missing labels are created before posting.
    """
    if not len(samples):
        return []

    prepared = self._strip_label_names(self._wrangle_post_samples_input(samples))
    self._assert_fields_created(prepared)
    self._create_labels_as_needed(prepared)

    # For large tabular functions the POST samples API does not accept field names, so re-key by field ID.
    id_keyed_samples = self._switch_field_names_to_field_ids(prepared)
    id_keyed_transformer = self._get_image_field_transformer()
    return self._sample_handler.create_samples(id_keyed_samples, id_keyed_transformer)

list_samples

list_samples() -> List[TabularTagsSample]
Source code in src/nyckel/functions/tags/tabular_tags.py
def list_samples(self) -> List[TabularTagsSample]:
    """List samples, translating label ids and field ids back to their names."""
    raw_samples = self._sample_handler.list_samples(self.sample_count)
    labels = self._label_handler.list_labels(None)
    fields = self.list_fields()

    # id -> name lookups used to turn the raw API dicts into typed samples.
    label_name_by_id = {}
    for label in labels:
        label_name_by_id[label.id] = label.name
    field_name_by_id = {}
    for field in fields:
        field_name_by_id[field.id] = field.name

    typed_samples = []
    for raw_sample in raw_samples:
        typed_samples.append(self._sample_from_dict(raw_sample, label_name_by_id, field_name_by_id))  # type: ignore
    return typed_samples

read_sample

read_sample(sample_id: NyckelId) -> TabularTagsSample
Source code in src/nyckel/functions/tags/tabular_tags.py
def read_sample(self, sample_id: NyckelId) -> TabularTagsSample:
    """Read one sample, translating label ids and field ids back to their names."""
    raw_sample = self._sample_handler.read_sample(sample_id)

    labels = self._label_handler.list_labels(None)
    fields = self.list_fields()

    # id -> name lookups used to turn the raw API dict into a typed sample.
    label_name_by_id = {}
    for label in labels:
        label_name_by_id[label.id] = label.name
    field_name_by_id = {}
    for field in fields:
        field_name_by_id[field.id] = field.name

    return self._sample_from_dict(raw_sample, label_name_by_id, field_name_by_id)

update_annotation

update_annotation(sample: TabularTagsSample) -> None
Source code in src/nyckel/functions/tags/tabular_tags.py
def update_annotation(self, sample: TabularTagsSample) -> None:
    self._sample_handler.update_annotation(sample)

delete_samples

delete_samples(sample_ids: List[NyckelId]) -> None
Source code in src/nyckel/functions/tags/tabular_tags.py
def delete_samples(self, sample_ids: List[NyckelId]) -> None:
    self._sample_handler.delete_samples(sample_ids)

nyckel.TabularTagsSample dataclass

Source code in src/nyckel/functions/tags/tags.py
@dataclass
class TabularTagsSample:
    """One sample of a tabular tags function.

    Only `data` is required; every other field defaults to None.
    """

    # Field values keyed by field key (TabularSampleData).
    data: TabularSampleData
    # Sample id; None by default.
    id: Optional[NyckelId] = None
    # Optional external identifier string; None by default.
    external_id: Optional[str] = None
    # Tag annotations attached to the sample, if any.
    annotation: Optional[List[TagsAnnotation]] = None
    # Predictions attached to the sample, if any.
    prediction: Optional[List[ClassificationPrediction]] = None

data instance-attribute

id class-attribute instance-attribute

id: Optional[NyckelId] = None

external_id class-attribute instance-attribute

external_id: Optional[str] = None

annotation class-attribute instance-attribute

annotation: Optional[List[TagsAnnotation]] = None

prediction class-attribute instance-attribute

prediction: Optional[List[ClassificationPrediction]] = None

__init__

__init__(data: TabularSampleData, id: Optional[NyckelId] = None, external_id: Optional[str] = None, annotation: Optional[List[TagsAnnotation]] = None, prediction: Optional[List[ClassificationPrediction]] = None) -> None

nyckel.TabularSampleData module-attribute

# Data payload of a tabular sample: a dict from field key to field value.
TabularSampleData = Dict[TabularFieldKey, TabularFieldValue]

nyckel.TabularFieldKey module-attribute

# Field keys are plain strings.
TabularFieldKey = str

nyckel.TabularFieldValue module-attribute

# A field value is either a string or a float.
TabularFieldValue = Union[str, float]