Skip to content

Tabular Classification

nyckel.TabularClassificationFunction

Example:

from nyckel import Credentials, TabularClassificationFunction, TabularFunctionField

# Credentials for the API; the "..." placeholders must be replaced with real values.
credentials = Credentials(client_id="...", client_secret="...")

# Create a new tabular classification function named "InterestedProspect".
func = TabularClassificationFunction.create("InterestedProspect", credentials)
# Declare the two text fields used by the samples below.
func.create_fields([
    TabularFunctionField(type="Text", name="Name"),
    TabularFunctionField(type="Text", name="Response")
])
# Each sample is a (field values, label name) pair.
func.create_samples([
    ({"Name": "Adam Adams", "Response": "Thanks for reaching out. I'd love to chat"}, "Interested"),
    ({"Name": "Bo Berg", "Response": "Sure! Can you tell me a bit more?"}, "Interested"),
    ({"Name": "Charles Carter", "Response": "No thanks, I don't need a Classification API"}, "Not Interested"),
    ({"Name": "Devin Duncan", "Response": "Nope. Please stop bugging me."}, "Not Interested"),
])

# Invoke the function on one new row of field values (no label attached).
predictions = func.invoke([{"Name": "Frank Fisher", "Response": "Yea, I'd love to try the Nyckel API!"}])
Source code in src/nyckel/functions/classification/tabular_classification.py
class TabularClassificationFunction(ClassificationFunction):
    """
    Classification function whose sample data is a dictionary of named tabular fields.

    Example:

    ```py

    from nyckel import Credentials, TabularClassificationFunction, TabularFunctionField

    credentials = Credentials(client_id="...", client_secret="...")

    func = TabularClassificationFunction.create("InterestedProspect", credentials)
    func.create_fields([
        TabularFunctionField(type="Text", name="Name"),
        TabularFunctionField(type="Text", name="Response")
    ])
    func.create_samples([
        ({"Name": "Adam Adams", "Response": "Thanks for reaching out. I'd love to chat"}, "Interested"),
        ({"Name": "Bo Berg", "Response": "Sure! Can you tell me a bit more?"}, "Interested"),
        ({"Name": "Charles Carter", "Response": "No thanks, I don't need a Classification API"}, "Not Interested"),
        ({"Name": "Devin Duncan", "Response": "Nope. Please stop bugging me."}, "Not Interested"),
    ])

    predictions = func.invoke([{"Name": "Frank Fisher", "Response": "Yea, I'd love to try the Nyckel API!"}])
    ```
    """

    def __init__(self, function_id: NyckelId, credentials: Credentials) -> None:
        """Bind to the function with id ``function_id`` and set up its handlers.

        Args:
            function_id: Id of the function this object operates on.
            credentials: Credentials handed to every handler.

        Raises:
            AssertionError: If the function handler reports an input modality other than "Tabular".
        """
        self._function_id = function_id

        self._function_handler = ClassificationFunctionHandler(function_id, credentials)
        self._label_handler = ClassificationLabelHandler(function_id, credentials)
        self._field_handler = TabularFieldHandler(function_id, credentials)
        self._sample_handler = ClassificationSampleHandler(function_id, credentials)
        self._url_handler = ClassificationFunctionURLHandler(function_id, credentials.server_url)
        assert self._function_handler.get_input_modality() == "Tabular"

    def __str__(self) -> str:
        """Return the same text as ``__repr__``."""
        return self.__repr__()

    def __repr__(self) -> str:
        """Return a one-line summary with the function's name, id and train-page URL."""
        status_string = f"Name: {self.name}, id: {self.function_id}, url: {self._url_handler.train_page}"
        return status_string

    @property
    def function_id(self) -> str:
        """Id of the function, as given to the constructor."""
        return self._function_id

    @property
    def sample_count(self) -> int:
        """Sample count as reported by the function handler."""
        return self._function_handler.sample_count

    @property
    def label_count(self) -> int:
        """Label count as reported by the function handler."""
        return self._function_handler.label_count

    @property
    def name(self) -> str:
        """Function name as returned by the function handler."""
        return self._function_handler.get_name()

    @classmethod
    def create(cls, name: str, credentials: Credentials) -> "TabularClassificationFunction":
        """Create a new function called ``name`` with the "Tabular" modality via the function factory."""
        return factory.ClassificationFunctionFactory.create(name, "Tabular", credentials)  # type: ignore

    def delete(self) -> None:
        """Delete this function through the function handler."""
        self._function_handler.delete()

    def invoke(  # type: ignore
        self,
        sample_data_list: List[TabularSampleData],
        model_id: str = "",
    ) -> List[ClassificationPrediction]:
        """Invoke the function on every entry of ``sample_data_list``.

        Args:
            sample_data_list: Field-value dictionaries to classify.
            model_id: Forwarded unchanged to the sample handler.

        Returns:
            The predictions produced by the sample handler.
        """
        # The image transformer is built with the field *name* as identifier here.
        return self._sample_handler.invoke(
            sample_data_list, self._get_image_field_transformer("name"), model_id=model_id
        )

    def has_trained_model(self) -> bool:
        """Return the function handler's ``is_trained`` flag."""
        return self._function_handler.is_trained

    def create_labels(self, labels: Sequence[Union[ClassificationLabel, str]]) -> List[NyckelId]:
        """Create labels; entries that are not ``ClassificationLabel`` are wrapped as ``ClassificationLabel(name=...)``."""
        typed_labels = [
            label if isinstance(label, ClassificationLabel) else ClassificationLabel(name=label) for label in labels
        ]
        return self._label_handler.create_labels(typed_labels)

    def list_labels(self) -> List[ClassificationLabel]:
        """List labels through the label handler, passing it the current ``label_count``."""
        return self._label_handler.list_labels(self.label_count)

    def read_label(self, label_id: NyckelId) -> ClassificationLabel:
        """Read the label with id ``label_id`` through the label handler."""
        return self._label_handler.read_label(label_id)

    def update_label(self, label: ClassificationLabel) -> ClassificationLabel:
        """Update ``label`` through the label handler and return the handler's result."""
        return self._label_handler.update_label(label)

    def delete_labels(self, label_ids: List[NyckelId]) -> None:
        """Delete the labels with the given ids through the label handler."""
        return self._label_handler.delete_labels(label_ids)

    def create_fields(self, fields: List[TabularFunctionField]) -> List[NyckelId]:
        """Create ``fields`` through the field handler and return the handler's result."""
        return self._field_handler.create_fields(fields)

    def list_fields(self) -> List[TabularFunctionField]:
        """List the function's fields through the field handler."""
        return self._field_handler.list_fields()

    def read_field(self, field_id: NyckelId) -> TabularFunctionField:
        """Read the field with id ``field_id`` through the field handler."""
        return self._field_handler.read_field(field_id)

    def delete_field(self, field_id: NyckelId) -> None:
        """Delete the field with id ``field_id`` through the field handler."""
        return self._field_handler.delete_field(field_id)

    def create_samples(self, samples: Sequence[Union[TabularClassificationSample, Tuple[TabularSampleData, LabelName], TabularSampleData]]) -> List[NyckelId]:  # type: ignore # noqa: E501
        """Create samples, creating any labels they reference that do not exist yet.

        Args:
            samples: Each entry is a ``TabularClassificationSample``, a
                ``(data, label_name)`` pair, or a bare data dictionary.

        Returns:
            The result of the sample handler's ``create_samples``; an empty list
            when ``samples`` is empty.

        Raises:
            ValueError: If an entry has an unsupported type.
            AssertionError: If a sample uses a field name that has not been created.
        """
        if len(samples) == 0:
            return []

        typed_samples = self._wrangle_post_samples_input(samples)
        typed_samples = self._strip_label_names(typed_samples)
        self._assert_fields_created(typed_samples)
        self._create_labels_as_needed(typed_samples)

        # For large tabular functions, the POST samples API does not support field names. So we need to switch to IDs.
        typed_samples = self._switch_field_names_to_field_ids(typed_samples)
        return self._sample_handler.create_samples(typed_samples, self._get_image_field_transformer())

    def _get_image_field_transformer(self, field_identifier: str = "id") -> Callable:
        """Return a transformer for the function's image field, or the identity if there is none.

        Args:
            field_identifier: "id" builds the transformer from the field id, "name"
                from the field name. Any other value leaves the identity in place.
        """
        fields = self.list_fields()
        image_field_transformer = lambda x: x  # noqa: E731
        for field in fields:
            if field.type == "Image":
                # There is only one image field (max) per function, so we can break here.
                if field_identifier == "id":
                    assert field.id is not None
                    image_field_transformer = ImageFieldTransformer(field.id)
                elif field_identifier == "name":
                    image_field_transformer = ImageFieldTransformer(field.name)
                break
        return image_field_transformer

    def _switch_field_names_to_field_ids(
        self, samples: List[TabularClassificationSample]
    ) -> List[TabularClassificationSample]:
        """Return a deep copy of ``samples`` whose data is keyed by field id instead of field name."""
        samples = copy.deepcopy(samples)  # Deep-copy so we don't modify the callers input.
        fields = self.list_fields()
        field_id_by_name = {field.name: field.id for field in fields}
        for sample in samples:
            # Snapshot the keys first: the dictionary is modified inside the loop.
            field_names = list(sample.data.keys())
            for field_name in field_names:
                field_value = sample.data.pop(field_name)
                sample.data[field_id_by_name[field_name]] = field_value  # type: ignore
        return samples

    def list_samples(self) -> List[TabularClassificationSample]:  # type: ignore
        """Return all samples, with label ids and field ids translated to names."""
        samples_dict_list = self._sample_handler.list_samples(self.sample_count)
        labels = self._label_handler.list_labels(None)
        fields = self.list_fields()

        label_name_by_id = {label.id: label.name for label in labels}
        field_name_by_id = {field.id: field.name for field in fields}  # type: ignore

        return [self._sample_from_dict(entry, label_name_by_id, field_name_by_id) for entry in samples_dict_list]  # type: ignore # noqa: E501

    def read_sample(self, sample_id: NyckelId) -> TabularClassificationSample:
        """Return the sample with id ``sample_id``, with label ids and field ids translated to names."""
        sample_as_dict = self._sample_handler.read_sample(sample_id)

        labels = self._label_handler.list_labels(None)
        fields = self.list_fields()

        label_name_by_id = {label.id: label.name for label in labels}
        field_name_by_id = {field.id: field.name for field in fields}  # type: ignore

        return self._sample_from_dict(sample_as_dict, label_name_by_id, field_name_by_id)  # type: ignore

    def update_annotation(self, sample: TabularClassificationSample) -> None:  # type: ignore
        """Forward ``sample`` to the sample handler's ``update_annotation``."""
        self._sample_handler.update_annotation(sample)

    def delete_samples(self, sample_ids: List[NyckelId]) -> None:
        """Delete the samples with the given ids through the sample handler."""
        self._sample_handler.delete_samples(sample_ids)

    def _wrangle_post_samples_input(
        self,
        samples: Sequence[Union[TabularClassificationSample, Tuple[TabularSampleData, LabelName], TabularSampleData]],
    ) -> List[TabularClassificationSample]:
        """Normalize every accepted input form to ``TabularClassificationSample``.

        ``TabularClassificationSample`` entries are passed through as the same
        objects; lists/tuples are unpacked as ``(data, label_name)``; dictionaries
        become samples without an annotation.

        Raises:
            ValueError: If an entry is none of the supported types.
        """
        typed_samples: List[TabularClassificationSample] = []
        for sample in samples:
            if isinstance(sample, TabularClassificationSample):
                typed_samples.append(sample)
            elif isinstance(sample, (list, tuple)):
                data_dict, label_name = sample
                typed_samples.append(
                    TabularClassificationSample(
                        data=data_dict, annotation=ClassificationAnnotation(label_name=label_name)
                    )
                )
            elif isinstance(sample, dict):
                typed_samples.append(TabularClassificationSample(data=sample))
            else:
                raise ValueError(f"Unknown sample type: {type(sample)}")
        return typed_samples

    def _assert_fields_created(self, samples: List[TabularClassificationSample]) -> None:
        """Assert that every field name used by ``samples`` already exists on the function."""
        existing_fields = self.list_fields()
        existing_field_names = {field.name for field in existing_fields}
        new_field_names = {field_name for sample in samples for field_name in sample.data.keys()}
        missing_field_names = new_field_names - existing_field_names
        assert len(missing_field_names) == 0, f"Fields not created: {missing_field_names=}. Please create fields first."

    def _create_labels_as_needed(self, samples: List[TabularClassificationSample]) -> None:
        """Create every label name referenced by ``samples`` that is not among the existing labels."""
        existing_labels = self._label_handler.list_labels(None)
        existing_label_names = {label.name for label in existing_labels}
        new_label_names = {sample.annotation.label_name for sample in samples if sample.annotation}
        missing_label_names = new_label_names - existing_label_names
        missing_labels = [ClassificationLabel(name=label_name) for label_name in missing_label_names]
        if len(missing_labels) > 0:
            self._label_handler.create_labels(missing_labels)

    def _sample_from_dict(
        self, sample_dict: Dict, label_name_by_id: Dict[str, str], field_name_by_id: Dict[str, str]
    ) -> TabularClassificationSample:
        """Build a ``TabularClassificationSample`` from a sample dictionary.

        Args:
            sample_dict: Dictionary with "id" and "data" entries and optional
                "externalId", "annotation" and "prediction" entries.
            label_name_by_id: Lookup from label id to label name.
            field_name_by_id: Lookup from field id to field name.

        Raises:
            KeyError: If a field id or label id is missing from the lookups.
        """
        # Ids are passed through strip_nyckel_prefix before they are used as lookup keys.
        tabular_data_body = {
            field_name_by_id[strip_nyckel_prefix(field_id)]: field_data
            for field_id, field_data in sample_dict["data"].items()
        }

        external_id = sample_dict.get("externalId")

        if "annotation" in sample_dict:
            annotation = ClassificationAnnotation(
                label_name=label_name_by_id[strip_nyckel_prefix(sample_dict["annotation"]["labelId"])],
            )
        else:
            annotation = None

        if "prediction" in sample_dict:
            prediction = ClassificationPrediction(
                confidence=sample_dict["prediction"]["confidence"],
                label_name=label_name_by_id[strip_nyckel_prefix(sample_dict["prediction"]["labelId"])],
            )
        else:
            prediction = None

        return TabularClassificationSample(
            id=strip_nyckel_prefix(sample_dict["id"]),
            data=tabular_data_body,
            external_id=external_id,
            annotation=annotation,
            prediction=prediction,
        )

    def _strip_label_names(self, samples: List[TabularClassificationSample]) -> List[TabularClassificationSample]:
        """Return samples whose annotation label names have surrounding whitespace removed.

        Annotated samples are shallow-copied together with their annotation before
        the label name is rewritten, so sample objects handed in by the caller are
        left unmodified. Samples without an annotation are returned as the same
        objects.
        """
        stripped_samples: List[TabularClassificationSample] = []
        for sample in samples:
            if sample.annotation:
                # Copy the sample and its annotation; the data dictionary is shared, not copied.
                sample = copy.copy(sample)
                sample.annotation = copy.copy(sample.annotation)
                sample.annotation.label_name = sample.annotation.label_name.strip()
            stripped_samples.append(sample)
        return stripped_samples

function_id property

function_id: str

sample_count property

sample_count: int

label_count property

label_count: int

name property

name: str

__init__

__init__(function_id: NyckelId, credentials: Credentials) -> None
Source code in src/nyckel/functions/classification/tabular_classification.py
def __init__(self, function_id: NyckelId, credentials: Credentials) -> None:
    """Bind to the function with id ``function_id`` and set up its handlers.

    Raises:
        AssertionError: If the function handler reports an input modality other than "Tabular".
    """
    self._function_id = function_id

    # One handler per resource kind; each receives the same function id.
    self._function_handler = ClassificationFunctionHandler(function_id, credentials)
    self._label_handler = ClassificationLabelHandler(function_id, credentials)
    self._field_handler = TabularFieldHandler(function_id, credentials)
    self._sample_handler = ClassificationSampleHandler(function_id, credentials)
    self._url_handler = ClassificationFunctionURLHandler(function_id, credentials.server_url)
    assert self._function_handler.get_input_modality() == "Tabular"

create classmethod

create(name: str, credentials: Credentials) -> TabularClassificationFunction
Source code in src/nyckel/functions/classification/tabular_classification.py
@classmethod
def create(cls, name: str, credentials: Credentials) -> "TabularClassificationFunction":
    """Create a new function called ``name`` with the "Tabular" modality via the function factory."""
    return factory.ClassificationFunctionFactory.create(name, "Tabular", credentials)  # type: ignore

delete

delete() -> None
Source code in src/nyckel/functions/classification/tabular_classification.py
def delete(self) -> None:
    """Delete this function through the function handler."""
    self._function_handler.delete()

invoke

invoke(sample_data_list: List[TabularSampleData], model_id: str = '') -> List[ClassificationPrediction]
Source code in src/nyckel/functions/classification/tabular_classification.py
def invoke(  # type: ignore
    self,
    sample_data_list: List[TabularSampleData],
    model_id: str = "",
) -> List[ClassificationPrediction]:
    """Invoke the function on every entry of ``sample_data_list`` and return the predictions.

    ``model_id`` is forwarded unchanged to the sample handler.
    """
    # The image transformer is requested with "name" as the field identifier.
    return self._sample_handler.invoke(
        sample_data_list, self._get_image_field_transformer("name"), model_id=model_id
    )

has_trained_model

has_trained_model() -> bool
Source code in src/nyckel/functions/classification/tabular_classification.py
def has_trained_model(self) -> bool:
    """Return the function handler's ``is_trained`` flag."""
    return self._function_handler.is_trained

create_labels

create_labels(labels: Sequence[Union[ClassificationLabel, str]]) -> List[NyckelId]
Source code in src/nyckel/functions/classification/tabular_classification.py
def create_labels(self, labels: Sequence[Union[ClassificationLabel, str]]) -> List[NyckelId]:
    """Create labels from ``ClassificationLabel`` objects and/or plain label names.

    Entries that are not already ``ClassificationLabel`` instances are wrapped
    as ``ClassificationLabel(name=entry)`` before the label handler is called.
    """
    label_objects = []
    for entry in labels:
        if not isinstance(entry, ClassificationLabel):
            entry = ClassificationLabel(name=entry)
        label_objects.append(entry)
    return self._label_handler.create_labels(label_objects)

list_labels

list_labels() -> List[ClassificationLabel]
Source code in src/nyckel/functions/classification/tabular_classification.py
def list_labels(self) -> List[ClassificationLabel]:
    """List labels through the label handler, passing it the current ``label_count``."""
    return self._label_handler.list_labels(self.label_count)

read_label

read_label(label_id: NyckelId) -> ClassificationLabel
Source code in src/nyckel/functions/classification/tabular_classification.py
def read_label(self, label_id: NyckelId) -> ClassificationLabel:
    """Read the label with id ``label_id`` through the label handler."""
    return self._label_handler.read_label(label_id)

update_label

update_label(label: ClassificationLabel) -> ClassificationLabel
Source code in src/nyckel/functions/classification/tabular_classification.py
def update_label(self, label: ClassificationLabel) -> ClassificationLabel:
    """Update ``label`` through the label handler and return the handler's result."""
    return self._label_handler.update_label(label)

delete_labels

delete_labels(label_ids: List[NyckelId]) -> None
Source code in src/nyckel/functions/classification/tabular_classification.py
def delete_labels(self, label_ids: List[NyckelId]) -> None:
    """Delete the labels with the given ids through the label handler."""
    return self._label_handler.delete_labels(label_ids)

create_fields

create_fields(fields: List[TabularFunctionField]) -> List[NyckelId]
Source code in src/nyckel/functions/classification/tabular_classification.py
def create_fields(self, fields: List[TabularFunctionField]) -> List[NyckelId]:
    """Create ``fields`` through the field handler and return the handler's result."""
    return self._field_handler.create_fields(fields)

list_fields

list_fields() -> List[TabularFunctionField]
Source code in src/nyckel/functions/classification/tabular_classification.py
def list_fields(self) -> List[TabularFunctionField]:
    """List the function's fields through the field handler."""
    return self._field_handler.list_fields()

read_field

read_field(field_id: NyckelId) -> TabularFunctionField
Source code in src/nyckel/functions/classification/tabular_classification.py
def read_field(self, field_id: NyckelId) -> TabularFunctionField:
    """Read the field with id ``field_id`` through the field handler."""
    return self._field_handler.read_field(field_id)

delete_field

delete_field(field_id: NyckelId) -> None
Source code in src/nyckel/functions/classification/tabular_classification.py
def delete_field(self, field_id: NyckelId) -> None:
    """Delete the field with id ``field_id`` through the field handler."""
    return self._field_handler.delete_field(field_id)

create_samples

create_samples(samples: Sequence[Union[TabularClassificationSample, Tuple[TabularSampleData, LabelName], TabularSampleData]]) -> List[NyckelId]
Source code in src/nyckel/functions/classification/tabular_classification.py
def create_samples(self, samples: Sequence[Union[TabularClassificationSample, Tuple[TabularSampleData, LabelName], TabularSampleData]]) -> List[NyckelId]:  # type: ignore # noqa: E501
    """Create samples from any of the accepted input forms.

    Each entry of ``samples`` may be a ``TabularClassificationSample``, a
    ``(data, label_name)`` tuple, or a bare data dictionary. Returns the result
    of the sample handler's ``create_samples``, or an empty list when
    ``samples`` is empty.
    """
    # Nothing to post: return early without calling any helper.
    if len(samples) == 0:
        return []

    # Normalize the input, clean up label names, then check fields and add missing labels.
    typed_samples = self._wrangle_post_samples_input(samples)
    typed_samples = self._strip_label_names(typed_samples)
    self._assert_fields_created(typed_samples)
    self._create_labels_as_needed(typed_samples)

    # For large tabular functions, the POST samples API does not support field names. So we need to switch to IDs.
    typed_samples = self._switch_field_names_to_field_ids(typed_samples)
    return self._sample_handler.create_samples(typed_samples, self._get_image_field_transformer())

list_samples

list_samples() -> List[TabularClassificationSample]
Source code in src/nyckel/functions/classification/tabular_classification.py
def list_samples(self) -> List[TabularClassificationSample]:  # type: ignore
    """Return every sample of the function as a ``TabularClassificationSample``.

    Raw sample dictionaries come from the sample handler; label ids and field
    ids in them are translated to names using the current labels and fields.
    """
    raw_samples = self._sample_handler.list_samples(self.sample_count)
    all_labels = self._label_handler.list_labels(None)
    all_fields = self.list_fields()

    # Build the id -> name lookups consumed by _sample_from_dict.
    label_names = {}
    for label in all_labels:
        label_names[label.id] = label.name
    field_names = {}
    for field in all_fields:
        field_names[field.id] = field.name

    converted = []
    for raw_sample in raw_samples:
        converted.append(self._sample_from_dict(raw_sample, label_names, field_names))  # type: ignore
    return converted

read_sample

read_sample(sample_id: NyckelId) -> TabularClassificationSample
Source code in src/nyckel/functions/classification/tabular_classification.py
def read_sample(self, sample_id: NyckelId) -> TabularClassificationSample:
    """Return the sample with id ``sample_id`` as a ``TabularClassificationSample``.

    The raw sample dictionary comes from the sample handler; label ids and
    field ids in it are translated to names using the current labels and fields.
    """
    raw_sample = self._sample_handler.read_sample(sample_id)
    all_labels = self._label_handler.list_labels(None)
    all_fields = self.list_fields()

    # Build the id -> name lookups consumed by _sample_from_dict.
    label_names = {}
    for label in all_labels:
        label_names[label.id] = label.name
    field_names = {}
    for field in all_fields:
        field_names[field.id] = field.name

    return self._sample_from_dict(raw_sample, label_names, field_names)  # type: ignore

update_annotation

update_annotation(sample: TabularClassificationSample) -> None
Source code in src/nyckel/functions/classification/tabular_classification.py
def update_annotation(self, sample: TabularClassificationSample) -> None:  # type: ignore
    """Forward ``sample`` to the sample handler's ``update_annotation``."""
    self._sample_handler.update_annotation(sample)

delete_samples

delete_samples(sample_ids: List[NyckelId]) -> None
Source code in src/nyckel/functions/classification/tabular_classification.py
def delete_samples(self, sample_ids: List[NyckelId]) -> None:
    """Delete the samples with the given ids through the sample handler."""
    self._sample_handler.delete_samples(sample_ids)

nyckel.TabularClassificationSample dataclass

Source code in src/nyckel/functions/classification/classification.py
@dataclass
class TabularClassificationSample:
    """One sample of a tabular classification function.

    Only ``data`` is required; every other attribute defaults to ``None``.
    """

    # Field values of the sample (a TabularSampleData mapping).
    data: TabularSampleData
    # Id of the sample, if one is set.
    id: Optional[NyckelId] = None
    # Optional external identifier for the sample.
    external_id: Optional[str] = None
    # Annotation attached to the sample, if any.
    annotation: Optional[ClassificationAnnotation] = None
    # Prediction attached to the sample, if any.
    prediction: Optional[ClassificationPrediction] = None

data instance-attribute

id class-attribute instance-attribute

id: Optional[NyckelId] = None

external_id class-attribute instance-attribute

external_id: Optional[str] = None

annotation class-attribute instance-attribute

annotation: Optional[ClassificationAnnotation] = None

prediction class-attribute instance-attribute

prediction: Optional[ClassificationPrediction] = None

__init__

__init__(data: TabularSampleData, id: Optional[NyckelId] = None, external_id: Optional[str] = None, annotation: Optional[ClassificationAnnotation] = None, prediction: Optional[ClassificationPrediction] = None) -> None

nyckel.TabularSampleData module-attribute

TabularSampleData = Dict[TabularFieldKey, TabularFieldValue]

nyckel.TabularFieldKey module-attribute

TabularFieldKey = str

nyckel.TabularFieldValue module-attribute

TabularFieldValue = Union[str, float]