跳到内容

向量

Argilla 中的向量字段用于定义记录的向量形式，该记录将由用户审核。

使用示例

要定义向量字段，请使用名称（name）和维度数（dimensions）实例化 VectorField 类，然后将其传递给 Settings 类的 vectors 参数。

# Declare dataset settings with one text field and one vector field.
settings = rg.Settings(
    fields=[
        rg.TextField(name="text"),
    ],
    vectors=[
        rg.VectorField(
            name="my_vector",
            # The keyword is `dimensions` (plural), matching the
            # VectorField.__init__ signature; `dimension` would be rejected
            # as an unexpected keyword argument.
            dimensions=768,
            title="Document Embedding",
        ),
    ],
)

要添加带有向量的记录，请参阅 rg.Vector 类文档。


VectorField

基类:Resource

用于 Argilla Dataset Settings 中的向量字段

源代码位于 src/argilla/settings/_vector.py
class VectorField(Resource):
    """Vector field definition for the `Settings` of an Argilla `Dataset`.

    The field's state (name, title, dimensions, dataset id) is stored on a
    `VectorFieldModel`; the public properties read from and write to it.
    """

    _model: VectorFieldModel
    _api: VectorsAPI
    _dataset: Optional["Dataset"]

    def __init__(
        self,
        name: str,
        dimensions: int,
        title: Optional[str] = None,
        _client: Optional["Argilla"] = None,
    ) -> None:
        """Create a vector field that is not yet attached to a dataset.

        Parameters:
            name (str): The name of the vector field
            dimensions (int): The number of dimensions in the vector
            title (Optional[str]): The title of the vector to be shown in the UI.
            _client (Optional[Argilla]): Client to bind to; when falsy, the
                result of `Argilla._get_default()` is used instead.
        """
        client = _client if _client else Argilla._get_default()
        vectors_api = client.api.vectors
        super().__init__(api=vectors_api, client=client)
        self._model = VectorFieldModel(
            name=name,
            title=title,
            dimensions=dimensions,
        )
        # No dataset is associated until the `dataset` setter is used.
        self._dataset = None

    @property
    def name(self) -> str:
        """Name stored on the underlying model."""
        return self._model.name

    @name.setter
    def name(self, value: str) -> None:
        self._model.name = value

    @property
    def title(self) -> Optional[str]:
        """Title stored on the underlying model."""
        return self._model.title

    @title.setter
    def title(self, value: Optional[str]) -> None:
        self._model.title = value

    @property
    def dimensions(self) -> int:
        """Number of dimensions stored on the underlying model."""
        return self._model.dimensions

    @dimensions.setter
    def dimensions(self, value: int) -> None:
        self._model.dimensions = value

    @property
    def dataset(self) -> "Dataset":
        """Dataset assigned to this field; `None` until one has been set."""
        return self._dataset

    @dataset.setter
    def dataset(self, value: "Dataset") -> None:
        # Attaching a dataset also records its id on the model and rebinds
        # this field to the dataset's client.
        self._dataset = value
        self._model.dataset_id = value.id
        self._with_client(value._client)

    def __repr__(self) -> str:
        """Return a summary showing the class name, name, title and dimensions."""
        return f"{self.__class__.__name__}(name={self.name}, title={self.title}, dimensions={self.dimensions})"

    @classmethod
    def from_model(cls, model: VectorFieldModel) -> "VectorField":
        """Build a field backed by an existing `VectorFieldModel`.

        The instance is constructed from the model's name and dimensions and
        its `_model` is then replaced by `model` itself.
        """
        field = cls(name=model.name, dimensions=model.dimensions)
        field._model = model
        return field

    @classmethod
    def from_dict(cls, data: dict) -> "VectorField":
        """Build a field from a dict of `VectorFieldModel` keyword arguments."""
        return cls.from_model(model=VectorFieldModel(**data))

    def _with_client(self, client: "Argilla") -> "VectorField":
        """Bind this field to `client` and its vectors API, returning `self`."""
        # TODO: Review and simplify. Maybe only one of them is required
        self._client = client
        self._api = self._client.api.vectors
        return self

__init__(name, dimensions, title=None, _client=None)

用于 Argilla Dataset Settings 中的向量字段

参数

名称 类型 描述 默认值
name str

向量字段的名称

必需
dimensions int

向量中的维度数量

必需
title Optional[str]

将在 UI 中显示的向量标题。

None

源代码位于 src/argilla/settings/_vector.py
def __init__(
    self,
    name: str,
    dimensions: int,
    title: Optional[str] = None,
    _client: Optional["Argilla"] = None,
) -> None:
    """Create a vector field that is not yet attached to a dataset.

    Parameters:
        name (str): The name of the vector field
        dimensions (int): The number of dimensions in the vector
        title (Optional[str]): The title of the vector to be shown in the UI.
        _client (Optional[Argilla]): Client to bind to; when falsy, the
            result of `Argilla._get_default()` is used instead.
    """
    client = _client if _client else Argilla._get_default()
    vectors_api = client.api.vectors
    super().__init__(api=vectors_api, client=client)
    self._model = VectorFieldModel(
        name=name,
        title=title,
        dimensions=dimensions,
    )
    # No dataset is associated at construction time.
    self._dataset = None