From 0ed6b806c1994cc1f916f6cc291d70c2b75801ac Mon Sep 17 00:00:00 2001
From: jannisborn
Date: Thu, 16 Nov 2023 17:07:03 +0100
Subject: [PATCH] feat: get a float encoding from the full value

---
 terminator/numerical_encodings.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/terminator/numerical_encodings.py b/terminator/numerical_encodings.py
index c16e19c..201fb7c 100644
--- a/terminator/numerical_encodings.py
+++ b/terminator/numerical_encodings.py
@@ -49,6 +49,34 @@ def get_float_encoding(
     return vals / (vmax / 10)
 
 
+def get_full_float_encoding(
+    value: float, embedding_size: int, vmax: float = 1.0
+) -> Tensor:
+    """
+    Convert a float value into a _fixed_ embedding vector.
+
+    Args:
+        value: The float value to be encoded.
+        embedding_size: The size of the embedding.
+        vmax: Maximal value the `value` variable can take. This normalizes values
+            to be in the range ~ [-10, 10]. NOTE: If remaining nn.embeddings in
+            model use `max_norm`, this might result in large range discrepancies.
+
+    Returns:
+        torch.Tensor of shape (embedding_size, ) containing the embedding.
+    """
+    if embedding_size % 2 != 0:
+        raise ValueError(f"Embedding size {embedding_size} cannot be odd.")
+    integer = int(value)
+    decimal = value - integer
+    scalar = integer * 10**decimal
+    embedding = torch.zeros((embedding_size,))
+    for i in range(0, embedding_size, 2):
+        embedding[i] = scalar / (i + 1)
+        embedding[i + 1] = -scalar / (i + 1)
+    return embedding
+
+
 def get_int_encoding(token: str, embedding_size: int) -> torch.Tensor:
     """Convert a token representing an integer into a _fixed_ embedding vector.
     NOTE: This can be used only for positive integers - the generation of the
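
Below is a minimal usage sketch for the helper introduced by this patch; it is not part of the diff itself. It assumes the patch has been applied and that `terminator.numerical_encodings` exposes `get_full_float_encoding` at module level; the input value and embedding size are illustrative only, and `vmax` is left at its default.

    # Usage sketch (assumes the patched terminator package is installed/importable)
    import torch
    from terminator.numerical_encodings import get_full_float_encoding

    # Encode the float 3.5 into a fixed vector of size 8 (embedding_size must be even).
    emb = get_full_float_encoding(value=3.5, embedding_size=8)
    print(emb.shape)  # torch.Size([8])

    # Adjacent entries carry the same magnitude with alternating sign, and the
    # magnitude decays with position: scalar / 1, -scalar / 1, scalar / 3, -scalar / 3, ...
    print(emb[:4])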