Update deps #2

Merged: 1 commit, May 14, 2024
3 changes: 1 addition & 2 deletions .editorconfig
@@ -4,11 +4,10 @@

root = true


[*]
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
indent_style = space
indent_size = 4
indent_size = 2
11 changes: 0 additions & 11 deletions .github/dependabot.yml

This file was deleted.

19 changes: 19 additions & 0 deletions .github/renovate.json
@@ -0,0 +1,19 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json",
"timezone": "Asia/Shanghai",
"schedule": ["before 8am on monday"],
"automerge": true,
"lockFileMaintenance": {
"enabled": true
},
"packageRules": [
{
"groupName": "github-actions",
"matchManagers": ["github-actions"]
},
{
"groupName": "rust crates",
"matchManagers": ["cargo"]
}
]
}
18 changes: 8 additions & 10 deletions .github/workflows/ci.yml
@@ -22,7 +22,6 @@ jobs:
- target: x86_64-unknown-linux-gnu
- os: macos-latest
target: aarch64-apple-darwin
just_goals: build
- os: macos-latest
target: x86_64-apple-darwin
- os: windows-latest
@@ -36,20 +35,18 @@
submodules: recursive

- name: Install rust
uses: actions-rs/toolchain@v1
uses: dtolnay/rust-toolchain@stable
with:
target: ${{ matrix.target }}
toolchain: nightly
profile: minimal
override: true
targets: ${{ matrix.target }}
toolchain: stable

- uses: Swatinem/rust-cache@v2

- uses: extractions/setup-just@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

- run: just ${{ matrix.just_goals || 'build test' }}
- run: just build test --target ${{ matrix.target }}

lint:
runs-on: ubuntu-latest
@@ -59,10 +56,9 @@
submodules: recursive

- name: Install rust
uses: actions-rs/toolchain@v1
uses: dtolnay/rust-toolchain@stable
with:
toolchain: nightly
override: true
components: rustfmt, clippy

- uses: Swatinem/rust-cache@v2
@@ -76,11 +72,13 @@
ci-everything:
name: All CI stages
runs-on: ubuntu-latest
if: always()
needs:
- just
- lint
steps:
- run: exit 0
- run: exit 1
if: ${{ always() && (contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled')) }}

init-release:
name: Run the release workflow
2 changes: 1 addition & 1 deletion Justfile
@@ -22,7 +22,7 @@ install:
cargo install --path .

test *args:
cargo test --all-features {{args}}
cargo test --features async-openai --features dhat-heap {{args}}
alias t := test

bench *args:
12 changes: 4 additions & 8 deletions tiktoken-rs/Cargo.toml
@@ -13,20 +13,16 @@ documentation = "https://docs.rs/crate/tiktoken-rs/"
license = "MIT"
readme = "../README.md"

[profile.release]
incremental = true
debug = 1

[dependencies]
anyhow = "1.0.76"
async-openai = { version = "0.14.2", optional = true }
base64 = "0.21.5"
async-openai = { version = "0.21.0", optional = true }
base64 = "0.22.1"
bstr = "1.6.2"
dhat = { version = "0.3.2", optional = true }
fancy-regex = "0.12.0"
fancy-regex = "0.13.0"
lazy_static = "1.4.0"
parking_lot = "0.12.1"
pyo3 = { version = "0.19.2", optional = true }
pyo3 = { version = "0.21.2", optional = true }
rustc-hash = "1.1.0"

[features]
23 changes: 10 additions & 13 deletions tiktoken-rs/README.md
@@ -75,27 +75,24 @@ Need to enable the `async-openai` feature in your `Cargo.toml` file.

```rust
use tiktoken_rs::async_openai::get_chat_completion_max_tokens;
use async_openai::types::{ChatCompletionRequestMessage, Role};
use async_openai::types::{ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage, ChatCompletionRequestUserMessage, ChatCompletionRequestUserMessageContent, Role};

let messages = vec![
ChatCompletionRequestMessage {
content: Some("You are a helpful assistant that only speaks French.".to_string()),
ChatCompletionRequestMessage::System(ChatCompletionRequestSystemMessage {
content: "You are a helpful assistant that only speaks French.".to_string(),
role: Role::System,
name: None,
function_call: None,
},
ChatCompletionRequestMessage {
content: Some("Hello, how are you?".to_string()),
}),
ChatCompletionRequestMessage::User(ChatCompletionRequestUserMessage {
content: ChatCompletionRequestUserMessageContent::Text("Hello, how are you?".to_string()),
role: Role::User,
name: None,
function_call: None,
},
ChatCompletionRequestMessage {
content: Some("Parlez-vous francais?".to_string()),
}),
ChatCompletionRequestMessage::System(ChatCompletionRequestSystemMessage {
content: "Parlez-vous francais?".to_string(),
role: Role::System,
name: None,
function_call: None,
},
}),
];
let max_tokens = get_chat_completion_max_tokens("gpt-4", &messages).unwrap();
println!("max_tokens: {}", max_tokens);
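A note on the API shape behind this README change: in async-openai 0.21, `ChatCompletionRequestMessage` became an enum with one variant per role, so messages are built from per-role structs rather than one flat struct. As a sketch, the same messages can also be built with the crate's generated `*Args` builders (builder names assumed from async-openai's derive_builder convention; the struct-literal form shown in the diff is equivalent):

```rust
use async_openai::types::{
    ChatCompletionRequestMessage, ChatCompletionRequestSystemMessageArgs,
    ChatCompletionRequestUserMessageArgs,
};

fn messages() -> Result<Vec<ChatCompletionRequestMessage>, Box<dyn std::error::Error>> {
    Ok(vec![
        // `.build()?` yields the per-role struct; `.into()` wraps it in the enum variant.
        ChatCompletionRequestSystemMessageArgs::default()
            .content("You are a helpful assistant that only speaks French.")
            .build()?
            .into(),
        ChatCompletionRequestUserMessageArgs::default()
            .content("Hello, how are you?")
            .build()?
            .into(),
    ])
}
```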
105 changes: 90 additions & 15 deletions tiktoken-rs/src/api.rs
@@ -57,7 +57,7 @@ pub struct FunctionCall {

#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ChatCompletionRequestMessage {
/// The role of the messages author. One of `system`, `user`, `assistant`, or `function`.
/// The role of the messages author. One of `system`, `user`, `assistant`, `tool`, or `function`.
pub role: String,
/// The contents of the message.
/// `content` is required for all messages except assistant messages with function calls.
@@ -379,10 +379,22 @@ pub mod async_openai {
{
fn from(m: &async_openai::types::ChatCompletionRequestMessage) -> Self {
Self {
role: m.role.to_string(),
name: m.name.clone(),
content: m.content.clone(),
function_call: m.function_call.as_ref().map(|f| f.into()),
role: m.role().to_string(),
name: m.name().map(|x| x.to_owned()),
content: m.content(),
function_call: if let async_openai::types::ChatCompletionRequestMessage::Function(
async_openai::types::ChatCompletionRequestFunctionMessage {
name, content, ..
},
) = m
{
Some(super::FunctionCall {
name: name.clone(),
arguments: content.clone().unwrap_or_default(),
})
} else {
None
},
}
}
}
@@ -423,32 +435,95 @@ pub mod async_openai {
super::get_chat_completion_max_tokens(model, &messages)
}

trait ChatCompletionRequestMessageCommon {
fn role(&self) -> &str;
fn name(&self) -> Option<&str>;
fn content(&self) -> Option<String>;
}

impl ChatCompletionRequestMessageCommon for async_openai::types::ChatCompletionRequestMessage {
fn role(&self) -> &str {
match self {
async_openai::types::ChatCompletionRequestMessage::System(_) => "system",
async_openai::types::ChatCompletionRequestMessage::User(_) => "user",
async_openai::types::ChatCompletionRequestMessage::Assistant(_) => "assistant",
async_openai::types::ChatCompletionRequestMessage::Tool(_) => "tool",
async_openai::types::ChatCompletionRequestMessage::Function(_) => "function",
}
}

fn name(&self) -> Option<&str> {
match self {
async_openai::types::ChatCompletionRequestMessage::System(
async_openai::types::ChatCompletionRequestSystemMessage { name, .. },
) => name.as_deref(),
async_openai::types::ChatCompletionRequestMessage::User(
async_openai::types::ChatCompletionRequestUserMessage { name, .. },
) => name.as_deref(),
async_openai::types::ChatCompletionRequestMessage::Assistant(
async_openai::types::ChatCompletionRequestAssistantMessage { name, .. },
) => name.as_deref(),
async_openai::types::ChatCompletionRequestMessage::Function(
async_openai::types::ChatCompletionRequestFunctionMessage { name, .. },
) => Some(name.as_str()),
_ => None,
}
}

fn content(&self) -> Option<String> {
match self {
async_openai::types::ChatCompletionRequestMessage::System(
async_openai::types::ChatCompletionRequestSystemMessage { content, .. },
) => Some(content.clone()),
async_openai::types::ChatCompletionRequestMessage::User(
async_openai::types::ChatCompletionRequestUserMessage { content, .. },
) => match content {
async_openai::types::ChatCompletionRequestUserMessageContent::Text(s) => {
Some(s.clone())
}
async_openai::types::ChatCompletionRequestUserMessageContent::Array(m) => {
Some(m.iter().filter_map(|x| if let async_openai::types::ChatCompletionRequestMessageContentPart::Text(async_openai::types::ChatCompletionRequestMessageContentPartText { text, .. }) = x { Some(text.as_str()) } else { None }).collect::<Vec<_>>().as_slice().join(""))
},
},
async_openai::types::ChatCompletionRequestMessage::Assistant(
async_openai::types::ChatCompletionRequestAssistantMessage { content, .. },
) => content.clone(),
async_openai::types::ChatCompletionRequestMessage::Tool(async_openai::types::ChatCompletionRequestToolMessage { content, .. }) => {
Some(content.clone())
}
async_openai::types::ChatCompletionRequestMessage::Function(
async_openai::types::ChatCompletionRequestFunctionMessage { content, .. },
) => content.clone(),
}
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn test_num_tokens_from_messages() {
let model = "gpt-3.5-turbo-0301";
let messages = &[async_openai::types::ChatCompletionRequestMessage {
let messages = &[async_openai::types::ChatCompletionRequestMessage::System(async_openai::types::ChatCompletionRequestSystemMessage {
role: async_openai::types::Role::System,
name: None,
content: Some("You are a helpful, pattern-following assistant that translates corporate jargon into plain English.".to_string()),
function_call: None,
}];
content: "You are a helpful, pattern-following assistant that translates corporate jargon into plain English.".to_string(),
})];
let num_tokens = num_tokens_from_messages(model, messages).unwrap();
assert!(num_tokens > 0);
}

#[test]
fn test_get_chat_completion_max_tokens() {
let model = "gpt-3.5-turbo";
let messages = &[async_openai::types::ChatCompletionRequestMessage {
content: Some("You are a helpful assistant that only speaks French.".to_string()),
role: async_openai::types::Role::System,
name: None,
function_call: None,
}];
let messages = &[async_openai::types::ChatCompletionRequestMessage::System(
async_openai::types::ChatCompletionRequestSystemMessage {
content: "You are a helpful assistant that only speaks French.".to_string(),
role: async_openai::types::Role::System,
name: None,
},
)];
let max_tokens = get_chat_completion_max_tokens(model, messages).unwrap();
assert!(max_tokens > 0);
}
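The `ChatCompletionRequestMessageCommon` trait above flattens the new enum back into the crate-internal `role`/`name`/`content` view, so token counting stays uniform across roles (multi-part user content is reduced to its concatenated text parts). A minimal usage sketch against the public function this module exports, with an illustrative message:

```rust
use async_openai::types::{
    ChatCompletionRequestMessage, ChatCompletionRequestSystemMessage, Role,
};
use tiktoken_rs::async_openai::num_tokens_from_messages;

fn count_tokens() -> anyhow::Result<usize> {
    let messages = vec![ChatCompletionRequestMessage::System(
        ChatCompletionRequestSystemMessage {
            content: "You are a terse assistant.".to_string(),
            role: Role::System,
            name: None,
        },
    )];
    // Any variant (System/User/Assistant/Tool/Function) is accepted; the
    // From impl above converts each one before counting.
    num_tokens_from_messages("gpt-4", &messages)
}
```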
28 changes: 18 additions & 10 deletions tiktoken-rs/src/vendor_tiktoken.rs
@@ -694,12 +694,20 @@ impl CoreBPE {
&self,
py: Python,
text: &str,
allowed_special: HashSet<&str>,
allowed_special: HashSet<String>,
) -> Py<PyTuple> {
let (tokens, completions) =
py.allow_threads(|| self._encode_unstable_native(text, &allowed_special));
let py_completions =
PyList::new(py, completions.iter().map(|seq| PyList::new(py, &seq[..])));
let (tokens, completions) = py.allow_threads(|| {
self._encode_unstable_native(
text,
&HashSet::from_iter(allowed_special.iter().map(|x| x.as_str())),
)
});
let py_completions = PyList::new_bound(
py,
completions
.iter()
.map(|seq| PyList::new_bound(py, &seq[..])),
);
(tokens, py_completions).into_py(py)
}

@@ -728,15 +736,15 @@ impl CoreBPE {

pub fn decode_bytes(&self, py: Python, tokens: Vec<usize>) -> Py<PyBytes> {
let bytes = py.allow_threads(|| self._decode_native(&tokens));
PyBytes::new(py, &bytes).into()
PyBytes::new_bound(py, &bytes).into()
}

pub fn decode_single_token_bytes(&self, py: Python, token: usize) -> PyResult<Py<PyBytes>> {
if let Some(bytes) = self.decoder.get(&token) {
return Ok(PyBytes::new(py, bytes).into());
return Ok(PyBytes::new_bound(py, bytes).into());
}
if let Some(bytes) = self.special_tokens_decoder.get(&token) {
return Ok(PyBytes::new(py, bytes).into());
return Ok(PyBytes::new_bound(py, bytes).into());
}
Err(PyErr::new::<exceptions::PyKeyError, _>(token.to_string()))
}
@@ -748,14 +756,14 @@
pub fn token_byte_values(&self, py: Python) -> Vec<Py<PyBytes>> {
self.sorted_token_bytes
.iter()
.map(|x| PyBytes::new(py, x).into())
.map(|x| PyBytes::new_bound(py, x).into())
.collect()
}
}

#[cfg(feature = "python")]
#[pymodule]
fn _tiktoken(_py: Python, m: &PyModule) -> PyResult<()> {
fn _tiktoken(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<CoreBPE>()?;
Ok(())
}
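The pyo3 changes above track the 0.21 release, which moves object construction from the GIL-ref API (`PyBytes::new`, `PyList::new`) to `*_bound` constructors returning `Bound<'py, T>`, and hands `#[pymodule]` functions a `&Bound<'_, PyModule>`. A minimal sketch of the constructor pattern in isolation (helper name is illustrative):

```rust
use pyo3::prelude::*;
use pyo3::types::PyBytes;

/// Build a Python `bytes` object and detach it from the GIL lifetime.
fn to_py_bytes(py: Python<'_>, data: &[u8]) -> Py<PyBytes> {
    // pyo3 0.21: `new_bound` returns `Bound<'py, PyBytes>`; `.into()`
    // converts it to a GIL-independent `Py<PyBytes>` handle.
    PyBytes::new_bound(py, data).into()
}
```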