diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 4aef2dc..f84f393 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -20,7 +20,9 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENAI_API_BASE: ${{ secrets.OPENAI_API_BASE }}
         with:
           temperature: 0.1
           review_per_file: true
           comment_per_file: true
+          model: gpt-35-turbo
diff --git a/app/completion.py b/app/completion.py
index 4bb1266..cc87b7a 100644
--- a/app/completion.py
+++ b/app/completion.py
@@ -37,6 +37,9 @@ def __init__(self, model, temperature, frequency_penalty, presence_penalty,
         self.encoder = tiktoken.get_encoding("gpt2")
         self.max_tokens = max_tokens
         self.min_tokens = min_tokens
+        self.openai_kwargs = {'model': self.model}
+        if openai.api_type == "azure":
+            self.openai_kwargs = {'engine': self.model}
 
     @backoff.on_exception(backoff.expo,
                           (openai.error.RateLimitError,
@@ -56,14 +59,13 @@ def get_completion_chat(self, prompt) -> str:
             {"role": "user", "content": prompt},
         ]
         response = openai.ChatCompletion.create(
-            model=self.model,
             messages=messages,
             temperature=self.temperature,
             frequency_penalty=self.frequency_penalty,
             presence_penalty=self.presence_penalty,
             request_timeout=100,
             max_tokens=self.max_tokens - len(self.encoder.encode(f'{system_prompt}\n{prompt}')),
-            stream=True)
+            stream=True, **self.openai_kwargs)
 
         completion_text = ''
         for event in response:
@@ -79,7 +81,6 @@ def get_completion_text(self, prompt) -> str:
         '''Invoke OpenAI API to get text completion'''
         prompt_message = f'{system_prompt}\n{prompt}'
         response = openai.Completion.create(
-            model=self.model,
             prompt=prompt_message,
             temperature=self.temperature,
             best_of=1,
@@ -87,7 +88,7 @@ def get_completion_text(self, prompt) -> str:
             presence_penalty=self.presence_penalty,
             request_timeout=100,
             max_tokens=self.max_tokens - len(self.encoder.encode(prompt_message)),
-            stream=True)
+            stream=True, **self.openai_kwargs)
 
         completion_text = ''
         for event in response:
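
Context for the `openai_kwargs` switch: in the pre-1.0 `openai` Python SDK this code targets, Azure OpenAI endpoints address a deployment via the `engine` keyword, while openai.com addresses a model via `model`; building the right keyword dict once in `__init__` lets both `ChatCompletion.create` and `Completion.create` stay backend-agnostic. This also explains `model: gpt-35-turbo` in the workflow, since Azure names the model without a dot. Below is a minimal standalone sketch of that routing; the Azure-detection heuristic, the `2023-05-15` API version, and the model/deployment names are illustrative assumptions, not values taken from this patch.

```python
# Sketch of the kwarg routing from completion.py in isolation, assuming the
# pre-1.0 `openai` SDK. How api_type gets set is an assumption here; this
# repo may detect Azure differently.
import os

import openai

openai.api_key = os.environ["OPENAI_API_KEY"]

api_base = os.environ.get("OPENAI_API_BASE", "")
if "azure" in api_base:
    # Azure mode in the legacy SDK: set api_type, api_base, and api_version.
    openai.api_type = "azure"
    openai.api_base = api_base
    openai.api_version = "2023-05-15"  # placeholder Azure API version

# Azure routes requests by deployment name ('engine'); openai.com by 'model'.
openai_kwargs = ({"engine": "gpt-35-turbo"} if openai.api_type == "azure"
                 else {"model": "gpt-3.5-turbo"})

response = openai.ChatCompletion.create(
    messages=[{"role": "user", "content": "Say hello."}],
    temperature=0.1,
    **openai_kwargs)
print(response["choices"][0]["message"]["content"])
```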