diff --git a/vllm/config.py b/vllm/config.py
index 68ca81a2ec4fe..d333a042fe5af 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -138,12 +138,10 @@ def __init__(
         self.quantization = quantization
         self.quantization_param_path = quantization_param_path
         self.enforce_eager = enforce_eager
-        self.max_context_len_to_capture = max_context_len_to_capture
-        if self.max_context_len_to_capture is not None:
+        if max_context_len_to_capture is not None:
             raise ValueError("`max_context_len_to_capture` is deprecated. "
                              "Use `max_seq_len_to_capture` instead.")
-        self.max_seq_len_to_capture = (max_seq_len_to_capture
-                                       or max_context_len_to_capture)
+        self.max_seq_len_to_capture = max_seq_len_to_capture
         self.max_logprobs = max_logprobs
         self.disable_sliding_window = disable_sliding_window
         self.skip_tokenizer_init = skip_tokenizer_init
diff --git a/vllm/worker/xpu_model_runner.py b/vllm/worker/xpu_model_runner.py
index e03f24fdfc41a..876abb3bf94d1 100644
--- a/vllm/worker/xpu_model_runner.py
+++ b/vllm/worker/xpu_model_runner.py
@@ -109,9 +109,6 @@ def __init__(
         self.kv_cache_dtype = kv_cache_dtype
         self.block_size = cache_config.block_size
-        self.max_context_len_to_capture = (
-            self.model_config.max_context_len_to_capture
-            if self.model_config is not None else 0)
         self.attn_backend = get_attn_backend(
             self.model_config.get_num_attention_heads(self.parallel_config),