Rename #466

Merged: 9 commits, Mar 8, 2024
Changes from 7 commits
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added the option to directly pass input embeddings to `OLMo` and `OLMoForCausalLM`.
- Added support for Python 3.8.
- Added code to throw an error if `output_attentions` is set to `True` in forward call to `OLMoForCausalLM`. This functionality hasn't been implemented yet.
+- Rename `Olmo` to `OLMo` everywhere in the codebase
> Collaborator review comment on the added line: Please move this to Unreleased or v0.3.0 or similar.

- Fixed running with data loading workers on LUMI

### Added
4 changes: 2 additions & 2 deletions docs/NOTES.md
@@ -70,10 +70,10 @@ For example, checkpoints for the run [https://wandb.ai/ai2-llm/c4-small/runs/euo
You can load a checkpoint like this:

```python
-from olmo import Olmo, Tokenizer
+from olmo import OLMo, Tokenizer

checkpoint = "gs://ai2-olmo/ai2-llm/c4-small/euox4j8q/step73000-unsharded"
-model = Olmo.from_checkpoint(checkpoint, device="cuda")
+model = OLMo.from_checkpoint(checkpoint, device="cuda")
tokenizer = Tokenizer.from_checkpoint(checkpoint)
```

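As a follow-up to the loading snippet above, here is a minimal usage sketch (not part of this PR): it assumes the `Tokenizer` exposes `encode`/`decode` and that the model's output carries a `logits` field, which matches the `olmo` package at the time of this change.

```python
import torch

# Tokenize a prompt and run a single forward pass with the loaded OLMo model.
input_ids = torch.tensor([tokenizer.encode("Language modeling is")], device="cuda")
with torch.inference_mode():
    output = model(input_ids)                 # `model` is the OLMo instance from above
next_token_id = int(output.logits[0, -1].argmax())
print(tokenizer.decode([next_token_id]))      # greedy next-token continuation
```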
4 changes: 2 additions & 2 deletions hf_olmo/configuration_olmo.py
@@ -21,8 +21,8 @@ def __init__(self, use_cache: bool = False, **kwargs):
all_kwargs.update({"use_cache": use_cache})
all_kwargs.update(
{
"architectures": all_kwargs.get("architectures", ["OlmoModelForCausalLM"])
or ["OlmoModelForCausalLM"]
"architectures": all_kwargs.get("architectures", ["OLMoModelForCausalLM"])
or ["OLMoModelForCausalLM"]
}
)
super().__init__(**all_kwargs)
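The effect of the renamed default is easy to check; a minimal sketch, assuming `OLMoConfig()` is constructible from its defaults:

```python
from hf_olmo.configuration_olmo import OLMoConfig

cfg = OLMoConfig()            # no explicit architectures passed
print(cfg.architectures)      # expected: ["OLMoModelForCausalLM"] after this change
print(cfg.use_cache)          # False, per the constructor signature above
```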
6 changes: 3 additions & 3 deletions hf_olmo/modeling_olmo.py
@@ -7,7 +7,7 @@
from transformers.models.auto import AutoModelForCausalLM

from olmo.config import ModelConfig
-from olmo.model import Olmo
+from olmo.model import OLMo

from .configuration_olmo import OLMoConfig

@@ -34,14 +34,14 @@ class OLMoForCausalLM(PreTrainedModel):
base_model_prefix = "model"
_no_split_modules = ["OLMoBlock"]

-def __init__(self, config: OLMoConfig, model: Optional[Olmo] = None, init_params: bool = False):
+def __init__(self, config: OLMoConfig, model: Optional[OLMo] = None, init_params: bool = False):
super().__init__(config)

if not model:
model_config = create_model_config_from_pretrained_config(config)
# Initialize model (always on CPU to start with so we don't run out of GPU memory).
model_config.init_device = "cpu"
-self.model = Olmo(model_config, init_params=init_params)
+self.model = OLMo(model_config, init_params=init_params)
else:
self.model = model

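A hedged construction sketch for the renamed wrapper (not part of this PR; it assumes `OLMoConfig()` carries the usual model defaults, and the commented checkpoint path is a placeholder):

```python
from hf_olmo.configuration_olmo import OLMoConfig
from hf_olmo.modeling_olmo import OLMoForCausalLM

# Build the HF wrapper from a config alone; per the constructor above, the inner
# OLMo model is created on CPU first so GPU memory is not exhausted at init time.
config = OLMoConfig(use_cache=False)
hf_model = OLMoForCausalLM(config, init_params=True)

# Alternatively, wrap an OLMo model that was already loaded from a checkpoint:
# from olmo.model import OLMo
# olmo_model = OLMo.from_checkpoint("<checkpoint-dir>", device="cuda")   # placeholder path
# hf_model = OLMoForCausalLM(config, model=olmo_model)
```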
2 changes: 1 addition & 1 deletion hf_olmo/tokenization_olmo_fast.py
@@ -4,7 +4,7 @@


class OLMoTokenizerFast(PreTrainedTokenizerFast):
-# Note: Olmo's tokenizer is already a wrapper around huggingface. This is potentially unnecessary.
+# Note: OLMo's tokenizer is already a wrapper around huggingface. This is potentially unnecessary.
pass

# def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
4 changes: 2 additions & 2 deletions inference/NOTES.md
@@ -45,12 +45,12 @@ To add an `olmo.py` module, we can basically just imitate what was done for othe
There's one important wrinkle here: some OLMo models use *fused linear attention*. I'm not sure how GPTQ handles this or whether any existing supported models implement attention the same way. This might be something to discuss with Dirk and Pete.

```python
-Olmo(
+OLMo(
(transformer): ModuleDict(
(wte): Embedding(50304, 768)
(emb_drop): Dropout(p=0.1, inplace=False)
(blocks): ModuleList(
-(0-11): 12 x OlmoSequentialBlock(
+(0-11): 12 x OLMoSequentialBlock(
(dropout): Dropout(p=0.1, inplace=False)
(norm): LayerNorm()
(act): SwiGLU()
    ...
```
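For readers unfamiliar with the fused attention noted above, here is an illustrative sketch (not code from this PR) of what a fused q/k/v projection such as OLMo's `att_proj` does; the width matches the 768-dimensional model printed above, and the bias setting is an assumption:

```python
import torch
import torch.nn as nn

d_model = 768
# One fused projection produces Q, K and V in a single matmul. GPTQ therefore has
# to treat this single Linear as the quantization unit, rather than the separate
# q_proj/k_proj/v_proj modules found in LLaMA-style models.
att_proj = nn.Linear(d_model, 3 * d_model, bias=False)

x = torch.randn(2, 16, d_model)           # (batch, seq_len, d_model)
q, k, v = att_proj(x).chunk(3, dim=-1)    # split the fused output back into q, k, v
print(q.shape, k.shape, v.shape)          # each: torch.Size([2, 16, 768])
```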
@@ -13,7 +13,7 @@
from .internlm import InternLMGPTQForCausalLM
from .llama import LlamaGPTQForCausalLM
from .moss import MOSSGPTQForCausalLM
-from .olmo import OlmoGPTQForCausalLM
+from .olmo import OLMoGPTQForCausalLM
from .opt import OPTGPTQForCausalLM
from .qwen import QwenGPTQForCausalLM
from .rw import RWGPTQForCausalLM
@@ -24,7 +24,7 @@
"gptj": GPTJGPTQForCausalLM,
"gpt2": GPT2GPTQForCausalLM,
"llama": LlamaGPTQForCausalLM,
"olmo": OlmoGPTQForCausalLM,
"olmo": OLMoGPTQForCausalLM,
"opt": OPTGPTQForCausalLM,
"moss": MOSSGPTQForCausalLM,
"gpt_bigcode": GPTBigCodeGPTQForCausalLM,
@@ -1,7 +1,7 @@
from ._base import *


-class OlmoGPTQForCausalLM(BaseGPTQForCausalLM):
+class OLMoGPTQForCausalLM(BaseGPTQForCausalLM):
# Attribute name of Transformer layer block.
layers_block_name = "model.transformer.blocks"

@@ -19,4 +19,4 @@ class OlmoGPTQForCausalLM(BaseGPTQForCausalLM):
inside_layer_modules = [["att_proj"], ["attn_out"], ["ff_proj"], ["ff_out"]]


__all__ = ["OlmoGPTQForCausalLM"]
__all__ = ["OLMoGPTQForCausalLM"]
8 changes: 4 additions & 4 deletions inference/compression/olmo_gptq_class.py
@@ -1,7 +1,7 @@
from auto_gptq.modeling._base import BaseGPTQForCausalLM


-class OlmoGPTQForCausalLM(BaseGPTQForCausalLM):
+class OLMoGPTQForCausalLM(BaseGPTQForCausalLM):
# Attribute name of Transformer layer block.
layers_block_name = "model.transformer.blocks"

@@ -17,12 +17,12 @@ class OlmoGPTQForCausalLM(BaseGPTQForCausalLM):
inside_layer_modules = [["att_proj"], ["attn_out"], ["ff_proj"], ["ff_out"]]


__all__ = ["OlmoGPTQForCausalLM"]
__all__ = ["OLMoGPTQForCausalLM"]

# NOTE: In progress; may change if OLMo model is updated.


-# class OlmoGPTQForCausalLM(BaseGPTQForCausalLM):
+# class OLMoGPTQForCausalLM(BaseGPTQForCausalLM):
# # Attribute name of Transformer layer block.
# layers_block_name = "transformer.blocks" # NOTE(wadden) Correct
#
@@ -51,4 +51,4 @@ class OlmoGPTQForCausalLM(BaseGPTQForCausalLM):
# ]


# __all__ = ["OlmoGPTQForCausalLM"]
# __all__ = ["OLMoGPTQForCausalLM"]
6 changes: 3 additions & 3 deletions olmo/config.py
@@ -23,7 +23,7 @@
from torch.distributed.fsdp import MixedPrecision, ShardingStrategy

from .aliases import PathOrStr
-from .exceptions import OlmoConfigurationError
+from .exceptions import OLMoConfigurationError
from .util import StrEnum

__all__ = [
@@ -116,7 +116,7 @@ def new(cls: Type[C], **kwargs) -> C:
conf = om.merge(conf, kwargs)
return cast(C, om.to_object(conf))
except OmegaConfBaseException as e:
-raise OlmoConfigurationError(str(e))
+raise OLMoConfigurationError(str(e))

@classmethod
def load(
@@ -139,7 +139,7 @@ def load(
conf = om.merge(conf, om.from_dotlist(overrides))
return cast(C, om.to_object(conf))
except OmegaConfBaseException as e:
-raise OlmoConfigurationError(str(e))
+raise OLMoConfigurationError(str(e))

def save(self, path: PathOrStr) -> None:
"""Save to a YAML file."""
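To show where the renamed `OLMoConfigurationError` surfaces in practice, a minimal sketch (the YAML path and the override are placeholders; `TrainConfig` is assumed to be the usual entry point in `olmo.config`):

```python
from olmo.config import TrainConfig
from olmo.exceptions import OLMoConfigurationError

try:
    cfg = TrainConfig.load(
        "configs/my-run.yaml",                     # placeholder path
        overrides=["model.d_model=not_an_int"],    # bad dotlist value to trigger the error
    )
except OLMoConfigurationError as err:
    print(f"invalid configuration: {err}")
```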
8 changes: 4 additions & 4 deletions olmo/data/__init__.py
@@ -5,7 +5,7 @@

from ..aliases import PathOrStr
from ..config import DataConfig, TrainConfig
-from ..exceptions import OlmoConfigurationError
+from ..exceptions import OLMoConfigurationError
from ..torch_util import barrier, get_global_rank, get_world_size
from .collator import DataCollator
from .iterable_dataset import IterableDataset
@@ -21,7 +21,7 @@ def build_memmap_dataset(
metadata: List[Dict[str, Any]] = []
if data_config.paths:
if data_config.datasets:
-raise OlmoConfigurationError("DataConfig.paths is mutually exclusive with DataConfig.datasets")
+raise OLMoConfigurationError("DataConfig.paths is mutually exclusive with DataConfig.datasets")
paths = data_config.paths
for path in paths:
metadata.append({"path": str(path)})
@@ -32,7 +32,7 @@
paths.extend(label_paths)
metadata.extend([{"label": label}] * len(label_paths))
else:
-raise OlmoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")
+raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")
return MemMapDataset(
*paths,
chunk_size=train_config.model.max_sequence_length,
@@ -87,7 +87,7 @@ def build_train_dataloader(train_config: TrainConfig) -> DataLoader:
work_dir = Path(train_config.save_folder) / "train_data"
if get_global_rank() == 0:
if work_dir.is_dir() and not train_config.save_overwrite:
-raise OlmoConfigurationError(
+raise OLMoConfigurationError(
"train data working directory already exists, use --save_overwrite to overwrite"
)
else:
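A small sketch of the mutual-exclusivity rule enforced above (field values are placeholders; it assumes `DataConfig`'s remaining fields have defaults):

```python
from olmo.config import DataConfig

# Valid: exactly one of `paths` or `datasets` is given.
ok = DataConfig(paths=["/data/part-000.npy"])

# Invalid: both are given, so build_memmap_dataset(...) raises OLMoConfigurationError;
# giving neither raises it as well.
bad = DataConfig(
    paths=["/data/part-000.npy"],
    datasets={"wiki": ["/data/wiki-000.npy"]},
)
```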
4 changes: 2 additions & 2 deletions olmo/data/memmap_dataset.py
@@ -7,7 +7,7 @@
import torch
from torch.utils.data import Dataset

-from olmo.exceptions import OlmoEnvironmentError
+from olmo.exceptions import OLMoEnvironmentError

from ..aliases import PathOrStr
from ..util import _get_s3_client, file_size, get_bytes_range
@@ -93,7 +93,7 @@ def offsets(self) -> List[Tuple[int, int]]:
_get_s3_client("s3")
try:
_get_s3_client("r2")
-except OlmoEnvironmentError:
+except OLMoEnvironmentError:
# R2 might not be needed, so ignore this error. We will get an error
# later if R2 is needed.
pass
4 changes: 2 additions & 2 deletions olmo/eval/__init__.py
@@ -5,7 +5,7 @@
from torchmetrics import MeanMetric, Metric

from ..config import EvaluatorConfig, EvaluatorType, TrainConfig
-from ..exceptions import OlmoConfigurationError
+from ..exceptions import OLMoConfigurationError
from ..tokenizer import Tokenizer
from ..torch_util import get_global_rank, get_world_size
from .downstream import ICLMetric, label_to_task_map
@@ -93,7 +93,7 @@ def make_metric():
elif eval_config.data.datasets:
eval_metric = {label: make_metric() for label in eval_config.data.datasets.keys()}
else:
-raise OlmoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")
+raise OLMoConfigurationError("One of DataConfig.paths or DataConfig.datasets is required")

return Evaluator(
label=eval_config.label,
14 changes: 7 additions & 7 deletions olmo/exceptions.py
@@ -1,37 +1,37 @@
__all__ = ["OlmoError", "OlmoConfigurationError", "OlmoCliError", "OlmoEnvironmentError", "OlmoNetworkError"]
__all__ = ["OLMoError", "OLMoConfigurationError", "OLMoCliError", "OLMoEnvironmentError", "OLMoNetworkError"]


-class OlmoError(Exception):
+class OLMoError(Exception):
"""
Base class for all custom OLMo exceptions.
"""


-class OlmoConfigurationError(OlmoError):
+class OLMoConfigurationError(OLMoError):
"""
An error with a configuration file.
"""


-class OlmoCliError(OlmoError):
+class OLMoCliError(OLMoError):
"""
An error from incorrect CLI usage.
"""


-class OlmoEnvironmentError(OlmoError):
+class OLMoEnvironmentError(OLMoError):
"""
An error from incorrect environment variables.
"""


-class OlmoNetworkError(OlmoError):
+class OLMoNetworkError(OLMoError):
"""
An error with a network request.
"""


-class OlmoThreadError(Exception):
+class OLMoThreadError(Exception):
"""
Raised when a thread fails.
"""