Skip to content

Commit

Permalink
feat: adjust models to properly support MySQL
Browse files Browse the repository at this point in the history
This codebase was originally developed and tested using only SQLite. In
this commit, we're adding proper support for MySQL. In particular:

* The sizes of ``key`` fields and titles were reduced from 1000 to 500.
  This is to accommodate MySQL's index size limit (3072 bytes, which
  translates to 768 Unicode code points in a 4-byte encoding), while
  leaving a little headroom for future compound indexes.
* fields.py now has helper classes to support multiple collations so
  that we can specify utf8mb4 for the charset in MySQL.
* fields.py now has helper functions that allow us to normalize case
  sensitivity across databases. By default, fields would otherwise be
  case sensitive in SQLite and case insensitive in MySQL. This is
  important for correctness, since ``key`` fields are meant to be
  case sensitive for the purposes of uniqueness constraints.
  • Loading branch information
ormsbee committed Jun 20, 2023
1 parent f1b8579 commit 084ec21
Show file tree
Hide file tree
Showing 10 changed files with 425 additions and 559 deletions.
149 changes: 38 additions & 111 deletions openedx_learning/core/components/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,153 +1,80 @@
# Generated by Django 3.2.18 on 2023-05-11 02:07
# Generated by Django 3.2.19 on 2023-06-15 14:43

from django.db import migrations, models
import django.db.models.deletion
import openedx_learning.lib.fields
import uuid


class Migration(migrations.Migration):

initial = True

dependencies = [
("oel_publishing", "0001_initial"),
("oel_contents", "0001_initial"),
('oel_publishing', '0001_initial'),
('oel_contents', '0001_initial'),
]

operations = [
migrations.CreateModel(
name="Component",
name='Component',
fields=[
(
"publishable_entity",
models.OneToOneField(
on_delete=django.db.models.deletion.CASCADE,
primary_key=True,
serialize=False,
to="oel_publishing.publishableentity",
),
),
("namespace", models.CharField(max_length=100)),
("type", models.CharField(blank=True, max_length=100)),
("local_key", models.CharField(max_length=255)),
(
"learning_package",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="oel_publishing.learningpackage",
),
),
('publishable_entity', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, serialize=False, to='oel_publishing.publishableentity')),
('namespace', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=100)),
('type', openedx_learning.lib.fields.MultiCollationCharField(blank=True, db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=100)),
('local_key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)),
('learning_package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.learningpackage')),
],
options={
"verbose_name": "Component",
"verbose_name_plural": "Components",
'verbose_name': 'Component',
'verbose_name_plural': 'Components',
},
),
migrations.CreateModel(
name="ComponentVersion",
name='ComponentVersion',
fields=[
(
"publishable_entity_version",
models.OneToOneField(
on_delete=django.db.models.deletion.CASCADE,
primary_key=True,
serialize=False,
to="oel_publishing.publishableentityversion",
),
),
(
"component",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="versions",
to="oel_components.component",
),
),
('publishable_entity_version', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, serialize=False, to='oel_publishing.publishableentityversion')),
('component', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='versions', to='oel_components.component')),
],
options={
"verbose_name": "Component Version",
"verbose_name_plural": "Component Versions",
'verbose_name': 'Component Version',
'verbose_name_plural': 'Component Versions',
},
),
migrations.CreateModel(
name="ComponentVersionRawContent",
name='ComponentVersionRawContent',
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"uuid",
models.UUIDField(
default=uuid.uuid4,
editable=False,
unique=True,
verbose_name="UUID",
),
),
("key", models.CharField(max_length=255)),
("learner_downloadable", models.BooleanField(default=False)),
(
"component_version",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="oel_components.componentversion",
),
),
(
"raw_content",
models.ForeignKey(
on_delete=django.db.models.deletion.RESTRICT,
to="oel_contents.rawcontent",
),
),
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('uuid', models.UUIDField(default=uuid.uuid4, editable=False, unique=True, verbose_name='UUID')),
('key', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=500)),
('learner_downloadable', models.BooleanField(default=False)),
('component_version', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_components.componentversion')),
('raw_content', models.ForeignKey(on_delete=django.db.models.deletion.RESTRICT, to='oel_contents.rawcontent')),
],
),
migrations.AddField(
model_name="componentversion",
name="raw_contents",
field=models.ManyToManyField(
related_name="component_versions",
through="oel_components.ComponentVersionRawContent",
to="oel_contents.RawContent",
),
model_name='componentversion',
name='raw_contents',
field=models.ManyToManyField(related_name='component_versions', through='oel_components.ComponentVersionRawContent', to='oel_contents.RawContent'),
),
migrations.AddIndex(
model_name="componentversionrawcontent",
index=models.Index(
fields=["raw_content", "component_version"],
name="oel_cvrawcontent_c_cv",
),
model_name='componentversionrawcontent',
index=models.Index(fields=['raw_content', 'component_version'], name='oel_cvrawcontent_c_cv'),
),
migrations.AddIndex(
model_name="componentversionrawcontent",
index=models.Index(
fields=["component_version", "raw_content"],
name="oel_cvrawcontent_cv_d",
),
model_name='componentversionrawcontent',
index=models.Index(fields=['component_version', 'raw_content'], name='oel_cvrawcontent_cv_d'),
),
migrations.AddConstraint(
model_name="componentversionrawcontent",
constraint=models.UniqueConstraint(
fields=("component_version", "key"), name="oel_cvrawcontent_uniq_cv_key"
),
model_name='componentversionrawcontent',
constraint=models.UniqueConstraint(fields=('component_version', 'key'), name='oel_cvrawcontent_uniq_cv_key'),
),
migrations.AddIndex(
model_name="component",
index=models.Index(
fields=["learning_package", "namespace", "type", "local_key"],
name="oel_component_idx_lc_ns_t_lk",
),
model_name='component',
index=models.Index(fields=['learning_package', 'namespace', 'type', 'local_key'], name='oel_component_idx_lc_ns_t_lk'),
),
migrations.AddConstraint(
model_name="component",
constraint=models.UniqueConstraint(
fields=("learning_package", "namespace", "type", "local_key"),
name="oel_component_uniq_lc_ns_t_lk",
),
model_name='component',
constraint=models.UniqueConstraint(fields=('learning_package', 'namespace', 'type', 'local_key'), name='oel_component_uniq_lc_ns_t_lk'),
),
]
10 changes: 7 additions & 3 deletions openedx_learning/core/components/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
"""
from django.db import models

from openedx_learning.lib.fields import key_field, immutable_uuid_field
from openedx_learning.lib.fields import (
case_sensitive_char_field,
immutable_uuid_field,
key_field,
)
from ..publishing.models import LearningPackage
from ..publishing.model_mixins import (
PublishableEntityMixin,
Expand Down Expand Up @@ -77,13 +81,13 @@ class Component(PublishableEntityMixin):
# namespace and type work together to help figure out what Component needs
# to handle this data. A namespace is *required*. The namespace for XBlocks
# is "xblock.v1" (to match the setup.py entrypoint naming scheme).
namespace = models.CharField(max_length=100, null=False, blank=False)
namespace = case_sensitive_char_field(max_length=100, blank=False)

# type is a way to help sub-divide namespace if that's convenient. This
# field cannot be null, but it can be blank if it's not necessary. For an
# XBlock, type corresponds to tag, e.g. "video". It's also the block_type in
# the UsageKey.
type = models.CharField(max_length=100, null=False, blank=True)
type = case_sensitive_char_field(max_length=100, blank=True)

# local_key is an identifier that is local to the (namespace, type). The
# publishable.key should be calculated as a combination of (namespace, type,
Expand Down
112 changes: 42 additions & 70 deletions openedx_learning/core/contents/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,103 +1,75 @@
# Generated by Django 3.2.18 on 2023-05-11 02:07
# Generated by Django 3.2.19 on 2023-06-15 14:43

import django.core.validators
from django.db import migrations, models
import django.db.models.deletion
import openedx_learning.lib.fields
import openedx_learning.lib.validators


def use_compressed_table_format(apps, schema_editor):
    """
    Use the COMPRESSED row format for TextContent if we're using MySQL.

    This table will hold a lot of OLX, which compresses very well using MySQL's
    built-in zlib compression. This is especially important because we're
    keeping so much version history.

    Args:
        apps: Migration-state app registry; only ``get_model`` is used, to
            look up the historical ``oel_contents.TextContent`` model.
        schema_editor: Schema editor for the database being migrated. Its
            ``connection.vendor`` decides whether anything is done at all.

    Returns:
        None. On any database vendor other than ``"mysql"`` this is a no-op.
    """
    # ROW_FORMAT is MySQL-specific DDL, so leave every other backend alone.
    if schema_editor.connection.vendor != 'mysql':
        return

    table_name = apps.get_model("oel_contents", "TextContent")._meta.db_table
    # Quote the identifier rather than interpolating it raw, so the statement
    # stays valid even if the table name ever needs quoting (e.g. a reserved
    # word or unusual characters in a custom ``db_table``).
    quoted_table_name = schema_editor.quote_name(table_name)
    schema_editor.execute(
        f"ALTER TABLE {quoted_table_name} ROW_FORMAT=COMPRESSED;"
    )


class Migration(migrations.Migration):

initial = True

dependencies = [
("oel_publishing", "0001_initial"),
('oel_publishing', '0001_initial'),
]

operations = [
migrations.CreateModel(
name="RawContent",
name='RawContent',
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
("hash_digest", models.CharField(editable=False, max_length=40)),
("mime_type", models.CharField(max_length=255)),
(
"size",
models.PositiveBigIntegerField(
validators=[django.core.validators.MaxValueValidator(50000000)]
),
),
(
"created",
models.DateTimeField(
validators=[
openedx_learning.lib.validators.validate_utc_datetime
]
),
),
("file", models.FileField(null=True, upload_to="")),
(
"learning_package",
models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
to="oel_publishing.learningpackage",
),
),
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
('hash_digest', models.CharField(editable=False, max_length=40)),
('mime_type', openedx_learning.lib.fields.MultiCollationCharField(db_collations={'mysql': 'utf8mb4_unicode_ci', 'sqlite': 'NOCASE'}, max_length=255)),
('size', models.PositiveBigIntegerField(validators=[django.core.validators.MaxValueValidator(50000000)])),
('created', models.DateTimeField(validators=[openedx_learning.lib.validators.validate_utc_datetime])),
('file', models.FileField(null=True, upload_to='')),
('learning_package', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='oel_publishing.learningpackage')),
],
options={
"verbose_name": "Raw Content",
"verbose_name_plural": "Raw Contents",
'verbose_name': 'Raw Content',
'verbose_name_plural': 'Raw Contents',
},
),
migrations.CreateModel(
name="TextContent",
name='TextContent',
fields=[
(
"raw_content",
models.OneToOneField(
on_delete=django.db.models.deletion.CASCADE,
primary_key=True,
related_name="text_content",
serialize=False,
to="oel_contents.rawcontent",
),
),
("text", models.TextField(blank=True, max_length=100000)),
("length", models.PositiveIntegerField()),
('raw_content', models.OneToOneField(on_delete=django.db.models.deletion.CASCADE, primary_key=True, related_name='text_content', serialize=False, to='oel_contents.rawcontent')),
('text', openedx_learning.lib.fields.MultiCollationTextField(blank=True, db_collations={'mysql': 'utf8mb4_bin', 'sqlite': 'BINARY'}, max_length=100000)),
('length', models.PositiveIntegerField()),
],
),
# Call out to custom code here to change row format for TextContent
migrations.RunPython(use_compressed_table_format, reverse_code=migrations.RunPython.noop, atomic=False),
migrations.AddIndex(
model_name="rawcontent",
index=models.Index(
fields=["learning_package", "mime_type"],
name="oel_content_idx_lp_mime_type",
),
model_name='rawcontent',
index=models.Index(fields=['learning_package', 'mime_type'], name='oel_content_idx_lp_mime_type'),
),
migrations.AddIndex(
model_name="rawcontent",
index=models.Index(
fields=["learning_package", "-size"], name="oel_content_idx_lp_rsize"
),
model_name='rawcontent',
index=models.Index(fields=['learning_package', '-size'], name='oel_content_idx_lp_rsize'),
),
migrations.AddIndex(
model_name="rawcontent",
index=models.Index(
fields=["learning_package", "-created"],
name="oel_content_idx_lp_rcreated",
),
model_name='rawcontent',
index=models.Index(fields=['learning_package', '-created'], name='oel_content_idx_lp_rcreated'),
),
migrations.AddConstraint(
model_name="rawcontent",
constraint=models.UniqueConstraint(
fields=("learning_package", "mime_type", "hash_digest"),
name="oel_content_uniq_lc_mime_type_hash_digest",
),
model_name='rawcontent',
constraint=models.UniqueConstraint(fields=('learning_package', 'mime_type', 'hash_digest'), name='oel_content_uniq_lc_mime_type_hash_digest'),
),
]
Loading

0 comments on commit 084ec21

Please sign in to comment.