diff --git a/Makefile b/Makefile index 15bab5df67a9..c70de65fb454 100644 --- a/Makefile +++ b/Makefile @@ -137,8 +137,6 @@ compile-requirements: pre-requirements $(COMMON_CONSTRAINTS_TXT) ## Re-compile * mv requirements/common_constraints.tmp requirements/common_constraints.txt sed 's/Django<4.0//g' requirements/common_constraints.txt > requirements/common_constraints.tmp mv requirements/common_constraints.tmp requirements/common_constraints.txt - sed 's/event-tracking<2.4.1//g' requirements/common_constraints.txt > requirements/common_constraints.tmp - mv requirements/common_constraints.tmp requirements/common_constraints.txt pip-compile -v --allow-unsafe ${COMPILE_OPTS} -o requirements/pip.txt requirements/pip.in pip install -r requirements/pip.txt diff --git a/requirements/common_constraints.txt b/requirements/common_constraints.txt index 9405a605c520..ef8bc86061b7 100644 --- a/requirements/common_constraints.txt +++ b/requirements/common_constraints.txt @@ -40,4 +40,4 @@ importlib-metadata<7 # We will pin event-tracking to do not break existing installations # This can be unpinned once https://github.com/openedx/edx-platform/issues/34586 # has been resolved and edx-platform is running with pymongo>=4.4.0 - +event-tracking<2.4.1 diff --git a/requirements/constraints.txt b/requirements/constraints.txt index ceb019231451..df36493234c5 100644 --- a/requirements/constraints.txt +++ b/requirements/constraints.txt @@ -32,13 +32,9 @@ django-oauth-toolkit==1.7.1 # incremental upgrade django-simple-history==3.4.0 -# Adding pin to avoid any major upgrade -pymongo<4.4.1 - -# To override the constraint of edx-lint -# This can be removed once https://github.com/openedx/edx-platform/issues/34586 is resolved -# and the upstream constraint in edx-lint has been removed. -event-tracking==3.0.0 +# constrained in opaque_keys. migration guide here: https://pymongo.readthedocs.io/en/4.0/migrate-to-pymongo4.html +# Major upgrade will be done in separate ticket. +pymongo<4.0.0 # greater version has breaking changes and requires some migration steps. django-webpack-loader==0.7.0 @@ -129,4 +125,4 @@ numpy<2.0.0 # django-storages==1.14.4 breaks course imports # Two lines were added in 1.14.4 that make file_exists_in_storage function always return False, # as the default value of AWS_S3_FILE_OVERWRITE is True -django-storages<1.14.4 +django-storages<1.14.4 \ No newline at end of file diff --git a/requirements/edx/base.txt b/requirements/edx/base.txt index 9fa27e14c5b1..b56b280405c5 100644 --- a/requirements/edx/base.txt +++ b/requirements/edx/base.txt @@ -376,10 +376,6 @@ djangorestframework==3.14.0 # super-csv djangorestframework-xml==2.0.0 # via edx-enterprise -dnspython==2.6.1 - # via - # -r requirements/edx/paver.txt - # pymongo done-xblock==2.3.0 # via -r requirements/edx/bundled.in drf-jwt==1.19.2 @@ -540,9 +536,9 @@ enmerkar==0.7.1 # via enmerkar-underscore enmerkar-underscore==2.3.0 # via -r requirements/edx/kernel.in -event-tracking==3.0.0 +event-tracking==2.4.0 # via - # -c requirements/edx/../constraints.txt + # -c requirements/edx/../common_constraints.txt # -r requirements/edx/kernel.in # edx-completion # edx-proctoring @@ -869,7 +865,7 @@ pylti1p3==2.0.0 # via -r requirements/edx/kernel.in pymemcache==4.0.0 # via -r requirements/edx/paver.txt -pymongo==4.4.0 +pymongo==3.13.0 # via # -c requirements/edx/../constraints.txt # -r requirements/edx/kernel.in diff --git a/requirements/edx/development.txt b/requirements/edx/development.txt index 942c780a0f48..360862385c34 100644 --- a/requirements/edx/development.txt +++ b/requirements/edx/development.txt @@ -614,10 +614,8 @@ djangorestframework-xml==2.0.0 # edx-enterprise dnspython==2.6.1 # via - # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # email-validator - # pymongo docutils==0.21.2 # via # -r requirements/edx/doc.txt @@ -855,9 +853,9 @@ enmerkar-underscore==2.3.0 # via # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt -event-tracking==3.0.0 +event-tracking==2.4.0 # via - # -c requirements/edx/../constraints.txt + # -c requirements/edx/../common_constraints.txt # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt # edx-completion @@ -1537,7 +1535,7 @@ pymemcache==4.0.0 # via # -r requirements/edx/doc.txt # -r requirements/edx/testing.txt -pymongo==4.4.0 +pymongo==3.13.0 # via # -c requirements/edx/../constraints.txt # -r requirements/edx/doc.txt diff --git a/requirements/edx/doc.txt b/requirements/edx/doc.txt index 0791a21fefe4..fcc073f09520 100644 --- a/requirements/edx/doc.txt +++ b/requirements/edx/doc.txt @@ -440,10 +440,6 @@ djangorestframework-xml==2.0.0 # via # -r requirements/edx/base.txt # edx-enterprise -dnspython==2.6.1 - # via - # -r requirements/edx/base.txt - # pymongo docutils==0.21.2 # via # pydata-sphinx-theme @@ -618,9 +614,9 @@ enmerkar==0.7.1 # enmerkar-underscore enmerkar-underscore==2.3.0 # via -r requirements/edx/base.txt -event-tracking==3.0.0 +event-tracking==2.4.0 # via - # -c requirements/edx/../constraints.txt + # -c requirements/edx/../common_constraints.txt # -r requirements/edx/base.txt # edx-completion # edx-proctoring @@ -1030,7 +1026,7 @@ pylti1p3==2.0.0 # via -r requirements/edx/base.txt pymemcache==4.0.0 # via -r requirements/edx/base.txt -pymongo==4.4.0 +pymongo==3.13.0 # via # -c requirements/edx/../constraints.txt # -r requirements/edx/base.txt diff --git a/requirements/edx/paver.txt b/requirements/edx/paver.txt index 0b82d71c91e6..bf7337f4147a 100644 --- a/requirements/edx/paver.txt +++ b/requirements/edx/paver.txt @@ -10,8 +10,6 @@ charset-normalizer==2.0.12 # via # -c requirements/edx/../constraints.txt # requests -dnspython==2.6.1 - # via pymongo edx-opaque-keys==2.10.0 # via -r requirements/edx/paver.in idna==3.7 @@ -38,7 +36,7 @@ psutil==6.0.0 # via -r requirements/edx/paver.in pymemcache==4.0.0 # via -r requirements/edx/paver.in -pymongo==4.4.0 +pymongo==3.13.0 # via # -c requirements/edx/../constraints.txt # -r requirements/edx/paver.in diff --git a/requirements/edx/testing.txt b/requirements/edx/testing.txt index 6800f096e9ab..caa5d3c6f9db 100644 --- a/requirements/edx/testing.txt +++ b/requirements/edx/testing.txt @@ -476,10 +476,7 @@ djangorestframework-xml==2.0.0 # -r requirements/edx/base.txt # edx-enterprise dnspython==2.6.1 - # via - # -r requirements/edx/base.txt - # email-validator - # pymongo + # via email-validator done-xblock==2.3.0 # via -r requirements/edx/base.txt drf-jwt==1.19.2 @@ -653,9 +650,9 @@ enmerkar==0.7.1 # enmerkar-underscore enmerkar-underscore==2.3.0 # via -r requirements/edx/base.txt -event-tracking==3.0.0 +event-tracking==2.4.0 # via - # -c requirements/edx/../constraints.txt + # -c requirements/edx/../common_constraints.txt # -r requirements/edx/base.txt # edx-completion # edx-proctoring @@ -1144,7 +1141,7 @@ pylti1p3==2.0.0 # via -r requirements/edx/base.txt pymemcache==4.0.0 # via -r requirements/edx/base.txt -pymongo==4.4.0 +pymongo==3.13.0 # via # -c requirements/edx/../constraints.txt # -r requirements/edx/base.txt diff --git a/scripts/structures_pruning/requirements/base.txt b/scripts/structures_pruning/requirements/base.txt index 828a81a8d4ed..87aa858e9f8e 100644 --- a/scripts/structures_pruning/requirements/base.txt +++ b/scripts/structures_pruning/requirements/base.txt @@ -11,13 +11,11 @@ click==8.1.6 # click-log click-log==0.4.0 # via -r scripts/structures_pruning/requirements/base.in -dnspython==2.6.1 - # via pymongo edx-opaque-keys==2.10.0 # via -r scripts/structures_pruning/requirements/base.in pbr==6.0.0 # via stevedore -pymongo==4.4.0 +pymongo==3.13.0 # via # -c scripts/structures_pruning/requirements/../../../requirements/constraints.txt # -r scripts/structures_pruning/requirements/base.in diff --git a/scripts/structures_pruning/requirements/testing.txt b/scripts/structures_pruning/requirements/testing.txt index d74b204fad5c..2590ca8ca52b 100644 --- a/scripts/structures_pruning/requirements/testing.txt +++ b/scripts/structures_pruning/requirements/testing.txt @@ -12,10 +12,6 @@ click-log==0.4.0 # via -r scripts/structures_pruning/requirements/base.txt ddt==1.7.2 # via -r scripts/structures_pruning/requirements/testing.in -dnspython==2.6.1 - # via - # -r scripts/structures_pruning/requirements/base.txt - # pymongo edx-opaque-keys==2.10.0 # via -r scripts/structures_pruning/requirements/base.txt iniconfig==2.0.0 @@ -28,7 +24,7 @@ pbr==6.0.0 # stevedore pluggy==1.5.0 # via pytest -pymongo==4.4.0 +pymongo==3.13.0 # via # -r scripts/structures_pruning/requirements/base.txt # edx-opaque-keys diff --git a/xmodule/contentstore/mongo.py b/xmodule/contentstore/mongo.py index e44f03cede05..66d9474cde7c 100644 --- a/xmodule/contentstore/mongo.py +++ b/xmodule/contentstore/mongo.py @@ -3,7 +3,6 @@ """ -import hashlib import json import os @@ -41,29 +40,16 @@ def __init__( # GridFS will throw an exception if the Database is wrapped in a MongoProxy. So don't wrap it. # The appropriate methods below are marked as autoretry_read - those methods will handle # the AutoReconnect errors. - self.connection_params = { - 'db': db, - 'host': host, - 'port': port, - 'tz_aware': tz_aware, - 'user': user, - 'password': password, - 'proxy': False, - **kwargs - } - self.bucket = bucket - self.do_connection() - - def do_connection(self): - """ - Connects to mongodb. - """ - mongo_db = connect_to_mongodb(**self.connection_params) + proxy = False + mongo_db = connect_to_mongodb( + db, host, + port=port, tz_aware=tz_aware, user=user, password=password, proxy=proxy, **kwargs + ) - self.fs = gridfs.GridFS(mongo_db, self.bucket) # pylint: disable=invalid-name + self.fs = gridfs.GridFS(mongo_db, bucket) # pylint: disable=invalid-name - self.fs_files = mongo_db[self.bucket + ".files"] # the underlying collection GridFS uses - self.chunks = mongo_db[self.bucket + ".chunks"] + self.fs_files = mongo_db[bucket + ".files"] # the underlying collection GridFS uses + self.chunks = mongo_db[bucket + ".chunks"] def close_connections(self): """ @@ -71,25 +57,6 @@ def close_connections(self): """ self.fs_files.database.client.close() - def ensure_connection(self): - """ - Ensure that mongodb connection is open. - """ - if self.check_connection(): - return - self.do_connection() - - def check_connection(self): - """ - Check if mongodb connection is open or not. - """ - connection = self.fs_files.database.client - try: - connection.admin.command('ping') - return True - except pymongo.errors.InvalidOperation: - return False - def _drop_database(self, database=True, collections=True, connections=True): """ A destructive operation to drop the underlying database and close all connections. @@ -102,8 +69,8 @@ def _drop_database(self, database=True, collections=True, connections=True): If connections is True, then close the connection to the database as well. """ - self.ensure_connection() connection = self.fs_files.database.client + if database: connection.drop_database(self.fs_files.database.name) elif collections: @@ -136,22 +103,16 @@ def save(self, content): # but many more objects have this in python3 and shouldn't be using the chunking logic. For string and # byte streams we write them directly to gridfs and convert them to byetarrys if necessary. if hasattr(content.data, '__iter__') and not isinstance(content.data, (bytes, (str,))): - custom_md5 = hashlib.md5() for chunk in content.data: fp.write(chunk) - custom_md5.update(chunk) - fp.custom_md5 = custom_md5.hexdigest() else: # Ideally we could just ensure that we don't get strings in here and only byte streams # but being confident of that wolud be a lot more work than we have time for so we just # handle both cases here. if isinstance(content.data, str): - encoded_data = content.data.encode('utf-8') - fp.write(encoded_data) - fp.custom_md5 = hashlib.md5(encoded_data).hexdigest() + fp.write(content.data.encode('utf-8')) else: fp.write(content.data) - fp.custom_md5 = hashlib.md5(content.data).hexdigest() return content @@ -181,13 +142,12 @@ def find(self, location, throw_on_not_found=True, as_stream=False): # lint-amne 'thumbnail', thumbnail_location[4] ) - return StaticContentStream( location, fp.displayname, fp.content_type, fp, last_modified_at=fp.uploadDate, thumbnail_location=thumbnail_location, import_path=getattr(fp, 'import_path', None), length=fp.length, locked=getattr(fp, 'locked', False), - content_digest=getattr(fp, 'custom_md5', None), + content_digest=getattr(fp, 'md5', None), ) else: with self.fs.get(content_id) as fp: @@ -201,13 +161,12 @@ def find(self, location, throw_on_not_found=True, as_stream=False): # lint-amne 'thumbnail', thumbnail_location[4] ) - return StaticContent( location, fp.displayname, fp.content_type, fp.read(), last_modified_at=fp.uploadDate, thumbnail_location=thumbnail_location, import_path=getattr(fp, 'import_path', None), length=fp.length, locked=getattr(fp, 'locked', False), - content_digest=getattr(fp, 'custom_md5', None), + content_digest=getattr(fp, 'md5', None), ) except NoFile: if throw_on_not_found: # lint-amnesty, pylint: disable=no-else-raise diff --git a/xmodule/modulestore/mongo/base.py b/xmodule/modulestore/mongo/base.py index 16a8c134c1d6..c5cc935861d2 100644 --- a/xmodule/modulestore/mongo/base.py +++ b/xmodule/modulestore/mongo/base.py @@ -473,9 +473,30 @@ def __init__(self, contentstore, doc_store_config, fs_root, render_template, super().__init__(contentstore=contentstore, **kwargs) - self.doc_store_config = doc_store_config - self.retry_wait_time = retry_wait_time - self.do_connection(**self.doc_store_config) + def do_connection( + db, collection, host, port=27017, tz_aware=True, user=None, password=None, asset_collection=None, **kwargs + ): + """ + Create & open the connection, authenticate, and provide pointers to the collection + """ + # Set a write concern of 1, which makes writes complete successfully to the primary + # only before returning. Also makes pymongo report write errors. + kwargs['w'] = 1 + + self.database = connect_to_mongodb( + db, host, + port=port, tz_aware=tz_aware, user=user, password=password, + retry_wait_time=retry_wait_time, **kwargs + ) + + self.collection = self.database[collection] + + # Collection which stores asset metadata. + if asset_collection is None: + asset_collection = self.DEFAULT_ASSET_COLLECTION_NAME + self.asset_collection = self.database[asset_collection] + + do_connection(**doc_store_config) if default_class is not None: module_path, _, class_name = default_class.rpartition('.') @@ -502,48 +523,6 @@ def __init__(self, contentstore, doc_store_config, fs_root, render_template, self._course_run_cache = {} self.signal_handler = signal_handler - def check_connection(self): - """ - Check if mongodb connection is open or not. - """ - try: - # The ismaster command is cheap and does not require auth. - self.database.client.admin.command('ismaster') - return True - except pymongo.errors.InvalidOperation: - return False - - def ensure_connection(self): - """ - Ensure that mongodb connection is open. - """ - if self.check_connection(): - return - self.do_connection(**self.doc_store_config) - - def do_connection( - self, db, collection, host, port=27017, tz_aware=True, user=None, password=None, asset_collection=None, **kwargs - ): - """ - Create & open the connection, authenticate, and provide pointers to the collection - """ - # Set a write concern of 1, which makes writes complete successfully to the primary - # only before returning. Also makes pymongo report write errors. - kwargs['w'] = 1 - - self.database = connect_to_mongodb( - db, host, - port=port, tz_aware=tz_aware, user=user, password=password, - retry_wait_time=self.retry_wait_time, **kwargs - ) - - self.collection = self.database[collection] - - # Collection which stores asset metadata. - if asset_collection is None: - asset_collection = self.DEFAULT_ASSET_COLLECTION_NAME - self.asset_collection = self.database[asset_collection] - def close_connections(self): """ Closes any open connections to the underlying database @@ -562,7 +541,6 @@ def _drop_database(self, database=True, collections=True, connections=True): If connections is True, then close the connection to the database as well. """ - self.ensure_connection() # drop the assets super()._drop_database(database, collections, connections) @@ -894,8 +872,6 @@ def has_course(self, course_key, ignore_case=False, **kwargs): # lint-amnesty, course_query[key] = re.compile(r"(?i)^{}$".format(course_query[key])) else: course_query = {'_id': location.to_deprecated_son()} - - self.ensure_connection() course = self.collection.find_one(course_query, projection={'_id': True}) if course: return CourseKey.from_string('/'.join([ diff --git a/xmodule/modulestore/split_mongo/mongo_connection.py b/xmodule/modulestore/split_mongo/mongo_connection.py index 9c00b0c22fc8..bfb20fe0f5d5 100644 --- a/xmodule/modulestore/split_mongo/mongo_connection.py +++ b/xmodule/modulestore/split_mongo/mongo_connection.py @@ -279,30 +279,20 @@ def __init__( #make sure the course index cache is fresh. RequestCache(namespace="course_index_cache").clear() - self.collection = collection - self.connection_params = { - 'db': db, - 'host': host, - 'port': port, - 'tz_aware': tz_aware, - 'user': user, - 'password': password, - 'retry_wait_time': retry_wait_time, - **kwargs - } - - self.do_connection() + self.database = connect_to_mongodb( + db, host, + port=port, tz_aware=tz_aware, user=user, password=password, + retry_wait_time=retry_wait_time, **kwargs + ) + + self.course_index = self.database[collection + '.active_versions'] + self.structures = self.database[collection + '.structures'] + self.definitions = self.database[collection + '.definitions'] # Is the MySQL subclass in use, passing through some reads/writes to us? If so this will be True. # If this MongoPersistenceBackend is being used directly (only MongoDB is involved), this is False. self.with_mysql_subclass = with_mysql_subclass - def do_connection(self): - self.database = connect_to_mongodb(**self.connection_params) - self.course_index = self.database[self.collection + '.active_versions'] - self.structures = self.database[self.collection + '.structures'] - self.definitions = self.database[self.collection + '.definitions'] - def heartbeat(self): """ Check that the db is reachable. @@ -314,24 +304,6 @@ def heartbeat(self): except pymongo.errors.ConnectionFailure: raise HeartbeatFailure(f"Can't connect to {self.database.name}", 'mongo') # lint-amnesty, pylint: disable=raise-missing-from - def check_connection(self): - """ - Check if mongodb connection is open or not. - """ - try: - self.database.client.admin.command("ping") - return True - except pymongo.errors.InvalidOperation: - return False - - def ensure_connection(self): - """ - Ensure that mongodb connection is open. - """ - if self.check_connection(): - return - self.do_connection() - def get_structure(self, key, course_context=None): """ Get the structure from the persistence mechanism whose id is the given key. @@ -530,7 +502,6 @@ def delete_course_index(self, course_key): """ Delete the course_index from the persistence mechanism whose id is the given course_index """ - self.ensure_connection() with TIMER.timer("delete_course_index", course_key): query = { key_attr: getattr(course_key, key_attr) @@ -590,8 +561,7 @@ def close_connections(self): Closes any open connections to the underlying databases """ RequestCache(namespace="course_index_cache").clear() - if self.check_connection(): - self.database.client.close() + self.database.client.close() def _drop_database(self, database=True, collections=True, connections=True): """ @@ -606,8 +576,6 @@ def _drop_database(self, database=True, collections=True, connections=True): If connections is True, then close the connection to the database as well. """ RequestCache(namespace="course_index_cache").clear() - - self.ensure_connection() connection = self.database.client if database: diff --git a/xmodule/modulestore/tests/test_mixed_modulestore.py b/xmodule/modulestore/tests/test_mixed_modulestore.py index 8adbfcb911a4..91292cd88d71 100644 --- a/xmodule/modulestore/tests/test_mixed_modulestore.py +++ b/xmodule/modulestore/tests/test_mixed_modulestore.py @@ -156,8 +156,8 @@ def setUp(self): tz_aware=True, ) self.connection.drop_database(self.DB) - self.addCleanup(self._drop_database) - self.addCleanup(self._close_connection) + self.addCleanup(self.connection.drop_database, self.DB) + self.addCleanup(self.connection.close) # define attrs which get set in initdb to quell pylint self.writable_chapter_location = self.store = self.fake_location = None @@ -165,43 +165,6 @@ def setUp(self): self.user_id = ModuleStoreEnum.UserID.test - def _check_connection(self): - """ - Check mongodb connection is open or not. - """ - try: - self.connection.admin.command('ping') - return True - except pymongo.errors.InvalidOperation: - return False - - def _ensure_connection(self): - """ - Make sure that mongodb connection is open. - """ - if not self._check_connection(): - self.connection = pymongo.MongoClient( - host=self.HOST, - port=self.PORT, - tz_aware=True, - ) - - def _drop_database(self): - """ - Drop mongodb database. - """ - self._ensure_connection() - self.connection.drop_database(self.DB) - - def _close_connection(self): - """ - Close mongodb connection. - """ - try: - self.connection.close() - except pymongo.errors.InvalidOperation: - pass - def _create_course(self, course_key, asides=None): """ Create a course w/ one item in the persistence store using the given course & item location. diff --git a/xmodule/mongo_utils.py b/xmodule/mongo_utils.py index ad9ddcdc248b..5daeff034e99 100644 --- a/xmodule/mongo_utils.py +++ b/xmodule/mongo_utils.py @@ -51,31 +51,27 @@ def connect_to_mongodb( if read_preference is not None: kwargs['read_preference'] = read_preference - if 'replicaSet' in kwargs and kwargs['replicaSet'] == '': - kwargs['replicaSet'] = None - - connection_params = { - 'host': host, - 'port': port, - 'tz_aware': tz_aware, - 'document_class': dict, - 'directConnection': True, - **kwargs, - } - - if user is not None and password is not None and not db.startswith('test_'): - connection_params.update({'username': user, 'password': password, 'authSource': db}) - - mongo_conn = pymongo.MongoClient(**connection_params) + mongo_conn = pymongo.database.Database( + pymongo.MongoClient( + host=host, + port=port, + tz_aware=tz_aware, + document_class=dict, + **kwargs + ), + db + ) if proxy: mongo_conn = MongoProxy( - mongo_conn[db], + mongo_conn, wait_time=retry_wait_time ) - return mongo_conn + # If credentials were provided, authenticate the user. + if user is not None and password is not None: + mongo_conn.authenticate(user, password, source=auth_source) - return mongo_conn[db] + return mongo_conn def create_collection_index(