diff --git a/Pipfile b/Pipfile index 680b6116..66b0686b 100644 --- a/Pipfile +++ b/Pipfile @@ -22,7 +22,7 @@ flask = "~=3.0" flask-apscheduler = "~=1.13" gunicorn = "~=21.2" more-itertools = "~=10.2" -mysql-connector-python = "~=8.1" +mysql-connector-python = "~=8.3" pandas = "~=2.0" pika = "~=1.3" pymongo = "~=4.6" diff --git a/Pipfile.lock b/Pipfile.lock index aeb12e24..aa9b33b3 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "f130ca89c2175aa102884ff4a28186f21a3de6b94f64fde85ee5e647fec0b99a" + "sha256": "9a413f0b17fb546083fac8ae38461e20640761502cc2609db1597d43a8ca2198" }, "pipfile-spec": 6, "requires": { @@ -824,31 +824,36 @@ }, "mysql-connector-python": { "hashes": [ - "sha256:1d292d8ee08a3e0103f3e9aba81f292c12c244c1d5345168375bf6842b2b3e9b", - "sha256:22259479bdba53e0eb64325d5d97d7ecae35e729a8b39822c16729476ba2cd92", - "sha256:40e3b5d60dbc5446ccd1901eb3db4c9b1ad07a490f4e0fc96e7ac53a6fe9b7bc", - "sha256:4f58e1d41b3ba04281099884421c3a63589204b2cf40e473c5dbebbc26971017", - "sha256:556f83cfc3e1f3e3db0080bb19ca50650815551ef7e10518053fe2ba76d2e59e", - "sha256:5f1cdf9d3ed2f3c70a91c4b94d3ab3b2890c477a61b4085bb89d2275c50e1e3d", - "sha256:64f4d7f7c60e2fe5a01d9e7915634d05a350bd866332ea4eab281b29db190e2b", - "sha256:76432810ee58abda81e0a34666f86aba69a7ded58f164a1d19835dba70b1451b", - "sha256:78a43eeb2b845986dc769ab611e3c480991c8037c94aee67ee6842d1f7e961a9", - "sha256:7abe27b88ae1354433713fc289ed663cc04f7b75ea818a2b18067c3a736b0975", - "sha256:7f1cbc06411115c318af775291386ed1494302e05b5c45da73f01593c0827bc1", - "sha256:8062c11619bc48baf9ada3f85d85832aec5e57359eb11ed9e7d75ddcce81721a", - "sha256:816b46c9dc250a0e59d11a902e187be190dae992a2b802512b888930db380487", - "sha256:89151683654748f4de59cd4cf04102b6180976923f1db9921b87cff2c2da7b1d", - "sha256:90adf52cdb25a7adc18da0cc6c5454709598af0e87343e39ab2c11d7dc5aaf34", - "sha256:9cc256eaf55a7dd706ae514bebb863d3e426bd12686b88e284bdad6c37bf6dde", - "sha256:dd0005e9b7e54c700745416ecf26435c69687d5234eef9ff11cf85a7d76945e2", - "sha256:e3a2c2b398af1c5f0cbc1b1935f98bb5a7da4f178b2e73f0851872fbb37e66cc", - "sha256:f2ac6d43fc5a01e574fcc2a6b732cc2374e6ce576ee039cc2051f18d2daafd6d", - "sha256:f36a1a308d0d0d6202fea2e51741b5265f60206be331cfbb32f2f5bf62a20359", - "sha256:f8160777788c7e561f9d7f93eff45776031c83aa2f02a0d8c138d8f9c9bd088e" + "sha256:0deb38f05057e12af091a48e03a1ff00e213945880000f802879fae5665e7502", + "sha256:125714c998a697592bc56cce918a1acc58fadc510a7f588dbef3e53a1920e086", + "sha256:1db5b48b4ff7d24344217ed2418b162c7677eec86ab9766dc0e5feae39c90974", + "sha256:201e609159b84a247be87b76f5deb79e8c6b368e91f043790e62077f13f3fed8", + "sha256:27f8be2087627366a44a6831ec68b568c98dbf0f4ceff24682d90c21db6e0f1f", + "sha256:4be4165e4cd5acb4659261ddc74e9164d2dfa0d795d5695d52f2bf39ea0762fa", + "sha256:51d97bf771519829797556718d81e8b9bdcd0a00427740ca57c085094c8bde17", + "sha256:55cb57d8098c721abce20fdef23232663977c0e5c87a4d0f9f73466f32c7d168", + "sha256:5718e426cf67f041772d4984f709052201883f74190ba6feaddce5cbd3b99e6f", + "sha256:5e2c86c60be08c71bae755d811fe8b89ec4feb8117ec3440ebc6c042dd6f06bc", + "sha256:5f707a9b040ad4700fc447ba955c78b08f2dd5affde37ac2401918f7b6daaba3", + "sha256:73ee8bc5f9626c42b37342a91a825cddb3461f6bfbbd6524d8ccfd3293aaa088", + "sha256:77bae496566d3da77bb0e938d89243103d20ee41633f626a47785470451bf45c", + "sha256:7f4f5fa844c19ee3a78c4606f6e138b06829e75469592d90246a290c7befc322", + "sha256:85fa878fdd6accaeb7d609bd2637c2cfa61592e7f9bdbdc0da18b2fa998d3d5a", + "sha256:9302d774025e76a0fac46bfeea8854b3d6819715a6a16ff23bfcda04218a76b7", + "sha256:b2901391b651d60dab3cc8985df94976fc1ea59fa7324c5b19d0a4177914c8dd", + "sha256:c57d02fd6c28be444487e7905ede09e3fecb18377cf82908ca262826369d3401", + "sha256:de0f2f2baa9e091ca8bdc4a091f874f9cd0b84b256389596adb0e032a05fe9f9", + "sha256:de5c3ee89d9276356f93df003949d3ba4c486f32fec9ec9fd7bc0caab124d89c", + "sha256:de74055944b214bff56e1752ec213d705c421414c67a250fb695af0c5c214135", + "sha256:e4ff23aa8036b4c5b6463fa81398bb5a528a29f99955de6ba937f0bba57a2fe3", + "sha256:e868ccc7ad9fbc242546db04673d89cee87d12b8139affd114524553df4e5d6a", + "sha256:ec6dc3434a7deef74ab04e8978f6c5e181866a5423006c1b5aec5390a189d28d", + "sha256:f4ee7e07cca6b744874d60d6b0b24817d9246eb4e8d7269b7ddbe68763a0bd13", + "sha256:f7acacdf9fd4260702f360c00952ad9a9cc73e8b7475e0d0c973c085a3dd7b7d" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==8.1.0" + "version": "==8.3.0" }, "numpy": { "hashes": [ diff --git a/crawler/db/mysql.py b/crawler/db/mysql.py index 8432719e..6323576b 100644 --- a/crawler/db/mysql.py +++ b/crawler/db/mysql.py @@ -1,10 +1,11 @@ import logging from datetime import datetime from itertools import islice -from typing import Any, Dict, Generator, Iterable, List, cast +from typing import Any, Dict, Generator, Iterable, List, cast, Sequence import mysql.connector as mysql import sqlalchemy +from mysql.connector.types import MySQLConvertibleType from mysql.connector.connection_cext import MySQLConnectionAbstract from mysql.connector.cursor_cext import MySQLCursorAbstract from sqlalchemy.engine.base import Engine @@ -65,7 +66,7 @@ def create_mysql_connection(config: Config, readonly: bool = True) -> MySQLConne def run_mysql_executemany_query( - mysql_conn: MySQLConnectionAbstract, sql_query: str, values: List[Dict[str, str]] + mysql_conn: MySQLConnectionAbstract, sql_query: str, values: Sequence[Dict[str, MySQLConvertibleType]] ) -> None: """Writes the sample testing information into the MLWH. @@ -223,7 +224,9 @@ def partition(iterable: Iterable, partition_size: int) -> Generator[List[Any], N yield part -def reset_is_current_flags(cursor: MySQLCursorAbstract, rna_ids: List[str], chunk_size: int = 1000) -> None: +def reset_is_current_flags( + cursor: MySQLCursorAbstract, rna_ids: List[MySQLConvertibleType], chunk_size: int = 1000 +) -> None: """Receives a cursor with an active connection and a list of rna_ids and runs an update resetting any is_current flags to false for all the specified rna ids in groups of chunk_size. diff --git a/crawler/helpers/general_helpers.py b/crawler/helpers/general_helpers.py index 3491b523..ddd953d9 100644 --- a/crawler/helpers/general_helpers.py +++ b/crawler/helpers/general_helpers.py @@ -5,7 +5,8 @@ from datetime import datetime from decimal import Decimal from http import HTTPStatus -from typing import Any, Dict, Iterable, List, Optional +from typing import Any, Dict, Iterable, List, Optional, Sequence +from mysql.connector.types import MySQLConvertibleType import pysftp import requests @@ -268,7 +269,7 @@ def map_mongo_sample_to_mysql(doc: SampleDoc, copy_date: bool = False) -> Dict[s return value -def set_is_current_on_mysql_samples(samples: Iterable[Dict[str, str]]) -> List[Dict[str, str]]: +def set_is_current_on_mysql_samples(samples: Iterable[Dict[str, str]]) -> Sequence[Dict[str, MySQLConvertibleType]]: """Creates a copy of the samples passed in, adding is_current values to each sample. is_current will be True for all samples unless there is a repeated RNA ID, in which case only the last one is set to True. diff --git a/migrations/back_populate_source_plate_and_sample_uuids.py b/migrations/back_populate_source_plate_and_sample_uuids.py index f463ce72..e7f76bfb 100644 --- a/migrations/back_populate_source_plate_and_sample_uuids.py +++ b/migrations/back_populate_source_plate_and_sample_uuids.py @@ -7,6 +7,7 @@ from typing import Dict, List, Optional, cast from uuid import uuid4 +from mysql.connector.types import MySQLConvertibleType, RowType from mysql.connector.connection_cext import MySQLConnectionAbstract from pymongo.collection import Collection from pymongo.database import Database @@ -366,7 +367,7 @@ def prepare_mlwh_row(sample_doc): existing_samples = mysql_generator_from_connection(mysql_conn, query) log_mlwh_sample_fields("Before update", existing_samples) - row_data = [cast(Dict[str, str], row) for row in update_rows] + row_data = [cast(Dict[str, MySQLConvertibleType], row) for row in update_rows] run_mysql_executemany_query(mysql_conn, SQL_MLWH_UPDATE_SAMPLE_UUID_PLATE_UUID, row_data) # Log the new fields on the MLWH samples @@ -394,7 +395,7 @@ def mlwh_count_samples_from_mongo_ids(mysql_conn: MySQLConnectionAbstract, mongo if result is None: raise Exception("Query result was not valid") - return cast(int, result[0]) + return cast(int, cast(RowType, result)[0]) else: raise Exception("Cannot connect mysql") diff --git a/migrations/helpers/shared_helper.py b/migrations/helpers/shared_helper.py index 18023a3f..7f0db3fa 100644 --- a/migrations/helpers/shared_helper.py +++ b/migrations/helpers/shared_helper.py @@ -6,9 +6,10 @@ from contextlib import closing from csv import DictReader from datetime import datetime -from typing import Any, Dict, Iterator, List, Optional +from typing import Any, Dict, Iterator, List, Optional, cast from mysql.connector.connection_cext import MySQLConnectionAbstract +from mysql.connector.types import RowItemType from pymongo.collection import Collection from crawler.constants import ( @@ -117,11 +118,13 @@ def valid_filepath(s_filepath: str) -> bool: return False -def mysql_generator_from_connection(connection: MySQLConnectionAbstract, query: str) -> Iterator[Dict[str, Any]]: +def mysql_generator_from_connection( + connection: MySQLConnectionAbstract, query: str +) -> Iterator[Dict[str, RowItemType]]: with closing(connection.cursor(dictionary=True, buffered=False)) as cursor: cursor.execute(query) for row in cursor.fetchall(): - yield row + yield cast(Dict[str, RowItemType], row) def mysql_generator_from_config(config: Config, query: str) -> Iterator[Dict[str, Any]]: diff --git a/tests/db/test_mysql.py b/tests/db/test_mysql.py index ee625740..ebc8cd35 100644 --- a/tests/db/test_mysql.py +++ b/tests/db/test_mysql.py @@ -28,10 +28,29 @@ class MockMySQLConnection(MySQLConnectionAbstract): _execute_query = MagicMock() _open_connection = MagicMock() close = MagicMock() + cmd_change_user = MagicMock() + cmd_debug = MagicMock() + cmd_init_db = MagicMock() + cmd_ping = MagicMock() + cmd_process_kill = MagicMock() + cmd_query = MagicMock() + cmd_query_iter = MagicMock() + cmd_quit = MagicMock() + cmd_refresh = MagicMock() + cmd_reset_connection = MagicMock() + cmd_shutdown = MagicMock() + cmd_statistics = MagicMock() + cmd_stmt_close = MagicMock() + cmd_stmt_execute = MagicMock() + cmd_stmt_prepare = MagicMock() + cmd_stmt_reset = MagicMock() + cmd_stmt_send_long_data = MagicMock() commit = MagicMock() + connection_id = MagicMock() cursor = MagicMock() database = MagicMock() disconnect = MagicMock() + get_row = MagicMock() get_rows = MagicMock() in_transaction = MagicMock() info_query = MagicMock() diff --git a/tests/file_processing/test_file_processing.py b/tests/file_processing/test_file_processing.py index 1e1d6cfe..56f97853 100644 --- a/tests/file_processing/test_file_processing.py +++ b/tests/file_processing/test_file_processing.py @@ -4,13 +4,14 @@ from datetime import datetime from decimal import Decimal from io import StringIO -from typing import List +from typing import List, cast, Dict from unittest.mock import MagicMock, patch import pytest from bson.decimal128 import Decimal128 from bson.objectid import ObjectId from mysql.connector.connection_cext import MySQLConnectionAbstract +from mysql.connector.types import RowItemType from crawler.constants import ( CENTRE_KEY_BACKUPS_FOLDER, @@ -1548,65 +1549,68 @@ def test_insert_samples_from_docs_into_mlwh( rows = cursor.fetchall() cursor.close() - assert rows[0][MLWH_MONGODB_ID] == "5f562d9931d9959b92544728" - assert rows[0][MLWH_ROOT_SAMPLE_ID] == "ABC00000004" - assert rows[0][MLWH_COG_UK_ID] == "TEST-123ABC" - assert rows[0][MLWH_RNA_ID] == "TC-rna-00000029_H11" - assert rows[0][MLWH_PLATE_BARCODE] == "TC-rna-00000029" - assert rows[0][MLWH_COORDINATE] == "H11" - assert rows[0][MLWH_RESULT] == "Negative" - assert rows[0][MLWH_DATE_TESTED] == date_tested_1 - assert rows[0][MLWH_SOURCE] == "Test Centre" - assert rows[0][MLWH_LAB_ID] == "TC" - assert rows[0][MLWH_CH1_TARGET] is None - assert rows[0][MLWH_CH1_RESULT] is None - assert rows[0][MLWH_CH1_CQ] is None - assert rows[0][MLWH_CH2_TARGET] is None - assert rows[0][MLWH_CH2_RESULT] is None - assert rows[0][MLWH_CH2_CQ] is None - assert rows[0][MLWH_CH3_TARGET] is None - assert rows[0][MLWH_CH3_RESULT] is None - assert rows[0][MLWH_CH3_CQ] is None - assert rows[0][MLWH_CH4_TARGET] is None - assert rows[0][MLWH_CH4_RESULT] is None - assert rows[0][MLWH_CH4_CQ] is None - assert rows[0][MLWH_FILTERED_POSITIVE] is None - assert rows[0][MLWH_FILTERED_POSITIVE_VERSION] is None - assert rows[0][MLWH_FILTERED_POSITIVE_TIMESTAMP] is None - assert rows[0][MLWH_CREATED_AT] is not None - assert rows[0][MLWH_UPDATED_AT] is not None - assert rows[0][MLWH_MUST_SEQUENCE] == 0 - assert rows[0][MLWH_PREFERENTIALLY_SEQUENCE] == 1 - - assert rows[1][MLWH_MONGODB_ID] == "5f562d9931d9959b92544729" - assert rows[1][MLWH_ROOT_SAMPLE_ID] == "ABC00000005" - assert rows[1][MLWH_COG_UK_ID] == "TEST-123ABD" - assert rows[1][MLWH_RNA_ID] == "TC-rna-00000029_H12" - assert rows[1][MLWH_PLATE_BARCODE] == "TC-rna-00000029" - assert rows[1][MLWH_COORDINATE] == "H12" - assert rows[1][MLWH_RESULT] == RESULT_VALUE_POSITIVE - assert rows[1][MLWH_DATE_TESTED] == date_tested_2 - assert rows[1][MLWH_SOURCE] == "Test Centre" - assert rows[1][MLWH_LAB_ID] == "TC" - assert rows[1][MLWH_CH1_TARGET] == "ORF1ab" - assert rows[1][MLWH_CH1_RESULT] == RESULT_VALUE_POSITIVE - assert rows[1][MLWH_CH1_CQ] == Decimal("21.28726211") - assert rows[1][MLWH_CH2_TARGET] == "N gene" - assert rows[1][MLWH_CH2_RESULT] == RESULT_VALUE_POSITIVE - assert rows[1][MLWH_CH2_CQ] == Decimal("18.12736661") - assert rows[1][MLWH_CH3_TARGET] == "S gene" - assert rows[1][MLWH_CH3_RESULT] == RESULT_VALUE_POSITIVE - assert rows[1][MLWH_CH3_CQ] == Decimal("22.63616273") - assert rows[1][MLWH_CH4_TARGET] == "MS2" - assert rows[1][MLWH_CH4_RESULT] == RESULT_VALUE_POSITIVE - assert rows[1][MLWH_CH4_CQ] == Decimal("26.25125612") - assert rows[1][MLWH_FILTERED_POSITIVE] == 1 - assert rows[1][MLWH_FILTERED_POSITIVE_VERSION] == "v2.3" - assert rows[1][MLWH_FILTERED_POSITIVE_TIMESTAMP] == filtered_positive_timestamp - assert rows[1][MLWH_CREATED_AT] is not None - assert rows[1][MLWH_UPDATED_AT] is not None - assert rows[1][MLWH_MUST_SEQUENCE] == 1 - assert rows[1][MLWH_PREFERENTIALLY_SEQUENCE] == 0 + first_row = cast(Dict[str, RowItemType], rows[0]) + second_row = cast(Dict[str, RowItemType], rows[1]) + + assert first_row[MLWH_MONGODB_ID] == "5f562d9931d9959b92544728" + assert first_row[MLWH_ROOT_SAMPLE_ID] == "ABC00000004" + assert first_row[MLWH_COG_UK_ID] == "TEST-123ABC" + assert first_row[MLWH_RNA_ID] == "TC-rna-00000029_H11" + assert first_row[MLWH_PLATE_BARCODE] == "TC-rna-00000029" + assert first_row[MLWH_COORDINATE] == "H11" + assert first_row[MLWH_RESULT] == "Negative" + assert first_row[MLWH_DATE_TESTED] == date_tested_1 + assert first_row[MLWH_SOURCE] == "Test Centre" + assert first_row[MLWH_LAB_ID] == "TC" + assert first_row[MLWH_CH1_TARGET] is None + assert first_row[MLWH_CH1_RESULT] is None + assert first_row[MLWH_CH1_CQ] is None + assert first_row[MLWH_CH2_TARGET] is None + assert first_row[MLWH_CH2_RESULT] is None + assert first_row[MLWH_CH2_CQ] is None + assert first_row[MLWH_CH3_TARGET] is None + assert first_row[MLWH_CH3_RESULT] is None + assert first_row[MLWH_CH3_CQ] is None + assert first_row[MLWH_CH4_TARGET] is None + assert first_row[MLWH_CH4_RESULT] is None + assert first_row[MLWH_CH4_CQ] is None + assert first_row[MLWH_FILTERED_POSITIVE] is None + assert first_row[MLWH_FILTERED_POSITIVE_VERSION] is None + assert first_row[MLWH_FILTERED_POSITIVE_TIMESTAMP] is None + assert first_row[MLWH_CREATED_AT] is not None + assert first_row[MLWH_UPDATED_AT] is not None + assert first_row[MLWH_MUST_SEQUENCE] == 0 + assert first_row[MLWH_PREFERENTIALLY_SEQUENCE] == 1 + + assert second_row[MLWH_MONGODB_ID] == "5f562d9931d9959b92544729" + assert second_row[MLWH_ROOT_SAMPLE_ID] == "ABC00000005" + assert second_row[MLWH_COG_UK_ID] == "TEST-123ABD" + assert second_row[MLWH_RNA_ID] == "TC-rna-00000029_H12" + assert second_row[MLWH_PLATE_BARCODE] == "TC-rna-00000029" + assert second_row[MLWH_COORDINATE] == "H12" + assert second_row[MLWH_RESULT] == RESULT_VALUE_POSITIVE + assert second_row[MLWH_DATE_TESTED] == date_tested_2 + assert second_row[MLWH_SOURCE] == "Test Centre" + assert second_row[MLWH_LAB_ID] == "TC" + assert second_row[MLWH_CH1_TARGET] == "ORF1ab" + assert second_row[MLWH_CH1_RESULT] == RESULT_VALUE_POSITIVE + assert second_row[MLWH_CH1_CQ] == Decimal("21.28726211") + assert second_row[MLWH_CH2_TARGET] == "N gene" + assert second_row[MLWH_CH2_RESULT] == RESULT_VALUE_POSITIVE + assert second_row[MLWH_CH2_CQ] == Decimal("18.12736661") + assert second_row[MLWH_CH3_TARGET] == "S gene" + assert second_row[MLWH_CH3_RESULT] == RESULT_VALUE_POSITIVE + assert second_row[MLWH_CH3_CQ] == Decimal("22.63616273") + assert second_row[MLWH_CH4_TARGET] == "MS2" + assert second_row[MLWH_CH4_RESULT] == RESULT_VALUE_POSITIVE + assert second_row[MLWH_CH4_CQ] == Decimal("26.25125612") + assert second_row[MLWH_FILTERED_POSITIVE] == 1 + assert second_row[MLWH_FILTERED_POSITIVE_VERSION] == "v2.3" + assert second_row[MLWH_FILTERED_POSITIVE_TIMESTAMP] == filtered_positive_timestamp + assert second_row[MLWH_CREATED_AT] is not None + assert second_row[MLWH_UPDATED_AT] is not None + assert second_row[MLWH_MUST_SEQUENCE] == 1 + assert second_row[MLWH_PREFERENTIALLY_SEQUENCE] == 0 def test_insert_samples_from_docs_into_mlwh_date_tested_missing(config, mlwh_connection): @@ -1676,7 +1680,7 @@ def test_insert_samples_from_docs_into_mlwh_date_tested_none( rows = cursor.fetchall() cursor.close() - assert rows[0][MLWH_DATE_TESTED] is None + assert cast(Dict[str, RowItemType], rows[0])[MLWH_DATE_TESTED] is None def test_insert_samples_from_docs_into_mlwh_returns_false_none_connection(config, mlwh_connection):