Skip to content

Commit

Permalink
Merge pull request #890 from sanger/dependabot/pip/mysql-connector-python-8.3.0
Browse files Browse the repository at this point in the history

Bump mysql-connector-python from 8.1.0 to 8.3.0
  • Loading branch information
dasunpubudumal authored Feb 21, 2024
2 parents 7090be3 + eae5839 commit 27f8796
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 95 deletions.
2 changes: 1 addition & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ flask = "~=3.0"
flask-apscheduler = "~=1.13"
gunicorn = "~=21.2"
more-itertools = "~=10.2"
mysql-connector-python = "~=8.1"
mysql-connector-python = "~=8.3"
pandas = "~=2.0"
pika = "~=1.3"
pymongo = "~=4.6"
Expand Down
51 changes: 28 additions & 23 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions crawler/db/mysql.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import logging
from datetime import datetime
from itertools import islice
from typing import Any, Dict, Generator, Iterable, List, cast
from typing import Any, Dict, Generator, Iterable, List, cast, Sequence

import mysql.connector as mysql
import sqlalchemy
from mysql.connector.types import MySQLConvertibleType
from mysql.connector.connection_cext import MySQLConnectionAbstract
from mysql.connector.cursor_cext import MySQLCursorAbstract
from sqlalchemy.engine.base import Engine
Expand Down Expand Up @@ -65,7 +66,7 @@ def create_mysql_connection(config: Config, readonly: bool = True) -> MySQLConne


def run_mysql_executemany_query(
mysql_conn: MySQLConnectionAbstract, sql_query: str, values: List[Dict[str, str]]
mysql_conn: MySQLConnectionAbstract, sql_query: str, values: Sequence[Dict[str, MySQLConvertibleType]]
) -> None:
"""Writes the sample testing information into the MLWH.
Expand Down Expand Up @@ -223,7 +224,9 @@ def partition(iterable: Iterable, partition_size: int) -> Generator[List[Any], N
yield part


def reset_is_current_flags(cursor: MySQLCursorAbstract, rna_ids: List[str], chunk_size: int = 1000) -> None:
def reset_is_current_flags(
cursor: MySQLCursorAbstract, rna_ids: List[MySQLConvertibleType], chunk_size: int = 1000
) -> None:
"""Receives a cursor with an active connection and a list of rna_ids and
runs an update resetting any is_current flags to false for all the specified
rna ids in groups of chunk_size.
Expand Down
5 changes: 3 additions & 2 deletions crawler/helpers/general_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from datetime import datetime
from decimal import Decimal
from http import HTTPStatus
from typing import Any, Dict, Iterable, List, Optional
from typing import Any, Dict, Iterable, List, Optional, Sequence
from mysql.connector.types import MySQLConvertibleType

import pysftp
import requests
Expand Down Expand Up @@ -268,7 +269,7 @@ def map_mongo_sample_to_mysql(doc: SampleDoc, copy_date: bool = False) -> Dict[s
return value


def set_is_current_on_mysql_samples(samples: Iterable[Dict[str, str]]) -> List[Dict[str, str]]:
def set_is_current_on_mysql_samples(samples: Iterable[Dict[str, str]]) -> Sequence[Dict[str, MySQLConvertibleType]]:
"""Creates a copy of the samples passed in, adding is_current values to each sample.
is_current will be True for all samples unless there is a repeated RNA ID, in which case
only the last one is set to True.
Expand Down
5 changes: 3 additions & 2 deletions migrations/back_populate_source_plate_and_sample_uuids.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import Dict, List, Optional, cast
from uuid import uuid4

from mysql.connector.types import MySQLConvertibleType, RowType
from mysql.connector.connection_cext import MySQLConnectionAbstract
from pymongo.collection import Collection
from pymongo.database import Database
Expand Down Expand Up @@ -366,7 +367,7 @@ def prepare_mlwh_row(sample_doc):
existing_samples = mysql_generator_from_connection(mysql_conn, query)
log_mlwh_sample_fields("Before update", existing_samples)

row_data = [cast(Dict[str, str], row) for row in update_rows]
row_data = [cast(Dict[str, MySQLConvertibleType], row) for row in update_rows]
run_mysql_executemany_query(mysql_conn, SQL_MLWH_UPDATE_SAMPLE_UUID_PLATE_UUID, row_data)

# Log the new fields on the MLWH samples
Expand Down Expand Up @@ -394,7 +395,7 @@ def mlwh_count_samples_from_mongo_ids(mysql_conn: MySQLConnectionAbstract, mongo
if result is None:
raise Exception("Query result was not valid")

return cast(int, result[0])
return cast(int, cast(RowType, result)[0])
else:
raise Exception("Cannot connect mysql")

Expand Down
9 changes: 6 additions & 3 deletions migrations/helpers/shared_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
from contextlib import closing
from csv import DictReader
from datetime import datetime
from typing import Any, Dict, Iterator, List, Optional
from typing import Any, Dict, Iterator, List, Optional, cast

from mysql.connector.connection_cext import MySQLConnectionAbstract
from mysql.connector.types import RowItemType
from pymongo.collection import Collection

from crawler.constants import (
Expand Down Expand Up @@ -117,11 +118,13 @@ def valid_filepath(s_filepath: str) -> bool:
return False


def mysql_generator_from_connection(connection: MySQLConnectionAbstract, query: str) -> Iterator[Dict[str, Any]]:
def mysql_generator_from_connection(
connection: MySQLConnectionAbstract, query: str
) -> Iterator[Dict[str, RowItemType]]:
with closing(connection.cursor(dictionary=True, buffered=False)) as cursor:
cursor.execute(query)
for row in cursor.fetchall():
yield row
yield cast(Dict[str, RowItemType], row)


def mysql_generator_from_config(config: Config, query: str) -> Iterator[Dict[str, Any]]:
Expand Down
19 changes: 19 additions & 0 deletions tests/db/test_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,29 @@ class MockMySQLConnection(MySQLConnectionAbstract):
_execute_query = MagicMock()
_open_connection = MagicMock()
close = MagicMock()
cmd_change_user = MagicMock()
cmd_debug = MagicMock()
cmd_init_db = MagicMock()
cmd_ping = MagicMock()
cmd_process_kill = MagicMock()
cmd_query = MagicMock()
cmd_query_iter = MagicMock()
cmd_quit = MagicMock()
cmd_refresh = MagicMock()
cmd_reset_connection = MagicMock()
cmd_shutdown = MagicMock()
cmd_statistics = MagicMock()
cmd_stmt_close = MagicMock()
cmd_stmt_execute = MagicMock()
cmd_stmt_prepare = MagicMock()
cmd_stmt_reset = MagicMock()
cmd_stmt_send_long_data = MagicMock()
commit = MagicMock()
connection_id = MagicMock()
cursor = MagicMock()
database = MagicMock()
disconnect = MagicMock()
get_row = MagicMock()
get_rows = MagicMock()
in_transaction = MagicMock()
info_query = MagicMock()
Expand Down
126 changes: 65 additions & 61 deletions tests/file_processing/test_file_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@
from datetime import datetime
from decimal import Decimal
from io import StringIO
from typing import List
from typing import List, cast, Dict
from unittest.mock import MagicMock, patch

import pytest
from bson.decimal128 import Decimal128
from bson.objectid import ObjectId
from mysql.connector.connection_cext import MySQLConnectionAbstract
from mysql.connector.types import RowItemType

from crawler.constants import (
CENTRE_KEY_BACKUPS_FOLDER,
Expand Down Expand Up @@ -1548,65 +1549,68 @@ def test_insert_samples_from_docs_into_mlwh(
rows = cursor.fetchall()
cursor.close()

assert rows[0][MLWH_MONGODB_ID] == "5f562d9931d9959b92544728"
assert rows[0][MLWH_ROOT_SAMPLE_ID] == "ABC00000004"
assert rows[0][MLWH_COG_UK_ID] == "TEST-123ABC"
assert rows[0][MLWH_RNA_ID] == "TC-rna-00000029_H11"
assert rows[0][MLWH_PLATE_BARCODE] == "TC-rna-00000029"
assert rows[0][MLWH_COORDINATE] == "H11"
assert rows[0][MLWH_RESULT] == "Negative"
assert rows[0][MLWH_DATE_TESTED] == date_tested_1
assert rows[0][MLWH_SOURCE] == "Test Centre"
assert rows[0][MLWH_LAB_ID] == "TC"
assert rows[0][MLWH_CH1_TARGET] is None
assert rows[0][MLWH_CH1_RESULT] is None
assert rows[0][MLWH_CH1_CQ] is None
assert rows[0][MLWH_CH2_TARGET] is None
assert rows[0][MLWH_CH2_RESULT] is None
assert rows[0][MLWH_CH2_CQ] is None
assert rows[0][MLWH_CH3_TARGET] is None
assert rows[0][MLWH_CH3_RESULT] is None
assert rows[0][MLWH_CH3_CQ] is None
assert rows[0][MLWH_CH4_TARGET] is None
assert rows[0][MLWH_CH4_RESULT] is None
assert rows[0][MLWH_CH4_CQ] is None
assert rows[0][MLWH_FILTERED_POSITIVE] is None
assert rows[0][MLWH_FILTERED_POSITIVE_VERSION] is None
assert rows[0][MLWH_FILTERED_POSITIVE_TIMESTAMP] is None
assert rows[0][MLWH_CREATED_AT] is not None
assert rows[0][MLWH_UPDATED_AT] is not None
assert rows[0][MLWH_MUST_SEQUENCE] == 0
assert rows[0][MLWH_PREFERENTIALLY_SEQUENCE] == 1

assert rows[1][MLWH_MONGODB_ID] == "5f562d9931d9959b92544729"
assert rows[1][MLWH_ROOT_SAMPLE_ID] == "ABC00000005"
assert rows[1][MLWH_COG_UK_ID] == "TEST-123ABD"
assert rows[1][MLWH_RNA_ID] == "TC-rna-00000029_H12"
assert rows[1][MLWH_PLATE_BARCODE] == "TC-rna-00000029"
assert rows[1][MLWH_COORDINATE] == "H12"
assert rows[1][MLWH_RESULT] == RESULT_VALUE_POSITIVE
assert rows[1][MLWH_DATE_TESTED] == date_tested_2
assert rows[1][MLWH_SOURCE] == "Test Centre"
assert rows[1][MLWH_LAB_ID] == "TC"
assert rows[1][MLWH_CH1_TARGET] == "ORF1ab"
assert rows[1][MLWH_CH1_RESULT] == RESULT_VALUE_POSITIVE
assert rows[1][MLWH_CH1_CQ] == Decimal("21.28726211")
assert rows[1][MLWH_CH2_TARGET] == "N gene"
assert rows[1][MLWH_CH2_RESULT] == RESULT_VALUE_POSITIVE
assert rows[1][MLWH_CH2_CQ] == Decimal("18.12736661")
assert rows[1][MLWH_CH3_TARGET] == "S gene"
assert rows[1][MLWH_CH3_RESULT] == RESULT_VALUE_POSITIVE
assert rows[1][MLWH_CH3_CQ] == Decimal("22.63616273")
assert rows[1][MLWH_CH4_TARGET] == "MS2"
assert rows[1][MLWH_CH4_RESULT] == RESULT_VALUE_POSITIVE
assert rows[1][MLWH_CH4_CQ] == Decimal("26.25125612")
assert rows[1][MLWH_FILTERED_POSITIVE] == 1
assert rows[1][MLWH_FILTERED_POSITIVE_VERSION] == "v2.3"
assert rows[1][MLWH_FILTERED_POSITIVE_TIMESTAMP] == filtered_positive_timestamp
assert rows[1][MLWH_CREATED_AT] is not None
assert rows[1][MLWH_UPDATED_AT] is not None
assert rows[1][MLWH_MUST_SEQUENCE] == 1
assert rows[1][MLWH_PREFERENTIALLY_SEQUENCE] == 0
first_row = cast(Dict[str, RowItemType], rows[0])
second_row = cast(Dict[str, RowItemType], rows[1])

assert first_row[MLWH_MONGODB_ID] == "5f562d9931d9959b92544728"
assert first_row[MLWH_ROOT_SAMPLE_ID] == "ABC00000004"
assert first_row[MLWH_COG_UK_ID] == "TEST-123ABC"
assert first_row[MLWH_RNA_ID] == "TC-rna-00000029_H11"
assert first_row[MLWH_PLATE_BARCODE] == "TC-rna-00000029"
assert first_row[MLWH_COORDINATE] == "H11"
assert first_row[MLWH_RESULT] == "Negative"
assert first_row[MLWH_DATE_TESTED] == date_tested_1
assert first_row[MLWH_SOURCE] == "Test Centre"
assert first_row[MLWH_LAB_ID] == "TC"
assert first_row[MLWH_CH1_TARGET] is None
assert first_row[MLWH_CH1_RESULT] is None
assert first_row[MLWH_CH1_CQ] is None
assert first_row[MLWH_CH2_TARGET] is None
assert first_row[MLWH_CH2_RESULT] is None
assert first_row[MLWH_CH2_CQ] is None
assert first_row[MLWH_CH3_TARGET] is None
assert first_row[MLWH_CH3_RESULT] is None
assert first_row[MLWH_CH3_CQ] is None
assert first_row[MLWH_CH4_TARGET] is None
assert first_row[MLWH_CH4_RESULT] is None
assert first_row[MLWH_CH4_CQ] is None
assert first_row[MLWH_FILTERED_POSITIVE] is None
assert first_row[MLWH_FILTERED_POSITIVE_VERSION] is None
assert first_row[MLWH_FILTERED_POSITIVE_TIMESTAMP] is None
assert first_row[MLWH_CREATED_AT] is not None
assert first_row[MLWH_UPDATED_AT] is not None
assert first_row[MLWH_MUST_SEQUENCE] == 0
assert first_row[MLWH_PREFERENTIALLY_SEQUENCE] == 1

assert second_row[MLWH_MONGODB_ID] == "5f562d9931d9959b92544729"
assert second_row[MLWH_ROOT_SAMPLE_ID] == "ABC00000005"
assert second_row[MLWH_COG_UK_ID] == "TEST-123ABD"
assert second_row[MLWH_RNA_ID] == "TC-rna-00000029_H12"
assert second_row[MLWH_PLATE_BARCODE] == "TC-rna-00000029"
assert second_row[MLWH_COORDINATE] == "H12"
assert second_row[MLWH_RESULT] == RESULT_VALUE_POSITIVE
assert second_row[MLWH_DATE_TESTED] == date_tested_2
assert second_row[MLWH_SOURCE] == "Test Centre"
assert second_row[MLWH_LAB_ID] == "TC"
assert second_row[MLWH_CH1_TARGET] == "ORF1ab"
assert second_row[MLWH_CH1_RESULT] == RESULT_VALUE_POSITIVE
assert second_row[MLWH_CH1_CQ] == Decimal("21.28726211")
assert second_row[MLWH_CH2_TARGET] == "N gene"
assert second_row[MLWH_CH2_RESULT] == RESULT_VALUE_POSITIVE
assert second_row[MLWH_CH2_CQ] == Decimal("18.12736661")
assert second_row[MLWH_CH3_TARGET] == "S gene"
assert second_row[MLWH_CH3_RESULT] == RESULT_VALUE_POSITIVE
assert second_row[MLWH_CH3_CQ] == Decimal("22.63616273")
assert second_row[MLWH_CH4_TARGET] == "MS2"
assert second_row[MLWH_CH4_RESULT] == RESULT_VALUE_POSITIVE
assert second_row[MLWH_CH4_CQ] == Decimal("26.25125612")
assert second_row[MLWH_FILTERED_POSITIVE] == 1
assert second_row[MLWH_FILTERED_POSITIVE_VERSION] == "v2.3"
assert second_row[MLWH_FILTERED_POSITIVE_TIMESTAMP] == filtered_positive_timestamp
assert second_row[MLWH_CREATED_AT] is not None
assert second_row[MLWH_UPDATED_AT] is not None
assert second_row[MLWH_MUST_SEQUENCE] == 1
assert second_row[MLWH_PREFERENTIALLY_SEQUENCE] == 0


def test_insert_samples_from_docs_into_mlwh_date_tested_missing(config, mlwh_connection):
Expand Down Expand Up @@ -1676,7 +1680,7 @@ def test_insert_samples_from_docs_into_mlwh_date_tested_none(
rows = cursor.fetchall()
cursor.close()

assert rows[0][MLWH_DATE_TESTED] is None
assert cast(Dict[str, RowItemType], rows[0])[MLWH_DATE_TESTED] is None


def test_insert_samples_from_docs_into_mlwh_returns_false_none_connection(config, mlwh_connection):
Expand Down

0 comments on commit 27f8796

Please sign in to comment.