From 889af42f52248573eb2cc7307e2d5bc987e260b3 Mon Sep 17 00:00:00 2001 From: nageshwaran Date: Mon, 28 Oct 2024 18:04:11 +0530 Subject: [PATCH 1/3] added url in post logic and test using url --- src/unstract/llmwhisperer/client_v2.py | 12 +- tests/conftest.py | 3 + tests/integration/client_v2_test.py | 69 +++ ...credit_card.low_cost.layout_preserving.txt | 586 +++++++++--------- 4 files changed, 382 insertions(+), 288 deletions(-) diff --git a/src/unstract/llmwhisperer/client_v2.py b/src/unstract/llmwhisperer/client_v2.py index 101e74e..c9fef16 100644 --- a/src/unstract/llmwhisperer/client_v2.py +++ b/src/unstract/llmwhisperer/client_v2.py @@ -151,13 +151,13 @@ def whisper( file_path: str = "", stream: IO[bytes] = None, url: str = "", - mode: str = "high_quality", + mode: str = "form", output_mode: str = "layout_preserving", page_seperator: str = "<<<", pages_to_extract: str = "", median_filter_size: int = 0, gaussian_blur_radius: int = 0, - line_splitter_tolerance: float = 0.75, + line_splitter_tolerance: float = 0.4, horizontal_stretch_factor: float = 1.0, mark_vertical_lines: bool = False, mark_horizontal_lines: bool = False, @@ -178,7 +178,7 @@ def whisper( file_path (str, optional): The path to the file to be processed. Defaults to "". stream (IO[bytes], optional): A stream of bytes to be processed. Defaults to None. url (str, optional): The URL of the file to be processed. Defaults to "". - mode (str, optional): The processing mode. Can be "high_quality", "form", "low_cost" or "native_text". Defaults to "high_quality". + mode (str, optional): The processing mode. Can be "high_quality", "form", "low_cost" or "native_text". Defaults to "form". output_mode (str, optional): The output mode. Can be "layout_preserving" or "text". Defaults to "layout_preserving". page_seperator (str, optional): The page separator. Defaults to "<<<". pages_to_extract (str, optional): The pages to extract. Defaults to "". @@ -207,7 +207,6 @@ def whisper( self.logger.debug("whisper called") api_url = f"{self.base_url}/whisper" params = { - "url": url, "mode": mode, "output_mode": output_mode, "page_seperator": page_seperator, @@ -272,7 +271,8 @@ def generate(): data=data, ) else: - req = requests.Request("POST", api_url, params=params, headers=self.headers) + params["url_in_post"] = True + req = requests.Request("POST", api_url, params=params, headers=self.headers, data=url) prepared = req.prepare() s = requests.Session() response = s.send(prepared, timeout=120, stream=should_stream) @@ -340,7 +340,7 @@ def generate(): return message # Will not reach here if status code is 202 - message = response.text + message = json.loads(response.text) message["status_code"] = response.status_code return message diff --git a/tests/conftest.py b/tests/conftest.py index 49eab9a..3c342c1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,13 @@ import os import pytest +from dotenv import load_dotenv from unstract.llmwhisperer.client import LLMWhispererClient from unstract.llmwhisperer.client_v2 import LLMWhispererClientV2 +load_dotenv() + @pytest.fixture(name="client") def llm_whisperer_client(): diff --git a/tests/integration/client_v2_test.py b/tests/integration/client_v2_test.py index a5ef4b6..dcb9bcd 100644 --- a/tests/integration/client_v2_test.py +++ b/tests/integration/client_v2_test.py @@ -69,3 +69,72 @@ def test_whisper_v2(client_v2, data_dir, output_mode, mode, input_file): unified_diff(exp.splitlines(), extracted_text.splitlines(), fromfile="Expected", tofile="Extracted") ) pytest.fail(f"Texts are not similar enough: {similarity * 100:.2f}% similarity. Diff:\n{diff}") + + +@pytest.mark.parametrize( + "output_mode, mode, url, input_file, page_count", + [ + ("layout_preserving", "native_text", "https://unstractpocstorage.blob.core.windows.net/public/Amex.pdf", + "credit_card.pdf", 7), + ("layout_preserving", "low_cost", "https://unstractpocstorage.blob.core.windows.net/public/Amex.pdf", + "credit_card.pdf", 7), + ( + "layout_preserving", "high_quality", + "https://unstractpocstorage.blob.core.windows.net/public/scanned_bill.pdf", + "restaurant_invoice_photo.pdf", 1), + ("layout_preserving", "form", "https://unstractpocstorage.blob.core.windows.net/public/scanned_form.pdf", + "handwritten-form.pdf", 1), + ] +) +def test_whisper_v2_url_in_post(client_v2, data_dir, output_mode, mode, url, input_file, page_count): + usage_before = client_v2.get_usage_info() + whisper_result = client_v2.whisper( + mode=mode, output_mode=output_mode, url=url, wait_for_completion=True + ) + logger.debug(f"Result for '{output_mode}', '{mode}', " f"'{input_file}: {whisper_result}") + + exp_basename = f"{Path(input_file).stem}.{mode}.{output_mode}.txt" + exp_file = os.path.join(data_dir, "expected", exp_basename) + with open(exp_file, encoding="utf-8") as f: + exp = f.read() + + assert isinstance(whisper_result, dict) + assert whisper_result["status_code"] == 200 + + # For text based processing, perform a strict match + if mode == "native_text" and output_mode == "text": + assert whisper_result["extraction"]["result_text"] == exp + # For OCR based processing, perform a fuzzy match + else: + extracted_text = whisper_result["extraction"]["result_text"] + similarity = SequenceMatcher(None, extracted_text, exp).ratio() + threshold = 0.97 + + if similarity < threshold: + diff = "\n".join( + unified_diff(exp.splitlines(), extracted_text.splitlines(), fromfile="Expected", tofile="Extracted") + ) + pytest.fail(f"Texts are not similar enough: {similarity * 100:.2f}% similarity. Diff:\n{diff}") + + usage_after = client_v2.get_usage_info() + # Verify usage after extraction + verify_usage(usage_before, usage_after, page_count, mode) + + +def verify_usage(before_extract, after_extract, page_count, mode='form'): + all_modes = ['form', 'high_quality', 'low_cost', 'native_text'] + all_modes.remove(mode) + assert (after_extract['today_page_count'] == before_extract['today_page_count'] + page_count), \ + "today_page_count calculation is wrong" + if after_extract['current_page_count'] != -1: + assert (after_extract['current_page_count'] == before_extract['current_page_count'] + page_count), \ + "current_page_count calculation is wrong" + if after_extract['overage_page_count'] > 0: + assert (after_extract['overage_page_count'] == before_extract['overage_page_count'] + page_count), \ + "overage_page_count calculation is wrong" + assert (after_extract[f'current_page_count_{mode}'] == before_extract[f'current_page_count_{mode}'] + page_count), \ + f"{mode} mode calculation is wrong" + for i in range(len(all_modes)): + assert (after_extract[f'current_page_count_{all_modes[i]}'] == + before_extract[f'current_page_count_{all_modes[i]}']), \ + f"{all_modes[i]} mode calculation is wrong" diff --git a/tests/test_data/expected/credit_card.low_cost.layout_preserving.txt b/tests/test_data/expected/credit_card.low_cost.layout_preserving.txt index 974d682..8964a8a 100644 --- a/tests/test_data/expected/credit_card.low_cost.layout_preserving.txt +++ b/tests/test_data/expected/credit_card.low_cost.layout_preserving.txt @@ -1,355 +1,377 @@ -AMERICAN Blue Cash® from American Express p. 1/7 - EXPRESS - JOSEPH PAULSON Customer Care: 1-888-258-3741 - Closing Date 09/27/23 TTY: Use Relay 711 - Website: americanexpress.com - Account Ending 7-73045 ~ ~ - - Reward Dollars - New Balance $10,269.65 as of 08/29/2023 - - Minimum Payment Due $205.39 1,087.93 - For more details about Rewards, visit - americanexpress.com/cashbackrewards - - Payment Due Date 10/22/23 Account Summary - - Late Payment Warning: If we do not receive your Minimum Payment Due by Previous Balance $6,583.67 - - the Payment Due Date of 10/22/23, you may have to pay a late fee of up to Payments/Credits -$6,583.67 - $40.00 and your APRs may be increased to the Penalty APR of 29.99%. New Charges +$10,269.65 - Fees +$0.00 - - Interest Charged +$0.00 - - Minimum Payment Warning: If you have a Non-Plan Balance and make only the New Balance $10,269.65 - - minimum payment each period, you will pay more in interest and it will take you longer Minimum Payment Due $205.39 - to pay off your Non-Plan Balance. For example: - Credit Limit $26,400.00 - If you make no additional You will pay off the balance And you will pay an Available Credit $16,130.35 - charges and each month shown on this statement in estimated total of... - you pay... about... Cash Available Advance Cash Limit $4,600.00 $4,600.00 - - Only the 22 years $29,830 - Minimum Payment Due - - $14,640 - $407 3 years (Savings = $15,190) - - If you would like information about credit counseling services, call 1-888-733-4139. - - See page 2 for important information about your account. - [+] - - > Please refer to the IMPORTANT NOTICES section on - page 7. - - Continued on page 3 - - \ Please fold on the perforation below, detach and return with your payment V - - ps Payment Coupon Pay by Computer Pay by Phone Account Ending 7-73045 - I Do not staple or use paper clips americanexpress.com/pbc C 1-800-472-9297 - Enter 15 digit account # on all payments. - Make check payable to American Express. - - JOSEPH PAULSON Payment Due Date - 3742 CLOUD SPGS RD 10/22/23 - #403-1045 - DALLAS TX 75219-4136 New Balance - $10,269.65 - - Minimum Payment Due - 205.39 - - See reverse side for instructions AMERICAN EXPRESS e - - on how to update your address, PO BOX 6031 Amount Enclosed - phone number, or email. CAROL STREAM IL 60197-6031 - - Wall dbollllllllatloodladll - - 00003499916e2708152 0010269650000280539 a4 d +AMERICAN Blue Cash® from American Express p. 1/7 + EXPRESS + JOSEPH PAULSON Customer Care: 1-888-258-3741 + TTY: Use 711 + Closing Date 09/27/23 Relay + Account 7-73045 Website: americanexpress.com + Ending ~ ~ + + Reward Dollars + New Balance $10,269.65 + as of 08/29/2023 + + Minimum Payment Due $205.39 1,087.93 + For more details about Rewards, visit + americanexpress.com/cashbackrewards + + Payment Due Date 10/22/23 + Account Summary + + Previous Balance $6,583.67 + Late Payment Warning: If we do not receive your Minimum Payment Due by + the Payment Due Date of 10/22/23, you may have to pay a late fee of up to Payments/Credits -$6,583.67 + $40.00 and your APRs may be increased to the Penalty APR of 29.99%. New Charges +$10,269.65 + Fees +$0.00 + + Interest Charged +$0.00 + + New Balance + Minimum Payment Warning: If you have a Non-Plan Balance and make only the $10,269.65 + Minimum Due $205.39 + minimum payment each period, you will pay more in interest and it will take you longer Payment + to pay off your Non-Plan Balance. For example: + Credit Limit $26,400.00 + If you make no additional You will pay off the balance And you will pay an Available Credit $16,130.35 + charges and each month shown on this statement in estimated total of... + Cash Advance Limit $4,600.00 + you pay... about... + Available Cash $4,600.00 + + Only the + 22 years $29,830 + Minimum Payment Due + + $14,640 + $407 3 years = + (Savings $15,190) + + If you would like information about credit counseling services, call 1-888-733-4139. + + See page 2 for important information about your account. + [+] + + > Please refer to the IMPORTANT NOTICES section on + page 7. + + Continued on page 3 + + \ Please fold on the perforation below, detach and return with your payment V + + ps Payment Coupon Pay by Computer Pay by Phone Account Ending 7-73045 + I Do not or use C 1-800-472-9297 + staple paper clips americanexpress.com/pbc + Enter 15 digit account # on all payments. + Make check payable to American Express. + + JOSEPH PAULSON Payment Due Date + 3742 CLOUD SPGS RD 10/22/23 + #403-1045 + New Balance + DALLAS TX 75219-4136 + $10,269.65 + + Minimum Payment Due + 205.39 + + See reverse side for instructions AMERICAN EXPRESS e + PO BOX 6031 Amount Enclosed + on how to update your address, + CAROL STREAM IL 60197-6031 + phone number, or email. + + Wall dbollllllllatloodladll + + 00003499916e2708152 0010269650000280539 a4 d <<< - JOSEPH PAULSON Account Ending 7-73045 p. 2/7 - - Payments: Your payment must be sent to the payment address shown on represents money owed to you. If within the six-month period following -your statement and must be received by 5 p.m. local time at that address to the date of the first statement indicating the credit balance you do not - be credited as of the day it is received. Payments we receive after 5 p.m. will request a refund or charge enough to use up the credit balance, we will - not be credited to your Account until the next day. Payments must also: (1) send you a check for the credit balance within 30 days if the amount is - include the remittance coupon from your statement; (2) be made with a $1.00 or more. - single check drawn on a US bank and payable in US dollars, or with a Credit Reporting: We may report information about your Account to credit - negotiable instrument payable in US dollars and clearable through the US bureaus. Late payments, missed payments, or other defaults on your - banking system; and (3) include your Account number. If your payment Account may be reflected in your credit report. - does not meet all of the above requirements, crediting may be delayed and What To Do If You Think You Find A Mistake On Your Statement -you may incur late payment fees and additional interest charges. Electronic If you think there is an error on your statement, write to us at: - payments must be made through an electronic payment method payable American Express, PO Box 981535, El Paso TX 79998-1535 - in US dollars and clearable through the US banking system. Please do not You may also contact us on the Web: www.americanexpress.com - send post-dated checks as they will be deposited upon receipt. Any In your letter, give us the following information: - restrictive language on a payment we accept will have no effect on us - Account information: Your name and account number. -without our express prior written approval. We will re-present to your - Dollar amount: The dollar amount of the suspected error. -financial institution any payment that is returned unpaid. - Description of Problem: If you think there is an error on your bill, - Permission for Electronic Withdrawal: (1) When you send a check for describe what you believe is wrong and why you believe it is a mistake. - payment, you give us permission to electronically withdraw your payment You must contact us within 60 days after the error appeared on your -from your deposit or other asset account. We will process checks statement. - electronically by transmitting the amount of the check, routing number, You must notify us of any potential errors in writing [or electronically]. You - account number and check serial number to your financial institution, may call us, but if you do we are not required to investigate any potential - unless the check is not processable electronically or a less costly process is errors and you may have to pay the amount in question. - available. When we process your check electronically, your payment may While we investigate whether or not there has been an error, the following - be withdrawn from your deposit or other asset account as soon as the same are true: - day we receive your check, and you will not receive that cancelled check - We cannot try to collect the amount in question, or report you as -with your deposit or other asset account statement. If we cannot collect the delinquent on that amount. -funds electronically we may issue a draft against your deposit or other asset - The charge in question may remain on your statement, and we may - account for the amount of the check. (2) By using Pay By Computer, Pay By continue to charge you interest on that amount. But, if we determine that - Phone or any other electronic payment service of ours, you give us we made a mistake, you will not have to pay the amount in question or any - permission to electronically withdraw funds from the deposit or other asset interest or other fees related to that amount. - account you specify in the amount you request. Payments using such - While you do not have to pay the amount in question, you are responsible - services of ours received after 8:00 p.m. MST may not be credited until the for the remainder of your balance. - next day. - We can apply any unpaid amount against your credit limit. - How We Calculate Your Balance: We use the Average Daily Balance (ADB) Your Rights If You Are Dissatisfied With Your Credit Card Purchases - method (including new transactions) to calculate the balance on which we If you are dissatisfied with the goods or services that you have purchased - charge interest on your Account. Call the Customer Care number on page 3 with your credit card, and you have tried in good faith to correct the -for more information about this balance computation method and how problem with the merchant, you may have the right not to pay the - resulting interest charges are determined. The method we use to figure the remaining amount due on the purchase. -ADB and interest results in daily compounding of interest. To use this right, all of the following must be true: - Paying Interest: Your due date is at least 25 days after the close of each 1. The purchase must have been made in your home state or within 100 - billing period. We will not charge you interest on your purchases if you pay miles of your current mailing address, and the purchase price must have - each month your entire balance (or Adjusted Balance if applicable) by the been more than $50. (Note: Neither of these is necessary if your purchase - due date each month. We will charge you interest on cash advances and was based on an advertisement we mailed to you, or if we own the - (unless otherwise disclosed) balance transfers beginning on the transaction company that sold you the goods or services.) - date. 2. You must have used your credit card for the purchase. Purchases made - Foreign Currency Charges: If you make a Charge in a foreign currency, we with cash advances from an ATM or with a check that accesses your credit -will convert it into US dollars on the date we or our agents process it. We card account do not qualify. -will charge a fee of 2.70% of the converted US dollar amount. We will 3. You must not yet have fully paid for the purchase. - choose a conversion rate that is acceptable to us for that date, unless a If all of the criteria above are met and you are still dissatisfied with the - particular rate is required by law. The conversion rate we use is no more purchase, contact us in writing or electronically at: -than the highest official rate published by a government agency or the American Express, PO Box 981535, El Paso TX 79998-1535 - highest interbank rate we identify from customary banking sources on the www.americanexpress.com - conversion date or the prior business day. This rate may differ from rates in While we investigate, the same rules apply to the disputed amount as - effect on the date of your charge. Charges converted by establishments discussed above. After we finish our investigation, we will tell you our - (such as airlines) will be billed at the rates such establishments use. decision. At that point, if we think you owe an amount and you do not pay - Credit Balance: A credit balance (designated CR) shown on this statement we may report you as delinquent. - - Pay Your Bill with AutoPay - - Deduct your payment from your bank - account automatically each month. - - - Avoid late fees - - - Save time - - Change of Address, phone number, email - - Visit americanexpress.com/autopay - - Online at www.americanexpress.com/updatecontactinfo today to enroll. - - Via mobile device - - - Voice automated: call the number on the back of your card - - For name, company name, and foreign address or phone changes, please call Customer Care - - Please do not add any written communication or address change on this stub - For information on how we protect your - privacy and to set your communication - and privacy choices, please visit - www.americanexpress.com/privacy. + JOSEPH PAULSON + Account Ending 7-73045 p. 2/7 + + Payments: Your payment must be sent to the payment address shown on represents money owed to you. If within the six-month period following +your statement and must be received by 5 p.m. local time at that address to the date of the first statement indicating the credit balance you do not + be credited as of the day it is received. Payments we receive after 5 p.m. will request a refund or charge enough to use up the credit balance, we will + not be credited to your Account until the next day. Payments must also: (1) send you a check for the credit balance within 30 days if the amount is + include the remittance coupon from your statement; (2) be made with a $1.00 or more. + single check drawn on a US bank and payable in US dollars, or with a Credit Reporting: We may report information about your Account to credit + negotiable instrument payable in US dollars and clearable through the US bureaus. Late payments, missed payments, or other defaults on your + banking system; and (3) include your Account number. If your payment Account may be reflected in your credit report. + does not meet all of the above requirements, crediting may be delayed and What To Do If You Think You Find A Mistake On Your Statement +you may incur late payment fees and additional interest charges. Electronic If you think there is an error on your statement, write to us at: + payments must be made through an electronic payment method payable American Express, PO Box 981535, El Paso TX 79998-1535 + in US dollars and clearable through the US banking system. Please do not You may also contact us on the Web: www.americanexpress.com + send post-dated checks as they will be deposited upon receipt. Any In your letter, give us the following information: + restrictive language on a payment we accept will have no effect on us - Account information: Your name and account number. +without our express prior written approval. We will re-present to your - Dollar amount: The dollar amount of the suspected error. + - If +financial institution any payment that is returned unpaid. Description of Problem: you think there is an error on your bill, + Permission for Electronic Withdrawal: (1) When you send a check for describe what you believe is wrong and why you believe it is a mistake. + payment, you give us permission to electronically withdraw your payment You must contact us within 60 days after the error appeared on your +from your deposit or other asset account. We will process checks statement. + electronically by transmitting the amount of the check, routing number, You must notify us of any potential errors in writing [or electronically]. You + account number and check serial number to your financial institution, may call us, but if you do we are not required to investigate any potential + unless the check is not processable electronically or a less costly process is errors and you may have to pay the amount in question. + available. When we process your check electronically, your payment may While we investigate whether or not there has been an error, the following + be withdrawn from your deposit or other asset account as soon as the same are true: + day we receive your check, and you will not receive that cancelled check - We cannot try to collect the amount in question, or report you as +with your deposit or other asset account statement. If we cannot collect the delinquent on that amount. +funds electronically we may issue a draft against your deposit or other asset - The charge in question may remain on your statement, and we may + account for the amount of the check. (2) By using Pay By Computer, Pay By continue to charge you interest on that amount. But, if we determine that + Phone or any other electronic payment service of ours, you give us we made a mistake, you will not have to pay the amount in question or any + permission to electronically withdraw funds from the deposit or other asset interest or other fees related to that amount. + account you specify in the amount you request. Payments using such - While you do not have to pay the amount in question, you are responsible + services of ours received after 8:00 p.m. MST may not be credited until the for the remainder of your balance. + next day. - We can apply any unpaid amount against your credit limit. + How We Calculate Your Balance: We use the Average Daily Balance (ADB) Your Rights If You Are Dissatisfied With Your Credit Card Purchases + method (including new transactions) to calculate the balance on which we If you are dissatisfied with the goods or services that you have purchased + charge interest on your Account. Call the Customer Care number on page 3 with your credit card, and you have tried in good faith to correct the +for more information about this balance computation method and how problem with the merchant, you may have the right not to pay the + resulting interest charges are determined. The method we use to figure the remaining amount due on the purchase. +ADB and interest results in daily compounding of interest. To use this right, all of the following must be true: + Paying Interest: Your due date is at least 25 days after the close of each 1. The purchase must have been made in your home state or within 100 + billing period. We will not charge you interest on your purchases if you pay miles of your current mailing address, and the purchase price must have + each month your entire balance (or Adjusted Balance if applicable) by the been more than $50. (Note: Neither of these is necessary if your purchase + due date each month. We will charge you interest on cash advances and was based on an advertisement we mailed to you, or if we own the + (unless otherwise disclosed) balance transfers beginning on the transaction company that sold you the goods or services.) + date. 2. You must have used your credit card for the purchase. Purchases made + Foreign Currency Charges: If you make a Charge in a foreign currency, we with cash advances from an ATM or with a check that accesses your credit +will convert it into US dollars on the date we or our agents process it. We card account do not qualify. +will charge a fee of 2.70% of the converted US dollar amount. We will 3. You must not yet have fully paid for the purchase. + choose a conversion rate that is acceptable to us for that date, unless a If all of the criteria above are met and you are still dissatisfied with the + particular rate is required by law. The conversion rate we use is no more purchase, contact us in writing or electronically at: +than the highest official rate published by a government agency or the American Express, PO Box 981535, El Paso TX 79998-1535 + highest interbank rate we identify from customary banking sources on the www.americanexpress.com + conversion date or the prior business day. This rate may differ from rates in While we investigate, the same rules apply to the disputed amount as + effect on the date of your charge. Charges converted by establishments discussed above. After we finish our investigation, we will tell you our + (such as airlines) will be billed at the rates such establishments use. decision. At that point, if we think you owe an amount and you do not pay + Credit Balance: A credit balance (designated CR) shown on this statement we may report you as delinquent. + + Your Bill with + Pay AutoPay + + Deduct your payment from your bank + account automatically each month. + + - + Avoid late fees + - + Save time + + Change of Address, phone number, email + + - Visit americanexpress.com/autopay + Online at www.americanexpress.com/updatecontactinfo + today to enroll. + - + Via mobile device + - + Voice automated: call the number on the back of your card + - + For name, company name, and foreign address or phone changes, please call Customer Care + + Please do not add any written communication or address change on this stub + For information on how we protect your + privacy and to set your communication + and privacy choices, please visit + www.americanexpress.com/privacy. <<< -AMERICAN Blue Cash® from American Express p. 3/7 - EXPRESS - JOSEPH PAULSON +AMERICAN Blue Cash® from American Express p. 3/7 + EXPRESS + JOSEPH PAULSON - Closing Date 09/27/23 Account Ending 7-73045 + Closing Date 09/27/23 Account Ending 7-73045 - Customer Care & Billing Inquiries 1-888-258-3741 - C International Collect 1-336-393-1111 =] Website: americanexpress.com - Cash Advance at ATMs Inquiries 1-800-CASH-NOW - Large Print & Braille Statements 1-888-258-3741 Customer Care Payments - & Billing Inquiries PO BOX 6031 - P.O. BOX 981535 CAROL STREAM IL - EL PASO, TX 60197-6031 - 79998-1535 - Hearing Impaired - Online chat at americanexpress.com or use Relay dial 711 and 1-888-258-3741 + Customer Care & Billing Inquiries 1-888-258-3741 + C International Collect 1-336-393-1111 + =] Website: americanexpress.com + Cash Advance at ATMs Inquiries 1-800-CASH-NOW + Customer Care Payments + Large Print & Braille Statements 1-888-258-3741 + & Billing Inquiries PO BOX 6031 + P.O. BOX 981535 CAROL STREAM IL + EL PASO, TX 60197-6031 + 79998-1535 + Hearing Impaired + Online chat at americanexpress.com or use Relay dial 711 and 1-888-258-3741 - American Express® High Yield Savings Account - No monthly fees. No minimum opening monthly deposit. 24/7 customer + American Express® High Yield Savings Account + No monthly fees. No minimum opening monthly deposit. 24/7 customer - support. FDIC insured. Meet your savings goals faster with an American + support. FDIC insured. Meet your savings goals faster with an American - Express High Yield Savings Account. Terms apply. Learn more by visiting + Express High Yield Savings Account. Terms apply. Learn more by visiting - americanexpress.com/savenow. + americanexpress.com/savenow. - Total + Total - Payments -$6,583.67 + Payments -$6,583.67 - Credits $0.00 + Credits $0.00 - Total Payments and Credits -$6,583.67 + Total Payments and Credits -$6,583.67 - Payments Amount + Payments Amount - 09/22/23* MOBILE PAYMENT - THANK YOU -$6,583.67 + 09/22/23* MOBILE PAYMENT - THANK YOU -$6,583.67 - Total + Total - Total New Charges $10,269.65 + Total New Charges $10,269.65 - JOSEPH PAULSON - an Card Ending 7-73045 + JOSEPH PAULSON + an Card Ending 7-73045 - Amount + Amount - 08/30/23 SAFEWAY CUPERTINO CA $23.11 - 800-898-4027 + 08/30/23 SAFEWAY CUPERTINO CA $23.11 + 800-898-4027 - 09/01/23 BANANA LEAF 650000012619980 MILPITAS CA $144.16 - 4087199811 + 09/01/23 BANANA LEAF 650000012619980 MILPITAS CA $144.16 + 4087199811 - 09/01/23 BT*LINODE*AKAMAI CAMBRIDGE MA $6,107.06 - 6093807100 + 09/01/23 BT*LINODE*AKAMAI CAMBRIDGE MA $6,107.06 + 6093807100 - 09/01/23 GOOGLE*GSUITE_SOCIALANIMAL.IO MOUNTAIN VIEW CA $20.44 - ADVERTISING SERVICE + 09/01/23 GOOGLE*GSUITE_SOCIALANIMAL.IO MOUNTAIN VIEW CA $20.44 + ADVERTISING SERVICE - 09/02/23 Amazon Web Services AWS.Amazon.com WA $333.88 - WEB SERVICES + 09/02/23 Amazon Web Services AWS.Amazon.com WA $333.88 + WEB SERVICES - 09/03/23 SAFEWAY CUPERTINO CA $11.18 - 800-898-4027 + 09/03/23 SAFEWAY CUPERTINO CA $11.18 + 800-898-4027 - 09/09/23 TST* BIKANER SWEET 00053687 SUNNYVALE CA $21.81 - RESTAURANT + 09/09/23 TST* BIKANER SWEET 00053687 SUNNYVALE CA $21.81 + RESTAURANT - Continued on reverse + Continued on reverse <<< - JOSEPH PAULSON Account Ending 7-73045 p.4/7 + JOSEPH PAULSON + Account Ending 7-73045 p.4/7 - Amount + Amount -09/10/23 CVS PHARMACY CUPERTINO CA $2.34 - 8007467287 +09/10/23 CVS PHARMACY CUPERTINO CA $2.34 + 8007467287 -09/13/23 APPLE.COM/BILL INTERNET CHARGE CA $2.99 - RECORD STORE +09/13/23 APPLE.COM/BILL INTERNET CHARGE CA $2.99 + RECORD STORE -09/13/23 SAFEWAY CUPERTINO CA $26.73 - 800-898-4027 +09/13/23 SAFEWAY CUPERTINO CA $26.73 + 800-898-4027 -09/14/23 MCDONALD'S CUPERTINO CA $3.26 - 6509404200 +09/14/23 MCDONALD'S CUPERTINO CA $3.26 + 6509404200 -09/14/23 PANERA BREAD #204476 CAMPBELL CA $23.38 +09/14/23 PANERA BREAD #204476 CAMPBELL CA $23.38 - 975313007 95008 + 975313007 95008 -09/14/23 MANLEY DONUTS 00-08040662747 CUPERTINO CA $21.15 - BAKERY +09/14/23 MANLEY DONUTS 00-08040662747 CUPERTINO CA $21.15 + BAKERY -09/15/23 Ap|Pay 6631309 - PEETS B TMP 53033 OKALAND CA $4.27 - RESTAURANT +09/15/23 Ap|Pay 6631309 - PEETS B TMP 53033 OKALAND CA $4.27 + RESTAURANT -09/16/23 VEGAS.COM LAS VEGAS NV $761.58 - 18669983427 +09/16/23 VEGAS.COM LAS VEGAS NV $761.58 + 18669983427 -09/16/23 Ap|Pay PANDA EXPRESS LAS VEGAS NV $12.08 - FAST FOOD RESTAURANT +09/16/23 Ap|Pay PANDA EXPRESS LAS VEGAS NV $12.08 + FAST FOOD RESTAURANT -09/17/23 Ap|IPay LUX_STARBUCKS_ATRIUM LAS VEGAS NV $23.68 - 11980066 89109 - RESTAURANT +09/17/23 Ap|IPay LUX_STARBUCKS_ATRIUM LAS VEGAS NV $23.68 + 11980066 89109 + RESTAURANT -09/18/23 SPK*SPOKEO ENTPRS 888-858-0803 CA $119.95 +09/18/23 SPK*SPOKEO ENTPRS 888-858-0803 CA $119.95 - 888-858-0803 + 888-858-0803 -09/24/23 SIXT USA POS FORT LAUDERDALE FL $2,537.90 - AUTOMOBILE RENTAL - Sixt9497938611 - 30826E5JF4ZIIBIHSB +09/24/23 SIXT USA POS FORT LAUDERDALE FL $2,537.90 + AUTOMOBILE RENTAL + Sixt9497938611 + 30826E5JF4ZIIBIHSB -09/24/23 LUCKY #773.SANTA CLARACA 0000000009925 SANTA CLARA CA $35.17 - 4082475200 +09/24/23 LUCKY #773.SANTA CLARACA 0000000009925 SANTA CLARA CA $35.17 + 4082475200 -09/24/23 MILAN SWEET CENTER 0000 MILPITAS CA $27.03 - 408-946-2525 +09/24/23 MILAN SWEET CENTER 0000 MILPITAS CA $27.03 + 408-946-2525 -09/25/23 ApIPay MANLEY DONUTS 00-08040662747 CUPERTINO CA $6.50 +09/25/23 ApIPay MANLEY DONUTS 00-08040662747 CUPERTINO CA $6.50 - BAKERY + BAKERY - Amount + Amount -Total Fees for this Period $0.00 +Total Fees for this Period $0.00 - Amount + Amount -Total Interest Charged for this Period $0.00 +Total Interest Charged for this Period $0.00 -About Trailing Interest -You may see interest on your next statement even if you pay the new balance in full and on time and make no new charges. This is called -"trailing interest". Trailing interest is the interest charged when, for example, you didn't pay your previous balance in full. When that -happens, we charge interest from the first day of the billing period until we receive your payment in full. You can avoid paying interest -on purchases by paying your balance in full (or if you have a Plan balance, by paying your Adjusted Balance on your billing statement) by -the due date each month. Please see the "When we charge interest" sub-section in your Cardmember Agreement for details. +About Trailing Interest +You may see interest on your next statement even if you pay the new balance in full and on time and make no new charges. This is called +"trailing interest". Trailing interest is the interest charged when, for example, you didn't pay your previous balance in full. When that +happens, we charge interest from the first day of the billing period until we receive your payment in full. You can avoid paying interest +on purchases by paying your balance in full (or if you have a Plan balance, by paying your Adjusted Balance on your billing statement) by +the due date each month. Please see the "When we charge interest" sub-section in your Cardmember Agreement for details. - Continued on next page + Continued on next page <<< -AMERICAN Blue Cash® from American Express p.5/7 - EXPRESS - JOSEPH PAULSON +AMERICAN Blue Cash® from American Express p.5/7 + EXPRESS + JOSEPH PAULSON - Closing Date 09/27/23 Account Ending 7-73045 + Closing Date 09/27/23 Account Ending 7-73045 - Amount + Amount - Total Fees in 2023 $0.00 + Total Fees in 2023 $0.00 - Total Interest in 2023 $0.00 + Total Interest in 2023 $0.00 - Your Annual Percentage Rate (APR) is the annual interest rate on your account. - Variable APRs will not exceed 29.99%. - Transactions Dated Annual Balance Interest - Percentage Subject to Charge - From To Rate Interest Rate + Your Annual Percentage Rate (APR) is the annual interest rate on your account. + Variable APRs will not exceed 29.99%. + Transactions Dated Annual Balance Interest + Percentage Subject to Charge + From To Rate Interest Rate - Purchases 02/26/2011 24.49% (v) $0.00 $0.00 + Purchases 02/26/2011 24.49% (v) $0.00 $0.00 - Cash Advances 02/26/2011 29.99% (v) $0.00 $0.00 + Cash Advances 02/26/2011 29.99% (v) $0.00 $0.00 - Total $0.00 + Total $0.00 - (v) Variable Rate + (v) Variable Rate <<< -JOSEPH PAULSON Account Ending 7-73045 p. 6/7 +JOSEPH PAULSON + Account Ending 7-73045 p. 6/7 <<< -AMERICAN 7/7 - EXPRESS JOSEPH PAULSON Closing Date 09/27/23 Account Ending 7-73045 - - EFT Error Resolution Notice - In Case of Errors or Questions About Your Electronic Transfers Telephone us at 1-800-IPAY-AXP for Pay By - Phone questions, at 1-800-528-2122 for Pay By Computer questions, and at 1-800-528-4800 for AutoPay. You - may also write us at American Express, Electronic Funds Services, P.O. Box 981531, El Paso TX 79998-1531, or - contact online at www.americanexpress.com/inquirycenter as soon as you can, if you think your statement or - receipt is wrong or if you need more information about a transfer on the statement or receipt. We must hear from - you no later than 60 days after we sent you the FIRST statement on which the error or problem appeared. - 1. Tell us your name and account number (if any). - 2. Describe the error or the transfer you are unsure about, and explain as clearly as you can why you - believe it is an error or why you need more information. - 3. Tell us the dollar amount of the suspected error. - We will investigate your complaint and will correct any error promptly. If we take more than 10 business days to - do this, we will credit your account for the amount you think is in error, so that you will have the use of the money - during the time it takes us to complete our investigation. - - End of Important Notices. +AMERICAN 7/7 + EXPRESS JOSEPH PAULSON Closing Date 09/27/23 Account Ending 7-73045 + + EFT Error Resolution Notice + In Case of Errors or Questions About Your Electronic Transfers Telephone us at 1-800-IPAY-AXP for Pay By + Phone questions, at 1-800-528-2122 for Pay By Computer questions, and at 1-800-528-4800 for AutoPay. You + may also write us at American Express, Electronic Funds Services, P.O. Box 981531, El Paso TX 79998-1531, or + contact online at www.americanexpress.com/inquirycenter as soon as you can, if you think your statement or + receipt is wrong or if you need more information about a transfer on the statement or receipt. We must hear from + you no later than 60 days after we sent you the FIRST statement on which the error or problem appeared. + 1. Tell us your name and account number (if any). + 2. Describe the error or the transfer you are unsure about, and explain as clearly as you can why you + believe it is an error or why you need more information. + 3. Tell us the dollar amount of the suspected error. + We will investigate your complaint and will correct any error promptly. If we take more than 10 business days to + do this, we will credit your account for the amount you think is in error, so that you will have the use of the money + during the time it takes us to complete our investigation. + + End of Important Notices. <<< \ No newline at end of file From 23076d61011e9873424fbe222ff5454a9808da90 Mon Sep 17 00:00:00 2001 From: nageshwaran Date: Mon, 28 Oct 2024 22:25:08 +0530 Subject: [PATCH 2/3] added common function for extracted text assertion --- tests/integration/client_v2_test.py | 48 ++++++++++------------------- 1 file changed, 17 insertions(+), 31 deletions(-) diff --git a/tests/integration/client_v2_test.py b/tests/integration/client_v2_test.py index dcb9bcd..4578147 100644 --- a/tests/integration/client_v2_test.py +++ b/tests/integration/client_v2_test.py @@ -49,26 +49,8 @@ def test_whisper_v2(client_v2, data_dir, output_mode, mode, input_file): exp_basename = f"{Path(input_file).stem}.{mode}.{output_mode}.txt" exp_file = os.path.join(data_dir, "expected", exp_basename) - with open(exp_file, encoding="utf-8") as f: - exp = f.read() - - assert isinstance(whisper_result, dict) - assert whisper_result["status_code"] == 200 - - # For text based processing, perform a strict match - if mode == "native_text" and output_mode == "text": - assert whisper_result["extraction"]["result_text"] == exp - # For OCR based processing, perform a fuzzy match - else: - extracted_text = whisper_result["extraction"]["result_text"] - similarity = SequenceMatcher(None, extracted_text, exp).ratio() - threshold = 0.97 - - if similarity < threshold: - diff = "\n".join( - unified_diff(exp.splitlines(), extracted_text.splitlines(), fromfile="Expected", tofile="Extracted") - ) - pytest.fail(f"Texts are not similar enough: {similarity * 100:.2f}% similarity. Diff:\n{diff}") + # verify extracted text + do_fuzzy_assertion_with_extracted_text(exp_file, whisper_result, mode, output_mode) @pytest.mark.parametrize( @@ -95,15 +77,24 @@ def test_whisper_v2_url_in_post(client_v2, data_dir, output_mode, mode, url, inp exp_basename = f"{Path(input_file).stem}.{mode}.{output_mode}.txt" exp_file = os.path.join(data_dir, "expected", exp_basename) - with open(exp_file, encoding="utf-8") as f: + # verify extracted text + do_fuzzy_assertion_with_extracted_text(exp_file, whisper_result, mode, output_mode) + usage_after = client_v2.get_usage_info() + # Verify usage after extraction + verify_usage(usage_before, usage_after, page_count, mode) + + +def do_fuzzy_assertion_with_extracted_text(file_path, whisper_result, mode=None, output_mode=None): + with open(file_path, encoding="utf-8") as f: exp = f.read() assert isinstance(whisper_result, dict) assert whisper_result["status_code"] == 200 - # For text based processing, perform a strict match - if mode == "native_text" and output_mode == "text": - assert whisper_result["extraction"]["result_text"] == exp + if mode and output_mode: + # For text based processing, perform a strict match + if mode == "native_text" and output_mode == "text": + assert whisper_result["extraction"]["result_text"] == exp # For OCR based processing, perform a fuzzy match else: extracted_text = whisper_result["extraction"]["result_text"] @@ -116,19 +107,14 @@ def test_whisper_v2_url_in_post(client_v2, data_dir, output_mode, mode, url, inp ) pytest.fail(f"Texts are not similar enough: {similarity * 100:.2f}% similarity. Diff:\n{diff}") - usage_after = client_v2.get_usage_info() - # Verify usage after extraction - verify_usage(usage_before, usage_after, page_count, mode) - def verify_usage(before_extract, after_extract, page_count, mode='form'): all_modes = ['form', 'high_quality', 'low_cost', 'native_text'] all_modes.remove(mode) assert (after_extract['today_page_count'] == before_extract['today_page_count'] + page_count), \ "today_page_count calculation is wrong" - if after_extract['current_page_count'] != -1: - assert (after_extract['current_page_count'] == before_extract['current_page_count'] + page_count), \ - "current_page_count calculation is wrong" + assert (after_extract['current_page_count'] == before_extract['current_page_count'] + page_count), \ + "current_page_count calculation is wrong" if after_extract['overage_page_count'] > 0: assert (after_extract['overage_page_count'] == before_extract['overage_page_count'] + page_count), \ "overage_page_count calculation is wrong" From 4df2f52b57618d07936844c6922b3ddfab502ddb Mon Sep 17 00:00:00 2001 From: nageshwaran Date: Tue, 29 Oct 2024 13:06:24 +0530 Subject: [PATCH 3/3] renamed assert function --- tests/integration/client_v2_test.py | 41 +++++++++++++---------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/tests/integration/client_v2_test.py b/tests/integration/client_v2_test.py index 4578147..b42fbb1 100644 --- a/tests/integration/client_v2_test.py +++ b/tests/integration/client_v2_test.py @@ -50,7 +50,7 @@ def test_whisper_v2(client_v2, data_dir, output_mode, mode, input_file): exp_basename = f"{Path(input_file).stem}.{mode}.{output_mode}.txt" exp_file = os.path.join(data_dir, "expected", exp_basename) # verify extracted text - do_fuzzy_assertion_with_extracted_text(exp_file, whisper_result, mode, output_mode) + assert_extracted_text(exp_file, whisper_result, mode, output_mode) @pytest.mark.parametrize( @@ -60,10 +60,8 @@ def test_whisper_v2(client_v2, data_dir, output_mode, mode, input_file): "credit_card.pdf", 7), ("layout_preserving", "low_cost", "https://unstractpocstorage.blob.core.windows.net/public/Amex.pdf", "credit_card.pdf", 7), - ( - "layout_preserving", "high_quality", - "https://unstractpocstorage.blob.core.windows.net/public/scanned_bill.pdf", - "restaurant_invoice_photo.pdf", 1), + ("layout_preserving", "high_quality", "https://unstractpocstorage.blob.core.windows.net/public/scanned_bill.pdf", + "restaurant_invoice_photo.pdf", 1), ("layout_preserving", "form", "https://unstractpocstorage.blob.core.windows.net/public/scanned_form.pdf", "handwritten-form.pdf", 1), ] @@ -78,34 +76,33 @@ def test_whisper_v2_url_in_post(client_v2, data_dir, output_mode, mode, url, inp exp_basename = f"{Path(input_file).stem}.{mode}.{output_mode}.txt" exp_file = os.path.join(data_dir, "expected", exp_basename) # verify extracted text - do_fuzzy_assertion_with_extracted_text(exp_file, whisper_result, mode, output_mode) + assert_extracted_text(exp_file, whisper_result, mode, output_mode) usage_after = client_v2.get_usage_info() # Verify usage after extraction verify_usage(usage_before, usage_after, page_count, mode) -def do_fuzzy_assertion_with_extracted_text(file_path, whisper_result, mode=None, output_mode=None): +def assert_extracted_text(file_path, whisper_result, mode, output_mode): with open(file_path, encoding="utf-8") as f: exp = f.read() assert isinstance(whisper_result, dict) assert whisper_result["status_code"] == 200 - if mode and output_mode: - # For text based processing, perform a strict match - if mode == "native_text" and output_mode == "text": - assert whisper_result["extraction"]["result_text"] == exp - # For OCR based processing, perform a fuzzy match - else: - extracted_text = whisper_result["extraction"]["result_text"] - similarity = SequenceMatcher(None, extracted_text, exp).ratio() - threshold = 0.97 - - if similarity < threshold: - diff = "\n".join( - unified_diff(exp.splitlines(), extracted_text.splitlines(), fromfile="Expected", tofile="Extracted") - ) - pytest.fail(f"Texts are not similar enough: {similarity * 100:.2f}% similarity. Diff:\n{diff}") + # For OCR based processing + threshold = 0.97 + + # For text based processing + if mode == "native_text" and output_mode == "text": + threshold = 0.99 + extracted_text = whisper_result["extraction"]["result_text"] + similarity = SequenceMatcher(None, extracted_text, exp).ratio() + + if similarity < threshold: + diff = "\n".join( + unified_diff(exp.splitlines(), extracted_text.splitlines(), fromfile="Expected", tofile="Extracted") + ) + pytest.fail(f"Texts are not similar enough: {similarity * 100:.2f}% similarity. Diff:\n{diff}") def verify_usage(before_extract, after_extract, page_count, mode='form'):