diff --git a/domain-cc/cc-app/src/python_src/api.py b/domain-cc/cc-app/src/python_src/api.py index 74dec7e6b..5eed7c1e3 100644 --- a/domain-cc/cc-app/src/python_src/api.py +++ b/domain-cc/cc-app/src/python_src/api.py @@ -82,6 +82,30 @@ def log_as_json(log: dict): logging.info(json.dumps(log)) +def log_expanded_contention_text( + logging_dict: dict, contention_text: str, log_contention_text: str +): + """ + Updates the logging dictionary with the contention text updates from the expanded classification method + """ + processed_text = expanded_lookup_table.prep_incoming_text(contention_text) + # only log these items if the expanded lookup returns a classification code + if expanded_lookup_table.get(contention_text)["classification_code"]: + if log_contention_text == "unmapped contention text": + log_contention_text = f"unmapped contention text {[processed_text]}" + logging_dict.update( + { + "processed_contention_text": processed_text, + "contention_text": log_contention_text, + } + ) + # log none as the processed text if it is not in the LUT and leave unmapped contention text as is + else: + logging_dict.update({"processed_contention_text": None}) + + return logging_dict + + def log_contention_stats( contention: Contention, classified_contention: ClassifiedContention, @@ -108,20 +132,27 @@ def log_contention_stats( is_multi_contention = len(claim.contentions) > 1 - log_as_json( - { - "vagov_claim_id": sanitize_log(claim.claim_id), - "claim_type": sanitize_log(log_contention_type), - "classification_code": classification_code, - "classification_name": classification_name, - "contention_text": log_contention_text, - "diagnostic_code": sanitize_log(contention.diagnostic_code), - "is_in_dropdown": is_in_dropdown, - "is_lookup_table_match": classification_code is not None, - "is_multi_contention": is_multi_contention, - "endpoint": request.url.path, - } - ) + logging_dict = { + "vagov_claim_id": sanitize_log(claim.claim_id), + "claim_type": sanitize_log(log_contention_type), + "classification_code": classification_code, + "classification_name": classification_name, + "contention_text": log_contention_text, + "diagnostic_code": sanitize_log(contention.diagnostic_code), + "is_in_dropdown": is_in_dropdown, + "is_lookup_table_match": classification_code is not None, + "is_multi_contention": is_multi_contention, + "endpoint": request.url.path, + } + + if request.url.path == "/expanded-contention-classification": + logging_dict = log_expanded_contention_text( + logging_dict, contention.contention_text, log_contention_text + ) + + # log_as_json(logging_dict) + # else: + log_as_json(logging_dict) def log_claim_stats_v2( diff --git a/domain-cc/cc-app/src/python_src/util/expanded_lookup_table.py b/domain-cc/cc-app/src/python_src/util/expanded_lookup_table.py index 782e6ef25..679c74c2f 100644 --- a/domain-cc/cc-app/src/python_src/util/expanded_lookup_table.py +++ b/domain-cc/cc-app/src/python_src/util/expanded_lookup_table.py @@ -123,6 +123,19 @@ def _build_lut(self): return classification_code_mappings + def prep_incoming_text(self, input_str: str): + """ + Prepares the incoming text for lookup by removing common words and punctuation + """ + input_str = input_str.strip().lower() + + for term in ["due to", "secondary to", "because of"]: + if term in input_str: + input_str = input_str.split(term)[0] + input_str = self._removal_pipeline(input_str) + + return input_str + def get(self, input_str: str, default_value=LUT_DEFAULT_VALUE): """ Processes input string using same method as the LUT and performs the lookup @@ -132,19 +145,14 @@ def get(self, input_str: str, default_value=LUT_DEFAULT_VALUE): This also process the parenthetical terms in the mappings """ - # There is only one case of using due to in the mappings (need to figure out a better way than hard coding it) - input_str = input_str.strip().lower() if input_str == "loss of teeth due to bone loss": return { "classification_code": 8967, "classification_name": "Dental and Oral", } - for term in ["due to", "secondary to", "because of"]: - if term in input_str: - input_str = input_str.split(term)[0] + input_str = self.prep_incoming_text(input_str) - input_str = self._removal_pipeline(input_str) input_str_lookup = frozenset(input_str.split()) classification = self.contention_text_lookup_table.get( input_str_lookup, default_value diff --git a/domain-cc/cc-app/tests/test_expanded_lookup.py b/domain-cc/cc-app/tests/test_expanded_lookup.py index 778906919..96cb45492 100644 --- a/domain-cc/cc-app/tests/test_expanded_lookup.py +++ b/domain-cc/cc-app/tests/test_expanded_lookup.py @@ -1,3 +1,5 @@ +from unittest.mock import patch + from fastapi.testclient import TestClient from src.python_src.util.expanded_lookup_config import COMMON_WORDS, FILE_READ_HELPER from src.python_src.util.expanded_lookup_table import ExpandedLookupTable @@ -118,6 +120,24 @@ def test_removed_parentheses(): assert TEST_LUT.get(test_str) == expected +@patch( + "src.python_src.util.expanded_lookup_table.ExpandedLookupTable._removal_pipeline" +) +def test_prep_incoming_text_cause(mock_removal_pipeline): + test_str = "acl tear, due to something" + TEST_LUT.prep_incoming_text(test_str) + mock_removal_pipeline.assert_called_once_with("acl tear, ") + + +@patch( + "src.python_src.util.expanded_lookup_table.ExpandedLookupTable._removal_pipeline" +) +def test_prep_incoming_text_non_cause(mock_removal_pipeline): + test_str = "acl tear in my right knee" + TEST_LUT.prep_incoming_text(test_str) + mock_removal_pipeline.assert_called_once_with(test_str) + + def test_lookup_cause_included(): test_str = [ "migraines (headaches), due to something", diff --git a/domain-cc/cc-app/tests/test_logging.py b/domain-cc/cc-app/tests/test_logging.py new file mode 100644 index 000000000..cfd724729 --- /dev/null +++ b/domain-cc/cc-app/tests/test_logging.py @@ -0,0 +1,445 @@ +""" +Tests for the logging functions in the API. This mocks the log_as_json function and tests that the logging is called +with the correct dict for different situations. The primary purpose is to test logging contention text to ensure that +there is no PII in the logs. +""" + +from unittest.mock import patch + +from fastapi import Request +from src.python_src.api import log_claim_stats_v2, log_contention_stats +from src.python_src.pydantic_models import ( + ClassifiedContention, + ClassifierResponse, + Contention, + VaGovClaim, +) +from starlette.datastructures import Headers + +test_expanded_request = Request( + scope={ + "type": "http", + "method": "POST", + "path": "/expanded-contention-classification", + "headers": Headers(), + } +) + +test_current_classifier_request = Request( + scope={ + "type": "http", + "method": "POST", + "path": "/va-gov-claim-classifier", + "headers": Headers(), + } +) + + +@patch("src.python_src.api.log_as_json") +def test_log_contention_stats_expanded(mocked_func): + """ + Tests the logging of a contention that is classified but considered free text + """ + test_contention = Contention( + contention_text="acl tear right", + contention_type="NEW", + ) + test_claim = VaGovClaim( + claim_id=100, form526_submission_id=500, contentions=[test_contention] + ) + classified_contention = ClassifiedContention( + classification_code=8997, + classification_name="Musculoskeletal - Knee", + diagnostic_code=None, + contention_type="NEW", + ) + log_contention_stats( + test_contention, classified_contention, test_claim, test_expanded_request + ) + + expected_logging_dict = { + "vagov_claim_id": test_claim.claim_id, + "claim_type": "new", + "classification_code": classified_contention.classification_code, + "classification_name": classified_contention.classification_name, + "contention_text": "unmapped contention text ['acl tear']", + "diagnostic_code": "None", + "is_in_dropdown": False, + "is_lookup_table_match": True, + "is_multi_contention": False, + "endpoint": "/expanded-contention-classification", + "processed_contention_text": "acl tear", + } + + mocked_func.assert_called_once_with(expected_logging_dict) + + +@patch("src.python_src.api.log_as_json") +def test_non_classified_contentions(mocked_func): + """ + Tests the logging of a contention that is not classified + """ + test_contention = Contention( + contention_text="John has an acl tear right", + contention_type="NEW", + ) + test_claim = VaGovClaim( + claim_id=100, form526_submission_id=500, contentions=[test_contention] + ) + classified_contention = ClassifiedContention( + classification_code=None, + classification_name=None, + diagnostic_code=None, + contention_type="NEW", + ) + log_contention_stats( + test_contention, classified_contention, test_claim, test_expanded_request + ) + + expected_log = { + "vagov_claim_id": test_claim.claim_id, + "claim_type": "new", + "classification_code": classified_contention.classification_code, + "classification_name": classified_contention.classification_name, + "contention_text": "unmapped contention text", + "diagnostic_code": "None", + "is_in_dropdown": False, + "is_lookup_table_match": False, + "is_multi_contention": False, + "endpoint": "/expanded-contention-classification", + "processed_contention_text": None, + } + mocked_func.assert_called_once_with(expected_log) + + +@patch("src.python_src.api.log_as_json") +def test_multiple_contentions(mocked_func): + """ + Tests multiple contentions one from autosuggestion and one that would be considered free text + """ + test_contentions = [ + Contention( + contention_text="tinnitus (ringing or hissing in ears)", + contention_type="NEW", + ), + Contention( + contention_text="anxiety condition due to somethiing that happened", + contention_type="NEW", + ), + ] + test_claim = VaGovClaim( + claim_id=100, form526_submission_id=500, contentions=test_contentions + ) + classified_contentions = [ + ClassifiedContention( + classification_code=3140, + classification_name="Hearing Loss", + diagnostic_code=None, + contention_type="NEW", + ), + ClassifiedContention( + classification_code=8989, + classification_name="Mental Disorders", + diagnostic_code=None, + contention_type="NEW", + ), + ] + + expected_logs = [ + { + "vagov_claim_id": test_claim.claim_id, + "claim_type": "new", + "classification_code": classified_contentions[0].classification_code, + "classification_name": classified_contentions[0].classification_name, + "contention_text": "tinnitus (ringing or hissing in ears)", + "diagnostic_code": "None", + "is_in_dropdown": True, + "is_lookup_table_match": True, + "is_multi_contention": True, + "endpoint": "/expanded-contention-classification", + "processed_contention_text": "tinnitus ringing hissing ears", + }, + { + "vagov_claim_id": test_claim.claim_id, + "claim_type": "new", + "classification_code": classified_contentions[1].classification_code, + "classification_name": classified_contentions[1].classification_name, + "contention_text": "unmapped contention text ['anxiety']", + "diagnostic_code": "None", + "is_in_dropdown": False, + "is_lookup_table_match": True, + "is_multi_contention": True, + "endpoint": "/expanded-contention-classification", + "processed_contention_text": "anxiety", + }, + ] + + for i in range(len(test_contentions)): + log_contention_stats( + test_contentions[i], + classified_contentions[i], + test_claim, + test_expanded_request, + ) + mocked_func.assert_called_with(expected_logs[i]) + + assert mocked_func.call_count == 2 + + +@patch("src.python_src.api.log_as_json") +def test_contentions_with_pii(mocked_func): + """ + Tests that the logging will not log unless completely classified and no PII slips through + """ + test_contentions = [ + Contention( + contention_text="dependent claim for child, 111-11-1111", + contention_type="NEW", + ), + Contention( + contention_text="John Doe(111-11-1111), right acl tear", + contention_type="NEW", + ), + ] + test_claim = VaGovClaim( + claim_id=100, form526_submission_id=500, contentions=test_contentions + ) + classified_contentions = [ + ClassifiedContention( + classification_code=None, + classification_name=None, + diagnostic_code=None, + contention_type="NEW", + ), + ClassifiedContention( + classification_code=None, + classification_name="None", + diagnostic_code=None, + contention_type="NEW", + ), + ] + expected_logs = [ + { + "vagov_claim_id": test_claim.claim_id, + "claim_type": "new", + "classification_code": classified_contentions[0].classification_code, + "classification_name": classified_contentions[0].classification_name, + "contention_text": "unmapped contention text", + "diagnostic_code": "None", + "is_in_dropdown": False, + "is_lookup_table_match": False, + "is_multi_contention": True, + "endpoint": "/expanded-contention-classification", + "processed_contention_text": None, + }, + { + "vagov_claim_id": test_claim.claim_id, + "claim_type": "new", + "classification_code": classified_contentions[1].classification_code, + "classification_name": classified_contentions[1].classification_name, + "contention_text": "unmapped contention text", + "diagnostic_code": "None", + "is_in_dropdown": False, + "is_lookup_table_match": False, + "is_multi_contention": True, + "endpoint": "/expanded-contention-classification", + "processed_contention_text": None, + }, + ] + + for i in range(len(test_contentions)): + log_contention_stats( + test_contentions[i], + classified_contentions[i], + test_claim, + test_expanded_request, + ) + mocked_func.assert_called_with(expected_logs[i]) + + assert mocked_func.call_count == 2 + + +@patch("src.python_src.api.log_as_json") +def test_log_claim_stats(mocked_func): + test_claim = VaGovClaim( + claim_id=100, + form526_submission_id=500, + contentions=[ + Contention( + contention_text="acl tear right", + contention_type="NEW", + ), + Contention( + contention_text="dependent claim, john doe 222-22-2222", + contention_type="NEW", + ), + ], + ) + classifier_response = ClassifierResponse( + contentions=[ + ClassifiedContention( + classification_code=8997, + classification_name="Muscloskeletal - Knee", + diagnostic_code=None, + contention_type="NEW", + ), + ClassifiedContention( + classification_code=None, + classification_name=None, + diagnostic_code=None, + contention_type="NEW", + ), + ], + claim_id=test_claim.claim_id, + form526_submission_id=test_claim.form526_submission_id, + is_fully_classified=False, + num_processed_contentions=2, + num_classified_contentions=1, + ) + + expected_log = { + "claim_id": 100, + "form526_submission_id": 500, + "is_fully_classified": False, + "percent_clasified": 50.0, + "num_processed_contentions": 2, + "num_classified_contentions": 1, + "endpoint": "/expanded-contention-classification", + } + log_claim_stats_v2(test_claim, classifier_response, test_expanded_request) + mocked_func.assert_called_once_with(expected_log) + + +@patch("src.python_src.api.log_as_json") +def test_current_classifier_contention(mocked_func): + """ + Tests the logging of the current contention logs + """ + test_contention = Contention( + contention_text="acl tear right", + contention_type="NEW", + ) + test_claim = VaGovClaim( + claim_id=100, form526_submission_id=500, contentions=[test_contention] + ) + classified_contention = ClassifiedContention( + classification_code=None, + classification_name=None, + diagnostic_code=None, + contention_type="NEW", + ) + log_contention_stats( + test_contention, + classified_contention, + test_claim, + test_current_classifier_request, + ) + + expected_logging_dict = { + "vagov_claim_id": test_claim.claim_id, + "claim_type": "new", + "classification_code": classified_contention.classification_code, + "classification_name": classified_contention.classification_name, + "contention_text": "unmapped contention text", + "diagnostic_code": "None", + "is_in_dropdown": False, + "is_lookup_table_match": False, + "is_multi_contention": False, + "endpoint": "/va-gov-claim-classifier", + } + + mocked_func.assert_called_once_with(expected_logging_dict) + + +@patch("src.python_src.api.log_as_json") +def test_full_logging_expanded_endpoint(mocked_func): + """ + Tests full logging including individual contentions and one claim + """ + test_contentions = [ + Contention( + contention_text="john doe 111-11-1111 acl tear right", + contention_type="NEW", + ), + Contention( + contention_text="anxiety condition due to somethiing that happened", + contention_type="NEW", + ), + ] + test_claim = VaGovClaim( + claim_id=100, form526_submission_id=500, contentions=test_contentions + ) + classified_contentions = [ + ClassifiedContention( + classification_code=None, + classification_name=None, + diagnostic_code=None, + contention_type="NEW", + ), + ClassifiedContention( + classification_code=8989, + classification_name="Mental Disorders", + diagnostic_code=None, + contention_type="NEW", + ), + ] + + expected_contention_logs = [ + { + "vagov_claim_id": test_claim.claim_id, + "claim_type": "new", + "classification_code": classified_contentions[0].classification_code, + "classification_name": classified_contentions[0].classification_name, + "contention_text": "unmapped contention text", + "diagnostic_code": "None", + "is_in_dropdown": False, + "is_lookup_table_match": False, + "is_multi_contention": True, + "endpoint": "/expanded-contention-classification", + "processed_contention_text": None, + }, + { + "vagov_claim_id": test_claim.claim_id, + "claim_type": "new", + "classification_code": classified_contentions[1].classification_code, + "classification_name": classified_contentions[1].classification_name, + "contention_text": "unmapped contention text ['anxiety']", + "diagnostic_code": "None", + "is_in_dropdown": False, + "is_lookup_table_match": True, + "is_multi_contention": True, + "endpoint": "/expanded-contention-classification", + "processed_contention_text": "anxiety", + }, + ] + + classifier_response = ClassifierResponse( + contentions=classified_contentions, + claim_id=test_claim.claim_id, + form526_submission_id=test_claim.form526_submission_id, + is_fully_classified=False, + num_processed_contentions=2, + num_classified_contentions=1, + ) + expected_claim_log = { + "claim_id": 100, + "form526_submission_id": 500, + "is_fully_classified": False, + "percent_clasified": 50.0, + "num_processed_contentions": 2, + "num_classified_contentions": 1, + "endpoint": "/expanded-contention-classification", + } + + for i in range(len(test_contentions)): + log_contention_stats( + test_contentions[i], + classified_contentions[i], + test_claim, + test_expanded_request, + ) + mocked_func.assert_called_with(expected_contention_logs[i]) + + log_claim_stats_v2(test_claim, classifier_response, test_expanded_request) + mocked_func.assert_called_with(expected_claim_log) + assert mocked_func.call_count == 3