From 90b8d5ce72dbd134c2b68ad55e33a92f5c7d59bb Mon Sep 17 00:00:00 2001 From: Ole Einar Christoph Date: Mon, 22 Aug 2022 08:00:15 +0200 Subject: [PATCH 1/3] Food is fetched different --- essensGetter.py | 66 ++++++++++++++++++++++++++------------ tests/Test_essensGetter.py | 10 +++--- tests/Test_formatting.py | 2 +- utils/formatting.py | 60 ++++++++++++++++++++++++++++++++-- utils/mail.py | 2 +- 5 files changed, 110 insertions(+), 30 deletions(-) diff --git a/essensGetter.py b/essensGetter.py index bcd6dda..d9b9081 100644 --- a/essensGetter.py +++ b/essensGetter.py @@ -1,8 +1,9 @@ from requests_html import HTMLSession from bs4 import BeautifulSoup as bs +from bs4.element import Tag, NavigableString import datetime import calendar -from utils.formatting import remove_HTML, format_food_price +from utils.formatting import remove_HTML, format_food_price, format_meals_from_list from utils.mail import send_Email import logging @@ -24,9 +25,32 @@ def fetch_prices(): # Fetches the category from the food -def fetch_food_category(): - data = soup.find_all(class_="title-prim") - return remove_HTML(data) +def fetch_food_as_lists(): + html_class_meals = soup.find_all(class_="meals")[0].__getattribute__("contents") + food_categorys = soup.find_all(class_="title-prim") + + list_of_categorys_index = list() + list_of_all_meals = list() + + for x in html_class_meals: + if isinstance(x,NavigableString): + html_class_meals.remove(x) + + for x in food_categorys: + list_of_categorys_index.append(html_class_meals.index(x)) + list_of_categorys_index.append(len(html_class_meals)) + + for x in food_categorys: + one_meal = list() + count = list_of_categorys_index[food_categorys.index(x)] + for y in html_class_meals[count:list_of_categorys_index[food_categorys.index(x) + 1]:1]: + one_meal.append(y) + + if html_class_meals.index(y) == list_of_categorys_index[food_categorys.index(x) + 1] -1: + list_of_all_meals.append(one_meal) + + return list_of_all_meals + # Fetches the names from the food @@ -69,22 +93,22 @@ def fetch_food(): # don't do anything on weekends -if calendar.day_name[datetime.date.today().weekday()] == "Saturday" \ - or calendar.day_name[datetime.date.today().weekday()] == "Sunday": - logging.info("Weekend -> no call on website and no other operations") - print("Weekend -> no call on website and no other operations") +#if calendar.day_name[datetime.date.today().weekday()] == "Saturday" \ +# or calendar.day_name[datetime.date.today().weekday()] == "Sunday": + # logging.info("Weekend -> no call on website and no other operations") +# print("Weekend -> no call on website and no other operations") +#else: +url = "https://www.studentenwerk-leipzig.de/mensen-cafeterien/speiseplan?location=140&date=2022-08-22&criteria=&meal_type=all" # URL +session = HTMLSession() # Initialize HTML Session +response = session.get(url) # call the URL + +if response.status_code != 200: # If response != 200 don't try to read the data + logging.critical("Response != 200 " + str(response.status_code)) else: - url = "https://www.studentenwerk-leipzig.de/mensen-cafeterien/speiseplan?location=140" # URL - session = HTMLSession() # Initialize HTML Session - response = session.get(url) # call the URL - - if response.status_code != 200: # If response != 200 don't try to read the data - logging.critical("Response != 200 " + str(response.status_code)) - else: - soup = bs(response.content, "html.parser") # html parser from BeautifulSoup - - # give_me_everything() # Important to know which property's you can extract - # convert the HTML List to usable data + soup = bs(response.content, "html.parser") # html parser from BeautifulSoup - foodprice = format_food_price(fetch_prices()) # call the function to convert the HTML Stuff to usable data - send_Email(food=fetch_food(), foodcategory=fetch_food_category(), foodprice=foodprice) + # give_me_everything() # Important to know which property's you can extract + # convert the HTML List to usable data + food = format_meals_from_list(fetch_food_as_lists()) + foodprice = format_food_price(fetch_prices()) # call the function to convert the HTML Stuff to usable data + send_Email(food=fetch_food(), foodcategory=fetch_food_as_lists(), foodprice=foodprice) diff --git a/tests/Test_essensGetter.py b/tests/Test_essensGetter.py index 200656f..464e5ca 100644 --- a/tests/Test_essensGetter.py +++ b/tests/Test_essensGetter.py @@ -16,16 +16,16 @@ def test_fetch_prices(self): if len(essensGetter.fetch_prices()) == 1: self.assertGreater(len(essensGetter.fetch_food()), 1) self.assertLess(len(essensGetter.fetch_food()), 3) - self.assertEqual(len(essensGetter.fetch_food_category()), 1) + self.assertEqual(len(essensGetter.fetch_food_as_lists()), 1) elif len(essensGetter.fetch_prices()) == 2: self.assertGreater(len(essensGetter.fetch_food()), 2) self.assertLess(len(essensGetter.fetch_food()), 5) - self.assertEqual(len(essensGetter.fetch_food_category()), 2) + self.assertEqual(len(essensGetter.fetch_food_as_lists()), 2) def test_fetch_food_category(self): - self.assertIsNot(essensGetter.fetch_food_category(), None) - self.assertIsNot(essensGetter.fetch_food_category(), []) - self.assertGreater(len(essensGetter.fetch_food_category()), 0) + self.assertIsNot(essensGetter.fetch_food_as_lists(), None) + self.assertIsNot(essensGetter.fetch_food_as_lists(), []) + self.assertGreater(len(essensGetter.fetch_food_as_lists()), 0) def test_fetch_food(self): self.assertIsNot(essensGetter.fetch_food(), None) diff --git a/tests/Test_formatting.py b/tests/Test_formatting.py index 52aa18e..3beb3c7 100644 --- a/tests/Test_formatting.py +++ b/tests/Test_formatting.py @@ -9,5 +9,5 @@ def test_format_price(self): def test_format_string(self): self.assertTrue(str(formatting.format_string(essensGetter.fetch_food())).isascii()) - self.assertTrue(str(formatting.format_string(essensGetter.fetch_food_category())).isascii()) + self.assertTrue(str(formatting.format_string(essensGetter.fetch_food_as_lists())).isascii()) self.assertTrue(str(formatting.format_string(essensGetter.fetch_prices())).isascii()) \ No newline at end of file diff --git a/utils/formatting.py b/utils/formatting.py index 68afe56..efff6b4 100644 --- a/utils/formatting.py +++ b/utils/formatting.py @@ -1,8 +1,10 @@ import logging +import traceback +from bs4.element import NavigableString logging.basicConfig(filename='essensGetter.log', level=logging.INFO, filemode='w', - format='%(asctime)s %(levelname)s - %(message)s', force=True, encoding='utf-8') + format='%(asctime)s %(levelname)s %(lineno)d - %(message)s', force=True, encoding='utf-8') # Removes the HTML from the data @@ -76,4 +78,58 @@ def format_food_price(data): data[x] = str(data[x]).replace(" ", "") logging.info("Prices after formatting: " + str(data)) print("Prices after formatting: " + str(data)) - return data + return str(data) + + +def format_meals_from_list(list_of_all_meals): + """Converts the list_of_all_meals into a dictionary with all special possibilities + + Takes the fetched Data from essensGetter.py and convert it into dictionaries. Because multiple meals are possible + each dictionary will be added to a list. + :param list_of_all_meals: + :return: + """ + + meals = list() + + for x in list_of_all_meals: + try: + if len(x) == 3: + meal = {"category": x[0].__getattribute__("contents")[0], "food": "", "beilagen": "", "price": "", + "additional_info": ""} + + try: + meal["additional_info"] = x[1].__getattribute__("contents")[0] + except AttributeError as e: + logging.error("No additional info available: " + str(e)) + print("No additional info available: " + str(e)) + meal["additional_info"] = "" + + + html_food_content = x[2].__getattribute__("contents") + for y in html_food_content: + if isinstance(y, NavigableString): + html_food_content.remove(y) + + food_list = list() + price_list = list() + for y in html_food_content: + food_list.append(y.find_all(class_="meals__name")[0].__getattribute__("contents")[0]) + price_list.append(format_food_price(y.find_all(class_="meals__price"))) + meal["food"] = food_list + meal["price"] = price_list + + beilagen_list = list() + for y in html_food_content: + if y.find_all(class_="u-list-bare").__getattrbitute__("contents")[0] is not None: + for z in y.find_all(class_="u-list-bare").__getattrbitute__("contents"): + if isinstance(z, NavigableString) is False: + beilagen_list.append(z[0]) + else: + logging.warning("No beilagen available") + print("No beilagen available") + meal["beilagen"] = beilagen_list + + except Exception as e: + logging.exception("Error in format_meals_from_list: " + str(e)) + print("Error in format_meals_from_list: " + str(e.__traceback__)) \ No newline at end of file diff --git a/utils/mail.py b/utils/mail.py index a397ee3..a4d106e 100644 --- a/utils/mail.py +++ b/utils/mail.py @@ -69,7 +69,7 @@ def send_Email(food, foodcategory, foodprice): for i in range(len(names)): message = 'Subject: {}\n\n{}'.format(SUBJECT.format(names[i]), content.format(names[i])) - smtpObj.sendmail(sender, emails[i], message) + #smtpObj.sendmail(sender, emails[i], message) smtpObj.quit() logging.info("Email sent successfully to: " + str(names)) From 01bafebc42ccb4f6a0c880774ed879f43b151a75 Mon Sep 17 00:00:00 2001 From: Ole Einar Christoph Date: Mon, 22 Aug 2022 13:31:33 +0200 Subject: [PATCH 2/3] fetching the food works fine --- essensGetter.py | 36 ++++++++++++------------- utils/formatting.py | 64 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 70 insertions(+), 30 deletions(-) diff --git a/essensGetter.py b/essensGetter.py index d9b9081..e6957fa 100644 --- a/essensGetter.py +++ b/essensGetter.py @@ -93,22 +93,22 @@ def fetch_food(): # don't do anything on weekends -#if calendar.day_name[datetime.date.today().weekday()] == "Saturday" \ -# or calendar.day_name[datetime.date.today().weekday()] == "Sunday": - # logging.info("Weekend -> no call on website and no other operations") -# print("Weekend -> no call on website and no other operations") -#else: -url = "https://www.studentenwerk-leipzig.de/mensen-cafeterien/speiseplan?location=140&date=2022-08-22&criteria=&meal_type=all" # URL -session = HTMLSession() # Initialize HTML Session -response = session.get(url) # call the URL - -if response.status_code != 200: # If response != 200 don't try to read the data - logging.critical("Response != 200 " + str(response.status_code)) +if calendar.day_name[datetime.date.today().weekday()] == "Saturday" \ + or calendar.day_name[datetime.date.today().weekday()] == "Sunday": + logging.info("Weekend -> no call on website and no other operations") + print("Weekend -> no call on website and no other operations") else: - soup = bs(response.content, "html.parser") # html parser from BeautifulSoup - - # give_me_everything() # Important to know which property's you can extract - # convert the HTML List to usable data - food = format_meals_from_list(fetch_food_as_lists()) - foodprice = format_food_price(fetch_prices()) # call the function to convert the HTML Stuff to usable data - send_Email(food=fetch_food(), foodcategory=fetch_food_as_lists(), foodprice=foodprice) + url = "https://www.studentenwerk-leipzig.de/mensen-cafeterien/speiseplan?location=140&date=2022-08-26&criteria=&meal_type=all" # URL + session = HTMLSession() # Initialize HTML Session + response = session.get(url) # call the URL + + if response.status_code != 200: # If response != 200 don't try to read the data + logging.critical("Response != 200 " + str(response.status_code)) + else: + soup = bs(response.content, "html.parser") # html parser from BeautifulSoup + + # give_me_everything() # Important to know which property's you can extract + # convert the HTML List to usable data + food = format_meals_from_list(fetch_food_as_lists()) + foodprice = format_food_price(fetch_prices()) # call the function to convert the HTML Stuff to usable data + send_Email(food=fetch_food(), foodcategory=fetch_food_as_lists(), foodprice=foodprice) diff --git a/utils/formatting.py b/utils/formatting.py index efff6b4..ca29846 100644 --- a/utils/formatting.py +++ b/utils/formatting.py @@ -18,8 +18,11 @@ def remove_HTML(htmlContent): del (secondsplit[1]) htmlContent[i] = secondsplit elif isinstance(htmlContent, str): - logging.error("A string is given -> no formatting ") - print("A string is given -> no formatting ") + first_split = htmlContent.split(">") + del (first_split[0]) + secondsplit = first_split[0].split(" no formatting") print("Unknown datatype -> no formatting") @@ -87,16 +90,50 @@ def format_meals_from_list(list_of_all_meals): Takes the fetched Data from essensGetter.py and convert it into dictionaries. Because multiple meals are possible each dictionary will be added to a list. :param list_of_all_meals: - :return: + :return: meals """ meals = list() for x in list_of_all_meals: + try: + if len(x) == 2: + meal = {"category": x[0].__getattribute__("contents")[0], "food": "", "beilagen": "", "price": ""} + + html_food_content = x[1].__getattribute__("contents") + for y in html_food_content: + if isinstance(y, NavigableString): + html_food_content.remove(y) + + food_list = list() + price_list = list() + for y in html_food_content: + food_list.append(y.find_all(class_="meals__name")[0].__getattribute__("contents")[0]) + price_list.append(format_food_price(y.find_all(class_="meals__price"))) + meal["food"] = food_list + meal["price"] = price_list + + beilagen_list = list() + for y in html_food_content: + try: + for z in y.find_all(class_="u-list-bare")[0]: + if isinstance(z, NavigableString) is False: + beilagen_list.append(remove_HTML(str(z))) + except Exception as e: + logging.warning("No beilagen found") + print("No beilagen found") + meal["beilagen"] = beilagen_list + + meals.append(meal) + except Exception as e: + logging.error("Error in format_meals_from_list for a meal containing 2 Attr.: " + str(e)) + print("Error in format_meals_from_list for a meal containing 2 Attr.: " + str(e)) + continue + try: if len(x) == 3: meal = {"category": x[0].__getattribute__("contents")[0], "food": "", "beilagen": "", "price": "", - "additional_info": ""} + "additional_info": ""} try: meal["additional_info"] = x[1].__getattribute__("contents")[0] @@ -105,12 +142,11 @@ def format_meals_from_list(list_of_all_meals): print("No additional info available: " + str(e)) meal["additional_info"] = "" - html_food_content = x[2].__getattribute__("contents") for y in html_food_content: if isinstance(y, NavigableString): html_food_content.remove(y) - + food_list = list() price_list = list() for y in html_food_content: @@ -121,15 +157,19 @@ def format_meals_from_list(list_of_all_meals): beilagen_list = list() for y in html_food_content: - if y.find_all(class_="u-list-bare").__getattrbitute__("contents")[0] is not None: - for z in y.find_all(class_="u-list-bare").__getattrbitute__("contents"): + try: + for z in y.find_all(class_="u-list-bare")[0]: if isinstance(z, NavigableString) is False: - beilagen_list.append(z[0]) - else: + beilagen_list.append(z) + except Exception as e: logging.warning("No beilagen available") print("No beilagen available") meal["beilagen"] = beilagen_list + meals.append(meal) + except Exception as e: - logging.exception("Error in format_meals_from_list: " + str(e)) - print("Error in format_meals_from_list: " + str(e.__traceback__)) \ No newline at end of file + logging.exception("Error in format_meals_from_list for a meal containing 3 Attr.: " + str(e)) + print("Error in format_meals_from_list for a meal containing 3 Attr.: " + str(e)) + + return meals From 5855043e6709c1ce1f9b414aa52eef6c0456fd0f Mon Sep 17 00:00:00 2001 From: Ole Einar Christoph Date: Tue, 23 Aug 2022 14:40:43 +0200 Subject: [PATCH 3/3] Works fine, wrote the tests --- essensGetter.py | 55 +++----------------------------------- tests/Test_essensGetter.py | 26 +++--------------- tests/Test_formatting.py | 37 ++++++++++++++++++++++--- utils/formatting.py | 1 + utils/mail.py | 40 ++++++++++++++------------- 5 files changed, 62 insertions(+), 97 deletions(-) diff --git a/essensGetter.py b/essensGetter.py index e6957fa..9dfaae9 100644 --- a/essensGetter.py +++ b/essensGetter.py @@ -18,14 +18,8 @@ def give_me_everything(): return data -# Fetches the prices from the website -def fetch_prices(): - data = soup.find_all(class_="meals__price") - return data - - # Fetches the category from the food -def fetch_food_as_lists(): +def fetch_food_from_website(): html_class_meals = soup.find_all(class_="meals")[0].__getattribute__("contents") food_categorys = soup.find_all(class_="title-prim") @@ -52,53 +46,13 @@ def fetch_food_as_lists(): return list_of_all_meals - -# Fetches the names from the food -def fetch_food(): - data = soup.find_all(class_="meals__name") - list_of_food = list() - beilagen = list() - for x in range(len(data)): - try: - list_of_food.append(data[x].__getattribute__("contents")[0]) - if data[x].findNext(class_="u-list-bare").__getattribute__("contents")[0] is not None: - try: - for i in range(len(data[x].findNext(class_="u-list-bare").__getattribute__("contents"))): - if data[x].findNext(class_="u-list-bare").__getattribute__("contents")[i] != "\n": - beilagen.append( - data[x].findNext(class_="u-list-bare").__getattribute__("contents")[i].get_text()) - if i + 1 == len(data[x].findNext(class_="u-list-bare").__getattribute__("contents")): - if (len(beilagen) == 1): - list_of_food.append(beilagen[0]) - elif (len(beilagen) > 1): - beilagen = ", ".join(beilagen) - list_of_food.append(beilagen) - else: - list_of_food.append("") - beilagen = list() - except Exception as e: - logging.warning("Warn: " + str(e) + " in " + str(data[x])) - print("Warn: " + str(e) + " in " + str(data[x])) - beilagen.append("") - else: - logging.warning("No beilagen found for meal: " + data[x].get_text()) - print("No beilagen found for meal: " + data[x].get_text()) - list_of_food.append("") - except AttributeError as attribute_error: - logging.warning("AttributeError: " + str(attribute_error) + " in " + str(data[x])) - print("AttributeError: " + str(attribute_error) + " in " + str(data[x])) - list_of_food.append("Keine Beilagen") - - return list_of_food - - # don't do anything on weekends if calendar.day_name[datetime.date.today().weekday()] == "Saturday" \ or calendar.day_name[datetime.date.today().weekday()] == "Sunday": logging.info("Weekend -> no call on website and no other operations") print("Weekend -> no call on website and no other operations") else: - url = "https://www.studentenwerk-leipzig.de/mensen-cafeterien/speiseplan?location=140&date=2022-08-26&criteria=&meal_type=all" # URL + url = "https://www.studentenwerk-leipzig.de/mensen-cafeterien/speiseplan?location=140" # URL session = HTMLSession() # Initialize HTML Session response = session.get(url) # call the URL @@ -109,6 +63,5 @@ def fetch_food(): # give_me_everything() # Important to know which property's you can extract # convert the HTML List to usable data - food = format_meals_from_list(fetch_food_as_lists()) - foodprice = format_food_price(fetch_prices()) # call the function to convert the HTML Stuff to usable data - send_Email(food=fetch_food(), foodcategory=fetch_food_as_lists(), foodprice=foodprice) + meals = format_meals_from_list(fetch_food_from_website()) + send_Email(meals) diff --git a/tests/Test_essensGetter.py b/tests/Test_essensGetter.py index 464e5ca..6f5ec31 100644 --- a/tests/Test_essensGetter.py +++ b/tests/Test_essensGetter.py @@ -8,29 +8,9 @@ def test_give_me_everything(self): self.assertIsNot(essensGetter.give_me_everything(), None) self.assertIsNot(essensGetter.give_me_everything(), []) - def test_fetch_prices(self): - self.assertIsNot(essensGetter.fetch_prices(), None) - self.assertIsNot(essensGetter.fetch_prices(), []) - self.assertGreater(len(essensGetter.fetch_prices()), 0) - - if len(essensGetter.fetch_prices()) == 1: - self.assertGreater(len(essensGetter.fetch_food()), 1) - self.assertLess(len(essensGetter.fetch_food()), 3) - self.assertEqual(len(essensGetter.fetch_food_as_lists()), 1) - elif len(essensGetter.fetch_prices()) == 2: - self.assertGreater(len(essensGetter.fetch_food()), 2) - self.assertLess(len(essensGetter.fetch_food()), 5) - self.assertEqual(len(essensGetter.fetch_food_as_lists()), 2) - - def test_fetch_food_category(self): - self.assertIsNot(essensGetter.fetch_food_as_lists(), None) - self.assertIsNot(essensGetter.fetch_food_as_lists(), []) - self.assertGreater(len(essensGetter.fetch_food_as_lists()), 0) - - def test_fetch_food(self): - self.assertIsNot(essensGetter.fetch_food(), None) - self.assertIsNot(essensGetter.fetch_food(), []) - self.assertGreater(len(essensGetter.fetch_food()), 0) + def test_fetch_food_from_website(self): + self.assertIsNot(essensGetter.fetch_food_from_website(), None) + self.assertIsNot(essensGetter.fetch_food_from_website(), []) if __name__ == '__main__': diff --git a/tests/Test_formatting.py b/tests/Test_formatting.py index 3beb3c7..c863199 100644 --- a/tests/Test_formatting.py +++ b/tests/Test_formatting.py @@ -2,12 +2,41 @@ import utils.formatting as formatting import essensGetter + class Test_formatting(unittest.TestCase): def test_format_price(self): - self.assertFalse(str(formatting.format_food_price(essensGetter.fetch_food())).__contains__("