## [v9.1]() (2023-06-23)

**Added**
- added `keywordSearchMode` parameter that can be used in `QueryArticles`, `QueryArticlesIter`, `QueryEvents`, `QueryEventsIter` and `QueryEvent` constructors.
- added `keywordSearchMode` parameter to the advanced query language

**Updated**
- types of parameters in the method calls
- updated several code example files

EventRegistry · Jun 23, 2023 · bf3ce14 · bf3ce14
1 parent 301be9b
commit bf3ce14
Show file tree

Hide file tree

Showing 27 changed files with 745 additions and 407 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Change Log
 
+## [v9.1]() (2023-06-23)
+
+**Added**
+- added `keywordSearchMode` parameter that can be used in `QueryArticles`, `QueryArticlesIter`, `QueryEvents`, `QueryEventsIter` and `QueryEvent` constructors.
+- added `keywordSearchMode` parameter to the advanced query language
+
+
+**Updated**
+- types of parameters in the method calls
+- updated several code example files
+
+
+
 ## [v9.0]() (2023-05-15)
 
 **Added**

diff --git a/eventregistry/Analytics.py b/eventregistry/Analytics.py
@@ -24,7 +24,7 @@ def __init__(self, eventRegistry: EventRegistry):
         self._er = eventRegistry
 
 
-    def annotate(self, text: str, lang: str = None, customParams: dict = None):
+    def annotate(self, text: str, lang: Union[str, None] = None, customParams: Union[dict, None] = None):
         """
         identify the list of entities and nonentities mentioned in the text
         @param text: input text to annotate
@@ -38,7 +38,7 @@ def annotate(self, text: str, lang: str = None, customParams: dict = None):
         return self._er.jsonRequestAnalytics("/api/v1/annotate", params)
 
 
-    def categorize(self, text: str, taxonomy: str = "dmoz", concepts: List[str] = None):
+    def categorize(self, text: str, taxonomy: str = "dmoz", concepts: Union[List[str], None] = None):
         """
         determine the set of up to 5 categories the text is about. Currently, only English text can be categorized!
         @param text: input text to categorize
@@ -86,7 +86,7 @@ def detectLanguage(self, text: str):
         return self._er.jsonRequestAnalytics("/api/v1/detectLanguage", { "text": text })
 
 
-    def extractArticleInfo(self, url: str, proxyUrl: str = None, headers: Union[str, dict] = None, cookies: Union[dict, str] = None):
+    def extractArticleInfo(self, url: str, proxyUrl: Union[str, None] = None, headers: Union[str, dict, None] = None, cookies: Union[dict, str, None] = None):
         """
         extract all available information about an article available at url `url`. Returned information will include
         article title, body, authors, links in the articles, ...
@@ -120,8 +120,8 @@ def ner(self, text: str):
 
 
     def trainTopicOnTweets(self, twitterQuery: str, useTweetText: bool = True, useIdfNormalization: bool = True,
-            normalization: bool = "linear", maxTweets: int = 2000, maxUsedLinks: int = 500, ignoreConceptTypes: Union[str, List[str]] = [],
-            maxConcepts: int = 20, maxCategories: int = 10, notifyEmailAddress: str = None):
+            normalization: str = "linear", maxTweets: int = 2000, maxUsedLinks: int = 500, ignoreConceptTypes: Union[str, List[str]] = [],
+            maxConcepts: int = 20, maxCategories: int = 10, notifyEmailAddress: Union[str, None] = None):
         """
         create a new topic and train it using the tweets that match the twitterQuery
         @param twitterQuery: string containing the content to search for. It can be a Twitter user account (using "@" prefix or user's Twitter url),
@@ -175,14 +175,12 @@ def trainTopicAddDocument(self, uri: str, text: str):
         return self._er.jsonRequestAnalytics("/api/v1/trainTopic", { "action": "addDocument", "uri": uri, "text": text})
 
 
-    def trainTopicGetTrainedTopic(self, uri: str, maxConcepts: int = 20, maxCategories: int = 10,
-            ignoreConceptTypes: Union[str, List[str]] = [], idfNormalization: bool = True):
+    def trainTopicGetTrainedTopic(self, uri: str, maxConcepts: int = 20, maxCategories: int = 10, idfNormalization: bool = True):
         """
         retrieve the trained topic for a topic for which you have already finished training
         @param uri: uri of the topic (obtained by calling trainTopicCreateTopic method)
         @param maxConcepts: number of top concepts to retrieve in the topic
         @param maxCategories: number of top categories to retrieve in the topic
-        @param ignoreConceptTypes: what types of concepts you would like to ignore in the profile. options: person, org, loc, wiki or an array with those
         @param idfNormalization: should the concepts be normalized by punishing the commonly mentioned concepts
         @returns: the trained topic: { concepts: [], categories: [] }
         """

diff --git a/eventregistry/Base.py b/eventregistry/Base.py
@@ -4,7 +4,7 @@
 
 import six, warnings, os, sys, re, datetime, time
 from eventregistry.Logger import logger
-from typing import Union, List
+from typing import Union, List, Dict
 
 mainLangs = ["eng", "deu", "zho", "slv", "spa"]
 allLangs = [ "eng", "deu", "spa", "cat", "por", "ita", "fra", "rus", "ara", "tur", "zho", "slv", "hrv", "srp" ]
@@ -123,7 +123,7 @@ def encodeDate(val: Union[datetime.datetime, datetime.date, str]):
         elif isinstance(val, datetime.date):
             return val.isoformat()
         elif isinstance(val, six.string_types):
-            assert re.match("^\d{4}-\d{2}-\d{2}$", val), "date value '%s' was not provided in the 'YYYY-MM-DD' format" % (val)
+            assert re.match(r"^\d{4}-\d{2}-\d{2}$", val), f"date value '{val}' was not provided in the 'YYYY-MM-DD' format"
             return val
         raise AssertionError("date was not in the expected format")
 
@@ -133,12 +133,12 @@ def encodeDateTime(val: Union[datetime.datetime, str]):
         """encode datetime into UTC ISO format which can be sent to ER"""
         if isinstance(val, datetime.datetime):
             # if we have a datetime in some tz, we convert it first to UTC
-            if val.utcoffset() != None:
+            if val.utcoffset() is not None:
                 import pytz
                 val = val.astimezone(pytz.utc)
             return val.isoformat()
         elif isinstance(val, six.string_types):
-            assert re.match("^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$", val), "datetime value '%s' was not provided in the 'YYYY-MM-DDTHH:MM:SS.SSSS' format" % (val)
+            assert re.match(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$", val), f"datetime value '{val}' was not provided in the 'YYYY-MM-DDTHH:MM:SS.SSSS' format"
             return val
         raise AssertionError("datetime was not in the recognizable data type. Use datetime or string in ISO format")
 
@@ -149,7 +149,7 @@ def _clearVal(self, propName: str):
             del self.queryParams[propName]
 
 
-    def _hasVal(self, propName: str):
+    def _hasVal(self, propName: str) -> bool:
         """do we have in the query property named propName"""
         return propName in self.queryParams
 
@@ -188,16 +188,16 @@ def _addArrayVal(self, propName: str, val):
         self.queryParams[propName].append(val)
 
 
-    def _update(self, object: dict):
+    def _update(self, object: Dict):
         self.queryParams.update(object)
 
 
-    def _getQueryParams(self):
+    def _getQueryParams(self) -> Dict:
         """return the parameters."""
         return dict(self.queryParams)
 
 
-    def _setQueryArrVal(self, value: Union[str, QueryItems, list], propName: str, propOperName: str, defaultOperName: str):
+    def _setQueryArrVal(self, value: Union[str, QueryItems, List, None], propName: str, propOperName: Union[str, None], defaultOperName: str):
         """
         parse the value "value" and use it to set the property propName and the operator with name propOperName
         @param value: None, string, QueryItems or list. Values to be set using property name propName
@@ -211,10 +211,10 @@ def _setQueryArrVal(self, value: Union[str, QueryItems, list], propName: str, pr
         if isinstance(value, QueryItems):
             self.queryParams[propName] = value.getItems()
             # if we need to specify the operator for the property
-            if propOperName != None:
+            if propOperName is not None:
                 self.queryParams[propOperName] = value.getOper().replace("$", "")
             # if the user specified the QueryItems class but used the invalid operator type then raise an error
-            assert propOperName != None or value.getOper().replace("$", "") == defaultOperName, "An invalid operator type '%s' was used for property '%s'" % (value.getOper().replace("$", ""), propName)
+            assert propOperName is not None or value.getOper().replace("$", "") == defaultOperName, "An invalid operator type '%s' was used for property '%s'" % (value.getOper().replace("$", ""), propName)
 
         # if we have a string value, just use it
         elif isinstance(value, six.string_types):
@@ -224,14 +224,14 @@ def _setQueryArrVal(self, value: Union[str, QueryItems, list], propName: str, pr
         elif isinstance(value, list):
             self.queryParams[propName] = value
             # if we need to specify the operator for the property
-            if propOperName != None:
+            if propOperName is not None:
                 self.queryParams[propOperName] = defaultOperName
                 if len(value) > 1:
-                    logger.warning("Warning: The value of parameter '%s' was provided as a list and '%s' operator was used implicitly between the items. We suggest specifying the list using the QueryItems.AND() or QueryItems.OR() to ensure the appropriate operator is used." % (propName, defaultOperName))
+                    logger.warning("Warning: The value of parameter '%s' was provided as a list and '%s' operator was used implicitly between the items. We suggest specifying the list using the QueryItems.AND() or QueryItems.OR() to ensure the appropriate operator is used.", propName, defaultOperName)
 
         # there should be no other valid types
         else:
-            assert False, "Parameter '%s' was of unsupported type. It should either be None, a string or an instance of QueryItems" % (propName)
+            assert False, f"Parameter '{propName}' was of unsupported type. It should either be None, a string or an instance of QueryItems"
 
 
 

diff --git a/eventregistry/DailyShares.py b/eventregistry/DailyShares.py
@@ -13,7 +13,7 @@
 # get top shared articles for today or any other day
 class GetTopSharedArticles(QueryParamsBase):
     def __init__(self,
-                 date: str = None,     # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used
+                 date: Union[str, datetime.date, datetime.datetime, None] = None,     # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used
                  count: int = 20,      # number of top shared articles to return
                  returnInfo: ReturnInfo = ReturnInfo()):
         QueryParamsBase.__init__(self)
@@ -23,7 +23,7 @@ def __init__(self,
         self._setVal("articlesSortBy", "socialScore")
         self._update(returnInfo.getParams("articles"))
 
-        if date == None:
+        if date is None:
             date = datetime.date.today()
         self._setDateVal("dateStart", date)
         self._setDateVal("dateEnd", date)
@@ -36,8 +36,8 @@ def _getPath(self):
 # get top shared events for today or any other day
 class GetTopSharedEvents(QueryParamsBase):
     def __init__(self,
-                 date: str = None,     # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used
-                 count: int = 20,      # number of top shared articles to return
+                 date: Union[str, datetime.date, datetime.datetime, None] = None,     # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared events. If None then today is used
+                 count: int = 20,                                                     # number of top shared events to return
                  returnInfo: ReturnInfo = ReturnInfo()):
         QueryParamsBase.__init__(self)
         self._setVal("action", "getEvents")
@@ -46,7 +46,7 @@ def __init__(self,
         self._setVal("eventsSortBy", "socialScore")
         self._update(returnInfo.getParams("events"))
 
-        if date == None:
+        if date is None:
             date = datetime.date.today()
         self._setDateVal("dateStart", date)
         self._setDateVal("dateEnd", date)