Skip to content

Commit

Permalink
## [v9.1]() (2023-06-23)
Browse files Browse the repository at this point in the history
**Added**
- added `keywordSearchMode` parameter that can be used in `QueryArticles`, `QueryArticlesIter`, `QueryEvents`, `QueryEventsIter` and `QueryEvent` constructors.
- added `keywordSearchMode` parameter to the advanced query language

**Updated**
- updated the type annotations of parameters in method signatures (e.g. `Union[str, None]` for parameters that default to `None`)
- updated several code example files
  • Loading branch information
gregorleban committed Jun 23, 2023
1 parent 301be9b commit bf3ce14
Show file tree
Hide file tree
Showing 27 changed files with 745 additions and 407 deletions.
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# Change Log

## [v9.1]() (2023-06-23)

**Added**
- added `keywordSearchMode` parameter that can be used in `QueryArticles`, `QueryArticlesIter`, `QueryEvents`, `QueryEventsIter` and `QueryEvent` constructors.
- added `keywordSearchMode` parameter to the advanced query language


**Updated**
- updated the type annotations of parameters in method signatures (e.g. `Union[str, None]` for parameters that default to `None`)
- updated several code example files



## [v9.0]() (2023-05-15)

**Added**
Expand Down
14 changes: 6 additions & 8 deletions eventregistry/Analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def __init__(self, eventRegistry: EventRegistry):
self._er = eventRegistry


def annotate(self, text: str, lang: str = None, customParams: dict = None):
def annotate(self, text: str, lang: Union[str, None] = None, customParams: Union[dict, None] = None):
"""
identify the list of entities and nonentities mentioned in the text
@param text: input text to annotate
Expand All @@ -38,7 +38,7 @@ def annotate(self, text: str, lang: str = None, customParams: dict = None):
return self._er.jsonRequestAnalytics("/api/v1/annotate", params)


def categorize(self, text: str, taxonomy: str = "dmoz", concepts: List[str] = None):
def categorize(self, text: str, taxonomy: str = "dmoz", concepts: Union[List[str], None] = None):
"""
determine the set of up to 5 categories the text is about. Currently, only English text can be categorized!
@param text: input text to categorize
Expand Down Expand Up @@ -86,7 +86,7 @@ def detectLanguage(self, text: str):
return self._er.jsonRequestAnalytics("/api/v1/detectLanguage", { "text": text })


def extractArticleInfo(self, url: str, proxyUrl: str = None, headers: Union[str, dict] = None, cookies: Union[dict, str] = None):
def extractArticleInfo(self, url: str, proxyUrl: Union[str, None] = None, headers: Union[str, dict, None] = None, cookies: Union[dict, str, None] = None):
"""
extract all available information about an article available at url `url`. Returned information will include
article title, body, authors, links in the articles, ...
Expand Down Expand Up @@ -120,8 +120,8 @@ def ner(self, text: str):


def trainTopicOnTweets(self, twitterQuery: str, useTweetText: bool = True, useIdfNormalization: bool = True,
normalization: bool = "linear", maxTweets: int = 2000, maxUsedLinks: int = 500, ignoreConceptTypes: Union[str, List[str]] = [],
maxConcepts: int = 20, maxCategories: int = 10, notifyEmailAddress: str = None):
normalization: str = "linear", maxTweets: int = 2000, maxUsedLinks: int = 500, ignoreConceptTypes: Union[str, List[str]] = [],
maxConcepts: int = 20, maxCategories: int = 10, notifyEmailAddress: Union[str, None] = None):
"""
create a new topic and train it using the tweets that match the twitterQuery
@param twitterQuery: string containing the content to search for. It can be a Twitter user account (using "@" prefix or user's Twitter url),
Expand Down Expand Up @@ -175,14 +175,12 @@ def trainTopicAddDocument(self, uri: str, text: str):
return self._er.jsonRequestAnalytics("/api/v1/trainTopic", { "action": "addDocument", "uri": uri, "text": text})


def trainTopicGetTrainedTopic(self, uri: str, maxConcepts: int = 20, maxCategories: int = 10,
ignoreConceptTypes: Union[str, List[str]] = [], idfNormalization: bool = True):
def trainTopicGetTrainedTopic(self, uri: str, maxConcepts: int = 20, maxCategories: int = 10, idfNormalization: bool = True):
"""
retrieve topic for the topic for which you have already finished training
@param uri: uri of the topic (obtained by calling trainTopicCreateTopic method)
@param maxConcepts: number of top concepts to retrieve in the topic
@param maxCategories: number of top categories to retrieve in the topic
@param ignoreConceptTypes: what types of concepts you would like to ignore in the profile. options: person, org, loc, wiki or an array with those
@param idfNormalization: should the concepts be normalized by punishing the commonly mentioned concepts
@param returns: returns the trained topic: { concepts: [], categories: [] }
"""
Expand Down
26 changes: 13 additions & 13 deletions eventregistry/Base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import six, warnings, os, sys, re, datetime, time
from eventregistry.Logger import logger
from typing import Union, List
from typing import Union, List, Dict

mainLangs = ["eng", "deu", "zho", "slv", "spa"]
allLangs = [ "eng", "deu", "spa", "cat", "por", "ita", "fra", "rus", "ara", "tur", "zho", "slv", "hrv", "srp" ]
Expand Down Expand Up @@ -123,7 +123,7 @@ def encodeDate(val: Union[datetime.datetime, datetime.date, str]):
elif isinstance(val, datetime.date):
return val.isoformat()
elif isinstance(val, six.string_types):
assert re.match("^\d{4}-\d{2}-\d{2}$", val), "date value '%s' was not provided in the 'YYYY-MM-DD' format" % (val)
assert re.match(r"^\d{4}-\d{2}-\d{2}$", val), f"date value '{val}' was not provided in the 'YYYY-MM-DD' format"
return val
raise AssertionError("date was not in the expected format")

Expand All @@ -133,12 +133,12 @@ def encodeDateTime(val: Union[datetime.datetime, str]):
"""encode datetime into UTC ISO format which can be sent to ER"""
if isinstance(val, datetime.datetime):
# if we have a datetime in some tz, we convert it first to UTC
if val.utcoffset() != None:
if val.utcoffset() is not None:
import pytz
val = val.astimezone(pytz.utc)
return val.isoformat()
elif isinstance(val, six.string_types):
assert re.match("^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$", val), "datetime value '%s' was not provided in the 'YYYY-MM-DDTHH:MM:SS.SSSS' format" % (val)
assert re.match(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$", val), f"datetime value '{val}' was not provided in the 'YYYY-MM-DDTHH:MM:SS.SSSS' format"
return val
raise AssertionError("datetime was not in the recognizable data type. Use datetime or string in ISO format")

Expand All @@ -149,7 +149,7 @@ def _clearVal(self, propName: str):
del self.queryParams[propName]


def _hasVal(self, propName: str):
def _hasVal(self, propName: str) -> bool:
"""do we have in the query property named propName"""
return propName in self.queryParams

Expand Down Expand Up @@ -188,16 +188,16 @@ def _addArrayVal(self, propName: str, val):
self.queryParams[propName].append(val)


def _update(self, object: dict):
def _update(self, object: Dict):
self.queryParams.update(object)


def _getQueryParams(self):
def _getQueryParams(self) -> Dict:
"""return the parameters."""
return dict(self.queryParams)


def _setQueryArrVal(self, value: Union[str, QueryItems, list], propName: str, propOperName: str, defaultOperName: str):
def _setQueryArrVal(self, value: Union[str, QueryItems, List, None], propName: str, propOperName: Union[str, None], defaultOperName: str):
"""
parse the value "value" and use it to set the property propName and the operator with name propOperName
@param value: None, string, QueryItems or list. Values to be set using property name propName
Expand All @@ -211,10 +211,10 @@ def _setQueryArrVal(self, value: Union[str, QueryItems, list], propName: str, pr
if isinstance(value, QueryItems):
self.queryParams[propName] = value.getItems()
# if we need to specify the operator for the property
if propOperName != None:
if propOperName is not None:
self.queryParams[propOperName] = value.getOper().replace("$", "")
# if the user specified the QueryItems class but used the invalid operator type then raise an error
assert propOperName != None or value.getOper().replace("$", "") == defaultOperName, "An invalid operator type '%s' was used for property '%s'" % (value.getOper().replace("$", ""), propName)
assert propOperName is not None or value.getOper().replace("$", "") == defaultOperName, "An invalid operator type '%s' was used for property '%s'" % (value.getOper().replace("$", ""), propName)

# if we have a string value, just use it
elif isinstance(value, six.string_types):
Expand All @@ -224,14 +224,14 @@ def _setQueryArrVal(self, value: Union[str, QueryItems, list], propName: str, pr
elif isinstance(value, list):
self.queryParams[propName] = value
# if we need to specify the operator for the property
if propOperName != None:
if propOperName is not None:
self.queryParams[propOperName] = defaultOperName
if len(value) > 1:
logger.warning("Warning: The value of parameter '%s' was provided as a list and '%s' operator was used implicitly between the items. We suggest specifying the list using the QueryItems.AND() or QueryItems.OR() to ensure the appropriate operator is used." % (propName, defaultOperName))
logger.warning("Warning: The value of parameter '%s' was provided as a list and '%s' operator was used implicitly between the items. We suggest specifying the list using the QueryItems.AND() or QueryItems.OR() to ensure the appropriate operator is used.", propName, defaultOperName)

# there should be no other valid types
else:
assert False, "Parameter '%s' was of unsupported type. It should either be None, a string or an instance of QueryItems" % (propName)
assert False, f"Parameter '{propName}' was of unsupported type. It should either be None, a string or an instance of QueryItems"



Expand Down
10 changes: 5 additions & 5 deletions eventregistry/DailyShares.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# get top shared articles for today or any other day
class GetTopSharedArticles(QueryParamsBase):
def __init__(self,
date: str = None, # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used
date: Union[str, datetime.date, datetime.datetime, None] = None, # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used
count: int = 20, # number of top shared articles to return
returnInfo: ReturnInfo = ReturnInfo()):
QueryParamsBase.__init__(self)
Expand All @@ -23,7 +23,7 @@ def __init__(self,
self._setVal("articlesSortBy", "socialScore")
self._update(returnInfo.getParams("articles"))

if date == None:
if date is None:
date = datetime.date.today()
self._setDateVal("dateStart", date)
self._setDateVal("dateEnd", date)
Expand All @@ -36,8 +36,8 @@ def _getPath(self):
# get top shared events for today or any other day
class GetTopSharedEvents(QueryParamsBase):
def __init__(self,
date: str = None, # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used
count: int = 20, # number of top shared articles to return
date: Union[str, datetime.date, datetime.datetime, None] = None, # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used
count: int = 20, # number of top shared articles to return
returnInfo: ReturnInfo = ReturnInfo()):
QueryParamsBase.__init__(self)
self._setVal("action", "getEvents")
Expand All @@ -46,7 +46,7 @@ def __init__(self,
self._setVal("eventsSortBy", "socialScore")
self._update(returnInfo.getParams("events"))

if date == None:
if date is None:
date = datetime.date.today()
self._setDateVal("dateStart", date)
self._setDateVal("dateEnd", date)
Expand Down
Loading

0 comments on commit bf3ce14

Please sign in to comment.