diff --git a/CHANGELOG.md b/CHANGELOG.md index d76211b..4e30de1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Change Log +## [v9.1]() (2023-06-23) + +**Added** +- added `keywordSearchMode` parameter that can be used in `QueryArticles`, `QueryArticlesIter`, `QueryEvents`, `QueryEventsIter` and `QueryEvent` constructors. +- added `keywordSearchMode` parameter to the advanced query language + + +**Updated** +- types of parameters in the method calls +- updated several code example files + + + ## [v9.0]() (2023-05-15) **Added** diff --git a/eventregistry/Analytics.py b/eventregistry/Analytics.py index 6c271ea..f15d7ed 100644 --- a/eventregistry/Analytics.py +++ b/eventregistry/Analytics.py @@ -24,7 +24,7 @@ def __init__(self, eventRegistry: EventRegistry): self._er = eventRegistry - def annotate(self, text: str, lang: str = None, customParams: dict = None): + def annotate(self, text: str, lang: Union[str, None] = None, customParams: Union[dict, None] = None): """ identify the list of entities and nonentities mentioned in the text @param text: input text to annotate @@ -38,7 +38,7 @@ def annotate(self, text: str, lang: str = None, customParams: dict = None): return self._er.jsonRequestAnalytics("/api/v1/annotate", params) - def categorize(self, text: str, taxonomy: str = "dmoz", concepts: List[str] = None): + def categorize(self, text: str, taxonomy: str = "dmoz", concepts: Union[List[str], None] = None): """ determine the set of up to 5 categories the text is about. Currently, only English text can be categorized! 
@param text: input text to categorize @@ -86,7 +86,7 @@ def detectLanguage(self, text: str): return self._er.jsonRequestAnalytics("/api/v1/detectLanguage", { "text": text }) - def extractArticleInfo(self, url: str, proxyUrl: str = None, headers: Union[str, dict] = None, cookies: Union[dict, str] = None): + def extractArticleInfo(self, url: str, proxyUrl: Union[str, None] = None, headers: Union[str, dict, None] = None, cookies: Union[dict, str, None] = None): """ extract all available information about an article available at url `url`. Returned information will include article title, body, authors, links in the articles, ... @@ -120,8 +120,8 @@ def ner(self, text: str): def trainTopicOnTweets(self, twitterQuery: str, useTweetText: bool = True, useIdfNormalization: bool = True, - normalization: bool = "linear", maxTweets: int = 2000, maxUsedLinks: int = 500, ignoreConceptTypes: Union[str, List[str]] = [], - maxConcepts: int = 20, maxCategories: int = 10, notifyEmailAddress: str = None): + normalization: str = "linear", maxTweets: int = 2000, maxUsedLinks: int = 500, ignoreConceptTypes: Union[str, List[str]] = [], + maxConcepts: int = 20, maxCategories: int = 10, notifyEmailAddress: Union[str, None] = None): """ create a new topic and train it using the tweets that match the twitterQuery @param twitterQuery: string containing the content to search for. 
It can be a Twitter user account (using "@" prefix or user's Twitter url), @@ -175,14 +175,12 @@ def trainTopicAddDocument(self, uri: str, text: str): return self._er.jsonRequestAnalytics("/api/v1/trainTopic", { "action": "addDocument", "uri": uri, "text": text}) - def trainTopicGetTrainedTopic(self, uri: str, maxConcepts: int = 20, maxCategories: int = 10, - ignoreConceptTypes: Union[str, List[str]] = [], idfNormalization: bool = True): + def trainTopicGetTrainedTopic(self, uri: str, maxConcepts: int = 20, maxCategories: int = 10, idfNormalization: bool = True): """ retrieve topic for the topic for which you have already finished training @param uri: uri of the topic (obtained by calling trainTopicCreateTopic method) @param maxConcepts: number of top concepts to retrieve in the topic @param maxCategories: number of top categories to retrieve in the topic - @param ignoreConceptTypes: what types of concepts you would like to ignore in the profile. options: person, org, loc, wiki or an array with those @param idfNormalization: should the concepts be normalized by punishing the commonly mentioned concepts @param returns: returns the trained topic: { concepts: [], categories: [] } """ diff --git a/eventregistry/Base.py b/eventregistry/Base.py index 33e0bb1..3c0803f 100644 --- a/eventregistry/Base.py +++ b/eventregistry/Base.py @@ -4,7 +4,7 @@ import six, warnings, os, sys, re, datetime, time from eventregistry.Logger import logger -from typing import Union, List +from typing import Union, List, Dict mainLangs = ["eng", "deu", "zho", "slv", "spa"] allLangs = [ "eng", "deu", "spa", "cat", "por", "ita", "fra", "rus", "ara", "tur", "zho", "slv", "hrv", "srp" ] @@ -123,7 +123,7 @@ def encodeDate(val: Union[datetime.datetime, datetime.date, str]): elif isinstance(val, datetime.date): return val.isoformat() elif isinstance(val, six.string_types): - assert re.match("^\d{4}-\d{2}-\d{2}$", val), "date value '%s' was not provided in the 'YYYY-MM-DD' format" % (val) + assert 
re.match(r"^\d{4}-\d{2}-\d{2}$", val), f"date value '{val}' was not provided in the 'YYYY-MM-DD' format" return val raise AssertionError("date was not in the expected format") @@ -133,12 +133,12 @@ def encodeDateTime(val: Union[datetime.datetime, str]): """encode datetime into UTC ISO format which can be sent to ER""" if isinstance(val, datetime.datetime): # if we have a datetime in some tz, we convert it first to UTC - if val.utcoffset() != None: + if val.utcoffset() is not None: import pytz val = val.astimezone(pytz.utc) return val.isoformat() elif isinstance(val, six.string_types): - assert re.match("^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$", val), "datetime value '%s' was not provided in the 'YYYY-MM-DDTHH:MM:SS.SSSS' format" % (val) + assert re.match(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?$", val), f"datetime value '{val}' was not provided in the 'YYYY-MM-DDTHH:MM:SS.SSSS' format" return val raise AssertionError("datetime was not in the recognizable data type. Use datetime or string in ISO format") @@ -149,7 +149,7 @@ def _clearVal(self, propName: str): del self.queryParams[propName] - def _hasVal(self, propName: str): + def _hasVal(self, propName: str) -> bool: """do we have in the query property named propName""" return propName in self.queryParams @@ -188,16 +188,16 @@ def _addArrayVal(self, propName: str, val): self.queryParams[propName].append(val) - def _update(self, object: dict): + def _update(self, object: Dict): self.queryParams.update(object) - def _getQueryParams(self): + def _getQueryParams(self) -> Dict: """return the parameters.""" return dict(self.queryParams) - def _setQueryArrVal(self, value: Union[str, QueryItems, list], propName: str, propOperName: str, defaultOperName: str): + def _setQueryArrVal(self, value: Union[str, QueryItems, List, None], propName: str, propOperName: Union[str, None], defaultOperName: str): """ parse the value "value" and use it to set the property propName and the operator with name propOperName @param 
value: None, string, QueryItems or list. Values to be set using property name propName @@ -211,10 +211,10 @@ def _setQueryArrVal(self, value: Union[str, QueryItems, list], propName: str, pr if isinstance(value, QueryItems): self.queryParams[propName] = value.getItems() # if we need to specify the operator for the property - if propOperName != None: + if propOperName is not None: self.queryParams[propOperName] = value.getOper().replace("$", "") # if the user specified the QueryItems class but used the invalid operator type then raise an error - assert propOperName != None or value.getOper().replace("$", "") == defaultOperName, "An invalid operator type '%s' was used for property '%s'" % (value.getOper().replace("$", ""), propName) + assert propOperName is not None or value.getOper().replace("$", "") == defaultOperName, "An invalid operator type '%s' was used for property '%s'" % (value.getOper().replace("$", ""), propName) # if we have a string value, just use it elif isinstance(value, six.string_types): @@ -224,14 +224,14 @@ def _setQueryArrVal(self, value: Union[str, QueryItems, list], propName: str, pr elif isinstance(value, list): self.queryParams[propName] = value # if we need to specify the operator for the property - if propOperName != None: + if propOperName is not None: self.queryParams[propOperName] = defaultOperName if len(value) > 1: - logger.warning("Warning: The value of parameter '%s' was provided as a list and '%s' operator was used implicitly between the items. We suggest specifying the list using the QueryItems.AND() or QueryItems.OR() to ensure the appropriate operator is used." % (propName, defaultOperName)) + logger.warning("Warning: The value of parameter '%s' was provided as a list and '%s' operator was used implicitly between the items. 
We suggest specifying the list using the QueryItems.AND() or QueryItems.OR() to ensure the appropriate operator is used.", propName, defaultOperName) # there should be no other valid types else: - assert False, "Parameter '%s' was of unsupported type. It should either be None, a string or an instance of QueryItems" % (propName) + assert False, f"Parameter '{propName}' was of unsupported type. It should either be None, a string or an instance of QueryItems" diff --git a/eventregistry/DailyShares.py b/eventregistry/DailyShares.py index eb0c56c..65bfba1 100644 --- a/eventregistry/DailyShares.py +++ b/eventregistry/DailyShares.py @@ -13,7 +13,7 @@ # get top shared articles for today or any other day class GetTopSharedArticles(QueryParamsBase): def __init__(self, - date: str = None, # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used + date: Union[str, datetime.date, datetime.datetime, None] = None, # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used count: int = 20, # number of top shared articles to return returnInfo: ReturnInfo = ReturnInfo()): QueryParamsBase.__init__(self) @@ -23,7 +23,7 @@ def __init__(self, self._setVal("articlesSortBy", "socialScore") self._update(returnInfo.getParams("articles")) - if date == None: + if date is None: date = datetime.date.today() self._setDateVal("dateStart", date) self._setDateVal("dateEnd", date) @@ -36,8 +36,8 @@ def _getPath(self): # get top shared events for today or any other day class GetTopSharedEvents(QueryParamsBase): def __init__(self, - date: str = None, # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. 
If None then today is used - count: int = 20, # number of top shared articles to return + date: Union[str, datetime.date, datetime.datetime, None] = None, # specify the date (either in YYYY-MM-DD or datetime.date format) for which to return top shared articles. If None then today is used + count: int = 20, # number of top shared articles to return returnInfo: ReturnInfo = ReturnInfo()): QueryParamsBase.__init__(self) self._setVal("action", "getEvents") @@ -46,7 +46,7 @@ def __init__(self, self._setVal("eventsSortBy", "socialScore") self._update(returnInfo.getParams("events")) - if date == None: + if date is None: date = datetime.date.today() self._setDateVal("dateStart", date) self._setDateVal("dateEnd", date) diff --git a/eventregistry/EventRegistry.py b/eventregistry/EventRegistry.py index af85d65..27c1932 100644 --- a/eventregistry/EventRegistry.py +++ b/eventregistry/EventRegistry.py @@ -3,7 +3,7 @@ """ import six, os, sys, traceback, json, re, requests, time, logging, threading -from typing import Union, List +from typing import Union, List, Tuple from eventregistry.Base import * from eventregistry.ReturnInfo import * from eventregistry.Logger import logger @@ -15,14 +15,14 @@ class EventRegistry(object): it is used to send all the requests and queries """ def __init__(self, - apiKey: str = None, - host: str = None, - hostAnalytics: str = None, + apiKey: Union[str, None] = None, + host: Union[str, None] = None, + hostAnalytics: Union[str, None] = None, minDelayBetweenRequests: float = 0.5, repeatFailedRequestCount: int = -1, allowUseOfArchive: bool = True, verboseOutput: bool = False, - settingsFName: str = None): + settingsFName: Union[str, None] = None): """ @param apiKey: API key that should be used to make the requests to the Event Registry. 
API key is assigned to each user account and can be obtained on this page: https://newsapi.ai/dashboard @@ -38,8 +38,8 @@ def __init__(self, @param settingsFName: If provided it should be a full path to 'settings.json' file where apiKey an/or host can be loaded from. If None, we will look for the settings file in the eventregistry module folder """ - self._host = host - self._hostAnalytics = hostAnalytics + self._host = host or "http://eventregistry.org" + self._hostAnalytics = hostAnalytics or "http://analytics.eventregistry.org" self._lastException = None self._logRequests = False self._minDelayBetweenRequests = minDelayBetweenRequests @@ -72,16 +72,13 @@ def __init__(self, if "apiKey" in settings and not apiKey: logger.debug("found apiKey in settings file which will be used for making requests") self._apiKey = settings["apiKey"] - else: - self._host = host or "http://eventregistry.org" - self._hostAnalytics = hostAnalytics or "http://analytics.eventregistry.org" if self._apiKey == None: print("No API key was provided. 
You will be allowed to perform only a very limited number of requests per day.") self._requestLogFName = os.path.join(currPath, "requests_log.txt") - logger.debug("Event Registry host: %s" % (self._host)) - logger.debug("Text analytics host: %s" % (self._hostAnalytics)) + logger.debug("Event Registry host: %s", self._host) + logger.debug("Text analytics host: %s", self._hostAnalytics) # list of status codes - when we get them as a response from the call, we don't want to repeat the query as the response will likely always be the same self._stopStatusCodes = set([ @@ -106,7 +103,7 @@ def checkVersion(self): for (latest, current) in zip(latestVersion.split("."), currentVersion.split(".")): if int(latest) > int(current): logger.info("==============\nYour version of the module is outdated, please update to the latest version") - logger.info("Your version is %s while the latest is %s" % (currentVersion, latestVersion)) + logger.info("Your version is %s while the latest is %s", currentVersion, latestVersion) logger.info("Update by calling: pip install --upgrade eventregistry\n==============") return # in case the server mistakenly has a lower version that the user has, don't report an error @@ -122,7 +119,7 @@ def setLogging(self, val: bool): def setExtraParams(self, params: dict): - if params != None: + if params is not None: assert(isinstance(params, dict)) self._extraParams = params @@ -175,10 +172,7 @@ def getUrl(self, query: QueryParamsBase): # don't modify original query params allParams = query._getQueryParams() # make the url - try: - url = self._host + query._getPath() + "?" + urllib.urlencode(allParams, doseq=True) - except: - url = self._host + query._getPath() + "?" + urllib.parse.urlencode(allParams, doseq=True) + url = self._host + query._getPath() + "?" 
+ urllib.parse.urlencode(allParams, doseq=True) return url @@ -200,8 +194,8 @@ def printLastReqStats(self): """ print some statistics about the last executed request """ - print("Tokens used by the request: " + self.getLastHeader("req-tokens")) - print("Performed action: " + self.getLastHeader("req-action")) + print("Tokens used by the request: " + str(self.getLastHeader("req-tokens"))) + print("Performed action: " + str(self.getLastHeader("req-action"))) print("Was archive used for the query: " + (self.getLastHeader("req-archive") == "1" and "Yes" or "No")) @@ -212,7 +206,7 @@ def getLastReqArchiveUse(self): return self.getLastHeader("req-archive", "0") == "1" - def execQuery(self, query:QueryParamsBase, allowUseOfArchive: bool = None): + def execQuery(self, query:QueryParamsBase, allowUseOfArchive: Union[bool, None] = None): """ main method for executing the search queries. @param query: instance of Query class @@ -228,7 +222,7 @@ def execQuery(self, query:QueryParamsBase, allowUseOfArchive: bool = None): return respInfo - def jsonRequest(self, methodUrl: str, paramDict: dict, customLogFName: str = None, allowUseOfArchive: bool = None): + def jsonRequest(self, methodUrl: str, paramDict: dict, customLogFName: Union[str, None] = None, allowUseOfArchive: Union[bool, None] = None): """ make a request for json data. repeat it _repeatFailedRequestCount times, if they fail (indefinitely if _repeatFailedRequestCount = -1) @param methodUrl: url on er (e.g. 
"/api/v1/article") @@ -244,24 +238,24 @@ def jsonRequest(self, methodUrl: str, paramDict: dict, customLogFName: str = Non self._lock.acquire() if self._logRequests: try: - with open(customLogFName or self._requestLogFName, "a") as log: - if paramDict != None: + with open(customLogFName or self._requestLogFName, "a", encoding="utf-8") as log: + if isinstance(paramDict, dict): log.write("# " + json.dumps(paramDict) + "\n") log.write(methodUrl + "\n\n") except Exception as ex: self._lastException = ex - if paramDict == None: + if paramDict is None: paramDict = {} # if we have api key then add it to the paramDict if self._apiKey: paramDict["apiKey"] = self._apiKey # if we want to ignore the archive, set the flag - if allowUseOfArchive != None: + if isinstance(allowUseOfArchive, bool): if not allowUseOfArchive: paramDict["forceMaxDataTimeWindow"] = 31 # if we didn't override the parameter then check what we've set when constructing the EventRegistry class - elif self._allowUseOfArchive == False: + elif self._allowUseOfArchive is False: paramDict["forceMaxDataTimeWindow"] = 31 # if we also have some extra parameters, then set those too if self._extraParams: @@ -284,7 +278,7 @@ def jsonRequest(self, methodUrl: str, paramDict: dict, customLogFName: str = Non raise Exception(respInfo.text) # did we get a warning. 
if yes, print it if self.getLastHeader("warning"): - logger.warning("=========== WARNING ===========\n%s\n===============================" % (self.getLastHeader("warning"))) + logger.warning("=========== WARNING ===========\n%s\n===============================", self.getLastHeader("warning")) # remember the available requests self._dailyAvailableRequests = tryParseInt(self.getLastHeader("x-ratelimit-limit", ""), val = -1) self._remainingAvailableRequests = tryParseInt(self.getLastHeader("x-ratelimit-remaining", ""), val = -1) @@ -295,16 +289,16 @@ def jsonRequest(self, methodUrl: str, paramDict: dict, customLogFName: str = Non self._lastException = ex if self._verboseOutput: logger.error("Event Registry exception while executing the request:") - logger.error("endpoint: %s\nParams: %s" % (url, json.dumps(paramDict, indent=4))) + logger.error("endpoint: %s\nParams: %s", url, json.dumps(paramDict, indent=4)) self.printLastException() # in case of invalid input parameters, don't try to repeat the search but we simply raise the same exception again - if respInfo != None and respInfo.status_code in self._stopStatusCodes: + if respInfo is not None and respInfo.status_code in self._stopStatusCodes: break # in case of the other exceptions (maybe the service is temporarily unavailable) we try to repeat the query logger.info("The request will be automatically repeated in 3 seconds...") time.sleep(5) # sleep for X seconds on error self._lock.release() - if returnData == None: + if returnData is None: raise self._lastException or Exception("No valid return data provided") return returnData @@ -334,22 +328,21 @@ def jsonRequestAnalytics(self, methodUrl: str, paramDict: dict): # if we got some error codes print the error and repeat the request after a short time period if respInfo.status_code != 200: raise Exception(respInfo.text) - returnData = respInfo.json() break except Exception as ex: self._lastException = ex if self._verboseOutput: logger.error("Event Registry Analytics 
exception while executing the request:") - logger.error("endpoint: %s\nParams: %s" % (url, json.dumps(paramDict, indent=4))) + logger.error("endpoint: %s\nParams: %s", url, json.dumps(paramDict, indent=4)) self.printLastException() # in case of invalid input parameters, don't try to repeat the search but we simply raise the same exception again - if respInfo != None and respInfo.status_code in self._stopStatusCodes: + if respInfo is not None and respInfo.status_code in self._stopStatusCodes: break logger.info("The request will be automatically repeated in 3 seconds...") time.sleep(5) # sleep for X seconds on error self._lock.release() - if returnData == None: + if returnData is None: raise self._lastException or Exception("No valid return data provided") return returnData @@ -417,7 +410,7 @@ def suggestSourceGroups(self, prefix: str, page: int = 1, count: int = 20, **kwa return self.jsonRequest("/api/v1/suggestSourceGroups", params) - def suggestLocations(self, prefix: str, sources: Union[str, list] = ["place", "country"], lang: str = "eng", count: int = 20, countryUri: str = None, sortByDistanceTo: bool = None, returnInfo: ReturnInfo = ReturnInfo(), **kwargs): + def suggestLocations(self, prefix: str, sources: Union[str, list] = ["place", "country"], lang: str = "eng", count: int = 20, countryUri: Union[str, None] = None, sortByDistanceTo: Union[List, Tuple, None] = None, returnInfo: ReturnInfo = ReturnInfo(), **kwargs): """ return a list of geo locations (cities or countries) that contain the prefix @param prefix: input text that should be contained in the location name @@ -439,7 +432,7 @@ def suggestLocations(self, prefix: str, sources: Union[str, list] = ["place", "c return self.jsonRequest("/api/v1/suggestLocationsFast", params) - def suggestLocationsAtCoordinate(self, latitude: Union[int, float], longitude: Union[int, float], radiusKm: Union[int, float], limitToCities: bool = False, lang: str = "eng", count: int = 20, ignoreNonWiki: bool = True, returnInfo: 
ReturnInfo = ReturnInfo(), **kwargs): + def suggestLocationsAtCoordinate(self, latitude: Union[int, float], longitude: Union[int, float], radiusKm: Union[int, float], limitToCities: bool = False, lang: str = "eng", count: int = 20, returnInfo: ReturnInfo = ReturnInfo(), **kwargs): """ return a list of geo locations (cities or places) that are close to the provided (lat, long) values @param latitude: latitude part of the coordinate @@ -448,7 +441,6 @@ def suggestLocationsAtCoordinate(self, latitude: Union[int, float], longitude: U @param limitToCities: limit the set of results only to cities (True) or also to general places (False) @param lang: language in which the location label should be returned @param count: number of returned suggestions - @param ignoreNonWiki: ignore locations that don't have a wiki page and can not be used for concept search @param returnInfo: what details about locations should be included in the returned information """ assert isinstance(latitude, (int, float)), "The 'latitude' should be a number" @@ -574,7 +566,7 @@ def getConceptUri(self, conceptLabel: str, lang: str = "eng", sources: Union[str return None - def getLocationUri(self, locationLabel: str, lang: str = "eng", sources: Union[str, List[str]] = ["place", "country"], countryUri: str = None, sortByDistanceTo: str = None): + def getLocationUri(self, locationLabel: str, lang: str = "eng", sources: Union[str, List[str]] = ["place", "country"], countryUri: Union[str, None] = None, sortByDistanceTo: Union[List, Tuple, None] = None): """ return a location uri that is the best match for the given location label @param locationLabel: partial or full location name for which to return the location uri @@ -624,7 +616,7 @@ def getSourceGroupUri(self, sourceGroupName: str): @param sourceGroupName: partial or full name of the source group """ matches = self.suggestSourceGroups(sourceGroupName) - if matches != None and isinstance(matches, list) and len(matches) > 0 and "uri" in matches[0]: + if 
matches is not None and isinstance(matches, list) and len(matches) > 0 and "uri" in matches[0]: return matches[0]["uri"] return None @@ -635,7 +627,7 @@ def getConceptClassUri(self, classLabel: str, lang: str = "eng"): @param classLabel: partial or full name of the concept class for which to return class uri """ matches = self.suggestConceptClasses(classLabel, lang = lang) - if matches != None and isinstance(matches, list) and len(matches) > 0 and "uri" in matches[0]: + if matches is not None and isinstance(matches, list) and len(matches) > 0 and "uri" in matches[0]: return matches[0]["uri"] return None @@ -659,7 +651,7 @@ def getAuthorUri(self, authorName: str): @param authorName: partial or full name of the author, potentially also containing the source url (e.g. "george brown nytimes") """ matches = self.suggestAuthors(authorName) - if matches != None and isinstance(matches, list) and len(matches) > 0 and "uri" in matches[0]: + if matches is not None and isinstance(matches, list) and len(matches) > 0 and "uri" in matches[0]: return matches[0]["uri"] return None @@ -670,7 +662,7 @@ def getEventTypeUri(self, eventTypeLabel: str): @param eventTypeLabel: partial or full name of the event type for which we want to retrieve uri """ matches = self.suggestEventTypes(eventTypeLabel) - if matches != None and isinstance(matches, list) and len(matches) > 0 and "uri" in matches[0]: + if matches is not None and isinstance(matches, list) and len(matches) > 0 and "uri" in matches[0]: return matches[0]["uri"] return None diff --git a/eventregistry/Query.py b/eventregistry/Query.py index 0cc02c8..b5101c6 100644 --- a/eventregistry/Query.py +++ b/eventregistry/Query.py @@ -24,29 +24,29 @@ def _setValIfNotDefault(self, propName, value, defVal): class BaseQuery(_QueryCore): def __init__(self, - keyword: Union[str, QueryItems] = None, - conceptUri: Union[str, QueryItems] = None, - categoryUri: Union[str, QueryItems] = None, - sourceUri: Union[str, QueryItems] = None, - locationUri: 
Union[str, QueryItems] = None, - lang: Union[str, QueryItems] = None, - dateStart: Union[datetime.datetime, datetime.date, str] = None, - dateEnd: Union[datetime.datetime, datetime.date, str] = None, - sourceLocationUri: Union[str, List[str]] = None, - sourceGroupUri: Union[str, List[str]] = None, + keyword: Union[str, QueryItems, None] = None, + conceptUri: Union[str, QueryItems, None] = None, + categoryUri: Union[str, QueryItems, None] = None, + sourceUri: Union[str, QueryItems, None] = None, + locationUri: Union[str, QueryItems, None] = None, + lang: Union[str, QueryItems, None] = None, + dateStart: Union[datetime.datetime, datetime.date, str, None] = None, + dateEnd: Union[datetime.datetime, datetime.date, str, None] = None, + sourceLocationUri: Union[str, List[str], None] = None, + sourceGroupUri: Union[str, List[str], None] = None, # article or event search only: - dateMention: Union[datetime.datetime, datetime.date, str] = None, - authorUri: Union[str, List[str]] = None, + dateMention: Union[datetime.datetime, datetime.date, str, None] = None, + authorUri: Union[str, List[str], None] = None, keywordLoc: str = "body", # event search only: minMaxArticlesInEvent = None, # mention search only: - industryUri: Union[str, QueryItems] = None, - sdgUri: Union[str, QueryItems] = None, - sasbUri: Union[str, QueryItems] = None, - esgUri: Union[str, QueryItems] = None, + industryUri: Union[str, QueryItems, None] = None, + sdgUri: Union[str, QueryItems, None] = None, + sasbUri: Union[str, QueryItems, None] = None, + esgUri: Union[str, QueryItems, None] = None, # universal: - exclude: Union["BaseQuery", "CombinedQuery"] = None): + exclude: Union["BaseQuery", "CombinedQuery", None] = None): """ @param keyword: keyword(s) to query. Either None, string or QueryItems instance @param conceptUri: concept(s) to query. Either None, string or QueryItems instance @@ -74,14 +74,14 @@ def __init__(self, self._setQueryArrVal("lang", lang) # starting date of the published articles (e.g. 
2014-05-02) - if dateStart != None: + if dateStart is not None: self._queryObj["dateStart"] = QueryParamsBase.encodeDate(dateStart) # ending date of the published articles (e.g. 2014-05-02) - if dateEnd != None: + if dateEnd is not None: self._queryObj["dateEnd"] = QueryParamsBase.encodeDate(dateEnd) # mentioned date detected in articles (e.g. 2014-05-02) - if dateMention != None: + if dateMention is not None: if isinstance(dateMention, list): self._queryObj["dateMention"] = [QueryParamsBase.encodeDate(d) for d in dateMention] else: @@ -99,12 +99,12 @@ def __init__(self, if keywordLoc != "body": self._queryObj["keywordLoc"] = keywordLoc - if minMaxArticlesInEvent != None: + if minMaxArticlesInEvent is not None: assert isinstance(minMaxArticlesInEvent, tuple), "minMaxArticlesInEvent parameter should either be None or a tuple with two integer values" self._queryObj["minArticlesInEvent"] = minMaxArticlesInEvent[0] self._queryObj["maxArticlesInEvent"] = minMaxArticlesInEvent[1] - if exclude != None: + if exclude is not None: assert isinstance(exclude, (CombinedQuery, BaseQuery)), "exclude parameter was not a CombinedQuery or BaseQuery instance" self._queryObj["$not"] = exclude.getQuery() @@ -133,7 +133,7 @@ def __init__(self): @staticmethod def AND(queryArr: List[Union["BaseQuery", "CombinedQuery"]], - exclude: Union["BaseQuery", "CombinedQuery"] = None): + exclude: Union["BaseQuery", "CombinedQuery", None] = None): """ create a combined query with multiple items on which to perform an AND operation @param queryArr: a list of items on which to perform an AND operation. Items can be either a CombinedQuery or BaseQuery instances. 
@@ -146,7 +146,7 @@ def AND(queryArr: List[Union["BaseQuery", "CombinedQuery"]], for item in queryArr: assert isinstance(item, (CombinedQuery, BaseQuery)), "item in the list was not a CombinedQuery or BaseQuery instance" q.getQuery()["$and"].append(item.getQuery()) - if exclude != None: + if exclude is not None: assert isinstance(exclude, (CombinedQuery, BaseQuery)), "exclude parameter was not a CombinedQuery or BaseQuery instance" q.setQueryParam("$not", exclude.getQuery()) return q @@ -154,7 +154,7 @@ def AND(queryArr: List[Union["BaseQuery", "CombinedQuery"]], @staticmethod def OR(queryArr: List[Union["BaseQuery", "CombinedQuery"]], - exclude: Union["BaseQuery", "CombinedQuery"] = None): + exclude: Union["BaseQuery", "CombinedQuery", None] = None): """ create a combined query with multiple items on which to perform an OR operation @param queryArr: a list of items on which to perform an OR operation. Items can be either a CombinedQuery or BaseQuery instances. @@ -167,7 +167,7 @@ def OR(queryArr: List[Union["BaseQuery", "CombinedQuery"]], for item in queryArr: assert isinstance(item, (CombinedQuery, BaseQuery)), "item in the list was not a CombinedQuery or BaseQuery instance" q.getQuery()["$or"].append(item.getQuery()) - if exclude != None: + if exclude is not None: assert isinstance(exclude, (CombinedQuery, BaseQuery)), "exclude parameter was not a CombinedQuery or BaseQuery instance" q.setQueryParam("$not", exclude.getQuery()) return q @@ -178,8 +178,8 @@ class ComplexArticleQuery(_QueryCore): def __init__(self, query: Union["BaseQuery", "CombinedQuery"], dataType: Union[str, List[str]] = "news", - minSentiment: float = None, - maxSentiment: float = None, + minSentiment: Union[float, None] = None, + maxSentiment: Union[float, None] = None, minSocialScore: int = 0, minFacebookShares: int = 0, startSourceRankPercentile: int = 0, @@ -220,9 +220,9 @@ def __init__(self, if dataType != "news": filter["dataType"] = dataType - if minSentiment != None: + if minSentiment 
is not None: filter["minSentiment"] = minSentiment - if maxSentiment != None: + if maxSentiment is not None: filter["maxSentiment"] = maxSentiment if minSocialScore > 0: @@ -249,8 +249,8 @@ def __init__(self, class ComplexEventQuery(_QueryCore): def __init__(self, query: Union["BaseQuery", "CombinedQuery"], - minSentiment: float = None, - maxSentiment: float = None): + minSentiment: Union[float, None] = None, + maxSentiment: Union[float, None] = None): """ create an event query using a complex query @param query: an instance of CombinedQuery or BaseQuery to use to find events that match the conditions @@ -259,9 +259,9 @@ def __init__(self, assert isinstance(query, (CombinedQuery, BaseQuery)), "query parameter was not a CombinedQuery or BaseQuery instance" filter = {} - if minSentiment != None: + if minSentiment is not None: filter["minSentiment"] = minSentiment - if maxSentiment != None: + if maxSentiment is not None: filter["maxSentiment"] = maxSentiment if len(filter) > 0: @@ -273,10 +273,10 @@ def __init__(self, class ComplexMentionQuery(_QueryCore): def __init__(self, query: Union["BaseQuery", "CombinedQuery"], - minSentiment: float = None, - maxSentiment: float = None, - minSentenceIndex: int = None, - maxSentenceIndex: int = None, + minSentiment: Union[float, None] = None, + maxSentiment: Union[float, None] = None, + minSentenceIndex: Union[int, None] = None, + maxSentenceIndex: Union[int, None] = None, showDuplicates: bool = False): """ create a mention query using a complex query @@ -290,14 +290,14 @@ def __init__(self, assert isinstance(query, (CombinedQuery, BaseQuery)), "query parameter was not a CombinedQuery or BaseQuery instance" filter = {} - if minSentiment != None: + if minSentiment is not None: filter["minSentiment"] = minSentiment - if maxSentiment != None: + if maxSentiment is not None: filter["maxSentiment"] = maxSentiment - if minSentenceIndex != None: + if minSentenceIndex is not None: filter["minSentenceIndex"] = minSentenceIndex - if 
maxSentenceIndex != None: + if maxSentenceIndex is not None: filter["maxSentenceIndex"] = maxSentenceIndex if showDuplicates: filter["showDuplicates"] = showDuplicates diff --git a/eventregistry/QueryArticle.py b/eventregistry/QueryArticle.py index 27ba178..c090cc3 100644 --- a/eventregistry/QueryArticle.py +++ b/eventregistry/QueryArticle.py @@ -5,7 +5,7 @@ class QueryArticle(Query): def __init__(self, articleUriOrUriList: Union[str, List[str]], - requestedResult: "RequestArticle" = None): + requestedResult: Union["RequestArticle", None] = None): """ Class for obtaining available info for one or more articles in the Event Registry @param articleUriOrUriList: a single article uri or a list of article uris @@ -58,6 +58,7 @@ def __init__(self, returnInfo: ReturnInfo = ReturnInfo(articleInfo = ArticleInfo return details about the article @param returnInfo: what details should be included in the returned information """ + super(RequestArticle, self).__init__() self.resultType = "info" self.__dict__.update(returnInfo.getParams("info")) @@ -78,6 +79,7 @@ def __init__(self, @param limitPerLang: max number of articles per language to return (-1 for no limit) @param returnInfo: what details should be included in the returned information """ + super(RequestArticle, self).__init__() assert page >= 1, "page has to be >= 1" assert count <= 200, "at most 200 articles can be returned per call" self.resultType = "similarArticles" @@ -103,6 +105,7 @@ def __init__(self, @param sortByAsc: should the results be sorted in ascending order (True) or descending (False) @param returnInfo: what details should be included in the returned information """ + super(RequestArticle, self).__init__() assert page >= 1, "page has to be >= 1" assert count <= 200, "at most 200 articles can be returned per call" self.resultType = "duplicatedArticles" @@ -121,5 +124,6 @@ def __init__(self, return the article that is the original of the given article (the current article is a duplicate) @param returnInfo: 
what details should be included in the returned information """ + super(RequestArticle, self).__init__() self.resultType = "originalArticle" self.__dict__.update(returnInfo.getParams("originalArticle")) diff --git a/eventregistry/QueryArticles.py b/eventregistry/QueryArticles.py index ef6282e..65a2731 100644 --- a/eventregistry/QueryArticles.py +++ b/eventregistry/QueryArticles.py @@ -4,36 +4,38 @@ from eventregistry.Query import * from eventregistry.Logger import logger from eventregistry.EventRegistry import EventRegistry -from typing import Union, List +from typing import Union, List, Literal class QueryArticles(Query): def __init__(self, - keywords: Union[str, QueryItems] = None, - conceptUri: Union[str, QueryItems] = None, - categoryUri: Union[str, QueryItems] = None, - sourceUri: Union[str, QueryItems] = None, - sourceLocationUri: Union[str, QueryItems] = None, - sourceGroupUri: Union[str, QueryItems] = None, - authorUri: Union[str, QueryItems] = None, - locationUri: Union[str, QueryItems] = None, - lang: Union[str, QueryItems] = None, - dateStart: Union[datetime.datetime, datetime.date, str] = None, - dateEnd: Union[datetime.datetime, datetime.date, str] = None, - dateMentionStart: Union[datetime.datetime, datetime.date, str] = None, - dateMentionEnd: Union[datetime.datetime, datetime.date, str] = None, + keywords: Union[str, QueryItems, None] = None, + conceptUri: Union[str, QueryItems, None] = None, + categoryUri: Union[str, QueryItems, None] = None, + sourceUri: Union[str, QueryItems, None] = None, + sourceLocationUri: Union[str, QueryItems, None] = None, + sourceGroupUri: Union[str, QueryItems, None] = None, + authorUri: Union[str, QueryItems, None] = None, + locationUri: Union[str, QueryItems, None] = None, + lang: Union[str, QueryItems, None] = None, + dateStart: Union[datetime.datetime, datetime.date, str, None] = None, + dateEnd: Union[datetime.datetime, datetime.date, str, None] = None, + dateMentionStart: Union[datetime.datetime, datetime.date, 
str, None] = None, + dateMentionEnd: Union[datetime.datetime, datetime.date, str, None] = None, keywordsLoc: str = "body", - - ignoreKeywords: Union[str, QueryItems] = None, - ignoreConceptUri: Union[str, QueryItems] = None, - ignoreCategoryUri: Union[str, QueryItems] = None, - ignoreSourceUri: Union[str, QueryItems] = None, - ignoreSourceLocationUri: Union[str, QueryItems] = None, - ignoreSourceGroupUri: Union[str, QueryItems] = None, - ignoreAuthorUri: Union[str, QueryItems] = None, - ignoreLocationUri: Union[str, QueryItems] = None, - ignoreLang: Union[str, QueryItems] = None, + keywordSearchMode: Literal["simple", "exact", "phrase"] = "phrase", + + ignoreKeywords: Union[str, QueryItems, None] = None, + ignoreConceptUri: Union[str, QueryItems, None] = None, + ignoreCategoryUri: Union[str, QueryItems, None] = None, + ignoreSourceUri: Union[str, QueryItems, None] = None, + ignoreSourceLocationUri: Union[str, QueryItems, None] = None, + ignoreSourceGroupUri: Union[str, QueryItems, None] = None, + ignoreAuthorUri: Union[str, QueryItems, None] = None, + ignoreLocationUri: Union[str, QueryItems, None] = None, + ignoreLang: Union[str, QueryItems, None] = None, ignoreKeywordsLoc: str = "body", + ignoreKeywordSearchMode: Literal["simple", "exact", "phrase"] = "phrase", isDuplicateFilter: str = "keepAll", hasDuplicateFilter: str = "keepAll", @@ -46,7 +48,7 @@ def __init__(self, minSentiment: float = -1, maxSentiment: float = 1, dataType: Union[str, List[str]] = "news", - requestedResult: "RequestArticles" = None): + requestedResult: Union["RequestArticles", None] = None): """ Query class for searching for individual articles in the Event Registry. The resulting articles have to match all specified conditions. If a parameter value equals "" or [], then it is ignored. @@ -85,6 +87,7 @@ def __init__(self, @param dateMentionStart: find articles that explicitly mention a date that is equal or greater than dateMentionStart. 
@param dateMentionEnd: find articles that explicitly mention a date that is lower or equal to dateMentionEnd. @param keywordsLoc: where should we look when searching using the keywords provided by "keywords" parameter. "body" (default), "title", or "body,title" + @param keywordSearchMode: what search mode to use when specifying keywords. Possible values are: simple, exact, phrase @param ignoreKeywords: ignore articles that mention all provided keywords @param ignoreConceptUri: ignore articles that mention all provided concepts @@ -96,6 +99,8 @@ def __init__(self, @param ignoreLocationUri: ignore articles that occurred in any of the provided locations. A location can be a city or a place @param ignoreLang: ignore articles that are written in *any* of the provided languages @param ignoreKeywordsLoc: where should we look when data should be used when searching using the keywords provided by "ignoreKeywords" parameter. "body" (default), "title", or "body,title" + @param ignoreKeywordSearchMode: what search mode to use when specifying ignoreKeywords. Possible values are: simple, exact, phrase + @param isDuplicateFilter: some articles can be duplicates of other articles. What should be done with them. Possible values are: "skipDuplicates" (skip the resulting articles that are duplicates of other articles) "keepOnlyDuplicates" (return only the duplicate articles) @@ -132,6 +137,7 @@ def __init__(self, a computed value for the sentiment (all non-English articles) @param dataType: what data types should we search? "news" (news content, default), "pr" (press releases), or "blog". If you want to use multiple data types, put them in an array (e.g. ["news", "pr"]) + @param requestedResult: the information to return as the result of the query. 
By default return the list of matching articles """ super(QueryArticles, self).__init__() @@ -149,19 +155,22 @@ def __init__(self, self._setQueryArrVal(lang, "lang", None, "or") # a single lang or list (possible: eng, deu, spa, zho, slv) # starting date of the published articles (e.g. 2014-05-02) - if dateStart != None: + if dateStart is not None: self._setDateVal("dateStart", dateStart) # ending date of the published articles (e.g. 2014-05-02) - if dateEnd != None: + if dateEnd is not None: self._setDateVal("dateEnd", dateEnd) # first valid mentioned date detected in articles (e.g. 2014-05-02) - if dateMentionStart != None: + if dateMentionStart is not None: self._setDateVal("dateMentionStart", dateMentionStart) # last valid mentioned date detected in articles (e.g. 2014-05-02) - if dateMentionEnd != None: + if dateMentionEnd is not None: self._setDateVal("dateMentionEnd", dateMentionEnd) + self._setValIfNotDefault("keywordLoc", keywordsLoc, "body") + self._setValIfNotDefault("keywordSearchMode", keywordSearchMode, "phrase") + # for the negative conditions, only the OR is a valid operator type self._setQueryArrVal(ignoreKeywords, "ignoreKeyword", None, "or") self._setQueryArrVal(ignoreConceptUri, "ignoreConceptUri", None, "or") @@ -173,9 +182,9 @@ def __init__(self, self._setQueryArrVal(ignoreLocationUri, "ignoreLocationUri", None, "or") self._setQueryArrVal(ignoreLang, "ignoreLang", None, "or") - - self._setValIfNotDefault("keywordLoc", keywordsLoc, "body") self._setValIfNotDefault("ignoreKeywordLoc", ignoreKeywordsLoc, "body") + self._setValIfNotDefault("ignoreKeywordSearchMode", ignoreKeywordSearchMode, "phrase") + self._setValIfNotDefault("isDuplicateFilter", isDuplicateFilter, "keepAll") self._setValIfNotDefault("hasDuplicateFilter", hasDuplicateFilter, "keepAll") @@ -217,19 +226,19 @@ def setRequestedResult(self, requestArticles: "RequestArticles"): @staticmethod - def initWithArticleUriList(uriList: Union[str, List[str]], returnInfo: ReturnInfo = None): + 
def initWithArticleUriList(uriList: Union[str, List[str]], returnInfo: Union[ReturnInfo, None] = None): """ instead of making a query, provide a list of article URIs manually, and then produce the desired results on top of them """ # we need to set the dataType parameter here, otherwise users cannot ask for blog or pr articles using this way - q = QueryArticles(requestedResult=RequestArticlesInfo(returnInfo=returnInfo)) + q = QueryArticles(requestedResult=RequestArticlesInfo(returnInfo = returnInfo)) assert isinstance(uriList, str) or isinstance(uriList, list), "uriList has to be a list of strings or a string that represent article uris" q.queryParams = { "action": "getArticles", "articleUri": uriList, "dataType": ["news", "blog", "pr"] } return q @staticmethod - def initWithArticleUriWgtList(uriWgtList: Union[str, List[str]], returnInfo: ReturnInfo = None): + def initWithArticleUriWgtList(uriWgtList: Union[str, List[str]], returnInfo: Union[ReturnInfo, None] = None): """ instead of making a query, provide a list of article URIs manually, and then produce the desired results on top of them """ @@ -289,7 +298,7 @@ def count(self, eventRegistry: EventRegistry): def execQuery(self, eventRegistry: EventRegistry, sortBy: str = "rel", sortByAsc: bool = False, - returnInfo: ReturnInfo = None, + returnInfo: Union[ReturnInfo, None] = None, maxItems: int = -1, **kwargs): """ @@ -361,10 +370,10 @@ def _getNextArticleBatch(self): sortBy=self._sortBy, sortByAsc=self._sortByAsc, returnInfo = self._returnInfo)) if self._er._verboseOutput: - logger.debug("Downloading article page %d..." 
% (self._articlePage)) + logger.debug("Downloading article page %d...", self._articlePage) res = self._er.execQuery(self) if "error" in res: - logger.error("Error while obtaining a list of articles: " + res["error"]) + logger.error("Error while obtaining a list of articles: %s", res["error"]) else: self._totalPages = res.get("articles", {}).get("pages", 0) results = res.get("articles", {}).get("results", []) @@ -404,7 +413,7 @@ def __init__(self, page: int = 1, count: int = 100, sortBy: str = "date", sortByAsc: bool = False, - returnInfo : ReturnInfo = None): + returnInfo : Union[ReturnInfo, None] = None): """ return article details for resulting articles @param page: page of the articles to return @@ -413,6 +422,7 @@ def __init__(self, @param sortByAsc: should the results be sorted in ascending order (True) or descending (False) @param returnInfo: what details should be included in the returned information """ + super(RequestArticles, self).__init__() assert page >= 1, "page has to be >= 1" assert count <= 200, "at most 100 articles can be returned per call" self.resultType = "articles" @@ -420,7 +430,7 @@ def __init__(self, self.articlesCount = count self.articlesSortBy = sortBy self.articlesSortByAsc = sortByAsc - if returnInfo != None: + if returnInfo is not None: self.__dict__.update(returnInfo.getParams("articles")) @@ -428,6 +438,7 @@ def setPage(self, page: int): """ set the page of results to obtain """ + super(RequestArticles, self).__init__() assert page >= 1, "page has to be >= 1" self.articlesPage = page @@ -445,6 +456,7 @@ def __init__(self, @param sortBy: how are articles sorted. 
Options: id (internal id), date (publishing date), cosSim (closeness to the event centroid), rel (relevance to the query), sourceImportance (manually curated score of source importance - high value, high importance), sourceImportanceRank (reverse of sourceImportance), sourceAlexaGlobalRank (global rank of the news source), sourceAlexaCountryRank (country rank of the news source), socialScore (total shares on social media), facebookShares (shares on Facebook only) @param sortByAsc: should the results be sorted in ascending order (True) or descending (False) according to the sortBy criteria """ + super(RequestArticles, self).__init__() assert page >= 1, "page has to be >= 1" assert count <= 50000 self.resultType = "uriWgtList" @@ -465,6 +477,7 @@ def __init__(self): """ return time distribution of resulting articles """ + super(RequestArticles, self).__init__() self.resultType = "timeAggr" @@ -472,9 +485,9 @@ def __init__(self): class RequestArticlesConceptAggr(RequestArticles): def __init__(self, conceptCount: int = 25, - conceptCountPerType: int = None, + conceptCountPerType: Union[int, None] = None, conceptScoring: str = "importance", - articlesSampleSize: str = 10000, + articlesSampleSize: int = 10000, returnInfo: ReturnInfo = ReturnInfo()): """ get aggreate of concepts of resulting articles @@ -488,6 +501,7 @@ def __init__(self, @param articlesSampleSize: on what sample of results should the aggregate be computed (at most 20000) @param returnInfo: what details about the concepts should be included in the returned information """ + super(RequestArticles, self).__init__() assert conceptCount <= 500 assert articlesSampleSize <= 20000 self.resultType = "conceptAggr" @@ -509,6 +523,7 @@ def __init__(self, @param articlesSampleSize: on what sample of results should the aggregate be computed (at most 50000) @param returnInfo: what details about the categories should be included in the returned information """ + super(RequestArticles, self).__init__() assert 
articlesSampleSize <= 50000 self.resultType = "categoryAggr" self.categoryAggrSampleSize = articlesSampleSize @@ -530,6 +545,7 @@ def __init__(self, content overall, but their published content is more about the searched query. @param returnInfo: what details about the sources should be included in the returned information """ + super(RequestArticles, self).__init__() self.resultType = "sourceAggr" self.sourceAggrSourceCount = sourceCount self.sourceAggrNormalizeBySourceArts = normalizeBySourceArts @@ -543,6 +559,7 @@ def __init__(self, get top keywords in the resulting articles @param articlesSampleSize: on what sample of results should the aggregate be computed (at most 20000) """ + super(RequestArticles, self).__init__() assert articlesSampleSize <= 20000 self.resultType = "keywordAggr" self.keywordAggrSampleSize = articlesSampleSize @@ -563,6 +580,7 @@ def __init__(self, @param articlesSampleSize: on what sample of results should the aggregate be computed (at most 50000) @param returnInfo: what details about the concepts should be included in the returned information """ + super(RequestArticles, self).__init__() assert conceptCount <= 1000 assert linkCount <= 2000 assert articlesSampleSize <= 50000 @@ -588,6 +606,7 @@ def __init__(self, @param articlesSampleSize: on what sample of results should the aggregate be computed (at most 50000) @param returnInfo: what details should be included in the returned information """ + super(RequestArticles, self).__init__() assert conceptCount <= 200 assert articlesSampleSize <= 50000 self.resultType = "conceptMatrix" @@ -600,7 +619,7 @@ def __init__(self, class RequestArticlesConceptTrends(RequestArticles): def __init__(self, - conceptUris: Union[str, List[str]] = None, + conceptUris: Union[str, List[str], None] = None, conceptCount: int = 25, articlesSampleSize: int = 10000, returnInfo: ReturnInfo = ReturnInfo()): @@ -611,10 +630,11 @@ def __init__(self, @param articlesSampleSize: on what sample of results should the 
aggregate be computed (at most 50000) @param returnInfo: what details should be included in the returned information """ + super(RequestArticles, self).__init__() assert conceptCount <= 50 assert articlesSampleSize <= 50000 self.resultType = "conceptTrends" - if conceptUris != None: + if conceptUris is not None: self.conceptTrendsConceptUri = conceptUris self.conceptTrendsConceptCount = conceptCount self.conceptTrendsSampleSize = articlesSampleSize @@ -627,6 +647,7 @@ class RequestArticlesDateMentionAggr(RequestArticles): get mentioned dates in the articles """ def __init__(self): + super(RequestArticles, self).__init__() self.resultType = "dateMentionAggr" @@ -634,15 +655,15 @@ def __init__(self): class RequestArticlesRecentActivity(RequestArticles): def __init__(self, maxArticleCount: int = 100, - updatesAfterNewsUri: str = None, - updatesafterBlogUri: str = None, - updatesAfterPrUri: str = None, - updatesAfterTm: Union[datetime.datetime, datetime.date, str] = None, - updatesAfterMinsAgo: int = None, - updatesUntilTm: Union[datetime.datetime, datetime.date, str] = None, - updatesUntilMinsAgo: int = None, + updatesAfterNewsUri: Union[str, None] = None, + updatesafterBlogUri: Union[str, None] = None, + updatesAfterPrUri: Union[str, None] = None, + updatesAfterTm: Union[datetime.datetime, str, None] = None, + updatesAfterMinsAgo: Union[int, None] = None, + updatesUntilTm: Union[datetime.datetime, str, None] = None, + updatesUntilMinsAgo: Union[int, None] = None, mandatorySourceLocation: bool = False, - returnInfo: ReturnInfo = None): + returnInfo: Union[ReturnInfo, None] = None): """ get the list of articles that were recently added to the Event Registry and match the selected criteria @param maxArticleCount: the maximum number of articles to return in the call (the number can be even higher than 100 but in case more articles @@ -654,29 +675,30 @@ def __init__(self, @param mandatorySourceLocation: return only articles for which we know the source's geographic 
location @param returnInfo: what details should be included in the returned information """ + super(RequestArticles, self).__init__() assert maxArticleCount <= 2000 - assert updatesAfterTm == None or updatesAfterMinsAgo == None, "You should specify either updatesAfterTm or updatesAfterMinsAgo parameter, but not both" - assert updatesUntilTm == None or updatesUntilMinsAgo == None, "You should specify either updatesUntilTm or updatesUntilMinsAgo parameter, but not both" + assert updatesAfterTm is None or updatesAfterMinsAgo is None, "You should specify either updatesAfterTm or updatesAfterMinsAgo parameter, but not both" + assert updatesUntilTm is None or updatesUntilMinsAgo is None, "You should specify either updatesUntilTm or updatesUntilMinsAgo parameter, but not both" self.resultType = "recentActivityArticles" self.recentActivityArticlesMaxArticleCount = maxArticleCount - if updatesAfterTm != None: + if updatesAfterTm is not None: self.recentActivityArticlesUpdatesAfterTm = QueryParamsBase.encodeDateTime(updatesAfterTm) - if updatesAfterMinsAgo != None: + if updatesAfterMinsAgo is not None: self.recentActivityArticlesUpdatesAfterMinsAgo = updatesAfterMinsAgo - if updatesUntilTm != None: + if updatesUntilTm is not None: self.recentActivityArticlesUpdatesUntilTm = QueryParamsBase.encodeDateTime(updatesUntilTm) - if updatesUntilMinsAgo != None: + if updatesUntilMinsAgo is not None: self.recentActivityArticlesUpdatesUntilMinsAgo = updatesUntilMinsAgo # set the stopping uris, if provided - if updatesAfterNewsUri != None: + if updatesAfterNewsUri is not None: self.recentActivityArticlesNewsUpdatesAfterUri = updatesAfterNewsUri - if updatesafterBlogUri != None: + if updatesafterBlogUri is not None: self.recentActivityArticlesBlogUpdatesAfterUri = updatesafterBlogUri - if updatesAfterPrUri != None: + if updatesAfterPrUri is not None: self.recentActivityArticlesPrUpdatesAfterUri = updatesAfterPrUri self.recentActivityArticlesMaxArticleCount = maxArticleCount 
self.recentActivityArticlesMandatorySourceLocation = mandatorySourceLocation - if returnInfo != None: + if returnInfo is not None: self.__dict__.update(returnInfo.getParams("recentActivityArticles")) \ No newline at end of file diff --git a/eventregistry/QueryEvent.py b/eventregistry/QueryEvent.py index 93284e0..473aff7 100644 --- a/eventregistry/QueryEvent.py +++ b/eventregistry/QueryEvent.py @@ -5,7 +5,7 @@ from eventregistry.Query import * from eventregistry.Logger import logger from eventregistry.EventRegistry import EventRegistry -from typing import Union, List +from typing import Union, List, Literal class QueryEvent(Query): @@ -14,7 +14,7 @@ class QueryEvent(Query): """ def __init__(self, eventUriOrList: Union[str, List[str]], - requestedResult: "RequestEvent" = None): + requestedResult: Union["RequestEvent", None] = None): """ @param eventUriOrUriList: a single event uri or a list of event uris (max 50) @param requestedResult: the information to return as the result of the query. 
By default return the details of the event @@ -44,20 +44,21 @@ class QueryEventArticlesIter(QueryEvent, six.Iterator): Class for obtaining an iterator over all articles in the event """ def __init__(self, eventUri: str, - lang: Union[str, QueryItems] = None, - keywords: Union[str, QueryItems] = None, - conceptUri: Union[str, QueryItems] = None, - categoryUri: Union[str, QueryItems] = None, - sourceUri: Union[str, QueryItems] = None, - sourceLocationUri: Union[str, QueryItems] = None, - sourceGroupUri: Union[str, QueryItems] = None, - authorUri: Union[str, QueryItems] = None, - locationUri: Union[str, QueryItems] = None, - dateStart: Union[datetime.datetime, datetime.date, str] = None, - dateEnd: Union[datetime.datetime, datetime.date, str] = None, - dateMentionStart: Union[datetime.datetime, datetime.date, str] = None, - dateMentionEnd: Union[datetime.datetime, datetime.date, str] = None, + lang: Union[str, QueryItems, None] = None, + keywords: Union[str, QueryItems, None] = None, + conceptUri: Union[str, QueryItems, None] = None, + categoryUri: Union[str, QueryItems, None] = None, + sourceUri: Union[str, QueryItems, None] = None, + sourceLocationUri: Union[str, QueryItems, None] = None, + sourceGroupUri: Union[str, QueryItems, None] = None, + authorUri: Union[str, QueryItems, None] = None, + locationUri: Union[str, QueryItems, None] = None, + dateStart: Union[datetime.datetime, datetime.date, str, None] = None, + dateEnd: Union[datetime.datetime, datetime.date, str, None] = None, + dateMentionStart: Union[datetime.datetime, datetime.date, str, None] = None, + dateMentionEnd: Union[datetime.datetime, datetime.date, str, None] = None, keywordsLoc: str = "body", + keywordSearchMode: Literal["simple", "exact", "phrase"] = "phrase", startSourceRankPercentile: int = 0, endSourceRankPercentile: int = 100, @@ -99,6 +100,7 @@ def __init__(self, eventUri: str, @param dateMentionStart: limit the event articles to those that explicitly mention a date that is equal or greater 
than dateMentionStart. @param dateMentionEnd: limit the event articles to those that explicitly mention a date that is lower or equal to dateMentionEnd. @param keywordsLoc: where should we look when searching using the keywords provided by "keywords" parameter. "body" (default), "title", or "body,title" + @param keywordSearchMode: what search mode to use when specifying keywords. Possible values are: simple, exact, phrase @param startSourceRankPercentile: starting percentile of the sources to consider in the results (default: 0). Value should be in range 0-90 and divisible by 10. @param endSourceRankPercentile: ending percentile of the sources to consider in the results (default: 100). Value should be in range 10-100 and divisible by 10. @@ -120,20 +122,21 @@ def __init__(self, eventUri: str, self._setQueryArrVal(lang, "articlesLang", None, "or") # a single lang or list # starting date of the published articles (e.g. 2014-05-02) - if dateStart != None: + if dateStart is not None: self._setDateVal("dateStart", dateStart) # ending date of the published articles (e.g. 2014-05-02) - if dateEnd != None: + if dateEnd is not None: self._setDateVal("dateEnd", dateEnd) # first valid mentioned date detected in articles (e.g. 2014-05-02) - if dateMentionStart != None: + if dateMentionStart is not None: self._setDateVal("dateMentionStart", dateMentionStart) # last valid mentioned date detected in articles (e.g. 
2014-05-02) - if dateMentionEnd != None: + if dateMentionEnd is not None: self._setDateVal("dateMentionEnd", dateMentionEnd) self._setValIfNotDefault("keywordLoc", keywordsLoc, "body") + self._setValIfNotDefault("keywordSearchMode", keywordSearchMode, "phrase") assert startSourceRankPercentile >= 0 and startSourceRankPercentile % 10 == 0 and startSourceRankPercentile <= 100 assert endSourceRankPercentile >= 0 and endSourceRankPercentile % 10 == 0 and endSourceRankPercentile <= 100 @@ -165,7 +168,7 @@ def count(self, eventRegistry: EventRegistry): def execQuery(self, eventRegistry: EventRegistry, sortBy: str = "cosSim", sortByAsc: bool = False, - returnInfo: ReturnInfo = None, + returnInfo: Union[ReturnInfo, None] = None, maxItems: int = -1): """ @param eventRegistry: instance of EventRegistry class. used to obtain the necessary data @@ -200,7 +203,7 @@ def _getNextArticleBatch(self): if self._totalPages != None and self._articlePage > self._totalPages: return if self._er._verboseOutput: - logger.debug("Downloading article page %d from event %s" % (self._articlePage, eventUri)) + logger.debug("Downloading article page %d from event %s", self._articlePage, eventUri) self.setRequestedResult(RequestEventArticles( page = self._articlePage, @@ -260,26 +263,27 @@ def __init__(self, page = 1, count = 100, - lang: Union[str, QueryItems] = None, - keywords: Union[str, QueryItems] = None, - conceptUri: Union[str, QueryItems] = None, - categoryUri: Union[str, QueryItems] = None, - sourceUri: Union[str, QueryItems] = None, - sourceLocationUri: Union[str, QueryItems] = None, - sourceGroupUri: Union[str, QueryItems] = None, - authorUri: Union[str, QueryItems] = None, - locationUri: Union[str, QueryItems] = None, - dateStart: Union[datetime.datetime, datetime.date, str] = None, - dateEnd: Union[datetime.datetime, datetime.date, str] = None, - dateMentionStart: Union[datetime.datetime, datetime.date, str] = None, - dateMentionEnd: Union[datetime.datetime, datetime.date, str] = 
None, + lang: Union[str, QueryItems, None] = None, + keywords: Union[str, QueryItems, None] = None, + conceptUri: Union[str, QueryItems, None] = None, + categoryUri: Union[str, QueryItems, None] = None, + sourceUri: Union[str, QueryItems, None] = None, + sourceLocationUri: Union[str, QueryItems, None] = None, + sourceGroupUri: Union[str, QueryItems, None] = None, + authorUri: Union[str, QueryItems, None] = None, + locationUri: Union[str, QueryItems, None] = None, + dateStart: Union[datetime.datetime, datetime.date, str, None] = None, + dateEnd: Union[datetime.datetime, datetime.date, str, None] = None, + dateMentionStart: Union[datetime.datetime, datetime.date, str, None] = None, + dateMentionEnd: Union[datetime.datetime, datetime.date, str, None] = None, keywordsLoc: str = "body", + keywordSearchMode: Literal["simple", "exact", "phrase"] = "phrase", startSourceRankPercentile: int = 0, endSourceRankPercentile: int = 100, sortBy: str = "cosSim", sortByAsc: bool = False, - returnInfo: ReturnInfo = None, + returnInfo: Union[ReturnInfo, None] = None, **kwds): """ return articles about the event @@ -320,6 +324,7 @@ def __init__(self, @param dateMentionStart: limit the event articles to those that explicitly mention a date that is equal or greater than dateMentionStart. @param dateMentionEnd: limit the event articles to those that explicitly mention a date that is lower or equal to dateMentionEnd. @param keywordsLoc: where should we look when searching using the keywords provided by "keywords" parameter. "body" (default), "title", or "body,title" + @param keywordSearchMode: what search mode to use when specifying keywords. Possible values are: simple, exact, phrase @param startSourceRankPercentile: starting percentile of the sources to consider in the results (default: 0). Value should be in range 0-100 and divisible by 10. @param endSourceRankPercentile: ending percentile of the sources to consider in the results (default: 100). 
Value should be in range 0-100 and divisible by 10. @@ -348,20 +353,21 @@ def __init__(self, self._setQueryArrVal(lang, "lang", None, "or") # a single lang or list (possible: eng, deu, spa, zho, slv) # starting date of the published articles (e.g. 2014-05-02) - if dateStart != None: + if dateStart is not None: self._setDateVal("dateStart", dateStart) # ending date of the published articles (e.g. 2014-05-02) - if dateEnd != None: + if dateEnd is not None: self._setDateVal("dateEnd", dateEnd) # first valid mentioned date detected in articles (e.g. 2014-05-02) - if dateMentionStart != None: + if dateMentionStart is not None: self._setDateVal("dateMentionStart", dateMentionStart) # last valid mentioned date detected in articles (e.g. 2014-05-02) - if dateMentionEnd != None: + if dateMentionEnd is not None: self._setDateVal("dateMentionEnd", dateMentionEnd) self._setValIfNotDefault("keywordLoc", keywordsLoc, "body") + self._setValIfNotDefault("keywordSearchMode", keywordSearchMode, "phrase") assert startSourceRankPercentile >= 0 and startSourceRankPercentile % 10 == 0 and startSourceRankPercentile <= 100 assert endSourceRankPercentile >= 0 and endSourceRankPercentile % 10 == 0 and endSourceRankPercentile <= 100 @@ -383,7 +389,7 @@ def __init__(self, class RequestEventArticleUriWgts(RequestEvent): def __init__(self, - lang: Union[str, List[str]] = None, + lang: Union[str, List[str], None] = None, sortBy: str = "cosSim", sortByAsc: bool = False, **kwds): """ @@ -403,7 +409,7 @@ def __init__(self, class RequestEventKeywordAggr(RequestEvent): - def __init__(self, lang: Union[str, List[str]] = None, + def __init__(self, lang: Union[str, List[str], None] = None, **kwds): """ return keyword aggregate (tag-cloud) from articles in the event @@ -437,7 +443,7 @@ def __init__(self): class RequestEventArticleTrend(RequestEvent): def __init__(self, - lang: str = None, + lang: Union[str, None] = None, page: int = 1, count: int = 100, minArticleCosSim: int = -1, returnInfo: ReturnInfo 
= ReturnInfo(articleInfo = ArticleInfoFlags(bodyLen = 0))): @@ -464,8 +470,8 @@ class RequestEventSimilarEvents(RequestEvent): def __init__(self, conceptInfoList: List[dict], count: int = 50, # number of similar events to return - dateStart: Union[datetime.datetime, datetime.date, str] = None, # what can be the oldest date of the similar events - dateEnd: Union[datetime.datetime, datetime.date, str] = None, # what can be the newest date of the similar events + dateStart: Union[datetime.datetime, datetime.date, str, None] = None, # what can be the oldest date of the similar events + dateEnd: Union[datetime.datetime, datetime.date, str, None] = None, # what can be the newest date of the similar events addArticleTrendInfo: bool = False, # add info how the articles in the similar events are distributed over time aggrHours: int = 6, # if similarEventsAddArticleTrendInfo == True then this is the aggregating window returnInfo: ReturnInfo = ReturnInfo()): @@ -484,9 +490,9 @@ def __init__(self, self.action = "getSimilarEvents" self.concepts = json.dumps(conceptInfoList) self.eventsCount = count - if dateStart != None: + if dateStart is not None: self.dateStart = QueryParamsBase.encodeDate(dateStart) - if dateEnd != None: + if dateEnd is not None: self.dateEnd = QueryParamsBase.encodeDate(dateEnd) self.similarEventsAddArticleTrendInfo = addArticleTrendInfo self.similarEventsAggrHours = aggrHours diff --git a/eventregistry/QueryEvents.py b/eventregistry/QueryEvents.py index a705615..8ac001b 100644 --- a/eventregistry/QueryEvents.py +++ b/eventregistry/QueryEvents.py @@ -4,41 +4,45 @@ from eventregistry.Query import * from eventregistry.Logger import logger from eventregistry.EventRegistry import EventRegistry -from typing import Union, List +from typing import Union, List, Literal class QueryEvents(Query): def __init__(self, - keywords: Union[str, QueryItems] = None, - conceptUri: Union[str, QueryItems] = None, - categoryUri: Union[str, QueryItems] = None, - sourceUri: 
Union[str, QueryItems] = None, - sourceLocationUri: Union[str, QueryItems] = None, - sourceGroupUri: Union[str, QueryItems] = None, - authorUri: Union[str, QueryItems] = None, - locationUri: Union[str, QueryItems] = None, - lang: Union[str, QueryItems] = None, - dateStart: Union[datetime.datetime, datetime.date, str] = None, - dateEnd: Union[datetime.datetime, datetime.date, str] = None, - reportingDateStart: Union[datetime.datetime, datetime.date, str] = None, - reportingDateEnd: Union[datetime.datetime, datetime.date, str] = None, + keywords: Union[str, QueryItems, None] = None, + conceptUri: Union[str, QueryItems, None] = None, + categoryUri: Union[str, QueryItems, None] = None, + sourceUri: Union[str, QueryItems, None] = None, + sourceLocationUri: Union[str, QueryItems, None] = None, + sourceGroupUri: Union[str, QueryItems, None] = None, + authorUri: Union[str, QueryItems, None] = None, + locationUri: Union[str, QueryItems, None] = None, + lang: Union[str, QueryItems, None] = None, + dateStart: Union[datetime.datetime, datetime.date, str, None] = None, + dateEnd: Union[datetime.datetime, datetime.date, str, None] = None, + reportingDateStart: Union[datetime.datetime, datetime.date, str, None] = None, + reportingDateEnd: Union[datetime.datetime, datetime.date, str, None] = None, minSentiment: float = -1, maxSentiment: float = 1, - minArticlesInEvent: int = None, - maxArticlesInEvent: int = None, - dateMentionStart: Union[datetime.datetime, datetime.date, str] = None, - dateMentionEnd: Union[datetime.datetime, datetime.date, str] = None, - ignoreKeywords: Union[str, QueryItems] = None, - ignoreConceptUri: Union[str, QueryItems] = None, - ignoreCategoryUri: Union[str, QueryItems] = None, - ignoreSourceUri: Union[str, QueryItems] = None, - ignoreSourceLocationUri: Union[str, QueryItems] = None, - ignoreSourceGroupUri: Union[str, QueryItems] = None, - ignoreAuthorUri: Union[str, QueryItems] = None, - ignoreLocationUri: Union[str, QueryItems] = None, - ignoreLang: 
Union[str, QueryItems] = None, + minArticlesInEvent: Union[int, None] = None, + maxArticlesInEvent: Union[int, None] = None, + dateMentionStart: Union[datetime.datetime, datetime.date, str, None] = None, + dateMentionEnd: Union[datetime.datetime, datetime.date, str, None] = None, keywordsLoc: str = "body", + keywordSearchMode: Literal["simple", "exact", "phrase"] = "phrase", + + ignoreKeywords: Union[str, QueryItems, None] = None, + ignoreConceptUri: Union[str, QueryItems, None] = None, + ignoreCategoryUri: Union[str, QueryItems, None] = None, + ignoreSourceUri: Union[str, QueryItems, None] = None, + ignoreSourceLocationUri: Union[str, QueryItems, None] = None, + ignoreSourceGroupUri: Union[str, QueryItems, None] = None, + ignoreAuthorUri: Union[str, QueryItems, None] = None, + ignoreLocationUri: Union[str, QueryItems, None] = None, + ignoreLang: Union[str, QueryItems, None] = None, ignoreKeywordsLoc: str = "body", - requestedResult: "RequestEvents" = None): + ignoreKeywordSearchMode: Literal["simple", "exact", "phrase"] = "phrase", + + requestedResult: Union["RequestEvents", None] = None): """ Query class for searching for events in the Event Registry. The resulting events have to match all specified conditions. If a parameter value equals "" or [], then it is ignored. @@ -84,6 +88,9 @@ def __init__(self, @param maxArticlesInEvent: find events that have not been reported in more than maxArticlesInEvent articles (regardless of language) @param dateMentionStart: find events where articles explicitly mention a date that is equal or greater than dateMentionStart. @param dateMentionEnd: find events where articles explicitly mention a date that is lower or equal to dateMentionEnd. + @param keywordsLoc: what data should be used when searching using the keywords provided by "keywords" parameter. "body" (default), "title", or "body,title" + @param keywordSearchMode: what search mode to use when specifying keywords. 
Possible values are: simple, exact, phrase + @param ignoreKeywords: ignore events where articles about the event mention any of the provided keywords @param ignoreConceptUri: ignore events that are about any of the provided concepts @param ignoreCategoryUri: ignore events that are about any of the provided categories @@ -93,8 +100,9 @@ def __init__(self, @param ignoreAuthorUri: ignore articles that were written by *any* of the specified authors @param ignoreLocationUri: ignore events that occurred in any of the provided locations. A location can be a city or a place @param ignoreLang: ignore events that are reported in any of the provided languages - @param keywordsLoc: what data should be used when searching using the keywords provided by "keywords" parameter. "body" (default), "title", or "body,title" @param ignoreKeywordsLoc: what data should be used when searching using the keywords provided by "ignoreKeywords" parameter. "body" (default), "title", or "body,title" + @param ignoreKeywordSearchMode: what search mode to use when specifying ignoreKeywords. Possible values are: simple, exact, phrase + @param requestedResult: the information to return as the result of the query. By default return the list of matching events """ super(QueryEvents, self).__init__() @@ -112,13 +120,13 @@ def __init__(self, self._setQueryArrVal(lang, "lang", None, "or") # a single lang or list (possible: eng, deu, spa, zho, slv) - if (dateStart != None): + if dateStart is not None: self._setDateVal("dateStart", dateStart) # e.g. 2014-05-02 - if (dateEnd != None): + if dateEnd is not None: self._setDateVal("dateEnd", dateEnd) # e.g. 2014-05-02 - if (reportingDateStart != None): + if reportingDateStart is not None: self._setDateVal("reportingDateStart", reportingDateStart) # e.g. 2014-05-02 - if (reportingDateEnd != None): + if reportingDateEnd is not None: self._setDateVal("reportingDateEnd", reportingDateEnd) # e.g. 
2014-05-02 if minSentiment != -1: assert minSentiment >= -1 and minSentiment <= 1 @@ -130,11 +138,14 @@ def __init__(self, self._setValIfNotDefault("minArticlesInEvent", minArticlesInEvent, None) self._setValIfNotDefault("maxArticlesInEvent", maxArticlesInEvent, None) - if (dateMentionStart != None): + if dateMentionStart is not None: self._setDateVal("dateMentionStart", dateMentionStart) # e.g. 2014-05-02 - if (dateMentionEnd != None): + if dateMentionEnd is not None: self._setDateVal("dateMentionEnd", dateMentionEnd) # e.g. 2014-05-02 + self._setValIfNotDefault("keywordLoc", keywordsLoc, "body") + self._setValIfNotDefault("keywordSearchMode", keywordSearchMode, "phrase") + # for the negative conditions, only the OR is a valid operator type self._setQueryArrVal(ignoreKeywords, "ignoreKeywords", None, "or") @@ -148,8 +159,8 @@ def __init__(self, self._setQueryArrVal(ignoreLang, "ignoreLang", None, "or") - self._setValIfNotDefault("keywordLoc", keywordsLoc, "body") self._setValIfNotDefault("ignoreKeywordLoc", ignoreKeywordsLoc, "body") + self._setValIfNotDefault("ignoreKeywordSearchMode", ignoreKeywordSearchMode, "phrase") self.setRequestedResult(requestedResult or RequestEventsInfo()) @@ -240,7 +251,7 @@ def count(self, eventRegistry: EventRegistry): def execQuery(self, eventRegistry: EventRegistry, sortBy: str = "rel", sortByAsc: bool = False, - returnInfo: ReturnInfo = None, + returnInfo: Union[ReturnInfo, None] = None, maxItems: int = -1, **kwargs): """ @@ -288,17 +299,17 @@ def _getNextEventBatch(self): """download next batch of events based on the event uris in the uri list""" self._eventPage += 1 # if we have already obtained all pages, then exit - if self._totalPages != None and self._eventPage > self._totalPages: + if self._totalPages is not None and self._eventPage > self._totalPages: return self.setRequestedResult(RequestEventsInfo(page=self._eventPage, count=self._eventBatchSize, sortBy= self._sortBy, sortByAsc=self._sortByAsc, returnInfo = 
self._returnInfo)) # download articles and make sure that we set the same archive flag as it was returned when we were processing the uriList request if self._er._verboseOutput: - logger.debug("Downloading event page %d..." % (self._eventPage)) + logger.debug("Downloading event page %d...", self._eventPage) res = self._er.execQuery(self) if "error" in res: - logger.error("Error while obtaining a list of events: " + res["error"]) + logger.error("Error while obtaining a list of events: %s", res["error"]) else: self._totalPages = res.get("events", {}).get("pages", 0) results = res.get("events", {}).get("results", []) @@ -337,7 +348,7 @@ class RequestEventsInfo(RequestEvents): def __init__(self, page: int = 1, count: int = 50, sortBy: str = "rel", sortByAsc: bool = False, - returnInfo: ReturnInfo = None): + returnInfo: Union[ReturnInfo, None] = None): """ return event details for resulting events @param page: page of the results to return (1, 2, ...) @@ -347,6 +358,7 @@ def __init__(self, page: int = 1, @param sortByAsc: should the results be sorted in ascending order (True) or descending (False) @param returnInfo: what details should be included in the returned information """ + super(RequestEvents, self).__init__() assert page >= 1, "page has to be >= 1" assert count <= 50, "at most 50 events can be returned per call" self.resultType = "events" @@ -354,7 +366,7 @@ def __init__(self, page: int = 1, self.eventsCount = count self.eventsSortBy = sortBy self.eventsSortByAsc = sortByAsc - if returnInfo != None: + if returnInfo is not None: self.__dict__.update(returnInfo.getParams("events")) @@ -381,6 +393,7 @@ def __init__(self, socialScore (amount of shares in social media), none (no specific sorting) @param sortByAsc: should the events be sorted in ascending order (True) or descending (False) """ + super(RequestEvents, self).__init__() assert page >= 1, "page has to be >= 1" assert count <= 100000 self.resultType = "uriWgtList" @@ -389,6 +402,7 @@ def __init__(self, 
self.uriWgtListSortBy = sortBy self.uriWgtListSortByAsc = sortByAsc + def setPage(self, page): assert page >= 1, "page has to be >= 1" self.uriWgtListPage = page @@ -400,18 +414,20 @@ def __init__(self): """ return time distribution of resulting events """ + super(RequestEvents, self).__init__() self.resultType = "timeAggr" class RequestEventsKeywordAggr(RequestEvents): - def __init__(self, lang: str = None): + def __init__(self, lang: Union[str, None] = None): """ return keyword aggregate (tag cloud) on words in articles in resulting events @param lang: in which language to produce the list of top keywords. If None, then compute on all articles """ + super(RequestEvents, self).__init__() self.resultType = "keywordAggr" - if lang != None: + if lang is not None: self.keywordAggrLang = lang @@ -425,6 +441,7 @@ def __init__(self, @param eventsSampleSize: sample of events to use to compute the location aggregate (at most 100000) @param returnInfo: what details (about locations) should be included in the returned information """ + super(RequestEvents, self).__init__() assert eventsSampleSize <= 100000 self.resultType = "locAggr" self.locAggrSampleSize = eventsSampleSize @@ -442,6 +459,7 @@ def __init__(self, @param eventsSampleSize: sample of events to use to compute the location aggregate (at most 100000) @param returnInfo: what details (about locations) should be included in the returned information """ + super(RequestEvents, self).__init__() assert eventsSampleSize <= 100000 self.resultType = "locTimeAggr" self.locTimeAggrSampleSize = eventsSampleSize @@ -460,6 +478,7 @@ def __init__(self, @param eventsSampleSize: on what sample of results should the aggregate be computed (at most 1000000) @param returnInfo: what details about the concepts should be included in the returned information """ + super(RequestEvents, self).__init__() assert conceptCount <= 200 assert eventsSampleSize <= 1000000 self.resultType = "conceptAggr" @@ -482,6 +501,7 @@ def __init__(self, @param 
eventsSampleSize: on what sample of results should the aggregate be computed (at most 100000) @param returnInfo: what details about the concepts should be included in the returned information """ + super(RequestEvents, self).__init__() assert conceptCount <= 1000 assert linkCount <= 2000 assert eventsSampleSize <= 300000 @@ -508,6 +528,7 @@ def __init__(self, @param eventsSampleSize: on what sample of results should the aggregate be computed (at most 300000) @param returnInfo: what details about the concepts should be included in the returned information """ + super(RequestEvents, self).__init__() assert conceptCount <= 200 assert eventsSampleSize <= 300000 self.resultType = "conceptMatrix" @@ -520,7 +541,7 @@ def __init__(self, class RequestEventsConceptTrends(RequestEvents): def __init__(self, - conceptUris: Union[str, List[str]] = None, + conceptUris: Union[str, List[str], None] = None, conceptCount: int = 10, returnInfo: ReturnInfo = ReturnInfo()): """ @@ -529,9 +550,10 @@ def __init__(self, @param count: if the concepts are not provided, what should be the number of automatically determined concepts to return (at most 50) @param returnInfo: what details about the concepts should be included in the returned information """ + super(RequestEvents, self).__init__() assert conceptCount <= 50 self.resultType = "conceptTrends" - if conceptUris != None: + if conceptUris is not None: self.conceptTrendsConceptUri = conceptUris self.conceptTrendsConceptCount = conceptCount self.__dict__.update(returnInfo.getParams("conceptTrends")) @@ -549,6 +571,7 @@ def __init__(self, @param eventsSampleSize: on what sample of results should the aggregate be computed (at most 300000) @param returnInfo: what details about the sources should be included in the returned information """ + super(RequestEvents, self).__init__() assert sourceCount <= 200 assert eventsSampleSize <= 100000 self.resultType = "sourceAggr" @@ -569,6 +592,7 @@ def __init__(self, @param minDateMentionCount: report 
only dates that are mentioned at least this number of times @param eventsSampleSize: on what sample of results should the aggregate be computed (at most 300000) """ + super(RequestEvents, self).__init__() assert eventsSampleSize <= 300000 self.resultType = "dateMentionAggr" self.dateMentionAggrMinDaysApart = minDaysApart @@ -588,6 +612,7 @@ def __init__(self, @param maxEventsToCluster: try to cluster at most this number of events (at most 10000) @param returnInfo: what details about the concepts should be included in the returned information """ + super(RequestEvents, self).__init__() assert keywordCount <= 100 assert maxEventsToCluster <= 10000 self.resultType = "eventClusters" @@ -604,6 +629,7 @@ def __init__(self, return distribution of events into dmoz categories @param returnInfo: what details about the categories should be included in the returned information """ + super(RequestEvents, self).__init__() self.resultType = "categoryAggr" self.__dict__.update(returnInfo.getParams("categoryAggr")) @@ -612,11 +638,11 @@ def __init__(self, class RequestEventsRecentActivity(RequestEvents): def __init__(self, maxEventCount: int = 50, - updatesAfterTm: Union[datetime.datetime, datetime.date, str] = None, - updatesAfterMinsAgo: int = None, - mandatoryLocation: bool = True, + updatesAfterTm: Union[datetime.datetime, str, None] = None, + updatesAfterMinsAgo: Union[int, None] = None, + mandatoryLocation: Union[bool, None] = True, minAvgCosSim: float = 0, - returnInfo: ReturnInfo = None): + returnInfo: Union[ReturnInfo, None] = None): """ return a list of recently changed events that match search conditions @param maxEventCount: max events to return (at most 200) @@ -626,17 +652,18 @@ def __init__(self, @param minAvgCosSim: the minimum avg cos sim of the events to be returned (events with lower quality should not be included) @param returnInfo: what details should be included in the returned information """ + super(RequestEvents, self).__init__() assert maxEventCount <= 
2000 - assert updatesAfterTm == None or updatesAfterMinsAgo == None, "You should specify either updatesAfterTm or updatesAfterMinsAgo parameter, but not both" + assert updatesAfterTm is None or updatesAfterMinsAgo is None, "You should specify either updatesAfterTm or updatesAfterMinsAgo parameter, but not both" self.resultType = "recentActivityEvents" self.recentActivityEventsMaxEventCount = maxEventCount self.recentActivityEventsMandatoryLocation = mandatoryLocation - if updatesAfterTm != None: + if updatesAfterTm is not None: self.recentActivityEventsUpdatesAfterTm = QueryParamsBase.encodeDateTime(updatesAfterTm) - if updatesAfterMinsAgo != None: + if updatesAfterMinsAgo is not None: self.recentActivityEventsUpdatesAfterMinsAgo = updatesAfterMinsAgo self.recentActivityEventsMinAvgCosSim = minAvgCosSim - if returnInfo != None: + if returnInfo is not None: self.__dict__.update(returnInfo.getParams("recentActivityEvents")) @@ -645,7 +672,7 @@ def __init__(self, page: int = 1, count: int = 50, minBreakingScore: float = 0.2, - returnInfo: ReturnInfo = None): + returnInfo: Union[ReturnInfo, None] = None): """ return a list of events that are currently breaking @param page: max events to return (at most 50) @@ -653,13 +680,14 @@ def __init__(self, @param minBreakingScore: the minimum score of "breakingness" of the events to be returned @param returnInfo: what details should be included in the returned information """ + super(RequestEvents, self).__init__() assert page >= 1 assert count <= 50 self.resultType = "breakingEvents" self.breakingEventsPage = page self.breakingEventsCount = count self.breakingEventsMinBreakingScore = minBreakingScore - if returnInfo != None: + if returnInfo is not None: self.__dict__.update(returnInfo.getParams("breakingEvents")) diff --git a/eventregistry/QueryMentions.py b/eventregistry/QueryMentions.py index 2d2b2ed..978e91b 100644 --- a/eventregistry/QueryMentions.py +++ b/eventregistry/QueryMentions.py @@ -9,44 +9,44 @@ class 
QueryMentions(Query): def __init__(self, - eventTypeUri: Union[str, QueryItems] = None, - keywords: Union[str, QueryItems] = None, - conceptUri: Union[str, QueryItems] = None, - categoryUri: Union[str, QueryItems] = None, - sourceUri: Union[str, QueryItems] = None, - sourceLocationUri: Union[str, QueryItems] = None, - sourceGroupUri: Union[str, QueryItems] = None, - industryUri: Union[str, QueryItems] = None, - sdgUri: Union[str, QueryItems] = None, - sasbUri: Union[str, QueryItems] = None, - esgUri: Union[str, QueryItems] = None, - locationUri: Union[str, QueryItems] = None, - lang: Union[str, QueryItems] = None, - dateStart: Union[datetime.datetime, datetime.date, str] = None, - dateEnd: Union[datetime.datetime, datetime.date, str] = None, - - ignoreEventTypeUri: Union[str, QueryItems] = None, - ignoreKeywords: Union[str, QueryItems] = None, - ignoreConceptUri: Union[str, QueryItems] = None, - ignoreCategoryUri: Union[str, QueryItems] = None, - ignoreSourceUri: Union[str, QueryItems] = None, - ignoreSourceLocationUri: Union[str, QueryItems] = None, - ignoreSourceGroupUri: Union[str, QueryItems] = None, - ignoreIndustryUri: Union[str, QueryItems] = None, - ignoreSdgUri: Union[str, QueryItems] = None, - ignoreSasbUri: Union[str, QueryItems] = None, - ignoreEsgUri: Union[str, QueryItems] = None, - ignoreLocationUri: Union[str, QueryItems] = None, - ignoreLang: Union[str, QueryItems] = None, + eventTypeUri: Union[str, QueryItems, None] = None, + keywords: Union[str, QueryItems, None] = None, + conceptUri: Union[str, QueryItems, None] = None, + categoryUri: Union[str, QueryItems, None] = None, + sourceUri: Union[str, QueryItems, None] = None, + sourceLocationUri: Union[str, QueryItems, None] = None, + sourceGroupUri: Union[str, QueryItems, None] = None, + industryUri: Union[str, QueryItems, None] = None, + sdgUri: Union[str, QueryItems, None] = None, + sasbUri: Union[str, QueryItems, None] = None, + esgUri: Union[str, QueryItems, None] = None, + locationUri: 
Union[str, QueryItems, None] = None, + lang: Union[str, QueryItems, None] = None, + dateStart: Union[datetime.datetime, datetime.date, str, None] = None, + dateEnd: Union[datetime.datetime, datetime.date, str, None] = None, + + ignoreEventTypeUri: Union[str, QueryItems, None] = None, + ignoreKeywords: Union[str, QueryItems, None] = None, + ignoreConceptUri: Union[str, QueryItems, None] = None, + ignoreCategoryUri: Union[str, QueryItems, None] = None, + ignoreSourceUri: Union[str, QueryItems, None] = None, + ignoreSourceLocationUri: Union[str, QueryItems, None] = None, + ignoreSourceGroupUri: Union[str, QueryItems, None] = None, + ignoreIndustryUri: Union[str, QueryItems, None] = None, + ignoreSdgUri: Union[str, QueryItems, None] = None, + ignoreSasbUri: Union[str, QueryItems, None] = None, + ignoreEsgUri: Union[str, QueryItems, None] = None, + ignoreLocationUri: Union[str, QueryItems, None] = None, + ignoreLang: Union[str, QueryItems, None] = None, showDuplicates: bool = False, startSourceRankPercentile: int = 0, endSourceRankPercentile: int = 100, minSentiment: float = -1, maxSentiment: float = 1, - minSentenceIndex: int = None, - maxSentenceIndex: int = None, - requestedResult: "RequestMentions" = None): + minSentenceIndex: Union[int, None] = None, + maxSentenceIndex: Union[int, None] = None, + requestedResult: Union["RequestMentions", None] = None): """ Query class for searching for individual mentions in the Event Registry. The resulting mentions (objects, containing sentence, article information, mentioned entities, etc.) have to match all specified conditions. @@ -144,10 +144,10 @@ def __init__(self, self._setQueryArrVal(lang, "lang", None, "or") # a single lang or list (possible: eng, deu, spa, zho, slv) # starting date of the published articles (e.g. 2014-05-02) - if dateStart != None: + if dateStart is not None: self._setDateVal("dateStart", dateStart) # ending date of the published articles (e.g. 
2014-05-02) - if dateEnd != None: + if dateEnd is not None: self._setDateVal("dateEnd", dateEnd) @@ -181,10 +181,10 @@ def __init__(self, if maxSentiment != 1: assert maxSentiment >= -1 and maxSentiment <= 1 self._setVal("maxSentiment", maxSentiment) - if minSentenceIndex != None: + if minSentenceIndex is not None: assert minSentenceIndex >= 0 self._setVal("minSentenceIndex", minSentenceIndex) - if maxSentenceIndex != None: + if maxSentenceIndex is not None: assert maxSentenceIndex >= 0 self._setVal("maxSentenceIndex", maxSentenceIndex) @@ -270,7 +270,7 @@ def count(self, eventRegistry: EventRegistry): def execQuery(self, eventRegistry: EventRegistry, sortBy: str = "rel", sortByAsc: bool = False, - returnInfo: ReturnInfo = None, + returnInfo: Union[ReturnInfo, None] = None, maxItems: int = -1, **kwargs): """ @@ -330,16 +330,16 @@ def _getNextMentionBatch(self): # try to get more uris, if none self._mentionPage += 1 # if we have already obtained all pages, then exit - if self._totalPages != None and self._mentionPage > self._totalPages: + if self._totalPages is not None and self._mentionPage > self._totalPages: return self.setRequestedResult(RequestMentionsInfo(page=self._mentionPage, sortBy=self._sortBy, sortByAsc=self._sortByAsc, returnInfo = self._returnInfo)) if self._er._verboseOutput: - logger.debug("Downloading mention page %d..." 
% (self._mentionPage)) + logger.debug("Downloading mention page %d...", self._mentionPage) res = self._er.execQuery(self) if "error" in res: - logger.error("Error while obtaining a list of mentions: " + res["error"]) + logger.error("Error while obtaining a list of mentions: %s", res["error"]) else: self._totalPages = res.get("mentions", {}).get("pages", 0) results = res.get("mentions", {}).get("results", []) @@ -379,7 +379,7 @@ def __init__(self, page: int = 1, count: int = 100, sortBy: str = "date", sortByAsc: bool = False, - returnInfo: RequestMentions = None): + returnInfo: Union[ReturnInfo, None] = None): """ return mention details for resulting mentions @param page: page of the mentions to return @@ -388,6 +388,7 @@ def __init__(self, @param sortByAsc: should the results be sorted in ascending order (True) or descending (False) @param returnInfo: what details should be included in the returned information """ + super(RequestMentions, self).__init__() assert page >= 1, "page has to be >= 1" assert count <= 200, "at most 100 mentions can be returned per call" self.resultType = "mentions" @@ -395,7 +396,7 @@ def __init__(self, self.mentionsCount = count self.mentionsSortBy = sortBy self.mentionsSortByAsc = sortByAsc - if returnInfo != None: + if returnInfo is not None: self.__dict__.update(returnInfo.getParams("mentions")) @@ -420,6 +421,7 @@ def __init__(self, @param sortBy: how are mentions sorted. 
Options: id (internal id), date (publishing date), cosSim (closeness to the event centroid), rel (relevance to the query), sourceImportance (manually curated score of source importance - high value, high importance), sourceImportanceRank (reverse of sourceImportance), sourceAlexaGlobalRank (global rank of the news source), sourceAlexaCountryRank (country rank of the news source), socialScore (total shares on social media), facebookShares (shares on Facebook only) @param sortByAsc: should the results be sorted in ascending order (True) or descending (False) according to the sortBy criteria """ + super(RequestMentions, self).__init__() assert page >= 1, "page has to be >= 1" assert count <= 50000 self.resultType = "uriWgtList" @@ -440,6 +442,7 @@ def __init__(self): """ return time distribution of resulting mentions """ + super(RequestMentions, self).__init__() self.resultType = "timeAggr" @@ -447,7 +450,7 @@ def __init__(self): class RequestMentionsConceptAggr(RequestMentions): def __init__(self, conceptCount: int = 25, - conceptCountPerType: bool = None, + conceptCountPerType: Union[int, None] = None, conceptScoring: str = "importance", mentionsSampleSize: int = 10000, returnInfo: ReturnInfo = ReturnInfo()): @@ -463,13 +466,14 @@ def __init__(self, @param mentionsSampleSize: on what sample of results should the aggregate be computed (at most 20000) @param returnInfo: what details about the concepts should be included in the returned information """ + super(RequestMentions, self).__init__() assert conceptCount <= 500 assert mentionsSampleSize <= 20000 self.resultType = "conceptAggr" self.conceptAggrConceptCount = conceptCount self.conceptAggrSampleSize = mentionsSampleSize self.conceptAggrScoring = conceptScoring - if conceptCountPerType != None: + if conceptCountPerType is not None: self.conceptAggrConceptCountPerType = conceptCountPerType self.__dict__.update(returnInfo.getParams("conceptAggr")) @@ -484,6 +488,7 @@ def __init__(self, @param mentionsSampleSize: on 
what sample of results should the aggregate be computed (at most 50000) @param returnInfo: what details about the categories should be included in the returned information """ + super(RequestMentions, self).__init__() assert mentionsSampleSize <= 50000 self.resultType = "categoryAggr" self.categoryAggrSampleSize = mentionsSampleSize @@ -500,6 +505,7 @@ def __init__(self, @param sourceCount: the number of top sources to return @param returnInfo: what details about the sources should be included in the returned information """ + super(RequestMentions, self).__init__() self.resultType = "sourceAggr" self.sourceAggrSourceCount = sourceCount self.__dict__.update(returnInfo.getParams("sourceAggr")) @@ -512,6 +518,7 @@ def __init__(self, get top keywords in the resulting mentions @param mentionsSampleSize: on what sample of results should the aggregate be computed (at most 20000) """ + super(RequestMentions, self).__init__() assert mentionsSampleSize <= 20000 self.resultType = "keywordAggr" self.keywordAggrSampleSize = mentionsSampleSize @@ -532,6 +539,7 @@ def __init__(self, @param mentionsSampleSize: on what sample of results should the aggregate be computed (at most 50000) @param returnInfo: what details about the concepts should be included in the returned information """ + super(RequestMentions, self).__init__() assert conceptCount <= 1000 assert linkCount <= 2000 assert mentionsSampleSize <= 50000 @@ -547,13 +555,13 @@ def __init__(self, class RequestMentionsRecentActivity(RequestMentions): def __init__(self, maxMentionCount: int = 100, - updatesAfterUri: str =None, - updatesAfterTm: Union[datetime.datetime, str] = None, - updatesAfterMinsAgo: int = None, - updatesUntilTm: Union[datetime.datetime, str] = None, - updatesUntilMinsAgo: int = None, + updatesAfterUri: Union[str, None] = None, + updatesAfterTm: Union[datetime.datetime, str, None] = None, + updatesAfterMinsAgo: Union[int, None] = None, + updatesUntilTm: Union[datetime.datetime, str, None] = None, + 
updatesUntilMinsAgo: Union[int, None] = None, mandatorySourceLocation: bool = False, - returnInfo: ReturnInfo = None): + returnInfo: Union[ReturnInfo, None] = None): """ get the list of mentions that were recently added to the Event Registry and match the selected criteria @param maxMentionCount: the maximum number of mentions to return in the call (the number can be even higher than 100 but in case more mentions @@ -565,25 +573,26 @@ def __init__(self, @param mandatorySourceLocation: return only mentions for which we know the source's geographic location @param returnInfo: what details should be included in the returned information """ + super(RequestMentions, self).__init__() assert maxMentionCount <= 2000 - assert updatesAfterTm == None or updatesAfterMinsAgo == None, "You should specify either updatesAfterTm or updatesAfterMinsAgo parameter, but not both" - assert updatesUntilTm == None or updatesUntilMinsAgo == None, "You should specify either updatesUntilTm or updatesUntilMinsAgo parameter, but not both" + assert updatesAfterTm is None or updatesAfterMinsAgo is None, "You should specify either updatesAfterTm or updatesAfterMinsAgo parameter, but not both" + assert updatesUntilTm is None or updatesUntilMinsAgo is None, "You should specify either updatesUntilTm or updatesUntilMinsAgo parameter, but not both" self.resultType = "recentActivityMentions" self.recentActivityMentionsMaxMentionCount = maxMentionCount - if updatesAfterTm != None: + if updatesAfterTm is not None: self.recentActivityMentionsUpdatesAfterTm = QueryParamsBase.encodeDateTime(updatesAfterTm) - if updatesAfterMinsAgo != None: + if updatesAfterMinsAgo is not None: self.recentActivityMentionsUpdatesAfterMinsAgo = updatesAfterMinsAgo - if updatesUntilTm != None: + if updatesUntilTm is not None: self.recentActivityMentionsUpdatesUntilTm = QueryParamsBase.encodeDateTime(updatesUntilTm) - if updatesUntilMinsAgo != None: + if updatesUntilMinsAgo is not None: 
self.recentActivityMentionsUpdatesUntilMinsAgo = updatesUntilMinsAgo # set the stopping uris, if provided - if updatesAfterUri != None: + if updatesAfterUri is not None: self.recentActivityMentionsUpdatesAfterUri = updatesAfterUri self.recentActivityMentionsMaxMentionCount = maxMentionCount self.recentActivityMentionsMandatorySourceLocation = mandatorySourceLocation - if returnInfo != None: + if returnInfo is not None: self.__dict__.update(returnInfo.getParams("recentActivityMentions")) \ No newline at end of file diff --git a/eventregistry/QueryStory.py b/eventregistry/QueryStory.py index 7542d4a..ac4d79e 100644 --- a/eventregistry/QueryStory.py +++ b/eventregistry/QueryStory.py @@ -12,7 +12,7 @@ class QueryStory(Query): @param storyUriOrList: a single story uri or a list of story uris """ - def __init__(self, storyUriOrList: Union[str, List[str]] = None): + def __init__(self, storyUriOrList: Union[str, List[str], None] = None): super(QueryStory, self).__init__() self._setVal("action", "getStory") if storyUriOrList != None: @@ -53,6 +53,7 @@ class RequestStoryInfo(RequestStory): return details about a story """ def __init__(self, returnInfo: ReturnInfo = ReturnInfo()): + super(RequestStory, self).__init__() self.resultType = "info" self.__dict__.update(returnInfo.getParams("info")) @@ -75,6 +76,7 @@ def __init__(self, @param sortByAsc: should the articles be sorted in ascending order (True) or descending (False) based on sortBy value @param returnInfo: what details should be included in the returned information """ + super(RequestStory, self).__init__() assert page >= 1, "page has to be >= 1" assert count <= 100 self.resultType = "articles" @@ -98,6 +100,7 @@ def __init__(self, @param sortBy: order in which articles are sorted. 
Options: id (internal id), date (published date), cosSim (closeness to event centroid), sourceImportanceRank (importance of the news source, custom set), sourceAlexaGlobalRank (global rank of the news source), sourceAlexaCountryRank (country rank of the news source), socialScore (total shares in social media) @param sortByAsc: should the articles be sorted in ascending order (True) or descending (False) based on sortBy value """ + super(RequestStory, self).__init__() self.articleUrisSortBy = sortBy self.articleUrisSortByAsc = sortByAsc self.resultType = "articleUris" @@ -112,6 +115,7 @@ def __init__(self, lang: Union[str, List[str]] = mainLangs, minArticleCosSim: float = -1, returnInfo: ReturnInfo = ReturnInfo(articleInfo = ArticleInfoFlags(bodyLen = 0))): + super(RequestStory, self).__init__() self.resultType = "articleTrend" self.articleTrendLang = lang self.articleTrendMinArticleCosSim = minArticleCosSim @@ -132,19 +136,20 @@ class RequestStorySimilarStories(RequestStory): """ def __init__(self, conceptInfoList: Union[str, List[str]], - count: int = 50, # number of similar stories to return - dateStart: Union[datetime.date, str] = None, # what can be the oldest date of the similar stories - dateEnd: Union[datetime.date, str] = None, # what can be the newest date of the similar stories + count: int = 50, # number of similar stories to return + dateStart: Union[datetime.date, str, None] = None, # what can be the oldest date of the similar stories + dateEnd: Union[datetime.date, str, None] = None, # what can be the newest date of the similar stories lang: Union[str, List[str]] = [], returnInfo: ReturnInfo = ReturnInfo()): + super(RequestStory, self).__init__() assert count <= 50 assert isinstance(conceptInfoList, list) self.action = "getSimilarStories" self.concepts = json.dumps(conceptInfoList) self.storiesCount = count - if dateStart != None: + if dateStart is not None: self.dateStart = QueryParamsBase.encodeDate(dateStart) - if dateEnd != None: + if dateEnd is 
not None: self.dateEnd = QueryParamsBase.encodeDate(dateEnd) if len(lang) > 0: self.lang = lang diff --git a/eventregistry/Recent.py b/eventregistry/Recent.py index d1b4bc9..675fbe6 100644 --- a/eventregistry/Recent.py +++ b/eventregistry/Recent.py @@ -10,7 +10,7 @@ class GetRecentEvents(QueryParamsBase): def __init__(self, eventRegistry: EventRegistry, - mandatoryLang: Union[str, List[str]] = None, + mandatoryLang: Union[str, List[str], None] = None, mandatoryLocation: bool = True, returnInfo: ReturnInfo = ReturnInfo(), **kwargs): @@ -26,7 +26,7 @@ def __init__(self, self._er = eventRegistry self._setVal("recentActivityEventsMandatoryLocation", mandatoryLocation) # return only events that have at least a story in the specified language - if mandatoryLang != None: + if mandatoryLang is not None: self._setVal("recentActivityEventsMandatoryLang", mandatoryLang) self.queryParams.update(kwargs) self._update(returnInfo.getParams("recentActivityEvents")) @@ -57,7 +57,7 @@ class GetRecentArticles(QueryParamsBase): def __init__(self, eventRegistry: EventRegistry, mandatorySourceLocation: bool = False, - articleLang: Union[str, List[str]] = None, + articleLang: Union[str, List[str], None] = None, returnInfo: ReturnInfo = ReturnInfo(), **kwargs): """ @@ -71,7 +71,7 @@ def __init__(self, self._er = eventRegistry self._setVal("recentActivityArticlesMandatorySourceLocation", mandatorySourceLocation) - if articleLang != None: + if articleLang is not None: self._setVal("recentActivityArticlesLang", articleLang) self.queryParams.update(kwargs) self._update(returnInfo.getParams("recentActivityArticles")) diff --git a/eventregistry/ReturnInfo.py b/eventregistry/ReturnInfo.py index d02d512..90e4aa5 100644 --- a/eventregistry/ReturnInfo.py +++ b/eventregistry/ReturnInfo.py @@ -459,7 +459,7 @@ def loadFromFile(fileName: str): @param fileName: filename that contains the json configuration to use in the ReturnInfo """ assert os.path.exists(fileName), "File " + fileName + " does not 
exist" - conf = json.load(open(fileName)) + conf = json.load(open(fileName, encoding="utf8")) return ReturnInfo( articleInfo=ArticleInfoFlags(**conf.get("articleInfo", {})), eventInfo=EventInfoFlags(**conf.get("eventInfo", {})), diff --git a/eventregistry/TopicPage.py b/eventregistry/TopicPage.py index 2428f78..9ba294b 100644 --- a/eventregistry/TopicPage.py +++ b/eventregistry/TopicPage.py @@ -20,8 +20,10 @@ def __init__(self, eventRegistry: EventRegistry): @param eventRegistry: instance of class EventRegistry """ + super(QueryParamsBase, self).__init__() self.eventRegistry = eventRegistry + def getMyTopicPages(self): """ get the list of topic pages owned by me @@ -38,6 +40,7 @@ def __init__(self, eventRegistry: EventRegistry): @param eventRegistry: instance of class EventRegistry """ + super(QueryParamsBase, self).__init__() self.eventRegistry = eventRegistry # topic page definition self.topicPage = self._createEmptyTopicPage() @@ -243,7 +246,7 @@ def clearLocations(self): self.topicPage["locations"] = [] - def addConcept(self, conceptUri: str, weight: float, label: str = None, conceptType: str = None, required: bool = False, excluded: bool = False): + def addConcept(self, conceptUri: str, weight: float, label: Union[str, None] = None, conceptType: Union[str, None] = None, required: bool = False, excluded: bool = False): """ add a relevant concept to the topic page @param conceptUri: uri of the concept to be added @@ -252,11 +255,11 @@ def addConcept(self, conceptUri: str, weight: float, label: str = None, conceptT @param excluded: if true, then all results annotated with this concept will be ignored """ assert isinstance(weight, (float, int)), "weight value has to be a positive or negative integer" - assert not (required == True and excluded == True), "Parameters required and excluded can not be True at the same time" + assert not (required is True and excluded is True), "Parameters required and excluded can not be True at the same time" concept = {"uri": 
conceptUri, "wgt": weight, "required": required, "excluded": excluded } - if label != None: + if label is not None: concept["label"] = label - if conceptType != None: + if conceptType is not None: concept["type"] = conceptType self.topicPage["concepts"].append(concept) @@ -270,7 +273,7 @@ def addKeyword(self, keyword: str, weight: float, required: bool = False, exclud @param excluded: if true, then no results that mention this keyword will be returned """ assert isinstance(weight, (float, int)), "weight value has to be a positive or negative integer" - assert not (required == True and excluded == True), "Parameters required and excluded can not be True at the same time" + assert not (required is True and excluded is True), "Parameters required and excluded can not be True at the same time" self.topicPage["keywords"].append({"keyword": keyword, "wgt": weight, "required": required, "excluded": excluded }) @@ -283,7 +286,7 @@ def addCategory(self, categoryUri: str, weight: float, required: bool = False, e @param excluded: if true, then no results with this category will be returned """ assert isinstance(weight, (float, int)), "weight value has to be a positive or negative integer" - assert not (required == True and excluded == True), "Parameters required and excluded can not be True at the same time" + assert not (required is True and excluded is True), "Parameters required and excluded can not be True at the same time" self.topicPage["categories"].append({"uri": categoryUri, "wgt": weight, "required": required, "excluded": excluded }) diff --git a/eventregistry/Trends.py b/eventregistry/Trends.py index bc742cb..b037e5a 100644 --- a/eventregistry/Trends.py +++ b/eventregistry/Trends.py @@ -26,7 +26,7 @@ def __init__(self, @param conceptType: which types of concepts are we interested in @param returnInfo: what details should be included in the returned information """ - QueryParamsBase.__init__(self) + TrendsBase.__init__(self) self._setVal("action", 
"getTrendingConcepts") self._setVal("source", source) if source != "social": @@ -48,7 +48,7 @@ def __init__(self, @param count: number of top trends to return @param returnInfo: what details should be included in the returned information """ - QueryParamsBase.__init__(self) + TrendsBase.__init__(self) self._setVal("action", "getTrendingCategories") self._setVal("source", source) if source != "social": @@ -68,7 +68,7 @@ def __init__(self, @param count: number of top trends to return @param returnInfo: what details should be included in the returned information """ - QueryParamsBase.__init__(self) + TrendsBase.__init__(self) self._setVal("action", "getTrendingCustom") self._setVal("conceptCount", count) self._update(returnInfo.getParams()) @@ -87,7 +87,7 @@ def __init__(self, @param count: number of top trends to return @param returnInfo: what details should be included in the returned information """ - QueryParamsBase.__init__(self) + TrendsBase.__init__(self) self._setVal("action", "getConceptTrendGroups") self._setVal("source", source) self._setVal("conceptCount", count) diff --git a/eventregistry/_version.py b/eventregistry/_version.py index 6c77fe1..d0c2471 100644 --- a/eventregistry/_version.py +++ b/eventregistry/_version.py @@ -1 +1 @@ -__version__ = "9.0" +__version__ = "9.1" diff --git a/eventregistry/examples/AutoSuggestionsExamples.py b/eventregistry/examples/AutoSuggestionsExamples.py index bdec748..e84e13e 100644 --- a/eventregistry/examples/AutoSuggestionsExamples.py +++ b/eventregistry/examples/AutoSuggestionsExamples.py @@ -9,30 +9,38 @@ # get concept uris for concepts based on the concept labels: conceptUrisMatchingObama = er.suggestConcepts("Obama", lang = "eng", conceptLang = ["eng", "deu"]) print(conceptUrisMatchingObama) + # get only the top concept that best matches the prefix conceptUriForBarackObama = er.getConceptUri("Obama") print("A URI of the top concept that contains the term 'Obama': " + conceptUriForBarackObama) + # return a list of 
categories that contain text "Business" businessRelated = er.suggestCategories("Business") print(businessRelated) + # return the top category that contains text "Business" businessCategoryUri = er.getCategoryUri("Business") print("A URI of the top category that contains the term 'Business': " + businessCategoryUri) + # get a list of locations that best match the prefix "Lond" locations = er.suggestLocations("Lond") print(locations) + # get a top location that best matches the prefix "Lond" londonUri = er.getLocationUri("Lond") print("A top location that contains text 'Lond': " + londonUri) + usUri = er.getLocationUri("united states", sources = "country") print(usUri) + # get a top location for "lond" that is located in USA londonUsUri = er.getLocationUri("Lond", countryUri = usUri) print("A top US location that contains text 'Lond': " + londonUsUri) + # suggest a list of concept classes that best match the text "auto" classes = er.suggestConceptClasses("auto") print(classes) \ No newline at end of file diff --git a/eventregistry/examples/BreakingEvents.py b/eventregistry/examples/BreakingEvents.py new file mode 100644 index 0000000..31caaa6 --- /dev/null +++ b/eventregistry/examples/BreakingEvents.py @@ -0,0 +1,22 @@ +from eventregistry import * + +er = EventRegistry() + +# get the list of all breaking events +params = { + "includeEventSocialScore": True, + "includeEventLocation": True, + "includeLocationGeoLocation": True +} + +res = er.jsonRequest("/api-c/v1/event/getBreakingEvents", paramDict=params) +print(res) + + +q = QueryEvents( + categoryUri="news/Business", + lang="eng") +q.setRequestedResult(RequestEventsBreakingEvents()) + +res = er.execQuery(q) +print(res) diff --git a/eventregistry/examples/CountsExamples.py b/eventregistry/examples/CountsExamples.py index cddf7d0..c8005e5 100644 --- a/eventregistry/examples/CountsExamples.py +++ b/eventregistry/examples/CountsExamples.py @@ -6,28 +6,15 @@ from eventregistry import * -er = EventRegistry(host = 
"https://eventregistry.org") +er = EventRegistry() obamaUri = er.getConceptUri("Trump") ebolaUri = er.getConceptUri("ebola") -q = GetCounts([obamaUri, ebolaUri], - dateStart = "2015-05-15", - dateEnd = "2018-05-20") +q = GetCounts([obamaUri, ebolaUri]) ret = er.execQuery(q) print(er.format(ret)) -q = GetCountsEx([er.getCategoryUri("business")], type="category", - dateStart = "2015-05-15", - dateEnd = "2018-05-20") +q = GetCountsEx([er.getCategoryUri("business")], type="category") ret = er.execQuery(q) print(er.format(ret)) - -# get geographic spreadness of the concept Obama -obamaUri = er.getConceptUri("Obama") -q = GetCounts(obamaUri, source="geo") -ret = er.execQuery(q) - -# get the sentiment expressed about Obama -q = GetCounts(obamaUri, source="sentiment") -ret = er.execQuery(q) diff --git a/eventregistry/examples/QueryArticleExamples.py b/eventregistry/examples/QueryArticleExamples.py index 2b1a96f..962d022 100644 --- a/eventregistry/examples/QueryArticleExamples.py +++ b/eventregistry/examples/QueryArticleExamples.py @@ -3,7 +3,7 @@ """ from eventregistry import * -er = EventRegistry(host = "https://eventregistry.org") +er = EventRegistry() # # search article by uri diff --git a/eventregistry/examples/QueryArticlesExamples.py b/eventregistry/examples/QueryArticlesExamples.py index 82d783e..99b1d91 100644 --- a/eventregistry/examples/QueryArticlesExamples.py +++ b/eventregistry/examples/QueryArticlesExamples.py @@ -5,15 +5,19 @@ er = EventRegistry(allowUseOfArchive=False) +# max articles to return - change for your use case MAX_RESULTS = 100 # search for the phrase "Tesla Inc" - both words have to appear together - download at most 100 articles +# for each article retrieve also the list of mentioned concepts, categories, location, image, links and videos from the article q = QueryArticlesIter(keywords = "Tesla Inc") -for art in q.execQuery(er, maxItems = MAX_RESULTS): +for art in q.execQuery(er, + returnInfo = 
ReturnInfo(articleInfo=ArticleInfoFlags(concepts=True, categories=True, location=True, image=True, links=True, videos=True)), + maxItems = MAX_RESULTS): print(art) # search for articles that mention both of the two words - maybe together, maybe apart -# this form of specifying multiple keywords, concepts, etc is now depricated. When you have a list, +# this form of specifying multiple keywords, concepts, etc is now deprecated. When you have a list, # use it with QueryItems.AND() or QueryItems.OR() to explicitly specify how the query should be processed q = QueryArticles(keywords = ["Barack", "Obama"]) # set some custom information that should be returned as a result of the query @@ -99,7 +103,7 @@ for art in q.execQuery(er, sortBy="sourceAlexaGlobalRank", returnInfo = ReturnInfo( articleInfo=ArticleInfoFlags(concepts=True, categories=True, location=True, image=True)), - maxItems = 500): + maxItems = MAX_RESULTS): print(art["uri"]) @@ -213,28 +217,112 @@ # get articles that were published on 2017-02-05 or are about trump or are about politics or are about Merkel and business # # and are not published on 2017-02-05 or are about Obama -qStr = """ -{ +q = { "$query": { "$or": [ { "dateStart": "2017-02-05", "dateEnd": "2017-02-05" }, - { "conceptUri": "%s" }, - { "categoryUri": "%s" }, + { "conceptUri": trumpUri }, + { "categoryUri": politicsUri }, { "$and": [ - { "conceptUri": "%s" }, - { "categoryUri": "%s" } + { "conceptUri": merkelUri }, + { "categoryUri": businessUri } ] } ], "$not": { "$or": [ { "dateStart": "2017-02-04", "dateEnd": "2017-02-04" }, - { "conceptUri": "%s" } + { "conceptUri": obamaUri } ] } } } - """ % (trumpUri, politicsUri, merkelUri, businessUri, obamaUri) -q1 = QueryArticles.initWithComplexQuery(qStr) -res = er.execQuery(q1) +query = QueryArticles.initWithComplexQuery(q) +res = er.execQuery(query) + +# +# use of EXACT search mode when using keywords +# NOTE: You don’t have to write AND, OR, NOT in uppercase — we will use uppercase just to make 
examples more readable. +# + +# USE OF AND, OR and NOT operators +# find articles from Jan 2023 that mention samsung and tv and either led or lcd or plasma but not smartphone or phone +q = { + "$query": { + "keyword": "Samsung AND TV AND (LED OR LCD OR Plasma) NOT (smartphone OR phone)", + "keywordSearchMode": "exact", + "dateStart": "2023-01-01", + "dateEnd": "2023-01-31" + } +} +iter = QueryArticlesIter.initWithComplexQuery(q) +for art in iter.execQuery(er, maxItems = MAX_RESULTS): + print(art) + + +# use of operator NEAR +# find English articles that mention siemens and sustainability or ecology or renewable energy, but at most 15 words apart (forward or backward) +q = { + "$query": { + "keyword": "Siemens NEAR/15 (sustainability or ecology or renewable energy)", + "keywordSearchMode": "exact", + "lang": "eng" + } +} +iter = QueryArticlesIter.initWithComplexQuery(q) +for art in iter.execQuery(er, maxItems = MAX_RESULTS): + print(art) + + +# use of operator NEXT +# find English articles that mention sustainability or ecology or renewable energy at most 15 words after siemens is mentioned +q = { + "$query": { + "keyword": "Siemens NEXT/15 (sustainability or ecology or renewable energy)", + "keywordSearchMode": "exact", + "lang": "eng" + } +} +iter = QueryArticlesIter.initWithComplexQuery(q) +for art in iter.execQuery(er, maxItems = MAX_RESULTS): + print(art) + + +# +# use of SIMPLE search mode when using keywords +# + +# find articles that mention at least some of the specified keywords and phrases and that belong to the AI category +q = { + "$query": { + "keyword": "AI \\\"deep learning\\\" \\\"machine learning\\\" latest developments", + "keywordSearchMode": "simple", + "categoryUri": "dmoz/Computers/Artificial_Intelligence" + } +} +iter = QueryArticlesIter.initWithComplexQuery(q) +for art in iter.execQuery(er, sortBy = "rel", maxItems = MAX_RESULTS): + print(art) + +# the same query, but without using the complex query language +iter = QueryArticlesIter(keywords = "AI
\\\"deep learning\\\" \\\"machine learning\\\" latest developments", keywordSearchMode="simple") +for art in iter.execQuery(er, sortBy = "rel", maxItems = MAX_RESULTS): + print(art) + + +# +# use of PHRASE search mode when using keywords +# phrase search mode is used by default, so in this case, you don't even need to specify the "keywordSearchMode" parameter +# + +# search for articles that mention the phrase "Apple iPhone" or "Microsoft Store" +qStr = { + "$query": { + "$or": [ + { "keyword": "Apple iPhone" }, + { "keyword": "Microsoft Store" } + ] + } +} +q = QueryArticlesIter.initWithComplexQuery(qStr) diff --git a/eventregistry/examples/QueryEventsExamples.py b/eventregistry/examples/QueryEventsExamples.py index a8fefd6..1a5715e 100644 --- a/eventregistry/examples/QueryEventsExamples.py +++ b/eventregistry/examples/QueryEventsExamples.py @@ -6,6 +6,10 @@ er = EventRegistry() +# max events to return - change for your use case +MAX_RESULTS = 50 + + # get the concept URI that matches label "Barack Obama" obamaUri = er.getConceptUri("Obama") print("Concept uri for 'Obama' is " + obamaUri) @@ -19,9 +23,9 @@ # query for events related to Barack Obama. 
return the matching events sorted from the latest to oldest event # use the iterator class and easily iterate over all matching events -# we specify maxItems to limit the results to maximum 300 results +# we specify maxItems to limit the results to maximum MAX_RESULTS results q = QueryEventsIter(conceptUri = obamaUri) -for event in q.execQuery(er, sortBy = "date", maxItems = 300): +for event in q.execQuery(er, sortBy = "date", maxItems = MAX_RESULTS): # print(json.dumps(event, indent=2)) print(event["uri"]) @@ -116,7 +120,7 @@ res = er.execQuery(q) -# query for events about Obama and produce the concept co-occurence graph - which concepts appear frequently together in the matching events +# query for events about Obama and produce the concept co-occurrence graph - which concepts appear frequently together in the matching events q = QueryEvents(conceptUri = er.getConceptUri("Obama")) q.setRequestedResult(RequestEventsConceptGraph(conceptCount = 200, linkCount = 500, eventsSampleSize = 2000)) res = er.execQuery(q) @@ -128,17 +132,15 @@ # events that are occurred between 2017-02-05 and 2017-02-06 and are not about business businessUri = er.getCategoryUri("Business") -q = QueryEvents.initWithComplexQuery(""" -{ +query = QueryEvents.initWithComplexQuery({ "$query": { "dateStart": "2017-02-05", "dateEnd": "2017-02-06", "$not": { - "categoryUri": "%s" + "categoryUri": businessUri } } -} -""" % (businessUri)) -res = er.execQuery(q) +}) +res = er.execQuery(query) # example of a complex query containing a combination of OR and AND parameters # get events that: @@ -153,31 +155,29 @@ merkelUri = er.getConceptUri("merkel") businessUri = er.getCategoryUri("business") -qStr = """ -{ +q = { "$query": { "$or": [ { "dateStart": "2017-02-05", "dateEnd": "2017-02-05" }, - { "conceptUri": "%s" }, - { "categoryUri": "%s" }, + { "conceptUri": trumpUri }, + { "categoryUri": politicsUri }, { "$and": [ - { "conceptUri": "%s" }, - { "categoryUri": "%s" } + { "conceptUri": merkelUri }, + {
"categoryUri": businessUri } ] } ], "$not": { "$or": [ { "dateStart": "2017-02-04", "dateEnd": "2017-02-04" }, - { "conceptUri": "%s" } + { "conceptUri": obamaUri } ] } } } - """ % (trumpUri, politicsUri, merkelUri, businessUri, obamaUri) -q1 = QueryEvents.initWithComplexQuery(qStr) -res = er.execQuery(q1) +query = QueryEvents.initWithComplexQuery(q) +res = er.execQuery(query) cq = ComplexEventQuery( query = CombinedQuery.OR([ @@ -194,5 +194,93 @@ # example of an ITERATOR with a COMPLEX QUERY iter = QueryEventsIter.initWithComplexQuery(cq) -for event in iter.execQuery(er, returnInfo = retInfo, maxItems = 10): +for event in iter.execQuery(er, returnInfo = retInfo, maxItems = MAX_RESULTS): print(json.dumps(event, indent=2)) + + + +# +# use of EXACT search mode when using keywords +# NOTE: You don’t have to write AND, OR, NOT in uppercase — we will use uppercase just to make examples more readable. +# + +# USE OF AND, OR and NOT operators +# find events from Jan 2023 that mention samsung and tv and either led or lcd or plasma but not smartphone or phone +q = { + "$query": { + "keyword": "Samsung AND TV AND (LED OR LCD OR Plasma) NOT (smartphone OR phone)", + "keywordSearchMode": "exact", + "dateStart": "2023-01-01", + "dateEnd": "2023-01-31" + } +} +iter = QueryEventsIter.initWithComplexQuery(q) +for ev in iter.execQuery(er, maxItems = MAX_RESULTS): + print(ev) + + +# use of operator NEAR +# find English events that mention siemens and sustainability or ecology or renewable energy, but at most 15 words apart (forward or backward) +q = { + "$query": { + "keyword": "Siemens NEAR/15 (sustainability or ecology or renewable energy)", + "keywordSearchMode": "exact", + "lang": "eng" + } +} +iter = QueryEventsIter.initWithComplexQuery(q) +for ev in iter.execQuery(er, maxItems = MAX_RESULTS): + print(ev) + + +# use of operator NEXT +# find English events that mention sustainability or ecology or renewable energy at most 15 words after siemens is mentioned +q = { + "$query": {
+ "keyword": "Siemens NEXT/15 (sustainability or ecology or renewable energy)", + "keywordSearchMode": "exact", + "lang": "eng" + } +} +iter = QueryEventsIter.initWithComplexQuery(q) +for ev in iter.execQuery(er, maxItems = MAX_RESULTS): + print(ev) + + +# +# use of SIMPLE search mode when using keywords +# + +# find events that mention at least some of the specified keywords and phrases and that belong to the AI category +q = { + "$query": { + "keyword": "AI \\\"deep learning\\\" \\\"machine learning\\\" latest developments", + "keywordSearchMode": "simple", + "categoryUri": "dmoz/Computers/Artificial_Intelligence" + } +} +iter = QueryEventsIter.initWithComplexQuery(q) +for ev in iter.execQuery(er, sortBy = "rel", maxItems = MAX_RESULTS): + print(ev) + +# the same query, but without using the complex query language +iter = QueryEventsIter(keywords = "AI \\\"deep learning\\\" \\\"machine learning\\\" latest developments", keywordSearchMode="simple") +for ev in iter.execQuery(er, sortBy = "rel", maxItems = MAX_RESULTS): + print(ev) + + +# +# use of PHRASE search mode when using keywords +# phrase search mode is used by default, so in this case, you don't even need to specify the "keywordSearchMode" parameter +# + +# search for events that mention the phrase "Apple iPhone" or "Microsoft Store" +qStr = { + "$query": { + "$or": [ + { "keyword": "Apple iPhone" }, + { "keyword": "Microsoft Store" } + ] + } +} +q = QueryEventsIter.initWithComplexQuery(qStr) diff --git a/eventregistry/examples/QueryMentions.py b/eventregistry/examples/QueryMentions.py index c35685d..f2a10d2 100644 --- a/eventregistry/examples/QueryMentions.py +++ b/eventregistry/examples/QueryMentions.py @@ -3,7 +3,7 @@ """ from eventregistry import * -er = EventRegistry(allowUseOfArchive=False) +er = EventRegistry() # # Find mentions of stock price change event types related to companies in the transportation industry that occured in April 2023.
diff --git a/eventregistry/examples/TopicPagesExamples.py b/eventregistry/examples/TopicPagesExamples.py index 98d1fe6..e577ecb 100644 --- a/eventregistry/examples/TopicPagesExamples.py +++ b/eventregistry/examples/TopicPagesExamples.py @@ -1,6 +1,6 @@ from eventregistry import * -er = EventRegistry(host = "http://eventregistry.org") +er = EventRegistry() def getMyTopicPages(): """ diff --git a/eventregistry/tests/TestERTrends.py b/eventregistry/tests/TestERTrends.py new file mode 100644 index 0000000..1e56069 --- /dev/null +++ b/eventregistry/tests/TestERTrends.py @@ -0,0 +1,47 @@ +import unittest, math +from eventregistry import * +from eventregistry.tests.DataValidator import DataValidator + + +class TestERTrends(DataValidator): + + def testGetTrendingConcepts(self): + q = GetTrendingConcepts(source = "news", count = 10) + ret = self.er.execQuery(q) + self.assertTrue(isinstance(ret, list)) + self.assertTrue(len(ret) == 10) + for item in ret: + self.assertTrue("uri" in item) + self.assertTrue("label" in item) + self.assertTrue("trendingScore" in item) + + + def testGetTrendingConceptGroups(self): + q = GetTrendingConceptGroups(source = "news", count = 10) + q.getConceptTypeGroups(["person", "org"]) + ret = self.er.execQuery(q) + self.assertTrue(isinstance(ret, dict)) + self.assertTrue("person" in ret) + self.assertTrue("org" in ret) + for name in ["person", "org"]: + arr = ret[name].get("trendingConcepts") + self.assertTrue(len(arr) == 10) + for item in arr: + self.assertTrue("uri" in item) + self.assertTrue("label" in item) + self.assertTrue("trendingScore" in item) + + + def testGetTrendingCategories(self): + q = GetTrendingCategories(source = "news", count = 10) + ret = self.er.execQuery(q) + self.assertTrue(isinstance(ret, list)) + self.assertTrue(len(ret) == 10) + for item in ret: + self.assertTrue("uri" in item) + self.assertTrue("label" in item) + self.assertTrue("trendingScore" in item) + +if __name__ == "__main__": + suite = 
unittest.TestLoader().loadTestsFromTestCase(TestERTrends) + unittest.TextTestRunner(verbosity=3).run(suite) diff --git a/eventregistry/tests/TestQueryParamsBase.py b/eventregistry/tests/TestQueryParamsBase.py new file mode 100644 index 0000000..ec5a12d --- /dev/null +++ b/eventregistry/tests/TestQueryParamsBase.py @@ -0,0 +1,18 @@ +import unittest, math +from eventregistry import * +from eventregistry.tests.DataValidator import DataValidator + + +class TestQueryParamsBase(DataValidator): + + def testDateConversion(self): + self.assertEqual("2015-01-01", QueryParamsBase.encodeDate(datetime.datetime(2015, 1, 1, 12, 0, 0))) + self.assertEqual("2015-01-01", QueryParamsBase.encodeDate(datetime.date(2015, 1, 1))) + self.assertEqual("2015-01-01", QueryParamsBase.encodeDate("2015-01-01")) + + self.assertEqual("2015-01-01T12:00:00", QueryParamsBase.encodeDateTime(datetime.datetime(2015, 1, 1, 12, 0, 0))) + self.assertEqual("2015-01-01T12:00:00.123", QueryParamsBase.encodeDateTime("2015-01-01T12:00:00.123")) + +if __name__ == "__main__": + suite = unittest.TestLoader().loadTestsFromTestCase(TestQueryParamsBase) + unittest.TextTestRunner(verbosity=3).run(suite)