# bot.py

import os
import tweepy
import logging
import re
from sentence_splitter import SentenceSplitter

# Config
# Maximum length, in characters, of each tweet chunk produced by split_text().
MAX_CHAR_FOR_TWEET = 240
# Presumably the bot account's Twitter handle; not referenced by the code
# visible in this file.
BOTNAME = "@ConstAssembly"

# Configure the root logger at import time: records at DEBUG level and above
# are written to bot.log.
logging.basicConfig(filename="bot.log", level=logging.DEBUG)
# Sentence splitter configured for English; run() uses it to break a line of
# text into sentences.
splitter = SentenceSplitter(language="en")


def create_api():
    """Return a tweepy API client authenticated from environment variables.

    Credentials are read from CONSUMER_KEY, CONSUMER_SECRET, ACCESS_TOKEN and
    ACCESS_SECRET. The client is created with rate-limit waiting (and
    notification) enabled, and the credentials are verified before the client
    is handed back.

    Raises:
        Exception: whatever verify_credentials() raised; the failure is
            logged with its traceback before being re-raised.
    """
    consumer_key, consumer_secret, access_token, access_token_secret = (
        os.getenv(var)
        for var in ("CONSUMER_KEY", "CONSUMER_SECRET", "ACCESS_TOKEN", "ACCESS_SECRET")
    )

    handler = tweepy.OAuthHandler(consumer_key, consumer_secret)
    handler.set_access_token(access_token, access_token_secret)
    client = tweepy.API(
        handler, wait_on_rate_limit=True, wait_on_rate_limit_notify=True
    )

    # Fail early if the credentials are wrong, rather than on the first tweet
    try:
        client.verify_credentials()
    except Exception as err:
        logging.error("Error creating API", exc_info=True)
        raise err
    else:
        logging.info("API created")
    return client


# Regular-expression fragments (abbreviation prefixes/suffixes, sentence
# starters, acronyms, website suffixes). They appear to be building blocks for
# a regex-based sentence splitter; nothing in the code visible in this file
# references them.
# Raw strings keep backslashes such as "\s" literal without relying on
# Python's handling of unrecognised escape sequences, which is deprecated.
alphabets = r"([A-Za-z])"
prefixes = r"(Mr|St|Mrs|Ms|Dr|No)[.]"
suffixes = r"(Inc|Ltd|Jr|Sr|Co)"
starters = r"(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
acronyms = r"([A-Z][.][A-Z][.](?:[A-Z][.])?)"
websites = r"[.](com|net|org|io|gov)"


def split_text(tweet_text, max_chars=None):
    """Yield pieces of *tweet_text* that each fit within *max_chars* characters.

    The text is split at single spaces only, so newlines stay inside the
    words they are attached to. Words are packed greedily: a word is appended
    to the current piece (with one space) while the result still fits,
    otherwise the current piece is emitted and the word starts the next one.
    Every piece is stripped of surrounding whitespace before it is yielded.

    Args:
        tweet_text: The text to break up.
        max_chars: Maximum length of one piece. Defaults to
            MAX_CHAR_FOR_TWEET when omitted.

    Yields:
        Non-empty strings of at most max_chars characters. A single word
        longer than max_chars is cut into max_chars-sized slices so no piece
        exceeds the limit. Empty or whitespace-only input yields nothing.

    Raises:
        ValueError: If max_chars is less than 1 (raised when iteration
            starts, because this is a generator).
    """
    if max_chars is None:
        max_chars = MAX_CHAR_FOR_TWEET
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    def pieces(buffer):
        # Strip the buffer, drop it if nothing is left, and hard-split it
        # when it is a single word too long for one tweet.
        text = buffer.strip()
        for start in range(0, len(text), max_chars):
            piece = text[start:start + max_chars].strip()
            if piece:
                yield piece

    words = tweet_text.split(" ")
    current = words[0]
    for word in words[1:]:
        if len(current) + len(word) + 1 > max_chars:
            # current piece + one space + word would overflow: emit and
            # start the next piece with this word
            yield from pieces(current)
            current = word
        else:
            # Still room left, keep filling the current piece
            current = current + " " + word
    yield from pieces(current)


def _save_position(line_to_tweet, line_char):
    """Write the resume position to last_line.txt.

    The file holds two lines: the index of the next line of data.txt to
    tweet, then the character offset within that line.
    """
    with open("last_line.txt", "w") as state_file:
        state_file.writelines([str(line_to_tweet), "\n", str(line_char)])


def run():
    """Tweet the next sentence of data.txt and record where to resume.

    Reads the text from data.txt and the resume position (line index and
    character offset) from last_line.txt, skips blank lines, and posts the
    next sentence of the current line. When a line starts with a section
    number such as "1.1.1", that line is skipped and the line after it is
    prepended to the tweet. Text too long for one tweet is posted as a thread
    of replies. The updated position is written back to last_line.txt.

    Returns without tweeting when the position is at or past the end of
    data.txt.

    Raises:
        tweepy.TweepError: If posting fails for any reason other than a
            duplicate status (API code 187), which is retried once with a
            trailing ".".
        IndexError: If a numbered heading is not followed by at least two
            more lines in data.txt.
    """
    api = create_api()

    # read all lines
    with open("data.txt") as f_data:
        lines = f_data.readlines()

    # read the saved position: line index first, then character offset
    with open("last_line.txt") as f_state:
        last_line_data = f_state.readlines()
    line_to_tweet = int(last_line_data[0])
    line_char = int(last_line_data[1])

    pending_tweet = ""

    # skip blank lines, stopping at the end of the text instead of indexing
    # past it
    while line_to_tweet < len(lines) and lines[line_to_tweet] == "\n":
        line_to_tweet += 1
    if line_to_tweet >= len(lines):
        logging.info("Nothing left to tweet: reached the end of data.txt")
        return
    line = lines[line_to_tweet]

    # if it matches with 1.1.1 format, skip line, add name, save to file
    # NOTE(review): \d* also matches a line that merely starts with "..";
    # \d+ may have been intended - confirm against data.txt.
    if re.match(r"^\d*\.\d*\.\d*", line):
        line_to_tweet += 1
        pending_tweet = lines[line_to_tweet]
        line_to_tweet += 1
        _save_position(line_to_tweet, line_char)

    # split what is left of the current line into sentences
    current_line = lines[line_to_tweet]
    sentences = splitter.split(text=current_line[line_char:])

    if sentences:
        first_sentence = sentences[0]

        if len(first_sentence) != len(current_line):
            # Search from the current offset: searching from the start of the
            # line would find an earlier identical sentence again and pin the
            # cursor there, re-tweeting the same sentence on every run.
            start = current_line.find(first_sentence, line_char)
            if start == -1:
                # Sentence text not found verbatim (possibly altered by the
                # splitter); assume it begins at the current offset so the
                # cursor never moves backwards.
                start = line_char
            line_char = start + len(first_sentence)

        pending_tweet += first_sentence

        if len(sentences) == 1:
            # that was the last sentence of this line: move to the next line
            line_char = 0
            line_to_tweet += 1
    else:
        pending_tweet += current_line
        line_to_tweet += 1
        line_char = 0

    # create tweet
    logging.info("sending tweet: ===\n" + pending_tweet + "\n===")

    prev_status = None
    for out_tweet in split_text(pending_tweet):
        # every chunk after the first replies to the previous one (a thread)
        reply_args = {}
        if prev_status is not None:
            reply_args["in_reply_to_status_id"] = prev_status.id
        try:
            prev_status = api.update_status(status=out_tweet, **reply_args)
        except tweepy.TweepError as error:
            if error.api_code != 187:
                raise
            # 187 = duplicate status: retry with a trailing "." and keep the
            # thread intact so later chunks still reply to this one
            prev_status = api.update_status(status=out_tweet + ".", **reply_args)

    _save_position(line_to_tweet, line_char)


# Run the bot when this file is executed as a script (not when imported).
if __name__ == "__main__":
    run()