Skip to content

Commit

Permalink
Merge branch 'test'
Browse files: browse the repository at this point in the history
  • Loading branch information
shahules786 committed Oct 21, 2020
2 parents 4307a77 + 1edddce commit ca37693
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 44 deletions.
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import os
import setuptools

current_dir = os.path.abspath(os.path.dirname(__file__))
current_dir = os.path.dirname(os.path.abspath("__file__"))


## requirements
with open(os.path.join(current_dir, "README.md"), "r") as fh:
Expand Down Expand Up @@ -31,4 +32,5 @@
],
python_requires=">=3.6",
install_requires=required,
include_package_data=True,
)
21 changes: 11 additions & 10 deletions twittersentiment/TwitterSentiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
from sklearn.model_selection import train_test_split


from twittersentiment.utils.model import TwitterModel as Model
from twittersentiment.utils.model import TwitterModel
from twittersentiment.utils.model import TweetModel
from twittersentiment.utils.preprocessing import Preprocess
from twittersentiment.utils.data import TweetDataset

Expand All @@ -35,19 +34,21 @@ def __init__(self):

self.models = {
"twitter-en": model(
url="https://github.com/shahules786/ML_EXP/releases/download/sample/best_model.pt",
model=Model,
url="/home/shahul/Downloads/classifier(1).pt",
model=TweetModel,
)
}

self.tokenizer_path = os.path.split(os.__file__)[0]
self.tokenizer_path = os.path.join(
os.path.dirname(os.path.abspath("__file__")), "twittersentiment", "utils", "tokenizer.pickle"
)

def load_pretrained(self, model_name="twitter-en"):

state_dict = torch.hub.load_state_dict_from_url(
self.models[model_name].url, progress=True, map_location=DEVICE
)
# state_dict = torch.load("/home/shahul/Downloads/classifier.pt", map_location=DEVICE)
# state_dict = torch.hub.load_state_dict_from_url(
# self.models[model_name].url, progress=True, map_location=DEVICE
# )
state_dict = torch.load("/home/shahul/Downloads/classifier (1).pt", map_location=DEVICE)
self.model = self.models[model_name].model(state_dict["embedding.weight"].numpy())
self.model.load_state_dict(state_dict)

Expand Down Expand Up @@ -89,7 +90,7 @@ def train(self, X, y, path, epochs=5, lr=1e-3, batch_size=32, test_size=0.15, na
"valid": DataLoader(test_data, batch_size=batch_size, shuffle=False),
}

model = TwitterModel(embedding_matrix).to(DEVICE)
model = TweetModel(embedding_matrix).to(DEVICE)
loss_fn = torch.nn.BCELoss().cuda()
optimizer = torch.optim.Adam(model.parameters())

Expand Down
49 changes: 17 additions & 32 deletions twittersentiment/utils/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,46 +3,31 @@
import torch.nn.functional as F


class TwitterModel(nn.Module):
"""
Model Architecture
"""
class TweetModel(nn.Module):
def __init__(self, embedding_matrix, lstm_hidden_size=200, gru_hidden_size=128):

def __init__(self, embedding_matrix, lstm_hidden_size=128, gru_hidden_size=64):

super(TwitterModel, self).__init__()
super(TweetModel, self).__init__()
self.embedding = nn.Embedding(*embedding_matrix.shape)
self.embedding.weight = nn.Parameter(torch.tensor(embedding_matrix, dtype=torch.float32))
self.embedding.weight.requires_grad = False
self.embedding_dropout = nn.Dropout2d(0.2)

self.lstm = nn.LSTM(embedding_matrix.shape[1], lstm_hidden_size, bidirectional=True, batch_first=True)
self.lstm2 = nn.LSTM(lstm_hidden_size * 2, gru_hidden_size, bidirectional=True, batch_first=True)

self.Linear1 = nn.Linear(gru_hidden_size * 4, 1)

def apply_spatial_dropout(self, h_embedding):
self.embedding.weight.requires_grad = True
self.embedding_dropout = nn.Dropout2d(0.1)

h_embedding = h_embedding.transpose(1, 2).unsqueeze(2)
h_embedding = self.embedding_dropout(h_embedding).squeeze(2).transpose(1, 2)
return h_embedding
self.gru = nn.GRU(
embedding_matrix.shape[1], gru_hidden_size, num_layers=1, bidirectional=True, batch_first=True
)

def flatten_parameters(self):

self.lstm.flatten_parameters()
self.lstm2.flatten_parameters()
self.dropout2 = nn.Dropout(0.25)
self.Linear1 = nn.Linear(gru_hidden_size * 5, 16)
self.Linear2 = nn.Linear(16, 1)

def forward(self, x):

h_embedding = self.embedding(x)
h_embedding = self.apply_spatial_dropout(h_embedding)

h_lstm, _ = self.lstm(h_embedding)
h_lstm, _ = self.lstm2(h_lstm)

avg_pool = torch.mean(h_lstm, 1)
max_pool, _ = torch.max(h_lstm, 1)
concat = torch.cat((avg_pool, max_pool), 1)
x, (x_h, x_c) = self.gru(h_embedding)

out = torch.sigmoid(self.Linear1(concat))
avg_pool = torch.mean(x, 1)
max_pool, _ = torch.max(x, 1)
concat = torch.cat((avg_pool, x_h, max_pool), 1)
concat = self.Linear1(concat)
out = torch.sigmoid(self.Linear2(concat))
return out
3 changes: 2 additions & 1 deletion twittersentiment/utils/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def tokenizer(corpus, path, mode="train"):
"""

path = os.path.join(path, "new_tokenizer.pickle")
if not path.endswith(".pickle"):
path = os.path.join(path, "tokenizer.pickle")

if mode == "train":
tokenizer_obj = Tokenizer()
Expand Down
Binary file added twittersentiment/utils/tokenizer.pickle
Binary file not shown.

0 comments on commit ca37693

Please sign in to comment.