docs/mcat/commentAnalysis.html

<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, minimum-scale=1" />
<meta name="generator" content="pdoc 0.10.0" />
<title>mcat.commentAnalysis API documentation</title>
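<meta name="description" content="API documentation for the mcat.commentAnalysis module: the CommentAnalyzer class, which extracts word counts, a sentiment score and a code block count from a comment." />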
<link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/sanitize.min.css" integrity="sha256-PK9q560IAAa6WVRRh76LtCaI8pjTJ2z11v0miyNNjrs=" crossorigin>
<link rel="preload stylesheet" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/10up-sanitize.css/11.0.1/typography.min.css" integrity="sha256-7l/o7C8jubJiy74VsKTidCy1yBkRtiUGbVkYBylBqUg=" crossorigin>
<link rel="stylesheet preload" as="style" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/styles/github.min.css" crossorigin>
<style>:root{--highlight-color:#fe9}.flex{display:flex !important}body{line-height:1.5em}#content{padding:20px}#sidebar{padding:30px;overflow:hidden}#sidebar > *:last-child{margin-bottom:2cm}.http-server-breadcrumbs{font-size:130%;margin:0 0 15px 0}#footer{font-size:.75em;padding:5px 30px;border-top:1px solid #ddd;text-align:right}#footer p{margin:0 0 0 1em;display:inline-block}#footer p:last-child{margin-right:30px}h1,h2,h3,h4,h5{font-weight:300}h1{font-size:2.5em;line-height:1.1em}h2{font-size:1.75em;margin:1em 0 .50em 0}h3{font-size:1.4em;margin:25px 0 10px 0}h4{margin:0;font-size:105%}h1:target,h2:target,h3:target,h4:target,h5:target,h6:target{background:var(--highlight-color);padding:.2em 0}a{color:#058;text-decoration:none;transition:color .3s ease-in-out}a:hover{color:#e82}.title code{font-weight:bold}h2[id^="header-"]{margin-top:2em}.ident{color:#900}pre code{background:#f8f8f8;font-size:.8em;line-height:1.4em}code{background:#f2f2f1;padding:1px 4px;overflow-wrap:break-word}h1 code{background:transparent}pre{background:#f8f8f8;border:0;border-top:1px solid #ccc;border-bottom:1px solid #ccc;margin:1em 0;padding:1ex}#http-server-module-list{display:flex;flex-flow:column}#http-server-module-list div{display:flex}#http-server-module-list dt{min-width:10%}#http-server-module-list p{margin-top:0}.toc ul,#index{list-style-type:none;margin:0;padding:0}#index code{background:transparent}#index h3{border-bottom:1px solid #ddd}#index ul{padding:0}#index h4{margin-top:.6em;font-weight:bold}@media (min-width:200ex){#index .two-column{column-count:2}}@media (min-width:300ex){#index .two-column{column-count:3}}dl{margin-bottom:2em}dl dl:last-child{margin-bottom:4em}dd{margin:0 0 1em 3em}#header-classes + dl > dd{margin-bottom:3em}dd dd{margin-left:2em}dd p{margin:10px 0}.name{background:#eee;font-weight:bold;font-size:.85em;padding:5px 10px;display:inline-block;min-width:40%}.name:hover{background:#e0e0e0}dt:target .name{background:var(--highlight-color)}.name > 
span:first-child{white-space:nowrap}.name.class > span:nth-child(2){margin-left:.4em}.inherited{color:#999;border-left:5px solid #eee;padding-left:1em}.inheritance em{font-style:normal;font-weight:bold}.desc h2{font-weight:400;font-size:1.25em}.desc h3{font-size:1em}.desc dt code{background:inherit}.source summary,.git-link-div{color:#666;text-align:right;font-weight:400;font-size:.8em;text-transform:uppercase}.source summary > *{white-space:nowrap;cursor:pointer}.git-link{color:inherit;margin-left:1em}.source pre{max-height:500px;overflow:auto;margin:0}.source pre code{font-size:12px;overflow:visible}.hlist{list-style:none}.hlist li{display:inline}.hlist li:after{content:',\2002'}.hlist li:last-child:after{content:none}.hlist .hlist{display:inline;padding-left:1em}img{max-width:100%}td{padding:0 .5em}.admonition{padding:.1em .5em;margin-bottom:1em}.admonition-title{font-weight:bold}.admonition.note,.admonition.info,.admonition.important{background:#aef}.admonition.todo,.admonition.versionadded,.admonition.tip,.admonition.hint{background:#dfd}.admonition.warning,.admonition.versionchanged,.admonition.deprecated{background:#fd4}.admonition.error,.admonition.danger,.admonition.caution{background:lightpink}</style>
<style media="screen and (min-width: 700px)">@media screen and (min-width:700px){#sidebar{width:30%;height:100vh;overflow:auto;position:sticky;top:0}#content{width:70%;max-width:100ch;padding:3em 4em;border-left:1px solid #ddd}pre code{font-size:1em}.item .name{font-size:1em}main{display:flex;flex-direction:row-reverse;justify-content:flex-end}.toc ul ul,#index ul{padding-left:1.5em}.toc > ul > li{margin-top:.5em}}</style>
<style media="print">@media print{#sidebar h1{page-break-before:always}.source{display:none}}@media print{*{background:transparent !important;color:#000 !important;box-shadow:none !important;text-shadow:none !important}a[href]:after{content:" (" attr(href) ")";font-size:90%}a[href][title]:after{content:none}abbr[title]:after{content:" (" attr(title) ")"}.ir a:after,a[href^="javascript:"]:after,a[href^="#"]:after{content:""}pre,blockquote{border:1px solid #999;page-break-inside:avoid}thead{display:table-header-group}tr,img{page-break-inside:avoid}img{max-width:100% !important}@page{margin:0.5cm}p,h2,h3{orphans:3;widows:3}h1,h2,h3,h4,h5,h6{page-break-after:avoid}}</style>
<script defer src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/10.1.1/highlight.min.js" integrity="sha256-Uv3H6lx7dJmRfRvH8TH6kJD1TSK1aFcwgx+mdg3epi8=" crossorigin></script>
<script>window.addEventListener('DOMContentLoaded', () => hljs.initHighlighting())</script>
</head>
<body>
<main>
<article id="content">
<header>
<h1 class="title">Module <code>mcat.commentAnalysis</code></h1>
</header>
<section id="section-intro">
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python"># Copyright 2021 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0

import argparse

import nltk
from nltk.sentiment import vader

nltk.download(&#39;vader_lexicon&#39;)  # Model download


class CommentAnalyzer:
    def __init__(self, words):
        &#34;&#34;&#34;
        Constructors form a dictionary to be used for counting.
        Parameters: words - list of words to count
        &#34;&#34;&#34;
        self.word_count = {word.lower(): 0 for word in words}  # Create dictionary with list items as key
        self.vader_sentiment = vader.SentimentIntensityAnalyzer()  # Initialize sentiment analysis model

    def analyzeComment(self, comment):
        &#34;&#34;&#34;
        Method to get desired features from an input comment.
        Parameters: comment - string.
        Returns: dictionary with features
        &#34;&#34;&#34;
        result = {}  # Create return dictionary
        cleaned_comment = self.preProcess(comment)  # Clean comment text
        # result[&#39;Word Counts&#39;] = self.countWords(cleaned_comment)  # Determine word counts
        result.update(self.countWords(cleaned_comment))
        result[&#39;Sentiment&#39;] = self.getSentiment(comment)  # Determine sentiment
        result[&#39;Code Blocks&#39;] = self.getCodeBlockCount(cleaned_comment)  # Determine code block count
        return result

    def preProcess(self, text):
        &#34;&#34;&#34;
        Method to clean and return text.
        Parameters: text - string.
        Returns: string after cleaning
        &#34;&#34;&#34;
        if not isinstance(text, str):
            return &#34;&#34;
        cleaned_text = text.strip()  # Remove trailing and starting spaces
        cleaned_text = cleaned_text.lower()  # Convert to lowercase
        return cleaned_text

    def countWords(self, comment):
        &#34;&#34;&#34;
        Method to determine word count.
        Parameters: comment - string
        Returns: dictionary with word counts
        &#34;&#34;&#34;
        words = comment.split(&#34; &#34;)  # Split text into words
        current_word_count = self.word_count.copy()  # Copy default dict for new count
        for word in words:  # Iterate over all words
            if word in self.word_count:
                current_word_count[word] = current_word_count[word] + 1
        return current_word_count

    def getCodeBlockCount(self, comment):
        &#34;&#34;&#34;
        Method to determine the code blocks.
        Parameters: comment - string
        Returns: integer count
        &#34;&#34;&#34;
        count = comment.count(&#34;```&#34;)  # Find occurences of code block
        if count % 2 != 0:  # Should be in pairs
            print(&#34;Warning: Mismatched code blocks&#34;)
            return int(count / 2 - 1)  # Subtract 1 since unmatched pair
        return int(count / 2)  # Divide by 2 since pairs

    def getSentiment(self, comment):
        &#34;&#34;&#34;
        Method to determine sentiment. Parameters: comment - string
        Returns: dictionary with positive, negative and neutral scores
        &#34;&#34;&#34;
        return self.vader_sentiment.polarity_scores(comment)[&#34;compound&#34;]

    def changeWords(self, words):
        &#34;&#34;&#34;
        Method to change words to count. Parameters: Set new word count with new keys/
        &#34;&#34;&#34;
        self.word_count = {word: 0 for word in words}


if __name__ == &#34;__main__&#34;:
    parser = argparse.ArgumentParser(description=&#39;Analyze input text segment.&#39;)
    parser.add_argument(&#39;text&#39;, help=&#39;Text to analyze&#39;)
    parser.add_argument(&#39;-w&#39;, &#39;--words&#39;, required=False, help=&#39;File containing words to count&#39;)

    args = parser.parse_args()
    # Form word list through the file
    word_list = []
    if args.words:
        with open(args.words, &#39;r&#39;) as word_file:
            word_list = word_file.read().replace(&#34; &#34;, &#34;&#34;).strip().split(&#34;,&#34;)
    analyzer = CommentAnalyzer(word_list)
    print(analyzer.analyzeComment(args.text))</code></pre>
</details>
</section>
<section>
</section>
<section>
</section>
<section>
</section>
<section>
<h2 class="section-title" id="header-classes">Classes</h2>
<dl>
<dt id="mcat.commentAnalysis.CommentAnalyzer"><code class="flex name class">
<span>class <span class="ident">CommentAnalyzer</span></span>
<span>(</span><span>words)</span>
</code></dt>
<dd>
<div class="desc"><p>Constructor: builds the dictionary used for counting words and initializes the sentiment analysis model.
Parameters: words - list of words to count (stored as lowercase keys)</p></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">class CommentAnalyzer:
    def __init__(self, words):
        &#34;&#34;&#34;
        Constructors form a dictionary to be used for counting.
        Parameters: words - list of words to count
        &#34;&#34;&#34;
        self.word_count = {word.lower(): 0 for word in words}  # Create dictionary with list items as key
        self.vader_sentiment = vader.SentimentIntensityAnalyzer()  # Initialize sentiment analysis model

    def analyzeComment(self, comment):
        &#34;&#34;&#34;
        Method to get desired features from an input comment.
        Parameters: comment - string.
        Returns: dictionary with features
        &#34;&#34;&#34;
        result = {}  # Create return dictionary
        cleaned_comment = self.preProcess(comment)  # Clean comment text
        # result[&#39;Word Counts&#39;] = self.countWords(cleaned_comment)  # Determine word counts
        result.update(self.countWords(cleaned_comment))
        result[&#39;Sentiment&#39;] = self.getSentiment(comment)  # Determine sentiment
        result[&#39;Code Blocks&#39;] = self.getCodeBlockCount(cleaned_comment)  # Determine code block count
        return result

    def preProcess(self, text):
        &#34;&#34;&#34;
        Method to clean and return text.
        Parameters: text - string.
        Returns: string after cleaning
        &#34;&#34;&#34;
        if not isinstance(text, str):
            return &#34;&#34;
        cleaned_text = text.strip()  # Remove trailing and starting spaces
        cleaned_text = cleaned_text.lower()  # Convert to lowercase
        return cleaned_text

    def countWords(self, comment):
        &#34;&#34;&#34;
        Method to determine word count.
        Parameters: comment - string
        Returns: dictionary with word counts
        &#34;&#34;&#34;
        words = comment.split(&#34; &#34;)  # Split text into words
        current_word_count = self.word_count.copy()  # Copy default dict for new count
        for word in words:  # Iterate over all words
            if word in self.word_count:
                current_word_count[word] = current_word_count[word] + 1
        return current_word_count

    def getCodeBlockCount(self, comment):
        &#34;&#34;&#34;
        Method to determine the code blocks.
        Parameters: comment - string
        Returns: integer count
        &#34;&#34;&#34;
        count = comment.count(&#34;```&#34;)  # Find occurences of code block
        if count % 2 != 0:  # Should be in pairs
            print(&#34;Warning: Mismatched code blocks&#34;)
            return int(count / 2 - 1)  # Subtract 1 since unmatched pair
        return int(count / 2)  # Divide by 2 since pairs

    def getSentiment(self, comment):
        &#34;&#34;&#34;
        Method to determine sentiment. Parameters: comment - string
        Returns: dictionary with positive, negative and neutral scores
        &#34;&#34;&#34;
        return self.vader_sentiment.polarity_scores(comment)[&#34;compound&#34;]

    def changeWords(self, words):
        &#34;&#34;&#34;
        Method to change words to count. Parameters: Set new word count with new keys/
        &#34;&#34;&#34;
        self.word_count = {word: 0 for word in words}</code></pre>
</details>
<h3>Methods</h3>
<dl>
<dt id="mcat.commentAnalysis.CommentAnalyzer.analyzeComment"><code class="name flex">
<span>def <span class="ident">analyzeComment</span></span>(<span>self, comment)</span>
</code></dt>
<dd>
<div class="desc"><p>Method to get desired features from an input comment.
Parameters: comment - string.
Returns: dictionary with features</p></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def analyzeComment(self, comment):
    &#34;&#34;&#34;
    Method to get desired features from an input comment.
    Parameters: comment - string.
    Returns: dictionary with features
    &#34;&#34;&#34;
    result = {}  # Create return dictionary
    cleaned_comment = self.preProcess(comment)  # Clean comment text
    # result[&#39;Word Counts&#39;] = self.countWords(cleaned_comment)  # Determine word counts
    result.update(self.countWords(cleaned_comment))
    result[&#39;Sentiment&#39;] = self.getSentiment(comment)  # Determine sentiment
    result[&#39;Code Blocks&#39;] = self.getCodeBlockCount(cleaned_comment)  # Determine code block count
    return result</code></pre>
</details>
</dd>
<dt id="mcat.commentAnalysis.CommentAnalyzer.changeWords"><code class="name flex">
<span>def <span class="ident">changeWords</span></span>(<span>self, words)</span>
</code></dt>
<dd>
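<div class="desc"><p>Method to change the words to count. Parameters: words - list of words that become the keys of the new word count dictionary. Unlike the constructor, the words are stored as given (they are not converted to lowercase).</p></div>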
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def changeWords(self, words):
    &#34;&#34;&#34;
    Method to change words to count. Parameters: Set new word count with new keys/
    &#34;&#34;&#34;
    self.word_count = {word: 0 for word in words}</code></pre>
</details>
</dd>
<dt id="mcat.commentAnalysis.CommentAnalyzer.countWords"><code class="name flex">
<span>def <span class="ident">countWords</span></span>(<span>self, comment)</span>
</code></dt>
<dd>
<div class="desc"><p>Method to determine word count.
Parameters: comment - string
Returns: dictionary with word counts</p></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def countWords(self, comment):
    &#34;&#34;&#34;
    Method to determine word count.
    Parameters: comment - string
    Returns: dictionary with word counts
    &#34;&#34;&#34;
    words = comment.split(&#34; &#34;)  # Split text into words
    current_word_count = self.word_count.copy()  # Copy default dict for new count
    for word in words:  # Iterate over all words
        if word in self.word_count:
            current_word_count[word] = current_word_count[word] + 1
    return current_word_count</code></pre>
</details>
</dd>
<dt id="mcat.commentAnalysis.CommentAnalyzer.getCodeBlockCount"><code class="name flex">
<span>def <span class="ident">getCodeBlockCount</span></span>(<span>self, comment)</span>
</code></dt>
<dd>
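<div class="desc"><p>Method to count the code blocks in a comment, where each block is delimited by a pair of <code>```</code> markers.
Parameters: comment - string
Returns: integer count; a warning is printed when the markers are not in pairs</p></div>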
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def getCodeBlockCount(self, comment):
    &#34;&#34;&#34;
    Method to determine the code blocks.
    Parameters: comment - string
    Returns: integer count
    &#34;&#34;&#34;
    count = comment.count(&#34;```&#34;)  # Find occurences of code block
    if count % 2 != 0:  # Should be in pairs
        print(&#34;Warning: Mismatched code blocks&#34;)
        return int(count / 2 - 1)  # Subtract 1 since unmatched pair
    return int(count / 2)  # Divide by 2 since pairs</code></pre>
</details>
</dd>
<dt id="mcat.commentAnalysis.CommentAnalyzer.getSentiment"><code class="name flex">
<span>def <span class="ident">getSentiment</span></span>(<span>self, comment)</span>
</code></dt>
<dd>
<div class="desc"><p>Method to determine sentiment. Parameters: comment - string.
Returns: the compound sentiment score reported by the VADER analyzer (a single value, not a dictionary of positive, negative and neutral scores)</p></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def getSentiment(self, comment):
    &#34;&#34;&#34;
    Method to determine sentiment. Parameters: comment - string
    Returns: dictionary with positive, negative and neutral scores
    &#34;&#34;&#34;
    return self.vader_sentiment.polarity_scores(comment)[&#34;compound&#34;]</code></pre>
</details>
</dd>
<dt id="mcat.commentAnalysis.CommentAnalyzer.preProcess"><code class="name flex">
<span>def <span class="ident">preProcess</span></span>(<span>self, text)</span>
</code></dt>
<dd>
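<div class="desc"><p>Method to clean and return text by removing leading and trailing whitespace and converting it to lowercase.
Parameters: text - string.
Returns: string after cleaning, or an empty string if text is not a string</p></div>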
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def preProcess(self, text):
    &#34;&#34;&#34;
    Method to clean and return text.
    Parameters: text - string.
    Returns: string after cleaning
    &#34;&#34;&#34;
    if not isinstance(text, str):
        return &#34;&#34;
    cleaned_text = text.strip()  # Remove trailing and starting spaces
    cleaned_text = cleaned_text.lower()  # Convert to lowercase
    return cleaned_text</code></pre>
</details>
</dd>
</dl>
</dd>
</dl>
</section>
</article>
<nav id="sidebar">
<h1>Index</h1>
<div class="toc">
<ul></ul>
</div>
<ul id="index">
<li><h3>Super-module</h3>
<ul>
<li><code><a title="mcat" href="index.html">mcat</a></code></li>
</ul>
</li>
<li><h3><a href="#header-classes">Classes</a></h3>
<ul>
<li>
<h4><code><a title="mcat.commentAnalysis.CommentAnalyzer" href="#mcat.commentAnalysis.CommentAnalyzer">CommentAnalyzer</a></code></h4>
<ul class="two-column">
<li><code><a title="mcat.commentAnalysis.CommentAnalyzer.analyzeComment" href="#mcat.commentAnalysis.CommentAnalyzer.analyzeComment">analyzeComment</a></code></li>
<li><code><a title="mcat.commentAnalysis.CommentAnalyzer.changeWords" href="#mcat.commentAnalysis.CommentAnalyzer.changeWords">changeWords</a></code></li>
<li><code><a title="mcat.commentAnalysis.CommentAnalyzer.countWords" href="#mcat.commentAnalysis.CommentAnalyzer.countWords">countWords</a></code></li>
<li><code><a title="mcat.commentAnalysis.CommentAnalyzer.getCodeBlockCount" href="#mcat.commentAnalysis.CommentAnalyzer.getCodeBlockCount">getCodeBlockCount</a></code></li>
<li><code><a title="mcat.commentAnalysis.CommentAnalyzer.getSentiment" href="#mcat.commentAnalysis.CommentAnalyzer.getSentiment">getSentiment</a></code></li>
<li><code><a title="mcat.commentAnalysis.CommentAnalyzer.preProcess" href="#mcat.commentAnalysis.CommentAnalyzer.preProcess">preProcess</a></code></li>
</ul>
</li>
</ul>
</li>
</ul>
</nav>
</main>
<footer id="footer">
<p>Generated by <a href="https://pdoc3.github.io/pdoc" title="pdoc: Python API documentation generator"><cite>pdoc</cite> 0.10.0</a>.</p>
</footer>
</body>
</html>