Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python 3.4 compatibility #6

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 10 additions & 1 deletion README.md
Expand Up @@ -2,7 +2,7 @@ Readability
====================

A collection of functions that measure the readability of a given body of text. I'd
recommend checking out the wikipedia articles below--most of the metrics estimate
recommend checking out the wikipedia articles below--most of the metrics estimate
the grade level required to comprehend a given block of text and may return odd results
on small snippets of text.

Expand Down Expand Up @@ -43,3 +43,12 @@ Largely lifted from:
https://github.com/nltk/nltk_contrib/tree/master/nltk_contrib/readability

SMOG index appears to perform most accurately.

# Usage

```
from readability.readability import Readability
<rest as usual>
```

# Caveats
The cloned library should be in a folder titled 'readability' at the root directory.
Empty file added __init__.py
Empty file.
65 changes: 32 additions & 33 deletions readability.py
Expand Up @@ -2,11 +2,11 @@

import math

from utils import get_char_count
from utils import get_words
from utils import get_sentences
from utils import count_syllables
from utils import count_complex_words
from .utils import get_char_count
from .utils import get_words
from .utils import get_sentences
from .utils import count_syllables
from .utils import count_complex_words


class Readability:
Expand All @@ -23,7 +23,7 @@ def analyze_text(self, text):
syllable_count = count_syllables(words)
complexwords_count = count_complex_words(text)
avg_words_p_sentence = word_count/sentence_count

self.analyzedVars = {
'words': words,
'char_cnt': float(char_count),
Expand All @@ -35,44 +35,44 @@ def analyze_text(self, text):
}

def ARI(self):
    """Return the Automated Readability Index of the analyzed text.

    Reads ``char_cnt``, ``word_cnt`` and ``sentence_cnt`` from
    ``self.analyzedVars`` and evaluates
    ``4.71 * chars/words + 0.5 * words/sentences - 21.43``.

    Returns:
        The unrounded score, or 0.0 when there are no words or no
        sentences (either would otherwise cause a division by zero).
    """
    stats = self.analyzedVars
    words = stats['word_cnt']
    sentences = stats['sentence_cnt']
    # Both counts appear as divisors below, so both must be positive.
    if not (words > 0.0 and sentences > 0.0):
        return 0.0
    return 4.71 * (stats['char_cnt'] / words) + 0.5 * (words / sentences) - 21.43

def FleschReadingEase(self):
    """Return the Flesch Reading Ease score, rounded to 4 decimal places.

    Uses ``avg_words_p_sentence``, ``syllable_cnt`` and ``word_cnt`` from
    ``self.analyzedVars``. Returns 0.0 when the word count is not positive.
    """
    stats = self.analyzedVars
    if stats['word_cnt'] > 0.0:
        syllables_per_word = stats['syllable_cnt'] / stats['word_cnt']
        return round(
            206.835
            - (1.015 * (stats['avg_words_p_sentence']))
            - (84.6 * (syllables_per_word)),
            4,
        )
    return 0.0

def FleschKincaidGradeLevel(self):
    """Return the Flesch-Kincaid grade level, rounded to 4 decimal places.

    Uses ``avg_words_p_sentence``, ``syllable_cnt`` and ``word_cnt`` from
    ``self.analyzedVars``. Returns 0.0 when the word count is not positive.
    """
    stats = self.analyzedVars
    if stats['word_cnt'] > 0.0:
        syllables_per_word = stats['syllable_cnt'] / stats['word_cnt']
        grade = 0.39 * (stats['avg_words_p_sentence']) + 11.8 * (syllables_per_word) - 15.59
        return round(grade, 4)
    return 0.0

def GunningFogIndex(self):
    """Return the Gunning Fog index, rounded to 4 decimal places.

    Combines ``avg_words_p_sentence`` with the percentage of complex words
    (``complex_word_cnt`` over ``word_cnt``) from ``self.analyzedVars``.
    Returns 0.0 when the word count is not positive.
    """
    stats = self.analyzedVars
    if stats['word_cnt'] > 0.0:
        complex_pct = 100 * (stats['complex_word_cnt'] / stats['word_cnt'])
        return round(0.4 * ((stats['avg_words_p_sentence']) + (complex_pct)), 4)
    return 0.0

def SMOGIndex(self):
    """Return the SMOG index of the analyzed text.

    Reads ``complex_word_cnt``, ``sentence_cnt`` and ``word_cnt`` from
    ``self.analyzedVars`` and evaluates
    ``sqrt(complex_words * (30 / sentences)) + 3``.

    Returns:
        The unrounded score, or 0.0 when there are no words or no
        sentences (the latter would otherwise cause a division by zero).
    """
    stats = self.analyzedVars
    sentences = stats['sentence_cnt']
    # sentence_cnt is the divisor, so it needs a guard as well as word_cnt.
    if not (stats['word_cnt'] > 0.0 and sentences > 0.0):
        return 0.0
    return math.sqrt(stats['complex_word_cnt'] * (30 / sentences)) + 3

def ColemanLiauIndex(self):
    """Return the Coleman-Liau index, rounded to 4 decimal places.

    Uses ``char_cnt``, ``sentence_cnt`` and ``word_cnt`` from
    ``self.analyzedVars``. Returns 0.0 when the word count is not positive.
    """
    stats = self.analyzedVars
    if stats['word_cnt'] > 0.0:
        chars_per_word = stats['char_cnt'] / stats['word_cnt']
        sentences_per_word = stats['sentence_cnt'] / stats['word_cnt']
        return round((5.89 * (chars_per_word)) - (30 * (sentences_per_word)) - 15.8, 4)
    return 0.0

def LIX(self):
longwords = 0.0
score = 0.0
score = 0.0
if self.analyzedVars['word_cnt'] > 0.0:
for word in self.analyzedVars['words']:
if len(word) >= 7:
Expand All @@ -82,27 +82,26 @@ def LIX(self):

def RIX(self):
    """Return the RIX score: long words per sentence.

    A word from ``self.analyzedVars['words']`` counts as long when it has
    7 or more characters. The long-word total is divided by
    ``sentence_cnt``.

    Returns:
        The unrounded score as a float, or 0.0 when there are no words or
        no sentences (the latter would otherwise cause a division by zero).
    """
    stats = self.analyzedVars
    sentences = stats['sentence_cnt']
    if not (stats['word_cnt'] > 0.0 and sentences > 0.0):
        return 0.0
    # Start from 0.0 so the tally stays a float, as in the original counter.
    longwords = sum((1.0 for word in stats['words'] if len(word) >= 7), 0.0)
    return longwords / sentences


# Demo entry point, ported to the Python 3 print() function instead of being
# commented out. Because this module uses relative imports (``from .utils
# import ...``), run it as part of its package, e.g.
# ``python -m readability.readability``, not as a bare script.
if __name__ == "__main__":
    text = """We are close to wrapping up our 10 week Rails Course. This week we will cover a handful of topics commonly encountered in Rails projects. We then wrap up with part 2 of our Reddit on Rails exercise! By now you should be hard at work on your personal projects. The students in the course just presented in front of the class with some live demos and a brief intro to to the problems their app were solving. Maybe set aside some time this week to show someone your progress, block off 5 minutes and describe what goal you are working towards, the current state of the project (is it almost done, just getting started, needs UI, etc.), and then show them a quick demo of the app. Explain what type of feedback you are looking for (conceptual, design, usability, etc.) and see what they have to say. As we are wrapping up the course you need to be focused on learning as much as you can, but also making sure you have the tools to succeed after the class is over."""

    rd = Readability(text)
    print('Test text:')
    print('"%s"\n' % text)
    print('ARI: ', rd.ARI())
    print('FleschReadingEase: ', rd.FleschReadingEase())
    print('FleschKincaidGradeLevel: ', rd.FleschKincaidGradeLevel())
    print('GunningFogIndex: ', rd.GunningFogIndex())
    print('SMOGIndex: ', rd.SMOGIndex())
    print('ColemanLiauIndex: ', rd.ColemanLiauIndex())
    print('LIX: ', rd.LIX())
    print('RIX: ', rd.RIX())
1 change: 0 additions & 1 deletion syllables_en.py
Expand Up @@ -138,4 +138,3 @@ def count(word):
fallback_cache[word] = count

return count

21 changes: 10 additions & 11 deletions utils.py
Expand Up @@ -6,17 +6,17 @@
import nltk

from nltk.tokenize import RegexpTokenizer
import syllables_en

from .syllables_en import count
# Raw string: the pattern contains regex escapes (\W, \$, \d, \., \S) that are
# not valid *string* escapes and trigger warnings on newer Python 3 releases.
# The compiled pattern is unchanged.
TOKENIZER = RegexpTokenizer(r'(?u)\W+|\$[\d\.]+|\S+')
# Punctuation marks referenced by the text helpers in this module.
SPECIAL_CHARS = ['.', ',', '!', '?']

def get_char_count(words):
    """Return the total number of characters across all of *words*.

    Args:
        words: Iterable of text strings.

    Returns:
        The sum of ``len(word)`` over every word; 0 for an empty iterable.
    """
    # Python 3 strings are already text, so no ``decode`` step is needed, and
    # a library helper must not echo every word to stdout.
    return sum(len(word) for word in words)

def get_words(text=''):
words = []
words = TOKENIZER.tokenize(text)
Expand All @@ -38,7 +38,7 @@ def get_sentences(text=''):
def count_syllables(words):
    """Return the total syllable count of *words*.

    Each word is passed to ``count`` (imported from ``.syllables_en``) and
    the per-word results are summed. Returns 0 for an empty iterable.
    """
    # Call ``count`` exactly once per word; the old ``syllables_en.count``
    # spelling no longer resolves now that only the function is imported.
    return sum(count(word) for word in words)

#This method must be enhanced. At the moment it only
Expand All @@ -50,11 +50,11 @@ def count_complex_words(text=''):
complex_words = 0
found = False
cur_word = []
for word in words:

for word in words:
cur_word.append(word)
if count_syllables(cur_word)>= 3:

#Checking proper nouns. If a word starts with a capital letter
#and is NOT at the beginning of a sentence we don't add it
#as a complex word.
Expand All @@ -65,10 +65,9 @@ def count_complex_words(text=''):
if str(sentence).startswith(word):
found = True
break
if found:
if found:
complex_words += 1
found = False

cur_word.remove(word)
return complex_words