/
bot.py
175 lines (139 loc) · 4.9 KB
/
bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import discord
import sqlite3
import requests
import re
from urllib.parse import quote
import os
from dotenv import load_dotenv
# Client setup
client = discord.Client()

# Load variables from a .env file into the environment, then read the
# bot token from DISCORD_TOKEN (None when the variable is not set)
load_dotenv()
discord_token = os.getenv('DISCORD_TOKEN')

# Database connection and cursor used for all tag lookups
# NOTE(review): 'tags.db' is resolved relative to the current working
# directory -- confirm the bot is always started from the repo root
db = sqlite3.connect('tags.db')
cursor = db.cursor()

# Splits a message into whitespace-separated tokens
token_regex = re.compile(r'\S+')

# Matches the help command at the start of a message: ':h', ':he', ':hel'
# or ':help'.  The negative lookahead requires the command to end at
# whitespace or end-of-message, so words that merely start with ':h'
# (e.g. ':hello') no longer trigger the bot.
command_regex = re.compile(r':h(?:e(?:lp?)?)?(?!\S)')
def match_weight(match, tag):
    """Score how well the search text `match` fits the help tag `tag`.

    Higher scores are better.  The score follows these rules:

    - A match with the same case is much better than a match with a
      different case.
    - A match that starts after a non-alphanumeric character (or at the
      very beginning of the tag) is better than a match in the middle of
      a word.
    - A match at or near the beginning of the tag is better than a match
      further on.
    - The more alphanumeric characters match, the better.
    - The shorter the length of the match, the better.

    The last two terms depend only on `match`, so they do not change the
    relative order of different tags scored against the same search text.

    Args:
        match: the search text typed by the user.
        tag: the candidate help tag.

    Returns:
        An integer weight; larger means a more relevant tag.
    """
    # weights
    case_weight = 10000      # bonus for a match with the correct case
    prev_char_weight = 1000  # bonus for matches starting after [^a-zA-Z0-9]
    index_weight = -100      # penalty per character of offset into the tag
    alnum_weight = 10        # bonus for each [a-zA-Z0-9] in the search text
    length_weight = -1       # penalty per character of search text

    weight = 0

    # Prefer an occurrence with the exact case; only when there is none
    # fall back to a case-insensitive search.  Without the fallback a
    # case-different match has index -1, which used to *earn* points.
    searched = tag
    idx = searched.find(match)
    same_case = idx != -1
    if not same_case:
        searched = tag.lower()
        idx = searched.find(match.lower())

    if idx == -1:
        # The search text does not occur in the tag at all: rank it as if
        # it matched past the end of the tag and grant no bonuses.
        weight += len(tag) * index_weight
    else:
        weight += idx * index_weight
        if same_case:
            weight += case_weight
        # A match at the beginning of the tag counts as starting on a
        # word boundary, so the bonus is applied there as well.
        if tag and (idx == 0 or not searched[idx - 1].isalnum()):
            weight += prev_char_weight

    # count alphanumeric characters of the search text
    weight += alnum_weight * sum(1 for c in match if c.isalnum())
    # apply length weight
    weight += len(match) * length_weight
    return weight
def sort_matches(matches, pattern=None):
    """Return `matches` as a new list ordered from best to worst match.

    Args:
        matches: iterable of indexable rows; column 0 is treated as the
            help tag.
        pattern: the search text the rows were found with.  When given,
            each row is weighted by how well its tag (column 0) fits the
            pattern.  When omitted, the legacy weighting of column 0
            against column 1 is kept so existing one-argument calls keep
            working.

    Returns:
        A list whose first element has the highest weight, so that
        `sort_matches(...)[0]` is the best match.
    """
    if pattern is None:
        def weight(row):
            return match_weight(row[0], row[1])
    else:
        def weight(row):
            return match_weight(pattern, row[0])
    # Highest weight first: callers take element 0 as the best match.
    return sorted(matches, key=weight, reverse=True)
def fmt_help_tag(tag):
    """Return `tag` with special characters spelled out as words.

    Every '*' becomes 'star' and every '"' becomes 'quote'; all other
    characters are left unchanged.
    """
    spelled_out = str.maketrans({'*': 'star', '"': 'quote'})
    return tag.translate(spelled_out)
@client.event
async def on_ready():
    """Client event handler: print the account the bot is logged in as."""
    banner = 'Logged in as {}'.format(client.user)
    print(banner)
def build_link(doc, tag):
    """Build the vimhelp.org URL for help tag `tag` in help file `doc`.

    Both parts are percent-encoded; `doc` becomes the page name (with
    '.txt.html' appended) and `tag` becomes the URL fragment.
    """
    url_template = 'https://vimhelp.org/{}.txt.html#{}'
    page = quote(doc)
    anchor = quote(tag)
    return url_template.format(page, anchor)
def _find_best_match(token):
    """Return the tags row that best matches `token`, or None if none does.

    An exact tag match wins outright.  Otherwise every tag containing
    `token` is a candidate and the one with the highest `match_weight`
    against the token is returned.
    """
    exact_match = cursor.execute(
        'SELECT * FROM tags WHERE tag=?', (token,)
    ).fetchone()
    if exact_match is not None:
        return exact_match

    # Escape LIKE wildcards so that '%' and '_' typed by the user are
    # matched literally instead of matching arbitrary characters.
    escaped = (
        token.replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
    )
    candidates = cursor.execute(
        "SELECT * FROM tags WHERE tag LIKE ? ESCAPE '\\'",
        ('%' + escaped + '%',),
    ).fetchall()
    if not candidates:
        return None
    # Column 0 of a row is the tag; score it against what the user typed.
    return max(candidates, key=lambda row: match_weight(token, row[0]))


def _link_is_reachable(link):
    """Return True when a HEAD request to `link` gets a non-error status."""
    try:
        return requests.head(link, timeout=5).ok
    except requests.RequestException:
        # Network failure or timeout: treat the link as unavailable
        # instead of letting the exception abort the whole reply.
        return False


@client.event
async def on_message(message):
    """Reply to a help command with links to the matching help pages.

    Every token after the command is looked up in the tags database
    (exact match first, then best fuzzy match).  Each match whose link
    responds is listed in a single reply sent to the message's channel.
    With no arguments the command defaults to 'help.txt'.  Nothing is
    sent when no token yields a reachable help page.
    """
    # Local import: keeps this block self-contained without touching the
    # file's import section.
    import asyncio

    # ignore the bot's own messages
    if message.author == client.user:
        return
    if command_regex.match(message.content) is None:
        return

    # ignore first token, it is the command itself
    tokens = token_regex.findall(message.content)[1:]
    # if no arguments are passed, default to 'help.txt'
    if not tokens:
        tokens = ['help.txt']

    loop = asyncio.get_event_loop()
    replied_tokens = []
    responses = []
    for raw_token in tokens:
        # spell out special characters
        t = fmt_help_tag(raw_token)
        # check if we've responded to this query already
        if t in replied_tokens:
            print('already replied to {}, skipping'.format(t))
            continue

        best_match = _find_best_match(t)
        if best_match is None:
            # no match for current token
            continue

        tag, doc = best_match[0], best_match[1]
        link = build_link(doc, tag)
        replied_tokens.append(t)
        # Check that the url gets a response.  The blocking HTTP call runs
        # in the default executor so the event loop stays responsive.
        if await loop.run_in_executor(None, _link_is_reachable, link):
            responses.append('`:h {}`: {}'.format(tag, link))

    if not responses:
        return
    if len(responses) == 1:
        msg = 'Help page for {}'.format(responses[0])
    else:
        msg = 'Help pages for:\n' + ''.join(
            '• ' + r + '\n' for r in responses
        )
    await message.channel.send(msg)
# Start the bot.  Fail fast with a clear message when the token is missing
# instead of passing None into the client.
if not discord_token:
    raise SystemExit(
        'DISCORD_TOKEN is not set; define it in the environment or in .env'
    )
client.run(discord_token)