Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get Context Entities from Twitter Dev Repo #1921

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ env.sh
.pdbrc
tweepy.egg-info

.vscode/

# Created by https://www.gitignore.io/api/vim,python

### Python ###
Expand Down
17 changes: 17 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@




```sh
conda create -n tweepy-env python=3.7

conda activate tweepy-env
```

```sh
pip install ".[dev,test]"  # the quotes are required in shells such as zsh, which otherwise treat the square brackets as a glob pattern
```

```sh
python -m unittest discover tests
```
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"oauthlib>=3.2.0,<4",
"requests>=2.27.0,<3",
"requests-oauthlib>=1.2.0,<2",
"pandas>=1.3.5",
],
extras_require={
"async": [
Expand Down
10 changes: 9 additions & 1 deletion tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
consumer_secret, tape, user_id
)
import tweepy

from pandas import DataFrame

class TweepyClientTests(unittest.TestCase):

Expand Down Expand Up @@ -241,3 +241,11 @@ def test_create_and_get_compliance_job_and_jobs(self):
job_id = response.data["id"]
self.client.get_compliance_job(job_id)
self.client.get_compliance_jobs("tweets")


def test_get_entities(self):
    """Check that the evergreen entities data loads as the expected table.

    This fetches the pinned Twitter context-entities CSV from GitHub, so it
    needs network access (or a recorded response).
    """
    entities_df = self.client.get_entities()

    # unittest assertion methods are used instead of bare ``assert``
    # statements: they still run under ``python -O`` and report the
    # differing values when they fail.
    self.assertIsInstance(entities_df, DataFrame)
    self.assertListEqual(
        entities_df.columns.tolist(),
        ["domains", "entity_id", "entity_name"],
    )
    # Row count of the data file pinned by Client.get_entities; this needs
    # updating whenever that method is pointed at a newer release.
    self.assertEqual(len(entities_df), 144_753)
17 changes: 17 additions & 0 deletions tweepy/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import warnings

import requests
from pandas import read_csv

import tweepy
from tweepy.auth import OAuth1UserHandler
Expand Down Expand Up @@ -3618,3 +3619,19 @@ def create_compliance_job(self, type, *, name=None, resumable=None):
return self._make_request(
"POST", "/2/compliance/jobs", json=json
)

# Entities and Domains (evergreen data updated quarterly)

def get_entities(self, *, url=None):
    """Get context annotation entities as a DataFrame.

    By default the data is downloaded from
    https://github.com/twitterdev/twitter-context-annotations.

    This data is updated quarterly.

    Parameters
    ----------
    url : str | None
        Location of the entities CSV file. Anything accepted by
        ``pandas.read_csv`` (URL, local path or file-like object) works.
        When omitted, the release pinned in this method is used.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV, with leading and trailing whitespace removed from
        the ``entity_name`` column.
    """
    if url is None:
        # update the url when a new data file is released:
        url = "https://raw.githubusercontent.com/twitterdev/twitter-context-annotations/6c349b2f3e1a3e7aca54d941225c485698a93c7a/files/evergreen-context-entities-20220601.csv"

    # fetch the data:
    df = read_csv(url)

    # Clean tab characters and other spaces from the entity names. The
    # vectorized ``.str`` accessor passes missing names through as NaN,
    # whereas calling ``.strip()`` on each value raises AttributeError as
    # soon as one name is missing (pandas represents it as a float NaN).
    df["entity_name"] = df["entity_name"].str.strip()
    return df