Skip to content

Commit 0e49f45

Browse files
committed
Add same_language to edges_for, edges_from, edges_to
1 parent 71df3c8 commit 0e49f45

File tree

6 files changed

+52
-26
lines changed

6 files changed

+52
-26
lines changed

README.md

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -76,33 +76,35 @@ for e in edges_between(introvert_concepts, extrovert_concepts, two_way=False):
7676

7777
```json
7878
{
79-
'dataset': '/d/wiktionary/en',
80-
'license': 'cc:by-sa/4.0',
81-
'sources': [{
82-
'contributor': '/s/resource/wiktionary/en',
83-
'process': '/s/process/wikiparsec/2'
79+
"dataset": "/d/wiktionary/en",
80+
"license": "cc:by-sa/4.0",
81+
"sources": [{
82+
"contributor": "/s/resource/wiktionary/en",
83+
"process": "/s/process/wikiparsec/2"
8484
}, {
85-
'contributor': '/s/resource/wiktionary/fr',
86-
'process': '/s/process/wikiparsec/2'
85+
"contributor": "/s/resource/wiktionary/fr",
86+
"process": "/s/process/wikiparsec/2"
8787
}],
88-
'weight': 2.0
88+
"weight": 2.0
8989
}
9090
```
9191

92-
## Accessing all relations for a given concept
92+
## Accessing all relations for given concepts
9393

94-
You can also retrieve all relations between a given concept and all other concepts, with the same options as above:
94+
You can also retrieve all relations between the given concepts and all other concepts, with the same options as above:
9595

9696
```python
9797
from conceptnet_lite import Label, Language, edges_for
9898

9999
english = Language.get(name='en')
100-
for e in edges_for(Label.get(text='introvert', language=english, same_language=True).concepts):
100+
for e in edges_for(Label.get(text='introvert', language=english).concepts, same_language=True):
101101
print(" Edge URI:", e.uri)
102102
print(e.relation.name, e.start.text, e.end.text, e.etc)
103103
```
104104

105-
The only difference is that since the other concepts are not specified, it is possible to get edges to concepts in languages other than the source concept language.
105+
Note that we have used the optional argument `same_language=True`. By supplying this argument we make `edges_for` return
106+
relations whose two ends are in the same language. If this argument is omitted, it is possible to get edges to
107+
concepts in languages other than the source concepts' language.
106108

107109
## Accessing concept edges with a given relation direction
108110

@@ -114,7 +116,7 @@ from conceptnet_lite import Language, Label
114116
english = Language.get(name='en')
115117
cat_concepts = Label.get(text='introvert', language=english).concepts #
116118
for c in cat_concepts:
117-
print(" Concept text:", c.text) # shall we also contract this to c.text?
119+
print(" Concept text:", c.text)
118120
if c.edges_out:
119121
print(" Edges out:")
120122
for e in c.edges_out:

conceptnet_lite/__init__.py

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,18 +45,42 @@ def connect(
4545
_open_db(path=db_path)
4646

4747

48-
def edges_from(start_concepts: Iterable[Concept]) -> peewee.BaseModelSelect:
49-
result = Edge.select().where(Edge.start.in_(start_concepts))
50-
return result
48+
def edges_from(start_concepts: Iterable[Concept], same_language: bool = False) -> peewee.BaseModelSelect:
49+
if same_language:
50+
ConceptAlias = Concept.alias()
51+
start_concepts = list(start_concepts)
52+
return (Edge
53+
.select()
54+
.join(Concept, on=(Concept.id == Edge.start))
55+
.where(Concept.id.in_(start_concepts))
56+
.switch(Edge)
57+
.join(ConceptAlias, on=(ConceptAlias.id == Edge.end))
58+
.join(Label)
59+
.join(Language)
60+
.where(Language.id == start_concepts[0].label.language))
61+
else:
62+
return Edge.select().where(Edge.start.in_(start_concepts))
5163

5264

53-
def edges_to(end_concepts: Iterable[Concept]) -> peewee.BaseModelSelect:
54-
result = Edge.select().where(Edge.end.in_(end_concepts))
55-
return result
65+
def edges_to(end_concepts: Iterable[Concept], same_language: bool = False) -> peewee.BaseModelSelect:
66+
if same_language:
67+
ConceptAlias = Concept.alias()
68+
end_concepts = list(end_concepts)
69+
return (Edge
70+
.select()
71+
.join(Concept, on=(Concept.id == Edge.end))
72+
.where(Concept.id.in_(end_concepts))
73+
.switch(Edge)
74+
.join(ConceptAlias, on=(ConceptAlias.id == Edge.start))
75+
.join(Label)
76+
.join(Language)
77+
.where(Language.id == end_concepts[0].label.language))
78+
else:
79+
return Edge.select().where(Edge.end.in_(end_concepts))
5680

5781

58-
def edges_for(concepts: Iterable[Concept]) -> peewee.BaseModelSelect:
59-
return edges_from(concepts) | edges_to(concepts)
82+
def edges_for(concepts: Iterable[Concept], same_language: bool = False) -> peewee.BaseModelSelect:
83+
return edges_from(concepts, same_language=same_language) | edges_to(concepts, same_language=same_language)
6084

6185

6286
def edges_between(

docs/source/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
author = 'LDT team'
2626

2727
# The full version, including alpha/beta/rc tags
28-
release = '0.1.10'
28+
release = '0.1.11'
2929

3030

3131
# -- General configuration ---------------------------------------------------

examples/conceptnet_queries.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
print("Get edges for 'introvert':")
2525
introvert_concepts = Label.get(text='introvert', language=english).concepts
26-
for e in edges_for(introvert_concepts):
26+
for e in edges_for(introvert_concepts, same_language=True):
2727
print(" Edge URI:", e.uri)
2828

2929
print("Traversing Russian:")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "conceptnet-lite"
3-
version = "0.1.10"
3+
version = "0.1.11"
44
description = "Python library to work with ConceptNet offline without the need of PostgreSQL"
55
authors = ["Roman Inflianskas <infroma@gmail.com>"]
66
license = "Apache-2.0"

setup.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@
1515

1616
setup_kwargs = {
1717
'name': 'conceptnet-lite',
18-
'version': '0.1.10',
18+
'version': '0.1.11',
1919
'description': 'Python library to work with ConceptNet offline without the need of PostgreSQL',
20-
'long_description': '# conceptnet-lite\n\nConceptnet-lite is a Python library for working with ConceptNet offline without the need for PostgreSQL.\n\nThe basic usage is as follows.\n\n## Loading the database object\n\nConceptNet releases happen once a year. You can build your own database from an assertions file, but if there is a pre-built file it will be faster to just download that one. Here is the [compressed database file](todo) for ConceptNet 5.7 release.\n\n```python\nimport conceptnet_lite\n\nconceptnet_lite.connect(\'/path/to/conceptnet.db\')\n```\n\n## Building the database for a new release.\n\nThe assertion files for ConceptNet are provided [here](https://github.com/commonsense/conceptnet5/wiki/Downloads).\n\n(building instructions TBA)\n\n## Accessing concepts\n\nConcepts objects are created by looking for every entry that matches the input string exactly.\nIf none is found, the `peewee.DoesNotExist` exception will be raised.\n\n```python\nfrom conceptnet_lite import Label\n\ncat_concepts = Label.get(text=\'cat\').concepts #\nfor c in cat_concepts:\n print(" Concept URI:", c.uri)\n print(" Concept text:", c.text)\n```\n\n`concept.uri` provides access to ConceptNet URIs, as described [here](https://github.com/commonsense/conceptnet5/wiki/URI-hierarchy). You can also retrieve only the text of the entry by `concept.text`.\n\n## Working with languages\n\nYou can limit the languages to search for matches. 
Label.get() takes an optional `language` attribute that is expected to be an instance `Language`, which in turn is created by calling `Language.get()` with `name` argument.\nList of available languages and their codes are described [here](https://github.com/commonsense/conceptnet5/wiki/Languages).\n\n```python\nfrom conceptnet_lite import Label, Language\n\nenglish = Language.get(name=\'en\')\ncat_concepts = Label.get(text=\'cat\', language=english).concepts #\nfor c in cat_concepts:\n print(" Concept URI:", c.uri)\n print(" Concept text:", c.text)\n print(" Concept language:", c.language.name)\n```\n\n## Querying edges between concepts\n\nTo retrieve the set of relations between two concepts, you need to create the concept objects (optionally specifying the language as described above). `cn.edges_between()` method retrieves all edges between the specified concepts. You can access its URI and a number of attributes, as shown below.\n\nSome ConceptNet relations are symmetrical: for example, the antonymy between *white* and *black* works both ways. Some relations are asymmetrical: e.g. the relation between *cat* and *mammal* is either hyponymy or hyperonymy, depending on the direction. The `two_way` argument lets you choose whether the query should be symmetrical or not.\n\n```python\nfrom conceptnet_lite import Label, Language, edges_between\n\nenglish = Language.get(name=\'en\')\nintrovert_concepts = Label.get(text=\'introvert\', language=english).concepts\nextrovert_concepts = Label.get(text=\'extrovert\', language=english).concepts\nfor e in edges_between(introvert_concepts, extrovert_concepts, two_way=False):\n print(" Edge URI:", e.uri)\n print(e.relation.name, e.start.text, e.end.text, e.etc)\n```\n* **e.relation.name**: the name of ConceptNet relation. 
Full list [here](https://github.com/commonsense/conceptnet5/wiki/Relations).\n\n* **e.start.text, e.end.text**: the source and the target concepts in the edge\n\n* **e.etc**: the ConceptNet [metadata](https://github.com/commonsense/conceptnet5/wiki/Edges) dictionary contains the source dataset, sources, weight, and license. For example, the introvert:extrovert edge for English contains the following metadata:\n\n```json\n{\n\t\'dataset\': \'/d/wiktionary/en\',\n\t\'license\': \'cc:by-sa/4.0\',\n\t\'sources\': [{\n\t\t\'contributor\': \'/s/resource/wiktionary/en\',\n\t\t\'process\': \'/s/process/wikiparsec/2\'\n\t}, {\n\t\t\'contributor\': \'/s/resource/wiktionary/fr\',\n\t\t\'process\': \'/s/process/wikiparsec/2\'\n\t}],\n\t\'weight\': 2.0\n}\n```\n\n## Accessing all relations for a given concept\n\nYou can also retrieve all relations between a given concept and all other concepts, with the same options as above:\n\n```python\nfrom conceptnet_lite import Label, Language, edges_for\n\nenglish = Language.get(name=\'en\')\nfor e in edges_for(Label.get(text=\'introvert\', language=english, same_language=True).concepts):\n print(" Edge URI:", e.uri)\n print(e.relation.name, e.start.text, e.end.text, e.etc)\n```\n\nThe only difference is that since the other concepts are not specified, it is possible to get edges to concepts in languages other than the source concept language.\n\n## Accessing concept edges with a given relation direction\n\nYou can also query the relations that have a specific concept as target or source. 
This is achieved with `concept.edges_out` and `concept.edges_in`, as follows:\n\n```python\nfrom conceptnet_lite import Language, Label\n\nenglish = Language.get(name=\'en\')\ncat_concepts = Label.get(text=\'introvert\', language=english).concepts #\nfor c in cat_concepts:\n print(" Concept text:", c.text) # shall we also contract this to c.text?\n if c.edges_out:\n print(" Edges out:")\n for e in c.edges_out:\n print(" Edge URI:", e.uri)\n print(" Relation:", e.relation.name)\n print(" End:", e.end.text)\n if c.edges_in:\n print(" Edges in:")\n for e in c.edges_in:\n print(" Edge URI:", e.uri)\n print(" Relation:", e.relation.name)\n print(" End:", e.end.text)\n```\n\n\n# Traversing all the data for a language\n\nYou can go over all concepts for a given language. For illustration, let us try Avestan, a "small" language with the code "ae" and vocab size of 371, according to the [ConceptNet language statistics](https://github.com/commonsense/conceptnet5/wiki/Languages).\n\n```python\nfrom conceptnet_lite import Language\n\nmylanguage = Language.get(name=\'ae\')\nfor l in mylanguage.labels:\n print(" Label:", l.text)\n for c in l.concepts:\n print(" Concept URI:", c.uri)\n if c.edges_out:\n print(" Edges out:")\n for e in c.edges_out:\n print(" Edge URI:", e.uri)\n if c.edges_in:\n print(" Edges in:")\n for e in c.edges_in:\n print(" Edge URI:", e.uri)\n```\n\nTodo:\n\n- [ ] add database file link\n- [ ] describe how to build the database\n- [ ] add sample outputs\n',
20+
'long_description': '# conceptnet-lite\n\nConceptnet-lite is a Python library for working with ConceptNet offline without the need for PostgreSQL.\n\nThe basic usage is as follows.\n\n## Loading the database object\n\nConceptNet releases happen once a year. You can build your own database from an assertions file, but if there is a pre-built file it will be faster to just download that one. Here is the [compressed database file](todo) for ConceptNet 5.7 release.\n\n```python\nimport conceptnet_lite\n\nconceptnet_lite.connect(\'/path/to/conceptnet.db\')\n```\n\n## Building the database for a new release.\n\nThe assertion files for ConceptNet are provided [here](https://github.com/commonsense/conceptnet5/wiki/Downloads).\n\n(building instructions TBA)\n\n## Accessing concepts\n\nConcepts objects are created by looking for every entry that matches the input string exactly.\nIf none is found, the `peewee.DoesNotExist` exception will be raised.\n\n```python\nfrom conceptnet_lite import Label\n\ncat_concepts = Label.get(text=\'cat\').concepts #\nfor c in cat_concepts:\n print(" Concept URI:", c.uri)\n print(" Concept text:", c.text)\n```\n\n`concept.uri` provides access to ConceptNet URIs, as described [here](https://github.com/commonsense/conceptnet5/wiki/URI-hierarchy). You can also retrieve only the text of the entry by `concept.text`.\n\n## Working with languages\n\nYou can limit the languages to search for matches. 
Label.get() takes an optional `language` attribute that is expected to be an instance `Language`, which in turn is created by calling `Language.get()` with `name` argument.\nList of available languages and their codes are described [here](https://github.com/commonsense/conceptnet5/wiki/Languages).\n\n```python\nfrom conceptnet_lite import Label, Language\n\nenglish = Language.get(name=\'en\')\ncat_concepts = Label.get(text=\'cat\', language=english).concepts #\nfor c in cat_concepts:\n print(" Concept URI:", c.uri)\n print(" Concept text:", c.text)\n print(" Concept language:", c.language.name)\n```\n\n## Querying edges between concepts\n\nTo retrieve the set of relations between two concepts, you need to create the concept objects (optionally specifying the language as described above). `cn.edges_between()` method retrieves all edges between the specified concepts. You can access its URI and a number of attributes, as shown below.\n\nSome ConceptNet relations are symmetrical: for example, the antonymy between *white* and *black* works both ways. Some relations are asymmetrical: e.g. the relation between *cat* and *mammal* is either hyponymy or hyperonymy, depending on the direction. The `two_way` argument lets you choose whether the query should be symmetrical or not.\n\n```python\nfrom conceptnet_lite import Label, Language, edges_between\n\nenglish = Language.get(name=\'en\')\nintrovert_concepts = Label.get(text=\'introvert\', language=english).concepts\nextrovert_concepts = Label.get(text=\'extrovert\', language=english).concepts\nfor e in edges_between(introvert_concepts, extrovert_concepts, two_way=False):\n print(" Edge URI:", e.uri)\n print(e.relation.name, e.start.text, e.end.text, e.etc)\n```\n* **e.relation.name**: the name of ConceptNet relation. 
Full list [here](https://github.com/commonsense/conceptnet5/wiki/Relations).\n\n* **e.start.text, e.end.text**: the source and the target concepts in the edge\n\n* **e.etc**: the ConceptNet [metadata](https://github.com/commonsense/conceptnet5/wiki/Edges) dictionary contains the source dataset, sources, weight, and license. For example, the introvert:extrovert edge for English contains the following metadata:\n\n```json\n{\n\t"dataset": "/d/wiktionary/en",\n\t"license": "cc:by-sa/4.0",\n\t"sources": [{\n\t\t"contributor": "/s/resource/wiktionary/en",\n\t\t"process": "/s/process/wikiparsec/2"\n\t}, {\n\t\t"contributor": "/s/resource/wiktionary/fr",\n\t\t"process": "/s/process/wikiparsec/2"\n\t}],\n\t"weight": 2.0\n}\n```\n\n## Accessing all relations for a given concepts\n\nYou can also retrieve all relations between a given concepts and all other concepts, with the same options as above:\n\n```python\nfrom conceptnet_lite import Label, Language, edges_for\n\nenglish = Language.get(name=\'en\')\nfor e in edges_for(Label.get(text=\'introvert\', language=english).concepts, same_language=True):\n print(" Edge URI:", e.uri)\n print(e.relation.name, e.start.text, e.end.text, e.etc)\n```\n\nNote that we have used optional argument `same_language=True`. By supplying this argument we make `edges_for` return\nrelations, both ends of which are in the same language. If this argument is skipped it is possible to get edges to\nconcepts in languages other than the source concepts language.\n\n## Accessing concept edges with a given relation direction\n\nYou can also query the relations that have a specific concept as target or source. 
This is achieved with `concept.edges_out` and `concept.edges_in`, as follows:\n\n```python\nfrom conceptnet_lite import Language, Label\n\nenglish = Language.get(name=\'en\')\ncat_concepts = Label.get(text=\'introvert\', language=english).concepts #\nfor c in cat_concepts:\n print(" Concept text:", c.text)\n if c.edges_out:\n print(" Edges out:")\n for e in c.edges_out:\n print(" Edge URI:", e.uri)\n print(" Relation:", e.relation.name)\n print(" End:", e.end.text)\n if c.edges_in:\n print(" Edges in:")\n for e in c.edges_in:\n print(" Edge URI:", e.uri)\n print(" Relation:", e.relation.name)\n print(" End:", e.end.text)\n```\n\n\n# Traversing all the data for a language\n\nYou can go over all concepts for a given language. For illustration, let us try Avestan, a "small" language with the code "ae" and vocab size of 371, according to the [ConceptNet language statistics](https://github.com/commonsense/conceptnet5/wiki/Languages).\n\n```python\nfrom conceptnet_lite import Language\n\nmylanguage = Language.get(name=\'ae\')\nfor l in mylanguage.labels:\n print(" Label:", l.text)\n for c in l.concepts:\n print(" Concept URI:", c.uri)\n if c.edges_out:\n print(" Edges out:")\n for e in c.edges_out:\n print(" Edge URI:", e.uri)\n if c.edges_in:\n print(" Edges in:")\n for e in c.edges_in:\n print(" Edge URI:", e.uri)\n```\n\nTodo:\n\n- [ ] add database file link\n- [ ] describe how to build the database\n- [ ] add sample outputs\n',
2121
'author': 'Roman Inflianskas',
2222
'author_email': 'infroma@gmail.com',
2323
'url': 'https://github.com/ldtoolkit/conceptnet-lite',

0 commit comments

Comments
 (0)