nlcst-parse-english
v1.4.0
Published
Parse English text with part-of-speech (POS) tagging and output NLCST.
Downloads
3,070
Readme
nlcst-parse-english
Parse English text with part-of-speech (POS) tagging and output NLCST.
Install
Install with npm:
npm install nlcst-parse-english
Part-of-speech(POS)
WordNode has { data: { pos: Annotation } }.
This library uses parse-english + FinNLP/en-pos.
Annotation | Name | Example
--- | --- | ---
NN
| Noun | dog
man
NNS
| Plural noun | dogs
men
NNP
| Proper noun | London
Alex
NNPS
| Plural proper noun | Smiths
VB
| Base form verb | be
VBP
| Present form verb | throw
VBZ
| Present form (3rd person) | throws
VBG
| Gerund form verb | throwing
VBD
| Past tense verb | threw
VBN
| Past participle verb | thrown
MD
| Modal verb | can
shall
will
may
must
ought
JJ
| Adjective | big
fast
JJR
| Comparative adjective | bigger
JJS
| Superlative adjective | biggest
RB
| Adverb | not
quickly
closely
RBR
| Comparative adverb | less-closely
faster
RBS
| Superlative adverb | fastest
DT
| Determiner | the
a
some
both
PDT
| Predeterminer | all
quite
PRP
| Personal Pronoun | I
you
he
she
PRP$
| Possessive Pronoun | my
your
his
her
POS
| Possessive ending | 's
IN
| Preposition | of
by
in
RP
| Particle | up
off
TO
| to | to
WDT
| Wh-determiner | which
that
whatever
whichever
WP
| Wh-pronoun | who
whoever
whom
what
WP$
| Wh-possessive | whose
WRB
| Wh-adverb | how
where
EX
| Expletive there | there
CC
| Coordinating conjunction | &
and
nor
or
CD
| Cardinal Numbers | 1
7
77
one
LS
| List item marker | 1
B
C
One
UH
| Interjection | ah
oh
oops
FW
| Foreign Words | viva
mon
toujours
,
| Comma | ,
:
| Mid-sent punct. | :
;
...
.
| Sent-final punct. | .
!
?
(
| Left parenthesis | (
{
[
)
| Right parenthesis | )
}
]
#
| Pound sign | #
$
| Currency symbols | $
€
£
¥
SYM
| Other symbols | +
*
/
<
>
EM
| Emojis & emoticons | :)
❤
Usage
const parser = new EnglishParser();
const CST = parser.parse("Mr. Henry Brown: A hapless but friendly City of London worker.");
assert.deepEqual(CST, {
"type": "RootNode",
"children": [
{
"type": "ParagraphNode",
"children": [
{
"type": "SentenceNode",
"children": [
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "Mr",
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 3,
"offset": 2
}
}
},
{
"type": "PunctuationNode",
"value": ".",
"position": {
"start": {
"line": 1,
"column": 3,
"offset": 2
},
"end": {
"line": 1,
"column": 4,
"offset": 3
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 4,
"offset": 3
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 4,
"offset": 3
},
"end": {
"line": 1,
"column": 5,
"offset": 4
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "Henry",
"position": {
"start": {
"line": 1,
"column": 5,
"offset": 4
},
"end": {
"line": 1,
"column": 10,
"offset": 9
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 5,
"offset": 4
},
"end": {
"line": 1,
"column": 10,
"offset": 9
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 10,
"offset": 9
},
"end": {
"line": 1,
"column": 11,
"offset": 10
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "Brown",
"position": {
"start": {
"line": 1,
"column": 11,
"offset": 10
},
"end": {
"line": 1,
"column": 16,
"offset": 15
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 11,
"offset": 10
},
"end": {
"line": 1,
"column": 16,
"offset": 15
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "PunctuationNode",
"value": ":",
"position": {
"start": {
"line": 1,
"column": 16,
"offset": 15
},
"end": {
"line": 1,
"column": 17,
"offset": 16
}
},
"data": {
"pos": ":"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 17,
"offset": 16
},
"end": {
"line": 1,
"column": 18,
"offset": 17
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "A",
"position": {
"start": {
"line": 1,
"column": 18,
"offset": 17
},
"end": {
"line": 1,
"column": 19,
"offset": 18
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 18,
"offset": 17
},
"end": {
"line": 1,
"column": 19,
"offset": 18
}
},
"data": {
"pos": "DT"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 19,
"offset": 18
},
"end": {
"line": 1,
"column": 20,
"offset": 19
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "hapless",
"position": {
"start": {
"line": 1,
"column": 20,
"offset": 19
},
"end": {
"line": 1,
"column": 27,
"offset": 26
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 20,
"offset": 19
},
"end": {
"line": 1,
"column": 27,
"offset": 26
}
},
"data": {
"pos": "JJ"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 27,
"offset": 26
},
"end": {
"line": 1,
"column": 28,
"offset": 27
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "but",
"position": {
"start": {
"line": 1,
"column": 28,
"offset": 27
},
"end": {
"line": 1,
"column": 31,
"offset": 30
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 28,
"offset": 27
},
"end": {
"line": 1,
"column": 31,
"offset": 30
}
},
"data": {
"pos": "CC"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 31,
"offset": 30
},
"end": {
"line": 1,
"column": 32,
"offset": 31
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "friendly",
"position": {
"start": {
"line": 1,
"column": 32,
"offset": 31
},
"end": {
"line": 1,
"column": 40,
"offset": 39
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 32,
"offset": 31
},
"end": {
"line": 1,
"column": 40,
"offset": 39
}
},
"data": {
"pos": "JJ"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 40,
"offset": 39
},
"end": {
"line": 1,
"column": 41,
"offset": 40
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "City",
"position": {
"start": {
"line": 1,
"column": 41,
"offset": 40
},
"end": {
"line": 1,
"column": 45,
"offset": 44
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 41,
"offset": 40
},
"end": {
"line": 1,
"column": 45,
"offset": 44
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 45,
"offset": 44
},
"end": {
"line": 1,
"column": 46,
"offset": 45
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "of",
"position": {
"start": {
"line": 1,
"column": 46,
"offset": 45
},
"end": {
"line": 1,
"column": 48,
"offset": 47
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 46,
"offset": 45
},
"end": {
"line": 1,
"column": 48,
"offset": 47
}
},
"data": {
"pos": "IN"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 48,
"offset": 47
},
"end": {
"line": 1,
"column": 49,
"offset": 48
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "London",
"position": {
"start": {
"line": 1,
"column": 49,
"offset": 48
},
"end": {
"line": 1,
"column": 55,
"offset": 54
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 49,
"offset": 48
},
"end": {
"line": 1,
"column": 55,
"offset": 54
}
},
"data": {
"pos": "NNP"
}
},
{
"type": "WhiteSpaceNode",
"value": " ",
"position": {
"start": {
"line": 1,
"column": 55,
"offset": 54
},
"end": {
"line": 1,
"column": 56,
"offset": 55
}
}
},
{
"type": "WordNode",
"children": [
{
"type": "TextNode",
"value": "worker",
"position": {
"start": {
"line": 1,
"column": 56,
"offset": 55
},
"end": {
"line": 1,
"column": 62,
"offset": 61
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 56,
"offset": 55
},
"end": {
"line": 1,
"column": 62,
"offset": 61
}
},
"data": {
"pos": "NN"
}
},
{
"type": "PunctuationNode",
"value": ".",
"position": {
"start": {
"line": 1,
"column": 62,
"offset": 61
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
},
"data": {
"pos": "."
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
}
}
],
"position": {
"start": {
"line": 1,
"column": 1,
"offset": 0
},
"end": {
"line": 1,
"column": 63,
"offset": 62
}
}
});
Changelog
See Releases page.
Running tests
Install devDependencies and run npm test:
npm i -d && npm test
Contributing
Pull requests and stars are always welcome.
For bugs and feature requests, please create an issue.
- Fork it!
- Create your feature branch:
git checkout -b my-new-feature
- Commit your changes:
git commit -am 'Add some feature'
- Push to the branch:
git push origin my-new-feature
- Submit a pull request :D
Author
License
MIT © azu