Skip to content

Commit

Permalink
Merge pull request #88 from blackducksoftware/OTWO-7093
Browse files Browse the repository at this point in the history
OTWO-7093 Add support for powershell
  • Loading branch information
alex-sig committed Oct 26, 2023
2 parents 736a750 + 40fc486 commit e176f5e
Show file tree
Hide file tree
Showing 11 changed files with 173 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Expand Up @@ -10,7 +10,7 @@ on:

jobs:
test:
runs-on: ubuntu-18.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: ruby/setup-ruby@v1
Expand Down
9 changes: 7 additions & 2 deletions README.md
Expand Up @@ -146,11 +146,16 @@ Contributing
-------------

* Observe any existing PR contribution and emulate the pattern. For example, see [this](https://github.com/blackducksoftware/ohcount/pull/76/files).
* Run `./build` to compile the ragel files.
* While writing the **test/expected_dir** files, disable any whitespace/tab replacing options from your editor.
* Ohcount output has tabs in it, so the **test/expected_dir** also needs to contain tab characters.
* Sample format of **test/expected_dir** is as follows. There is a tab character after dart, code & comment:
* Sample format of **test/expected_dir** is as follows. There is a **Tab** character after dart, code & comment:
```
dart code void main() {
dart comment // Line comment
```
* Run tests with `./build tests`.
* Some editors convert **Tab** to Space. The following steps help ensure that the proper character is added.
  * Open the file in the Vim editor.
  * Run `:set list`. This makes all hidden characters like **Tab** visible.
  * Type *dart*, press `ctrl+v` followed by `tab`.
  * Run the tests to confirm these changes: `./build tests`.
Binary file modified ruby/x86_64-linux_ubuntu/ohcount.so
Binary file not shown.
1 change: 1 addition & 0 deletions src/hash/extensions.gperf
Expand Up @@ -174,6 +174,7 @@ pp, DISAMBIGUATE("pp")
ppt, BINARY
pro, DISAMBIGUATE("pro")
ps, LANG_POSTSCRIPT
ps1, LANG_POWERSHELL
py, LANG_PYTHON
qml, LANG_QML
qt, BINARY
Expand Down
1 change: 1 addition & 0 deletions src/hash/languages.gperf
Expand Up @@ -91,6 +91,7 @@ perl, LANG_PERL, "Perl", 0
php, LANG_PHP, "PHP", 0
pike, LANG_PIKE, "Pike", 0
postscript, LANG_POSTSCRIPT, "PostScript", 1
powershell, LANG_POWERSHELL, "PowerShell", 0
prolog, LANG_PROLOG, "Prolog", 0
puppet, LANG_PUPPET, "Puppet", 0
python, LANG_PYTHON, "Python", 0
Expand Down
2 changes: 2 additions & 0 deletions src/hash/parsers.gperf
Expand Up @@ -78,6 +78,7 @@
#include "../parsers/phphtml.h"
#include "../parsers/pike.h"
#include "../parsers/postscript.h"
#include "../parsers/powershell.h"
#include "../parsers/prolog.h"
#include "../parsers/puppet.h"
#include "../parsers/python.h"
Expand Down Expand Up @@ -198,6 +199,7 @@ perl, parse_perl
php, parse_phtml
pike, parse_pike
postscript, parse_postscript
powershell, parse_powershell
prolog, parse_prolog
puppet, parse_puppet
python, parse_python
Expand Down
1 change: 1 addition & 0 deletions src/languages.h
Expand Up @@ -92,6 +92,7 @@
#define LANG_PHP "php"
#define LANG_PIKE "pike"
#define LANG_POSTSCRIPT "postscript"
#define LANG_POWERSHELL "powershell"
#define LANG_PROLOG "prolog"
#define LANG_PUPPET "puppet"
#define LANG_PYTHON "python"
Expand Down
139 changes: 139 additions & 0 deletions src/parsers/powershell.rl
@@ -0,0 +1,139 @@
/************************* Required for every parser *************************/
#ifndef OHCOUNT_POWERSHELL_PARSER_H
#define OHCOUNT_POWERSHELL_PARSER_H

#include "../parser_macros.h"

// the name of the language: this is the identifier the parser passes as the
// `lang` argument of the callback. LANG_POWERSHELL is not defined in this
// file; it is presumably made visible through the parser_macros.h include.
const char *POWERSHELL_LANG = LANG_POWERSHELL;

// constants associated with the entities; each value is also the index of the
// matching name in powershell_entities
enum {
  POWERSHELL_SPACE = 0,
  POWERSHELL_COMMENT = 1,
  POWERSHELL_STRING = 2,
  POWERSHELL_ANY = 3
};

// the languages entities, keyed by the constants above so the two lists
// cannot drift apart
const char *powershell_entities[] = {
  [POWERSHELL_SPACE] = "space",
  [POWERSHELL_COMMENT] = "comment",
  [POWERSHELL_STRING] = "string",
  [POWERSHELL_ANY] = "any"
};

/*****************************************************************************/

%%{
machine powershell;
write data;
include common "common.rl";

# Line counting machine

# Line-counting callback. It runs after an embedding action has stored the kind
# of text just matched in entity, and dispatches on that value:
#   POWERSHELL_SPACE -> ls
#   POWERSHELL_ANY   -> code
#   INTERNAL_NL      -> std_internal_newline (newline inside a block comment or string)
#   NEWLINE          -> std_newline (newline matched by the line scanner)
# ls, code, std_internal_newline and std_newline are not defined in this file;
# presumably they are macros from ../parser_macros.h, which would explain the
# missing trailing semicolons. Any other entity value matches no case, so the
# callback does nothing for it.
action powershell_ccallback {
switch(entity) {
case POWERSHELL_SPACE:
ls
break;
case POWERSHELL_ANY:
code
break;
case INTERNAL_NL:
std_internal_newline(POWERSHELL_LANG)
break;
case NEWLINE:
std_newline(POWERSHELL_LANG)
}
}

# Line comment: a hash sign followed by everything up to the end of the line.
# The comment action fires on the hash sign itself; that action is not defined
# in this file (presumably it comes from common.rl).
powershell_line_comment = '#' @comment nonnewline*;
# Block comment: opened by less-than hash, closed by hash greater-than, and
# allowed to span lines. Inside the body each newline reports INTERNAL_NL
# through the line-counting callback, ws is consumed without firing any action,
# and every other character fires the comment action again. The :>> operator
# leaves the loop as soon as the closing delimiter has been matched.
powershell_block_comment =
'<#' @comment (
newline %{ entity = INTERNAL_NL; } %powershell_ccallback
|
ws
|
(nonnewline - ws) @comment
)* :>> '#>';
# Either comment form.
powershell_comment = powershell_line_comment | powershell_block_comment;

# Single-quoted strings are verbatim in PowerShell: nothing is escaped inside
# them, so the backslash is an ordinary character. Treating it as an escape
# made a literal that ends in a backslash (typical for Windows paths) swallow
# its own closing quote and everything up to the next quote. A doubled quote,
# the only way to embed a quote, is simply scanned as two adjacent strings,
# which counts the same. The string may span lines; each newline inside it
# reports INTERNAL_NL through the line-counting callback.
powershell_sq_str =
  '\'' @enqueue @code (
    newline %{ entity = INTERNAL_NL; } %powershell_ccallback
    |
    ws
    |
    [^\r\n\f\t '] @code
  )* '\'' @commit;
# Double-quoted strings use the backtick, not the backslash, as the escape
# character. A backtick followed by any character (including the quote) stays
# inside the string; a backslash is an ordinary character here as well.
powershell_dq_str =
  '"' @enqueue @code (
    newline %{ entity = INTERNAL_NL; } %powershell_ccallback
    |
    '`' newline %{ entity = INTERNAL_NL; } %powershell_ccallback
    |
    ws
    |
    [^\r\n\f\t "`] @code
    |
    '`' nonnewline @code
  )* '"' @commit;
# TODO: here-strings (the at-sign quoted blocks); see ruby.rl heredoc handling
# for a possible approach.
powershell_string = powershell_sq_str | powershell_dq_str;

# Scanner used when counting lines (entry point powershell_en_powershell_line).
# Ragel scanners take the longest match and, on a tie, the alternative listed
# first. Runs of blanks and single newlines set entity and go through the
# line-counting callback; comments and strings do their bookkeeping through the
# actions embedded in their own definitions; any remaining non-space character
# is reported as POWERSHELL_ANY, one character at a time.
powershell_line := |*
spaces ${ entity = POWERSHELL_SPACE; } => powershell_ccallback;
powershell_comment;
powershell_string;
newline ${ entity = NEWLINE; } => powershell_ccallback;
^space ${ entity = POWERSHELL_ANY; } => powershell_ccallback;
*|;

# Entity machine

# Entity callback: reports the token just matched by the entity scanner to the
# user-supplied callback, passing the language name, the entity name looked up
# by the current entity index, the token bounds ts and te run through cint, and
# the caller's userdata. cint is not defined in this file; presumably it turns
# the scanner pointers into integer offsets -- confirm in parser_macros.h.
action powershell_ecallback {
callback(POWERSHELL_LANG, powershell_entities[entity], cint(ts), cint(te), userdata);
}

# Comment shapes for the entity scanner. They match the same delimiters as the
# line-counting versions but carry no embedded actions, because the scanner
# below reports the whole token at once.
powershell_line_comment_entity = '#' nonnewline*;
powershell_block_comment_entity = '<#' any* :>> '#>';
powershell_comment_entity = powershell_line_comment_entity | powershell_block_comment_entity;

# Scanner used when returning entity positions (entry point
# powershell_en_powershell_entity). Only whitespace runs and comments are
# reported through the entity callback.
powershell_entity := |*
space+ ${ entity = POWERSHELL_SPACE; } => powershell_ecallback;
powershell_comment_entity ${ entity = POWERSHELL_COMMENT; } => powershell_ecallback;
# TODO: strings and other entities are not recognised yet. Every other
# non-space character is consumed silently by the rule below, so a hash sign
# inside a quoted string is currently reported as the start of a comment.
^space;
*|;
}%%

/************************* Required for every parser *************************/

/* Parses a string buffer with powershell code.
*
* @param *buffer The string to parse.
* @param length The length of the string to parse.
* @param count Integer flag specifying whether or not to count lines. If yes,
* uses the Ragel machine optimized for counting. Otherwise uses the Ragel
* machine optimized for returning entity positions.
* @param *callback Callback function. If count is set, callback is called for
* every line of code, comment, or blank with 'lcode', 'lcomment', and
* 'lblank' respectively. Otherwise callback is called for each entity found.
* @param *userdata Caller-owned pointer handed to callback as its last
* argument in entity mode; presumably the line-counting macros forward it the
* same way (they are defined outside this file).
*/
void parse_powershell(char *buffer, int length, int count,
void (*callback) (const char *lang, const char *entity, int s,
int e, void *udata),
void *userdata
) {
// init is a macro defined outside this file; presumably it declares the
// parser state (cs, ts, te, entity, ...) that the generated code below uses
init

// Ragel emits the state machine initialisation here
%% write init;
// choose the start state: the line-counting scanner when count is non-zero,
// the entity scanner otherwise
cs = (count) ? powershell_en_powershell_line : powershell_en_powershell_entity;
// Ragel emits the code that runs the machine over the buffer here
%% write exec;

// if no newline at EOF; callback contents of last line
if (count) { process_last_line(POWERSHELL_LANG) }
}

#endif

/*****************************************************************************/
1 change: 1 addition & 0 deletions test/unit/detector_test.h
Expand Up @@ -178,6 +178,7 @@ void test_detector_detect_polyglot() {
ASSERT_DETECT(LANG_LIVECODE, "foo.lc");
ASSERT_DETECT(LANG_LIVECODE, "script.utf8");
ASSERT_DETECT(LANG_POSTSCRIPT, "foo.ps");
ASSERT_DETECT(LANG_POWERSHELL, "foo.ps1");
ASSERT_DETECT(LANG_SWIFT, "foo.swift");
ASSERT_DETECT(LANG_UMPLE, "foo.umple");
ASSERT_NODETECT("empty.inc");
Expand Down
2 changes: 2 additions & 0 deletions test/unit/parser_test.h
Expand Up @@ -142,6 +142,7 @@ void test_parser_verify_entity(SourceFile *sf, const char *entity,
#include "parsers/test_perl.h"
#include "parsers/test_pike.h"
#include "parsers/test_postscript.h"
#include "parsers/test_powershell.h"
#include "parsers/test_puppet.h"
#include "parsers/test_python.h"
#include "parsers/test_qml.h"
Expand Down Expand Up @@ -336,6 +337,7 @@ void all_parser_tests() {
all_perl_tests();
all_pike_tests();
all_postscript_tests();
all_powershell_tests();
all_python_tests();
all_r_tests();
all_racket_tests();
Expand Down
18 changes: 18 additions & 0 deletions test/unit/parsers/test_powershell.h
@@ -0,0 +1,18 @@
/* Line-count mode: parses a single line holding a blank followed by a line
 * comment and checks the result against the expected values passed to
 * test_parser_verify_parse (presumably expected code, expected comment text
 * and expected blank-line count, in that order). */
void test_powershell_comments() {
  const char *lang = "powershell";
  const char *src = " #comment";

  test_parser_verify_parse(
    test_parser_sourcefile(lang, src),
    lang, "", "#comment", 0
  );
}

/* Entity mode: parses the same one-line input and checks that a "comment"
 * entity with the text "#comment" is reported (the leading blank is not part
 * of it). */
void test_powershell_comment_entities() {
  const char *lang = "powershell";
  const char *src = " #comment";

  test_parser_verify_entity(
    test_parser_sourcefile(lang, src),
    "comment", "#comment"
  );
}

/* Runs every PowerShell parser test defined in this file: first the
 * line-count check, then the entity check. */
void all_powershell_tests() {
test_powershell_comments();
test_powershell_comment_entities();
}

0 comments on commit e176f5e

Please sign in to comment.