diff --git a/src/hash/extensions.gperf b/src/hash/extensions.gperf
index e03f840..b2eff8c 100755
--- a/src/hash/extensions.gperf
+++ b/src/hash/extensions.gperf
@@ -116,6 +116,7 @@ java, LANG_JAVA
 jpeg, BINARY
 jpg, BINARY
 js, LANG_JAVASCRIPT
+jl, LANG_JULIA
 jsp, LANG_JSP
 kdebuild-1, LANG_EBUILD
 kt, LANG_KOTLIN
diff --git a/src/hash/languages.gperf b/src/hash/languages.gperf
index c51883b..20e57c0 100755
--- a/src/hash/languages.gperf
+++ b/src/hash/languages.gperf
@@ -62,6 +62,7 @@ idl_pvwave, LANG_IDL_PVWAVE, "IDL/PV-WAVE/GDL", 0
 jam, LANG_JAM, "Jam", 2
 java, LANG_JAVA, "Java", 0
 javascript, LANG_JAVASCRIPT, "JavaScript", 0
+julia, LANG_JULIA, "Julia", 0
 jsp, LANG_JSP, "", 0
 kotlin, LANG_KOTLIN, "Kotlin", 0
 limbo, LANG_LIMBO, "Limbo", 0
diff --git a/src/hash/parsers.gperf b/src/hash/parsers.gperf
index 83fcfc8..ac9687f 100644
--- a/src/hash/parsers.gperf
+++ b/src/hash/parsers.gperf
@@ -49,6 +49,7 @@
 #include "../parsers/jam.h"
 #include "../parsers/java.h"
 #include "../parsers/javascript.h"
+#include "../parsers/julia.h"
 #include "../parsers/jsp.h"
 #include "../parsers/kotlin.h"
 #include "../parsers/lisp.h"
@@ -169,6 +170,7 @@ idl_pvwave, parse_idl_pvwave
 jam, parse_jam
 java, parse_java
 javascript, parse_javascript
+julia, parse_julia
 jsp, parse_jsp
 kotlin, parse_kotlin
 lisp, parse_lisp
diff --git a/src/languages.h b/src/languages.h
index ae97c9f..4b2f1d8 100755
--- a/src/languages.h
+++ b/src/languages.h
@@ -63,6 +63,7 @@
 #define LANG_JAM "jam"
 #define LANG_JAVA "java"
 #define LANG_JAVASCRIPT "javascript"
+#define LANG_JULIA "julia"
 #define LANG_JSP "jsp"
 #define LANG_KOTLIN "kotlin"
 #define LANG_LIMBO "limbo"
diff --git a/src/parsers/julia.rl b/src/parsers/julia.rl
new file mode 100644
index 0000000..d700570
--- /dev/null
+++ b/src/parsers/julia.rl
@@ -0,0 +1,140 @@
+/************************* Required for every parser *************************/
+#ifndef OHCOUNT_JULIA_PARSER_H
+#define OHCOUNT_JULIA_PARSER_H
+
+#include "../parser_macros.h"
+
+// the name of the language
+const char *JULIA_LANG = LANG_JULIA;
+
+// the language's entities (indexed by the enum constants below)
+const char *julia_entities[] = {
+  "space", "comment", "string", "any"
+};
+
+// constants associated with the entities
+enum {
+  JULIA_SPACE = 0, JULIA_COMMENT, JULIA_STRING, JULIA_ANY
+};
+
+/*****************************************************************************/
+
+%%{
+  machine julia;
+  write data;
+  include common "common.rl";
+
+  # Line counting machine: classifies each line as code, comment, or blank
+
+  action julia_ccallback {  // dispatch on the entity set by the scanner rule
+    switch(entity) {
+    case JULIA_SPACE:
+      ls
+      break;
+    case JULIA_ANY:
+      code
+      break;
+    case INTERNAL_NL:
+      std_internal_newline(JULIA_LANG)
+      break;
+    case NEWLINE:
+      std_newline(JULIA_LANG)
+    }
+  }
+
+  julia_line_comment = '#' @comment nonnewline*;  # '#' to end of line
+  julia_block_comment =  # '#=' ... '=#', may span several lines
+    '#=' @comment (
+      newline %{ entity = INTERNAL_NL; } %julia_ccallback
+      |
+      ws
+      |
+      (nonnewline - ws) @comment
+    )* :>> '=#';
+  julia_sq_doc_str =  # NOTE(review): confirm Julia needs '''-delimited blocks
+    '\'\'\'' @comment (
+      newline %{ entity = INTERNAL_NL; } %julia_ccallback
+      |
+      ws
+      |
+      (nonnewline - ws) @comment
+    )* :>> '\'\'\'' @comment;
+  julia_dq_doc_str =  # """ ... """ blocks are counted as comment
+    '"""' @comment (
+      newline %{ entity = INTERNAL_NL; } %julia_ccallback
+      |
+      ws
+      |
+      (nonnewline - ws) @comment
+    )* :>> '"""' @comment;
+  julia_comment = julia_line_comment | julia_block_comment |
+                  julia_sq_doc_str | julia_dq_doc_str;
+
+  # plain strings: the lookahead makes sure it's not ''' or """
+  julia_sq_str =
+    '\'' ([^'] | '\'' [^'] @{ fhold; }) @{ fhold; }
+      ([^\r\n\f'\\] | '\\' nonnewline)* '\'';
+  julia_dq_str =
+    '"' ([^"] | '"' [^"] @{ fhold; }) @{ fhold; }
+      ([^\r\n\f"\\] | '\\' nonnewline)* '"';
+  julia_string = (julia_sq_str | julia_dq_str) @code;  # strings count as code
+
+  julia_line := |*
+    spaces          ${ entity = JULIA_SPACE; } => julia_ccallback;
+    julia_comment;
+    julia_string;
+    newline         ${ entity = NEWLINE;     } => julia_ccallback;
+    ^space          ${ entity = JULIA_ANY;   } => julia_ccallback;
+  *|;
+
+  # Entity machine: reports space and comment entities with their positions
+
+  action julia_ecallback {
+    callback(JULIA_LANG, julia_entities[entity], cint(ts), cint(te), userdata);
+  }
+
+  julia_line_comment_entity = '#' nonnewline*;
+  julia_block_comment_entity = '#=' any* :>> '=#';
+  julia_sq_doc_str_entity = '\'\'\'' any* :>> '\'\'\'';
+  julia_dq_doc_str_entity = '"""' any* :>> '"""';
+  julia_comment_entity = julia_line_comment_entity | julia_block_comment_entity |
+    julia_sq_doc_str_entity | julia_dq_doc_str_entity;
+
+  julia_entity := |*
+    space+               ${ entity = JULIA_SPACE;   } => julia_ecallback;
+    julia_comment_entity ${ entity = JULIA_COMMENT; } => julia_ecallback;
+    ^space;  # everything else is skipped without a callback
+  *|;
+}%%
+
+/************************* Required for every parser *************************/
+
+/* Parses a string buffer with Julia code.
+ *
+ * @param *buffer The string to parse.
+ * @param length The length of the string to parse.
+ * @param count Integer flag: nonzero runs the line-counting machine
+ *   (julia_line), zero runs the entity machine (julia_entity).
+ * @param *callback Callback function. If count is set, callback is called for
+ *   every line of code, comment, or blank with 'lcode', 'lcomment', and
+ *   'lblank' respectively. Otherwise callback is called for each entity found.
+ * @param *userdata Passed to callback as udata by julia_ecallback.
+ */
+void parse_julia(char *buffer, int length, int count,
+                 void (*callback) (const char *lang, const char *entity, int s,
+                                   int e, void *udata),
+                 void *userdata
+  ) {
+  init
+
+  %% write init;
+  cs = (count) ? julia_en_julia_line : julia_en_julia_entity;  // select which machine to run
+  %% write exec;
+
+  // no newline at EOF: still call back with the contents of the last line
+  if (count) { process_last_line(JULIA_LANG) }
+}
+
+#endif
+
+/*****************************************************************************/
diff --git a/test/expected_dir/julia.jl b/test/expected_dir/julia.jl
new file mode 100644
index 0000000..0b4efad
--- /dev/null
+++ b/test/expected_dir/julia.jl
@@ -0,0 +1,61 @@
+julia	code	module SampleOperations
+julia	blank	
+julia	code	export ∂x, ∂y, @binary, @multiary
+julia	code	export Δx, Δy, Ax, Ay, volume
+julia	blank	
+julia	code	using Base: @propagate_inbounds
+julia	blank	
+julia	code	import Foobar.BoundaryConditions: fill_halo_regions!
+julia	blank	
+julia	comment	#####
+julia	comment	##### Regex
+julia	code	regex = r"\bhello\b"
+julia	code	string = "this is not a comment"
+julia	code	ismatch = occursin(regex, "hello world")
+julia	blank	
+julia	code	abstract type AbstractOperation{LX, LY, LZ, G, T} <: AbstractField{LX, LY, LZ, G, T, 3} end
+julia	code	const AF = AbstractField # used in unary_operations.jl, binary_operations.jl, etc
+julia	blank	
+julia	code	function Base.axes(f::AbstractOperation)
+julia	code	    if idx === (:, : ,:)
+julia	code	        return Base.OneTo.(size(f))
+julia	code	    else
+julia	code	        return Tuple(idx[i] isa Colon ? Base.OneTo(size(f, i)) : idx[i] for i = 1:3)
+julia	code	    end
+julia	code	end
+julia	blank	
+julia	code	@inline fill_halo_regions!(::AbstractOperation, args...; kwargs...) = nothing
+julia	code	architecture(a::AbstractOperation) = architecture(a.grid)
+julia	blank	
+julia	comment	"""
+julia	comment	    at(loc, abstract_operation)
+julia	comment	multline string literal
+julia	comment	"""
+julia	code	at(loc, f) = f # fallback
+julia	blank	
+julia	code	include("barfoo.jl")
+julia	blank	
+julia	comment	# Make some operators!
+julia	comment	#
+julia	comment	# Some operators:
+julia	code	import Base: sqrt, sin, cos, exp, tanh, abs, -, +, /, ^, *
+julia	blank	
+julia	code	@unary sqrt sin cos exp tanh abs
+julia	code	@unary -
+julia	code	@unary +
+julia	blank	
+julia	code	@binary /
+julia	code	@binary ^
+julia	blank	
+julia	comment	#=
+julia	comment	Some multiline
+julia	comment	comment here
+julia	comment	=#
+julia	blank	
+julia	code	atan(1 #=inline comment=#,0)
+julia	blank	
+julia	comment	# Another multiline
+julia	comment	# comment here
+julia	code	import Base: *
+julia	blank	
+julia	code	end # module
diff --git a/test/src_dir/julia.jl b/test/src_dir/julia.jl
new file mode 100644
index 0000000..6af8f84
--- /dev/null
+++ b/test/src_dir/julia.jl
@@ -0,0 +1,61 @@
+module SampleOperations
+
+export ∂x, ∂y, @binary, @multiary
+export Δx, Δy, Ax, Ay, volume
+
+using Base: @propagate_inbounds
+
+import Foobar.BoundaryConditions: fill_halo_regions!
+
+#####
+##### Regex
+regex = r"\bhello\b"
+string = "this is not a comment"
+ismatch = occursin(regex, "hello world")
+
+abstract type AbstractOperation{LX, LY, LZ, G, T} <: AbstractField{LX, LY, LZ, G, T, 3} end
+const AF = AbstractField # used in unary_operations.jl, binary_operations.jl, etc
+
+function Base.axes(f::AbstractOperation)
+    if idx === (:, : ,:)
+        return Base.OneTo.(size(f))
+    else
+        return Tuple(idx[i] isa Colon ? Base.OneTo(size(f, i)) : idx[i] for i = 1:3)
+    end
+end
+
+@inline fill_halo_regions!(::AbstractOperation, args...; kwargs...) = nothing
+architecture(a::AbstractOperation) = architecture(a.grid)
+
+"""
+    at(loc, abstract_operation)
+multline string literal
+"""
+at(loc, f) = f # fallback
+
+include("barfoo.jl")
+
+# Make some operators!
+#
+# Some operators:
+import Base: sqrt, sin, cos, exp, tanh, abs, -, +, /, ^, *
+
+@unary sqrt sin cos exp tanh abs
+@unary -
+@unary +
+
+@binary /
+@binary ^
+
+#=
+Some multiline
+comment here
+=#
+
+atan(1 #=inline comment=#,0)
+
+# Another multiline
+# comment here
+import Base: *
+
+end # module
diff --git a/test/unit/detector_test.h b/test/unit/detector_test.h
index 82a5392..dfaa604 100755
--- a/test/unit/detector_test.h
+++ b/test/unit/detector_test.h
@@ -124,6 +124,7 @@ void test_detector_detect_polyglot() {
   ASSERT_DETECT(LANG_CPP, "uses_cpp_headers.h");
   ASSERT_DETECT(LANG_CPP, "uses_cpp_stdlib_headers.h");
   ASSERT_DETECT(LANG_CPP, "uses_cpp_keywords.h");
+  ASSERT_DETECT(LANG_JULIA, "foo.jl");
   ASSERT_DETECT(LANG_RUBY, "foo.rb");
   ASSERT_DETECT(LANG_CRYSTAL, "foo.cr");
   ASSERT_DETECT(LANG_MAKE, "foo.mk");
diff --git a/test/unit/parser_test.h b/test/unit/parser_test.h
index 381fcb1..8d1e8b1 100644
--- a/test/unit/parser_test.h
+++ b/test/unit/parser_test.h
@@ -122,6 +122,7 @@ void test_parser_verify_entity(SourceFile *sf, const char *entity,
 #include "parsers/test_jam.h"
 #include "parsers/test_java.h"
 #include "parsers/test_javascript.h"
+#include "parsers/test_julia.h"
 #include "parsers/test_jsp.h"
 #include "parsers/test_lisp.h"
 #include "parsers/test_livecode.h"
@@ -317,6 +318,7 @@ void all_parser_tests() {
   all_jam_tests();
   all_java_tests();
   all_javascript_tests();
+  all_julia_tests();
   all_jsp_tests();
   all_lisp_tests();
   all_livecode_tests();
diff --git a/test/unit/parsers/test_julia.h b/test/unit/parsers/test_julia.h
new file mode 100644
index 0000000..1bbe55b
--- /dev/null
+++ b/test/unit/parsers/test_julia.h
@@ -0,0 +1,23 @@
+// Unit tests for the Julia parser: '#' line comments and '#= =#' block comments.
+void test_julia_comments() {
+  test_parser_verify_parse(
+    test_parser_sourcefile("julia", " #comment"),
+    "julia", "", "#comment", 0
+  );
+}
+
+void test_julia_comment_entities() {
+  test_parser_verify_entity(
+    test_parser_sourcefile("julia", " #comment"),
+    "comment", "#comment"
+  );
+  test_parser_verify_entity(
+    test_parser_sourcefile("julia", "#=\ncomment\n=#"),
+    "comment", "#=\ncomment\n=#"
+  );
+}
+
+void all_julia_tests() {
+  test_julia_comments();
+  test_julia_comment_entities();
+}