From 4330bc62b5408657368da21c509e90e34023e2da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Henrique=20Guard=C3=A3o=20Gandarez?= Date: Mon, 19 May 2025 11:32:59 -0300 Subject: [PATCH] Detect Forth over F# --- pkg/language/forth.go | 33 ---------- pkg/language/fsharp.go | 26 -------- pkg/language/language.go | 63 ++++--------------- pkg/language/language_test.go | 4 +- pkg/language/priority.go | 8 ++- .../codefiles/{fsharp.fs => forth.fs} | 0 6 files changed, 19 insertions(+), 115 deletions(-) delete mode 100644 pkg/language/forth.go delete mode 100644 pkg/language/fsharp.go rename pkg/language/testdata/codefiles/{fsharp.fs => forth.fs} (100%) diff --git a/pkg/language/forth.go b/pkg/language/forth.go deleted file mode 100644 index 6086caec..00000000 --- a/pkg/language/forth.go +++ /dev/null @@ -1,33 +0,0 @@ -package language - -import ( - "regexp" - "strings" - - "github.com/wakatime/wakatime-cli/pkg/heartbeat" -) - -var forthFuncTest = regexp.MustCompile(`:[^\n\r]+;[\n\r]`) - -// detectForthFromContents tries to detect the language from the file contents. -func detectForthFromContents(text string) (heartbeat.Language, float32, bool) { - var weight float32 - - if forthFuncTest.MatchString(text) { - weight = 0.9 - } - - if strings.Contains(text, "\\ ") { - weight += 0.5 - } - - if strings.Contains(text, "( ") { - weight += 0.2 - } - - if weight > 1 { - weight = 1 - } - - return heartbeat.LanguageUnknown, weight, weight > 0 -} diff --git a/pkg/language/fsharp.go b/pkg/language/fsharp.go deleted file mode 100644 index c0016fb7..00000000 --- a/pkg/language/fsharp.go +++ /dev/null @@ -1,26 +0,0 @@ -package language - -import ( - "strings" - - "github.com/wakatime/wakatime-cli/pkg/heartbeat" -) - -// detectFSharpFromContents tries to detect the language from the file contents. 
-func detectFSharpFromContents(text string) (heartbeat.Language, float32, bool) { - var weight float32 - - if strings.Contains(text, "let ") && strings.Contains(text, "match ") && strings.Contains(text, " ->") { - weight = 0.9 - } - - if strings.Contains(text, "// ") || strings.Contains(text, "(* ") && strings.Contains(text, " *)") { - weight += 0.7 - } - - if weight > 1 { - weight = 1 - } - - return heartbeat.LanguageUnknown, weight, weight > 0 -} diff --git a/pkg/language/language.go b/pkg/language/language.go index 51709c76..60e315f6 100644 --- a/pkg/language/language.go +++ b/pkg/language/language.go @@ -3,12 +3,10 @@ package language import ( "context" "fmt" - "io" "os" "path/filepath" "strings" - "github.com/wakatime/wakatime-cli/pkg/file" "github.com/wakatime/wakatime-cli/pkg/heartbeat" "github.com/wakatime/wakatime-cli/pkg/log" ) @@ -63,6 +61,8 @@ func WithDetection(config Config) heartbeat.HandleOption { // Detect detects the language of a specific file. If guessLanguage is true, // Chroma will be used to detect a language from the file contents. 
func Detect(ctx context.Context, fp string, guessLanguage bool) (heartbeat.Language, error) { + logger := log.Extract(ctx) + if language, ok := detectSpecialCases(ctx, fp); ok { return language, nil } @@ -74,7 +74,16 @@ func Detect(ctx context.Context, fp string, guessLanguage bool) (heartbeat.Langu language = languageChroma } - language = detectOverrideCases(ctx, fp, language, weight) + head, err := fileHead(ctx, fp) + if err != nil { + logger.Warnf("failed to load head from file %q: %s", fp, err) + } + + languageVim, weightVim, okVim := detectVimModeline(string(head)) + if okVim && weightVim > weight { + // use language from vim modeline, if weight is higher + language = languageVim + } if language == heartbeat.LanguageUnknown { return heartbeat.LanguageUnknown, fmt.Errorf("could not detect the language of file %q", fp) @@ -129,54 +138,6 @@ func detectSpecialCases(ctx context.Context, fp string) (heartbeat.Language, boo return heartbeat.LanguageUnknown, false } -// detectOverrideCases overwrides the Chroma detected language based on file contents. 
-func detectOverrideCases(ctx context.Context, fp string, language heartbeat.Language, weight float32) heartbeat.Language { - logger := log.Extract(ctx) - - f, err := file.OpenNoLock(fp) // nolint:gosec - if err != nil { - logger.Debugf("failed to open file: %s", err) - return language - } - - defer func() { - if err := f.Close(); err != nil { - logger.Debugf("failed to close file: %s", err) - } - }() - - buf := make([]byte, 4096) - c, err := f.Read(buf) - if err != nil && err != io.EOF { - logger.Debugf("failed to open file: %s", err) - return language - } - - text := string(buf[:c]) - - languageVim, weightVim, okVim := detectVimModeline(text) - if okVim && weightVim > weight { - language = languageVim - } - - _, file := filepath.Split(fp) - ext := strings.ToLower(filepath.Ext(file)) - - if ext == ".fs" { - languageForth, weightForth, okForth := detectForthFromContents(text) - if okForth && weightForth >= weight { - language = languageForth - } - - languageFSharp, weightFSharp, okFSharp := detectFSharpFromContents(text) - if okFSharp && weightFSharp >= weight { - language = languageFSharp - } - } - - return language -} - // folderContainsCFiles returns true, if filder contains c files. 
func folderContainsCFiles(ctx context.Context, dir string) bool { if dir == "" { diff --git a/pkg/language/language_test.go b/pkg/language/language_test.go index 002f951a..550e6c41 100644 --- a/pkg/language/language_test.go +++ b/pkg/language/language_test.go @@ -250,11 +250,11 @@ func TestDetect_Perl_Over_Prolog(t *testing.T) { assert.Equal(t, heartbeat.LanguagePerl, lang) } -func TestDetect_FSharp_Over_Forth(t *testing.T) { +func TestDetect_Forth_Over_FSharp(t *testing.T) { lang, err := language.Detect(t.Context(), "testdata/codefiles/fsharp.fs", false) require.NoError(t, err) - assert.Equal(t, heartbeat.LanguageFSharp, lang) + assert.Equal(t, heartbeat.LanguageForth, lang) } func TestDetect_ChromaTopLanguagesRetrofit(t *testing.T) { diff --git a/pkg/language/priority.go b/pkg/language/priority.go index 112c1f5c..43674d02 100644 --- a/pkg/language/priority.go +++ b/pkg/language/priority.go @@ -3,16 +3,18 @@ package language func priority(lang string) (float32, bool) { prios := map[string]float32{ "FSharp": 0.01, - // Higher priority than the ca 65 assembler and ArmAsm + // Higher priority than FSharp + "Forth": 0.05, + // Higher priority than ca65 assembler and ArmAsm "GAS": 0.1, - // Higher priority than the ca Inform 6 + // Higher priority than Inform 6 "INI": 0.1, // TASM uses the same file endings, but TASM is not as common as NASM, so we prioritize NASM higher by default. "NASM": 0.1, "Perl": 0.01, // Higher priority than Rebol "R": 0.1, - // Higher priority than the TypoScript, as TypeScript is far more + // Higher priority than TypoScript, as TypeScript is far more // common these days "TypeScript": 0.5, } diff --git a/pkg/language/testdata/codefiles/fsharp.fs b/pkg/language/testdata/codefiles/forth.fs similarity index 100% rename from pkg/language/testdata/codefiles/fsharp.fs rename to pkg/language/testdata/codefiles/forth.fs