From cb0a5c7a36122a053c6c86cab145aa4c0336f5ba Mon Sep 17 00:00:00 2001 From: Jacob Segal Date: Sun, 16 Dec 2018 20:47:35 -0800 Subject: [PATCH] Improve perf for compile dbs in large projects When using a compilation database (compile_commands.json) in very large projects, significant delays would occur when changing files -- particularly those that happened to be far down the db. Rather than iterating over the whole list every time, we now build up a lookup table based on the tail of the filename (and tail of the directory for widening searches) and iterate over the much smaller list of compile commands for files with the given name. Test metrics (from compile_database_perf/test.sh) show a 90% performance improvement -- from 25 seconds to 2.5 seconds per run. --- autoload/ale/c.vim | 50 +++++++++++++++++++++--------- test/compile_database_perf/test.sh | 29 +++++++++++++++++ test/test_c_flag_parsing.vader | 21 +++++++++++-- 3 files changed, 83 insertions(+), 17 deletions(-) create mode 100755 test/compile_database_perf/test.sh diff --git a/autoload/ale/c.vim b/autoload/ale/c.vim index 617e81f6..746d19b0 100644 --- a/autoload/ale/c.vim +++ b/autoload/ale/c.vim @@ -157,15 +157,17 @@ if !exists('s:compile_commands_cache') let s:compile_commands_cache = {} endif -function! s:GetListFromCompileCommandsFile(compile_commands_file) abort +function! s:GetLookupFromCompileCommandsFile(compile_commands_file) abort + let l:empty = [{}, {}] + if empty(a:compile_commands_file) - return [] + return l:empty endif let l:time = getftime(a:compile_commands_file) if l:time < 0 - return [] + return l:empty endif let l:key = a:compile_commands_file . ':' . l:time @@ -174,21 +176,36 @@ function! s:GetListFromCompileCommandsFile(compile_commands_file) abort return s:compile_commands_cache[l:key] endif - let l:data = [] - silent! let l:data = json_decode(join(readfile(a:compile_commands_file), '')) + let l:raw_data = [] + silent! let l:raw_data = json_decode(join(readfile(a:compile_commands_file), '')) - if !empty(l:data) - let s:compile_commands_cache[l:key] = l:data + let l:file_lookup = {} + let l:dir_lookup = {} - return l:data + for l:entry in l:raw_data + let l:basename = tolower(fnamemodify(l:entry.file, ':t')) + let l:file_lookup[l:basename] = get(l:file_lookup, l:basename, []) + [l:entry] + + let l:dirbasename = tolower(fnamemodify(l:entry.directory, ':p:h:t')) + let l:dir_lookup[l:dirbasename] = get(l:dir_lookup, l:basename, []) + [l:entry] + endfor + + if !empty(l:file_lookup) && !empty(l:dir_lookup) + let l:result = [l:file_lookup, l:dir_lookup] + let s:compile_commands_cache[l:key] = l:result + + return l:result endif - return [] + return l:empty endfunction -function! ale#c#ParseCompileCommandsFlags(buffer, dir, json_list) abort +function! ale#c#ParseCompileCommandsFlags(buffer, dir, file_lookup, dir_lookup) abort " Search for an exact file match first. - for l:item in a:json_list + let l:basename = tolower(expand('#' . a:buffer . ':t')) + let l:file_list = get(a:file_lookup, l:basename, []) + + for l:item in l:file_list if bufnr(l:item.file) is a:buffer return ale#c#ParseCFlags(a:dir, l:item.command) endif @@ -197,7 +214,10 @@ function! ale#c#ParseCompileCommandsFlags(buffer, dir, json_list) abort " Look for any file in the same directory if we can't find an exact match. let l:dir = ale#path#Simplify(expand('#' . a:buffer . ':p:h')) - for l:item in a:json_list + let l:dirbasename = tolower(expand('#' . a:buffer . ':p:h:t')) + let l:dir_list = get(a:dir_lookup, l:dirbasename, []) + + for l:item in l:dir_list if ale#path#Simplify(fnamemodify(l:item.file, ':h')) is? l:dir return ale#c#ParseCFlags(a:dir, l:item.command) endif @@ -208,9 +228,11 @@ endfunction function! ale#c#FlagsFromCompileCommands(buffer, compile_commands_file) abort let l:dir = ale#path#Dirname(a:compile_commands_file) - let l:json_list = s:GetListFromCompileCommandsFile(a:compile_commands_file) + let l:lookups = s:GetLookupFromCompileCommandsFile(a:compile_commands_file) + let l:file_lookup = l:lookups[0] + let l:dir_lookup = l:lookups[1] - return ale#c#ParseCompileCommandsFlags(a:buffer, l:dir, l:json_list) + return ale#c#ParseCompileCommandsFlags(a:buffer, l:dir, l:file_lookup, l:dir_lookup) endfunction function! ale#c#GetCFlags(buffer, output) abort diff --git a/test/compile_database_perf/test.sh b/test/compile_database_perf/test.sh new file mode 100755 index 00000000..15a2b442 --- /dev/null +++ b/test/compile_database_perf/test.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Generate source files for ALE to read. They don't have to be very long, the delay is in reading compile_commands, not actually running tests +mkdir -p gen_src +for i in {1..400}; do echo "const char *GeneratedFunc${i}() { return \"Word ${i}\"; }" > gen_src/source${i}.cpp; done + +# Create the compile_commands database +echo "[ {" > compile_commands.json + +for i in {1..399}; do + { + echo "\"command\": \"clang++ -c $(pwd)/gen_src/source${i}.cpp -o $(pwd)/build/obj/Debug/source${i}.o -MF $(pwd)/build/obj/Debug/source${i}.d -MMD -MP\"," + echo "\"directory\": \"$(pwd)/build\"," + echo "\"file\": \"$(pwd)/gen_src/source${i}.cpp\"" + echo "}, {" + } >> compile_commands.json +done + +{ + echo "\"command\": \"clang++ -c $(pwd)/gen_src/source400.cpp -o $(pwd)/build/obj/Debug/source400.o -MF $(pwd)/build/obj/Debug/source400.d -MMD -MP\"," + echo "\"directory\": \"$(pwd)/build\"," + echo "\"file\": \"$(pwd)/gen_src/source400.cpp\"" + echo "} ]" +} >> compile_commands.json + +# Start up vim and switch back and forth between files -- at least one of the files must be near the bottom of compile_commands.json +time vim -c "for i in range(0,20) | edit gen_src/source10.cpp | edit gen_src/source400.cpp | endfor" \ + -c "noautocmd qa!" \ + `find . | grep "source..\.cpp"` diff --git a/test/test_c_flag_parsing.vader b/test/test_c_flag_parsing.vader index d3cb2b51..4c23c711 100644 --- a/test/test_c_flag_parsing.vader +++ b/test/test_c_flag_parsing.vader @@ -161,14 +161,14 @@ Execute(FlagsFromCompileCommands should tolerate empty values): AssertEqual '', ale#c#FlagsFromCompileCommands(bufnr(''), '') Execute(ParseCompileCommandsFlags should tolerate empty values): - AssertEqual '', ale#c#ParseCompileCommandsFlags(bufnr(''), '', []) + AssertEqual '', ale#c#ParseCompileCommandsFlags(bufnr(''), '', {}, {}) Execute(ParseCompileCommandsFlags should parse some basic flags): noautocmd execute 'file! ' . fnameescape(ale#path#Simplify('/foo/bar/xmms2-mpris/src/xmms2-mpris.c')) AssertEqual \ '-I' . ale#path#Simplify('/usr/include/xmms2'), - \ ale#c#ParseCompileCommandsFlags(bufnr(''), ale#path#Simplify('/foo/bar/xmms2-mpris'), [ + \ ale#c#ParseCompileCommandsFlags(bufnr(''), ale#path#Simplify('/foo/bar/xmms2-mpris'), { "xmms2-mpris.c": [ \ { \ 'directory': ale#path#Simplify('/foo/bar/xmms2-mpris'), \ 'command': '/usr/bin/cc -I' . ale#path#Simplify('/usr/include/xmms2') @@ -176,7 +176,22 @@ Execute(ParseCompileCommandsFlags should parse some basic flags): \ . ' -c ' . ale#path#Simplify('/foo/bar/xmms2-mpris/src/xmms2-mpris.c'), \ 'file': ale#path#Simplify('/foo/bar/xmms2-mpris/src/xmms2-mpris.c'), \ }, - \ ]) + \ ] }, {}) + +Execute(ParseCompileCommandsFlags should fall back to files in the same directory): + noautocmd execute 'file! ' . fnameescape(ale#path#Simplify('/foo/bar/xmms2-mpris/src/xmms2-mpris.c')) + + AssertEqual + \ '-I' . ale#path#Simplify('/usr/include/xmms2'), + \ ale#c#ParseCompileCommandsFlags(bufnr(''), ale#path#Simplify('/foo/bar/xmms2-mpris'), {}, { "src": [ + \ { + \ 'directory': ale#path#Simplify('/foo/bar/xmms2-mpris'), + \ 'command': '/usr/bin/cc -I' . ale#path#Simplify('/usr/include/xmms2') + \ . ' -o CMakeFiles/xmms2-mpris.dir/src/xmms2-mpris.c.o' + \ . ' -c ' . ale#path#Simplify('/foo/bar/xmms2-mpris/src/xmms2-mpris.c'), + \ 'file': ale#path#Simplify((has('win32') ? 'C:' : '') . '/foo/bar/xmms2-mpris/src/xmms2-other.c'), + \ }, + \ ] }) Execute(ParseCFlags should not merge flags): AssertEqual