chore: package reorg

This commit is contained in:
Marc Cataford 2020-10-02 23:00:38 -04:00
parent db7146bb31
commit 11799dcf27
20 changed files with 131 additions and 51 deletions

View file

@ -1,24 +0,0 @@
import argparse
from pathlib import Path
from server import Server
from indexer import Indexer
from client import search
from settings import settings
# Command-line entry script: reads a positional "command" and an optional
# "--q" value, then dispatches on the command.
parser = argparse.ArgumentParser()
parser.add_argument("command")
parser.add_argument("--q", required=False)
args = parser.parse_args()
# "start": expand "~" in each configured watched path, build a Server around
# an Indexer over those paths, and run it.
if args.command == "start":
watched = [Path(p).expanduser() for p in settings.WATCHED]
server = Server(
indexer=Indexer(domain=watched),
watched=watched,
)
server.run()
# "search": pass the --q value (None when the flag is omitted) to search().
# Any other command falls through without doing anything.
elif args.command == "search":
search(args.q)

View file

30
src/codesearch/cli.py Normal file
View file

@ -0,0 +1,30 @@
import argparse
from pathlib import Path
from .server import Server
from .indexer import Indexer
from .client import search
from .settings import settings
def main(argv=None):
    """Entry point for the ``codesearch`` console script.

    Parses the command line, then either builds and runs a ``Server``
    (``start``) or passes the ``--q`` value to ``search`` (``search``).

    Args:
        argv: Optional list of argument strings. When ``None`` (the
            default, and what the console-script entry point uses),
            argparse reads ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 2 when the command is not ``start`` or
            ``search``, or when ``search`` is given without ``--q``.
    """
    parser = argparse.ArgumentParser()
    # Restricting the choices makes a mistyped command fail with a usage
    # message instead of silently doing nothing.
    parser.add_argument("command", choices=("start", "search"))
    parser.add_argument("--q", required=False)
    args = parser.parse_args(argv)

    if args.command == "start":
        # Expand "~" so settings may list home-relative paths.
        watched = [Path(p).expanduser() for p in settings.WATCHED]
        server = Server(
            indexer=Indexer(
                domain=watched,
                exclusions=settings.EXCLUDES,
                file_types=settings.FILE_TYPES,
            ),
            watched=watched,
        )
        server.run()
    elif args.command == "search":
        # --q stays optional at the parser level because "start" does not
        # need it, so enforce it here rather than passing None to search().
        if args.q is None:
            parser.error("--q is required with the search command")
        search(args.q)

View file

@ -1,12 +1,14 @@
import socket
import json
from settings import settings
from pathlib import Path
from colors import highlight
import curses
from .settings import settings
from .colors import highlight
def display_handler(stdscr, buffer):
current_y = 0
stdscr.refresh()

View file

@ -43,6 +43,8 @@ class Indexer(IndexerBase):
_trigram_index = attr.ib(default=attr.Factory(TrigramIndex))
_line_index = attr.ib(default=attr.Factory(LineIndex))
_exclusions = attr.ib(default=attr.Factory(list))
_file_types = attr.ib(default=attr.Factory(list))
# Document corpus
corpus = attr.ib(default=attr.Factory(Corpus))
domain = attr.ib(default=attr.Factory(list))
@ -120,7 +122,7 @@ class Indexer(IndexerBase):
current = Path(path_root)
# Avoid any excluded paths
if any([current.match(x) for x in settings.EXCLUDES]):
if any([current.match(x) for x in self._exclusions]):
logger.info(f"{path_root} excluded.", prefix="Discovery")
return []
@ -130,7 +132,7 @@ class Indexer(IndexerBase):
return collected
if current.suffix not in settings.FILE_TYPES:
if current.suffix not in self._file_types:
return []
logger.info(f"Collected {path_root}", prefix="Discovery")

View file

@ -2,6 +2,7 @@ import json
import attr
@attr.s
class PrefixTree:
root = attr.ib()
@ -49,7 +50,6 @@ class PrefixTree:
def to_json(self):
    """Return the JSON string for this object's ``to_dict()`` output."""
    as_dict = self.to_dict()
    return json.dumps(as_dict)
@attr.s
@ -59,4 +59,8 @@ class PrefixTreeNode:
children = attr.ib(default=attr.Factory(dict))
# Serializes this node as a plain dict holding its value, its mappings, and
# the to_dict() output of every child in self.children.
# NOTE(review): two return statements appear here because this view
# interleaves the removed one-line form with the added multi-line form;
# both build the same dict.
def to_dict(self):
return {"value": self.value, "mappings": self.mappings, "children": [child.to_dict() for child in self.children.values()]}
return {
"value": self.value,
"mappings": self.mappings,
"children": [child.to_dict() for child in self.children.values()],
}

View file

@ -2,13 +2,13 @@ import json
import socket
import pyinotify
import attr
from watcher import WatchHandler
from indexer import Indexer
from constants import QUERY_STRING_LENGTH
from codesearch.watcher import WatchHandler
from codesearch.indexer import Indexer
from codesearch.constants import QUERY_STRING_LENGTH
from pathlib import Path
from settings import settings
from codesearch.settings import settings
from logger import get_logger
from codesearch.logger import get_logger
logger = get_logger(__name__)

View file

@ -0,0 +1,49 @@
import pytest
from .indexer import Indexer
@pytest.fixture()
def indexer():
    """Provide a default-constructed ``Indexer`` to each test that asks for it."""
    fresh_indexer = Indexer()
    return fresh_indexer
def test_indexer_builds_trigram_set_for_given_document(indexer):
    """Indexing one document records every 3-character window of its content."""
    doc_path = "/home/documents/cool_doc"
    doc_text = "now that's a doc"

    indexer.index(path=doc_path, content=doc_text)

    # All consecutive 3-character windows of doc_text, in reading order.
    expected_windows = {
        "now",
        "ow ",
        "w t",
        " th",
        "tha",
        "hat",
        "at'",
        "t's",
        "'s ",
        "s a",
        " a ",
        "a d",
        " do",
        "doc",
    }
    assert indexer.trigrams == {doc_path: expected_windows}
def test_indexer_preserves_previous_trigram_sets_on_index(indexer):
    """Indexing a second path leaves the first path's trigram set in place."""
    first_path, first_text = "/home", "wow"
    second_path, second_text = "/somewhere_else", "woa"

    indexer.index(path=first_path, content=first_text)
    assert indexer.trigrams == {first_path: {"wow"}}

    indexer.index(path=second_path, content=second_text)
    assert indexer.trigrams == {first_path: {"wow"}, second_path: {"woa"}}

View file

@ -2,58 +2,66 @@ import pytest
from .prefix_tree import PrefixTree
@pytest.fixture
def prefix_tree():
    """Provide the tree returned by ``PrefixTree.initialize()`` to each test."""
    tree = PrefixTree.initialize()
    return tree
def test_base_tree_has_a_root_node(prefix_tree, snapshot):
    """The dict form of a freshly initialized tree matches the snapshot."""
    tree_as_dict = prefix_tree.to_dict()
    assert tree_as_dict == snapshot
# Inserts one multi-character value under one key, then checks the tree's
# dict form against the snapshot and that get() returns exactly that key.
# NOTE(review): the assignments appear twice (single- then double-quoted)
# because this view interleaves removed and added diff lines; the values
# are identical.
def test_insert_single_string(prefix_tree, snapshot):
mock_value = 'abc'
mock_key = 'key_1'
mock_value = "abc"
mock_key = "key_1"
prefix_tree.insert(value=mock_value, key=mock_key)
assert prefix_tree.to_dict() == snapshot
assert prefix_tree.get(value=mock_value) == [mock_key]
# Inserts a one-character value under one key, then checks the tree's dict
# form against the snapshot and that get() returns exactly that key.
# NOTE(review): the assignments appear twice (single- then double-quoted)
# because this view interleaves removed and added diff lines; the values
# are identical.
def test_insert_single_character_(prefix_tree, snapshot):
mock_value = 'a'
mock_key = 'key_1'
mock_value = "a"
mock_key = "key_1"
prefix_tree.insert(value=mock_value, key=mock_key)
assert prefix_tree.to_dict() == snapshot
assert prefix_tree.get(value=mock_value) == [mock_key]
# Inserts two values that share the prefix "abc" but end differently, each
# under its own key, then checks the snapshot and that get() returns only
# the key belonging to each value.
# NOTE(review): the assignments appear twice (single- then double-quoted)
# because this view interleaves removed and added diff lines; the values
# are identical.
def test_insert_overlapping_strings(prefix_tree, snapshot):
mock_value_1 = 'abcd'
mock_key_1 = 'key_1'
mock_value_2 = 'abce'
mock_key_2 = 'key_2'
mock_value_1 = "abcd"
mock_key_1 = "key_1"
mock_value_2 = "abce"
mock_key_2 = "key_2"
prefix_tree.insert(value=mock_value_1, key=mock_key_1)
prefix_tree.insert(value=mock_value_2, key=mock_key_2)
assert prefix_tree.to_dict() == snapshot
assert prefix_tree.get(value=mock_value_1) == [mock_key_1]
assert prefix_tree.get(value=mock_value_2) == [mock_key_2]
# Inserts the same value twice under two different keys, then checks the
# snapshot and that get() returns both keys in insertion order.
# NOTE(review): the assignments appear twice (single- then double-quoted)
# because this view interleaves removed and added diff lines; the values
# are identical.
def test_insert_multiple_keys_same_string(prefix_tree, snapshot):
mock_value = 'abcd'
mock_key_1 = 'key_1'
mock_key_2 = 'key_2'
mock_value = "abcd"
mock_key_1 = "key_1"
mock_key_2 = "key_2"
prefix_tree.insert(value=mock_value, key=mock_key_1)
prefix_tree.insert(value=mock_value, key=mock_key_2)
assert prefix_tree.to_dict() == snapshot
assert prefix_tree.get(value=mock_value) == [mock_key_1, mock_key_2]
# Inserts "abcd" and then its prefix "abc", each under its own key, then
# checks the snapshot and that get() returns only the key belonging to
# each value.
# NOTE(review): the assignments appear twice (single- then double-quoted)
# because this view interleaves removed and added diff lines; the values
# are identical.
def test_insert_strings_subsets_of_each_other(prefix_tree, snapshot):
mock_value_1 = 'abcd'
mock_key_1 = 'key_1'
mock_value_2 = 'abc'
mock_key_2 = 'key_2'
mock_value_1 = "abcd"
mock_key_1 = "key_1"
mock_value_2 = "abc"
mock_key_2 = "key_2"
prefix_tree.insert(value=mock_value_1, key=mock_key_1)
prefix_tree.insert(value=mock_value_2, key=mock_key_2)
assert prefix_tree.to_dict() == snapshot
assert prefix_tree.get(value=mock_value_1) == [mock_key_1]
assert prefix_tree.get(value=mock_value_2) == [mock_key_2]
def test_serializes_to_json(prefix_tree, snapshot):
    """The JSON form of a tree holding one inserted value matches the snapshot."""
    prefix_tree.insert(value="abcd", key="key_1")

    json_payload = prefix_tree.to_json()
    assert json_payload == snapshot

View file

@ -1,7 +1,7 @@
import pyinotify
import attr
from logger import get_logger
from .logger import get_logger
logger = get_logger(__name__)

9
src/setup.py Normal file
View file

@ -0,0 +1,9 @@
from setuptools import setup
# Packaging metadata for the ``codesearch`` distribution; installs a
# ``codesearch`` console command that calls ``codesearch.cli:main``.
setup(
    name="codesearch",
    version="0.1",
    packages=["codesearch"],
    # The library imported as ``attr`` is published on PyPI as ``attrs``;
    # the PyPI project named ``attr`` is an unrelated package.
    install_requires=["pyinotify", "attrs"],
    entry_points={"console_scripts": ["codesearch=codesearch.cli:main"]},
)