From 11799dcf275a39327707da89eb45dca0eedbb8cf Mon Sep 17 00:00:00 2001 From: Marc Cataford Date: Fri, 2 Oct 2020 23:00:38 -0400 Subject: [PATCH] chore: package reorg --- src/cli.py | 24 ------------ src/codesearch/__init__.py | 0 src/{ => codesearch}/base.py | 0 src/codesearch/cli.py | 30 +++++++++++++++ src/{ => codesearch}/client.py | 6 ++- src/{ => codesearch}/colors.py | 0 src/{ => codesearch}/constants.py | 0 src/{ => codesearch}/document_models.py | 0 src/{ => codesearch}/indexer.py | 6 ++- src/{ => codesearch}/line_index.py | 0 src/{ => codesearch}/logger.py | 0 src/{ => codesearch}/prefix_tree.py | 8 +++- src/{ => codesearch}/process_utils.py | 0 src/{ => codesearch}/server.py | 10 ++--- src/{ => codesearch}/settings.py | 0 src/codesearch/test_indexer.py | 49 ++++++++++++++++++++++++ src/{ => codesearch}/test_prefix_tree.py | 38 ++++++++++-------- src/{ => codesearch}/trigram_index.py | 0 src/{ => codesearch}/watcher.py | 2 +- src/setup.py | 9 +++++ 20 files changed, 131 insertions(+), 51 deletions(-) delete mode 100644 src/cli.py create mode 100644 src/codesearch/__init__.py rename src/{ => codesearch}/base.py (100%) create mode 100644 src/codesearch/cli.py rename src/{ => codesearch}/client.py (97%) rename src/{ => codesearch}/colors.py (100%) rename src/{ => codesearch}/constants.py (100%) rename src/{ => codesearch}/document_models.py (100%) rename src/{ => codesearch}/indexer.py (96%) rename src/{ => codesearch}/line_index.py (100%) rename src/{ => codesearch}/logger.py (100%) rename src/{ => codesearch}/prefix_tree.py (88%) rename src/{ => codesearch}/process_utils.py (100%) rename src/{ => codesearch}/server.py (91%) rename src/{ => codesearch}/settings.py (100%) create mode 100644 src/codesearch/test_indexer.py rename src/{ => codesearch}/test_prefix_tree.py (82%) rename src/{ => codesearch}/trigram_index.py (100%) rename src/{ => codesearch}/watcher.py (87%) create mode 100644 src/setup.py diff --git a/src/cli.py b/src/cli.py deleted file mode 
100644 index 29ae400..0000000 --- a/src/cli.py +++ /dev/null @@ -1,24 +0,0 @@ -import argparse - -from pathlib import Path -from server import Server -from indexer import Indexer -from client import search -from settings import settings - -parser = argparse.ArgumentParser() - -parser.add_argument("command") -parser.add_argument("--q", required=False) - -args = parser.parse_args() - -if args.command == "start": - watched = [Path(p).expanduser() for p in settings.WATCHED] - server = Server( - indexer=Indexer(domain=watched), - watched=watched, - ) - server.run() -elif args.command == "search": - search(args.q) diff --git a/src/codesearch/__init__.py b/src/codesearch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/base.py b/src/codesearch/base.py similarity index 100% rename from src/base.py rename to src/codesearch/base.py diff --git a/src/codesearch/cli.py b/src/codesearch/cli.py new file mode 100644 index 0000000..5f66ddb --- /dev/null +++ b/src/codesearch/cli.py @@ -0,0 +1,30 @@ +import argparse + +from pathlib import Path +from .server import Server +from .indexer import Indexer +from .client import search +from .settings import settings + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument("command") + parser.add_argument("--q", required=False) + + args = parser.parse_args() + + if args.command == "start": + watched = [Path(p).expanduser() for p in settings.WATCHED] + server = Server( + indexer=Indexer( + domain=watched, + exclusions=settings.EXCLUDES, + file_types=settings.FILE_TYPES, + ), + watched=watched, + ) + server.run() + elif args.command == "search": + search(args.q) diff --git a/src/client.py b/src/codesearch/client.py similarity index 97% rename from src/client.py rename to src/codesearch/client.py index f9a53e4..e5291e4 100644 --- a/src/client.py +++ b/src/codesearch/client.py @@ -1,12 +1,14 @@ import socket import json -from settings import settings from pathlib import Path -from colors import 
highlight import curses +from .settings import settings +from .colors import highlight + + def display_handler(stdscr, buffer): current_y = 0 stdscr.refresh() diff --git a/src/colors.py b/src/codesearch/colors.py similarity index 100% rename from src/colors.py rename to src/codesearch/colors.py diff --git a/src/constants.py b/src/codesearch/constants.py similarity index 100% rename from src/constants.py rename to src/codesearch/constants.py diff --git a/src/document_models.py b/src/codesearch/document_models.py similarity index 100% rename from src/document_models.py rename to src/codesearch/document_models.py diff --git a/src/indexer.py b/src/codesearch/indexer.py similarity index 96% rename from src/indexer.py rename to src/codesearch/indexer.py index af7e9af..c4090fa 100644 --- a/src/indexer.py +++ b/src/codesearch/indexer.py @@ -43,6 +43,8 @@ class Indexer(IndexerBase): _trigram_index = attr.ib(default=attr.Factory(TrigramIndex)) _line_index = attr.ib(default=attr.Factory(LineIndex)) + _exclusions = attr.ib(default=attr.Factory(list)) + _file_types = attr.ib(default=attr.Factory(list)) # Document corpus corpus = attr.ib(default=attr.Factory(Corpus)) domain = attr.ib(default=attr.Factory(list)) @@ -120,7 +122,7 @@ class Indexer(IndexerBase): current = Path(path_root) # Avoid any excluded paths - if any([current.match(x) for x in settings.EXCLUDES]): + if any([current.match(x) for x in self._exclusions]): logger.info(f"{path_root} excluded.", prefix="Discovery") return [] @@ -130,7 +132,7 @@ class Indexer(IndexerBase): return collected - if current.suffix not in settings.FILE_TYPES: + if current.suffix not in self._file_types: return [] logger.info(f"Collected {path_root}", prefix="Discovery") diff --git a/src/line_index.py b/src/codesearch/line_index.py similarity index 100% rename from src/line_index.py rename to src/codesearch/line_index.py diff --git a/src/logger.py b/src/codesearch/logger.py similarity index 100% rename from src/logger.py rename to 
src/codesearch/logger.py diff --git a/src/prefix_tree.py b/src/codesearch/prefix_tree.py similarity index 88% rename from src/prefix_tree.py rename to src/codesearch/prefix_tree.py index 16ceb67..3eb2ea0 100644 --- a/src/prefix_tree.py +++ b/src/codesearch/prefix_tree.py @@ -2,6 +2,7 @@ import json import attr + @attr.s class PrefixTree: root = attr.ib() @@ -49,7 +50,6 @@ class PrefixTree: def to_json(self): return json.dumps(self.to_dict()) - @attr.s @@ -59,4 +59,8 @@ class PrefixTreeNode: children = attr.ib(default=attr.Factory(dict)) def to_dict(self): - return {"value": self.value, "mappings": self.mappings, "children": [child.to_dict() for child in self.children.values()]} + return { + "value": self.value, + "mappings": self.mappings, + "children": [child.to_dict() for child in self.children.values()], + } diff --git a/src/process_utils.py b/src/codesearch/process_utils.py similarity index 100% rename from src/process_utils.py rename to src/codesearch/process_utils.py diff --git a/src/server.py b/src/codesearch/server.py similarity index 91% rename from src/server.py rename to src/codesearch/server.py index e09fadd..bebf1b7 100644 --- a/src/server.py +++ b/src/codesearch/server.py @@ -2,13 +2,13 @@ import json import socket import pyinotify import attr -from watcher import WatchHandler -from indexer import Indexer -from constants import QUERY_STRING_LENGTH +from codesearch.watcher import WatchHandler +from codesearch.indexer import Indexer +from codesearch.constants import QUERY_STRING_LENGTH from pathlib import Path -from settings import settings +from codesearch.settings import settings -from logger import get_logger +from codesearch.logger import get_logger logger = get_logger(__name__) diff --git a/src/settings.py b/src/codesearch/settings.py similarity index 100% rename from src/settings.py rename to src/codesearch/settings.py diff --git a/src/codesearch/test_indexer.py b/src/codesearch/test_indexer.py new file mode 100644 index 0000000..e0d3960 --- 
/dev/null +++ b/src/codesearch/test_indexer.py @@ -0,0 +1,49 @@ +import pytest + +from .indexer import Indexer + + +@pytest.fixture() +def indexer(): + return Indexer() + + +def test_indexer_builds_trigram_set_for_given_document(indexer): + mock_document = "now that's a doc" + mock_path = "/home/documents/cool_doc" + + indexer.index(path=mock_path, content=mock_document) + + expected_trigrams = [ + "now", + "ow ", + "w t", + " th", + "tha", + "hat", + "at'", + "t's", + "'s ", + "s a", + " a ", + "a d", + " do", + "doc", + ] + + assert indexer.trigrams == {mock_path: set(expected_trigrams)} + + +def test_indexer_preserves_previous_trigram_sets_on_index(indexer): + mock_document_1 = "wow" + mock_document_2 = "woa" + mock_path_1 = "/home" + mock_path_2 = "/somewhere_else" + + indexer.index(path=mock_path_1, content=mock_document_1) + + assert indexer.trigrams == {mock_path_1: set(["wow"])} + + indexer.index(path=mock_path_2, content=mock_document_2) + + assert indexer.trigrams == {mock_path_1: set(["wow"]), mock_path_2: set(["woa"])} diff --git a/src/test_prefix_tree.py b/src/codesearch/test_prefix_tree.py similarity index 82% rename from src/test_prefix_tree.py rename to src/codesearch/test_prefix_tree.py index ea12c98..2d52ac6 100644 --- a/src/test_prefix_tree.py +++ b/src/codesearch/test_prefix_tree.py @@ -2,58 +2,66 @@ import pytest from .prefix_tree import PrefixTree + @pytest.fixture def prefix_tree(): return PrefixTree.initialize() + def test_base_tree_has_a_root_node(prefix_tree, snapshot): assert prefix_tree.to_dict() == snapshot + def test_insert_single_string(prefix_tree, snapshot): - mock_value = 'abc' - mock_key = 'key_1' + mock_value = "abc" + mock_key = "key_1" prefix_tree.insert(value=mock_value, key=mock_key) assert prefix_tree.to_dict() == snapshot assert prefix_tree.get(value=mock_value) == [mock_key] + def test_insert_single_character_(prefix_tree, snapshot): - mock_value = 'a' - mock_key = 'key_1' + mock_value = "a" + mock_key = "key_1" 
prefix_tree.insert(value=mock_value, key=mock_key) assert prefix_tree.to_dict() == snapshot assert prefix_tree.get(value=mock_value) == [mock_key] + def test_insert_overlapping_strings(prefix_tree, snapshot): - mock_value_1 = 'abcd' - mock_key_1 = 'key_1' - mock_value_2 = 'abce' - mock_key_2 = 'key_2' + mock_value_1 = "abcd" + mock_key_1 = "key_1" + mock_value_2 = "abce" + mock_key_2 = "key_2" prefix_tree.insert(value=mock_value_1, key=mock_key_1) prefix_tree.insert(value=mock_value_2, key=mock_key_2) assert prefix_tree.to_dict() == snapshot assert prefix_tree.get(value=mock_value_1) == [mock_key_1] assert prefix_tree.get(value=mock_value_2) == [mock_key_2] + def test_insert_multiple_keys_same_string(prefix_tree, snapshot): - mock_value = 'abcd' - mock_key_1 = 'key_1' - mock_key_2 = 'key_2' + mock_value = "abcd" + mock_key_1 = "key_1" + mock_key_2 = "key_2" prefix_tree.insert(value=mock_value, key=mock_key_1) prefix_tree.insert(value=mock_value, key=mock_key_2) assert prefix_tree.to_dict() == snapshot assert prefix_tree.get(value=mock_value) == [mock_key_1, mock_key_2] + def test_insert_strings_subsets_of_each_other(prefix_tree, snapshot): - mock_value_1 = 'abcd' - mock_key_1 = 'key_1' - mock_value_2 = 'abc' - mock_key_2 = 'key_2' + mock_value_1 = "abcd" + mock_key_1 = "key_1" + mock_value_2 = "abc" + mock_key_2 = "key_2" prefix_tree.insert(value=mock_value_1, key=mock_key_1) prefix_tree.insert(value=mock_value_2, key=mock_key_2) assert prefix_tree.to_dict() == snapshot assert prefix_tree.get(value=mock_value_1) == [mock_key_1] assert prefix_tree.get(value=mock_value_2) == [mock_key_2] + def test_serializes_to_json(prefix_tree, snapshot): prefix_tree.insert(value="abcd", key="key_1") assert prefix_tree.to_json() == snapshot diff --git a/src/trigram_index.py b/src/codesearch/trigram_index.py similarity index 100% rename from src/trigram_index.py rename to src/codesearch/trigram_index.py diff --git a/src/watcher.py b/src/codesearch/watcher.py similarity index 87% 
rename from src/watcher.py rename to src/codesearch/watcher.py index 8822dae..92c25db 100644 --- a/src/watcher.py +++ b/src/codesearch/watcher.py @@ -1,7 +1,7 @@ import pyinotify import attr -from logger import get_logger +from .logger import get_logger logger = get_logger(__name__) diff --git a/src/setup.py b/src/setup.py new file mode 100644 index 0000000..6d1bf11 --- /dev/null +++ b/src/setup.py @@ -0,0 +1,9 @@ +from setuptools import setup + +setup( + name="codesearch", + version="0.1", + packages=["codesearch"], + install_requires=["pyinotify", "attrs"], + entry_points={"console_scripts": ["codesearch=codesearch.cli:main"]}, +)