wip: (dirty) buffer instead of large socket msg

This commit is contained in:
Marc Cataford 2020-09-28 16:46:10 -04:00
parent 806279edaa
commit b9841084a1
6 changed files with 43 additions and 15 deletions

View file

@ -1,8 +1,7 @@
import socket
import json
from settings import settings
from pathlib import Path
from colors import highlight
@ -10,8 +9,13 @@ def search(query):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
s.connect((settings.SOCKET_HOST, settings.SOCKET_PORT))
s.sendall(query.encode())
length = int(s.recv(4).decode())
results = json.loads(s.recv(length).decode())
length = int(s.recv(8).decode())
results = None
with open(Path(settings.BUFFER_PATH).expanduser(), "rb") as infile:
results = infile.read().decode()
results = json.loads(results)
for result in results:
with open(result["key"], "r") as infile:

View file

@ -6,5 +6,6 @@ SETTINGS_KEYS = [
"FILE_TYPES",
"SIGNIFICANCE_THRESHOLD",
"INDEXING_PROCESSES",
"BUFFER_PATH",
]
QUERY_STRING_LENGTH = 1024

View file

@ -73,8 +73,18 @@ class Indexer(IndexerBase):
uniques = 0
for lead in leads:
uid, score = lead
lead_content = self.corpus.get_document(uid=uid).content
lead_path = self.corpus.get_document(uid=uid).key
lead_content = ""
try:
with open(lead_path, "r") as infile:
import mmap
m = mmap.mmap(infile.fileno(), 0, prot=mmap.PROT_READ)
lead_content = m.read().decode()
except Exception as e:
logger.warning(e)
logger.warning(f"No content in {lead_path}", prefix="Query")
results = re.finditer(query, lead_content)
hits_in_lead = []
for hit in results:
@ -130,7 +140,7 @@ class Indexer(IndexerBase):
try:
with open(discovered_file, "r") as infile:
content = infile.read()
self.corpus.add_document(key=discovered_file, content=content)
self.corpus.add_document(key=discovered_file, content="")
logger.info(f"Loaded {discovered_file} in memory", prefix="Preloading")
except Exception as e:
logger.warning(
@ -156,11 +166,18 @@ class Indexer(IndexerBase):
for uid in uids:
document = self.corpus.get_document(uid=uid)
path = document.key
content = document.content
trigrams[uid] = TrigramIndex.trigramize(content)
self._line_index.index(path, content)
logger.info(f"Processed {path}", prefix="Indexing")
# content = document.content
try:
with open(path, "r") as infile:
import mmap
m = mmap.mmap(infile.fileno(), 0, prot=mmap.PROT_READ)
content = m.read().decode()
trigrams[uid] = TrigramIndex.trigramize(content)
self._line_index.index(path, content)
logger.info(f"Processed {path}", prefix="Indexing")
except:
logger.warning(path, prefix="Indexing")
return (trigrams, self._line_index._lines)
def _find_closest_line(self, path, index):

View file

@ -1,7 +1,10 @@
from base import IndexBase
import attr
from logger import get_logger
logger = get_logger(__name__)
@attr.s
class LineIndex(IndexBase):

View file

@ -2,11 +2,10 @@ import json
import socket
import pyinotify
import attr
from watcher import WatchHandler
from indexer import Indexer
from constants import QUERY_STRING_LENGTH
from pathlib import Path
from settings import settings
from logger import get_logger
@ -38,12 +37,14 @@ class Server:
try:
query_results = self.indexer.query(query_string)
response = json.dumps(query_results).encode()
response_length = str(len(response))
response_length = str(len(response.decode()))
with open(Path(settings.BUFFER_PATH).expanduser(), "wb") as outfile:
outfile.write(response)
conn.sendall(response_length.encode())
conn.sendall(response)
except KeyboardInterrupt:
raise e
except Exception as e:
logger.warning(e)
pass
def _start_socket(self):
@ -52,6 +53,7 @@ class Server:
self._socket = socket_obj
self._handle_socket(socket=socket_obj)
except Exception as e:
logger.warning(e)
raise e
def _start_watch(self):

View file

@ -15,6 +15,7 @@ default_settings = {
"SIGNIFICANCE_THRESHOLD": 0,
"WATCHED": [],
"INDEXING_PROCESSES": 4,
"BUFFER_PATH": "~/.codesearchbuffer",
}