wip: (dirty) buffer instead of large socket msg
parent 806279edaa
commit b9841084a1
6 changed files with 43 additions and 15 deletions
@@ -1,8 +1,7 @@
 import socket
 import json

 from settings import settings

+from pathlib import Path
 from colors import highlight
@@ -10,8 +9,13 @@ def search(query):
     with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
         s.connect((settings.SOCKET_HOST, settings.SOCKET_PORT))
         s.sendall(query.encode())
-        length = int(s.recv(4).decode())
-        results = json.loads(s.recv(length).decode())
+        length = int(s.recv(8).decode())
+        results = None
+
+        with open(Path(settings.BUFFER_PATH).expanduser(), "rb") as infile:
+            results = infile.read().decode()
+
+        results = json.loads(results)

         for result in results:
             with open(result["key"], "r") as infile:
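Note: the new handshake above sends only a length over the socket and leaves the payload in a file. A minimal client-side sketch of that protocol, assuming the server pads the length header to a fixed 8 bytes; the helper name and the error handling are illustrative, not code from this repo:

    import json
    import socket
    from pathlib import Path

    def fetch_results(host, port, query, buffer_path):
        # Hypothetical helper mirroring the diff above: the socket carries only
        # an 8-byte length header; the JSON payload is read from the buffer file.
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.connect((host, port))
            s.sendall(query.encode())
            header = b""
            while len(header) < 8:  # recv() may return fewer bytes than requested
                chunk = s.recv(8 - len(header))
                if not chunk:
                    raise ConnectionError("server closed before sending a length header")
                header += chunk
        expected = int(header.decode())
        raw = Path(buffer_path).expanduser().read_bytes()
        if len(raw) != expected:
            raise ValueError(f"buffer has {len(raw)} bytes, expected {expected}")
        return json.loads(raw.decode())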
@@ -6,5 +6,6 @@ SETTINGS_KEYS = [
     "FILE_TYPES",
     "SIGNIFICANCE_THRESHOLD",
     "INDEXING_PROCESSES",
+    "BUFFER_PATH",
 ]
 QUERY_STRING_LENGTH = 1024
@@ -73,8 +73,18 @@ class Indexer(IndexerBase):
         uniques = 0
         for lead in leads:
             uid, score = lead
-            lead_content = self.corpus.get_document(uid=uid).content
+            lead_path = self.corpus.get_document(uid=uid).key
+            lead_content = ""
+            try:
+                with open(lead_path, "r") as infile:
+                    import mmap
+
+                    m = mmap.mmap(infile.fileno(), 0, prot=mmap.PROT_READ)
+                    lead_content = m.read().decode()
+            except Exception as e:
+                logger.warning(e)
+                logger.warning(f"No content in {lead_path}", prefix="Query")

             results = re.finditer(query, lead_content)
             hits_in_lead = []
             for hit in results:
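The open-plus-mmap pattern above recurs in the indexing path further down. A self-contained sketch of it (the helper name is an assumption), using the portable access= flag instead of the POSIX-only prot=mmap.PROT_READ and covering the empty-file case, which mmap rejects with ValueError:

    import mmap
    from pathlib import Path

    def read_text_via_mmap(path):
        # Illustrative helper, not code from this repo: memory-map the file
        # read-only and decode it, falling back to "" for zero-length files,
        # which mmap.mmap() refuses to map.
        p = Path(path)
        if p.stat().st_size == 0:
            return ""
        with open(p, "rb") as infile:
            with mmap.mmap(infile.fileno(), 0, access=mmap.ACCESS_READ) as m:
                return m.read().decode()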
@@ -130,7 +140,7 @@ class Indexer(IndexerBase):
         try:
             with open(discovered_file, "r") as infile:
                 content = infile.read()
-                self.corpus.add_document(key=discovered_file, content=content)
+                self.corpus.add_document(key=discovered_file, content="")
                 logger.info(f"Loaded {discovered_file} in memory", prefix="Preloading")
         except Exception as e:
             logger.warning(
@@ -156,11 +166,18 @@ class Indexer(IndexerBase):
         for uid in uids:
             document = self.corpus.get_document(uid=uid)
             path = document.key
-            content = document.content
-            trigrams[uid] = TrigramIndex.trigramize(content)
-            self._line_index.index(path, content)
-            logger.info(f"Processed {path}", prefix="Indexing")
+            # content = document.content
+            try:
+                with open(path, "r") as infile:
+                    import mmap
+
+                    m = mmap.mmap(infile.fileno(), 0, prot=mmap.PROT_READ)
+                    content = m.read().decode()
+                    trigrams[uid] = TrigramIndex.trigramize(content)
+                    self._line_index.index(path, content)
+                    logger.info(f"Processed {path}", prefix="Indexing")
+            except:
+                logger.warning(path, prefix="Indexing")
         return (trigrams, self._line_index._lines)

     def _find_closest_line(self, path, index):
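Combined with the content="" change in the preloading hunk above, documents now effectively keep only their path, and the text is re-read from disk at query and index time. One way to express that intent, as an illustrative sketch only (LazyDocument is not a class from this repo):

    import attr

    @attr.s
    class LazyDocument:
        # Illustrative: store only uid and key (path); fetch content on demand
        # so the corpus no longer holds every file's text in memory.
        uid = attr.ib()
        key = attr.ib()

        @property
        def content(self):
            try:
                with open(self.key, "r") as infile:
                    return infile.read()
            except OSError:
                return ""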
@@ -1,7 +1,10 @@
 from base import IndexBase

 import attr

+from logger import get_logger
+
+logger = get_logger(__name__)


 @attr.s
 class LineIndex(IndexBase):
@@ -2,11 +2,10 @@ import json
 import socket
 import pyinotify
 import attr

 from watcher import WatchHandler
 from indexer import Indexer
 from constants import QUERY_STRING_LENGTH

+from pathlib import Path
 from settings import settings

 from logger import get_logger
@@ -38,12 +37,14 @@ class Server:
            try:
                query_results = self.indexer.query(query_string)
                response = json.dumps(query_results).encode()
-                response_length = str(len(response))
+                response_length = str(len(response.decode()))
+                with open(Path(settings.BUFFER_PATH).expanduser(), "wb") as outfile:
+                    outfile.write(response)
                conn.sendall(response_length.encode())
-                conn.sendall(response)
            except KeyboardInterrupt:
                raise e
            except Exception as e:
                logger.warning(e)
                pass

    def _start_socket(self):
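One loose end worth noting while this is wip: the client reads exactly 8 bytes for the length (s.recv(8) above), but str(len(response)) is variable-width. A sketch of the server side with the header padded to match; the helper name is illustrative, not code from this repo:

    import json
    from pathlib import Path

    def publish_results(conn, query_results, buffer_path):
        # Illustrative helper: write the JSON payload to the shared buffer file
        # and send only a zero-padded 8-byte length header over the socket.
        response = json.dumps(query_results).encode()
        Path(buffer_path).expanduser().write_bytes(response)
        conn.sendall(str(len(response)).zfill(8).encode())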
@@ -52,6 +53,7 @@ class Server:
            self._socket = socket_obj
            self._handle_socket(socket=socket_obj)
        except Exception as e:
+            logger.warning(e)
            raise e

    def _start_watch(self):
@@ -15,6 +15,7 @@ default_settings = {
     "SIGNIFICANCE_THRESHOLD": 0,
     "WATCHED": [],
     "INDEXING_PROCESSES": 4,
+    "BUFFER_PATH": "~/.codesearchbuffer",
 }
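The default "~/.codesearchbuffer" is resolved with expanduser() on both ends, so server and client have to expand it to the same file. A quick illustrative check of how the path resolves, mirroring the calls in the diff:

    from pathlib import Path

    buffer_path = Path("~/.codesearchbuffer").expanduser()
    print(buffer_path)                 # e.g. /home/<user>/.codesearchbuffer on Linux
    buffer_path.touch(exist_ok=True)   # both processes need read/write access here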