Skip to content

Commit

Permalink
chg: scan when searching, get all entries when in tables
Browse files Browse the repository at this point in the history
  • Loading branch information
Rafiot committed Jan 19, 2025
1 parent 6fa80e8 commit 6f9101a
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ repos:
- id: check-yaml
- id: check-added-large-files
- repo: https://github.com/asottile/pyupgrade
rev: v3.19.0
rev: v3.19.1
hooks:
- id: pyupgrade
args: [--py39-plus]
7 changes: 7 additions & 0 deletions lookyloo/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import hashlib
import logging
import re
from collections.abc import Iterator

from datetime import datetime, timedelta

Expand Down Expand Up @@ -351,6 +352,9 @@ def get_captures_body_hash(self, body_hash: str, most_recent_capture: datetime |
total = self.redis.zcard(f'body_hashes|{body_hash}|captures')
return total, self.redis.zrevrangebyscore(f'body_hashes|{body_hash}|captures', max_score, min_score, withscores=True, start=offset, num=limit)

def scan_captures_body_hash(self, body_hash: str) -> Iterator[tuple[str, float]]:
    """Iterate over every (capture UUID, score) pair indexed for a body hash.

    Walks the sorted set incrementally with ZSCAN instead of fetching it
    in a single blocking call, so it is safe on very large indexes.
    """
    key = f'body_hashes|{body_hash}|captures'
    for entry in self.redis.zscan_iter(key):
        yield entry

def get_capture_body_hash_nodes(self, capture_uuid: str, body_hash: str) -> set[str]:
if url_nodes := self.redis.smembers(f'capture_indexes|{capture_uuid}|body_hashes|{body_hash}'):
return set(url_nodes)
Expand Down Expand Up @@ -428,6 +432,9 @@ def get_captures_hhhash(self, hhh: str, most_recent_capture: datetime | None = N
total = self.redis.zcard(f'hhhashes|{hhh}|captures')
return total, self.redis.zrevrangebyscore(f'hhhashes|{hhh}|captures', max_score, min_score, withscores=True, start=offset, num=limit)

def scan_captures_hhhash(self, hhh: str) -> Iterator[tuple[str, float]]:
    """Iterate over every (capture UUID, score) pair indexed for an HHHash.

    Uses ZSCAN so the full sorted set is streamed chunk by chunk rather
    than materialized by one call.
    """
    key = f'hhhashes|{hhh}|captures'
    for entry in self.redis.zscan_iter(key):
        yield entry

def get_captures_hhhash_count(self, hhh: str) -> int:
    """Return how many captures are indexed for the given HHHash."""
    key = f'hhhashes|{hhh}|captures'
    return self.redis.zcard(key)

Expand Down
52 changes: 23 additions & 29 deletions website/web/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,21 +359,18 @@ def handle_pydandic_validation_exception(error: CaptureSettingsError) -> Respons

# ##### Methods querying the indexes #####

def get_body_hash_investigator_search(body_hash: str, search: str) -> tuple[int, list[CaptureCache]]:
    """Return captures for a body hash whose cached metadata matches *search*.

    The first element is the unfiltered number of indexed captures (taken
    from the index count, for pagination bookkeeping); the second is the
    list of capture caches that match the search string. The full index is
    streamed via scan_captures_body_hash instead of being paginated.
    """
    total = get_indexing(flask_login.current_user).get_captures_body_hash_count(body_hash)
    uuids = [uuid for uuid, _ in get_indexing(flask_login.current_user).scan_captures_body_hash(body_hash)]
    matching: list[CaptureCache] = []
    for capture in lookyloo.sorted_capture_cache(uuids, cached_captures_only=False):
        if capture.search(search):
            matching.append(capture)
    return total, matching


def _get_body_hash_investigator(body_hash: str, offset: int | None=None, limit: int | None=None, search: str | None=None) -> tuple[int, list[tuple[str, str, datetime, str, str]]]:
def _get_body_hash_investigator(body_hash: str, offset: int | None=None, limit: int | None=None, search: str | None=None) -> tuple[int, list[tuple[str, str, str, datetime, list[tuple[str, str]]]]]:
'''Returns all the captures related to a hash (sha512), used in the web interface.'''
if offset is not None and limit and search:
total, cached_captures = get_body_hash_investigator_search(body_hash=body_hash, offset=offset, limit=limit, search=search)
if search:
total, cached_captures = get_body_hash_investigator_search(body_hash=body_hash, search=search)
else:
total, entries = get_indexing(flask_login.current_user).get_captures_body_hash(body_hash=body_hash, offset=offset, limit=limit)
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries], cached_captures_only=False)
Expand All @@ -386,7 +383,7 @@ def _get_body_hash_investigator(body_hash: str, offset: int | None=None, limit:
nodes_info.append((urlnode.name, urlnode_uuid))
except IndexError:
continue
captures.append((cache.uuid, cache.title, cache.timestamp, cache.redirects[-1], nodes_info))
captures.append((cache.uuid, cache.title, cache.redirects[-1], cache.timestamp, nodes_info))
return total, captures


Expand Down Expand Up @@ -443,7 +440,7 @@ def get_all_urls(capture_uuid: str, /) -> dict[str, dict[str, int | list[URLNode
def get_hostname_investigator(hostname: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime, list[tuple[str, str]]]]]:
'''Returns all the captures loading content from that hostname, used in the web interface.'''
total, entries = get_indexing(flask_login.current_user).get_captures_hostname(hostname=hostname, offset=offset, limit=limit)
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries], cached_captures_only=False)
_captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp, get_indexing(flask_login.current_user).get_capture_hostname_nodes(cache.uuid, hostname)) for cache in cached_captures]
captures = []
for capture_uuid, capture_title, landing_page, capture_ts, nodes in _captures:
Expand Down Expand Up @@ -479,7 +476,7 @@ def get_url_investigator(url: str, offset: int | None=None, limit: int | None=No
def get_cookie_name_investigator(cookie_name: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime, list[tuple[str, str]]]]]:
'''Returns all the captures related to a cookie name entry, used in the web interface.'''
total, entries = get_indexing(flask_login.current_user).get_captures_cookies_name(cookie_name=cookie_name)
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries], cached_captures_only=False)
_captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp, get_indexing(flask_login.current_user).get_capture_cookie_name_nodes(cache.uuid, cookie_name)) for cache in cached_captures]
captures = []
for capture_uuid, capture_title, landing_page, capture_ts, nodes in _captures:
Expand All @@ -496,44 +493,41 @@ def get_cookie_name_investigator(cookie_name: str, offset: int | None=None, limi

def get_identifier_investigator(identifier_type: str, identifier: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime]]]:
    """Return the total count and capture summaries for an identifier.

    Each summary is (uuid, title, last redirect, timestamp). Caches are
    loaded even when not already in memory (cached_captures_only=False).
    """
    total, entries = get_indexing(flask_login.current_user).get_captures_identifier(identifier_type=identifier_type, identifier=identifier, offset=offset, limit=limit)
    uuids = [uuid for uuid, _ in entries]
    captures = []
    for cache in lookyloo.sorted_capture_cache(uuids, cached_captures_only=False):
        captures.append((cache.uuid, cache.title, cache.redirects[-1], cache.timestamp))
    return total, captures


def get_capture_hash_investigator(hash_type: str, h: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime]]]:
    """Return the total count and capture summaries for a capture-level hash.

    Each summary is (uuid, title, last redirect, timestamp). Caches are
    loaded even when not already in memory (cached_captures_only=False).
    """
    total, entries = get_indexing(flask_login.current_user).get_captures_hash_type(hash_type=hash_type, h=h, offset=offset, limit=limit)
    uuids = [uuid for uuid, _ in entries]
    captures = []
    for cache in lookyloo.sorted_capture_cache(uuids, cached_captures_only=False):
        captures.append((cache.uuid, cache.title, cache.redirects[-1], cache.timestamp))
    return total, captures


def get_favicon_investigator(favicon_sha512: str, offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, str, str, datetime]]]:
    '''Returns all the captures related to a favicon (by its sha512), used in the web interface.'''
    # NOTE: the previous docstring was copy-pasted from the cookie-name
    # investigator and wrongly described this as a cookie-name lookup.
    total, entries = get_indexing(flask_login.current_user).get_captures_favicon(favicon_sha512=favicon_sha512, offset=offset, limit=limit)
    # Load caches even when not already in memory (cached_captures_only=False).
    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries], cached_captures_only=False)
    # Each entry: (uuid, title, last redirect, timestamp).
    captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
    return total, captures


def get_hhh_investigator_search(hhh: str, search: str) -> tuple[int, list[CaptureCache]]:
    """Return captures for an HHHash whose cached metadata matches *search*.

    The first element is the unfiltered number of indexed captures (taken
    from the index count, for pagination bookkeeping); the second is the
    list of capture caches that match the search string. The full index is
    streamed via scan_captures_hhhash instead of being paginated.
    """
    total = get_indexing(flask_login.current_user).get_captures_hhhash_count(hhh)
    uuids = [uuid for uuid, _ in get_indexing(flask_login.current_user).scan_captures_hhhash(hhh)]
    matching: list[CaptureCache] = []
    for capture in lookyloo.sorted_capture_cache(uuids, cached_captures_only=False):
        if capture.search(search):
            matching.append(capture)
    return total, matching


def get_hhh_investigator(hhh: str, offset: int | None=None, limit: int | None=None, search: str | None=None) -> tuple[int, list[tuple[str, str, str, datetime, list[tuple[str, str]]]]]:
'''Returns all the captures related to a cookie name entry, used in the web interface.'''
if offset is not None and limit and search:
total, cached_captures = get_hhh_investigator_search(hhh, offset=offset, limit=limit, search=search)
if search:
total, cached_captures = get_hhh_investigator_search(hhh, search=search)
else:
total, entries = get_indexing(flask_login.current_user).get_captures_hhhash(hhh, offset=offset, limit=limit)
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries])
cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in entries], cached_captures_only=False)

_captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp, get_indexing(flask_login.current_user).get_capture_hhhash_nodes(cache.uuid, hhh)) for cache in cached_captures]
captures = []
Expand Down Expand Up @@ -2084,7 +2078,7 @@ def post_table(table_name: str, value: str) -> Response:
if table_name == 'HHHDetailsTable':
hhh = value.strip()
total, captures = get_hhh_investigator(hhh, offset=start, limit=length, search=search)
if search:
if search and start is not None and length is not None:
total_filtered = len(captures)
captures = captures[start:start + length]
prepared_captures = []
Expand Down Expand Up @@ -2124,11 +2118,11 @@ def post_table(table_name: str, value: str) -> Response:
if table_name == 'bodyHashDetailsTable':
body_hash = value.strip()
total, captures = _get_body_hash_investigator(body_hash, offset=start, limit=length, search=search)
if search:
if search and start is not None and length is not None:
total_filtered = len(captures)
captures = captures[start:start + length]
prepared_captures = []
for capture_uuid, title, capture_time, landing_page, nodes in captures:
for capture_uuid, title, landing_page, capture_time, nodes in captures:
_nodes = __prepare_node_view(capture_uuid, nodes, from_popup)
to_append = {
'capture_time': capture_time.isoformat(),
Expand Down

0 comments on commit 6f9101a

Please sign in to comment.