searpl

a small php sqlite search engine. <a href="https://thunix.net/~xfnw/search">see it in action</a>
Log | Files | Refs | README

commit 69de9f49dcd5d6d604620bafe406de7146d51450
Author: xfnw <xfnw@thunix.net>
Date:   Mon, 14 Dec 2020 16:59:16 -0500

working

Diffstat:
A assets/style.css | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A crawl.php | 42 ++++++++++++++++++++++++++++++++++++++++++
A create.php | 2 ++
A index.php | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A urls.sh | 3 +++
5 files changed, 192 insertions(+), 0 deletions(-)

diff --git a/assets/style.css b/assets/style.css
@@ -0,0 +1,65 @@
+
+body {
+background: #141415;
+font-family: 'Liberation Mono', monospace;
+color: #cdcdcd;
+}
+
+.preview, .box {
+padding: 10px;
+margin-top: 10px;
+background-color: #222;
+}
+
+.wrapper {
+max-width: 700px;
+margin: 0 auto;
+}
+
+a {
+color: #aac;
+}
+
+h1 a {
+text-decoration: none;
+color: #cdcdcd;
+}
+
+h1 a::before {
+content: "#";
+color: #aac;
+margin-right: 10px;
+}
+h1 a:hover::before {
+text-decoration: underline;
+}
+
+
+.preview h1 {
+margin: 0;
+padding: 0;
+margin-bottom: 5px;
+}
+
+
+.search-container input[type=text] {
+  padding: 6px;
+  margin-top: 3px;
+  font-size: 17px;
+  color: #cdcdcd;
+  background: #333;
+  border: 0;
+  width: calc(100% - 60px);
+}
+
+.search-container button {
+  padding: 6px 10px;
+  margin-top: 3px;
+  background: #333;
+  float: right;
+  font-size: 17px;
+  border: none;
+  cursor: pointer;
+  color:#cdcdcd;
+}
+
diff --git a/crawl.php b/crawl.php
@@ -0,0 +1,49 @@
+<?php
+// Command-line crawler: fetches each URL passed as an argument and stores
+// its title and tag-stripped text in the `indexed` table of db.sqlite.
+ini_set('display_errors', '1');
+ini_set('display_startup_errors', '1');
+error_reporting(E_ALL);
+
+$db = new PDO("sqlite:db.sqlite");
+$db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+
+/**
+ * Return the text of the first <title> element in an HTML document.
+ *
+ * @param string $fp raw HTML of the page
+ * @return string|null title with whitespace runs collapsed, or null if no <title> is found
+ */
+function page_title($fp) {
+    $res = preg_match("/<title>(.*)<\/title>/siU", $fp, $title_matches);
+    if (!$res)
+        return null;
+
+    // Clean up title: remove EOL's and excessive whitespace.
+    $title = preg_replace('/\s+/', ' ', $title_matches[1]);
+    $title = trim($title);
+    return $title;
+}
+
+// Everything after the script name is a URL to index.
+$arg = $argv;
+array_shift($arg);
+
+// Prepared once and reused for every page.
+$stmt = $db->prepare('INSERT INTO indexed (title, url, content) VALUES (?, ?, ?)');
+
+foreach ($arg as $url) {
+    $file = file_get_contents($url);
+    if (!$file)
+        continue;
+    $title = page_title($file);
+    // Collapse every whitespace run, line breaks included, to a single space.
+    // Deleting line breaks outright would glue the last word of one line to
+    // the first word of the next and make both unsearchable.
+    $document = trim(preg_replace('/\s+/', ' ', strip_tags($file)));
+    if (!$title || !$document)
+        continue;
+    echo $title;
+    echo $document;
+    $stmt->execute([$title, $url, $document]);
+}
diff --git a/create.php b/create.php
@@ -0,0 +1,2 @@
+CREATE TABLE indexed (id INTEGER PRIMARY KEY, title VARCHAR(255), url VARCHAR(512), content TEXT)
+
diff --git a/index.php b/index.php
@@ -0,0 +1,119 @@
+<?php
+// NOTE(review): display_errors is enabled on a public page, so PHP warnings
+// (file paths, SQL errors) are shown to visitors. Consider disabling it.
+ini_set('display_errors', '1');
+ini_set('display_startup_errors', '1');
+error_reporting(E_ALL);
+
+// Raw query text. A non-string value such as ?q[]=x is treated as no query,
+// because htmlspecialchars() and preg_split() below expect a string.
+$q = (isset($_GET['q']) && is_string($_GET['q'])) ? $_GET['q'] : '';
+
+/**
+ * Escape text for HTML output.
+ *
+ * ENT_SUBSTITUTE matters for the result excerpts: they are cut at byte
+ * offsets and may split a multi-byte character, and without the flag
+ * htmlspecialchars() returns an empty string for invalid input.
+ *
+ * @param string $text
+ * @return string
+ */
+function h($text) {
+    return htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
+}
+
+/**
+ * Escape the LIKE wildcards % and _ (and the escape character itself) so
+ * that a search term is matched literally by "LIKE ? ESCAPE '\'".
+ *
+ * @param string $term
+ * @return string
+ */
+function like_escape($term) {
+    return strtr($term, ['\\' => '\\\\', '%' => '\\%', '_' => '\\_']);
+}
+?>
+<!DOCTYPE HTML>
+<html lang="en">
+<link rel="stylesheet" type="text/css" href="assets/style.css">
+<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<meta name="description" content="a search engine">
+<title>searpl</title>
+
+<div class='wrapper'>
+<h1>searpl</h1>
+
+<div class='box search-container'>
+<form action="./">
+  <input type="text" placeholder="Search.." name="q" value="<?php echo h($q); ?>">
+  <button type="submit"><i class="fa fa-search"></i></button>
+  </form>
+</div>
+
+<?php
+
+// Split the query on whitespace runs so that each word is its own term.
+// An empty or whitespace-only query yields no terms and runs no search.
+$terms = preg_split('/\s+/', $q, -1, PREG_SPLIT_NO_EMPTY);
+
+if ($terms) {
+    $db = new PDO("sqlite:db.sqlite");
+    $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+
+    $sql = 'SELECT * FROM indexed WHERE 1=1';
+    $params = [];
+    $wanted = [];   // terms that must appear; also used to build the excerpt
+    foreach ($terms as $term) {
+        // A leading '-' excludes the term; a lone '-' is searched literally.
+        if (strlen($term) > 1 && $term[0] === '-') {
+            $sql .= " AND content NOT LIKE ? ESCAPE '\\'";
+            $params[] = '%'.like_escape(substr($term, 1)).'%';
+        } else {
+            $sql .= " AND content LIKE ? ESCAPE '\\'";
+            $params[] = '%'.like_escape($term).'%';
+            $wanted[] = $term;
+        }
+    }
+    $sql .= ';';
+
+    // With ERRMODE_WARNING a failed prepare() returns false rather than throwing.
+    $stmt = $db->prepare($sql);
+    $ok = $stmt !== false && $stmt->execute($params);
+
+    $results = false;
+    while ($ok && ($row = $stmt->fetch(PDO::FETCH_ASSOC))) {
+        $results = true;
+?>
+
+<div class='box'>
+<a href="<?php echo h($row['url']); ?>"><?php echo h($row['title']); ?></a>
+<br>
+...<?php
+        $content = $row['content'];
+        foreach ($wanted as $term) {
+            // stripos() is case-insensitive, matching LIKE's ASCII case-insensitivity.
+            $pos = stripos($content, $term);
+            if ($pos === false)
+                continue;
+            // Clamp at 0: a negative start would make substr() count from the end.
+            $start = max(0, $pos - 50);
+            $len = strlen($term);
+            echo h(substr($content, $start, $pos - $start));
+            echo '<strong>'.h(substr($content, $pos, $len)).'</strong>';
+            echo h(substr($content, $pos + $len, 50)).'...';
+        }
+
+?>
+</div>
+<?php
+
+    }
+    if (!$results)
+        echo '<div class="box">No results.</div>';
+
+}
+?>
+</div>
+
diff --git a/urls.sh b/urls.sh
@@ -0,0 +1,5 @@
+# Spider the given start URLs (two levels deep, spanning hosts) and print the
+# URLs wget visits, skipping common static assets.
+wget --spider --force-html -r -l2 -H "$@" 2>&1 \
+  | grep '^--' | awk '{ print $3 }' \
+  | grep -v '\.\(css\|js\|png\|gif\|jpg\)$'