commit 69de9f49dcd5d6d604620bafe406de7146d51450
Author: xfnw <xfnw@thunix.net>
Date: Mon, 14 Dec 2020 16:59:16 -0500
working
Diffstat:
A | assets/style.css | | | 65 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | crawl.php | | | 42 | ++++++++++++++++++++++++++++++++++++++++++ |
A | create.php | | | 2 | ++ |
A | index.php | | | 80 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | urls.sh | | | 3 | +++ |
5 files changed, 192 insertions(+), 0 deletions(-)
diff --git a/assets/style.css b/assets/style.css
@@ -0,0 +1,65 @@
+
+/* Page-wide defaults: dark background, light monospace text. */
+body {
+  background: #141415;
+  color: #cdcdcd;
+  font-family: 'Liberation Mono', monospace;
+}
+
+/* Content panels (search form, result entries). */
+.preview,
+.box {
+  margin-top: 10px;
+  padding: 10px;
+  background-color: #222;
+}
+
+/* Centered column that holds the whole page. */
+.wrapper {
+  margin: 0 auto;
+  max-width: 700px;
+}
+
+a {
+  color: #aac;
+}
+
+/* Linked headings look like plain text with an accent "#" marker in front. */
+h1 a {
+  color: #cdcdcd;
+  text-decoration: none;
+}
+
+h1 a::before {
+  content: "#";
+  margin-right: 10px;
+  color: #aac;
+}
+
+h1 a:hover::before {
+  text-decoration: underline;
+}
+
+/* Headings inside a preview panel: no spacing except a small gap below. */
+.preview h1 {
+  margin: 0 0 5px;
+  padding: 0;
+}
+
+/* Search box: the text field leaves 60px of the row for the submit button. */
+.search-container input[type=text] {
+  width: calc(100% - 60px);
+  margin-top: 3px;
+  padding: 6px;
+  border: 0;
+  font-size: 17px;
+  color: #cdcdcd;
+  background: #333;
+}
+
+/* Submit button, floated to the right edge of the search row. */
+.search-container button {
+  float: right;
+  margin-top: 3px;
+  padding: 6px 10px;
+  border: none;
+  font-size: 17px;
+  color: #cdcdcd;
+  background: #333;
+  cursor: pointer;
+}
+
diff --git a/crawl.php b/crawl.php
@@ -0,0 +1,42 @@
+<?php
+// Report every PHP diagnostic while crawling, including startup errors.
+error_reporting(E_ALL);
+ini_set('display_startup_errors', '1');
+ini_set('display_errors', '1');
+
+// Open the SQLite index. Statement-level database errors are raised as PHP
+// warnings (ERRMODE_WARNING) rather than exceptions.
+$db = new PDO("sqlite:db.sqlite", null, null, array(
+    PDO::ATTR_ERRMODE => PDO::ERRMODE_WARNING,
+));
+
+ /**
+  * Extract the text of the first <title> element from an HTML document.
+  *
+  * @param string $fp Raw HTML source of the page.
+  * @return string|null The title as plain text (entities decoded, runs of
+  *                     whitespace collapsed to one space, trimmed), or null
+  *                     when no <title> element is found.
+  */
+ function page_title($fp) {
+     // [^>]* tolerates attributes on the opening tag (e.g. <title lang="en">);
+     // the lazy quantifier stops at the first closing tag.
+     if (!preg_match('/<title\b[^>]*>(.*?)<\/title>/si', $fp, $title_matches)) {
+         return null;
+     }
+
+     // Decode entities so the stored title is plain text; the search page
+     // HTML-escapes titles on output, so "&amp;" would otherwise be escaped twice.
+     $title = html_entity_decode($title_matches[1], ENT_QUOTES | ENT_HTML5, 'UTF-8');
+
+     // Clean up title: remove EOLs and excessive whitespace.
+     return trim((string) preg_replace('/\s+/', ' ', $title));
+ }
+
+
+
+// Every command-line argument after the script name is a URL to index.
+$arg = $argv;
+array_shift($arg);
+
+// Prepare the INSERT once and reuse it for every page. With ERRMODE_WARNING a
+// failed prepare (for example a missing "indexed" table) returns false, so
+// stop here with a clear message instead of failing on the first execute().
+$stmt = $db->prepare('INSERT INTO indexed (title, url, content) VALUES (?, ?, ?)');
+if ($stmt === false) {
+    fwrite(STDERR, "crawl: could not prepare INSERT statement\n");
+    exit(1);
+}
+
+foreach ($arg as $url) {
+    $file = file_get_contents($url);
+    if ($file === false || $file === '') {
+        continue; // unreachable or empty page
+    }
+
+    $title = page_title($file);
+
+    // Plain-text body: drop tags, decode entities, then collapse every run of
+    // whitespace (line breaks included) to a single space so that words on
+    // adjacent lines stay separate instead of being glued together.
+    $text = html_entity_decode(strip_tags($file), ENT_QUOTES | ENT_HTML5, 'UTF-8');
+    $document = trim((string) preg_replace('/\s+/', ' ', $text));
+
+    if ($title === null || $title === '' || $document === '') {
+        continue; // nothing useful to index
+    }
+
+    echo $title;
+    echo $document;
+    $stmt->execute([$title, $url, $document]);
+}
diff --git a/create.php b/create.php
@@ -0,0 +1,2 @@
+-- Search index table with columns id, title, url and content.
+-- IF NOT EXISTS makes re-running this script harmless; the trailing semicolon
+-- terminates the statement for line-oriented SQL shells.
+CREATE TABLE IF NOT EXISTS indexed (id INTEGER PRIMARY KEY, title VARCHAR(255), url VARCHAR(512), content TEXT);
+
diff --git a/index.php b/index.php
@@ -0,0 +1,80 @@
+<?php
+// Report every PHP diagnostic, including startup errors, directly in the page.
+// NOTE(review): display_errors prints warnings into the HTML response;
+// confirm this is intended outside of development.
+error_reporting(E_ALL);
+ini_set('display_startup_errors', '1');
+ini_set('display_errors', '1');
+?>
+<!DOCTYPE HTML>
+<html lang="en">
+<meta charset="utf-8">
+<link rel="stylesheet" type="text/css" href="assets/style.css">
+<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
+<meta name="viewport" content="width=device-width, initial-scale=1" />
+<meta name="description" content="a search engine">
+<title>searpl</title>
+
+<div class='wrapper'>
+<h1>searpl</h1>
+
+<div class='box search-container'>
+<?php
+// Search form: submits ?q=... back to this page and echoes the current query
+// into the text field. Only a string q is echoed; an array-valued q (sent as
+// q[]=x) must never reach htmlspecialchars().
+?>
+<form action="./">
+ <input type="text" placeholder="Search.." name="q" value="<?php if (isset($_GET['q']) && is_string($_GET['q'])) { echo htmlspecialchars($_GET['q'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8'); } ?>">
+ <button type="submit"><i class="fa fa-search"></i></button>
+ </form>
+</div>
+
+<?php
+
+// Run the search and render one result box per matching row.
+//
+// The query is split on whitespace into terms: a plain term must occur in the
+// page content, a term prefixed with "-" must not. A non-string q (sent as
+// q[]=x) is treated as "no query".
+$terms = array();
+if (isset($_GET['q']) && is_string($_GET['q'])) {
+    $terms = preg_split('/\s+/', $_GET['q'], -1, PREG_SPLIT_NO_EMPTY);
+}
+
+if ($terms) {
+    // HTML-escape helper. ENT_SUBSTITUTE keeps the output non-empty when one
+    // of the byte-wise substr() calls below cuts a multi-byte character in half.
+    $esc = function ($text) {
+        return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
+    };
+
+    $sql = 'SELECT * FROM indexed WHERE 1=1';
+    $params = array();
+    $needles = array(); // positive terms only; used to build the snippets
+
+    foreach ($terms as $term) {
+        $exclude = substr($term, 0, 1) === '-';
+        $needle = $exclude ? (string) substr($term, 1) : $term;
+        if ($needle === '') {
+            continue; // a lone "-" excludes nothing
+        }
+
+        // Escape LIKE wildcards so "%" and "_" in the query match literally.
+        $sql .= ' AND content ' . ($exclude ? 'NOT LIKE' : 'LIKE') . " ? ESCAPE '\\'";
+        $params[] = '%' . strtr($needle, array('\\' => '\\\\', '%' => '\\%', '_' => '\\_')) . '%';
+
+        if (!$exclude) {
+            $needles[] = $needle;
+        }
+    }
+
+    $results = false;
+    $stmt = false;
+    if ($params) {
+        $db = new PDO("sqlite:db.sqlite");
+        $db->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_WARNING);
+        // With ERRMODE_WARNING, prepare()/execute() return false on failure;
+        // in that case fall through to "No results." instead of a fatal error.
+        $stmt = $db->prepare($sql);
+        if ($stmt !== false && !$stmt->execute($params)) {
+            $stmt = false;
+        }
+    }
+
+    while ($stmt !== false && ($row = $stmt->fetch(PDO::FETCH_ASSOC))) {
+        $results = true;
+?>
+
+<div class='box'>
+<a href="<?php echo $esc($row['url']); ?>"><?php echo $esc($row['title']); ?></a>
+<br>
+...<?php
+        $content = (string) $row['content'];
+        $shown = false;
+        foreach ($needles as $needle) {
+            // Case-insensitive lookup, in line with SQL LIKE for ASCII text.
+            $pos = stripos($content, $needle);
+            if ($pos === false) {
+                continue;
+            }
+            $shown = true;
+
+            // Up to 50 bytes of context before the match; clamp at the start
+            // of the text so a negative offset never reads from the end.
+            $start = max(0, $pos - 50);
+            echo $esc(substr($content, $start, $pos - $start));
+            // Highlight the text exactly as it appears in the page.
+            echo '<strong>' . $esc(substr($content, $pos, strlen($needle))) . '</strong>';
+            echo $esc(substr($content, $pos + strlen($needle), 50)) . '...';
+        }
+        if (!$shown) {
+            // No positive term to highlight (e.g. an exclusion-only query):
+            // show the beginning of the page instead.
+            echo $esc(substr($content, 0, 100)) . '...';
+        }
+
+?>
+</div>
+<?php
+
+    }
+    if (!$results)
+        echo '<div class="box">No results.</div>';
+
+}
+?>
+</div>
+
diff --git a/urls.sh b/urls.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Print the URLs discovered from the start URLs given as arguments, one per line.
+#
+# wget runs in spider mode, recursing two levels (-l2) and following links to
+# other hosts (-H). Its log is written to stderr, so stderr is merged into
+# stdout for parsing. "$@" is quoted so URLs containing ?, * or & are passed
+# through unchanged instead of being word-split or glob-expanded by the shell.
+#
+# awk keeps the third field of each log line starting with "--" (the URL) and
+# prints each URL only once, since wget may log the same URL more than once.
+# The final grep drops common static assets by extension.
+wget --spider --force-html -r -l2 -H "$@" 2>&1 \
+ | awk '/^--/ && !seen[$3]++ { print $3 }' \
+ | grep -v '\.\(css\|js\|png\|gif\|jpg\)$'