searpl

a small php sqlite search engine. <a href="https://thunix.net/~xfnw/search">see it in action</a>
Log | Files | Refs | README

commit 826e3c2b7c3a52d610607951b86ea13ee1cbc0ea
parent c6b476c4a35ec4e9b79ee2c9839f9b85cc93ae27
Author: xfnw <xfnw@thunix.net>
Date:   Wed, 27 Jan 2021 20:33:25 -0500

delete page before downloading new one, so dead pages do not sit in the database

Diffstat:
M crawl.php | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/crawl.php b/crawl.php
@@ -31,6 +31,10 @@ foreach ($arg as $url) {
     echo "\n";
     $url = preg_replace('/\/$/','',$url);
     echo $url."\n";
+
+    $stmt = $db->prepare('DELETE FROM indexed WHERE url = ?');
+    $stmt->execute([$url]);
+
     $file = file_get_contents($url);
     if (!$file) continue;
@@ -43,9 +47,6 @@ foreach ($arg as $url) {
     echo "title: ".$title."\n";
-    $stmt = $db->prepare('DELETE FROM indexed WHERE url = ?');
-    $stmt->execute([$url]);
-
     $stmt = $db->prepare('INSERT INTO indexed (title, url, content) VALUES (?, ?, ?)');
     $stmt->execute([$title, $url, $document]);
 }