commit 826e3c2b7c3a52d610607951b86ea13ee1cbc0ea
parent c6b476c4a35ec4e9b79ee2c9839f9b85cc93ae27
Author: xfnw <xfnw@thunix.net>
Date: Wed, 27 Jan 2021 20:33:25 -0500
delete page before downloading new one, so dead pages do not sit in the database
Diffstat:
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/crawl.php b/crawl.php
@@ -31,6 +31,10 @@ foreach ($arg as $url) {
echo "\n";
$url = preg_replace('/\/$/','',$url);
echo $url."\n";
+
+ $stmt = $db->prepare('DELETE FROM indexed WHERE url = ?');
+ $stmt->execute([$url]);
+
$file = file_get_contents($url);
if (!$file)
continue;
@@ -43,9 +47,6 @@ foreach ($arg as $url) {
echo "title: ".$title."\n";
- $stmt = $db->prepare('DELETE FROM indexed WHERE url = ?');
- $stmt->execute([$url]);
-
$stmt = $db->prepare('INSERT INTO indexed (title, url, content) VALUES (?, ?, ?)');
$stmt->execute([$title, $url, $document]);
}