php - Extract paragraph (>800 chars) from text files -
i need extract rather long excerpts text files. reason decided extract first encountered paragraph, >800 characters long, , cut 800 chars, adding "..." in end. if such paragraph not found – step down 700, 600, 500.
please me improve php code. or, if know bash-solution – nice too. in advance.
if ($text != false && $text != '') { preg_match('#^(.{50,100})(\s+)#s', $text, $asubject); preg_match('#([^\n]{400,800})(.*)#', $text, $aannotation); $text = preg_replace('#([\n\r\t\s]+)#s', " ", $text); // $text = preg_replace('#([\w|\s]+)#s', "\1", $text); $wordcount = count(explode(" ", $text)); // if (isset($aannotation[1]) && isset($asubject[1])) { if (isset($aannotation[1])) { $sannotation = preg_replace('#(\s{2,100})#', ' ', $aannotation[1]); $stmt = $hdb->prepare("insert {$_s['tabletext']} (filename, text) values (?, ?)"); $stmt->execute([$afilename[1], trim($text)]); // $stmt = $hdb->prepare("insert {$_s['tablekeywords']} (filename, subject, annotation) values (?, ?, ?)"); // $stmt->execute([$file, $asubject[1], $sannotation]); $stmt = $hdb->prepare("insert {$_s['tablekeywords']} (filename, annotation, wordcount) values (?, ?, ?)"); $stmt->execute([$afilename[1], $sannotation, $wordcount]); $aexts[$afilename[3]] = (isset($aexts[$afilename[3]])) ? $aexts[$afilename[3]] + 1 : 1; // unlink($_s['textdir'] . '/' . $file); } }
Comments
Post a Comment