Skip to content

Commit

Permalink
Merge pull request #45 from glensc/set-sig-regex
Browse files Browse the repository at this point in the history
add getSignatureRegex/setSignatureRegex
  • Loading branch information
glensc committed Nov 28, 2017
2 parents d3839b5 + 7c64d7d commit a1f6a22
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 4 deletions.
35 changes: 31 additions & 4 deletions src/EmailReplyParser/Parser/EmailParser.php
Expand Up @@ -18,12 +18,14 @@
*/
class EmailParser
{
const QUOTE_REGEX = '/>+$/s';

/**
* Regex to match signatures
*
* @var string
*/
const SIG_REGEX = '/(?:^\s*--|^\s*__|^-\w|^-- $)|(?:^Sent from my (?:\s*\w+){1,4}$)|(?:^={30,}$)$/s';

const QUOTE_REGEX = '/>+$/s';
private $signatureRegex = '/(?:^\s*--|^\s*__|^-\w|^-- $)|(?:^Sent from my (?:\s*\w+){1,4}$)|(?:^={30,}$)$/s';

/**
* @var string[]
Expand Down Expand Up @@ -141,6 +143,28 @@ public function setQuoteHeadersRegex(array $quoteHeadersRegex)
return $this;
}

/**
* Returns the regular expression currently stored for signature detection.
*
* This is the value held in $this->signatureRegex, i.e. either the
* built-in default or whatever was last passed to setSignatureRegex().
*
* @return string
* @since 2.7.0
*/
public function getSignatureRegex()
{
return $this->signatureRegex;
}

/**
* Replaces the regular expression used for signature detection.
*
* The value is stored as-is; no validation is performed here, so it must
* be a complete PCRE pattern (delimiters and modifiers included) that
* preg_match() accepts. Note that isSignature() applies the pattern to
* strrev($line), not to $line itself.
*
* @param string $signatureRegex
*
* @return EmailParser the same instance, to allow call chaining
* @since 2.7.0
*/
public function setSignatureRegex($signatureRegex)
{
$this->signatureRegex = $signatureRegex;

// Fluent interface: hand back the parser itself.
return $this;
}

/**
* @param FragmentDTO[] $fragmentDTOs
*
Expand Down Expand Up @@ -174,11 +198,12 @@ private function isQuoteHeader($line)

/**
* Tells whether a line matches the configured signature pattern.
*
* The line is reversed with strrev() before being matched against
* $this->signatureRegex (see getSignatureRegex()/setSignatureRegex()).
*
* @param string $line
* @return bool
*/
private function isSignature($line)
{
// Match against the instance pattern only: an earlier return based on
// the SIG_REGEX class constant made this statement unreachable, so a
// pattern supplied through setSignatureRegex() was never honoured.
// preg_match() yields 1 on a match, 0 on no match and false on error.
return preg_match($this->signatureRegex, strrev($line)) === 1;
}

/**
* @param string $line
* @return bool
*/
private function isQuote($line)
{
Expand All @@ -191,8 +216,10 @@ private function isEmpty(FragmentDTO $fragment)
}

/**
* @param FragmentDTO $fragment
* @param string $line
* @param boolean $isQuoted
* @return bool
*/
private function isFragmentLine(FragmentDTO $fragment, $line, $isQuoted)
{
Expand Down
18 changes: 18 additions & 0 deletions tests/EmailReplyParser/Tests/Parser/EmailParserTest.php
Expand Up @@ -410,6 +410,24 @@ public function testEmailWithFairAmountOfContent()
$this->assertRegexp('/^On Thursday/', (string) $fragments[0]);
}

/**
* A caller-supplied signature regex replaces the default one.
*
* The fixture contains `ls -l` output whose lines start with "-rw"; the
* pattern used here leaves out the default's `^-\w` alternative,
* presumably so that such lines are not taken for a signature.
*
* See: https://github.com/willdurand/EmailReplyParser/pull/42
*/
public function testCustomSignatureRegex()
{
$this->parser->setSignatureRegex('/(?:^\s*--|^\s*__|^-- $)|(?:^Sent from my (?:\s*\w+){1,3})$/s');

$fragments = $this->parser
->parse($this->getFixtures('email_ls-l.txt'))
->getFragments();

// Exactly two fragments are expected: the message body, then the signature.
$this->assertCount(2, $fragments);

$body = $fragments[0];
$this->assertFalse($body->isSignature());

$signature = $fragments[1];
$this->assertTrue($signature->isSignature());
}

/**
* @dataProvider getDateFormats
*/
Expand Down
35 changes: 35 additions & 0 deletions tests/Fixtures/email_ls-l.txt
@@ -0,0 +1,35 @@
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8; format=flowed
Content-Transfer-Encoding: 7bit

here's some funny one


$ LC_ALL=C ls -l /tmp|grep sess|head
-rw------- 1 http http 62 Feb 15 12:45
sess_07ncrlhq50obbd5kp1vp02lp97
-rw------- 1 http http 0 Feb 15 10:18
sess_0g01akj9ccmq6r001p2klb55s0
-rw------- 1 http http 0 Feb 14 23:26
sess_0gcjo35c35f330p4qm31c1ovv1
-rw------- 1 http http 4410 Feb 15 12:32
sess_0i16be4lk5derhdfeas2uomnf4
-rw------- 1 http http 172 Feb 15 11:59
sess_0jv5f5i6eu7qfp41mc6hkfjpq3
-rw------- 1 http http 0 Feb 15 10:12
sess_0njep2fkt6v5j45t5r0hcfup77
-rw------- 1 http http 0 Feb 15 09:57
sess_1j44bltbjpkej984sfor5461u3
-rw------- 1 http http 0 Feb 14 23:37
sess_1r9r8a6kaqscq46psrcf1ssm24
-rw------- 1 http http 0 Feb 15 10:17
sess_25cvldfhk0nann15asctkrg3b2
-rw------- 1 http http 59 Feb 14 23:43
sess_25tni1suqgasqk8osnmk098sc6



--
glen


0 comments on commit a1f6a22

Please sign in to comment.