Skip to content

Commit

Permalink
ok this is way better scraping
Browse files Browse the repository at this point in the history
  • Loading branch information
alnutile committed Aug 27, 2023
1 parent 72d129e commit 123e9f9
Show file tree
Hide file tree
Showing 18 changed files with 79 additions and 114 deletions.
8 changes: 4 additions & 4 deletions app/Console/Commands/URlToMessageCommand.php
Expand Up @@ -28,13 +28,13 @@ class URlToMessageCommand extends Command
public function handle()
{
$url = $this->argument('url');
$this->info("Getting content for url " . $url . " check logs");
$this->info('Getting content for url '.$url.' check logs');
$dto = MailDto::from([
'subject' => "Get content for the url",
'body' => " This is the url to get copy for " . $url
'subject' => 'Get content for the url',
'body' => ' This is the url to get copy for '.$url,
]);
$job = new MailBoxParserJob($dto);
$job->handle();
$this->info("Check the queue!");
$this->info('Check the queue!');
}
}
39 changes: 18 additions & 21 deletions app/Domains/LlmFunctions/GetContentFromUrl/GetContentFromUrl.php
Expand Up @@ -3,42 +3,39 @@
namespace App\Domains\LlmFunctions\GetContentFromUrl;

use App\Models\Message;
use Facades\App\OpenAi\ChatClient;
use App\OpenAi\Dtos\FunctionCallDto;
use Facades\App\Tools\GetSiteWrapper;
use SundanceSolutions\LarachainTrimText\Facades\LarachainTrimText;

class GetContentFromUrl
{
public function handle(FunctionCallDto $functionCallDto): Message
{
$url = data_get($functionCallDto->arguments, 'url', null);

public function handle(FunctionCallDto $functionCallDto) : Message {
$url = data_get($functionCallDto->arguments, 'url', null);
if (! $url) {
$message = 'No url in this message '.$functionCallDto->message->id;
logger($message);
logger('See function call', $functionCallDto->toArray());
throw new \Exception($message);
}

if(!$url) {
$message = "No url in this message " . $functionCallDto->message->id;
logger($message);
logger("See function call", $functionCallDto->toArray());
throw new \Exception($message);
}
$body = GetSiteWrapper::handle($url);

$body = GetSiteWrapper::handle($url);

$content = sprintf("can you add a TLDR to the top of the following content:
$content = sprintf("can you add a TLDR to the top of the following content:
###
\n
\n
\n
URL: %s\n
Content: %s",
$url,
$body
);

$functionCallDto->message->content = $content;
$functionCallDto->message->updateQuietly();
return $functionCallDto->message->refresh();
$url,
$body
);

}
$functionCallDto->message->content = $content;
$functionCallDto->message->updateQuietly();

return $functionCallDto->message->refresh();

}
}
16 changes: 8 additions & 8 deletions app/Domains/Scraping/RapidScrapeClient.php
Expand Up @@ -6,22 +6,22 @@

class RapidScrapeClient
{

public function handle(string $url) : string {
$token = config("services.rapid.api");
if(!$token) {
throw new \Exception("Missing token");
public function handle(string $url): string
{
$token = config('services.rapid.api');
if (! $token) {
throw new \Exception('Missing token');
}

$response = Http::withHeaders([
'X-RapidAPI-Key' => $token,
'X-RapidAPI-Host' => 'proxycrawl-scraper.p.rapidapi.com'
'X-RapidAPI-Host' => 'proxycrawl-scraper.p.rapidapi.com',
])
->timeout(60)
->retry(3, 1500)
->get('https://proxycrawl-scraper.p.rapidapi.com/', [
'url' => $url
]);
'url' => $url,
]);

$body = $response->json();
$content = data_get($body, 'body.content');
Expand Down
1 change: 0 additions & 1 deletion app/Http/Controllers/HomeController.php
Expand Up @@ -3,7 +3,6 @@
namespace App\Http\Controllers;

use App\Http\Resources\MessageIndexResource;
use App\Http\Resources\MessageResource;
use App\Http\Resources\TagResource;
use App\Models\Message;
use App\Models\Tag;
Expand Down
1 change: 0 additions & 1 deletion app/Http/Resources/MessageIndexResource.php
Expand Up @@ -4,7 +4,6 @@

use Illuminate\Http\Request;
use Illuminate\Http\Resources\Json\JsonResource;
use Illuminate\Support\Str;

class MessageIndexResource extends JsonResource
{
Expand Down
1 change: 0 additions & 1 deletion app/Http/Resources/MessageResource.php
Expand Up @@ -4,7 +4,6 @@

use Illuminate\Http\Request;
use Illuminate\Http\Resources\Json\JsonResource;
use Illuminate\Support\Str;

class MessageResource extends JsonResource
{
Expand Down
9 changes: 2 additions & 7 deletions app/Jobs/MailBoxParserJob.php
Expand Up @@ -7,14 +7,11 @@
use App\Models\Message;
use App\Models\Tag;
use App\Models\User;
use Facades\App\OpenAi\ChatClient;
use Facades\App\Tools\GetSiteWrapper;
use Illuminate\Bus\Queueable;
use Illuminate\Contracts\Queue\ShouldQueue;
use Illuminate\Foundation\Bus\Dispatchable;
use Illuminate\Queue\InteractsWithQueue;
use Illuminate\Queue\SerializesModels;
use SundanceSolutions\LarachainTrimText\Facades\LarachainTrimText;

class MailBoxParserJob implements ShouldQueue
{
Expand Down Expand Up @@ -60,7 +57,7 @@ public function handle(): void
* @TODO strip signatures from the body before looking for a URL.
* Until then, a URL that only appears in the email signature may be picked up here :(
*/
if($url = get_url_from_body($content)) {
if ($url = get_url_from_body($content)) {
$content = str($content)->prepend("get content from the url {$url} using the included function")->toString();
}

Expand All @@ -77,7 +74,7 @@ public function handle(): void
]);

$message->llm_functions()->attach([
$function->id
$function->id,
]);

MessageCreatedJob::dispatch($message);
Expand All @@ -88,6 +85,4 @@ public function handle(): void
throw $e;
}
}


}
5 changes: 5 additions & 0 deletions app/Models/LlmFunction.php
Expand Up @@ -34,6 +34,11 @@ public function getParametersFormattedAttribute(): string

/**
 * Accessor that exposes `parameters` as a PHP array.
 *
 * `parameters` is returned untouched when it is already an array; otherwise
 * it is treated as a JSON string and decoded into an associative array.
 * A null or empty value yields an empty array.
 *
 * @return array<mixed>
 */
public function getParametersDecodedAttribute(): array
{
    $parameters = $this->parameters;

    if (is_array($parameters)) {
        return $parameters;
    }

    // json_decode() expects a string: passing null is deprecated since PHP 8.1
    // and an empty string is never valid JSON. Both previously ended up as an
    // empty array via the (array) cast, so return that directly.
    if ($parameters === null || $parameters === '') {
        return [];
    }

    /** @phpstan-ignore-next-line */
    return (array) json_decode($parameters, true);
}

Expand Down
4 changes: 1 addition & 3 deletions app/OpenAi/ChatClient.php
Expand Up @@ -9,7 +9,6 @@
use Facades\App\OpenAi\ChatClient as ChatClientFacade;
use Facades\App\OpenAi\FunctionCall;
use OpenAI\Laravel\Facades\OpenAI;
use SundanceSolutions\LarachainTrimText\Facades\LarachainTrimText;

class ChatClient
{
Expand Down Expand Up @@ -38,7 +37,7 @@ public function chat(array $messages, bool $run_functions = true): Response
$model = config('openai.chat_model');
}

logger("Model being used", ['model' => $model]);
logger('Model being used', ['model' => $model]);

$request = [
'model' => $model,
Expand All @@ -55,7 +54,6 @@ public function chat(array $messages, bool $run_functions = true): Response

$response = OpenAI::chat()->create($request);


if (data_get($response, 'choices.0.finish_reason') === 'function_call') {
$name = data_get($response, 'choices.0.message.function_call.name');
$arguments = data_get($response, 'choices.0.message.function_call.arguments');
Expand Down
5 changes: 0 additions & 5 deletions app/Tools/GetSiteWrapper.php
Expand Up @@ -3,11 +3,6 @@
namespace App\Tools;

use Facades\App\Domains\Scraping\RapidScrapeClient;
use App\Spiders\GetPageSpider;
use Illuminate\Support\Arr;
use RoachPHP\ItemPipeline\Item;
use RoachPHP\Roach;
use RoachPHP\Spider\Configuration\Overrides;

class GetSiteWrapper
{
Expand Down
10 changes: 3 additions & 7 deletions app/helpers.php
@@ -1,9 +1,9 @@
<?php

use App\Models\Message;
use Facades\App\Domains\LlmFunctions\GetContentFromUrl\GetContentFromUrl;
use App\Domains\Scheduling\Dtos\TasksDto;
use App\Models\Message;
use App\OpenAi\Dtos\FunctionCallDto;
use Facades\App\Domains\LlmFunctions\GetContentFromUrl\GetContentFromUrl;
use Facades\App\Domains\Scheduling\TaskRepository;
use Illuminate\Support\Arr;
use Illuminate\Support\Facades\File;
Expand All @@ -20,12 +20,10 @@ function llm_functions_scheduling(
}
}


if (! function_exists('get_content_from_url')) {
function get_content_from_url(
FunctionCallDto $functionCallDto
): Message
{
): Message {
return GetContentFromUrl::handle($functionCallDto);
}
}
Expand Down Expand Up @@ -76,8 +74,6 @@ function get_url_from_body(string $body): string|null
}
}



if (! function_exists('get_current_weather')) {
function get_current_weather($location, $unit = 'fahrenheit'): string
{
Expand Down
7 changes: 3 additions & 4 deletions config/services.php
Expand Up @@ -31,9 +31,8 @@
'region' => env('AWS_DEFAULT_REGION', 'us-east-1'),
],

'rapid' =>
[
'api' => env("RAPID_API_TOKEN")
]
'rapid' => [
'api' => env('RAPID_API_TOKEN'),
],

];
14 changes: 6 additions & 8 deletions database/migrations/2023_08_27_135254_add_get_url_function.php
Expand Up @@ -2,8 +2,6 @@

use App\Models\LlmFunction;
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;

return new class extends Migration
{
Expand All @@ -14,12 +12,12 @@ public function up(): void
{
$params = get_fixture_v2('get_content_from_url.json', false);
LlmFunction::firstOrCreate([
'label' => 'get_content_from_url'
],[
'label' => "get_content_from_url",
"description" => "This allows a user to put a URL into the question and it will then return the content for you to continue on with processing",
"parameters" => $params,
'active' => 1
'label' => 'get_content_from_url',
], [
'label' => 'get_content_from_url',
'description' => 'This allows a user to put a URL into the question and it will then return the content for you to continue on with processing',
'parameters' => $params,
'active' => 1,
]);
}

Expand Down
32 changes: 11 additions & 21 deletions tests/Feature/GetContentFromUrlTest.php
Expand Up @@ -2,30 +2,19 @@

namespace Tests\Feature;

use App\OpenAi\Dtos\FunctionCallDto;
use Facades\App\Domains\LlmFunctions\GetContentFromUrl\GetContentFromUrl;
use App\Models\Message;
use App\Models\User;
use App\OpenAi\Dtos\FunctionCallDto;
use Facades\App\Domains\LlmFunctions\GetContentFromUrl\GetContentFromUrl;
use Facades\App\Tools\GetSiteWrapper;
use Facades\App\OpenAi\ChatClient;
use App\OpenAi\Dtos\Response;
use Illuminate\Foundation\Testing\RefreshDatabase;
use Illuminate\Foundation\Testing\WithFaker;
use Tests\TestCase;

class GetContentFromUrlTest extends TestCase
{

public function test_get_content() {
$dto = Response::from([
'content' => 'reduced content',
]);

ChatClient::shouldReceive('chat')
->once()
->andReturn($dto);

public function test_get_content()
{
$text = get_fixture_v2('larger_text.txt', false);

GetSiteWrapper::shouldReceive('handle')
->once()
->andReturn($text);
Expand All @@ -35,13 +24,14 @@ public function test_get_content() {
$message = Message::factory()->create();

$dto = FunctionCallDto::from([
'message' => $message,
'arguments' => json_encode([
'url' => "https://foo.bar"
])
'message' => $message,
'arguments' => json_encode([
'url' => 'https://foo.bar',
]),
]);

GetContentFromUrl::handle($dto);

$this->assertStringContainsString('reduced content', $message->refresh()->content);
$this->assertStringContainsString('navigation', $message->refresh()->content);
}
}
Expand Up @@ -32,7 +32,7 @@ public function test_store(): void
'parameters' => ['foo' => 'bar'],
]
)->assertStatus(302);
$this->assertTrue(LlmFunction::whereLabel("Foobar")->exists());
$this->assertTrue(LlmFunction::whereLabel('Foobar')->exists());
}

public function test_update(): void
Expand Down
6 changes: 0 additions & 6 deletions tests/Feature/MailBoxParserJobTest.php
Expand Up @@ -6,9 +6,6 @@
use App\Jobs\MailBoxParserJob;
use App\Jobs\MessageCreatedJob;
use App\Models\User;
use App\OpenAi\Dtos\Response;
use Facades\App\OpenAi\ChatClient;
use Facades\App\Tools\GetSiteWrapper;
use Illuminate\Support\Facades\Queue;
use Tests\TestCase;

Expand All @@ -32,7 +29,4 @@ public function test_larger_message()
$this->assertDatabaseCount('messages', 1);
Queue::assertPushed(MessageCreatedJob::class);
}



}
7 changes: 4 additions & 3 deletions tests/Feature/Models/LlmFunctionTest.php
Expand Up @@ -29,10 +29,11 @@ public function test_creates_scheduler()
$this->assertNotNull($llm->parameters);
}

public function test_exists_get_content_from_url() {
$this->assertTrue(LlmFunction::where("label", 'get_content_from_url')->exists());
public function test_exists_get_content_from_url()
{
$this->assertTrue(LlmFunction::where('label', 'get_content_from_url')->exists());
/** @var LlmFunction $getContent */
$getContent = LlmFunction::where("label", 'get_content_from_url')->first();
$getContent = LlmFunction::where('label', 'get_content_from_url')->first();
$this->assertIsArray($getContent->parameters_decoded);
}
}

0 comments on commit 123e9f9

Please sign in to comment.