Spaces:
Running
Running
matt HOFFNER
committed on
Commit
·
a98334b
1
Parent(s):
caaca78
fix
Browse files
- app/layout.tsx +1 -1
- app/tools/search.ts +1 -1
- app/tools/surfer.ts +1 -1
- pages/api/functions/index.ts +16 -13
app/layout.tsx
CHANGED
@@ -5,7 +5,7 @@ import './globals.css'
|
|
5 |
const inter = Inter({ subsets: ['latin'] })
|
6 |
|
7 |
export const metadata: Metadata = {
|
8 |
-
title: '
|
9 |
description: 'Navigate to URLs and perform realtime similarity search',
|
10 |
}
|
11 |
|
|
|
5 |
const inter = Inter({ subsets: ['latin'] })
|
6 |
|
7 |
export const metadata: Metadata = {
|
8 |
+
title: 'Functions Playground',
|
9 |
description: 'Navigate to URLs and perform realtime similarity search',
|
10 |
}
|
11 |
|
app/tools/search.ts
CHANGED
@@ -6,7 +6,7 @@ function createSearchApi({ apiKey }: { apiKey: string }) {
|
|
6 |
const paramsSchema = z.object({
|
7 |
input: z.string(),
|
8 |
});
|
9 |
-
const name = '
|
10 |
const description = 'A custom search engine. Useful for when you need to answer questions about current events. Input should be a search query. Outputs a JSON array of results.';
|
11 |
|
12 |
const execute = async ({ input }: z.infer<typeof paramsSchema>) => {
|
|
|
6 |
const paramsSchema = z.object({
|
7 |
input: z.string(),
|
8 |
});
|
9 |
+
const name = 'searchApi';
|
10 |
const description = 'A custom search engine. Useful for when you need to answer questions about current events. Input should be a search query. Outputs a JSON array of results.';
|
11 |
|
12 |
const execute = async ({ input }: z.infer<typeof paramsSchema>) => {
|
app/tools/surfer.ts
CHANGED
@@ -5,7 +5,7 @@ function createUrlSurfer() {
|
|
5 |
const paramsSchema = z.object({
|
6 |
input: z.string(),
|
7 |
});
|
8 |
-
const name = '
|
9 |
const description = 'A custom URL navigator. Useful when a URL is provided with a question. Input should be a prompt with a URL. Outputs a JSON array of relevant results.';
|
10 |
|
11 |
return new Tool(paramsSchema, name, description, {} as any).tool;
|
|
|
5 |
const paramsSchema = z.object({
|
6 |
input: z.string(),
|
7 |
});
|
8 |
+
const name = 'surfer';
|
9 |
const description = 'A custom URL navigator. Useful when a URL is provided with a question. Input should be a prompt with a URL. Outputs a JSON array of relevant results.';
|
10 |
|
11 |
return new Tool(paramsSchema, name, description, {} as any).tool;
|
pages/api/functions/index.ts
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import { NextApiRequest, NextApiResponse } from 'next';
|
2 |
-
import fetch from 'node-fetch';
|
3 |
import { JSDOM } from 'jsdom';
|
4 |
// @ts-ignore
|
5 |
import pdfParse from 'pdf-parse';
|
@@ -7,7 +7,7 @@ import puppeteer from 'puppeteer';
|
|
7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
8 |
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
9 |
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
10 |
-
import {
|
11 |
|
12 |
export const config = {
|
13 |
api: {
|
@@ -33,15 +33,25 @@ const model = new HuggingFaceTransformersEmbeddings({
|
|
33 |
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
34 |
|
35 |
const [serpApi] =
|
36 |
-
|
37 |
apiKey: process.env.SERP_API_KEY || "",
|
38 |
});
|
39 |
|
40 |
const handleContentText = async (targetUrl: string) => {
|
41 |
const response = await fetch(targetUrl);
|
|
|
42 |
const contentType = response.headers.get('content-type') || '';
|
43 |
let content;
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
const buffer = await response.arrayBuffer();
|
46 |
content = await extractTextFromPDF(buffer as any);
|
47 |
} else if (contentType.includes('text/html')) {
|
@@ -50,20 +60,13 @@ const handleContentText = async (targetUrl: string) => {
|
|
50 |
const scripts = dom.window.document.querySelectorAll('script, style');
|
51 |
scripts.forEach(element => element.remove());
|
52 |
content = dom.window.document.body.textContent || '';
|
53 |
-
|
54 |
-
if (!content.trim()) {
|
55 |
-
const browser = await puppeteer.launch();
|
56 |
-
const page = await browser.newPage();
|
57 |
-
await page.goto(targetUrl);
|
58 |
-
content = await page.evaluate(() => document.body.innerText);
|
59 |
-
await browser.close();
|
60 |
-
}
|
61 |
} else {
|
62 |
content = await response.text();
|
63 |
}
|
64 |
-
return content;
|
65 |
}
|
66 |
|
|
|
67 |
const surferEmbedApi = async ({ input }: any) => {
|
68 |
const urls = input.match(urlRegex);
|
69 |
const targetUrl = urls ? urls[0] : null;
|
|
|
1 |
import { NextApiRequest, NextApiResponse } from 'next';
|
2 |
+
import fetch, { RequestInfo } from 'node-fetch';
|
3 |
import { JSDOM } from 'jsdom';
|
4 |
// @ts-ignore
|
5 |
import pdfParse from 'pdf-parse';
|
|
|
7 |
import { RecursiveCharacterTextSplitter } from 'langchain/text_splitter';
|
8 |
import { MemoryVectorStore } from 'langchain/vectorstores/memory';
|
9 |
import { HuggingFaceTransformersEmbeddings } from "langchain/embeddings/hf_transformers";
|
10 |
+
import { createSearchApi } from '../../../app/tools/search'
|
11 |
|
12 |
export const config = {
|
13 |
api: {
|
|
|
33 |
const urlRegex = /(https?:\/\/[^\s]+)/g;
|
34 |
|
35 |
const [serpApi] =
|
36 |
+
createSearchApi({
|
37 |
apiKey: process.env.SERP_API_KEY || "",
|
38 |
});
|
39 |
|
40 |
const handleContentText = async (targetUrl: string) => {
|
41 |
const response = await fetch(targetUrl);
|
42 |
+
const status = response.status;
|
43 |
const contentType = response.headers.get('content-type') || '';
|
44 |
let content;
|
45 |
+
|
46 |
+
if (status >= 400) {
|
47 |
+
// If status is 400 or greater, try using puppeteer
|
48 |
+
const browser = await puppeteer.launch();
|
49 |
+
const page = await browser.newPage();
|
50 |
+
await page.goto(targetUrl, { waitUntil: 'networkidle0' }); // waits for the network to be idle before considering the navigation to be finished.
|
51 |
+
content = await page.evaluate(() => document.body.innerText);
|
52 |
+
await browser.close();
|
53 |
+
return content;
|
54 |
+
} else if (contentType.includes('application/pdf')) {
|
55 |
const buffer = await response.arrayBuffer();
|
56 |
content = await extractTextFromPDF(buffer as any);
|
57 |
} else if (contentType.includes('text/html')) {
|
|
|
60 |
const scripts = dom.window.document.querySelectorAll('script, style');
|
61 |
scripts.forEach(element => element.remove());
|
62 |
content = dom.window.document.body.textContent || '';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63 |
} else {
|
64 |
content = await response.text();
|
65 |
}
|
66 |
+
return content.trim();
|
67 |
}
|
68 |
|
69 |
+
|
70 |
const surferEmbedApi = async ({ input }: any) => {
|
71 |
const urls = input.match(urlRegex);
|
72 |
const targetUrl = urls ? urls[0] : null;
|