Merge branch 'arc53:main' into Srayash/Languages

This commit is contained in:
Srayash Singh
2024-12-11 16:46:40 +05:30
committed by GitHub
12 changed files with 283 additions and 201 deletions

View File

@@ -1,4 +1,4 @@
anthropic==0.34.2
anthropic==0.40.0
boto3==1.34.153
beautifulsoup4==4.12.3
celery==5.3.6
@@ -43,7 +43,7 @@ multidict==6.1.0
mypy-extensions==1.0.0
networkx==3.3
numpy==1.26.4
openai==1.46.1
openai==1.55.3
openapi-schema-validator==0.6.2
openapi-spec-validator==0.6.0
openapi3-parser==1.1.18

View File

@@ -1,12 +1,11 @@
import React from 'react'
import styled, { keyframes, createGlobalStyle, ThemeProvider } from 'styled-components';
import styled, { ThemeProvider } from 'styled-components';
import { WidgetCore } from './DocsGPTWidget';
import { SearchBarProps } from '@/types';
import { getSearchResults } from '../requests/searchAPI'
import { Result } from '@/types';
import MarkdownIt from 'markdown-it';
import DOMPurify from 'dompurify';
import { getOS } from '../utils/helper'
import { getOS, preprocessSearchResultsToHTML } from '../utils/helper'
const themes = {
dark: {
bg: '#000',
@@ -116,7 +115,7 @@ const ResultWrapper = styled.div`
const Markdown = styled.div`
line-height:20px;
font-size: 12px;
word-break: break-all;
white-space: pre-wrap;
pre {
padding: 8px;
width: 90%;
@@ -147,17 +146,18 @@ word-break: break-all;
code:not(pre code) {
border-radius: 6px;
padding: 4px 4px;
font-size: 12px;
display: inline-block;
padding: 2px 2px;
margin: 2px;
font-size: 10px;
display: inline;
background-color: #646464;
color: #fff ;
}
img{
max-width: 50%;
}
code {
white-space: pre-wrap ;
overflow-wrap: break-word;
word-break: break-all;
overflow-x: auto;
}
a{
color: #007ee6;
@@ -291,6 +291,8 @@ export const SearchBar = ({
}, 500);
return () => {
console.log(results);
abortController.abort();
clearTimeout(debounceTimeout.current ?? undefined);
};
@@ -341,22 +343,27 @@ export const SearchBar = ({
(results.length > 0 ?
results.map((res, key) => {
const containsSource = res.source !== 'local';
return (
<ResultWrapper
key={key}
onClick={() => {
if (!containsSource) return;
window.open(res.source, '_blank', 'noopener, noreferrer')
}}
className={containsSource ? "contains-source" : ""}>
<Title>{res.title}</Title>
<Content>
<Markdown
dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(md.render((res.text).substring(0, 256) + "...")) }}
/>
</Content>
</ResultWrapper>
)
const filteredResults = preprocessSearchResultsToHTML(res.text,input)
if (filteredResults)
return (
<ResultWrapper
key={key}
onClick={() => {
if (!containsSource) return;
window.open(res.source, '_blank', 'noopener, noreferrer')
}}
className={containsSource ? "contains-source" : ""}>
<Title>{res.title}</Title>
<Content>
<Markdown
dangerouslySetInnerHTML={{ __html: filteredResults }}
/>
</Content>
</ResultWrapper>
)
else {
setResults((prevItems) => prevItems.filter((_, index) => index !== key));
}
})
:
<NoResults>No results</NoResults>

View File

@@ -1,27 +1,87 @@
import MarkdownIt from "markdown-it";
import DOMPurify from "dompurify";
export const getOS = () => {
const platform = window.navigator.platform;
const userAgent = window.navigator.userAgent || window.navigator.vendor;
if (/Mac/i.test(platform)) {
return 'mac';
const platform = window.navigator.platform;
const userAgent = window.navigator.userAgent || window.navigator.vendor;
if (/Mac/i.test(platform)) {
return 'mac';
}
if (/Win/i.test(platform)) {
return 'win';
}
if (/Linux/i.test(platform) && !/Android/i.test(userAgent)) {
return 'linux';
}
if (/Android/i.test(userAgent)) {
return 'android';
}
if (/iPhone|iPad|iPod/i.test(userAgent)) {
return 'ios';
}
return 'other';
};
export const preprocessSearchResultsToHTML = (text: string, keyword: string) => {
const md = new MarkdownIt();
const htmlString = md.render(text);
// Container for processed HTML
const filteredResults = document.createElement("div");
filteredResults.innerHTML = htmlString;
if (!processNode(filteredResults, keyword.trim())) return null;
return filteredResults.innerHTML.trim() ? filteredResults.outerHTML : null;
};
// Recursive function to process nodes
const processNode = (node: Node, keyword: string): boolean => {
const keywordRegex = new RegExp(`(${keyword})`, "gi");
if (node.nodeType === Node.TEXT_NODE) {
const textContent = node.textContent || "";
if (textContent.toLowerCase().includes(keyword.toLowerCase())) {
const highlightedHTML = textContent.replace(
keywordRegex,
`<mark>$1</mark>`
);
const tempContainer = document.createElement("div");
tempContainer.innerHTML = highlightedHTML;
// Replace the text node with highlighted content
while (tempContainer.firstChild) {
node.parentNode?.insertBefore(tempContainer.firstChild, node);
}
node.parentNode?.removeChild(node);
return true;
}
if (/Win/i.test(platform)) {
return 'win';
}
if (/Linux/i.test(platform) && !/Android/i.test(userAgent)) {
return 'linux';
}
if (/Android/i.test(userAgent)) {
return 'android';
}
if (/iPhone|iPad|iPod/i.test(userAgent)) {
return 'ios';
}
return 'other';
};
return false;
} else if (node.nodeType === Node.ELEMENT_NODE) {
const children = Array.from(node.childNodes);
let hasKeyword = false;
children.forEach((child) => {
if (!processNode(child, keyword)) {
node.removeChild(child);
} else {
hasKeyword = true;
}
});
return hasKeyword;
}
return false;
};

View File

@@ -21,7 +21,7 @@
/* Linting */
"strict": true,
"noUnusedLocals": false,
"noUnusedParameters": true,
"noUnusedParameters": false,
"noFallthroughCasesInSwitch": true,
/* The "typeRoots" configuration specifies the locations where
TypeScript looks for type definitions (.d.ts files) to

View File

@@ -5,7 +5,7 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover" />
<meta name="apple-mobile-web-app-capable" content="yes">
<title>DocsGPT 🦖</title>
<title>DocsGPT</title>
<link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
</head>

View File

@@ -17,6 +17,7 @@
"react-chartjs-2": "^5.2.0",
"react-copy-to-clipboard": "^5.1.0",
"react-dom": "^18.3.1",
"react-helmet": "^6.1.0",
"react-dropzone": "^14.3.5",
"react-i18next": "^15.0.2",
"react-markdown": "^9.0.1",
@@ -30,6 +31,7 @@
"devDependencies": {
"@types/react": "^18.0.27",
"@types/react-dom": "^18.3.0",
"@types/react-helmet": "^6.1.11",
"@types/react-syntax-highlighter": "^15.5.13",
"@typescript-eslint/eslint-plugin": "^5.51.0",
"@typescript-eslint/parser": "^5.62.0",
@@ -1675,7 +1677,17 @@
"version": "18.3.0",
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.0.tgz",
"integrity": "sha512-EhwApuTmMBmXuFOikhQLIBUn6uFg81SwLMOAUgodJF14SOBOCMdU04gDoYi0WOJJHD144TL32z4yDqCW3dnkQg==",
"devOptional": true,
"dev": true,
"dependencies": {
"@types/react": "*"
}
},
"node_modules/@types/react-helmet": {
"version": "6.1.11",
"resolved": "https://registry.npmjs.org/@types/react-helmet/-/react-helmet-6.1.11.tgz",
"integrity": "sha512-0QcdGLddTERotCXo3VFlUSWO3ztraw8nZ6e3zJSgG7apwV5xt+pJUS8ewPBqT4NYB1optGLprNQzFleIY84u/g==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/react": "*"
}
@@ -2848,10 +2860,11 @@
}
},
"node_modules/cross-spawn": {
"version": "7.0.3",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
"version": "7.0.6",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
"dev": true,
"license": "MIT",
"dependencies": {
"path-key": "^3.1.0",
"shebang-command": "^2.0.0",
@@ -7844,6 +7857,27 @@
"react": ">= 16.8 || 18.0.0"
}
},
"node_modules/react-fast-compare": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/react-fast-compare/-/react-fast-compare-3.2.2.tgz",
"integrity": "sha512-nsO+KSNgo1SbJqJEYRE9ERzo7YtYbou/OqjSQKxV7jcKox7+usiUVZOAC+XnDOABXggQTno0Y1CpVnuWEc1boQ==",
"license": "MIT"
},
"node_modules/react-helmet": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/react-helmet/-/react-helmet-6.1.0.tgz",
"integrity": "sha512-4uMzEY9nlDlgxr61NL3XbKRy1hEkXmKNXhjbAIOVw5vcFrsdYbH2FEwcNyWvWinl103nXgzYNlns9ca+8kFiWw==",
"license": "MIT",
"dependencies": {
"object-assign": "^4.1.1",
"prop-types": "^15.7.2",
"react-fast-compare": "^3.1.1",
"react-side-effect": "^2.1.0"
},
"peerDependencies": {
"react": ">=16.3.0"
}
},
"node_modules/react-i18next": {
"version": "15.0.2",
"resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-15.0.2.tgz",
@@ -7985,6 +8019,15 @@
"react-dom": ">=16.8"
}
},
"node_modules/react-side-effect": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/react-side-effect/-/react-side-effect-2.1.2.tgz",
"integrity": "sha512-PVjOcvVOyIILrYoyGEpDN3vmYNLdy1CajSFNt4TDsVQC5KpTijDvWVoR+/7Rz2xT978D8/ZtFceXxzsPwZEDvw==",
"license": "MIT",
"peerDependencies": {
"react": "^16.3.0 || ^17.0.0 || ^18.0.0"
}
},
"node_modules/react-syntax-highlighter": {
"version": "15.5.0",
"resolved": "https://registry.npmjs.org/react-syntax-highlighter/-/react-syntax-highlighter-15.5.0.tgz",

View File

@@ -28,6 +28,7 @@
"react-chartjs-2": "^5.2.0",
"react-copy-to-clipboard": "^5.1.0",
"react-dom": "^18.3.1",
"react-helmet": "^6.1.0",
"react-dropzone": "^14.3.5",
"react-i18next": "^15.0.2",
"react-markdown": "^9.0.1",
@@ -41,6 +42,7 @@
"devDependencies": {
"@types/react": "^18.0.27",
"@types/react-dom": "^18.3.0",
"@types/react-helmet": "^6.1.11",
"@types/react-syntax-highlighter": "^15.5.13",
"@typescript-eslint/eslint-plugin": "^5.51.0",
"@typescript-eslint/parser": "^5.62.0",

Binary file not shown.

After

Width:  |  Height:  |  Size: 735 B

View File

@@ -5,7 +5,7 @@ export default function Avatar({
size,
className,
}: {
avatar: string | ReactNode;
avatar: ReactNode;
size?: 'SMALL' | 'MEDIUM' | 'LARGE';
className: string;
}) {

View File

@@ -16,6 +16,7 @@ import Like from '../assets/like.svg?react';
import Link from '../assets/link.svg';
import Sources from '../assets/sources.svg';
import Edit from '../assets/edit.svg';
import UserIcon from '../assets/user.png';
import Avatar from '../components/Avatar';
import CopyButton from '../components/CopyButton';
import Sidebar from '../components/Sidebar';
@@ -90,9 +91,15 @@ const ConversationBubble = forwardRef<
>
<div
ref={ref}
className={`flex flex-row-reverse self-end flex-wrap items-baseline ${className}`}
className={`flex flex-row-reverse self-end flex-wrap ${className}`}
>
<Avatar className="mt-2 text-2xl" avatar="🧑‍💻"></Avatar>
<Avatar
size="SMALL"
className="mt-2 text-2xl"
avatar={
<img className="rounded-full mr-1" width={30} src={UserIcon} />
}
/>
{!isEditClicked && (
<div
style={{
@@ -137,7 +144,7 @@ const ConversationBubble = forwardRef<
setIsEditClicked(true);
setEditInputBox(message);
}}
className={`p-2 cursor-pointer rounded-full hover:bg-[#35363B] flex items-center ${isQuestionHovered || isEditClicked ? 'visible' : 'invisible'}`}
className={`h-fit mt-3 p-2 cursor-pointer rounded-full hover:bg-[#35363B] flex items-center ${isQuestionHovered || isEditClicked ? 'visible' : 'invisible'}`}
>
<img src={Edit} alt="Edit" className="cursor-pointer" />
</button>

View File

@@ -26,6 +26,7 @@ import {
selectQueries,
} from './sharedConversationSlice';
import { useSelector } from 'react-redux';
import { Helmet } from 'react-helmet';
export const SharedConversation = () => {
const navigate = useNavigate();
@@ -176,94 +177,111 @@ export const SharedConversation = () => {
}, []);
return (
<div className="flex h-full flex-col items-center justify-between gap-2 overflow-y-hidden dark:bg-raisin-black">
<div
ref={sharedConversationRef}
onWheel={handleUserInterruption}
onTouchMove={handleUserInterruption}
className="flex w-full justify-center overflow-auto"
>
<div className="mt-0 w-11/12 md:w-10/12 lg:w-6/12">
<div className="mb-2 w-full border-b pb-2 dark:border-b-silver">
<h1 className="font-semi-bold text-4xl text-chinese-black dark:text-chinese-silver">
{title}
</h1>
<h2 className="font-semi-bold text-base text-chinese-black dark:text-chinese-silver">
{t('sharedConv.subtitle')}{' '}
<a href="/" className="text-[#007DFF]">
DocsGPT
</a>
</h2>
<h2 className="font-semi-bold text-base text-chinese-black dark:text-chinese-silver">
{date}
</h2>
</div>
<div className="">
{queries?.map((query, index) => {
return (
<Fragment key={index}>
<ConversationBubble
ref={endMessageRef}
className={'mb-1 last:mb-28 md:mb-7'}
key={`${index}QUESTION`}
message={query.prompt}
type="QUESTION"
sources={query.sources}
></ConversationBubble>
<>
<Helmet>
<title>{`DocsGPT | ${title}`}</title>
<meta name="description" content="Shared conversations with DocsGPT" />
<meta property="og:title" content={title} />
<meta
property="og:description"
content="Shared conversations with DocsGPT"
/>
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content={title} />
<meta
name="twitter:description"
content="Shared conversations with DocsGPT"
/>
</Helmet>
<div className="flex h-full flex-col items-center justify-between gap-2 overflow-y-hidden dark:bg-raisin-black">
<div
ref={sharedConversationRef}
onWheel={handleUserInterruption}
onTouchMove={handleUserInterruption}
className="flex w-full justify-center overflow-auto"
>
<div className="mt-0 w-11/12 md:w-10/12 lg:w-6/12">
<div className="mb-2 w-full border-b pb-2 dark:border-b-silver">
<h1 className="font-semi-bold text-4xl text-chinese-black dark:text-chinese-silver">
{title}
</h1>
<h2 className="font-semi-bold text-base text-chinese-black dark:text-chinese-silver">
{t('sharedConv.subtitle')}{' '}
<a href="/" className="text-[#007DFF]">
DocsGPT
</a>
</h2>
<h2 className="font-semi-bold text-base text-chinese-black dark:text-chinese-silver">
{date}
</h2>
</div>
<div className="">
{queries?.map((query, index) => {
return (
<Fragment key={index}>
<ConversationBubble
ref={endMessageRef}
className={'mb-1 last:mb-28 md:mb-7'}
key={`${index}QUESTION`}
message={query.prompt}
type="QUESTION"
sources={query.sources}
></ConversationBubble>
{prepResponseView(query, index)}
</Fragment>
);
})}
{prepResponseView(query, index)}
</Fragment>
);
})}
</div>
</div>
</div>
</div>
<div className=" flex w-11/12 flex-col items-center gap-4 pb-2 md:w-10/12 lg:w-6/12">
{apiKey ? (
<div className="flex h-full w-full items-center rounded-[40px] border border-silver bg-white py-1 dark:bg-raisin-black">
<div
id="inputbox"
ref={inputRef}
tabIndex={1}
onPaste={handlePaste}
placeholder={t('inputPlaceholder')}
contentEditable
className={`inputbox-style max-h-24 w-full overflow-y-auto overflow-x-hidden whitespace-pre-wrap rounded-full bg-white pt-5 pb-[22px] text-base leading-tight opacity-100 focus:outline-none dark:bg-raisin-black dark:text-bright-gray`}
onKeyDown={(e) => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
handleQuestionSubmission();
}
}}
></div>
{status === 'loading' ? (
<img
src={Spinner}
className="relative right-[38px] bottom-[24px] -mr-[30px] animate-spin cursor-pointer self-end bg-transparent filter dark:invert"
></img>
) : (
<div className="mx-1 cursor-pointer rounded-full p-3 text-center hover:bg-gray-3000 dark:hover:bg-dark-charcoal">
<div className=" flex w-11/12 flex-col items-center gap-4 pb-2 md:w-10/12 lg:w-6/12">
{apiKey ? (
<div className="flex h-full w-full items-center rounded-[40px] border border-silver bg-white py-1 dark:bg-raisin-black">
<div
id="inputbox"
ref={inputRef}
tabIndex={1}
onPaste={handlePaste}
placeholder={t('inputPlaceholder')}
contentEditable
className={`inputbox-style max-h-24 w-full overflow-y-auto overflow-x-hidden whitespace-pre-wrap rounded-full bg-white pt-5 pb-[22px] text-base leading-tight opacity-100 focus:outline-none dark:bg-raisin-black dark:text-bright-gray`}
onKeyDown={(e) => {
if (e.key === 'Enter' && !e.shiftKey) {
e.preventDefault();
handleQuestionSubmission();
}
}}
></div>
{status === 'loading' ? (
<img
onClick={handleQuestionSubmission}
className="ml-[4px] h-6 w-6 text-white filter dark:invert"
src={Send}
src={Spinner}
className="relative right-[38px] bottom-[24px] -mr-[30px] animate-spin cursor-pointer self-end bg-transparent filter dark:invert"
></img>
</div>
)}
</div>
) : (
<button
onClick={() => navigate('/')}
className="w-fit rounded-full bg-purple-30 p-4 text-white shadow-xl transition-colors duration-200 hover:bg-purple-taupe"
>
{t('sharedConv.button')}
</button>
)}
<span className="mb-2 hidden text-xs text-dark-charcoal dark:text-silver sm:inline">
{t('sharedConv.meta')}
</span>
) : (
<div className="mx-1 cursor-pointer rounded-full p-3 text-center hover:bg-gray-3000 dark:hover:bg-dark-charcoal">
<img
onClick={handleQuestionSubmission}
className="ml-[4px] h-6 w-6 text-white filter dark:invert"
src={Send}
></img>
</div>
)}
</div>
) : (
<button
onClick={() => navigate('/')}
className="w-fit rounded-full bg-purple-30 p-4 text-white shadow-xl transition-colors duration-200 hover:bg-purple-taupe"
>
{t('sharedConv.button')}
</button>
)}
<span className="mb-2 hidden text-xs text-dark-charcoal dark:text-silver sm:inline">
{t('sharedConv.meta')}
</span>
</div>
</div>
</div>
</>
);
};

View File

@@ -1,55 +0,0 @@
# LLM Document Analysis by [LexEU](https://www.lexeu.ai/) Competition
## 🏆 Competition Details:
Welcome to the LLM Document Analysis by [LexEU](https://www.lexeu.ai/) competition, part of Hacktoberfest! This challenge is designed for participants who can devise the best new retrieval or workflow method to analyze a document using EU laws.
### 🏅 Prizes:
- **1st Place:** $200 + Special Holopin
- **2nd Place:** $100 + Special Holopin
- **3rd Place:** $50 + Special Holopin
- **Top 3 Winners:** Special Holopin
### 📆 Timeline:
- **Competition Announcement:** 1st October
- **Deadline for Submissions:** 8th November
- **Results Announcement:** Early November
## 📜 How to Participate:
Participants are required to analyze a given test contract by scraping EU law data, storing it in a database, and retrieving only the relevant portions for analysis. The solution must be optimized for efficiency, using a maximum of 500k tokens.
### Steps to Participate:
1. **Download Test Contract:** You can download it via this [link](https://docs.google.com/document/d/198d7gFJbVWttkIS9ZRUs_PTKIjhsOUeR/edit?usp=sharing&ouid=107667025862106683614&rtpof=true&sd=true).
2. **Ingest EU Law Data:** Gather and store data in any format, it's available [here](https://eur-lex.europa.eu/browse/directories/legislation.html?displayProfile=lastConsDocProfile&classification=in-force).
3. **Optimized Data Retrieval:** Implement methods to retrieve only small, relevant portions of the law data for efficient analysis of the test contract. Try to create a custom retriever and a parser.
4. **Analyze the Contract:** Use your optimized retrieval method to analyze the test contract against the EU law data.
5. **Submission Criteria:** Your solution will be judged based on:
- Amount of corrections/inconsistencies found
- Number of tokens used (Maximum 500k tokens)
- Your submission should be a fork of DocsGPT where all the ingestion and analysis steps can be replicated
### Submission Instructions:
1. **Submit Your Work:** Once you finish your analysis, submit your solution by filling out this [form](https://airtable.com/appikMaJwdHhC1SDP/pagLWdew2HKpEaBKr/form).
2. **Private Test Contract:** Your solution will also be benchmarked against a private test contract to validate its efficiency and effectiveness.
3. **Evaluation:** The winners will be evaluated based on the effectiveness of their solution in identifying corrections/inconsistencies and the number of tokens used in the process.
### Resources:
- **Documentation:** Refer to our [Documentation](https://docs.docsgpt.cloud/) for guidance.
- **Discord Support:** Join our [Discord](https://discord.gg/n5BX8dh8rU) server for support and discussions related to the competition.
- Try looking at existing [retrievers](https://github.com/arc53/DocsGPT/tree/main/application/retriever) and maybe creating a custom one
- Try looking at [worker.py](https://github.com/arc53/DocsGPT/blob/main/application/worker.py) which ingests data and creating a custom one for ingesting EU law
## 👥 Community and Support:
If you need assistance, feel free to join our [Discord](https://discord.gg/n5BX8dh8rU) server. We're here to help newcomers, so don't hesitate to jump in and ask questions!
## 📢 Announcement:
Stay tuned for updates, and good luck to all participants!
Thank you for participating in the LLM Document Analysis by LexEU competition. Your innovative solutions could not only win you prizes but also contribute significantly to the DocsGPT community. Happy coding! 🚀
---