Merge branch 'arc53:main' into Srayash/Languages

This commit is contained in:
Srayash Singh
2024-12-11 16:46:40 +05:30
committed by GitHub
12 changed files with 283 additions and 201 deletions

View File

@@ -1,4 +1,4 @@
anthropic==0.34.2
anthropic==0.40.0
boto3==1.34.153
beautifulsoup4==4.12.3
celery==5.3.6
@@ -43,7 +43,7 @@ multidict==6.1.0
mypy-extensions==1.0.0
networkx==3.3
numpy==1.26.4
openai==1.46.1
openai==1.55.3
openapi-schema-validator==0.6.2
openapi-spec-validator==0.6.0
openapi3-parser==1.1.18

View File

@@ -1,12 +1,11 @@
import React from 'react'
import styled, { keyframes, createGlobalStyle, ThemeProvider } from 'styled-components';
import styled, { ThemeProvider } from 'styled-components';
import { WidgetCore } from './DocsGPTWidget';
import { SearchBarProps } from '@/types';
import { getSearchResults } from '../requests/searchAPI'
import { Result } from '@/types';
import MarkdownIt from 'markdown-it';
import DOMPurify from 'dompurify';
import { getOS } from '../utils/helper'
import { getOS, preprocessSearchResultsToHTML } from '../utils/helper'
const themes = {
dark: {
bg: '#000',
@@ -116,7 +115,7 @@ const ResultWrapper = styled.div`
const Markdown = styled.div`
line-height:20px;
font-size: 12px;
word-break: break-all;
white-space: pre-wrap;
pre {
padding: 8px;
width: 90%;
@@ -147,17 +146,18 @@ word-break: break-all;
code:not(pre code) {
border-radius: 6px;
padding: 4px 4px;
font-size: 12px;
display: inline-block;
padding: 2px 2px;
margin: 2px;
font-size: 10px;
display: inline;
background-color: #646464;
color: #fff ;
}
img{
max-width: 50%;
}
code {
white-space: pre-wrap ;
overflow-wrap: break-word;
word-break: break-all;
overflow-x: auto;
}
a{
color: #007ee6;
@@ -291,6 +291,8 @@ export const SearchBar = ({
}, 500);
return () => {
console.log(results);
abortController.abort();
clearTimeout(debounceTimeout.current ?? undefined);
};
@@ -341,6 +343,8 @@ export const SearchBar = ({
(results.length > 0 ?
results.map((res, key) => {
const containsSource = res.source !== 'local';
const filteredResults = preprocessSearchResultsToHTML(res.text,input)
if (filteredResults)
return (
<ResultWrapper
key={key}
@@ -352,11 +356,14 @@ export const SearchBar = ({
<Title>{res.title}</Title>
<Content>
<Markdown
dangerouslySetInnerHTML={{ __html: DOMPurify.sanitize(md.render((res.text).substring(0, 256) + "...")) }}
dangerouslySetInnerHTML={{ __html: filteredResults }}
/>
</Content>
</ResultWrapper>
)
else {
setResults((prevItems) => prevItems.filter((_, index) => index !== key));
}
})
:
<NoResults>No results</NoResults>

View File

@@ -1,3 +1,5 @@
import MarkdownIt from "markdown-it";
import DOMPurify from "dompurify";
export const getOS = () => {
const platform = window.navigator.platform;
const userAgent = window.navigator.userAgent || window.navigator.vendor;
@@ -23,5 +25,63 @@ export const getOS = () => {
}
return 'other';
};
};
/**
 * Renders a markdown search result to HTML, keeps only the parts that
 * contain `keyword` (highlighted by `processNode`), and returns the
 * resulting markup.
 *
 * The returned string is intended for `dangerouslySetInnerHTML`, so it is
 * passed through DOMPurify before being handed back to the caller.
 *
 * @param text - Markdown source of a single search result.
 * @param keyword - Search term typed by the user; surrounding whitespace is ignored.
 * @returns Sanitized HTML of the wrapper `<div>`, or `null` when nothing in
 *          the result matches the keyword.
 */
export const preprocessSearchResultsToHTML = (text: string, keyword: string) => {
  const md = new MarkdownIt();

  // Detached container that holds the rendered markdown while it is pruned.
  const container = document.createElement("div");
  container.innerHTML = md.render(text);

  // processNode removes every subtree without a match and reports whether
  // anything matched at all.
  if (!processNode(container, keyword.trim())) return null;
  if (!container.innerHTML.trim()) return null;

  // Sanitize the final markup: it is injected into the page verbatim.
  return DOMPurify.sanitize(container.outerHTML);
};
/**
 * Recursively highlights `keyword` inside `node` and prunes everything that
 * does not contain it.
 *
 * - Text nodes containing the keyword (case-insensitive, matched literally)
 *   are replaced by a sequence of text nodes and `<mark>` elements.
 * - Element nodes keep only the children that contain the keyword.
 * - Any other node type is treated as "no match".
 *
 * An empty keyword highlights nothing and keeps every text node.
 *
 * @param node - Root of the subtree to process; it is mutated in place.
 * @param keyword - Term to highlight; regex metacharacters are matched literally.
 * @returns `true` when the subtree contains the keyword, otherwise `false`.
 */
const processNode = (node: Node, keyword: string): boolean => {
  // Escape regex metacharacters so input such as "c++" or "(" neither throws
  // nor matches more than the literal text.
  const escapedKeyword = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");

  // Built once for the whole traversal. The capture group makes
  // String.prototype.split keep the matched text at the odd indices.
  const matcher =
    keyword.length > 0 ? new RegExp(`(${escapedKeyword})`, "gi") : null;

  const visit = (current: Node): boolean => {
    if (current.nodeType === Node.TEXT_NODE) {
      // Nothing to highlight: keep the text untouched.
      if (matcher === null) return true;

      const parts = (current.textContent ?? "").split(matcher);
      if (parts.length === 1) return false;

      const parent = current.parentNode;
      if (parent !== null) {
        // Build real DOM nodes instead of re-parsing the text as HTML, so
        // characters such as "<" in the source text stay plain text.
        parts.forEach((part, index) => {
          if (part === "") return;
          if (index % 2 === 1) {
            const mark = document.createElement("mark");
            mark.textContent = part;
            parent.insertBefore(mark, current);
          } else {
            parent.insertBefore(document.createTextNode(part), current);
          }
        });
        parent.removeChild(current);
      }
      return true;
    }

    if (current.nodeType === Node.ELEMENT_NODE) {
      let hasKeyword = false;
      // Iterate over a snapshot: children are replaced or removed while walking.
      for (const child of Array.from(current.childNodes)) {
        if (visit(child)) {
          hasKeyword = true;
        } else {
          current.removeChild(child);
        }
      }
      return hasKeyword;
    }

    return false;
  };

  return visit(node);
};

View File

@@ -21,7 +21,7 @@
/* Linting */
"strict": true,
"noUnusedLocals": false,
"noUnusedParameters": true,
"noUnusedParameters": false,
"noFallthroughCasesInSwitch": true,
/* The "typeRoots" configuration specifies the locations where
TypeScript looks for type definitions (.d.ts files) to

View File

@@ -5,7 +5,7 @@
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0,viewport-fit=cover" />
<meta name="apple-mobile-web-app-capable" content="yes">
<title>DocsGPT 🦖</title>
<title>DocsGPT</title>
<link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
</head>

View File

@@ -17,6 +17,7 @@
"react-chartjs-2": "^5.2.0",
"react-copy-to-clipboard": "^5.1.0",
"react-dom": "^18.3.1",
"react-helmet": "^6.1.0",
"react-dropzone": "^14.3.5",
"react-i18next": "^15.0.2",
"react-markdown": "^9.0.1",
@@ -30,6 +31,7 @@
"devDependencies": {
"@types/react": "^18.0.27",
"@types/react-dom": "^18.3.0",
"@types/react-helmet": "^6.1.11",
"@types/react-syntax-highlighter": "^15.5.13",
"@typescript-eslint/eslint-plugin": "^5.51.0",
"@typescript-eslint/parser": "^5.62.0",
@@ -1675,7 +1677,17 @@
"version": "18.3.0",
"resolved": "https://registry.npmjs.org/@types/react-dom/-/react-dom-18.3.0.tgz",
"integrity": "sha512-EhwApuTmMBmXuFOikhQLIBUn6uFg81SwLMOAUgodJF14SOBOCMdU04gDoYi0WOJJHD144TL32z4yDqCW3dnkQg==",
"devOptional": true,
"dev": true,
"dependencies": {
"@types/react": "*"
}
},
"node_modules/@types/react-helmet": {
"version": "6.1.11",
"resolved": "https://registry.npmjs.org/@types/react-helmet/-/react-helmet-6.1.11.tgz",
"integrity": "sha512-0QcdGLddTERotCXo3VFlUSWO3ztraw8nZ6e3zJSgG7apwV5xt+pJUS8ewPBqT4NYB1optGLprNQzFleIY84u/g==",
"dev": true,
"license": "MIT",
"dependencies": {
"@types/react": "*"
}
@@ -2848,10 +2860,11 @@
}
},
"node_modules/cross-spawn": {
"version": "7.0.3",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
"integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
"version": "7.0.6",
"resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
"integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
"dev": true,
"license": "MIT",
"dependencies": {
"path-key": "^3.1.0",
"shebang-command": "^2.0.0",
@@ -7844,6 +7857,27 @@
"react": ">= 16.8 || 18.0.0"
}
},
"node_modules/react-fast-compare": {
"version": "3.2.2",
"resolved": "https://registry.npmjs.org/react-fast-compare/-/react-fast-compare-3.2.2.tgz",
"integrity": "sha512-nsO+KSNgo1SbJqJEYRE9ERzo7YtYbou/OqjSQKxV7jcKox7+usiUVZOAC+XnDOABXggQTno0Y1CpVnuWEc1boQ==",
"license": "MIT"
},
"node_modules/react-helmet": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/react-helmet/-/react-helmet-6.1.0.tgz",
"integrity": "sha512-4uMzEY9nlDlgxr61NL3XbKRy1hEkXmKNXhjbAIOVw5vcFrsdYbH2FEwcNyWvWinl103nXgzYNlns9ca+8kFiWw==",
"license": "MIT",
"dependencies": {
"object-assign": "^4.1.1",
"prop-types": "^15.7.2",
"react-fast-compare": "^3.1.1",
"react-side-effect": "^2.1.0"
},
"peerDependencies": {
"react": ">=16.3.0"
}
},
"node_modules/react-i18next": {
"version": "15.0.2",
"resolved": "https://registry.npmjs.org/react-i18next/-/react-i18next-15.0.2.tgz",
@@ -7985,6 +8019,15 @@
"react-dom": ">=16.8"
}
},
"node_modules/react-side-effect": {
"version": "2.1.2",
"resolved": "https://registry.npmjs.org/react-side-effect/-/react-side-effect-2.1.2.tgz",
"integrity": "sha512-PVjOcvVOyIILrYoyGEpDN3vmYNLdy1CajSFNt4TDsVQC5KpTijDvWVoR+/7Rz2xT978D8/ZtFceXxzsPwZEDvw==",
"license": "MIT",
"peerDependencies": {
"react": "^16.3.0 || ^17.0.0 || ^18.0.0"
}
},
"node_modules/react-syntax-highlighter": {
"version": "15.5.0",
"resolved": "https://registry.npmjs.org/react-syntax-highlighter/-/react-syntax-highlighter-15.5.0.tgz",

View File

@@ -28,6 +28,7 @@
"react-chartjs-2": "^5.2.0",
"react-copy-to-clipboard": "^5.1.0",
"react-dom": "^18.3.1",
"react-helmet": "^6.1.0",
"react-dropzone": "^14.3.5",
"react-i18next": "^15.0.2",
"react-markdown": "^9.0.1",
@@ -41,6 +42,7 @@
"devDependencies": {
"@types/react": "^18.0.27",
"@types/react-dom": "^18.3.0",
"@types/react-helmet": "^6.1.11",
"@types/react-syntax-highlighter": "^15.5.13",
"@typescript-eslint/eslint-plugin": "^5.51.0",
"@typescript-eslint/parser": "^5.62.0",

Binary file not shown.

After

Width:  |  Height:  |  Size: 735 B

View File

@@ -5,7 +5,7 @@ export default function Avatar({
size,
className,
}: {
avatar: string | ReactNode;
avatar: ReactNode;
size?: 'SMALL' | 'MEDIUM' | 'LARGE';
className: string;
}) {

View File

@@ -16,6 +16,7 @@ import Like from '../assets/like.svg?react';
import Link from '../assets/link.svg';
import Sources from '../assets/sources.svg';
import Edit from '../assets/edit.svg';
import UserIcon from '../assets/user.png';
import Avatar from '../components/Avatar';
import CopyButton from '../components/CopyButton';
import Sidebar from '../components/Sidebar';
@@ -90,9 +91,15 @@ const ConversationBubble = forwardRef<
>
<div
ref={ref}
className={`flex flex-row-reverse self-end flex-wrap items-baseline ${className}`}
className={`flex flex-row-reverse self-end flex-wrap ${className}`}
>
<Avatar className="mt-2 text-2xl" avatar="🧑‍💻"></Avatar>
<Avatar
size="SMALL"
className="mt-2 text-2xl"
avatar={
<img className="rounded-full mr-1" width={30} src={UserIcon} />
}
/>
{!isEditClicked && (
<div
style={{
@@ -137,7 +144,7 @@ const ConversationBubble = forwardRef<
setIsEditClicked(true);
setEditInputBox(message);
}}
className={`p-2 cursor-pointer rounded-full hover:bg-[#35363B] flex items-center ${isQuestionHovered || isEditClicked ? 'visible' : 'invisible'}`}
className={`h-fit mt-3 p-2 cursor-pointer rounded-full hover:bg-[#35363B] flex items-center ${isQuestionHovered || isEditClicked ? 'visible' : 'invisible'}`}
>
<img src={Edit} alt="Edit" className="cursor-pointer" />
</button>

View File

@@ -26,6 +26,7 @@ import {
selectQueries,
} from './sharedConversationSlice';
import { useSelector } from 'react-redux';
import { Helmet } from 'react-helmet';
export const SharedConversation = () => {
const navigate = useNavigate();
@@ -176,6 +177,22 @@ export const SharedConversation = () => {
}, []);
return (
<>
<Helmet>
<title>{`DocsGPT | ${title}`}</title>
<meta name="description" content="Shared conversations with DocsGPT" />
<meta property="og:title" content={title} />
<meta
property="og:description"
content="Shared conversations with DocsGPT"
/>
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content={title} />
<meta
name="twitter:description"
content="Shared conversations with DocsGPT"
/>
</Helmet>
<div className="flex h-full flex-col items-center justify-between gap-2 overflow-y-hidden dark:bg-raisin-black">
<div
ref={sharedConversationRef}
@@ -265,5 +282,6 @@ export const SharedConversation = () => {
</span>
</div>
</div>
</>
);
};

View File

@@ -1,55 +0,0 @@
# LLM Document Analysis by [LexEU](https://www.lexeu.ai/) Competition
## 🏆 Competition Details:
Welcome to the LLM Document Analysis by [LexEU](https://www.lexeu.ai/) competition, part of Hacktoberfest! This challenge is designed for participants who can devise the best new retrieval or workflow method to analyze a document using EU laws.
### 🏅 Prizes:
- **1st Place:** $200 + Special Holopin
- **2nd Place:** $100 + Special Holopin
- **3rd Place:** $50 + Special Holopin
- **Top 3 Winners:** Special Holopin
### 📆 Timeline:
- **Competition Announcement:** 1st October
- **Deadline for Submissions:** 8th November
- **Results Announcement:** Early November
## 📜 How to Participate:
Participants are required to analyze a given test contract by scraping EU law data, storing it in a database, and retrieving only the relevant portions for analysis. The solution must be optimized for efficiency, using a maximum of 500k tokens.
### Steps to Participate:
1. **Download Test Contract:** You can download it via this [link](https://docs.google.com/document/d/198d7gFJbVWttkIS9ZRUs_PTKIjhsOUeR/edit?usp=sharing&ouid=107667025862106683614&rtpof=true&sd=true).
2. **Ingest EU Law Data:** Gather the EU law data, which is available [here](https://eur-lex.europa.eu/browse/directories/legislation.html?displayProfile=lastConsDocProfile&classification=in-force), and store it in any format you like.
3. **Optimized Data Retrieval:** Implement methods to retrieve only small, relevant portions of the law data for efficient analysis of the test contract. Try to create a custom retriever and a parser.
4. **Analyze the Contract:** Use your optimized retrieval method to analyze the test contract against the EU law data.
5. **Submission Criteria:** Your solution will be judged based on:
- Amount of corrections/inconsistencies found
- Number of tokens used (Maximum 500k tokens)
- Your submission should be a fork of DocsGPT where all the ingestion and analysis steps can be replicated
### Submission Instructions:
1. **Submit Your Work:** Once you finish your analysis, submit your solution by filling out this [form](https://airtable.com/appikMaJwdHhC1SDP/pagLWdew2HKpEaBKr/form).
2. **Private Test Contract:** Your solution will also be benchmarked against a private test contract to validate its efficiency and effectiveness.
3. **Evaluation:** The winners will be evaluated based on the effectiveness of their solution in identifying corrections/inconsistencies and the number of tokens used in the process.
### Resources:
- **Documentation:** Refer to our [Documentation](https://docs.docsgpt.cloud/) for guidance.
- **Discord Support:** Join our [Discord](https://discord.gg/n5BX8dh8rU) server for support and discussions related to the competition.
- Try looking at existing [retrievers](https://github.com/arc53/DocsGPT/tree/main/application/retriever) and maybe creating a custom one
- Try looking at [worker.py](https://github.com/arc53/DocsGPT/blob/main/application/worker.py), which ingests data, and creating a custom worker for ingesting EU law
## 👥 Community and Support:
If you need assistance, feel free to join our [Discord](https://discord.gg/n5BX8dh8rU) server. We're here to help newcomers, so don't hesitate to jump in and ask questions!
## 📢 Announcement:
Stay tuned for updates, and good luck to all participants!
Thank you for participating in the LLM Document Analysis by LexEU competition. Your innovative solutions could not only win you prizes but also contribute significantly to the DocsGPT community. Happy coding! 🚀
---