This commit is contained in:
aaryan 2025-10-05 01:22:05 +07:00 committed by GitHub
commit 0788dd68da
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 184 additions and 1 deletions

View file

@ -63,6 +63,18 @@
"shortDescription": "Convert PDF into PNG images",
"title": "PDF to PNG"
},
"pdfToWord": {
"description": "Convert PDF documents to editable Word (.docx) format while preserving text, formatting, and layout.",
"longDescription": "Select your PDF and convert it to an editable Word document directly in your browser. This tool extracts text, images, and formatting to create a .docx file that can be opened and edited in Microsoft Word or compatible applications. Your file never leaves your device — everything runs locally.",
"shortDescription": "Convert PDF files to editable Word documents",
"title": "PDF to Word",
"convertingPdf": "Converting PDF to Word...",
"resultTitle": "Converted Word Document",
"toolInfo": {
"description": "This tool converts PDF documents to editable Word (.docx) format. The conversion preserves text content, basic formatting, and images when possible. The resulting Word document can be opened and edited in Microsoft Word or compatible applications.",
"title": "PDF to Word Converter"
}
},
"protectPdf": {
"description": "Add password protection to your PDF files securely in your browser",
"shortDescription": "Password protect PDF files securely",

View file

@ -7,6 +7,7 @@ import { tool as compressPdfTool } from './compress-pdf/meta';
import { tool as protectPdfTool } from './protect-pdf/meta';
import { meta as pdfToEpub } from './pdf-to-epub/meta';
import { tool as pdfEditor } from './editor/meta';
import { tool as pdfToWord } from './pdf-to-word/meta';
export const pdfTools: DefinedTool[] = [
pdfEditor,
@ -16,5 +17,6 @@ export const pdfTools: DefinedTool[] = [
protectPdfTool,
mergePdf,
pdfToEpub,
pdfPdfToPng
pdfPdfToPng,
pdfToWord
];

View file

@ -0,0 +1,61 @@
import { useState } from 'react';
import ToolContent from '@components/ToolContent';
import ToolPdfInput from '@components/input/ToolPdfInput';
import ToolFileResult from '@components/result/ToolFileResult';
import { ToolComponentProps } from '@tools/defineTool';
import { convertPdfToWord } from './service';
/**
 * Tool page for converting a PDF into a Word-compatible document.
 *
 * Holds the selected PDF, the converted file, and a busy flag in local state,
 * and hands them to `ToolContent` together with the `compute` callback that
 * runs the conversion through `convertPdfToWord`.
 *
 * @param title Heading forwarded unchanged to `ToolContent`.
 */
export default function PdfToWord({ title }: ToolComponentProps) {
  const [input, setInput] = useState<File | null>(null);
  const [convertedDoc, setConvertedDoc] = useState<File | null>(null);
  const [isConverting, setIsConverting] = useState(false);

  // Conversion callback handed to ToolContent. The first argument (the form
  // values) is ignored because this tool has no options.
  async function compute(_: {}, file: File | null) {
    if (!file) return;

    // Drop any previous result so a stale file is not shown while working.
    setIsConverting(true);
    setConvertedDoc(null);

    try {
      setConvertedDoc(await convertPdfToWord(file));
    } catch (err) {
      // Failures are only logged; nothing is surfaced in the UI yet.
      console.error('Conversion failed:', err);
    } finally {
      setIsConverting(false);
    }
  }

  const pdfPicker = (
    <ToolPdfInput
      value={input}
      onChange={setInput}
      accept={['application/pdf']}
      title="Upload a PDF"
    />
  );

  const resultPanel = (
    <ToolFileResult
      title="Converted Word Document"
      value={convertedDoc}
      loading={isConverting}
      loadingText="Converting PDF to Word..."
    />
  );

  const toolInfo = {
    title: 'PDF to Word Converter',
    description:
      'This tool converts PDF documents to editable Word (.docx) format. The conversion preserves text content, basic formatting, and images when possible. The resulting Word document can be opened and edited in Microsoft Word or compatible applications.'
  };

  return (
    <ToolContent
      title={title}
      input={input}
      setInput={setInput}
      initialValues={{}}
      compute={compute}
      inputComponent={pdfPicker}
      resultComponent={resultPanel}
      getGroups={null}
      toolInfo={toolInfo}
    />
  );
}

View file

@ -0,0 +1,18 @@
import { defineTool } from '@tools/defineTool';
import { lazy } from 'react';
// The tool UI is loaded on demand via a dynamic import of ./index.
const PdfToWordComponent = lazy(() => import('./index'));

/**
 * Registration of the PDF-to-Word tool in the `pdf` category.
 * All user-facing text is referenced through `pdf:pdfToWord.*` i18n keys.
 */
export const tool = defineTool('pdf', {
  path: 'pdf-to-word',
  icon: 'material-symbols:description', // Word document icon
  keywords: ['pdf', 'word', 'docx', 'convert', 'document', 'text', 'office'],
  i18n: {
    name: 'pdf:pdfToWord.title',
    description: 'pdf:pdfToWord.description',
    shortDescription: 'pdf:pdfToWord.shortDescription',
    longDescription: 'pdf:pdfToWord.longDescription',
    userTypes: ['generalUsers']
  },
  component: PdfToWordComponent
});

View file

@ -0,0 +1,90 @@
import * as pdfjsLib from 'pdfjs-dist';
import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.min?url';
// Tell pdf.js where its worker script lives: the `?url` import above yields
// the worker's URL, which is assigned here once at module load.
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfjsWorker;
/**
 * Local shape for one entry of `TextContent.items`, i.e. a single text run
 * obtained from `page.getTextContent()`.
 */
interface TextItem {
  /** The text of this run; appended verbatim to the extracted output. */
  str: string;
  /** Set when the run is followed by a line break; triggers a newline. */
  hasEOL: boolean;
  /** Numeric matrix; index 5 is read as the run's vertical position. */
  transform: number[];

  // The remaining fields are declared but not read in this file.
  dir: string;
  fontName: string;
  width: number;
  height: number;
}
/**
 * Local shape for the result of `page.getTextContent()` as consumed here.
 * Only `items` is read in this file.
 */
interface TextContent {
  /** Text runs of the page, in the order they are returned. */
  items: TextItem[];
  /**
   * Style table keyed by name. Typed as `unknown` rather than `any` so that
   * any future use must narrow the value instead of bypassing type checks.
   */
  styles: Record<string, unknown>;
}
/**
 * Extracts the text of every page of a PDF and packages it as a document that
 * Word can open.
 *
 * Note: the output is an RTF file (`application/rtf`, `.rtf` extension), not a
 * `.docx`. Only text is extracted; images and rich formatting are not carried
 * over.
 *
 * @param pdfFile The PDF selected by the user.
 * @returns A `File` containing the RTF document, named after the input file.
 * @throws Whatever pdf.js rejects with when the file cannot be loaded or read.
 */
export async function convertPdfToWord(pdfFile: File): Promise<File> {
  const arrayBuffer = await pdfFile.arrayBuffer();
  const documentTitle = pdfFile.name.replace(/\.pdf$/i, '');
  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });
  const pageTexts: string[] = [];

  try {
    const pdf = await loadingTask.promise;

    // Extract text from all pages (pdf.js page numbers are 1-based).
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const textContent = (await page.getTextContent()) as TextContent;

      let pageText = '';
      let lastY: number | null = null;

      for (const item of textContent.items) {
        const currentY = item.transform[5];

        // A noticeable jump in Y means a new line. Skip the break when the
        // previous item already ended the line via hasEOL, otherwise every
        // line break would be emitted twice.
        const movedToNewLine =
          lastY !== null && Math.abs(lastY - currentY) > 5;
        if (movedToNewLine && !pageText.endsWith('\n')) {
          pageText += '\n';
        }

        pageText += item.str;
        // Terminate the run: a line break if pdf.js flagged one, otherwise a
        // space so adjacent runs do not fuse into one word.
        pageText += item.hasEOL ? '\n' : ' ';

        lastY = currentY;
      }

      pageTexts.push(pageText.trim());
    }
  } finally {
    // Release the document and its worker resources even when extraction fails.
    await loadingTask.destroy();
  }

  // Pages are separated by a blank line.
  const fullText = pageTexts.join('\n\n').trim();

  // RTF is simpler to generate than DOCX and is supported by Word.
  const rtfContent = createRTFDocument(fullText, documentTitle);

  // Build the name from the stripped title so the result always carries the
  // .rtf extension, even when the input name did not end in ".pdf".
  return new File([rtfContent], `${documentTitle}.rtf`, {
    type: 'application/rtf'
  });
}
/**
 * Builds a minimal RTF document: a bold title line, a blank line, then the
 * body text with each newline turned into a paragraph break.
 *
 * @param text Plain body text; `\n` separates paragraphs.
 * @param title Plain title text, rendered in bold above the body.
 * @returns The complete RTF source as a string.
 */
function createRTFDocument(text: string, title: string): string {
  // Basic RTF structure
  const rtfHeader = `{\\rtf1\\ansi\\deff0 {\\fonttbl {\\f0 Times New Roman;}}`;
  // The trailing space delimits the last \par. Without it, a body starting
  // with letters is read as part of the control word (e.g. "\parHello") and
  // the first word of the document is silently dropped.
  const rtfTitle = `\\f0\\fs24\\b ${escapeRTF(title)}\\b0\\par\\par `;
  const rtfBody = escapeRTF(text).replace(/\n/g, '\\par ');
  const rtfFooter = `}`;
  return rtfHeader + rtfTitle + rtfBody + rtfFooter;
}
/**
 * Escapes plain text for inclusion in an RTF document.
 *
 * - `\`, `{` and `}` are RTF syntax and are prefixed with a backslash.
 * - Every non-ASCII UTF-16 code unit becomes a `\uN?` escape, where N is the
 *   code unit as a signed 16-bit decimal and `?` is the fallback character for
 *   readers without Unicode support. Without this, accented or non-Latin text
 *   is written as raw bytes into an `\ansi` document and shows up garbled.
 *   Characters outside the BMP come out as two escapes, one per surrogate.
 *
 * ASCII characters, including newlines, pass through unchanged.
 *
 * @param text Plain text to escape.
 * @returns The text made safe for an RTF body.
 */
function escapeRTF(text: string): string {
  return text
    .replace(/[\\{}]/g, '\\$&')
    .replace(/[^\x00-\x7f]/g, (unit) => {
      const code = unit.charCodeAt(0);
      // \uN takes a signed 16-bit value, so the upper half wraps negative.
      const signed = code > 0x7fff ? code - 0x10000 : code;
      return `\\u${signed}?`;
    });
}