// Spaces: Running
import fs from 'fs';
import { dirname, join } from 'path';
import { pipeline } from 'stream/promises';
import { fileURLToPath } from 'url';

import FormData from 'form-data';
import fetch from 'node-fetch';
// ES modules do not provide the CommonJS globals __filename / __dirname,
// so derive both from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
/**
 * Small HTTP client that uploads a PDF to a remote service and retrieves
 * either an image (`/uploadPdf`) or a parsed JSON description (`/parsePdf`).
 *
 * Both public methods are best-effort: failures are logged with
 * `console.error` and never rejected to the caller.
 */
class PDFExtractor {
  /**
   * @param {string} [url] - Base URL of the service (no endpoint path).
   */
  constructor(url = 'https://3martini-solar-eyes-dockerized.hf.space') {
    this.bearerToken = process.env.HF_TOKEN; // Load from environment variable
    this.url = url;
  }

  /**
   * Uploads the PDF to `/uploadPdf` and writes the response body to disk.
   *
   * @param {string} pdfFilePath - Path of the PDF file to upload.
   * @param {string} outputFile - Path the response body is written to.
   * @returns {Promise<void>} Resolves once the file is written or the error is logged.
   */
  async extract_panel_image(pdfFilePath, outputFile) {
    try {
      await this.#postPdf('/uploadPdf', pdfFilePath, async (response) => {
        // Assuming the response is a stream of the image
        const dest = fs.createWriteStream(outputFile);
        await pipeline(response.body, dest);
      });
      console.log(`Image saved as ${outputFile}`);
    } catch (error) {
      console.error('Error:', error);
    }
  }

  /**
   * Uploads the PDF to `/parsePdf` and returns the response parsed as JSON.
   *
   * @param {string} pdfFilePath - Path of the PDF file to upload.
   * @returns {Promise<*>} The parsed JSON body, or `undefined` if the request failed.
   */
  async extract_panel_desc(pdfFilePath) {
    try {
      return await this.#postPdf('/parsePdf', pdfFilePath, (response) => response.json());
    } catch (error) {
      console.error('Error:', error);
      return undefined;
    }
  }

  /**
   * Shared request logic: POSTs the PDF as the multipart field `uploadFile`
   * and hands a successful response to `consume`.
   *
   * @param {string} route - Endpoint path beginning with `/`.
   * @param {string} pdfFilePath - Path of the PDF file to upload.
   * @param {(response: object) => *} consume - Reads the response; its result is returned.
   * @returns {Promise<*>} Whatever `consume` returns.
   * @throws {Error} On a non-2xx status, or any error from the request or `consume`.
   */
  async #postPdf(route, pdfFilePath, consume) {
    const upload = fs.createReadStream(pdfFilePath);
    const form = new FormData();
    form.append('uploadFile', upload);

    // Only authenticate when a token is configured; otherwise the header
    // would literally read "Bearer undefined".
    const headers = {};
    if (this.bearerToken) {
      headers.Authorization = `Bearer ${this.bearerToken}`;
    }

    // Tolerate a base URL given with trailing slashes (avoids "//uploadPdf").
    const endpoint = `${String(this.url).replace(/\/+$/, '')}${route}`;

    try {
      const response = await fetch(endpoint, { method: 'POST', body: form, headers });
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      return await consume(response);
    } finally {
      // Release the file descriptor even when the request failed before the
      // upload stream was fully read; a no-op if it has already closed.
      upload.destroy();
    }
  }
}
// Usage example: talks to the default hosted service. To target a local
// instance instead, construct with e.g. new PDFExtractor('http://localhost:7860').
const extractor = new PDFExtractor();
const pdfPath = join(__dirname, '../pdf_downloads/0a0824f0-a5e7-4643-a834-0cea06f36c49.pdf');
const outputPath = join(__dirname, 'outputImage.jpg');
// Start the image request first so it runs concurrently with the description
// request, but keep the promise so it is awaited below instead of left floating.
const imageDone = extractor.extract_panel_image(pdfPath, outputPath);
console.log(await extractor.extract_panel_desc(pdfPath));
await imageDone;