// 3martini's picture
// Upload folder using huggingface_hub
// fd6f0a4 verified
import fetch from 'node-fetch';
import FormData from 'form-data';
import fs from 'fs';
import { pipeline } from 'stream/promises';
import { dirname, join } from 'path';
import { fileURLToPath } from 'url';
// ES modules do not define the CommonJS __filename/__dirname variables,
// so both are rebuilt here from this module's own URL.
const __filename = fileURLToPath(new URL(import.meta.url));
const __dirname = dirname(__filename);
/**
 * Small HTTP client that uploads a PDF to a remote service and either saves
 * the response body to disk or returns the parsed JSON response.
 *
 * Both public methods are best-effort: failures are logged with
 * `console.error` and the method resolves to `undefined` instead of rejecting.
 */
class PDFExtractor {
  /**
   * @param {string} [url] - Base URL of the service (with or without a
   *   trailing slash). Defaults to the hosted Space.
   */
  constructor(url = 'https://3martini-solar-eyes-dockerized.hf.space') {
    // The token is read once, at construction time, from the environment.
    this.bearerToken = process.env.HF_TOKEN;
    this.url = url;
  }

  /**
   * Uploads the PDF as the multipart field `uploadFile` to `endpoint`.
   *
   * @param {string} endpoint - Path on the service, starting with `/`.
   * @param {string} pdfFilePath - Path of the PDF file to upload.
   * @returns {Promise<object>} The response, guaranteed to have `ok === true`.
   * @throws {Error} If the file is not readable, the request fails, or the
   *   service answers with a non-2xx status.
   */
  async #postPdf(endpoint, pdfFilePath) {
    // Fail fast with a clear error; otherwise a missing file would only
    // surface later as an asynchronous error event on the read stream.
    await fs.promises.access(pdfFilePath, fs.constants.R_OK);

    const source = fs.createReadStream(pdfFilePath);
    try {
      const form = new FormData();
      form.append('uploadFile', source);

      // Only send credentials when a token is configured; otherwise the
      // header would literally read "Bearer undefined".
      const headers = {};
      if (this.bearerToken) {
        headers.Authorization = `Bearer ${this.bearerToken}`;
      }

      // Strip trailing slashes so a base URL like "http://host/" does not
      // produce "http://host//uploadPdf".
      const baseUrl = String(this.url).replace(/\/+$/, '');
      const response = await fetch(`${baseUrl}${endpoint}`, {
        method: 'POST',
        body: form,
        headers,
      });
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      return response;
    } catch (error) {
      // The upload did not complete successfully, so the file stream may not
      // have been consumed; close it explicitly to release the descriptor.
      source.destroy();
      throw error;
    }
  }

  /**
   * Posts the PDF to `/uploadPdf` and streams the response body into
   * `outputFile`.
   *
   * @param {string} pdfFilePath - Path of the PDF file to upload.
   * @param {string} outputFile - Destination path for the response body.
   * @returns {Promise<void>} Resolves once the file is written, or after a
   *   failure has been logged.
   */
  async extract_panel_image(pdfFilePath, outputFile) {
    try {
      const response = await this.#postPdf('/uploadPdf', pdfFilePath);
      // Assumes the response body is a readable stream of the image bytes.
      // The write stream is only created after a successful response so a
      // failed request does not leave an empty output file behind.
      const dest = fs.createWriteStream(outputFile);
      await pipeline(response.body, dest);
      console.log(`Image saved as ${outputFile}`);
    } catch (error) {
      console.error('Error:', error);
    }
  }

  /**
   * Posts the PDF to `/parsePdf` and returns the JSON-decoded response.
   *
   * @param {string} pdfFilePath - Path of the PDF file to upload.
   * @returns {Promise<*>} The parsed JSON body, or `undefined` if the request
   *   failed (the failure is logged).
   */
  async extract_panel_desc(pdfFilePath) {
    try {
      const response = await this.#postPdf('/parsePdf', pdfFilePath);
      return await response.json();
    } catch (error) {
      console.error('Error:', error);
      return undefined;
    }
  }
}
// Usage example: upload one PDF, save the response of the image endpoint to
// outputImage.jpg next to this module, and print the parsed description.
// To target a locally running service instead of the default URL:
// const extractor = new PDFExtractor('http://localhost:7860');
const extractor = new PDFExtractor();
const pdfPath = join(__dirname, '../pdf_downloads/0a0824f0-a5e7-4643-a834-0cea06f36c49.pdf');
const outputPath = join(__dirname, 'outputImage.jpg');
// Run both requests concurrently, but await both so the image-extraction
// promise is not left floating and the script only finishes once the image
// has been written (or its failure has been logged).
const [, panelDescription] = await Promise.all([
  extractor.extract_panel_image(pdfPath, outputPath),
  extractor.extract_panel_desc(pdfPath),
]);
console.log(panelDescription);