3martini's picture
Upload folder using huggingface_hub
fd6f0a4 verified
const fs = require('fs');
const { pipeline } = require('stream');
const util = require('util');
const FormData = require('form-data');
// Promise-returning wrapper around the callback-style stream.pipeline, so a
// stream copy can be awaited. The returned promise settles when the pipeline
// finishes or fails.
const pipelineAsync = util.promisify(pipeline);
/**
 * Build the request headers for the extraction service.
 *
 * The Authorization header is only included when a token is available, so an
 * unset HF_TOKEN no longer results in the literal value "Bearer undefined".
 *
 * @param {string|undefined} token - Bearer token, if any.
 * @returns {Object} Header map (empty when there is no token).
 */
function buildAuthHeaders(token) {
  return token ? { Authorization: `Bearer ${token}` } : {};
}

/**
 * Join a base URL and an endpoint path without producing a double slash.
 *
 * @param {string} baseUrl - Service base URL, with or without trailing slashes.
 * @param {string} path - Endpoint path beginning with "/".
 * @returns {string} The combined URL.
 */
function joinEndpoint(baseUrl, path) {
  return `${String(baseUrl).replace(/\/+$/, '')}${path}`;
}

/**
 * POST a PDF file as multipart form field "uploadFile" and hand the successful
 * response to `onResponse`.
 *
 * Request failures (network errors, non-2xx status, errors raised by
 * `onResponse`) are logged and reported as `undefined`, matching the
 * best-effort behavior callers of PDFExtractor rely on.
 *
 * @param {PDFExtractor} extractor - Supplies `url` and `bearerToken`.
 * @param {string} endpoint - Endpoint path, e.g. "/parsePdf".
 * @param {string} pdfFilePath - Path of the PDF to upload.
 * @param {Function} onResponse - Receives the fetch response; may be async.
 * @returns {Promise<*>} Result of `onResponse`, or `undefined` on failure.
 */
async function postPdf(extractor, endpoint, pdfFilePath, onResponse) {
  const form = new FormData();
  const upload = fs.createReadStream(pdfFilePath);
  form.append('uploadFile', upload);

  try {
    // node-fetch is loaded with a dynamic import, as in the original code.
    const fetch = (await import('node-fetch')).default;
    const response = await fetch(joinEndpoint(extractor.url, endpoint), {
      method: 'POST',
      body: form,
      headers: buildAuthHeaders(extractor.bearerToken),
    });

    if (!response.ok) {
      throw new Error(`HTTP error! status: ${response.status}`);
    }

    return await onResponse(response);
  } catch (error) {
    console.error('Error:', error);
    return undefined;
  } finally {
    // Release the file descriptor even if the request never consumed the
    // stream; this is a no-op once the stream has already closed.
    upload.destroy();
  }
}

/**
 * Client for a remote PDF processing service.
 *
 * The bearer token is read from the HF_TOKEN environment variable when the
 * instance is constructed.
 */
class PDFExtractor {
  /**
   * @param {string} [url] - Base URL of the service.
   */
  constructor(url = 'https://3martini-solar-eyes-dockerized.hf.space') {
    this.bearerToken = process.env.HF_TOKEN; // Load from environment variable
    this.url = url;
  }

  /**
   * Upload a PDF to the "/uploadPdf" endpoint and stream the response body
   * (saved as an image file) to `outputFile`.
   *
   * Failures are logged to stderr rather than thrown.
   *
   * @param {string} pdfFilePath - Path of the PDF to upload.
   * @param {string} outputFile - Path the response body is written to.
   * @returns {Promise<void>}
   */
  async convert(pdfFilePath, outputFile) {
    await postPdf(this, '/uploadPdf', pdfFilePath, async (response) => {
      // Save the response as an image file
      const dest = fs.createWriteStream(outputFile);
      await pipelineAsync(response.body, dest);
      console.log(`Image saved as ${outputFile}`);
    });
  }

  /**
   * Upload a PDF to the "/parsePdf" endpoint and return the parsed JSON body.
   *
   * @param {string} pdfFilePath - Path of the PDF to upload.
   * @returns {Promise<*>} Parsed JSON response, or `undefined` if the request
   *   failed (the error is logged).
   */
  async extractDesc(pdfFilePath) {
    return postPdf(this, '/parsePdf', pdfFilePath, (response) => response.json());
  }
}
// Example usage, wrapped in an async IIFE so `await` can be used at the top
// level of a CommonJS script. The two calls run one after the other.
// To target a locally running service instead of the default, pass its base
// URL to the constructor, e.g. new PDFExtractor('http://localhost:7860').
(async () => {
  const converter = new PDFExtractor();
  const pdfPath = '/workspaces/solar_eyes/pdf_downloads/0a0824f0-a5e7-4643-a834-0cea06f36c49.pdf';
  const outputPath = `${__dirname}/outputImage2.jpg`;

  // Save the converted output first, then request the parsed description.
  await converter.convert(pdfPath, outputPath);

  const result = await converter.extractDesc(pdfPath);
  console.log(result);
})().catch((error) => {
  // Do not leave the promise floating: report anything the calls above
  // reject with and signal failure through the exit code.
  console.error('Error:', error);
  process.exitCode = 1;
});