Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- .python-version +1 -0
- README.md +16 -5
- clara_env/bin/Activate.ps1 +247 -0
- clara_env/bin/activate +69 -0
- clara_env/bin/activate.csh +26 -0
- clara_env/bin/activate.fish +66 -0
- clara_env/bin/convert-caffe2-to-onnx +8 -0
- clara_env/bin/convert-onnx-to-caffe2 +8 -0
- clara_env/bin/distro +8 -0
- clara_env/bin/dotenv +8 -0
- clara_env/bin/dumppdf.py +473 -0
- clara_env/bin/email_validator +8 -0
- clara_env/bin/f2py +8 -0
- clara_env/bin/fastapi +8 -0
- clara_env/bin/fonttools +8 -0
- clara_env/bin/gradio +8 -0
- clara_env/bin/httpx +8 -0
- clara_env/bin/huggingface-cli +8 -0
- clara_env/bin/ipython +8 -0
- clara_env/bin/ipython3 +8 -0
- clara_env/bin/isympy +8 -0
- clara_env/bin/jsondiff +41 -0
- clara_env/bin/jsonpatch +107 -0
- clara_env/bin/jsonpointer +69 -0
- clara_env/bin/jsonschema +8 -0
- clara_env/bin/langchain-server +8 -0
- clara_env/bin/langsmith +8 -0
- clara_env/bin/markdown-it +8 -0
- clara_env/bin/normalizer +8 -0
- clara_env/bin/openai +8 -0
- clara_env/bin/pdf2txt.py +317 -0
- clara_env/bin/pdfplumber +8 -0
- clara_env/bin/pip +8 -0
- clara_env/bin/pip3 +8 -0
- clara_env/bin/pip3.10 +8 -0
- clara_env/bin/pyftmerge +8 -0
- clara_env/bin/pyftsubset +8 -0
- clara_env/bin/pygmentize +8 -0
- clara_env/bin/pypdfium2 +8 -0
- clara_env/bin/pyrsa-decrypt +8 -0
- clara_env/bin/pyrsa-encrypt +8 -0
- clara_env/bin/pyrsa-keygen +8 -0
- clara_env/bin/pyrsa-priv2pub +8 -0
- clara_env/bin/pyrsa-sign +8 -0
- clara_env/bin/pyrsa-verify +8 -0
- clara_env/bin/python +0 -0
- clara_env/bin/python3 +0 -0
- clara_env/bin/python3.10 +0 -0
- clara_env/bin/ruff +3 -0
.gitattributes
CHANGED
@@ -37,3 +37,4 @@ PDF/Anticiper-les-effets-de-l-adaptation-dun-rechauffement-climatique-de-plus-4-
|
|
37 |
PDF/deu-2023.pdf filter=lfs diff=lfs merge=lfs -text
|
38 |
PDF/memo_risques_physiques_focus_batiment_2022.pdf filter=lfs diff=lfs merge=lfs -text
|
39 |
vectors/index.annoy filter=lfs diff=lfs merge=lfs -text
|
|
|
|
37 |
PDF/deu-2023.pdf filter=lfs diff=lfs merge=lfs -text
|
38 |
PDF/memo_risques_physiques_focus_batiment_2022.pdf filter=lfs diff=lfs merge=lfs -text
|
39 |
vectors/index.annoy filter=lfs diff=lfs merge=lfs -text
|
40 |
+
clara_env/bin/ruff filter=lfs diff=lfs merge=lfs -text
|
.python-version
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3.10.8
|
README.md
CHANGED
@@ -1,12 +1,23 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
|
4 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 4.
|
8 |
app_file: app.py
|
|
|
9 |
pinned: false
|
|
|
10 |
---
|
11 |
|
12 |
-
|
|
|
1 |
---
|
2 |
+
title: clara
|
3 |
+
app_file: app.py
|
4 |
+
sdk: gradio
|
5 |
+
sdk_version: 4.19.1
|
6 |
+
---
|
7 |
+
# CLARA
|
8 |
+
|
9 |
+
|
10 |
+
---
|
11 |
+
title: ClimateQ&A
|
12 |
+
emoji: 🌍
|
13 |
+
colorFrom: blue
|
14 |
colorTo: red
|
15 |
sdk: gradio
|
16 |
+
sdk_version: 4.19.1
|
17 |
app_file: app.py
|
18 |
+
fullWidth: true
|
19 |
pinned: false
|
20 |
+
short_description: Ask any questions to the IPCC and IPBES reports
|
21 |
---
|
22 |
|
23 |
+
To run locally run ``gradio app.py``
|
clara_env/bin/Activate.ps1
ADDED
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<#
|
2 |
+
.Synopsis
|
3 |
+
Activate a Python virtual environment for the current PowerShell session.
|
4 |
+
|
5 |
+
.Description
|
6 |
+
Pushes the python executable for a virtual environment to the front of the
|
7 |
+
$Env:PATH environment variable and sets the prompt to signify that you are
|
8 |
+
in a Python virtual environment. Makes use of the command line switches as
|
9 |
+
well as the `pyvenv.cfg` file values present in the virtual environment.
|
10 |
+
|
11 |
+
.Parameter VenvDir
|
12 |
+
Path to the directory that contains the virtual environment to activate. The
|
13 |
+
default value for this is the parent of the directory that the Activate.ps1
|
14 |
+
script is located within.
|
15 |
+
|
16 |
+
.Parameter Prompt
|
17 |
+
The prompt prefix to display when this virtual environment is activated. By
|
18 |
+
default, this prompt is the name of the virtual environment folder (VenvDir)
|
19 |
+
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
|
20 |
+
|
21 |
+
.Example
|
22 |
+
Activate.ps1
|
23 |
+
Activates the Python virtual environment that contains the Activate.ps1 script.
|
24 |
+
|
25 |
+
.Example
|
26 |
+
Activate.ps1 -Verbose
|
27 |
+
Activates the Python virtual environment that contains the Activate.ps1 script,
|
28 |
+
and shows extra information about the activation as it executes.
|
29 |
+
|
30 |
+
.Example
|
31 |
+
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
|
32 |
+
Activates the Python virtual environment located in the specified location.
|
33 |
+
|
34 |
+
.Example
|
35 |
+
Activate.ps1 -Prompt "MyPython"
|
36 |
+
Activates the Python virtual environment that contains the Activate.ps1 script,
|
37 |
+
and prefixes the current prompt with the specified string (surrounded in
|
38 |
+
parentheses) while the virtual environment is active.
|
39 |
+
|
40 |
+
.Notes
|
41 |
+
On Windows, it may be required to enable this Activate.ps1 script by setting the
|
42 |
+
execution policy for the user. You can do this by issuing the following PowerShell
|
43 |
+
command:
|
44 |
+
|
45 |
+
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
46 |
+
|
47 |
+
For more information on Execution Policies:
|
48 |
+
https://go.microsoft.com/fwlink/?LinkID=135170
|
49 |
+
|
50 |
+
#>
|
51 |
+
Param(
|
52 |
+
[Parameter(Mandatory = $false)]
|
53 |
+
[String]
|
54 |
+
$VenvDir,
|
55 |
+
[Parameter(Mandatory = $false)]
|
56 |
+
[String]
|
57 |
+
$Prompt
|
58 |
+
)
|
59 |
+
|
60 |
+
<# Function declarations --------------------------------------------------- #>
|
61 |
+
|
62 |
+
<#
|
63 |
+
.Synopsis
|
64 |
+
Remove all shell session elements added by the Activate script, including the
|
65 |
+
addition of the virtual environment's Python executable from the beginning of
|
66 |
+
the PATH variable.
|
67 |
+
|
68 |
+
.Parameter NonDestructive
|
69 |
+
If present, do not remove this function from the global namespace for the
|
70 |
+
session.
|
71 |
+
|
72 |
+
#>
|
73 |
+
function global:deactivate ([switch]$NonDestructive) {
|
74 |
+
# Revert to original values
|
75 |
+
|
76 |
+
# The prior prompt:
|
77 |
+
if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
|
78 |
+
Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
|
79 |
+
Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
|
80 |
+
}
|
81 |
+
|
82 |
+
# The prior PYTHONHOME:
|
83 |
+
if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
|
84 |
+
Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
|
85 |
+
Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
|
86 |
+
}
|
87 |
+
|
88 |
+
# The prior PATH:
|
89 |
+
if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
|
90 |
+
Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
|
91 |
+
Remove-Item -Path Env:_OLD_VIRTUAL_PATH
|
92 |
+
}
|
93 |
+
|
94 |
+
# Just remove the VIRTUAL_ENV altogether:
|
95 |
+
if (Test-Path -Path Env:VIRTUAL_ENV) {
|
96 |
+
Remove-Item -Path env:VIRTUAL_ENV
|
97 |
+
}
|
98 |
+
|
99 |
+
# Just remove VIRTUAL_ENV_PROMPT altogether.
|
100 |
+
if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
|
101 |
+
Remove-Item -Path env:VIRTUAL_ENV_PROMPT
|
102 |
+
}
|
103 |
+
|
104 |
+
# Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
|
105 |
+
if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
|
106 |
+
Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
|
107 |
+
}
|
108 |
+
|
109 |
+
# Leave deactivate function in the global namespace if requested:
|
110 |
+
if (-not $NonDestructive) {
|
111 |
+
Remove-Item -Path function:deactivate
|
112 |
+
}
|
113 |
+
}
|
114 |
+
|
115 |
+
<#
|
116 |
+
.Description
|
117 |
+
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
|
118 |
+
given folder, and returns them in a map.
|
119 |
+
|
120 |
+
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
|
121 |
+
two strings separated by `=` (with any amount of whitespace surrounding the =)
|
122 |
+
then it is considered a `key = value` line. The left hand string is the key,
|
123 |
+
the right hand is the value.
|
124 |
+
|
125 |
+
If the value starts with a `'` or a `"` then the first and last character is
|
126 |
+
stripped from the value before being captured.
|
127 |
+
|
128 |
+
.Parameter ConfigDir
|
129 |
+
Path to the directory that contains the `pyvenv.cfg` file.
|
130 |
+
#>
|
131 |
+
function Get-PyVenvConfig(
|
132 |
+
[String]
|
133 |
+
$ConfigDir
|
134 |
+
) {
|
135 |
+
Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
|
136 |
+
|
137 |
+
# Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
|
138 |
+
$pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
|
139 |
+
|
140 |
+
# An empty map will be returned if no config file is found.
|
141 |
+
$pyvenvConfig = @{ }
|
142 |
+
|
143 |
+
if ($pyvenvConfigPath) {
|
144 |
+
|
145 |
+
Write-Verbose "File exists, parse `key = value` lines"
|
146 |
+
$pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
|
147 |
+
|
148 |
+
$pyvenvConfigContent | ForEach-Object {
|
149 |
+
$keyval = $PSItem -split "\s*=\s*", 2
|
150 |
+
if ($keyval[0] -and $keyval[1]) {
|
151 |
+
$val = $keyval[1]
|
152 |
+
|
153 |
+
# Remove extraneous quotations around a string value.
|
154 |
+
if ("'""".Contains($val.Substring(0, 1))) {
|
155 |
+
$val = $val.Substring(1, $val.Length - 2)
|
156 |
+
}
|
157 |
+
|
158 |
+
$pyvenvConfig[$keyval[0]] = $val
|
159 |
+
Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
|
160 |
+
}
|
161 |
+
}
|
162 |
+
}
|
163 |
+
return $pyvenvConfig
|
164 |
+
}
|
165 |
+
|
166 |
+
|
167 |
+
<# Begin Activate script --------------------------------------------------- #>
|
168 |
+
|
169 |
+
# Determine the containing directory of this script
|
170 |
+
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
|
171 |
+
$VenvExecDir = Get-Item -Path $VenvExecPath
|
172 |
+
|
173 |
+
Write-Verbose "Activation script is located in path: '$VenvExecPath'"
|
174 |
+
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
|
175 |
+
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
|
176 |
+
|
177 |
+
# Set values required in priority: CmdLine, ConfigFile, Default
|
178 |
+
# First, get the location of the virtual environment, it might not be
|
179 |
+
# VenvExecDir if specified on the command line.
|
180 |
+
if ($VenvDir) {
|
181 |
+
Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
|
182 |
+
}
|
183 |
+
else {
|
184 |
+
Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
|
185 |
+
$VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
|
186 |
+
Write-Verbose "VenvDir=$VenvDir"
|
187 |
+
}
|
188 |
+
|
189 |
+
# Next, read the `pyvenv.cfg` file to determine any required value such
|
190 |
+
# as `prompt`.
|
191 |
+
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
|
192 |
+
|
193 |
+
# Next, set the prompt from the command line, or the config file, or
|
194 |
+
# just use the name of the virtual environment folder.
|
195 |
+
if ($Prompt) {
|
196 |
+
Write-Verbose "Prompt specified as argument, using '$Prompt'"
|
197 |
+
}
|
198 |
+
else {
|
199 |
+
Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
|
200 |
+
if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
|
201 |
+
Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
|
202 |
+
$Prompt = $pyvenvCfg['prompt'];
|
203 |
+
}
|
204 |
+
else {
|
205 |
+
Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
|
206 |
+
Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
|
207 |
+
$Prompt = Split-Path -Path $venvDir -Leaf
|
208 |
+
}
|
209 |
+
}
|
210 |
+
|
211 |
+
Write-Verbose "Prompt = '$Prompt'"
|
212 |
+
Write-Verbose "VenvDir='$VenvDir'"
|
213 |
+
|
214 |
+
# Deactivate any currently active virtual environment, but leave the
|
215 |
+
# deactivate function in place.
|
216 |
+
deactivate -nondestructive
|
217 |
+
|
218 |
+
# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
|
219 |
+
# that there is an activated venv.
|
220 |
+
$env:VIRTUAL_ENV = $VenvDir
|
221 |
+
|
222 |
+
if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
|
223 |
+
|
224 |
+
Write-Verbose "Setting prompt to '$Prompt'"
|
225 |
+
|
226 |
+
# Set the prompt to include the env name
|
227 |
+
# Make sure _OLD_VIRTUAL_PROMPT is global
|
228 |
+
function global:_OLD_VIRTUAL_PROMPT { "" }
|
229 |
+
Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
|
230 |
+
New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
|
231 |
+
|
232 |
+
function global:prompt {
|
233 |
+
Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
|
234 |
+
_OLD_VIRTUAL_PROMPT
|
235 |
+
}
|
236 |
+
$env:VIRTUAL_ENV_PROMPT = $Prompt
|
237 |
+
}
|
238 |
+
|
239 |
+
# Clear PYTHONHOME
|
240 |
+
if (Test-Path -Path Env:PYTHONHOME) {
|
241 |
+
Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
|
242 |
+
Remove-Item -Path Env:PYTHONHOME
|
243 |
+
}
|
244 |
+
|
245 |
+
# Add the venv to the PATH
|
246 |
+
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
|
247 |
+
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
|
clara_env/bin/activate
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file must be used with "source bin/activate" *from bash*
|
2 |
+
# you cannot run it directly
|
3 |
+
|
4 |
+
deactivate () {
|
5 |
+
# reset old environment variables
|
6 |
+
if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
|
7 |
+
PATH="${_OLD_VIRTUAL_PATH:-}"
|
8 |
+
export PATH
|
9 |
+
unset _OLD_VIRTUAL_PATH
|
10 |
+
fi
|
11 |
+
if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
|
12 |
+
PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
|
13 |
+
export PYTHONHOME
|
14 |
+
unset _OLD_VIRTUAL_PYTHONHOME
|
15 |
+
fi
|
16 |
+
|
17 |
+
# This should detect bash and zsh, which have a hash command that must
|
18 |
+
# be called to get it to forget past commands. Without forgetting
|
19 |
+
# past commands the $PATH changes we made may not be respected
|
20 |
+
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
|
21 |
+
hash -r 2> /dev/null
|
22 |
+
fi
|
23 |
+
|
24 |
+
if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
|
25 |
+
PS1="${_OLD_VIRTUAL_PS1:-}"
|
26 |
+
export PS1
|
27 |
+
unset _OLD_VIRTUAL_PS1
|
28 |
+
fi
|
29 |
+
|
30 |
+
unset VIRTUAL_ENV
|
31 |
+
unset VIRTUAL_ENV_PROMPT
|
32 |
+
if [ ! "${1:-}" = "nondestructive" ] ; then
|
33 |
+
# Self destruct!
|
34 |
+
unset -f deactivate
|
35 |
+
fi
|
36 |
+
}
|
37 |
+
|
38 |
+
# unset irrelevant variables
|
39 |
+
deactivate nondestructive
|
40 |
+
|
41 |
+
VIRTUAL_ENV="/Users/lucas.s/Dev/clara/clara_env"
|
42 |
+
export VIRTUAL_ENV
|
43 |
+
|
44 |
+
_OLD_VIRTUAL_PATH="$PATH"
|
45 |
+
PATH="$VIRTUAL_ENV/bin:$PATH"
|
46 |
+
export PATH
|
47 |
+
|
48 |
+
# unset PYTHONHOME if set
|
49 |
+
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
|
50 |
+
# could use `if (set -u; : $PYTHONHOME) ;` in bash
|
51 |
+
if [ -n "${PYTHONHOME:-}" ] ; then
|
52 |
+
_OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
|
53 |
+
unset PYTHONHOME
|
54 |
+
fi
|
55 |
+
|
56 |
+
if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
|
57 |
+
_OLD_VIRTUAL_PS1="${PS1:-}"
|
58 |
+
PS1="(clara_env) ${PS1:-}"
|
59 |
+
export PS1
|
60 |
+
VIRTUAL_ENV_PROMPT="(clara_env) "
|
61 |
+
export VIRTUAL_ENV_PROMPT
|
62 |
+
fi
|
63 |
+
|
64 |
+
# This should detect bash and zsh, which have a hash command that must
|
65 |
+
# be called to get it to forget past commands. Without forgetting
|
66 |
+
# past commands the $PATH changes we made may not be respected
|
67 |
+
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
|
68 |
+
hash -r 2> /dev/null
|
69 |
+
fi
|
clara_env/bin/activate.csh
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file must be used with "source bin/activate.csh" *from csh*.
|
2 |
+
# You cannot run it directly.
|
3 |
+
# Created by Davide Di Blasi <davidedb@gmail.com>.
|
4 |
+
# Ported to Python 3.3 venv by Andrew Svetlov <andrew.svetlov@gmail.com>
|
5 |
+
|
6 |
+
alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
|
7 |
+
|
8 |
+
# Unset irrelevant variables.
|
9 |
+
deactivate nondestructive
|
10 |
+
|
11 |
+
setenv VIRTUAL_ENV "/Users/lucas.s/Dev/clara/clara_env"
|
12 |
+
|
13 |
+
set _OLD_VIRTUAL_PATH="$PATH"
|
14 |
+
setenv PATH "$VIRTUAL_ENV/bin:$PATH"
|
15 |
+
|
16 |
+
|
17 |
+
set _OLD_VIRTUAL_PROMPT="$prompt"
|
18 |
+
|
19 |
+
if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
|
20 |
+
set prompt = "(clara_env) $prompt"
|
21 |
+
setenv VIRTUAL_ENV_PROMPT "(clara_env) "
|
22 |
+
endif
|
23 |
+
|
24 |
+
alias pydoc python -m pydoc
|
25 |
+
|
26 |
+
rehash
|
clara_env/bin/activate.fish
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file must be used with "source <venv>/bin/activate.fish" *from fish*
|
2 |
+
# (https://fishshell.com/); you cannot run it directly.
|
3 |
+
|
4 |
+
function deactivate -d "Exit virtual environment and return to normal shell environment"
|
5 |
+
# reset old environment variables
|
6 |
+
if test -n "$_OLD_VIRTUAL_PATH"
|
7 |
+
set -gx PATH $_OLD_VIRTUAL_PATH
|
8 |
+
set -e _OLD_VIRTUAL_PATH
|
9 |
+
end
|
10 |
+
if test -n "$_OLD_VIRTUAL_PYTHONHOME"
|
11 |
+
set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
|
12 |
+
set -e _OLD_VIRTUAL_PYTHONHOME
|
13 |
+
end
|
14 |
+
|
15 |
+
if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
|
16 |
+
functions -e fish_prompt
|
17 |
+
set -e _OLD_FISH_PROMPT_OVERRIDE
|
18 |
+
functions -c _old_fish_prompt fish_prompt
|
19 |
+
functions -e _old_fish_prompt
|
20 |
+
end
|
21 |
+
|
22 |
+
set -e VIRTUAL_ENV
|
23 |
+
set -e VIRTUAL_ENV_PROMPT
|
24 |
+
if test "$argv[1]" != "nondestructive"
|
25 |
+
# Self-destruct!
|
26 |
+
functions -e deactivate
|
27 |
+
end
|
28 |
+
end
|
29 |
+
|
30 |
+
# Unset irrelevant variables.
|
31 |
+
deactivate nondestructive
|
32 |
+
|
33 |
+
set -gx VIRTUAL_ENV "/Users/lucas.s/Dev/clara/clara_env"
|
34 |
+
|
35 |
+
set -gx _OLD_VIRTUAL_PATH $PATH
|
36 |
+
set -gx PATH "$VIRTUAL_ENV/bin" $PATH
|
37 |
+
|
38 |
+
# Unset PYTHONHOME if set.
|
39 |
+
if set -q PYTHONHOME
|
40 |
+
set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
|
41 |
+
set -e PYTHONHOME
|
42 |
+
end
|
43 |
+
|
44 |
+
if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
|
45 |
+
# fish uses a function instead of an env var to generate the prompt.
|
46 |
+
|
47 |
+
# Save the current fish_prompt function as the function _old_fish_prompt.
|
48 |
+
functions -c fish_prompt _old_fish_prompt
|
49 |
+
|
50 |
+
# With the original prompt function renamed, we can override with our own.
|
51 |
+
function fish_prompt
|
52 |
+
# Save the return status of the last command.
|
53 |
+
set -l old_status $status
|
54 |
+
|
55 |
+
# Output the venv prompt; color taken from the blue of the Python logo.
|
56 |
+
printf "%s%s%s" (set_color 4B8BBE) "(clara_env) " (set_color normal)
|
57 |
+
|
58 |
+
# Restore the return status of the previous command.
|
59 |
+
echo "exit $old_status" | .
|
60 |
+
# Output the original/"old" prompt.
|
61 |
+
_old_fish_prompt
|
62 |
+
end
|
63 |
+
|
64 |
+
set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
|
65 |
+
set -gx VIRTUAL_ENV_PROMPT "(clara_env) "
|
66 |
+
end
|
clara_env/bin/convert-caffe2-to-onnx
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from caffe2.python.onnx.bin.conversion import caffe2_to_onnx
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(caffe2_to_onnx())
|
clara_env/bin/convert-onnx-to-caffe2
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from caffe2.python.onnx.bin.conversion import onnx_to_caffe2
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(onnx_to_caffe2())
|
clara_env/bin/distro
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from distro.distro import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/dotenv
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from dotenv.__main__ import cli
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(cli())
|
clara_env/bin/dumppdf.py
ADDED
@@ -0,0 +1,473 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
"""Extract pdf structure in XML format"""
|
3 |
+
import logging
|
4 |
+
import os.path
|
5 |
+
import re
|
6 |
+
import sys
|
7 |
+
from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, Union, cast
|
8 |
+
from argparse import ArgumentParser
|
9 |
+
|
10 |
+
import pdfminer
|
11 |
+
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback
|
12 |
+
from pdfminer.pdfpage import PDFPage
|
13 |
+
from pdfminer.pdfparser import PDFParser
|
14 |
+
from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError
|
15 |
+
from pdfminer.pdftypes import PDFStream, PDFObjRef, resolve1, stream_value
|
16 |
+
from pdfminer.psparser import PSKeyword, PSLiteral, LIT
|
17 |
+
from pdfminer.utils import isnumber
|
18 |
+
|
19 |
+
logging.basicConfig()
|
20 |
+
logger = logging.getLogger(__name__)
|
21 |
+
|
22 |
+
ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
|
23 |
+
|
24 |
+
|
25 |
+
def escape(s: Union[str, bytes]) -> str:
|
26 |
+
if isinstance(s, bytes):
|
27 |
+
us = str(s, "latin-1")
|
28 |
+
else:
|
29 |
+
us = s
|
30 |
+
return ESC_PAT.sub(lambda m: "&#%d;" % ord(m.group(0)), us)
|
31 |
+
|
32 |
+
|
33 |
+
def dumpxml(out: TextIO, obj: object, codec: Optional[str] = None) -> None:
|
34 |
+
if obj is None:
|
35 |
+
out.write("<null />")
|
36 |
+
return
|
37 |
+
|
38 |
+
if isinstance(obj, dict):
|
39 |
+
out.write('<dict size="%d">\n' % len(obj))
|
40 |
+
for (k, v) in obj.items():
|
41 |
+
out.write("<key>%s</key>\n" % k)
|
42 |
+
out.write("<value>")
|
43 |
+
dumpxml(out, v)
|
44 |
+
out.write("</value>\n")
|
45 |
+
out.write("</dict>")
|
46 |
+
return
|
47 |
+
|
48 |
+
if isinstance(obj, list):
|
49 |
+
out.write('<list size="%d">\n' % len(obj))
|
50 |
+
for v in obj:
|
51 |
+
dumpxml(out, v)
|
52 |
+
out.write("\n")
|
53 |
+
out.write("</list>")
|
54 |
+
return
|
55 |
+
|
56 |
+
if isinstance(obj, (str, bytes)):
|
57 |
+
out.write('<string size="%d">%s</string>' % (len(obj), escape(obj)))
|
58 |
+
return
|
59 |
+
|
60 |
+
if isinstance(obj, PDFStream):
|
61 |
+
if codec == "raw":
|
62 |
+
# Bug: writing bytes to text I/O. This will raise TypeError.
|
63 |
+
out.write(obj.get_rawdata()) # type: ignore [arg-type]
|
64 |
+
elif codec == "binary":
|
65 |
+
# Bug: writing bytes to text I/O. This will raise TypeError.
|
66 |
+
out.write(obj.get_data()) # type: ignore [arg-type]
|
67 |
+
else:
|
68 |
+
out.write("<stream>\n<props>\n")
|
69 |
+
dumpxml(out, obj.attrs)
|
70 |
+
out.write("\n</props>\n")
|
71 |
+
if codec == "text":
|
72 |
+
data = obj.get_data()
|
73 |
+
out.write('<data size="%d">%s</data>\n' % (len(data), escape(data)))
|
74 |
+
out.write("</stream>")
|
75 |
+
return
|
76 |
+
|
77 |
+
if isinstance(obj, PDFObjRef):
|
78 |
+
out.write('<ref id="%d" />' % obj.objid)
|
79 |
+
return
|
80 |
+
|
81 |
+
if isinstance(obj, PSKeyword):
|
82 |
+
# Likely bug: obj.name is bytes, not str
|
83 |
+
out.write("<keyword>%s</keyword>" % obj.name) # type: ignore [str-bytes-safe]
|
84 |
+
return
|
85 |
+
|
86 |
+
if isinstance(obj, PSLiteral):
|
87 |
+
# Likely bug: obj.name may be bytes, not str
|
88 |
+
out.write("<literal>%s</literal>" % obj.name) # type: ignore [str-bytes-safe]
|
89 |
+
return
|
90 |
+
|
91 |
+
if isnumber(obj):
|
92 |
+
out.write("<number>%s</number>" % obj)
|
93 |
+
return
|
94 |
+
|
95 |
+
raise TypeError(obj)
|
96 |
+
|
97 |
+
|
98 |
+
def dumptrailers(
|
99 |
+
out: TextIO, doc: PDFDocument, show_fallback_xref: bool = False
|
100 |
+
) -> None:
|
101 |
+
for xref in doc.xrefs:
|
102 |
+
if not isinstance(xref, PDFXRefFallback) or show_fallback_xref:
|
103 |
+
out.write("<trailer>\n")
|
104 |
+
dumpxml(out, xref.get_trailer())
|
105 |
+
out.write("\n</trailer>\n\n")
|
106 |
+
no_xrefs = all(isinstance(xref, PDFXRefFallback) for xref in doc.xrefs)
|
107 |
+
if no_xrefs and not show_fallback_xref:
|
108 |
+
msg = (
|
109 |
+
"This PDF does not have an xref. Use --show-fallback-xref if "
|
110 |
+
"you want to display the content of a fallback xref that "
|
111 |
+
"contains all objects."
|
112 |
+
)
|
113 |
+
logger.warning(msg)
|
114 |
+
return
|
115 |
+
|
116 |
+
|
117 |
+
def dumpallobjs(
|
118 |
+
out: TextIO,
|
119 |
+
doc: PDFDocument,
|
120 |
+
codec: Optional[str] = None,
|
121 |
+
show_fallback_xref: bool = False,
|
122 |
+
) -> None:
|
123 |
+
visited = set()
|
124 |
+
out.write("<pdf>")
|
125 |
+
for xref in doc.xrefs:
|
126 |
+
for objid in xref.get_objids():
|
127 |
+
if objid in visited:
|
128 |
+
continue
|
129 |
+
visited.add(objid)
|
130 |
+
try:
|
131 |
+
obj = doc.getobj(objid)
|
132 |
+
if obj is None:
|
133 |
+
continue
|
134 |
+
out.write('<object id="%d">\n' % objid)
|
135 |
+
dumpxml(out, obj, codec=codec)
|
136 |
+
out.write("\n</object>\n\n")
|
137 |
+
except PDFObjectNotFound as e:
|
138 |
+
print("not found: %r" % e)
|
139 |
+
dumptrailers(out, doc, show_fallback_xref)
|
140 |
+
out.write("</pdf>")
|
141 |
+
return
|
142 |
+
|
143 |
+
|
144 |
+
def dumpoutline(
|
145 |
+
outfp: TextIO,
|
146 |
+
fname: str,
|
147 |
+
objids: Any,
|
148 |
+
pagenos: Container[int],
|
149 |
+
password: str = "",
|
150 |
+
dumpall: bool = False,
|
151 |
+
codec: Optional[str] = None,
|
152 |
+
extractdir: Optional[str] = None,
|
153 |
+
) -> None:
|
154 |
+
fp = open(fname, "rb")
|
155 |
+
parser = PDFParser(fp)
|
156 |
+
doc = PDFDocument(parser, password)
|
157 |
+
pages = {
|
158 |
+
page.pageid: pageno
|
159 |
+
for (pageno, page) in enumerate(PDFPage.create_pages(doc), 1)
|
160 |
+
}
|
161 |
+
|
162 |
+
def resolve_dest(dest: object) -> Any:
|
163 |
+
if isinstance(dest, (str, bytes)):
|
164 |
+
dest = resolve1(doc.get_dest(dest))
|
165 |
+
elif isinstance(dest, PSLiteral):
|
166 |
+
dest = resolve1(doc.get_dest(dest.name))
|
167 |
+
if isinstance(dest, dict):
|
168 |
+
dest = dest["D"]
|
169 |
+
if isinstance(dest, PDFObjRef):
|
170 |
+
dest = dest.resolve()
|
171 |
+
return dest
|
172 |
+
|
173 |
+
try:
|
174 |
+
outlines = doc.get_outlines()
|
175 |
+
outfp.write("<outlines>\n")
|
176 |
+
for (level, title, dest, a, se) in outlines:
|
177 |
+
pageno = None
|
178 |
+
if dest:
|
179 |
+
dest = resolve_dest(dest)
|
180 |
+
pageno = pages[dest[0].objid]
|
181 |
+
elif a:
|
182 |
+
action = a
|
183 |
+
if isinstance(action, dict):
|
184 |
+
subtype = action.get("S")
|
185 |
+
if subtype and repr(subtype) == "/'GoTo'" and action.get("D"):
|
186 |
+
dest = resolve_dest(action["D"])
|
187 |
+
pageno = pages[dest[0].objid]
|
188 |
+
s = escape(title)
|
189 |
+
outfp.write('<outline level="{!r}" title="{}">\n'.format(level, s))
|
190 |
+
if dest is not None:
|
191 |
+
outfp.write("<dest>")
|
192 |
+
dumpxml(outfp, dest)
|
193 |
+
outfp.write("</dest>\n")
|
194 |
+
if pageno is not None:
|
195 |
+
outfp.write("<pageno>%r</pageno>\n" % pageno)
|
196 |
+
outfp.write("</outline>\n")
|
197 |
+
outfp.write("</outlines>\n")
|
198 |
+
except PDFNoOutlines:
|
199 |
+
pass
|
200 |
+
parser.close()
|
201 |
+
fp.close()
|
202 |
+
return
|
203 |
+
|
204 |
+
|
205 |
+
LITERAL_FILESPEC = LIT("Filespec")
|
206 |
+
LITERAL_EMBEDDEDFILE = LIT("EmbeddedFile")
|
207 |
+
|
208 |
+
|
209 |
+
def extractembedded(fname: str, password: str, extractdir: str) -> None:
    """Extract the embedded files of a PDF into *extractdir*.

    Every object id listed in the document's xrefs is inspected; each
    dictionary whose ``Type`` is the ``Filespec`` literal is written to
    ``<extractdir>/<objid, zero-padded to 6 digits>-<file name>``.

    :param fname: path of the PDF file to read.
    :param password: password passed to ``PDFDocument``.
    :param extractdir: directory receiving the extracted files; missing
        directories are created.
    :raises PDFValueError: if a file specification does not reference a
        ``PDFStream`` whose ``Type`` is the ``EmbeddedFile`` literal.
    :raises IOError: if the destination path already exists.
    """

    def extract1(objid: int, obj: Dict[str, Any]) -> None:
        """Write the file described by the file-specification dict *obj*."""
        # Prefer the "UF" name and fall back to the byte-string "F" entry.
        # basename() drops any directory components stored in the PDF.
        filename = os.path.basename(obj.get("UF") or cast(bytes, obj.get("F")).decode())
        fileref = obj["EF"].get("UF") or obj["EF"].get("F")
        fileobj = doc.getobj(fileref.objid)
        if not isinstance(fileobj, PDFStream):
            error_msg = (
                "unable to process PDF: reference for %r is not a "
                "PDFStream" % filename
            )
            raise PDFValueError(error_msg)
        if fileobj.get("Type") is not LITERAL_EMBEDDEDFILE:
            raise PDFValueError(
                "unable to process PDF: reference for %r "
                "is not an EmbeddedFile" % (filename)
            )
        path = os.path.join(extractdir, "%.6d-%s" % (objid, filename))
        if os.path.exists(path):
            raise IOError("file exists: %r" % path)
        print("extracting: %r" % path)
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Decode the stream before creating the file so a decoding failure
        # does not leave an empty file behind, and let the context manager
        # release the handle even when the write itself fails.
        data = fileobj.get_data()
        with open(path, "wb") as out:
            out.write(data)

    with open(fname, "rb") as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser, password)
        # The same object id can be listed by more than one xref; extract it
        # only once.
        extracted_objids = set()
        for xref in doc.xrefs:
            for objid in xref.get_objids():
                obj = doc.getobj(objid)
                if (
                    objid not in extracted_objids
                    and isinstance(obj, dict)
                    and obj.get("Type") is LITERAL_FILESPEC
                ):
                    extracted_objids.add(objid)
                    extract1(objid, obj)
|
250 |
+
|
251 |
+
|
252 |
+
def dumppdf(
    outfp: TextIO,
    fname: str,
    objids: Iterable[int],
    pagenos: Container[int],
    password: str = "",
    dumpall: bool = False,
    codec: Optional[str] = None,
    extractdir: Optional[str] = None,
    show_fallback_xref: bool = False,
) -> None:
    """Dump selected parts of the PDF file *fname* to *outfp*.

    In order, this writes the objects named in *objids*, the pages selected
    by *pagenos* and, if *dumpall* is set, every object.  When none of those
    three selections is requested the document trailers are dumped instead.

    :param outfp: text stream receiving the dump.
    :param fname: path of the PDF file to read.
    :param objids: ids of the objects to dump.
    :param pagenos: page indices to dump, counted from 0.  With a *codec*
        the content streams of each selected page are dumped, otherwise the
        page attributes.
    :param password: password passed to ``PDFDocument``.
    :param dumpall: dump all objects of the document.
    :param codec: stream codec forwarded to the dump helpers; a trailing
        newline is written unless it is ``"raw"`` or ``"binary"``.
    :param extractdir: not used by this function.
    :param show_fallback_xref: forwarded to ``dumpallobjs``/``dumptrailers``.
    """
    # The context manager closes the PDF even if parsing or dumping raises.
    with open(fname, "rb") as fp:
        parser = PDFParser(fp)
        doc = PDFDocument(parser, password)
        if objids:
            for objid in objids:
                obj = doc.getobj(objid)
                dumpxml(outfp, obj, codec=codec)
        if pagenos:
            for (pageno, page) in enumerate(PDFPage.create_pages(doc)):
                if pageno in pagenos:
                    if codec:
                        for obj in page.contents:
                            obj = stream_value(obj)
                            dumpxml(outfp, obj, codec=codec)
                    else:
                        dumpxml(outfp, page.attrs)
        if dumpall:
            dumpallobjs(outfp, doc, codec, show_fallback_xref)
        if (not objids) and (not pagenos) and (not dumpall):
            dumptrailers(outfp, doc, show_fallback_xref)
    if codec not in ("raw", "binary"):
        outfp.write("\n")
|
287 |
+
|
288 |
+
|
289 |
+
def create_parser() -> ArgumentParser:
    """Build the command-line parser of the dumppdf tool.

    The options are organised as follows:

    * positional ``files`` plus ``--version`` and ``--debug``;
    * a mutually exclusive procedure choice (``--extract-toc`` or
      ``--extract-embedded``);
    * a "Parser" group selecting what to read from the PDF;
    * an "Output" group with the output file and a mutually exclusive
      stream codec choice.

    :return: the configured ``ArgumentParser``.
    """
    parser = ArgumentParser(description=__doc__, add_help=True)
    parser.add_argument(
        "files",
        type=str,
        default=None,
        nargs="+",
        help="One or more paths to PDF files.",
    )

    parser.add_argument(
        "--version",
        "-v",
        action="version",
        version="pdfminer.six v{}".format(pdfminer.__version__),
    )
    parser.add_argument(
        "--debug",
        "-d",
        default=False,
        action="store_true",
        help="Use debug logging level.",
    )

    # Only one alternative procedure may be requested per invocation.
    procedure_parser = parser.add_mutually_exclusive_group()
    procedure_parser.add_argument(
        "--extract-toc",
        "-T",
        default=False,
        action="store_true",
        help="Extract structure of outline",
    )
    procedure_parser.add_argument(
        "--extract-embedded", "-E", type=str, help="Extract embedded files"
    )

    parse_params = parser.add_argument_group(
        "Parser", description="Used during PDF parsing"
    )
    parse_params.add_argument(
        "--page-numbers",
        type=int,
        default=None,
        nargs="+",
        help="A space-separated list of page numbers to parse.",
    )
    parse_params.add_argument(
        "--pagenos",
        "-p",
        type=str,
        help="A comma-separated list of page numbers to parse. Included for "
        "legacy applications, use --page-numbers for more idiomatic "
        "argument entry.",
    )
    parse_params.add_argument(
        "--objects",
        "-i",
        type=str,
        help="Comma separated list of object numbers to extract",
    )
    parse_params.add_argument(
        "--all",
        "-a",
        default=False,
        action="store_true",
        help="If the structure of all objects should be extracted",
    )
    parse_params.add_argument(
        "--show-fallback-xref",
        action="store_true",
        help="Additionally show the fallback xref. Use this if the PDF "
        "has zero or only invalid xref's. This setting is ignored if "
        "--extract-toc or --extract-embedded is used.",
    )
    parse_params.add_argument(
        "--password",
        "-P",
        type=str,
        default="",
        help="The password to use for decrypting PDF file.",
    )

    output_params = parser.add_argument_group(
        "Output", description="Used during output generation."
    )
    output_params.add_argument(
        "--outfile",
        "-o",
        type=str,
        default="-",
        help='Path to file where output is written. Or "-" (default) to '
        "write to stdout.",
    )

    # At most one stream codec flag may be given.
    codec_parser = output_params.add_mutually_exclusive_group()
    codec_parser.add_argument(
        "--raw-stream",
        "-r",
        default=False,
        action="store_true",
        help="Write stream objects without encoding",
    )
    codec_parser.add_argument(
        "--binary-stream",
        "-b",
        default=False,
        action="store_true",
        help="Write stream objects with binary encoding",
    )
    codec_parser.add_argument(
        "--text-stream",
        "-t",
        default=False,
        action="store_true",
        help="Write stream objects as plain text",
    )

    return parser
|
405 |
+
|
406 |
+
|
407 |
+
def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point: parse *argv* and run the chosen procedure.

    For every input file exactly one of ``dumpoutline`` (``--extract-toc``),
    ``extractembedded`` (``--extract-embedded``) or ``dumppdf`` is executed.

    :param argv: argument list handed to ``ArgumentParser.parse_args``;
        ``None`` is passed through unchanged.
    """
    parser = create_parser()
    args = parser.parse_args(args=argv)

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    if args.objects:
        objids = [int(x) for x in args.objects.split(",")]
    else:
        objids = []

    # Page numbers given on the command line are shifted down by one;
    # dumppdf() matches them against indices counted from 0.
    if args.page_numbers:
        pagenos = {x - 1 for x in args.page_numbers}
    elif args.pagenos:
        pagenos = {int(x) - 1 for x in args.pagenos.split(",")}
    else:
        pagenos = set()

    password = args.password

    if args.raw_stream:
        codec: Optional[str] = "raw"
    elif args.binary_stream:
        codec = "binary"
    elif args.text_stream:
        codec = "text"
    else:
        codec = None

    # Open the output only after all arguments were converted, so a bad
    # --objects/--pagenos value cannot leave a truncated file behind.
    writes_to_stdout = args.outfile == "-"
    outfp = sys.stdout if writes_to_stdout else open(args.outfile, "w")
    try:
        for fname in args.files:
            if args.extract_toc:
                dumpoutline(
                    outfp,
                    fname,
                    objids,
                    pagenos,
                    password=password,
                    dumpall=args.all,
                    codec=codec,
                    extractdir=None,
                )
            elif args.extract_embedded:
                extractembedded(fname, password=password, extractdir=args.extract_embedded)
            else:
                dumppdf(
                    outfp,
                    fname,
                    objids,
                    pagenos,
                    password=password,
                    dumpall=args.all,
                    codec=codec,
                    extractdir=None,
                    show_fallback_xref=args.show_fallback_xref,
                )
    finally:
        # Never close the interpreter's stdout: callers invoking main()
        # programmatically still need it afterwards.  Files opened here are
        # closed even when a dump raises.
        if writes_to_stdout:
            outfp.flush()
        else:
            outfp.close()
|
470 |
+
|
471 |
+
|
472 |
+
if __name__ == "__main__":
|
473 |
+
main()
|
clara_env/bin/email_validator
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from email_validator.__main__ import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/f2py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from numpy.f2py.f2py2e import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/fastapi
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from fastapi_cli.cli import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/fonttools
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from fontTools.__main__ import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/gradio
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from gradio.cli import cli
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(cli())
|
clara_env/bin/httpx
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from httpx import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/huggingface-cli
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from huggingface_hub.commands.huggingface_cli import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/ipython
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from IPython import start_ipython
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(start_ipython())
|
clara_env/bin/ipython3
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from IPython import start_ipython
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(start_ipython())
|
clara_env/bin/isympy
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from isympy import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/jsondiff
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
from __future__ import print_function
|
5 |
+
|
6 |
+
import sys
|
7 |
+
import json
|
8 |
+
import jsonpatch
|
9 |
+
import argparse
|
10 |
+
|
11 |
+
|
12 |
+
parser = argparse.ArgumentParser(description='Diff two JSON files')
|
13 |
+
parser.add_argument('FILE1', type=argparse.FileType('r'))
|
14 |
+
parser.add_argument('FILE2', type=argparse.FileType('r'))
|
15 |
+
parser.add_argument('--indent', type=int, default=None,
|
16 |
+
help='Indent output by n spaces')
|
17 |
+
parser.add_argument('-u', '--preserve-unicode', action='store_true',
|
18 |
+
help='Output Unicode character as-is without using Code Point')
|
19 |
+
parser.add_argument('-v', '--version', action='version',
|
20 |
+
version='%(prog)s ' + jsonpatch.__version__)
|
21 |
+
|
22 |
+
|
23 |
+
def main():
    """ Run diff_files(); a keyboard interrupt ends the process with status 1 """
    try:
        diff_files()
    except KeyboardInterrupt:
        # sys.exit(1) is exactly "raise SystemExit(1)".
        raise SystemExit(1)
|
28 |
+
|
29 |
+
|
30 |
+
def diff_files():
    """ Print the patch jsonpatch.make_patch() computes for FILE1 and FILE2.

    Nothing is printed when the computed patch is empty; otherwise the
    patch is printed as JSON and the process exits with status 1.
    """
    args = parser.parse_args()
    first_doc = json.load(args.FILE1)
    second_doc = json.load(args.FILE2)
    operations = jsonpatch.make_patch(first_doc, second_doc).patch
    if not operations:
        return
    rendered = json.dumps(
        operations,
        indent=args.indent,
        ensure_ascii=not args.preserve_unicode,
    )
    print(rendered)
    sys.exit(1)
|
39 |
+
|
40 |
+
if __name__ == "__main__":
|
41 |
+
main()
|
clara_env/bin/jsonpatch
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
import sys
|
5 |
+
import os.path
|
6 |
+
import json
|
7 |
+
import jsonpatch
|
8 |
+
import tempfile
|
9 |
+
import argparse
|
10 |
+
|
11 |
+
|
12 |
+
parser = argparse.ArgumentParser(
|
13 |
+
description='Apply a JSON patch on a JSON file')
|
14 |
+
parser.add_argument('ORIGINAL', type=argparse.FileType('r'),
|
15 |
+
help='Original file')
|
16 |
+
parser.add_argument('PATCH', type=argparse.FileType('r'),
|
17 |
+
nargs='?', default=sys.stdin,
|
18 |
+
help='Patch file (read from stdin if omitted)')
|
19 |
+
parser.add_argument('--indent', type=int, default=None,
|
20 |
+
help='Indent output by n spaces')
|
21 |
+
parser.add_argument('-b', '--backup', action='store_true',
|
22 |
+
help='Back up ORIGINAL if modifying in-place')
|
23 |
+
parser.add_argument('-i', '--in-place', action='store_true',
|
24 |
+
help='Modify ORIGINAL in-place instead of to stdout')
|
25 |
+
parser.add_argument('-v', '--version', action='version',
|
26 |
+
version='%(prog)s ' + jsonpatch.__version__)
|
27 |
+
parser.add_argument('-u', '--preserve-unicode', action='store_true',
|
28 |
+
help='Output Unicode character as-is without using Code Point')
|
29 |
+
|
30 |
+
def main():
    """ Run patch_files(); a keyboard interrupt ends the process with status 1 """
    try:
        patch_files()
    except KeyboardInterrupt:
        # sys.exit(1) is exactly "raise SystemExit(1)".
        raise SystemExit(1)
|
35 |
+
|
36 |
+
|
37 |
+
def patch_files():
    """ Apply the JSON patch read from PATCH to the ORIGINAL document.

    The patched document is written to stdout, or back to ORIGINAL when
    --in-place is given.  With --backup the previous content is kept as
    ORIGINAL + '.orig'.
    """
    args = parser.parse_args()
    doc = json.load(args.ORIGINAL)
    patch = json.load(args.PATCH)
    result = jsonpatch.apply_patch(doc, patch)

    atomic = False
    pathname = None

    if args.in_place:
        dirname = os.path.abspath(os.path.dirname(args.ORIGINAL.name))

        try:
            # Attempt to replace the file atomically.  We do this by
            # creating a temporary file in the same directory as the
            # original file so we can atomically move the new file over
            # the original later.  (This is done in the same directory
            # because atomic renames do not work across mount points.)

            fd, pathname = tempfile.mkstemp(dir=dirname)
            fp = os.fdopen(fd, 'w')
            atomic = True

        except OSError:
            # We failed to create the temporary file for an atomic
            # replace, so fall back to non-atomic mode by backing up
            # the original (if desired) and writing a new file.

            if args.backup:
                os.rename(args.ORIGINAL.name, args.ORIGINAL.name + '.orig')
            fp = open(args.ORIGINAL.name, 'w')
            atomic = False

    else:
        # Since we're not replacing the original file in-place, write
        # the modified JSON to stdout instead.

        fp = sys.stdout

    # By this point we have some sort of file object we can write the
    # modified JSON to.

    written = False
    try:
        json.dump(result, fp, indent=args.indent, ensure_ascii=not(args.preserve_unicode))
        fp.write('\n')
        written = True
    finally:
        if args.in_place:
            # Close the new file.  If we aren't replacing atomically, this
            # is our last step, since everything else is already in place.

            fp.close()

            if atomic and not written:
                # The write failed: do not leave a half-written temporary
                # file next to the original.
                os.unlink(pathname)

    if args.in_place and atomic:
        try:
            # Complete the atomic replace by linking the original
            # to a backup (if desired), fixing up the permissions
            # on the temporary file, and moving it into place.

            if args.backup:
                os.link(args.ORIGINAL.name, args.ORIGINAL.name + '.orig')
            os.chmod(pathname, os.stat(args.ORIGINAL.name).st_mode)
            os.rename(pathname, args.ORIGINAL.name)

        except OSError:
            # In the event we could not actually do the atomic
            # replace, unlink the original to move it out of the
            # way and finally move the temporary file into place.

            os.unlink(args.ORIGINAL.name)
            os.rename(pathname, args.ORIGINAL.name)
|
104 |
+
|
105 |
+
|
106 |
+
if __name__ == "__main__":
|
107 |
+
main()
|
clara_env/bin/jsonpointer
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
|
4 |
+
from __future__ import print_function
|
5 |
+
|
6 |
+
import sys
|
7 |
+
import os.path
|
8 |
+
import json
|
9 |
+
import jsonpointer
|
10 |
+
import argparse
|
11 |
+
|
12 |
+
|
13 |
+
parser = argparse.ArgumentParser(
|
14 |
+
description='Resolve a JSON pointer on JSON files')
|
15 |
+
|
16 |
+
# Accept pointer as argument or as file
|
17 |
+
ptr_group = parser.add_mutually_exclusive_group(required=True)
|
18 |
+
|
19 |
+
ptr_group.add_argument('-f', '--pointer-file', type=argparse.FileType('r'),
|
20 |
+
nargs='?',
|
21 |
+
help='File containing a JSON pointer expression')
|
22 |
+
|
23 |
+
ptr_group.add_argument('POINTER', type=str, nargs='?',
|
24 |
+
help='A JSON pointer expression')
|
25 |
+
|
26 |
+
parser.add_argument('FILE', type=argparse.FileType('r'), nargs='+',
|
27 |
+
help='Files for which the pointer should be resolved')
|
28 |
+
parser.add_argument('--indent', type=int, default=None,
|
29 |
+
help='Indent output by n spaces')
|
30 |
+
parser.add_argument('-v', '--version', action='version',
|
31 |
+
version='%(prog)s ' + jsonpointer.__version__)
|
32 |
+
|
33 |
+
|
34 |
+
def main():
    """ Run resolve_files(); a keyboard interrupt ends the process with status 1 """
    try:
        resolve_files()
    except KeyboardInterrupt:
        # sys.exit(1) is exactly "raise SystemExit(1)".
        raise SystemExit(1)
|
39 |
+
|
40 |
+
|
41 |
+
def parse_pointer(args):
    """ Return the JSON pointer expression selected by the parsed arguments.

    The POINTER argument wins; otherwise the stripped content of the
    --pointer-file is used.  When neither is available the usage message
    is printed and the process exits with status 1.
    """
    # Compare against None rather than testing truthiness: the empty
    # string is a valid JSON pointer (it references the whole document)
    # and must not be mistaken for "no pointer given".
    if args.POINTER is not None:
        return args.POINTER

    if args.pointer_file is not None:
        return args.pointer_file.read().strip()

    parser.print_usage()
    sys.exit(1)
|
51 |
+
|
52 |
+
|
53 |
+
def resolve_files():
    """ Resolve the selected JSON pointer against every FILE.

    Each resolved value is printed as JSON on stdout; a pointer that
    cannot be resolved for a file is reported on stderr and processing
    continues with the next file.
    """
    args = parser.parse_args()
    pointer = parse_pointer(args)

    for handle in args.FILE:
        document = json.load(handle)
        try:
            resolved = jsonpointer.resolve_pointer(document, pointer)
        except jsonpointer.JsonPointerException as exc:
            print('Could not resolve pointer: %s' % str(exc), file=sys.stderr)
        else:
            print(json.dumps(resolved, indent=args.indent))
|
66 |
+
|
67 |
+
|
68 |
+
if __name__ == "__main__":
|
69 |
+
main()
|
clara_env/bin/jsonschema
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from jsonschema.cli import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/langchain-server
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from langchain.server import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/langsmith
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from langsmith.cli.main import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/markdown-it
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from markdown_it.cli.parse import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/normalizer
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from charset_normalizer.cli import cli_detect
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(cli_detect())
|
clara_env/bin/openai
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from openai.cli import main
|
6 |
+
if __name__ == '__main__':
|
7 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
8 |
+
sys.exit(main())
|
clara_env/bin/pdf2txt.py
ADDED
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
"""A command line tool for extracting text and images from PDF and
|
3 |
+
output it to plain text, html, xml or tags."""
|
4 |
+
import argparse
|
5 |
+
import logging
|
6 |
+
import sys
|
7 |
+
from typing import Any, Container, Iterable, List, Optional
|
8 |
+
|
9 |
+
import pdfminer.high_level
|
10 |
+
from pdfminer.layout import LAParams
|
11 |
+
from pdfminer.utils import AnyIO
|
12 |
+
|
13 |
+
logging.basicConfig()
|
14 |
+
|
15 |
+
OUTPUT_TYPES = ((".htm", "html"), (".html", "html"), (".xml", "xml"), (".tag", "tag"))
|
16 |
+
|
17 |
+
|
18 |
+
def float_or_disabled(x: str) -> Optional[float]:
    """Argparse type: parse *x* as a float, or map ``disabled`` to ``None``.

    The keyword is matched case-insensitively and ignoring surrounding
    whitespace.

    :raises argparse.ArgumentTypeError: if *x* is neither the keyword nor a
        valid float literal.
    """
    if x.lower().strip() != "disabled":
        try:
            return float(x)
        except ValueError:
            raise argparse.ArgumentTypeError("invalid float value: {}".format(x))
    return None
|
25 |
+
|
26 |
+
|
27 |
+
def extract_text(
    files: Iterable[str] = [],
    outfile: str = "-",
    laparams: Optional[LAParams] = None,
    output_type: str = "text",
    codec: str = "utf-8",
    strip_control: bool = False,
    maxpages: int = 0,
    page_numbers: Optional[Container[int]] = None,
    password: str = "",
    scale: float = 1.0,
    rotation: int = 0,
    layoutmode: str = "normal",
    output_dir: Optional[str] = None,
    debug: bool = False,
    disable_caching: bool = False,
    **kwargs: Any
) -> AnyIO:
    """Run ``extract_text_to_fp`` on every file and return the output stream.

    The output stream is ``sys.stdout`` when *outfile* is ``"-"``, otherwise
    *outfile* opened in binary write mode.  It is returned still open; the
    caller is responsible for closing it.

    When *output_type* is ``"text"`` and *outfile* is a real path, the
    output type is switched according to the file suffix (see
    ``OUTPUT_TYPES``).

    :raises ValueError: if *files* is empty.
    """
    if not files:
        raise ValueError("Must provide files to work upon!")

    if output_type == "text" and outfile != "-":
        for override, alttype in OUTPUT_TYPES:
            if outfile.endswith(override):
                output_type = alttype

    if outfile == "-":
        outfp: AnyIO = sys.stdout
        # A stdout that reports an encoding always gets utf-8 as codec,
        # whatever the caller asked for.
        if sys.stdout.encoding is not None:
            codec = "utf-8"
    else:
        outfp = open(outfile, "wb")

    try:
        for fname in files:
            with open(fname, "rb") as fp:
                # NOTE: every local name of this function is forwarded as a
                # keyword argument.  Renaming or adding locals changes what
                # the callee receives.
                pdfminer.high_level.extract_text_to_fp(fp, **locals())
    except BaseException:
        # The stream is only handed to the caller on success; close a file
        # opened here so a failed extraction does not leak the handle.
        if outfp is not sys.stdout:
            outfp.close()
        raise
    return outfp
|
64 |
+
|
65 |
+
|
66 |
+
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser of the pdf2txt tool.

    Besides the positional ``files`` and the general flags, the options are
    split into three groups: "Parser" (what to read), "Layout analysis"
    (defaults taken from a fresh ``LAParams()``) and "Output".

    :return: the configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description=__doc__, add_help=True)
    parser.add_argument(
        "files",
        type=str,
        default=None,
        nargs="+",
        help="One or more paths to PDF files.",
    )

    parser.add_argument(
        "--version",
        "-v",
        action="version",
        version="pdfminer.six v{}".format(pdfminer.__version__),
    )
    parser.add_argument(
        "--debug",
        "-d",
        default=False,
        action="store_true",
        help="Use debug logging level.",
    )
    parser.add_argument(
        "--disable-caching",
        "-C",
        default=False,
        action="store_true",
        help="If caching of resources, such as fonts, should be disabled.",
    )

    parse_params = parser.add_argument_group(
        "Parser", description="Used during PDF parsing"
    )
    parse_params.add_argument(
        "--page-numbers",
        type=int,
        default=None,
        nargs="+",
        help="A space-separated list of page numbers to parse.",
    )
    parse_params.add_argument(
        "--pagenos",
        "-p",
        type=str,
        help="A comma-separated list of page numbers to parse. "
        "Included for legacy applications, use --page-numbers "
        "for more idiomatic argument entry.",
    )
    parse_params.add_argument(
        "--maxpages",
        "-m",
        type=int,
        default=0,
        help="The maximum number of pages to parse.",
    )
    parse_params.add_argument(
        "--password",
        "-P",
        type=str,
        default="",
        help="The password to use for decrypting PDF file.",
    )
    parse_params.add_argument(
        "--rotation",
        "-R",
        default=0,
        type=int,
        help="The number of degrees to rotate the PDF "
        "before other types of processing.",
    )

    la_params = LAParams()  # will be used for defaults
    la_param_group = parser.add_argument_group(
        "Layout analysis", description="Used during layout analysis."
    )
    la_param_group.add_argument(
        "--no-laparams",
        "-n",
        default=False,
        action="store_true",
        help="If layout analysis parameters should be ignored.",
    )
    la_param_group.add_argument(
        "--detect-vertical",
        "-V",
        default=la_params.detect_vertical,
        action="store_true",
        help="If vertical text should be considered during layout analysis",
    )
    la_param_group.add_argument(
        "--line-overlap",
        type=float,
        default=la_params.line_overlap,
        help="If two characters have more overlap than this they "
        "are considered to be on the same line. The overlap is specified "
        "relative to the minimum height of both characters.",
    )
    la_param_group.add_argument(
        "--char-margin",
        "-M",
        type=float,
        default=la_params.char_margin,
        help="If two characters are closer together than this margin they "
        "are considered to be part of the same line. The margin is "
        "specified relative to the width of the character.",
    )
    la_param_group.add_argument(
        "--word-margin",
        "-W",
        type=float,
        default=la_params.word_margin,
        help="If two characters on the same line are further apart than this "
        "margin then they are considered to be two separate words, and "
        "an intermediate space will be added for readability. The margin "
        "is specified relative to the width of the character.",
    )
    la_param_group.add_argument(
        "--line-margin",
        "-L",
        type=float,
        default=la_params.line_margin,
        help="If two lines are close together they are considered to "
        "be part of the same paragraph. The margin is specified "
        "relative to the height of a line.",
    )
    la_param_group.add_argument(
        "--boxes-flow",
        "-F",
        # Accepts a float or the keyword "disabled" (parsed to None).
        type=float_or_disabled,
        default=la_params.boxes_flow,
        help="Specifies how much a horizontal and vertical position of a "
        "text matters when determining the order of lines. The value "
        "should be within the range of -1.0 (only horizontal position "
        "matters) to +1.0 (only vertical position matters). You can also "
        "pass `disabled` to disable advanced layout analysis, and "
        "instead return text based on the position of the bottom left "
        "corner of the text box.",
    )
    la_param_group.add_argument(
        "--all-texts",
        "-A",
        default=la_params.all_texts,
        action="store_true",
        help="If layout analysis should be performed on text in figures.",
    )

    output_params = parser.add_argument_group(
        "Output", description="Used during output generation."
    )
    output_params.add_argument(
        "--outfile",
        "-o",
        type=str,
        default="-",
        help="Path to file where output is written. "
        'Or "-" (default) to write to stdout.',
    )
    output_params.add_argument(
        # Spelled with an underscore, unlike the other options; kept as is
        # because it is part of the command-line interface.
        "--output_type",
        "-t",
        type=str,
        default="text",
        help="Type of output to generate {text,html,xml,tag}.",
    )
    output_params.add_argument(
        "--codec",
        "-c",
        type=str,
        default="utf-8",
        help="Text encoding to use in output file.",
    )
    output_params.add_argument(
        "--output-dir",
        "-O",
        default=None,
        help="The output directory to put extracted images in. If not given, "
        "images are not extracted.",
    )
    output_params.add_argument(
        "--layoutmode",
        "-Y",
        default="normal",
        type=str,
        help="Type of layout to use when generating html "
        "{normal,exact,loose}. If normal, each line is "
        "positioned separately in the html. If exact, "
        "each character is positioned separately in "
        "the html. If loose, same result as normal "
        "but with an additional newline after each "
        "text line. Only used when output_type is html.",
    )
    output_params.add_argument(
        "--scale",
        "-s",
        type=float,
        default=1.0,
        help="The amount of zoom to use when generating html file. "
        "Only used when output_type is html.",
    )
    output_params.add_argument(
        "--strip-control",
        "-S",
        default=False,
        action="store_true",
        help="Remove control statement from text. "
        "Only used when output_type is xml.",
    )

    return parser
|
276 |
+
|
277 |
+
|
278 |
+
def parse_args(args: Optional[List[str]]) -> argparse.Namespace:
    """Parse the command line and post-process the resulting namespace.

    On top of what ``create_parser()`` produces, the returned namespace is
    adjusted as follows:

    * ``laparams`` is ``None`` when ``no_laparams`` is set; otherwise it is
      an ``LAParams`` built from the individual layout options.
    * every entry of ``page_numbers`` is decremented by one (presumably
      turning 1-based user input into 0-based indices -- confirm against
      ``extract_text``); a non-empty ``pagenos`` string, split on commas,
      is converted the same way and takes precedence.
    * ``output_type`` is replaced by the type that ``OUTPUT_TYPES`` pairs
      with a suffix of ``outfile``, but only while it is still ``"text"``
      and ``outfile`` is not ``"-"``.

    :param args: Argument strings forwarded to ``ArgumentParser.parse_args``.
    :return: The adjusted namespace.
    """
    namespace = create_parser().parse_args(args=args)

    # Propagate parsed layout parameters to LAParams object
    if namespace.no_laparams:
        namespace.laparams = None
    else:
        layout_options = (
            "line_overlap",
            "char_margin",
            "line_margin",
            "word_margin",
            "boxes_flow",
            "detect_vertical",
            "all_texts",
        )
        namespace.laparams = LAParams(
            **{option: getattr(namespace, option) for option in layout_options}
        )

    if namespace.page_numbers:
        namespace.page_numbers = {number - 1 for number in namespace.page_numbers}

    # Checked second on purpose: a pagenos string overrides page_numbers.
    if namespace.pagenos:
        namespace.page_numbers = {
            int(token) - 1 for token in namespace.pagenos.split(",")
        }

    if namespace.outfile != "-" and namespace.output_type == "text":
        matching_types = [
            alttype
            for suffix, alttype in OUTPUT_TYPES
            if namespace.outfile.endswith(suffix)
        ]
        if matching_types:
            # When several suffixes match, the last OUTPUT_TYPES entry wins.
            namespace.output_type = matching_types[-1]

    return namespace
|
307 |
+
|
308 |
+
|
309 |
+
def main(args: Optional[List[str]] = None) -> int:
    """Run the command-line tool once and report success.

    The parsed namespace is expanded into keyword arguments for
    ``extract_text``; the object it returns is closed before returning.

    :param args: Argument strings passed on to ``parse_args``.
    :return: Always ``0``.
    """
    options = vars(parse_args(args))
    output_stream = extract_text(**options)
    output_stream.close()
    return 0
|
314 |
+
|
315 |
+
|
316 |
+
if __name__ == "__main__":
    # The value returned by main() becomes the process exit status.
    sys.exit(main(

    ))
|
clara_env/bin/pdfplumber
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from pdfplumber.cli import main
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to the pdfplumber CLI entry point.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(main())
|
clara_env/bin/pip
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from pip._internal.cli.main import main
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to pip's CLI entry point.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(main())
|
clara_env/bin/pip3
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from pip._internal.cli.main import main
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to pip's CLI entry point.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(main())
|
clara_env/bin/pip3.10
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from pip._internal.cli.main import main
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to pip's CLI entry point.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(main())
|
clara_env/bin/pyftmerge
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from fontTools.merge import main
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to the fontTools.merge entry point.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(main())
|
clara_env/bin/pyftsubset
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from fontTools.subset import main
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to the fontTools.subset entry point.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(main())
|
clara_env/bin/pygmentize
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from pygments.cmdline import main
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to the pygments command-line entry point.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(main())
|
clara_env/bin/pypdfium2
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from pypdfium2.__main__ import cli_main
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to pypdfium2's cli_main entry point.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(cli_main())
|
clara_env/bin/pyrsa-decrypt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from rsa.cli import decrypt
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to rsa.cli.decrypt.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(decrypt())
|
clara_env/bin/pyrsa-encrypt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from rsa.cli import encrypt
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to rsa.cli.encrypt.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(encrypt())
|
clara_env/bin/pyrsa-keygen
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from rsa.cli import keygen
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to rsa.cli.keygen.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(keygen())
|
clara_env/bin/pyrsa-priv2pub
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from rsa.util import private_to_public
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to rsa.util.private_to_public.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(private_to_public())
|
clara_env/bin/pyrsa-sign
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from rsa.cli import sign
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to rsa.cli.sign.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(sign())
|
clara_env/bin/pyrsa-verify
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/Users/lucas.s/Dev/clara/clara_env/bin/python3
|
2 |
+
# -*- coding: utf-8 -*-
|
3 |
+
import re
|
4 |
+
import sys
|
5 |
+
from rsa.cli import verify
|
6 |
+
if __name__ == '__main__':
    # Remove a trailing "-script.pyw" or ".exe" from the program name
    # before handing control to rsa.cli.verify.
    sys.argv[0] = re.sub(
        pattern=r'(-script\.pyw|\.exe)?$',
        repl='',
        string=sys.argv[0],
    )
    sys.exit(verify())
|
clara_env/bin/python
ADDED
Binary file (8.82 kB). View file
|
|
clara_env/bin/python3
ADDED
Binary file (8.82 kB). View file
|
|
clara_env/bin/python3.10
ADDED
Binary file (8.82 kB). View file
|
|
clara_env/bin/ruff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de19d5eaffe9b251eb65ad787557d8a6b8a59acfa01e494e9e44b8f74fab5d48
|
3 |
+
size 21805272
|