Spaces:
Running
Running
inoki-giskard
committed on
Commit
•
b6a7e2b
1
Parent(s):
27538a2
Init cicd with commit 9bf277b
Browse files- cicd/.github/workflows/giskard_action.yaml +51 -0
- cicd/.models_and_datasets_to_be_skipped.csv +4 -0
- cicd/automation/__init__.py +3 -0
- cicd/automation/post_discussion.py +5 -0
- cicd/cli.py +69 -0
- cicd/examples/github/cicd_config.yaml +1 -0
- cicd/examples/github/readme.md +1 -0
- cicd/examples/github/requirements.txt +6 -0
- cicd/examples/github/titanic_test_data.csv +447 -0
- cicd/examples/github/train.py +38 -0
- cicd/giskard_cicd/__init__.py +3 -0
- cicd/giskard_cicd/loaders/__init__.py +5 -0
- cicd/giskard_cicd/loaders/base_loader.py +33 -0
- cicd/giskard_cicd/loaders/github_loader.py +33 -0
- cicd/giskard_cicd/loaders/huggingface_loader.py +254 -0
- cicd/giskard_cicd/pipeline/runner.py +48 -0
- cicd/giskard_cicd/utils.py +26 -0
- cicd/pyproject.toml +14 -0
- cicd/readme.md +99 -0
- cicd/retriever.py +55 -0
- cicd/scan_config_template.yaml +7 -0
- cicd/scan_retrieved.py +107 -0
- cicd/setup.cfg +13 -0
cicd/.github/workflows/giskard_action.yaml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Deployment workflow
|
2 |
+
|
3 |
+
on:
|
4 |
+
push:
|
5 |
+
paths:
|
6 |
+
- 'examples/github/train.py'
|
7 |
+
- 'examples/github/titanic_test_data.csv'
|
8 |
+
- 'examples/github/requirements.txt' # temporarily
|
9 |
+
- '.github/workflows/giskard_action.yaml' # temporarily
|
10 |
+
|
11 |
+
jobs:
|
12 |
+
Deployment:
|
13 |
+
runs-on: ubuntu-latest
|
14 |
+
steps:
|
15 |
+
- name: Extract branch name
|
16 |
+
shell: bash
|
17 |
+
run: echo "branch=${GITHUB_REF#refs/heads/}" >> "$GITHUB_OUTPUT"  # the set-output workflow command is deprecated; step outputs are written to the $GITHUB_OUTPUT file
|
18 |
+
id: extract_branch
|
19 |
+
|
20 |
+
- name: checkout repo content
|
21 |
+
uses: actions/checkout@v4 # checkout the repository content to github runner
|
22 |
+
|
23 |
+
- name: setup python
|
24 |
+
uses: actions/setup-python@v4
|
25 |
+
with:
|
26 |
+
python-version: '3.10' # install the python version needed
|
27 |
+
|
28 |
+
- uses: syphar/restore-virtualenv@v1
|
29 |
+
id: cache-virtualenv
|
30 |
+
with:
|
31 |
+
requirement_files: examples/github/requirements.txt # this is optional
|
32 |
+
|
33 |
+
- uses: syphar/restore-pip-download-cache@v1
|
34 |
+
if: steps.cache-virtualenv.outputs.cache-hit != 'true'
|
35 |
+
|
36 |
+
# the package installation will only be executed when the
|
37 |
+
# requirements-files have changed.
|
38 |
+
- run: pip install -r examples/github/requirements.txt
|
39 |
+
env:
|
40 |
+
EMAIL_ADDRESS: ${{ secrets.EMAIL_ADDRESS }}
|
41 |
+
EMAIL_PASSWORD: ${{ secrets.EMAIL_PASSWORD }}
|
42 |
+
EMAIL_RECIPIENT: ${{ secrets.EMAIL_RECIPIENT }}
|
43 |
+
if: steps.cache-virtualenv.outputs.cache-hit != 'true'
|
44 |
+
|
45 |
+
- name: training
|
46 |
+
run: |
|
47 |
+
python examples/github/train.py
|
48 |
+
|
49 |
+
- name: execute pipeline
|
50 |
+
run: |
|
51 |
+
python cli.py --loader github --model examples/github/artifacts/model --dataset examples/github/artifacts/dataset --output_format markdown
|
cicd/.models_and_datasets_to_be_skipped.csv
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model,dataset,status
|
2 |
+
facebook/bart-large-mnli,multi_nli,error
|
3 |
+
distilbert-base-uncased-finetuned-sst-2-english,sst2,done
|
4 |
+
cardiffnlp/twitter-roberta-base-sentiment-latest,tweet_eval,done
|
cicd/automation/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from .post_discussion import create_discussion
|
2 |
+
|
3 |
+
__all__ = ["create_discussion"]
|
cicd/automation/post_discussion.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import huggingface_hub as hf_hub
|
2 |
+
def create_discussion(repo_id, model_name, hf_token, report):
    """Open a discussion on the Space ``repo_id`` that carries ``report``.

    The discussion is titled ``"Report for {model_name}"``, uses ``report``
    as its description, and is created with ``hf_token``. The object returned
    by ``huggingface_hub.create_discussion`` is handed back unchanged.
    """
    return hf_hub.create_discussion(
        repo_id,
        title=f"Report for {model_name}",
        token=hf_token,
        description=report,
        repo_type="space",
    )
|
cicd/cli.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
|
3 |
+
from giskard_cicd.loaders import GithubLoader, HuggingFaceLoader
|
4 |
+
from giskard_cicd.pipeline.runner import PipelineRunner
|
5 |
+
|
6 |
+
from automation import create_discussion
|
7 |
+
|
8 |
+
# Loader ids accepted by --loader; must match the keys registered in main().
_SUPPORTED_LOADER_IDS = ("github", "huggingface")


def _build_parser():
    """Build the argument parser for the Giskard scanner command line."""
    parser = argparse.ArgumentParser(
        prog="Giskard Scanner", description="Scans a model for vulnerabilities and produces a report."
    )
    parser.add_argument(
        "--loader",
        help="Which loader to use to set up the model. Currently only `github` and `huggingface` are supported.",
        required=True,
        # Reject unknown loaders at parse time instead of failing inside the runner.
        choices=_SUPPORTED_LOADER_IDS,
    )
    parser.add_argument("--model", help="The model to scan.", required=True)
    parser.add_argument("--dataset", help="The validation or test dataset that will be used.")
    parser.add_argument(
        "--dataset_split", help="The split of the dataset to use. If not provided, the best split will be selected."
    )
    parser.add_argument("--dataset_config", help="The name of the dataset config subset to use.")
    parser.add_argument("--scan_config", help="Path to YAML file containing the configuration of the scan.")
    parser.add_argument("--output", help="Optional name of the output file.")
    parser.add_argument("--output_format", help="Format of the report (either HTML or markdown). Default is HTML.")
    parser.add_argument(
        "--output_portal",
        help="The output portal of the report (either huggingface or local directory). Default is local.",
    )
    parser.add_argument("--discussion_repo", help="The repo to push the report to.")
    parser.add_argument("--hf_token", help="The token to push the report to the repo.")
    return parser


def _default_report_path(model, output_format):
    """Return the fallback report filename: last segment of ``model`` plus a format-matching extension."""
    model_name = model.split("/")[-1]
    extension = "md" if output_format == "markdown" else "html"
    return f"{model_name}_report.{extension}"


def main(argv=None):
    """Run a scan from the command line and write out the rendered report.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read ``sys.argv``.

    The report is rendered as markdown when ``--output_format markdown`` is
    given and as HTML otherwise. With ``--output_portal huggingface`` the
    rendered report is also posted through ``create_discussion``. The report
    is always written to a file: ``--output`` when given, otherwise
    ``<model name>_report.<md|html>`` in the current directory.

    Invalid arguments terminate the process through ``parser.error``
    (``SystemExit`` with code 2) before any loader is built or scan is run.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Fail fast: without a target repo the discussion cannot be created, and
    # discovering that only after the scan has finished wastes the whole run.
    if args.output_portal == "huggingface" and not args.discussion_repo:
        parser.error("--discussion_repo is required when --output_portal is huggingface")

    supported_loaders = {
        "huggingface": HuggingFaceLoader(),
        "github": GithubLoader(),
    }
    runner = PipelineRunner(loaders=supported_loaders)

    runner_kwargs = {
        "loader_id": args.loader,
        "model": args.model,
        "dataset": args.dataset,
        "scan_config": args.scan_config,
    }
    # Split/config selection is only forwarded for the Hugging Face loader.
    if args.loader == "huggingface":
        runner_kwargs.update(
            {
                "dataset_split": args.dataset_split,
                "dataset_config": args.dataset_config,
            }
        )

    report = runner.run(**runner_kwargs)

    if args.output_format == "markdown":
        rendered_report = report.to_markdown(template="github")
    else:
        rendered_report = report.to_html()

    if args.output_portal == "huggingface":
        create_discussion(args.discussion_repo, args.model, args.hf_token, rendered_report)

    # The fallback name follows the rendered format so markdown is no longer
    # saved under an .html name; UTF-8 keeps non-ASCII report text writable
    # regardless of the platform's default encoding.
    output_path = args.output or _default_report_path(args.model, args.output_format)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(rendered_report)


if __name__ == "__main__":
    main()
|
cicd/examples/github/cicd_config.yaml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
artifact_path: "examples/github"
|
cicd/examples/github/readme.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Github CI/CD
|
cicd/examples/github/requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
giskard>=2.0.0b
|
2 |
+
git+https://github.com/Giskard-AI/cicd.git@main
|
3 |
+
json5==0.9.10
|
4 |
+
jsonpatch==1.32
|
5 |
+
jsonpointer==2.3
|
6 |
+
jsonschema==3.2.0
|
cicd/examples/github/titanic_test_data.csv
ADDED
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"PassengerId","Pclass","Name","Sex","Age","SibSp","Parch","Fare","Embarked","Survived"
|
2 |
+
124,2,"Webber, Miss. Susan","female",32.5,0,0,13.0,"S","yes"
|
3 |
+
715,2,"Greenberg, Mr. Samuel","male",52.0,0,0,13.0,"S","no"
|
4 |
+
413,1,"Minahan, Miss. Daisy E","female",33.0,1,0,90.0,"Q","yes"
|
5 |
+
82,3,"Sheerlinck, Mr. Jan Baptist","male",29.0,0,0,9.5,"S","yes"
|
6 |
+
556,1,"Wright, Mr. George","male",62.0,0,0,26.55,"S","no"
|
7 |
+
533,3,"Elias, Mr. Joseph Jr","male",17.0,1,1,7.2292,"C","no"
|
8 |
+
850,1,"Goldenberg, Mrs. Samuel L (Edwiga Grabowska)","female","_GSK_NA_",1,0,89.1042,"C","yes"
|
9 |
+
569,3,"Doharr, Mr. Tannous","male","_GSK_NA_",0,0,7.2292,"C","no"
|
10 |
+
126,3,"Nicola-Yarred, Master. Elias","male",12.0,1,0,11.2417,"C","yes"
|
11 |
+
544,2,"Beane, Mr. Edward","male",32.0,1,0,26.0,"S","yes"
|
12 |
+
111,1,"Porter, Mr. Walter Chamberlain","male",47.0,0,0,52.0,"S","no"
|
13 |
+
484,3,"Turkula, Mrs. (Hedwig)","female",63.0,0,0,9.5875,"S","yes"
|
14 |
+
593,3,"Elsbury, Mr. William James","male",47.0,0,0,7.25,"S","no"
|
15 |
+
422,3,"Charters, Mr. David","male",21.0,0,0,7.7333,"Q","no"
|
16 |
+
847,3,"Sage, Mr. Douglas Bullen","male","_GSK_NA_",8,2,69.55,"S","no"
|
17 |
+
328,2,"Ball, Mrs. (Ada E Hall)","female",36.0,0,0,13.0,"S","yes"
|
18 |
+
828,2,"Mallet, Master. Andre","male",1.0,0,2,37.0042,"C","yes"
|
19 |
+
883,3,"Dahlberg, Miss. Gerda Ulrika","female",22.0,0,0,10.5167,"S","no"
|
20 |
+
437,3,"Ford, Miss. Doolina Margaret ""Daisy""","female",21.0,2,2,34.375,"S","no"
|
21 |
+
88,3,"Slocovski, Mr. Selman Francis","male","_GSK_NA_",0,0,8.05,"S","no"
|
22 |
+
705,3,"Hansen, Mr. Henrik Juul","male",26.0,1,0,7.8542,"S","no"
|
23 |
+
391,1,"Carter, Mr. William Ernest","male",36.0,1,2,120.0,"S","yes"
|
24 |
+
40,3,"Nicola-Yarred, Miss. Jamila","female",14.0,1,0,11.2417,"C","yes"
|
25 |
+
672,1,"Davidson, Mr. Thornton","male",31.0,1,0,52.0,"S","no"
|
26 |
+
620,2,"Gavey, Mr. Lawrence","male",26.0,0,0,10.5,"S","no"
|
27 |
+
791,3,"Keane, Mr. Andrew ""Andy""","male","_GSK_NA_",0,0,7.75,"Q","no"
|
28 |
+
63,1,"Harris, Mr. Henry Birkhardt","male",45.0,1,0,83.475,"S","no"
|
29 |
+
800,3,"Van Impe, Mrs. Jean Baptiste (Rosalie Paula Govaert)","female",30.0,1,1,24.15,"S","no"
|
30 |
+
317,2,"Kantor, Mrs. Sinai (Miriam Sternin)","female",24.0,1,0,26.0,"S","yes"
|
31 |
+
617,3,"Danbom, Mr. Ernst Gilbert","male",34.0,1,1,14.4,"S","no"
|
32 |
+
206,3,"Strom, Miss. Telma Matilda","female",2.0,0,1,10.4625,"S","no"
|
33 |
+
274,1,"Natsch, Mr. Charles H","male",37.0,0,1,29.7,"C","no"
|
34 |
+
567,3,"Stoytcheff, Mr. Ilia","male",19.0,0,0,7.8958,"S","no"
|
35 |
+
632,3,"Lundahl, Mr. Johan Svensson","male",51.0,0,0,7.0542,"S","no"
|
36 |
+
888,1,"Graham, Miss. Margaret Edith","female",19.0,0,0,30.0,"S","yes"
|
37 |
+
480,3,"Hirvonen, Miss. Hildur E","female",2.0,0,1,12.2875,"S","yes"
|
38 |
+
477,2,"Renouf, Mr. Peter Henry","male",34.0,1,0,21.0,"S","no"
|
39 |
+
424,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria Brogren)","female",28.0,1,1,14.4,"S","no"
|
40 |
+
741,1,"Hawksford, Mr. Walter James","male","_GSK_NA_",0,0,30.0,"S","yes"
|
41 |
+
531,2,"Quick, Miss. Phyllis May","female",2.0,1,1,26.0,"S","yes"
|
42 |
+
799,3,"Ibrahim Shawah, Mr. Yousseff","male",30.0,0,0,7.2292,"C","no"
|
43 |
+
160,3,"Sage, Master. Thomas Henry","male","_GSK_NA_",8,2,69.55,"S","no"
|
44 |
+
116,3,"Pekoniemi, Mr. Edvard","male",21.0,0,0,7.925,"S","no"
|
45 |
+
290,3,"Connolly, Miss. Kate","female",22.0,0,0,7.75,"Q","yes"
|
46 |
+
252,3,"Strom, Mrs. Wilhelm (Elna Matilda Persson)","female",29.0,1,1,10.4625,"S","no"
|
47 |
+
306,1,"Allison, Master. Hudson Trevor","male",0.92,1,2,151.55,"S","yes"
|
48 |
+
449,3,"Baclini, Miss. Marie Catherine","female",5.0,2,1,19.2583,"C","yes"
|
49 |
+
483,3,"Rouse, Mr. Richard Henry","male",50.0,0,0,8.05,"S","no"
|
50 |
+
587,2,"Jarvis, Mr. John Denzil","male",47.0,0,0,15.0,"S","no"
|
51 |
+
25,3,"Palsson, Miss. Torborg Danira","female",8.0,3,1,21.075,"S","no"
|
52 |
+
289,2,"Hosono, Mr. Masabumi","male",42.0,0,0,13.0,"S","yes"
|
53 |
+
769,3,"Moran, Mr. Daniel J","male","_GSK_NA_",1,0,24.15,"Q","no"
|
54 |
+
697,3,"Kelly, Mr. James","male",44.0,0,0,8.05,"S","no"
|
55 |
+
172,3,"Rice, Master. Arthur","male",4.0,4,1,29.125,"Q","no"
|
56 |
+
548,2,"Padro y Manent, Mr. Julian","male","_GSK_NA_",0,0,13.8625,"C","yes"
|
57 |
+
586,1,"Taussig, Miss. Ruth","female",18.0,0,2,79.65,"S","yes"
|
58 |
+
52,3,"Nosworthy, Mr. Richard Cater","male",21.0,0,0,7.8,"S","no"
|
59 |
+
862,2,"Giles, Mr. Frederick Edward","male",21.0,1,0,11.5,"S","no"
|
60 |
+
553,3,"O'Brien, Mr. Timothy","male","_GSK_NA_",0,0,7.8292,"Q","no"
|
61 |
+
36,1,"Holverson, Mr. Alexander Oskar","male",42.0,1,0,52.0,"S","no"
|
62 |
+
261,3,"Smith, Mr. Thomas","male","_GSK_NA_",0,0,7.75,"Q","no"
|
63 |
+
366,3,"Adahl, Mr. Mauritz Nils Martin","male",30.0,0,0,7.25,"S","no"
|
64 |
+
201,3,"Vande Walle, Mr. Nestor Cyriel","male",28.0,0,0,9.5,"S","no"
|
65 |
+
761,3,"Garfirth, Mr. John","male","_GSK_NA_",0,0,14.5,"S","no"
|
66 |
+
706,2,"Morley, Mr. Henry Samuel (""Mr Henry Marshall"")","male",39.0,0,0,26.0,"S","no"
|
67 |
+
594,3,"Bourke, Miss. Mary","female","_GSK_NA_",0,2,7.75,"Q","no"
|
68 |
+
53,1,"Harper, Mrs. Henry Sleeper (Myna Haxtun)","female",49.0,1,0,76.7292,"C","yes"
|
69 |
+
546,1,"Nicholson, Mr. Arthur Ernest","male",64.0,0,0,26.0,"S","no"
|
70 |
+
195,1,"Brown, Mrs. James Joseph (Margaret Tobin)","female",44.0,0,0,27.7208,"C","yes"
|
71 |
+
530,2,"Hocking, Mr. Richard George","male",23.0,2,1,11.5,"S","no"
|
72 |
+
702,1,"Silverthorne, Mr. Spencer Victor","male",35.0,0,0,26.2875,"S","yes"
|
73 |
+
279,3,"Rice, Master. Eric","male",7.0,4,1,29.125,"Q","no"
|
74 |
+
223,3,"Green, Mr. George Henry","male",51.0,0,0,8.05,"S","no"
|
75 |
+
372,3,"Wiklund, Mr. Jakob Alfred","male",18.0,1,0,6.4958,"S","no"
|
76 |
+
5,3,"Allen, Mr. William Henry","male",35.0,0,0,8.05,"S","no"
|
77 |
+
519,2,"Angle, Mrs. William A (Florence ""Mary"" Agnes Hughes)","female",36.0,1,0,26.0,"S","yes"
|
78 |
+
326,1,"Young, Miss. Marie Grice","female",36.0,0,0,135.6333,"C","yes"
|
79 |
+
492,3,"Windelov, Mr. Einar","male",21.0,0,0,7.25,"S","no"
|
80 |
+
344,2,"Sedgwick, Mr. Charles Frederick Waddington","male",25.0,0,0,13.0,"S","no"
|
81 |
+
469,3,"Scanlan, Mr. James","male","_GSK_NA_",0,0,7.725,"Q","no"
|
82 |
+
77,3,"Staneff, Mr. Ivan","male","_GSK_NA_",0,0,7.8958,"S","no"
|
83 |
+
272,3,"Tornquist, Mr. William Henry","male",25.0,0,0,0.0,"S","yes"
|
84 |
+
753,3,"Vande Velde, Mr. Johannes Joseph","male",33.0,0,0,9.5,"S","no"
|
85 |
+
658,3,"Bourke, Mrs. John (Catherine)","female",32.0,1,1,15.5,"Q","no"
|
86 |
+
388,2,"Buss, Miss. Kate","female",36.0,0,0,13.0,"S","yes"
|
87 |
+
738,1,"Lesurer, Mr. Gustave J","male",35.0,0,0,512.3292,"C","yes"
|
88 |
+
823,1,"Reuchlin, Jonkheer. John George","male",38.0,0,0,0.0,"S","no"
|
89 |
+
814,3,"Andersson, Miss. Ebba Iris Alfrida","female",6.0,4,2,31.275,"S","no"
|
90 |
+
596,3,"Van Impe, Mr. Jean Baptiste","male",36.0,1,1,24.15,"S","no"
|
91 |
+
468,1,"Smart, Mr. John Montgomery","male",56.0,0,0,26.55,"S","no"
|
92 |
+
95,3,"Coxon, Mr. Daniel","male",59.0,0,0,7.25,"S","no"
|
93 |
+
148,3,"Ford, Miss. Robina Maggie ""Ruby""","female",9.0,2,2,34.375,"S","no"
|
94 |
+
704,3,"Gallagher, Mr. Martin","male",25.0,0,0,7.7417,"Q","no"
|
95 |
+
426,3,"Wiseman, Mr. Phillippe","male","_GSK_NA_",0,0,7.25,"S","no"
|
96 |
+
730,3,"Ilmakangas, Miss. Pieta Sofia","female",25.0,1,0,7.925,"S","no"
|
97 |
+
525,3,"Kassem, Mr. Fared","male","_GSK_NA_",0,0,7.2292,"C","no"
|
98 |
+
727,2,"Renouf, Mrs. Peter Henry (Lillian Jefferys)","female",30.0,3,0,21.0,"S","yes"
|
99 |
+
578,1,"Silvey, Mrs. William Baird (Alice Munger)","female",39.0,1,0,55.9,"S","yes"
|
100 |
+
467,2,"Campbell, Mr. William","male","_GSK_NA_",0,0,0.0,"S","no"
|
101 |
+
609,2,"Laroche, Mrs. Joseph (Juliette Marie Louise Lafargue)","female",22.0,1,2,41.5792,"C","yes"
|
102 |
+
774,3,"Elias, Mr. Dibo","male","_GSK_NA_",0,0,7.225,"C","no"
|
103 |
+
504,3,"Laitinen, Miss. Kristina Sofia","female",37.0,0,0,9.5875,"S","no"
|
104 |
+
100,2,"Kantor, Mr. Sinai","male",34.0,1,0,26.0,"S","no"
|
105 |
+
320,1,"Spedden, Mrs. Frederic Oakley (Margaretta Corning Stone)","female",40.0,1,1,134.5,"C","yes"
|
106 |
+
98,1,"Greenfield, Mr. William Bertram","male",23.0,0,1,63.3583,"C","yes"
|
107 |
+
880,1,"Potter, Mrs. Thomas Jr (Lily Alexenia Wilson)","female",56.0,0,1,83.1583,"C","yes"
|
108 |
+
716,3,"Soholt, Mr. Peter Andreas Lauritz Andersen","male",19.0,0,0,7.65,"S","no"
|
109 |
+
349,3,"Coutts, Master. William Loch ""William""","male",3.0,1,1,15.9,"S","yes"
|
110 |
+
44,2,"Laroche, Miss. Simonne Marie Anne Andree","female",3.0,1,2,41.5792,"C","yes"
|
111 |
+
631,1,"Barkworth, Mr. Algernon Henry Wilson","male",80.0,0,0,30.0,"S","yes"
|
112 |
+
154,3,"van Billiard, Mr. Austin Blyler","male",40.5,0,2,14.5,"S","no"
|
113 |
+
683,3,"Olsvigen, Mr. Thor Anderson","male",20.0,0,0,9.225,"S","no"
|
114 |
+
92,3,"Andreasson, Mr. Paul Edvin","male",20.0,0,0,7.8542,"S","no"
|
115 |
+
574,3,"Kelly, Miss. Mary","female","_GSK_NA_",0,0,7.75,"Q","yes"
|
116 |
+
541,1,"Crosby, Miss. Harriet R","female",36.0,0,2,71.0,"S","yes"
|
117 |
+
886,3,"Rice, Mrs. William (Margaret Norton)","female",39.0,0,5,29.125,"Q","no"
|
118 |
+
215,3,"Kiernan, Mr. Philip","male","_GSK_NA_",1,0,7.75,"Q","no"
|
119 |
+
381,1,"Bidois, Miss. Rosalie","female",42.0,0,0,227.525,"C","yes"
|
120 |
+
776,3,"Myhrman, Mr. Pehr Fabian Oliver Malkolm","male",18.0,0,0,7.75,"S","no"
|
121 |
+
430,3,"Pickard, Mr. Berk (Berk Trembisky)","male",32.0,0,0,8.05,"S","yes"
|
122 |
+
222,2,"Bracken, Mr. James H","male",27.0,0,0,13.0,"S","no"
|
123 |
+
820,3,"Skoog, Master. Karl Thorsten","male",10.0,3,2,27.9,"S","no"
|
124 |
+
51,3,"Panula, Master. Juha Niilo","male",7.0,4,1,39.6875,"S","no"
|
125 |
+
250,2,"Carter, Rev. Ernest Courtenay","male",54.0,1,0,26.0,"S","no"
|
126 |
+
692,3,"Karun, Miss. Manca","female",4.0,0,1,13.4167,"C","yes"
|
127 |
+
435,1,"Silvey, Mr. William Baird","male",50.0,1,0,55.9,"S","no"
|
128 |
+
781,3,"Ayoub, Miss. Banoura","female",13.0,0,0,7.2292,"C","yes"
|
129 |
+
491,3,"Hagland, Mr. Konrad Mathias Reiersen","male","_GSK_NA_",1,0,19.9667,"S","no"
|
130 |
+
554,3,"Leeni, Mr. Fahim (""Philip Zenni"")","male",22.0,0,0,7.225,"C","yes"
|
131 |
+
656,2,"Hickman, Mr. Leonard Mark","male",24.0,2,0,73.5,"S","no"
|
132 |
+
380,3,"Gustafsson, Mr. Karl Gideon","male",19.0,0,0,7.775,"S","no"
|
133 |
+
509,3,"Olsen, Mr. Henry Margido","male",28.0,0,0,22.525,"S","no"
|
134 |
+
230,3,"Lefebre, Miss. Mathilde","female","_GSK_NA_",3,1,25.4667,"S","no"
|
135 |
+
458,1,"Kenyon, Mrs. Frederick R (Marion)","female","_GSK_NA_",1,0,51.8625,"S","yes"
|
136 |
+
733,2,"Knight, Mr. Robert J","male","_GSK_NA_",0,0,0.0,"S","no"
|
137 |
+
740,3,"Nankoff, Mr. Minko","male","_GSK_NA_",0,0,7.8958,"S","no"
|
138 |
+
420,3,"Van Impe, Miss. Catharina","female",10.0,0,2,24.15,"S","no"
|
139 |
+
175,1,"Smith, Mr. James Clinch","male",56.0,0,0,30.6958,"C","no"
|
140 |
+
767,1,"Brewe, Dr. Arthur Jackson","male","_GSK_NA_",0,0,39.6,"C","no"
|
141 |
+
608,1,"Daniel, Mr. Robert Williams","male",27.0,0,0,30.5,"S","yes"
|
142 |
+
75,3,"Bing, Mr. Lee","male",32.0,0,0,56.4958,"S","yes"
|
143 |
+
242,3,"Murphy, Miss. Katherine ""Kate""","female","_GSK_NA_",1,0,15.5,"Q","yes"
|
144 |
+
506,1,"Penasco y Castellana, Mr. Victor de Satode","male",18.0,1,0,108.9,"C","no"
|
145 |
+
481,3,"Goodwin, Master. Harold Victor","male",9.0,5,2,46.9,"S","no"
|
146 |
+
461,1,"Anderson, Mr. Harry","male",48.0,0,0,26.55,"S","yes"
|
147 |
+
185,3,"Kink-Heilmann, Miss. Luise Gretchen","female",4.0,0,2,22.025,"S","yes"
|
148 |
+
866,2,"Bystrom, Mrs. (Karolina)","female",42.0,0,0,13.0,"S","yes"
|
149 |
+
165,3,"Panula, Master. Eino Viljami","male",1.0,4,1,39.6875,"S","no"
|
150 |
+
406,2,"Gale, Mr. Shadrach","male",34.0,1,0,21.0,"S","no"
|
151 |
+
248,2,"Hamalainen, Mrs. William (Anna)","female",24.0,0,2,14.5,"S","yes"
|
152 |
+
211,3,"Ali, Mr. Ahmed","male",24.0,0,0,7.05,"S","no"
|
153 |
+
337,1,"Pears, Mr. Thomas Clinton","male",29.0,1,0,66.6,"S","no"
|
154 |
+
879,3,"Laleff, Mr. Kristo","male","_GSK_NA_",0,0,7.8958,"S","no"
|
155 |
+
15,3,"Vestrom, Miss. Hulda Amanda Adolfina","female",14.0,0,0,7.8542,"S","no"
|
156 |
+
56,1,"Woolner, Mr. Hugh","male","_GSK_NA_",0,0,35.5,"S","yes"
|
157 |
+
302,3,"McCoy, Mr. Bernard","male","_GSK_NA_",2,0,23.25,"Q","yes"
|
158 |
+
97,1,"Goldschmidt, Mr. George B","male",71.0,0,0,34.6542,"C","no"
|
159 |
+
600,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")","male",49.0,1,0,56.9292,"C","yes"
|
160 |
+
876,3,"Najib, Miss. Adele Kiamie ""Jane""","female",15.0,0,0,7.225,"C","yes"
|
161 |
+
731,1,"Allen, Miss. Elisabeth Walton","female",29.0,0,0,211.3375,"S","yes"
|
162 |
+
744,3,"McNamee, Mr. Neal","male",24.0,1,0,16.1,"S","no"
|
163 |
+
30,3,"Todoroff, Mr. Lalio","male","_GSK_NA_",0,0,7.8958,"S","no"
|
164 |
+
673,2,"Mitchell, Mr. Henry Michael","male",70.0,0,0,10.5,"S","no"
|
165 |
+
841,3,"Alhomaki, Mr. Ilmari Rudolf","male",20.0,0,0,7.925,"S","no"
|
166 |
+
140,1,"Giglio, Mr. Victor","male",24.0,0,0,79.2,"C","no"
|
167 |
+
62,1,"Icard, Miss. Amelie","female",38.0,0,0,80.0,"_GSK_NA_","yes"
|
168 |
+
639,3,"Panula, Mrs. Juha (Maria Emilia Ojala)","female",41.0,0,5,39.6875,"S","no"
|
169 |
+
693,3,"Lam, Mr. Ali","male","_GSK_NA_",0,0,56.4958,"S","yes"
|
170 |
+
176,3,"Klasen, Mr. Klas Albin","male",18.0,1,1,7.8542,"S","no"
|
171 |
+
417,2,"Drew, Mrs. James Vivian (Lulu Thorne Christian)","female",34.0,1,1,32.5,"S","yes"
|
172 |
+
348,3,"Davison, Mrs. Thomas Henry (Mary E Finck)","female","_GSK_NA_",1,0,16.1,"S","yes"
|
173 |
+
542,3,"Andersson, Miss. Ingeborg Constanzia","female",9.0,4,2,31.275,"S","no"
|
174 |
+
433,2,"Louch, Mrs. Charles Alexander (Alice Adelaide Slow)","female",42.0,1,0,26.0,"S","yes"
|
175 |
+
760,1,"Rothes, the Countess. of (Lucy Noel Martha Dyer-Edwards)","female",33.0,0,0,86.5,"S","yes"
|
176 |
+
725,1,"Chambers, Mr. Norman Campbell","male",27.0,1,0,53.1,"S","yes"
|
177 |
+
451,2,"West, Mr. Edwy Arthur","male",36.0,1,2,27.75,"S","no"
|
178 |
+
91,3,"Christmann, Mr. Emil","male",29.0,0,0,8.05,"S","no"
|
179 |
+
76,3,"Moen, Mr. Sigurd Hansen","male",25.0,0,0,7.65,"S","no"
|
180 |
+
47,3,"Lennon, Mr. Denis","male","_GSK_NA_",1,0,15.5,"Q","no"
|
181 |
+
65,1,"Stewart, Mr. Albert A","male","_GSK_NA_",0,0,27.7208,"C","no"
|
182 |
+
258,1,"Cherry, Miss. Gladys","female",30.0,0,0,86.5,"S","yes"
|
183 |
+
214,2,"Givard, Mr. Hans Kristensen","male",30.0,0,0,13.0,"S","no"
|
184 |
+
245,3,"Attalah, Mr. Sleiman","male",30.0,0,0,7.225,"C","no"
|
185 |
+
599,3,"Boulos, Mr. Hanna","male","_GSK_NA_",0,0,7.225,"C","no"
|
186 |
+
400,2,"Trout, Mrs. William H (Jessie L)","female",28.0,0,0,12.65,"S","yes"
|
187 |
+
772,3,"Jensen, Mr. Niels Peder","male",48.0,0,0,7.8542,"S","no"
|
188 |
+
37,3,"Mamee, Mr. Hanna","male","_GSK_NA_",0,0,7.2292,"C","yes"
|
189 |
+
114,3,"Jussila, Miss. Katriina","female",20.0,1,0,9.825,"S","no"
|
190 |
+
853,3,"Boulos, Miss. Nourelain","female",9.0,1,1,15.2458,"C","no"
|
191 |
+
676,3,"Edvardsson, Mr. Gustaf Hjalmar","male",18.0,0,0,7.775,"S","no"
|
192 |
+
287,3,"de Mulder, Mr. Theodore","male",30.0,0,0,9.5,"S","yes"
|
193 |
+
583,2,"Downton, Mr. William James","male",54.0,0,0,26.0,"S","no"
|
194 |
+
71,2,"Jenkin, Mr. Stephen Curnow","male",32.0,0,0,10.5,"S","no"
|
195 |
+
120,3,"Andersson, Miss. Ellis Anna Maria","female",2.0,4,2,31.275,"S","no"
|
196 |
+
144,3,"Burke, Mr. Jeremiah","male",19.0,0,0,6.75,"Q","no"
|
197 |
+
493,1,"Molson, Mr. Harry Markland","male",55.0,0,0,30.5,"S","no"
|
198 |
+
870,3,"Johnson, Master. Harold Theodor","male",4.0,1,1,11.1333,"S","yes"
|
199 |
+
869,3,"van Melkebeke, Mr. Philemon","male","_GSK_NA_",0,0,9.5,"S","no"
|
200 |
+
13,3,"Saundercock, Mr. William Henry","male",20.0,0,0,8.05,"S","no"
|
201 |
+
685,2,"Brown, Mr. Thomas William Solomon","male",60.0,1,1,39.0,"S","no"
|
202 |
+
643,3,"Skoog, Miss. Margit Elizabeth","female",2.0,3,2,27.9,"S","no"
|
203 |
+
87,3,"Ford, Mr. William Neal","male",16.0,1,3,34.375,"S","no"
|
204 |
+
296,1,"Lewy, Mr. Ervin G","male","_GSK_NA_",0,0,27.7208,"C","no"
|
205 |
+
694,3,"Saad, Mr. Khalil","male",25.0,0,0,7.225,"C","no"
|
206 |
+
410,3,"Lefebre, Miss. Ida","female","_GSK_NA_",3,1,25.4667,"S","no"
|
207 |
+
645,3,"Baclini, Miss. Eugenie","female",0.75,2,1,19.2583,"C","yes"
|
208 |
+
803,1,"Carter, Master. William Thornton II","male",11.0,1,2,120.0,"S","yes"
|
209 |
+
450,1,"Peuchen, Major. Arthur Godfrey","male",52.0,0,0,30.5,"S","yes"
|
210 |
+
550,2,"Davies, Master. John Morgan Jr","male",8.0,1,1,36.75,"S","yes"
|
211 |
+
352,1,"Williams-Lambert, Mr. Fletcher Fellows","male","_GSK_NA_",0,0,35.0,"S","no"
|
212 |
+
580,3,"Jussila, Mr. Eiriik","male",32.0,0,0,7.925,"S","yes"
|
213 |
+
319,1,"Wick, Miss. Mary Natalie","female",31.0,0,2,164.8667,"S","yes"
|
214 |
+
831,3,"Yasbeck, Mrs. Antoni (Selini Alexander)","female",15.0,1,0,14.4542,"C","yes"
|
215 |
+
777,3,"Tobin, Mr. Roger","male","_GSK_NA_",0,0,7.75,"Q","no"
|
216 |
+
341,2,"Navratil, Master. Edmond Roger","male",2.0,1,1,26.0,"S","yes"
|
217 |
+
871,3,"Balkic, Mr. Cerin","male",26.0,0,0,7.8958,"S","no"
|
218 |
+
271,1,"Cairns, Mr. Alexander","male","_GSK_NA_",0,0,31.0,"S","no"
|
219 |
+
755,2,"Herman, Mrs. Samuel (Jane Laver)","female",48.0,1,2,65.0,"S","yes"
|
220 |
+
110,3,"Moran, Miss. Bertha","female","_GSK_NA_",1,0,24.15,"Q","yes"
|
221 |
+
829,3,"McCormack, Mr. Thomas Joseph","male","_GSK_NA_",0,0,7.75,"Q","yes"
|
222 |
+
448,1,"Seward, Mr. Frederic Kimber","male",34.0,0,0,26.55,"S","yes"
|
223 |
+
33,3,"Glynn, Miss. Mary Agatha","female","_GSK_NA_",0,0,7.75,"Q","yes"
|
224 |
+
465,3,"Maisner, Mr. Simon","male","_GSK_NA_",0,0,8.05,"S","no"
|
225 |
+
427,2,"Clarke, Mrs. Charles V (Ada Maria Winfield)","female",28.0,1,0,26.0,"S","yes"
|
226 |
+
204,3,"Youseff, Mr. Gerious","male",45.5,0,0,7.225,"C","no"
|
227 |
+
431,1,"Bjornstrom-Steffansson, Mr. Mauritz Hakan","male",28.0,0,0,26.55,"S","yes"
|
228 |
+
732,3,"Hassan, Mr. Houssein G N","male",11.0,0,0,18.7875,"C","no"
|
229 |
+
787,3,"Sjoblom, Miss. Anna Sofia","female",18.0,0,0,7.4958,"S","yes"
|
230 |
+
508,1,"Bradley, Mr. George (""George Arthur Brayton"")","male","_GSK_NA_",0,0,26.55,"S","yes"
|
231 |
+
802,2,"Collyer, Mrs. Harvey (Charlotte Annie Tate)","female",31.0,1,1,26.25,"S","yes"
|
232 |
+
310,1,"Francatelli, Miss. Laura Mabel","female",30.0,0,0,56.9292,"C","yes"
|
233 |
+
107,3,"Salkjelsvik, Miss. Anna Kristine","female",21.0,0,0,7.65,"S","yes"
|
234 |
+
299,1,"Saalfeld, Mr. Adolphe","male","_GSK_NA_",0,0,30.5,"S","yes"
|
235 |
+
459,2,"Toomey, Miss. Ellen","female",50.0,0,0,10.5,"S","yes"
|
236 |
+
641,3,"Jensen, Mr. Hans Peder","male",20.0,0,0,7.8542,"S","no"
|
237 |
+
668,3,"Rommetvedt, Mr. Knud Paust","male","_GSK_NA_",0,0,7.775,"S","no"
|
238 |
+
523,3,"Lahoud, Mr. Sarkis","male","_GSK_NA_",0,0,7.225,"C","no"
|
239 |
+
710,3,"Moubarek, Master. Halim Gonios (""William George"")","male","_GSK_NA_",1,1,15.2458,"C","yes"
|
240 |
+
249,1,"Beckwith, Mr. Richard Leonard","male",37.0,1,1,52.5542,"S","yes"
|
241 |
+
677,3,"Sawyer, Mr. Frederick Charles","male",24.5,0,0,8.05,"S","no"
|
242 |
+
595,2,"Chapman, Mr. John Henry","male",37.0,1,0,26.0,"S","no"
|
243 |
+
667,2,"Butler, Mr. Reginald Fenton","male",25.0,0,0,13.0,"S","no"
|
244 |
+
537,1,"Butt, Major. Archibald Willingham","male",45.0,0,0,26.55,"S","no"
|
245 |
+
666,2,"Hickman, Mr. Lewis","male",32.0,2,0,73.5,"S","no"
|
246 |
+
581,2,"Christy, Miss. Julie Rachel","female",25.0,1,1,30.0,"S","yes"
|
247 |
+
630,3,"O'Connell, Mr. Patrick D","male","_GSK_NA_",0,0,7.7333,"Q","no"
|
248 |
+
648,1,"Simonius-Blumer, Col. Oberst Alfons","male",56.0,0,0,35.5,"C","yes"
|
249 |
+
878,3,"Petroff, Mr. Nedelio","male",19.0,0,0,7.8958,"S","no"
|
250 |
+
269,1,"Graham, Mrs. William Thompson (Edith Junkins)","female",58.0,0,1,153.4625,"S","yes"
|
251 |
+
234,3,"Asplund, Miss. Lillian Gertrud","female",5.0,4,2,31.3875,"S","yes"
|
252 |
+
644,3,"Foo, Mr. Choong","male","_GSK_NA_",0,0,56.4958,"S","yes"
|
253 |
+
118,2,"Turpin, Mr. William John Robert","male",29.0,1,0,21.0,"S","no"
|
254 |
+
333,1,"Graham, Mr. George Edward","male",38.0,0,1,153.4625,"S","no"
|
255 |
+
454,1,"Goldenberg, Mr. Samuel L","male",49.0,1,0,89.1042,"C","yes"
|
256 |
+
139,3,"Osen, Mr. Olaf Elon","male",16.0,0,0,9.2167,"S","no"
|
257 |
+
606,3,"Lindell, Mr. Edvard Bengtsson","male",36.0,1,0,15.55,"S","no"
|
258 |
+
535,3,"Cacic, Miss. Marija","female",30.0,0,0,8.6625,"S","no"
|
259 |
+
221,3,"Sunderland, Mr. Victor Francis","male",16.0,0,0,8.05,"S","yes"
|
260 |
+
444,2,"Reynaldo, Ms. Encarnacion","female",28.0,0,0,13.0,"S","yes"
|
261 |
+
330,1,"Hippach, Miss. Jean Gertrude","female",16.0,0,1,57.9792,"C","yes"
|
262 |
+
805,3,"Hedman, Mr. Oskar Arvid","male",27.0,0,0,6.975,"S","yes"
|
263 |
+
55,1,"Ostby, Mr. Engelhart Cornelius","male",65.0,0,1,61.9792,"C","no"
|
264 |
+
528,1,"Farthing, Mr. John","male","_GSK_NA_",0,0,221.7792,"S","no"
|
265 |
+
359,3,"McGovern, Miss. Mary","female","_GSK_NA_",0,0,7.8792,"Q","yes"
|
266 |
+
354,3,"Arnold-Franchi, Mr. Josef","male",25.0,1,0,17.8,"S","no"
|
267 |
+
678,3,"Turja, Miss. Anna Sofia","female",18.0,0,0,9.8417,"S","yes"
|
268 |
+
273,2,"Mellinger, Mrs. (Elizabeth Anne Maidment)","female",41.0,0,1,19.5,"S","yes"
|
269 |
+
429,3,"Flynn, Mr. James","male","_GSK_NA_",0,0,7.75,"Q","no"
|
270 |
+
536,2,"Hart, Miss. Eva Miriam","female",7.0,0,2,26.25,"S","yes"
|
271 |
+
838,3,"Sirota, Mr. Maurice","male","_GSK_NA_",0,0,8.05,"S","no"
|
272 |
+
179,2,"Hale, Mr. Reginald","male",30.0,0,0,13.0,"S","no"
|
273 |
+
339,3,"Dahl, Mr. Karl Edwart","male",45.0,0,0,8.05,"S","yes"
|
274 |
+
724,2,"Hodges, Mr. Henry Price","male",50.0,0,0,13.0,"S","no"
|
275 |
+
524,1,"Hippach, Mrs. Louis Albert (Ida Sophia Fischer)","female",44.0,0,1,57.9792,"C","yes"
|
276 |
+
734,2,"Berriman, Mr. William John","male",23.0,0,0,13.0,"S","no"
|
277 |
+
164,3,"Calic, Mr. Jovo","male",17.0,0,0,8.6625,"S","no"
|
278 |
+
304,2,"Keane, Miss. Nora A","female","_GSK_NA_",0,0,12.35,"Q","yes"
|
279 |
+
356,3,"Vanden Steen, Mr. Leo Peter","male",28.0,0,0,9.5,"S","no"
|
280 |
+
436,1,"Carter, Miss. Lucile Polk","female",14.0,1,2,120.0,"S","yes"
|
281 |
+
622,1,"Kimball, Mr. Edwin Nelson Jr","male",42.0,1,0,52.5542,"S","yes"
|
282 |
+
551,1,"Thayer, Mr. John Borland Jr","male",17.0,0,2,110.8833,"C","yes"
|
283 |
+
109,3,"Rekic, Mr. Tido","male",38.0,0,0,7.8958,"S","no"
|
284 |
+
265,3,"Henry, Miss. Delia","female","_GSK_NA_",0,0,7.75,"Q","no"
|
285 |
+
628,1,"Longley, Miss. Gretchen Fiske","female",21.0,0,0,77.9583,"S","yes"
|
286 |
+
394,1,"Newell, Miss. Marjorie","female",23.0,1,0,113.275,"C","yes"
|
287 |
+
748,2,"Sinkkonen, Miss. Anna","female",30.0,0,0,13.0,"S","yes"
|
288 |
+
698,3,"Mullens, Miss. Katherine ""Katie""","female","_GSK_NA_",0,0,7.7333,"Q","yes"
|
289 |
+
66,3,"Moubarek, Master. Gerios","male","_GSK_NA_",1,1,15.2458,"C","yes"
|
290 |
+
681,3,"Peters, Miss. Katie","female","_GSK_NA_",0,0,8.1375,"Q","no"
|
291 |
+
663,1,"Colley, Mr. Edward Pomeroy","male",47.0,0,0,25.5875,"S","no"
|
292 |
+
158,3,"Corn, Mr. Harry","male",30.0,0,0,8.05,"S","no"
|
293 |
+
298,1,"Allison, Miss. Helen Loraine","female",2.0,1,2,151.55,"S","no"
|
294 |
+
674,2,"Wilhelms, Mr. Charles","male",31.0,0,0,13.0,"S","yes"
|
295 |
+
808,3,"Pettersson, Miss. Ellen Natalia","female",18.0,0,0,7.775,"S","no"
|
296 |
+
545,1,"Douglas, Mr. Walter Donald","male",50.0,1,0,106.425,"C","no"
|
297 |
+
338,1,"Burns, Miss. Elizabeth Margaret","female",41.0,0,0,134.5,"C","yes"
|
298 |
+
833,3,"Saad, Mr. Amin","male","_GSK_NA_",0,0,7.2292,"C","no"
|
299 |
+
94,3,"Dean, Mr. Bertram Frank","male",26.0,1,2,20.575,"S","no"
|
300 |
+
133,3,"Robins, Mrs. Alexander A (Grace Charity Laury)","female",47.0,1,0,14.5,"S","no"
|
301 |
+
383,3,"Tikkanen, Mr. Juho","male",32.0,0,0,7.925,"S","no"
|
302 |
+
720,3,"Johnson, Mr. Malkolm Joackim","male",33.0,0,0,7.775,"S","no"
|
303 |
+
739,3,"Ivanoff, Mr. Kanio","male","_GSK_NA_",0,0,7.8958,"S","no"
|
304 |
+
343,2,"Collander, Mr. Erik Gustaf","male",28.0,0,0,13.0,"S","no"
|
305 |
+
647,3,"Cor, Mr. Liudevit","male",19.0,0,0,7.8958,"S","no"
|
306 |
+
286,3,"Stankovic, Mr. Ivan","male",33.0,0,0,8.6625,"C","no"
|
307 |
+
743,1,"Ryerson, Miss. Susan Parker ""Suzette""","female",21.0,2,2,262.375,"C","yes"
|
308 |
+
371,1,"Harder, Mr. George Achilles","male",25.0,1,0,55.4417,"C","yes"
|
309 |
+
457,1,"Millet, Mr. Francis Davis","male",65.0,0,0,26.55,"S","no"
|
310 |
+
882,3,"Markun, Mr. Johann","male",33.0,0,0,7.8958,"S","no"
|
311 |
+
884,2,"Banfield, Mr. Frederick James","male",28.0,0,0,10.5,"S","no"
|
312 |
+
560,3,"de Messemaeker, Mrs. Guillaume Joseph (Emma)","female",36.0,1,0,17.4,"S","yes"
|
313 |
+
168,3,"Skoog, Mrs. William (Anna Bernhardina Karlsson)","female",45.0,1,4,27.9,"S","no"
|
314 |
+
636,2,"Davis, Miss. Mary","female",28.0,0,0,13.0,"S","yes"
|
315 |
+
885,3,"Sutehall, Mr. Henry Jr","male",25.0,0,0,7.05,"S","no"
|
316 |
+
131,3,"Drazenoic, Mr. Jozef","male",33.0,0,0,7.8958,"C","no"
|
317 |
+
505,1,"Maioni, Miss. Roberta","female",16.0,0,0,86.5,"S","yes"
|
318 |
+
332,1,"Partner, Mr. Austen","male",45.5,0,0,28.5,"S","no"
|
319 |
+
132,3,"Coelho, Mr. Domingos Fernandeo","male",20.0,0,0,7.05,"S","no"
|
320 |
+
500,3,"Svensson, Mr. Olof","male",24.0,0,0,7.7958,"S","no"
|
321 |
+
135,2,"Sobey, Mr. Samuel James Hayden","male",25.0,0,0,13.0,"S","no"
|
322 |
+
192,2,"Carbines, Mr. William","male",19.0,0,0,13.0,"S","no"
|
323 |
+
61,3,"Sirayanian, Mr. Orsen","male",22.0,0,0,7.2292,"C","no"
|
324 |
+
819,3,"Holm, Mr. John Fredrik Alexander","male",43.0,0,0,6.45,"S","no"
|
325 |
+
428,2,"Phillips, Miss. Kate Florence (""Mrs Kate Louise Phillips Marshall"")","female",19.0,0,0,26.0,"S","yes"
|
326 |
+
161,3,"Cribb, Mr. John Hatfield","male",44.0,0,1,16.1,"S","no"
|
327 |
+
117,3,"Connors, Mr. Patrick","male",70.5,0,0,7.75,"Q","no"
|
328 |
+
839,3,"Chip, Mr. Chang","male",32.0,0,0,56.4958,"S","yes"
|
329 |
+
861,3,"Hansen, Mr. Claus Peter","male",41.0,2,0,14.1083,"S","no"
|
330 |
+
688,3,"Dakic, Mr. Branko","male",19.0,0,0,10.1708,"S","no"
|
331 |
+
283,3,"de Pelsmaeker, Mr. Alfons","male",16.0,0,0,9.5,"S","no"
|
332 |
+
402,3,"Adams, Mr. John","male",26.0,0,0,8.05,"S","no"
|
333 |
+
843,1,"Serepeca, Miss. Augusta","female",30.0,0,0,31.0,"C","yes"
|
334 |
+
48,3,"O'Driscoll, Miss. Bridget","female","_GSK_NA_",0,0,7.75,"Q","yes"
|
335 |
+
770,3,"Gronnestad, Mr. Daniel Danielsen","male",32.0,0,0,8.3625,"S","no"
|
336 |
+
405,3,"Oreskovic, Miss. Marija","female",20.0,0,0,8.6625,"S","no"
|
337 |
+
874,3,"Vander Cruyssen, Mr. Victor","male",47.0,0,0,9.0,"S","no"
|
338 |
+
196,1,"Lurette, Miss. Elise","female",58.0,0,0,146.5208,"C","yes"
|
339 |
+
167,1,"Chibnall, Mrs. (Edith Martha Bowerman)","female","_GSK_NA_",0,1,55.0,"S","yes"
|
340 |
+
517,2,"Lemore, Mrs. (Amelia Milley)","female",34.0,0,0,10.5,"S","yes"
|
341 |
+
526,3,"Farrell, Mr. James","male",40.5,0,0,7.75,"Q","no"
|
342 |
+
473,2,"West, Mrs. Edwy Arthur (Ada Mary Worth)","female",33.0,1,2,27.75,"S","yes"
|
343 |
+
113,3,"Barton, Mr. David John","male",22.0,0,0,8.05,"S","no"
|
344 |
+
701,1,"Astor, Mrs. John Jacob (Madeleine Talmadge Force)","female",18.0,1,0,227.525,"C","yes"
|
345 |
+
369,3,"Jermyn, Miss. Annie","female","_GSK_NA_",0,0,7.75,"Q","yes"
|
346 |
+
779,3,"Kilgannon, Mr. Thomas J","male","_GSK_NA_",0,0,7.7375,"Q","no"
|
347 |
+
475,3,"Strandberg, Miss. Ida Sofia","female",22.0,0,0,9.8375,"S","no"
|
348 |
+
184,2,"Becker, Master. Richard F","male",1.0,2,1,39.0,"S","yes"
|
349 |
+
707,2,"Kelly, Mrs. Florence ""Fannie""","female",45.0,0,0,13.5,"S","yes"
|
350 |
+
136,2,"Richard, Mr. Emile","male",23.0,0,0,15.0458,"C","no"
|
351 |
+
865,2,"Gill, Mr. John William","male",24.0,0,0,13.0,"S","no"
|
352 |
+
364,3,"Asim, Mr. Adola","male",35.0,0,0,7.05,"S","no"
|
353 |
+
149,2,"Navratil, Mr. Michel (""Louis M Hoffman"")","male",36.5,0,2,26.0,"S","no"
|
354 |
+
789,3,"Dean, Master. Bertram Vere","male",1.0,1,2,20.575,"S","yes"
|
355 |
+
745,3,"Stranden, Mr. Juho","male",31.0,0,0,7.925,"S","yes"
|
356 |
+
293,2,"Levy, Mr. Rene Jacques","male",36.0,0,0,12.875,"C","no"
|
357 |
+
726,3,"Oreskovic, Mr. Luka","male",20.0,0,0,8.6625,"S","no"
|
358 |
+
679,3,"Goodwin, Mrs. Frederick (Augusta Tyler)","female",43.0,1,6,46.9,"S","no"
|
359 |
+
476,1,"Clifford, Mr. George Quincy","male","_GSK_NA_",0,0,52.0,"S","no"
|
360 |
+
157,3,"Gilnagh, Miss. Katherine ""Katie""","female",16.0,0,0,7.7333,"Q","yes"
|
361 |
+
875,2,"Abelson, Mrs. Samuel (Hannah Wizosky)","female",28.0,1,0,24.0,"C","yes"
|
362 |
+
193,3,"Andersen-Jensen, Miss. Carla Christine Nielsine","female",19.0,1,0,7.8542,"S","yes"
|
363 |
+
357,1,"Bowerman, Miss. Elsie Edith","female",22.0,0,1,55.0,"S","yes"
|
364 |
+
610,1,"Shutes, Miss. Elizabeth W","female",40.0,0,0,153.4625,"S","yes"
|
365 |
+
568,3,"Palsson, Mrs. Nils (Alma Cornelia Berglund)","female",29.0,0,4,21.075,"S","no"
|
366 |
+
634,1,"Parr, Mr. William Henry Marsh","male","_GSK_NA_",0,0,0.0,"S","no"
|
367 |
+
18,2,"Williams, Mr. Charles Eugene","male","_GSK_NA_",0,0,13.0,"S","yes"
|
368 |
+
751,2,"Wells, Miss. Joan","female",4.0,1,1,23.0,"S","yes"
|
369 |
+
128,3,"Madsen, Mr. Fridtjof Arne","male",24.0,0,0,7.1417,"S","yes"
|
370 |
+
38,3,"Cann, Mr. Ernest Charles","male",21.0,0,0,8.05,"S","no"
|
371 |
+
564,3,"Simmons, Mr. John","male","_GSK_NA_",0,0,8.05,"S","no"
|
372 |
+
224,3,"Nenkoff, Mr. Christo","male","_GSK_NA_",0,0,7.8958,"S","no"
|
373 |
+
266,2,"Reeves, Mr. David","male",36.0,0,0,10.5,"S","no"
|
374 |
+
397,3,"Olsson, Miss. Elina","female",31.0,0,0,7.8542,"S","no"
|
375 |
+
754,3,"Jonkoff, Mr. Lalio","male",23.0,0,0,7.8958,"S","no"
|
376 |
+
412,3,"Hart, Mr. Henry","male","_GSK_NA_",0,0,6.8583,"Q","no"
|
377 |
+
890,1,"Behr, Mr. Karl Howell","male",26.0,0,0,30.0,"C","yes"
|
378 |
+
709,1,"Cleaver, Miss. Alice","female",22.0,0,0,151.55,"S","yes"
|
379 |
+
818,2,"Mallet, Mr. Albert","male",31.0,1,1,37.0042,"C","no"
|
380 |
+
336,3,"Denkoff, Mr. Mitto","male","_GSK_NA_",0,0,7.8958,"S","no"
|
381 |
+
809,2,"Meyer, Mr. August","male",39.0,0,0,13.0,"S","no"
|
382 |
+
373,3,"Beavan, Mr. William Thomas","male",19.0,0,0,8.05,"S","no"
|
383 |
+
311,1,"Hays, Miss. Margaret Bechstein","female",24.0,0,0,83.1583,"C","yes"
|
384 |
+
181,3,"Sage, Miss. Constance Gladys","female","_GSK_NA_",8,2,69.55,"S","no"
|
385 |
+
392,3,"Jansson, Mr. Carl Olof","male",21.0,0,0,7.7958,"S","yes"
|
386 |
+
496,3,"Yousseff, Mr. Gerious","male","_GSK_NA_",0,0,14.4583,"C","no"
|
387 |
+
81,3,"Waelens, Mr. Achille","male",22.0,0,0,9.0,"S","no"
|
388 |
+
125,1,"White, Mr. Percival Wayland","male",54.0,0,1,77.2875,"S","no"
|
389 |
+
301,3,"Kelly, Miss. Anna Katherine ""Annie Kate""","female","_GSK_NA_",0,0,7.75,"Q","yes"
|
390 |
+
816,1,"Fry, Mr. Richard","male","_GSK_NA_",0,0,0.0,"S","no"
|
391 |
+
794,1,"Hoyt, Mr. William Fisher","male","_GSK_NA_",0,0,30.6958,"C","no"
|
392 |
+
867,2,"Duran y More, Miss. Asuncion","female",27.0,1,0,13.8583,"C","yes"
|
393 |
+
759,3,"Theobald, Mr. Thomas Leonard","male",34.0,0,0,8.05,"S","no"
|
394 |
+
793,3,"Sage, Miss. Stella Anna","female","_GSK_NA_",8,2,69.55,"S","no"
|
395 |
+
764,1,"Carter, Mrs. William Ernest (Lucile Polk)","female",36.0,1,2,120.0,"S","yes"
|
396 |
+
687,3,"Panula, Mr. Jaako Arnold","male",14.0,4,1,39.6875,"S","no"
|
397 |
+
246,1,"Minahan, Dr. William Edward","male",44.0,2,0,90.0,"Q","no"
|
398 |
+
309,2,"Abelson, Mr. Samuel","male",30.0,1,0,24.0,"C","no"
|
399 |
+
708,1,"Calderhead, Mr. Edward Pennington","male",42.0,0,0,26.2875,"S","yes"
|
400 |
+
848,3,"Markoff, Mr. Marin","male",35.0,0,0,7.8958,"C","no"
|
401 |
+
825,3,"Panula, Master. Urho Abraham","male",2.0,4,1,39.6875,"S","no"
|
402 |
+
690,1,"Madill, Miss. Georgette Alexandra","female",15.0,0,1,211.3375,"S","yes"
|
403 |
+
385,3,"Plotcharsky, Mr. Vasil","male","_GSK_NA_",0,0,7.8958,"S","no"
|
404 |
+
758,2,"Bailey, Mr. Percy Andrew","male",18.0,0,0,11.5,"S","no"
|
405 |
+
233,2,"Sjostedt, Mr. Ernst Adolf","male",59.0,0,0,13.5,"S","no"
|
406 |
+
651,3,"Mitkoff, Mr. Mito","male","_GSK_NA_",0,0,7.8958,"S","no"
|
407 |
+
616,2,"Herman, Miss. Alice","female",24.0,1,2,65.0,"S","yes"
|
408 |
+
19,3,"Vander Planke, Mrs. Julius (Emelia Maria Vandemoortele)","female",31.0,1,0,18.0,"S","no"
|
409 |
+
183,3,"Asplund, Master. Clarence Gustaf Hugo","male",9.0,4,2,31.3875,"S","no"
|
410 |
+
597,2,"Leitch, Miss. Jessie Wills","female","_GSK_NA_",0,0,33.0,"S","yes"
|
411 |
+
463,1,"Gee, Mr. Arthur H","male",47.0,0,0,38.5,"S","no"
|
412 |
+
67,2,"Nye, Mrs. (Elizabeth Ramell)","female",29.0,0,0,10.5,"S","yes"
|
413 |
+
788,3,"Rice, Master. George Hugh","male",8.0,4,1,29.125,"Q","no"
|
414 |
+
518,3,"Ryan, Mr. Patrick","male","_GSK_NA_",0,0,24.15,"Q","no"
|
415 |
+
104,3,"Johansson, Mr. Gustaf Joel","male",33.0,0,0,8.6542,"S","no"
|
416 |
+
729,2,"Bryhl, Mr. Kurt Arnold Gottfrid","male",25.0,1,0,26.0,"S","no"
|
417 |
+
8,3,"Palsson, Master. Gosta Leonard","male",2.0,3,1,21.075,"S","no"
|
418 |
+
812,3,"Lester, Mr. James","male",39.0,0,0,24.15,"S","no"
|
419 |
+
502,3,"Canavan, Miss. Mary","female",21.0,0,0,7.75,"Q","no"
|
420 |
+
614,3,"Horgan, Mr. John","male","_GSK_NA_",0,0,7.75,"Q","no"
|
421 |
+
34,2,"Wheadon, Mr. Edward H","male",66.0,0,0,10.5,"S","no"
|
422 |
+
294,3,"Haas, Miss. Aloisia","female",24.0,0,0,8.85,"S","no"
|
423 |
+
323,2,"Slayter, Miss. Hilda Mary","female",30.0,0,0,12.35,"Q","yes"
|
424 |
+
652,2,"Doling, Miss. Elsie","female",18.0,0,1,23.0,"S","yes"
|
425 |
+
827,3,"Lam, Mr. Len","male","_GSK_NA_",0,0,56.4958,"S","no"
|
426 |
+
331,3,"McCoy, Miss. Agnes","female","_GSK_NA_",2,0,23.25,"Q","yes"
|
427 |
+
439,1,"Fortune, Mr. Mark","male",64.0,1,4,263.0,"S","no"
|
428 |
+
798,3,"Osman, Mrs. Mara","female",31.0,0,0,8.6833,"S","yes"
|
429 |
+
623,3,"Nakid, Mr. Sahid","male",20.0,1,1,15.7417,"C","yes"
|
430 |
+
276,1,"Andrews, Miss. Kornelia Theodosia","female",63.0,1,0,77.9583,"S","yes"
|
431 |
+
78,3,"Moutal, Mr. Rahamin Haim","male","_GSK_NA_",0,0,8.05,"S","no"
|
432 |
+
742,1,"Cavendish, Mr. Tyrell William","male",36.0,1,0,78.85,"S","no"
|
433 |
+
370,1,"Aubart, Mme. Leontine Pauline","female",24.0,0,0,69.3,"C","yes"
|
434 |
+
425,3,"Rosblom, Mr. Viktor Richard","male",18.0,1,1,20.2125,"S","no"
|
435 |
+
189,3,"Bourke, Mr. John","male",40.0,1,1,15.5,"Q","no"
|
436 |
+
143,3,"Hakkarainen, Mrs. Pekka Pietari (Elin Matilda Dolck)","female",24.0,1,0,15.85,"S","yes"
|
437 |
+
627,2,"Kirkland, Rev. Charles Leonard","male",57.0,0,0,12.35,"Q","no"
|
438 |
+
703,3,"Barbara, Miss. Saiide","female",18.0,0,1,14.4542,"C","no"
|
439 |
+
638,2,"Collyer, Mr. Harvey","male",31.0,1,1,26.25,"S","no"
|
440 |
+
549,3,"Goldsmith, Mr. Frank John","male",33.0,1,1,20.525,"S","no"
|
441 |
+
43,3,"Kraeff, Mr. Theodor","male","_GSK_NA_",0,0,7.8958,"C","no"
|
442 |
+
68,3,"Crease, Mr. Ernest James","male",19.0,0,0,8.1583,"S","no"
|
443 |
+
756,2,"Hamalainen, Master. Viljo","male",0.67,1,1,14.5,"S","yes"
|
444 |
+
443,3,"Petterson, Mr. Johan Emil","male",25.0,1,0,7.775,"S","no"
|
445 |
+
472,3,"Cacic, Mr. Luka","male",38.0,0,0,8.6625,"S","no"
|
446 |
+
696,2,"Chapman, Mr. Charles Henry","male",52.0,0,0,13.5,"S","no"
|
447 |
+
665,3,"Lindqvist, Mr. Eino William","male",20.0,1,0,7.925,"S","yes"
|
cicd/examples/github/train.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import giskard
|
2 |
+
import yaml
|
3 |
+
|
4 |
+
from pathlib import Path

from giskard_cicd.utils import dump_model_and_dataset_for_cicd

# Example training script: builds the demo Titanic model and dataset, wraps
# them with Giskard and dumps both to the artifact folder named in
# ``cicd_config.yaml`` so that the CI/CD pipeline can pick them up.

# Locate the config next to this script. Splitting ``__file__`` on the literal
# "train.py" gives a wrong directory as soon as that substring appears earlier
# in the path, so the parent directory is taken instead.
path_to_config = Path(__file__).parent / "cicd_config.yaml"
with open(path_to_config) as yaml_f:
    # NOTE(review): ``yaml.Loader`` can construct arbitrary Python objects.
    # That is acceptable for a config file living in this repository, but
    # ``yaml.safe_load`` should be preferred if the file only holds plain
    # scalars - confirm before switching.
    cicd_config = yaml.load(yaml_f, Loader=yaml.Loader)

# Replace this with your own data & model creation.
df = giskard.demo.titanic_df()
data_preprocessor, clf = giskard.demo.titanic_pipeline()

# Wrap your Pandas DataFrame with Giskard.Dataset (test set, a golden dataset, etc.).
# Check the dedicated doc page: https://docs.giskard.ai/en/latest/guides/wrap_dataset/index.html
giskard_dataset = giskard.Dataset(
    df=df,  # A pandas.DataFrame with the raw data (before any pre-processing) and the ground truth variable (target).
    target="Survived",  # Ground truth variable
    name="Titanic dataset",  # Optional
    cat_columns=["Pclass", "Sex", "SibSp", "Parch", "Embarked"],  # Optional, but is a MUST if available. Inferred automatically if not.
)


# Wrap your model with Giskard.Model. Check the dedicated doc page: https://docs.giskard.ai/en/latest/guides/wrap_model/index.html
# You can use any tabular, text or LLM models (PyTorch, HuggingFace, LangChain, etc.),
# for classification, regression & text generation.
def prediction_function(df):
    """Return class probabilities for the raw rows in *df*."""
    # The pre-processor can be a pipeline of one-hot encoding, imputer, scaler, etc.
    preprocessed_df = data_preprocessor(df)
    return clf.predict_proba(preprocessed_df)


giskard_model = giskard.Model(
    model=prediction_function,  # A prediction function that encapsulates all the pre-processing steps and can be run on the dataset used by the scan.
    model_type="classification",  # Either regression, classification or text_generation.
    name="Titanic model",  # Optional
    classification_labels=clf.classes_,  # Their order MUST be identical to the prediction_function's output order
    feature_names=["PassengerId", "Pclass", "Name", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"],  # Default: all columns of your dataset
    # classification_threshold=0.5,  # Default: 0.5
)

dump_model_and_dataset_for_cicd(cicd_config["artifact_path"], giskard_model, giskard_dataset)
|
cicd/giskard_cicd/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from .utils import dump_model_and_dataset_for_cicd
|
2 |
+
|
3 |
+
__all__ = ["dump_model_and_dataset_for_cicd"]
|
cicd/giskard_cicd/loaders/__init__.py
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from giskard_cicd.loaders.github_loader import GithubLoader
|
2 |
+
from giskard_cicd.loaders.huggingface_loader import HuggingFaceLoader
|
3 |
+
from giskard_cicd.loaders.base_loader import BaseLoader
|
4 |
+
|
5 |
+
__all__ = ["GithubLoader", "HuggingFaceLoader", "BaseLoader"]
|
cicd/giskard_cicd/loaders/base_loader.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Load models and datasets from Github."""
|
2 |
+
|
3 |
+
import logging
|
4 |
+
from abc import ABC, abstractmethod
|
5 |
+
|
6 |
+
from giskard.models.base import BaseModel
|
7 |
+
from giskard.core.model_validation import validate_model
|
8 |
+
from giskard import Dataset
|
9 |
+
|
10 |
+
logger = logging.getLogger(__name__)
|
11 |
+
|
12 |
+
|
13 |
+
class LoaderError(RuntimeError):
    """Raised when a model and/or a dataset cannot be loaded."""


class DatasetError(LoaderError):
    """Raised when loading fails because of the dataset."""


class ModelError(LoaderError):
    """Raised when loading fails because of the model."""
|
23 |
+
|
24 |
+
|
25 |
+
class BaseLoader(ABC):
    """Common interface of the loaders that produce a Giskard model/dataset pair."""

    @abstractmethod
    def load_giskard_model_dataset(self, *args, **kwargs) -> "tuple[BaseModel, Dataset]":
        """Load and return the ``(model, dataset)`` pair.

        Concrete loaders define the arguments they need (model identifier,
        dataset identifier, ...); the base class only fixes the return shape.
        """
        ...

    def validate(self, *args, **kwargs):
        """Load the model and dataset, then check that they work together.

        All arguments are forwarded unchanged to
        :meth:`load_giskard_model_dataset`. Without this forwarding the call
        could never succeed for loaders whose load method has required
        parameters.
        """
        gsk_model, gsk_dataset = self.load_giskard_model_dataset(*args, **kwargs)
        validate_model(gsk_model, gsk_dataset)
|
cicd/giskard_cicd/loaders/github_loader.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import yaml
|
2 |
+
from giskard import Dataset, Model
|
3 |
+
from giskard.models.base import BaseModel
|
4 |
+
from giskard.ml_worker.utils.file_utils import get_file_name
|
5 |
+
|
6 |
+
from .base_loader import BaseLoader
|
7 |
+
from pathlib import Path
|
8 |
+
from giskard.core.core import DatasetMeta
|
9 |
+
|
10 |
+
|
11 |
+
class GithubLoader(BaseLoader):
    """Loader for a model and dataset that were dumped to local folders."""

    # TODO: change the way dataset is loaded, factor out some of the logic contained in Dataset.download()
    def load_giskard_model_dataset(self, model, dataset) -> (BaseModel, Dataset):
        """Rebuild the Giskard model and dataset from their saved folders.

        ``dataset`` is the folder holding ``giskard-dataset-meta.yaml`` and the
        data file; ``model`` is handed as-is to ``Model.load``.
        """
        dataset_dir = Path(dataset)

        # NOTE(review): ``yaml.Loader`` can instantiate arbitrary Python
        # objects; only load metadata files coming from a trusted source.
        with open(dataset_dir / "giskard-dataset-meta.yaml") as meta_file:
            raw_meta = yaml.load(meta_file, Loader=yaml.Loader)

        meta = DatasetMeta(
            name=raw_meta["name"],
            target=raw_meta["target"],
            column_types=raw_meta["column_types"],
            column_dtypes=raw_meta["column_dtypes"],
            number_of_rows=raw_meta["number_of_rows"],
            category_features=raw_meta["category_features"],
        )

        data_file = dataset_dir / get_file_name("data", "csv.zst", False)
        frame = Dataset.cast_column_to_dtypes(Dataset.load(data_file), meta.column_dtypes)

        gsk_model = Model.load(model)
        gsk_dataset = Dataset(
            df=frame,
            name=meta.name,
            target=meta.target,
            column_types=meta.column_types,
        )
        return gsk_model, gsk_dataset
|
cicd/giskard_cicd/loaders/huggingface_loader.py
ADDED
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Load models and datasets from the HuggingFace hub."""
|
2 |
+
|
3 |
+
import logging
|
4 |
+
import time
|
5 |
+
|
6 |
+
import datasets
|
7 |
+
import giskard as gsk
|
8 |
+
import huggingface_hub
|
9 |
+
import torch
|
10 |
+
from giskard import Dataset
|
11 |
+
from giskard.models.base import BaseModel
|
12 |
+
from giskard.models.huggingface import HuggingFaceModel
|
13 |
+
from transformers.pipelines import TextClassificationPipeline
|
14 |
+
import pandas as pd
|
15 |
+
from .base_loader import BaseLoader, DatasetError
|
16 |
+
|
17 |
+
logger = logging.getLogger(__name__)
|
18 |
+
|
19 |
+
|
20 |
+
class HuggingFaceLoader(BaseLoader):
|
21 |
+
|
22 |
+
def __init__(self, device=None):
|
23 |
+
self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")
|
24 |
+
|
25 |
+
def _find_dataset_id_from_model(self, model_id):
    """Find the dataset ID from the model metadata.

    Returns the first dataset listed in the model card of ``model_id``.

    Raises:
        DatasetError: if the model has no card metadata or the card does not
            list any dataset.
    """
    model_card = huggingface_hub.model_info(model_id).cardData

    msg = f"Could not find dataset for model `{model_id}`."
    # The card metadata can be absent altogether, in which case a membership
    # test on it would fail with a TypeError.
    if model_card is None or "datasets" not in model_card:
        raise DatasetError(msg)

    dataset_ids = model_card["datasets"]
    # A card may declare a single dataset as a plain string; indexing it
    # would return its first character instead of the dataset ID.
    if isinstance(dataset_ids, str):
        return dataset_ids
    if not dataset_ids:
        raise DatasetError(msg)

    # Take the first one
    return dataset_ids[0]
|
36 |
+
|
37 |
+
def load_giskard_model_dataset(self, model, dataset=None, dataset_config=None, dataset_split=None):
    """Load a HuggingFace model and dataset and wrap them for Giskard.

    Args:
        model: model ID on the HuggingFace Hub.
        dataset: dataset ID; when None it is looked up in the model card.
        dataset_config: optional dataset configuration name.
        dataset_split: optional split name.

    Returns:
        A ``(gsk_model, gsk_dataset)`` tuple.

    Raises:
        DatasetError: if the dataset cannot be found or has no label column
            after feature mapping.
    """
    # If no dataset was provided, we try to get it from the model metadata.
    if dataset is None:
        logger.debug("No dataset provided. Trying to get it from the model metadata.")
        dataset = self._find_dataset_id_from_model(model)
        logger.debug(f"Found dataset `{dataset}`.")

    # Loading the model is easy. What is complicated is to get the dataset.
    # So we start by trying to get the dataset, because if we fail, we don't
    # want to waste time downloading the model.
    hf_dataset = self.load_dataset(dataset, dataset_config, dataset_split, model)

    # Load the model.
    hf_model = self.load_model(model)

    # Check that the dataset has the good feature names for the task.
    feature_mapping = self._get_feature_mapping(hf_model, hf_dataset)

    df = self._flatten_hf_dataset(hf_dataset, dataset_split)
    # feature_mapping goes task feature -> dataset column; invert it to
    # rename the dataset columns to the task feature names.
    df = pd.DataFrame(df).rename(columns={v: k for k, v in feature_mapping.items()})

    if "label" not in df:
        msg = f"Could not find a label column in dataset `{dataset}`."
        raise DatasetError(msg)

    # Positional access: a label-based ``df.label[0]`` raises KeyError once
    # row 0 has been filtered out, and on an empty frame.
    multi_label = len(df) > 0 and isinstance(df["label"].iloc[0], list)

    # Remove rows with multiple labels. This is a hacky way to do it:
    # we do not support multi-label classification for now.
    if multi_label:
        # ``copy`` so that the label rewrite below does not write into a
        # view of the unfiltered frame.
        df = df[df["label"].apply(len) == 1].copy()
    else:
        logger.debug("Dataset preview:\n%s", df)

    # @TODO: currently for classification models only.
    id2label = hf_model.model.config.id2label

    if multi_label:
        # Every remaining row holds exactly one label ID.
        df["label"] = df["label"].apply(lambda x: id2label[x[0]])
    else:
        # TODO: when the label for test is not provided, what do we do?
        # Negative IDs are kept as the placeholder string "-1".
        df["label"] = df["label"].apply(lambda x: id2label[x] if x >= 0 else "-1")

    gsk_dataset = gsk.Dataset(df, target="label", column_types={"text": "text"}, validation=False)

    gsk_model = HuggingFaceModel(
        hf_model,
        model_type="classification",
        data_preprocessing_function=lambda df: df.text.tolist(),
        classification_labels=[id2label[i] for i in range(len(id2label))],
        batch_size=None,
        device=self.device,
    )

    # Optimize batch size
    if self.device.startswith("cuda"):
        gsk_model.batch_size = self._find_optimal_batch_size(gsk_model, gsk_dataset)

    return gsk_model, gsk_dataset
|
93 |
+
|
94 |
+
def load_dataset(self, dataset_id, dataset_config=None, dataset_split=None, model_id=None):
    """Load a dataset from the HuggingFace Hub.

    Args:
        dataset_id: dataset ID on the Hub.
        dataset_config: optional configuration name.
        dataset_split: optional split name. When None, the best available
            split is selected and only that split is returned.
        model_id: accepted for interface compatibility; not used here.

    Returns:
        The selected split when ``dataset_split`` is None, otherwise whatever
        ``datasets.load_dataset`` returned (all splits).

    Raises:
        DatasetError: if the dataset cannot be loaded or no usable split is
            found.
    """
    print(f"Loading dataset {dataset_id} with config {dataset_config} and split {dataset_split}")
    logger.debug(f"Trying to load dataset `{dataset_id}` (config = `{dataset_config}`, split = `{dataset_split}`).")
    try:
        # we do not set the split here
        # because we want to be able to select the best split later with preprocessing
        hf_dataset = datasets.load_dataset(dataset_id, name=dataset_config)
    except ValueError as err:
        msg = f"Could not load dataset `{dataset_id}` with config `{dataset_config}`."
        raise DatasetError(msg) from err

    if dataset_split is None:
        dataset_split = self._select_best_dataset_split(list(hf_dataset.keys()))
        # Without this check a missing split would surface as an opaque
        # ``KeyError: None`` on the lookup below.
        if dataset_split is None:
            msg = f"Could not find a usable split in dataset `{dataset_id}`."
            raise DatasetError(msg)
        logger.debug(f"No split provided, automatically selected split = `{dataset_split}`.")
        hf_dataset = hf_dataset[dataset_split]

    return hf_dataset
|
111 |
+
|
112 |
+
def load_model(self, model_id):
    """Build a transformers pipeline for ``model_id`` on ``self.device``.

    The pipeline task is the ``pipeline_tag`` reported by the Hub for the
    model.
    """
    # Imported lazily, as in the rest of this method's history.
    from transformers import pipeline as build_pipeline

    pipeline_tag = huggingface_hub.model_info(model_id).pipeline_tag
    return build_pipeline(task=pipeline_tag, model=model_id, device=self.device)
|
118 |
+
|
119 |
+
def _get_dataset_features(self, hf_dataset):
|
120 |
+
'''
|
121 |
+
Recursively get the features of the dataset
|
122 |
+
'''
|
123 |
+
dataset_features = {}
|
124 |
+
try:
|
125 |
+
dataset_features = hf_dataset.features
|
126 |
+
return dataset_features
|
127 |
+
except AttributeError:
|
128 |
+
print("hf_dataset.features not found")
|
129 |
+
if isinstance(hf_dataset, datasets.DatasetDict):
|
130 |
+
keys = list(hf_dataset.keys())
|
131 |
+
return self._get_dataset_features(hf_dataset[keys[0]])
|
132 |
+
|
133 |
+
def _flatten_hf_dataset(self, hf_dataset, data_split=None):
    """Pick the single split of ``hf_dataset`` that should be scanned.

    Args:
        hf_dataset: either a ``DatasetDict`` (several splits) or an object
            that already represents one split.
        data_split: preferred split name prefix, or None.

    Returns:
        ``hf_dataset`` itself when it is not a ``DatasetDict``. Otherwise the
        first split whose name starts with ``data_split`` (non-train splits
        are considered first), else the last non-train split, else the first
        split. An empty DataFrame is returned for an empty ``DatasetDict``.
    """
    # A single split needs no selection. Returning an empty DataFrame here
    # would silently drop all the data.
    if not isinstance(hf_dataset, datasets.DatasetDict):
        return hf_dataset

    keys = list(hf_dataset.keys())
    if not keys:
        return pd.DataFrame()

    train_keys = [k for k in keys if k.startswith("train")]
    other_keys = [k for k in keys if not k.startswith("train")]

    # TODO: only support one split for now
    # Maybe we can merge all the datasets into one
    if data_split is not None:
        # Non-train splits keep priority; train splits are only considered
        # when nothing else matches, so that an explicit request for a
        # train split is honoured.
        for k in other_keys + train_keys:
            if k.startswith(data_split):
                return hf_dataset[k]

    if other_keys:
        return hf_dataset[other_keys[-1]]

    # If there are only train datasets
    return hf_dataset[keys[0]]
|
157 |
+
|
158 |
+
def _get_feature_mapping(self, hf_model, hf_dataset):
    """Map the features required by the task to the dataset columns.

    Only text classification pipelines are supported; they need a ``text``
    and a ``label`` feature. A dataset column is used when its name equals
    the task feature name, or else when it starts with it. Features that
    are still unmapped are resolved by type in
    ``_amend_missing_features``.

    Returns:
        A dict ``{task feature name: dataset column name}``.

    Raises:
        NotImplementedError: if ``hf_model`` is not a text classification
            pipeline.
    """
    if not isinstance(hf_model, TextClassificationPipeline):
        logger.debug("Unsupported pipeline type: %s", type(hf_model))
        msg = "Unsupported model type."
        raise NotImplementedError(msg)

    task_features = {"text": "string", "label": "class_label"}

    dataset_features = self._get_dataset_features(hf_dataset) or {}
    logger.debug("Dataset features: %s", dataset_features)

    # Sorted so that the outcome does not depend on set iteration order.
    column_names = sorted(dataset_features)

    # Exact name matches always win over prefix matches.
    feature_mapping = {name: name for name in column_names if name in task_features}
    for name in column_names:
        if name in task_features:
            continue
        for task_feature in task_features:
            if task_feature not in feature_mapping and name.startswith(task_feature):
                feature_mapping[task_feature] = name

    if not set(task_features) - set(feature_mapping):
        return feature_mapping

    # If not, we try to find a suitable mapping by matching types.
    return self._amend_missing_features(task_features, dataset_features, feature_mapping)
|
183 |
+
|
184 |
+
def _amend_missing_features(self, task_features, dataset_features, feature_mapping):
|
185 |
+
'''
|
186 |
+
Question: what is this code doing?
|
187 |
+
'''
|
188 |
+
available_features = set(dataset_features) - set(feature_mapping)
|
189 |
+
missing_features = set(task_features) - set(feature_mapping)
|
190 |
+
|
191 |
+
for feature in missing_features:
|
192 |
+
expected_type = task_features[feature]
|
193 |
+
if expected_type == "class_label":
|
194 |
+
candidates = [f for f in available_features if isinstance(dataset_features[f], datasets.ClassLabel)]
|
195 |
+
else:
|
196 |
+
candidates = [f for f in available_features if dataset_features[f].dtype == expected_type]
|
197 |
+
|
198 |
+
# If we have more than one match, it`s not possible to know which one is the good one.
|
199 |
+
if len(candidates) != 1:
|
200 |
+
msg = f"Could not find a suitable mapping for feature for `{feature}`."
|
201 |
+
raise RuntimeError(msg)
|
202 |
+
|
203 |
+
feature_mapping[feature] = candidates[0]
|
204 |
+
available_features.remove(candidates[0])
|
205 |
+
return feature_mapping
|
206 |
+
|
207 |
+
def _select_best_dataset_split(self, split_names):
|
208 |
+
"""Get the best split for testing.
|
209 |
+
|
210 |
+
Selects the split `test` if available, otherwise `validation`, and as a last resort `train`.
|
211 |
+
If there is only one split, we return that split.
|
212 |
+
"""
|
213 |
+
# If only one split is available, we just use that one.
|
214 |
+
if len(split_names) == 1:
|
215 |
+
return split_names[0]
|
216 |
+
|
217 |
+
# Otherwise iterate based on the preferred prefixes.
|
218 |
+
for prefix in ["test", "valid", "train"]:
|
219 |
+
try:
|
220 |
+
return next(x for x in split_names if x.startswith(prefix))
|
221 |
+
except StopIteration:
|
222 |
+
pass
|
223 |
+
|
224 |
+
return None
|
225 |
+
|
226 |
+
def _find_optimal_batch_size(self, model: BaseModel, dataset: Dataset):
    """Find the optimal batch size for the model and dataset.

    Starting at 1, the batch size is doubled for as long as the measured
    time per sample does not get worse. Half of the current batch size is
    returned as soon as the dataset is too small for one batch, the
    prediction raises ``RuntimeError``, or the time per sample increases.
    The model's own ``batch_size`` is always restored on exit.
    """
    original_batch_size = model.batch_size
    try:
        model.batch_size = 1
        best_time_per_sample = float("inf")
        while True:
            current = model.batch_size
            fallback = current // 2

            # At most 30 batches are timed per candidate size.
            run_count = min(30, len(dataset) // current)
            if run_count == 0:
                return fallback
            sample_count = run_count * current

            subset = dataset.slice(lambda df: df.sample(sample_count), row_level=False)

            started_at = time.perf_counter_ns()
            try:
                # Caching is disabled while timing the prediction.
                with gsk.models.cache.no_cache():
                    model.predict(subset)
            except RuntimeError:
                # NOTE(review): presumably an out-of-memory error at this
                # batch size - confirm.
                return fallback
            duration = time.perf_counter_ns() - started_at

            per_sample = duration / sample_count
            if per_sample > best_time_per_sample:
                return fallback

            best_time_per_sample = per_sample
            model.batch_size = current * 2
    finally:
        model.batch_size = original_batch_size
|
cicd/giskard_cicd/pipeline/runner.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import yaml
|
2 |
+
import giskard as gsk
|
3 |
+
import time
|
4 |
+
|
5 |
+
|
6 |
+
class PipelineReport:
    """Thin wrapper exposing the export formats of a scan result."""

    def __init__(self, scan_result):
        """Keep a reference to the scan result to be exported."""
        self.scan_result = scan_result

    def to_html(self):
        """Return the scan result rendered by its ``to_html`` method."""
        return self.scan_result.to_html()

    def to_markdown(self, template):
        """Return the scan result rendered as markdown with ``template``.

        The requested template is forwarded to the scan result instead of
        always rendering with the "github" one.
        """
        return self.scan_result.to_markdown(template=template)
|
15 |
+
|
16 |
+
|
17 |
+
class PipelineRunner:
    """Load a model/dataset pair with one of the loaders and scan it."""

    def __init__(self, loaders):
        """Store the available loaders, indexed by loader ID."""
        self.loaders = loaders

    @staticmethod
    def _read_scan_config(scan_config_path):
        """Return ``(params, detectors)`` read from the scan config file.

        Either value is None when its section is missing from the file or
        when no path is given.
        """
        if scan_config_path is None:
            return None, None

        with open(scan_config_path) as yaml_f:
            # NOTE(review): ``yaml.Loader`` can instantiate arbitrary Python
            # objects; only pass scan configs coming from a trusted source.
            # An empty file loads as None, hence the fallback.
            scan_config = yaml.load(yaml_f, Loader=yaml.Loader) or {}

        configuration = scan_config.get("configuration")
        detector_names = scan_config.get("detectors")
        # ``dict(None)`` / ``list(None)`` raise TypeError, so a missing
        # section is kept as None.
        params = dict(configuration) if configuration is not None else None
        detectors = list(detector_names) if detector_names is not None else None
        return params, detectors

    def run(self, loader_id, **kwargs):
        """Run the scan and return a :class:`PipelineReport`.

        Args:
            loader_id: key of the loader to use in ``self.loaders``.
            **kwargs: ``scan_config`` (optional path to a YAML scan
                configuration) is consumed here; everything else is passed
                to the loader's ``load_giskard_model_dataset``.
        """
        # Get the loader
        loader = self.loaders[loader_id]

        # Get scan configuration
        params, detectors = self._read_scan_config(kwargs.pop("scan_config", None))

        start = time.time()
        # Load the model and dataset
        gsk_model, gsk_dataset = loader.load_giskard_model_dataset(**kwargs)
        print(f"Loading took {time.time() - start:.2f}s")

        start = time.time()
        # Run the scanner
        scan_result = gsk.scan(gsk_model, gsk_dataset, params=params, only=detectors)
        print(f"Scanning took {time.time() - start:.2f}s")

        # Report
        report = PipelineReport(scan_result)

        return report
|
cicd/giskard_cicd/utils.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pathlib
|
2 |
+
|
3 |
+
|
4 |
+
def dump_model_and_dataset_for_cicd(artifact_path, giskard_model, giskard_dataset):
    """Validate a model/dataset pair and save both under ``artifact_path``.

    The model is first checked with ``validate_model_loading_and_saving``,
    and the reloaded model is then validated against the dataset. On success
    the dataset is saved to ``<artifact_path>/artifacts/dataset`` and the
    model to ``<artifact_path>/artifacts/model``.

    Args:
        artifact_path: Base directory for the artifacts (``str`` or
            path-like object); created if missing.
        giskard_model: Model object providing ``save(path)``.
        giskard_dataset: Dataset object providing ``save(path, id)``.

    Raises:
        Exception: If the serialization round-trip or the validation of the
            model fails; the original error is chained as the cause.
    """
    from giskard.core.model_validation import validate_model, validate_model_loading_and_saving

    try:
        reloaded_model = validate_model_loading_and_saving(giskard_model)
    except Exception as e:
        raise Exception("An issue occured during the serialization/deserialization of your model. Please submit the traceback as a GitHub issue in the following "
                        "repository for further assistance: https://github.com/Giskard-AI/giskard.") from e
    try:
        validate_model(reloaded_model, giskard_dataset)
    except Exception as e:
        raise Exception("An issue occured during the validation of your model. Please submit the traceback as a GitHub issue in the following "
                        "repository for further assistance: https://github.com/Giskard-AI/giskard.") from e

    # Build the paths with pathlib so that both str and Path inputs work.
    artifacts_dir = pathlib.Path(artifact_path) / "artifacts"
    dataset_dir = artifacts_dir / "dataset"
    model_dir = artifacts_dir / "model"

    # parents=True also creates artifact_path and the "artifacts" directory.
    for directory in (dataset_dir, model_dir):
        directory.mkdir(parents=True, exist_ok=True)

    # TODO: change the Dataset.save() method to be like Model.save(), i.e. without the id requirement
    giskard_dataset.save(dataset_dir, 0)
    giskard_model.save(model_dir)
    print("Your model and dataset are successfully dumped for CI/CD.")
|
cicd/pyproject.toml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[build-system]
|
2 |
+
requires = ["setuptools"]
|
3 |
+
build-backend = "setuptools.build_meta"
|
4 |
+
[project]
|
5 |
+
name = "giskard_cicd"
|
6 |
+
readme = "readme.md"
|
7 |
+
dependencies = [
|
8 |
+
"datasets",
|
9 |
+
"giskard >= 2.0.0b",
|
10 |
+
"huggingface_hub",
|
11 |
+
"torch",
|
12 |
+
"transformers",
|
13 |
+
]
|
14 |
+
requires-python = ">=3.9"
|
cicd/readme.md
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Giskard CI/CD runner (WIP)
|
2 |
+
|
3 |
+
## Overview
|
4 |
+
|
5 |
+
The idea is to have a common CI/CD core that can interface with different input sources (loaders) and output destinations (reporters).
|
6 |
+
|
7 |
+
The **core** is responsible for running the tests and generating a report.
|
8 |
+
|
9 |
+
The **loaders** are responsible for loading the model and dataset, wrapped as Giskard objects, from a given source (for example the HuggingFace hub, a Github repository, etc.).
|
10 |
+
|
11 |
+
The **reporters** are responsible for sending the report to the appropriate destination (e.g. a comment to a Github PR, a HuggingFace discussion, etc.).
|
12 |
+
|
13 |
+
|
14 |
+
### Tasks
|
15 |
+
|
16 |
+
Tasks could be data objects containing all the information needed to run a CI/CD pipeline. For example:
|
17 |
+
|
18 |
+
```json
|
19 |
+
{
|
20 |
+
"loader_id": "huggingface",
|
21 |
+
"model": "distilbert-base-uncased",
|
22 |
+
"dataset": "sst2",
|
23 |
+
"loader_args": {
|
24 |
+
"dataset_split": "validation"
|
25 |
+
},
|
26 |
+
"reporter_id": "huggingface_discussion",
|
27 |
+
"reporter_args": {
|
28 |
+
"discussion_id": 1234
|
29 |
+
}
|
30 |
+
}
|
31 |
+
```
|
32 |
+
|
33 |
+
or
|
34 |
+
|
35 |
+
|
36 |
+
```json
|
37 |
+
{
|
38 |
+
"loader_id": "github",
|
39 |
+
"model": "my.package::load_model",
|
40 |
+
"dataset": "my.package::load_test_dataset",
|
41 |
+
"loader_args": {
|
42 |
+
"repository": "My-Organization/my_project",
|
43 |
+
"branch": "dev-test2"
|
44 |
+
},
|
45 |
+
"reporter_id": "github_pr",
|
46 |
+
"reporter_args": {
|
47 |
+
"repository": "My-Organization/my_project",
|
48 |
+
"pr_id": 1234
|
49 |
+
}
|
50 |
+
}
|
51 |
+
```
|
52 |
+
|
53 |
+
These tasks may be generated by a watcher (e.g. a Github action, a HuggingFace webhook, etc.) and put in a queue. The CI/CD runner will then pick them up and run the pipeline.
|
54 |
+
|
55 |
+
Otherwise, a single task can be created to run a single-shot Github action, without queueing.
|
56 |
+
|
57 |
+
|
58 |
+
### CI/CD Core
|
59 |
+
|
60 |
+
In pseudocode, the CI/CD core could look like this:
|
61 |
+
|
62 |
+
```python
|
63 |
+
task = get_task_from_queue_or_environment()
|
64 |
+
|
65 |
+
loader = get_loader(task.loader_id)
|
66 |
+
gsk_model, gsk_dataset = loader.load_model_dataset(
|
67 |
+
task.model,
|
68 |
+
task.dataset,
|
69 |
+
**task.loader_args,
|
70 |
+
)
|
71 |
+
|
72 |
+
runner = PipelineRunner()
|
73 |
+
report = runner.run(gsk_model, gsk_dataset)
|
74 |
+
|
75 |
+
reporter = get_reporter(task.reporter_id)
|
76 |
+
reporter.push_report(report, **task.reporter_args)
|
77 |
+
```
|
78 |
+
|
79 |
+
## Prototype
|
80 |
+
|
81 |
+
Current implementation has two loaders:
|
82 |
+
- The `github` loader which can be run from the command line (after running `python train.py` in `examples/github`):
|
83 |
+
|
84 |
+
```bash
|
85 |
+
$ python cli.py --loader github --model examples/github/artifacts/model --dataset examples/github/artifacts/dataset
|
86 |
+
```
|
87 |
+
|
88 |
+
- The `huggingface` loader which can be run from the command line:
|
89 |
+
|
90 |
+
```bash
|
91 |
+
$ python cli.py --loader huggingface --model distilbert-base-uncased-finetuned-sst-2-english --dataset_split validation --output demo_report.html
|
92 |
+
```
|
93 |
+
|
94 |
+
- Automatically post to discussion area for a given repo
|
95 |
+
```bash
|
96 |
+
$ python cli.py --loader huggingface --model distilbert-base-uncased-finetuned-sst-2-english --dataset_split validation --output_format markdown --output_portal huggingface --discussion_repo [REPO_ID] --hf_token [HF_TOKEN]
|
97 |
+
```
|
98 |
+
|
99 |
+
This will launch a pipeline that will load the model and dataset from the HuggingFace hub, run the scan and generate a report in HTML format (for now).
|
cicd/retriever.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import huggingface_hub
|
3 |
+
|
4 |
+
|
5 |
+
def model_has_dataset(model):
    """Return True if any tag of ``model`` starts with ``"dataset:"``.

    Args:
        model: Object exposing a ``tags`` attribute holding an iterable of
            strings, or ``None``.

    Returns:
        ``True`` when at least one tag has the ``"dataset:"`` prefix,
        ``False`` otherwise (including when ``tags`` is ``None`` or empty).
    """
    # ``tags`` may be None; treat that the same as "no tags".
    return any(tag.startswith("dataset:") for tag in (model.tags or ()))
|
10 |
+
|
11 |
+
|
12 |
+
if __name__ == "__main__":
    # Script entry point: list Hugging Face models of one type that carry at
    # least one "dataset:" tag and write them to models_<type>.<format>.
    parser = argparse.ArgumentParser(
        prog="Giskard Retriever", description="Retrieves HF models that are bound to datasets."
    )
    parser.add_argument(
        "--model_type",
        help="Hugging Face model types. default: text-classification",
        required=False,
        default="text-classification",
    )
    parser.add_argument(
        "--output_format",
        help="Format of the information retrieved. Default: parquet. Options: parquet, csv, json.",
        # Reject unknown formats up front instead of silently writing nothing.
        choices=["parquet", "csv", "json"],
        default="parquet",
    )

    args = parser.parse_args()

    MODEL_TYPE = args.model_type

    # Models are requested sorted by likes, descending.
    models_with_dataset = filter(
        model_has_dataset, huggingface_hub.list_models(filter=MODEL_TYPE, sort="likes", direction=-1)
    )

    import pandas as pd

    df = pd.DataFrame(
        [
            {
                "modelId": m.modelId,
                "modelType": MODEL_TYPE,
                "author": m.author,
                "downloads": m.downloads,
                "likes": m.likes,
                # Strip the "dataset:" prefix (8 characters) from each tag.
                "datasets": [t[8:] for t in m.tags if t.startswith("dataset:")],
            }
            for m in models_with_dataset
        ]
    )

    output_format = args.output_format

    if output_format == "parquet":
        df.to_parquet(f"models_{MODEL_TYPE}.parquet", index=False)
    elif output_format == "csv":
        df.to_csv(f"models_{MODEL_TYPE}.csv", columns=df.columns, index=False)
    elif output_format == "json":
        # index=False is rejected with the default orient; orient="records"
        # writes one object per row without the index.
        df.to_json(f"models_{MODEL_TYPE}.json", orient="records")
|
cicd/scan_config_template.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
detectors:
|
2 |
+
- ethical_bias
|
3 |
+
|
4 |
+
configuration:
|
5 |
+
ethical_bias:
|
6 |
+
threshold:
|
7 |
+
0.01
|
cicd/scan_retrieved.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import argparse
|
2 |
+
import pandas as pd
|
3 |
+
from ast import literal_eval
|
4 |
+
from string import Template
|
5 |
+
import os
|
6 |
+
|
7 |
+
|
8 |
+
def model_has_dataset(model):
    """Return True if any tag of ``model`` starts with ``"dataset:"``.

    Args:
        model: Object exposing a ``tags`` attribute holding an iterable of
            strings, or ``None``.

    Returns:
        ``True`` when at least one tag has the ``"dataset:"`` prefix,
        ``False`` otherwise (including when ``tags`` is ``None`` or empty).
    """
    # ``tags`` may be None; treat that the same as "no tags".
    return any(tag.startswith("dataset:") for tag in (model.tags or ()))
|
13 |
+
|
14 |
+
|
15 |
+
if __name__ == "__main__":
    # Script entry point: run cli.py once per retrieved model/dataset pair and
    # record the outcome so that later runs can skip pairs already processed.

    # Local import: only the script entry point launches subprocesses.
    import subprocess

    parser = argparse.ArgumentParser(
        prog="Giskard Batch Scanner", description="Scan Retrieved HF models."
    )
    parser.add_argument(
        "--data_path",
        help="Path to retrieved models in csv format (need to run retriever.py first).",
        required=True,
    )
    parser.add_argument("--first_Nmodels",
                        help="Number of models to be scanned from the sorted list of models available.",
                        required=True)
    parser.add_argument("--output_path",
                        help="Path of dir to save all the reports",
                        required=True)

    args = parser.parse_args()

    df = pd.read_csv(args.data_path)

    # Pairs recorded by previous runs; stays None when no record file exists.
    df_to_be_skipped = None
    to_be_skipped_file_path = ".models_and_datasets_to_be_skipped.csv"
    if os.path.exists(to_be_skipped_file_path):
        df_to_be_skipped = pd.read_csv(to_be_skipped_file_path)

    result_path_template = Template("${output_path}/${model_name}__default_scan_with__${dataset_name}.${suffix}")

    os.makedirs(args.output_path, exist_ok=True)

    # Per-model / per-dataset overrides of the default split and config.
    dataset_split_exceptions = {"facebook/bart-large-mnli": "validation_matched"}

    dataset_config_exceptions = {"tweet_eval": "sentiment"}

    # Never iterate past the end of the table when fewer rows are available.
    n_models = min(int(args.first_Nmodels), len(df))

    for i in range(n_models):
        row = df.iloc[i]
        model = row.modelId
        # The "datasets" column holds the repr of a list; use its first entry.
        dataset = literal_eval(row.datasets)[0]

        message = f"{model} with {dataset}"

        # Check for None first: subscripting None would raise TypeError.
        already_recorded = (
            df_to_be_skipped is not None
            and ((df_to_be_skipped['model'] == model) & (df_to_be_skipped['dataset'] == dataset)).any()
        )
        if already_recorded:
            print(f"[{i}] ==== ⏩ skipping {message} ====")
            continue

        print(f"[{i}] ==== 🔍 scanning {message} ====")

        model_name = model.replace("/", "--")
        dataset_name = dataset.replace("/", "--")

        result_path = result_path_template.substitute(model_name=model_name,
                                                      dataset_name=dataset_name,
                                                      output_path=args.output_path,
                                                      suffix="html")
        if os.path.exists(result_path):
            answer = input(f"{result_path} already exists, Overwrite[o] or Skip[s]? ")

            while answer not in ["o", "s"]:
                answer = input("Invalid answer, please choose between 'o' and 's'")

            if answer == 's':
                continue
            os.remove(result_path)

        # An argument list (no shell) keeps model/dataset names read from the
        # CSV from being interpreted as shell syntax. A missing dataset config
        # is still passed as the literal string "None", exactly as the former
        # string template did.
        command = [
            "python", "cli.py",
            "--loader", "huggingface",
            "--model", model,
            "--dataset", dataset,
            "--dataset_split", dataset_split_exceptions.get(model, "validation"),
            "--dataset_config", str(dataset_config_exceptions.get(dataset, None)),
            "--output", result_path,
        ]

        try:
            # check=True turns a non-zero exit status into CalledProcessError,
            # so a failed scan is recorded as "error" and not as "done".
            subprocess.run(command, check=True)
        except (subprocess.CalledProcessError, OSError) as e:
            status = "error"
            result_path = result_path_template.substitute(model_name=model_name,
                                                          dataset_name=dataset_name,
                                                          output_path=args.output_path,
                                                          suffix="error")
            with open(result_path, "w") as error_log:
                # write() needs text, not the exception object itself.
                error_log.write(str(e))
            print(
                f"Something went wrong while {message}, error is logged at {result_path}. "
                "continuing with the next model...")
        else:
            status = "done"

        # Persist the outcome after every model so progress survives a crash.
        # pd.concat silently drops a leading None on the first iteration.
        new_row = pd.DataFrame({"model": model, "dataset": dataset, "status": status}, index=[0])
        df_to_be_skipped = pd.concat([df_to_be_skipped, new_row], ignore_index=True)
        df_to_be_skipped.to_csv(to_be_skipped_file_path, index=False)
|
cicd/setup.cfg
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[metadata]
|
2 |
+
name = giskard_cicd
|
3 |
+
version = 0.1.0
|
4 |
+
|
5 |
+
[options]
|
6 |
+
packages = find:
|
7 |
+
install_requires =
|
8 |
+
giskard >= 2.0.0b
|
9 |
+
transformers
|
10 |
+
huggingface_hub
|
11 |
+
datasets
|
12 |
+
torch
|
13 |
+
|