Upload folder using huggingface_hub
- .idea/.gitignore +3 -0
- .idea/codeStyles/Project.xml +7 -0
- .idea/codeStyles/codeStyleConfig.xml +5 -0
- .idea/misc.xml +6 -0
- .idea/modules.xml +8 -0
- .idea/workspace.xml +215 -0
- PyTest.iml +9 -0
- README.md +2 -8
- __pycache__/flask_app.cpython-310.pyc +0 -0
- __pycache__/test1.cpython-310.pyc +0 -0
- accuracy/accuracy.py +106 -0
- covid.py +128 -0
- flask_app.py +20 -0
- main.py +196 -0
- run.py +119 -0
- run.sh +1 -0
- run_wm_rgcn-20220407.py +577 -0
- run_wm_rgcn.py +568 -0
- test1.py +27 -0
- test2.py +8 -0
- ~/Desktop/roberta-base/merges.txt +0 -0
- ~/Desktop/roberta-base/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21 +0 -0
- ~/Desktop/roberta-base/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21.lock +0 -0
- ~/Desktop/roberta-base/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957 +21 -0
- ~/Desktop/roberta-base/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957.lock +0 -0
- ~/Desktop/roberta-base/models--roberta-base/refs/main +1 -0
- ~/Desktop/roberta-base/special_tokens_map.json +15 -0
- ~/Desktop/roberta-base/tokenizer.json +0 -0
- ~/Desktop/roberta-base/tokenizer_config.json +15 -0
- ~/Desktop/roberta-base/vocab.json +0 -0
.idea/.gitignore
ADDED
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
.idea/codeStyles/Project.xml
ADDED
@@ -0,0 +1,7 @@
+<component name="ProjectCodeStyleConfiguration">
+  <code_scheme name="Project" version="173">
+    <ScalaCodeStyleSettings>
+      <option name="MULTILINE_STRING_CLOSING_QUOTES_ON_NEW_LINE" value="true" />
+    </ScalaCodeStyleSettings>
+  </code_scheme>
+</component>
.idea/codeStyles/codeStyleConfig.xml
ADDED
@@ -0,0 +1,5 @@
+<component name="ProjectCodeStyleConfiguration">
+  <state>
+    <option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
+  </state>
+</component>
.idea/misc.xml
ADDED
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" languageLevel="JDK_16" project-jdk-name="Python 3.10 (py3)" project-jdk-type="Python SDK">
+    <output url="file://$PROJECT_DIR$/out" />
+  </component>
+</project>
.idea/modules.xml
ADDED
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/PyTest.iml" filepath="$PROJECT_DIR$/PyTest.iml" />
+    </modules>
+  </component>
+</project>
.idea/workspace.xml
ADDED
@@ -0,0 +1,215 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="AutoImportSettings">
+    <option name="autoReloadType" value="SELECTIVE" />
+  </component>
+  <component name="ChangeListManager">
+    <list default="true" id="105163d0-50fb-4a71-ba3b-6920eac49287" name="Changes" comment="" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="CodeStyleSettingsInfer">
+    <option name="done" value="true" />
+  </component>
+  <component name="FileTemplateManagerImpl">
+    <option name="RECENT_TEMPLATES">
+      <list>
+        <option value="Python Script" />
+        <option value="HTML File" />
+      </list>
+    </option>
+  </component>
+  <component name="HighlightingSettingsPerFile">
+    <setting file="file://$PROJECT_DIR$/main.py" root0="FORCE_HIGHLIGHTING" />
+    <setting file="file://$PROJECT_DIR$/test1.py" root0="FORCE_HIGHLIGHTING" />
+  </component>
+  <component name="MarkdownSettingsMigration">
+    <option name="stateVersion" value="1" />
+  </component>
+  <component name="ProjectCodeStyleSettingsMigration">
+    <option name="version" value="2" />
+  </component>
+  <component name="ProjectId" id="1woOJMFsJmGkkv90kso4jUXOCPq" />
+  <component name="ProjectViewState">
+    <option name="hideEmptyMiddlePackages" value="true" />
+    <option name="showLibraryContents" value="true" />
+  </component>
+  <component name="PropertiesComponent">{
+  "keyToString": {
+    "DefaultHtmlFileTemplate": "HTML File",
+    "last_opened_file_path": "/Users/machi/IdeaProjects/PyTest",
+    "project.structure.last.edited": "Project",
+    "project.structure.proportion": "0.0",
+    "project.structure.side.proportion": "0.0",
+    "settings.editor.selected.configurable": "preferences.pluginManager"
+  }
+}</component>
+  <component name="RecentsManager">
+    <key name="CopyFile.RECENT_KEYS">
+      <recent name="$PROJECT_DIR$" />
+    </key>
+    <key name="MoveFile.RECENT_KEYS">
+      <recent name="$PROJECT_DIR$/accuracy" />
+      <recent name="$PROJECT_DIR$" />
+    </key>
+  </component>
+  <component name="RunManager" selected="Python.test2">
+    <configuration name="flask_app" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+      <module name="PyTest" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="$USER_HOME$/miniconda3/envs/py3/bin/python" />
+      <option name="SDK_NAME" value="Python 3.10 (py3)" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/flask_app.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <configuration name="main" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+      <module name="PyTest" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="$USER_HOME$/miniconda3/envs/py2/bin/python" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <configuration name="run" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+      <module name="PyTest" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/run.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <configuration name="test1" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+      <module name="PyTest" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="$USER_HOME$/miniconda3/envs/py3/bin/python" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="false" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/test1.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <configuration name="test2" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
+      <module name="PyTest" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/test2.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+    <list>
+      <item itemvalue="Python.flask_app" />
+      <item itemvalue="Python.test2" />
+      <item itemvalue="Python.test1" />
+      <item itemvalue="Python.main" />
+      <item itemvalue="Python.run" />
+    </list>
+    <recent_temporary>
+      <list>
+        <item itemvalue="Python.test2" />
+        <item itemvalue="Python.flask_app" />
+        <item itemvalue="Python.test1" />
+        <item itemvalue="Python.main" />
+      </list>
+    </recent_temporary>
+  </component>
+  <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="Default task">
+      <changelist id="105163d0-50fb-4a71-ba3b-6920eac49287" name="Changes" comment="" />
+      <created>1629121407956</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1629121407956</updated>
+    </task>
+    <servers />
+  </component>
+  <component name="XDebuggerManager">
+    <breakpoint-manager>
+      <breakpoints>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$USER_HOME$/miniconda3/envs/py3/lib/python3.10/site-packages/datasets/load.py</url>
+          <line>1752</line>
+          <option name="timeStamp" value="2" />
+        </line-breakpoint>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$USER_HOME$/miniconda3/envs/py3/lib/python3.10/site-packages/datasets/load.py</url>
+          <line>1733</line>
+          <option name="timeStamp" value="3" />
+        </line-breakpoint>
+      </breakpoints>
+    </breakpoint-manager>
+    <watches-manager>
+      <configuration name="PythonConfigurationType">
+        <watch expression="t.element_spec" />
+        <watch expression="t.element_spec" />
+        <watch expression="cnt[0]" />
+      </configuration>
+    </watches-manager>
+  </component>
+</project>
PyTest.iml
ADDED
@@ -0,0 +1,9 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
 title: PyTest
-
-colorFrom: purple
-colorTo: purple
+app_file: test2.py
 sdk: gradio
-sdk_version: 3.
-app_file: app.py
-pinned: false
+sdk_version: 3.36.1
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__pycache__/flask_app.cpython-310.pyc
ADDED
Binary file (1.75 kB)
__pycache__/test1.cpython-310.pyc
ADDED
Binary file (1.04 kB)
accuracy/accuracy.py
ADDED
@@ -0,0 +1,106 @@
+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Accuracy metric."""
+
+import datasets
+from sklearn.metrics import accuracy_score
+
+import evaluate
+
+
+_DESCRIPTION = """
+Accuracy is the proportion of correct predictions among the total number of cases processed. It can be computed with:
+Accuracy = (TP + TN) / (TP + TN + FP + FN)
+ Where:
+TP: True positive
+TN: True negative
+FP: False positive
+FN: False negative
+"""
+
+
+_KWARGS_DESCRIPTION = """
+Args:
+    predictions (`list` of `int`): Predicted labels.
+    references (`list` of `int`): Ground truth labels.
+    normalize (`boolean`): If set to False, returns the number of correctly classified samples. Otherwise, returns the fraction of correctly classified samples. Defaults to True.
+    sample_weight (`list` of `float`): Sample weights. Defaults to None.
+
+Returns:
+    accuracy (`float` or `int`): Accuracy score. Minimum possible value is 0. Maximum possible value is 1.0, or the number of examples input if `normalize` is set to `False`. A higher score means higher accuracy.
+
+Examples:
+
+    Example 1-A simple example
+        >>> accuracy_metric = evaluate.load("accuracy")
+        >>> results = accuracy_metric.compute(references=[0, 1, 2, 0, 1, 2], predictions=[0, 1, 1, 2, 1, 0])
+        >>> print(results)
+        {'accuracy': 0.5}
+
+    Example 2-The same as Example 1, except with `normalize` set to `False`.
+        >>> accuracy_metric = evaluate.load("accuracy")
+        >>> results = accuracy_metric.compute(references=[0, 1, 2, 0, 1, 2], predictions=[0, 1, 1, 2, 1, 0], normalize=False)
+        >>> print(results)
+        {'accuracy': 3.0}
+
+    Example 3-The same as Example 1, except with `sample_weight` set.
+        >>> accuracy_metric = evaluate.load("accuracy")
+        >>> results = accuracy_metric.compute(references=[0, 1, 2, 0, 1, 2], predictions=[0, 1, 1, 2, 1, 0], sample_weight=[0.5, 2, 0.7, 0.5, 9, 0.4])
+        >>> print(results)
+        {'accuracy': 0.8778625954198473}
+"""
+
+
+_CITATION = """
+@article{scikit-learn,
+  title={Scikit-learn: Machine Learning in {P}ython},
+  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+  journal={Journal of Machine Learning Research},
+  volume={12},
+  pages={2825--2830},
+  year={2011}
+}
+"""
+
+
+@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Accuracy(evaluate.Metric):
+    def _info(self):
+        return evaluate.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Sequence(datasets.Value("int32")),
+                    "references": datasets.Sequence(datasets.Value("int32")),
+                }
+                if self.config_name == "multilabel"
+                else {
+                    "predictions": datasets.Value("int32"),
+                    "references": datasets.Value("int32"),
+                }
+            ),
+            reference_urls=["https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html"],
+        )
+
+    def _compute(self, predictions, references, normalize=True, sample_weight=None):
+        return {
+            "accuracy": float(
+                accuracy_score(references, predictions, normalize=normalize, sample_weight=sample_weight)
+            )
+        }
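The script above is a local copy of the `evaluate`-style accuracy metric, so it can be loaded straight from its path inside this repo. A minimal sanity check, assuming the `evaluate` and `scikit-learn` packages are installed and the working directory is the repo root:

import evaluate

# load the metric script above from its local path
accuracy_metric = evaluate.load("accuracy/accuracy.py")
print(accuracy_metric.compute(references=[0, 1, 2], predictions=[0, 1, 1]))
# expected: {'accuracy': 0.6666666666666666}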
covid.py
ADDED
@@ -0,0 +1,128 @@
+# -*- coding: utf-8 -*-
+
+import sys
+sys.path.append('/Users/machi/Library/Python/3.8/lib/python/site-packages')
+
+import os
+import asyncio
+# from pyppeteer import launcher
+# # remove --enable-automation before importing launch, to avoid webdriver detection
+# launcher.AUTOMATION_ARGS.remove("--enable-automation")
+
+from pyppeteer import launch
+from bs4 import BeautifulSoup
+import re
+import time
+
+async def pyppteer_fetchUrl(url):
+    browser = await launch({'headless': False, 'dumpio': True, 'autoClose': True})
+    page = await browser.newPage()
+
+    # await page.setDefaultNavigationTimeout(60000)
+    await page.goto(url)
+    await asyncio.wait([page.waitForNavigation()])
+    html = await page.content()
+    await browser.close()
+    return html
+
+def fetchUrl(url):
+    return asyncio.get_event_loop().run_until_complete(pyppteer_fetchUrl(url))
+
+def getPageUrl():
+    for page in range(1, 5):
+        if page == 1:
+            yield 'http://www.nhc.gov.cn/xcs/yqtb/list_gzbd.shtml'
+        else:
+            url = 'http://www.nhc.gov.cn/xcs/yqtb/list_gzbd_' + str(page) + '.shtml'
+            yield url
+
+def getTitleUrl(html):
+
+    bsobj = BeautifulSoup(html, 'html.parser')
+    titleList = bsobj.find('div', attrs={"class": "list"}).ul.find_all("li")
+    for item in titleList:
+        link = "http://www.nhc.gov.cn" + item.a["href"]
+        title = item.a["title"]
+        date = item.span.text
+        yield title, link, date
+
+def getInfo(pat, s):
+    res = re.search(pat, s)
+    if res:
+        return res.group(1)
+    return '0'
+
+def getContent(html):
+
+    bsobj = BeautifulSoup(html, 'html.parser')
+    cnt = bsobj.find('div', attrs={"id": "xw_box"}).find_all("p")
+    res = []
+
+    if cnt:
+        # parse from the first paragraph
+        s = cnt[0].text
+        res.append(getInfo(r'新增确诊病例(\d+)例', s))
+        res.append(getInfo(r'本土病例(\d+)例', s))
+        res.append(getInfo(r'新增死亡病例(\d+)例', s))
+
+        # parse from the second paragraph
+        s = cnt[1].text
+        res.append(getInfo(r'新增治愈出院病例(\d+)例', s))
+
+        # parse from the fifth paragraph
+        s = cnt[4].text
+        res.append(getInfo(r'新增无症状感染者(\d+)例', s))
+        res.append(getInfo(r'本土(\d+)例', s))
+
+    return res
+
+def saveFile(path, filename, content):
+
+    if not os.path.exists(path):
+        os.makedirs(path)
+
+    # save the file
+    with open(path + filename + ".txt", 'w', encoding='utf-8') as f:
+        f.write(content)
+
+if "__main__" == __name__:
+    # print(getInfo(r'新增死亡病例(\d+)例', '无新增死亡病例。'))
+    # s = '4月28日0—24时,31个省(自治区、直辖市)和新疆生产建设兵团报告新增确诊病例5659例。其中境外输入病例13例(广东3例,北京2例,上海2例,福建2例,黑龙江1例,浙江1例,广西1例,四川1例),含2例由无症状感染者转为确诊病例(浙江1例,福建1例);本土病例5646例(上海5487例,北京47例,吉林42例,浙江31例,山东7例,广东7例,黑龙江4例,江西4例,内蒙古3例,江苏3例,四川3例,河南2例,辽宁1例,福建1例,湖南1例,广西1例,重庆1例,云南1例),含5125例由无症状感染者转为确诊病例(上海5062例,吉林31例,浙江28例,辽宁1例,山东1例,河南1例,云南1例)。新增死亡病例52例,均为本土病例,在上海;无新增疑似病例。'
+    # res = re.search( r'新增确诊病例(\d+)例', s)
+    # print(res.group(1))
+    #
+    # res = re.search( r'本土病例.*),', s)
+    # print(res.group())
+    #
+    # res = re.search( r'新增死亡病例\d+例', s)
+    # print(res.group())
+    #
+    # res = re.search( r'新增治愈出院病例\d+例', s)
+    # print(res.group())
+    #
+    with open('/Users/machi/Desktop/covid.csv', 'w') as f:
+        header = ','.join(['日期', '新增确诊病例', '本土新增确诊病例', '新增死亡病例', '新增治愈出院病例', '新增无症状感染者', '本土新增无症状感染者'])
+        f.write(header + '\n')
+
+        for url in getPageUrl():
+            print(url)
+            try:
+                s = fetchUrl(url)
+            except:
+                continue
+
+            for title, link, date in getTitleUrl(s):
+                print(title, link)
+                # time.sleep(5)
+                try:
+                    html = fetchUrl(link)
+                    content = getContent(html)
+
+                    s = ','.join([date] + content)
+                    f.write(s + '\n')
+                    print('%s write finish' % date)
+                except Exception as e:
+                    print('%s process failed' % date, e)
+                    continue
+
+            # break
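Since `getInfo` drives all of the field extraction above, its contract is worth pinning down: it returns the first capture group when the pattern matches, and the string '0' otherwise. A minimal check of that behavior, assuming the module's dependencies (pyppeteer, bs4) are installed so `covid` is importable:

from covid import getInfo

print(getInfo(r'新增确诊病例(\d+)例', '报告新增确诊病例5659例。'))  # -> '5659'
print(getInfo(r'新增死亡病例(\d+)例', '无新增死亡病例。'))          # -> '0' (no digits between the keywords)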
flask_app.py
ADDED
@@ -0,0 +1,20 @@
+from flask import Flask, request
+from flask_restful import Resource, Api
+
+app = Flask(__name__)
+api = Api(app)
+
+todos = {}
+
+class TodoSimple(Resource):
+    def get(self, todo_id):
+        return {todo_id: todos[todo_id]}
+
+    def put(self, todo_id):
+        todos[todo_id] = request.form['data']
+        return {todo_id: todos[todo_id]}
+
+api.add_resource(TodoSimple, '/<string:todo_id>')
+
+if __name__ == '__main__':
+    app.run(debug=True)
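The resource above follows the flask-restful quickstart pattern: `PUT /<id>` stores the form field `data` and `GET /<id>` echoes it back. A minimal client-side check, a sketch assuming the dev server is running on the default 127.0.0.1:5000 and the `requests` package is installed:

import requests

# PUT stores the form field, GET reads it back
r = requests.put("http://127.0.0.1:5000/todo1", data={"data": "remember the milk"})
print(r.json())  # {'todo1': 'remember the milk'}
print(requests.get("http://127.0.0.1:5000/todo1").json())  # {'todo1': 'remember the milk'}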
main.py
ADDED
@@ -0,0 +1,196 @@
+# -*- coding: utf-8 -*-
+# from __future__ import print_function
+import sys
+import tensorflow as tf
+# import tensorflow_datasets as tfds
+import numpy as np
+import json
+
+tf.enable_eager_execution()
+
+def test():
+    # mirrored_strategy = tf.distribute.MirroredStrategy()
+    # # add the mirrored strategy to the config
+    # config = tf.estimator.RunConfig(train_distribute=mirrored_strategy, eval_distribute=mirrored_strategy)
+    # pass the config into the model
+    regressor = tf.estimator.LinearRegressor(
+        feature_columns=[tf.feature_column.numeric_column('feats')],
+        optimizer='SGD'
+        # ,config=config
+    )
+    def input_fn():
+        dataset = tf.data.Dataset.from_tensors(({"feats": [1.]}, [1.]))
+        return dataset.repeat(1000).batch(10)
+
+    # train normally, evaluate normally
+    regressor.train(input_fn=input_fn
+                    , steps=20
+                    )
+    regressor.evaluate(input_fn=input_fn
+                       # , steps=10
+                       )
+
+def parse_from_json(config_path):
+    """ parse feature columns from feature config path
+
+    Args:
+        config_path: string, a feature config path
+    """
+    total = 0
+    correct = 0
+    with open(config_path, "r") as f:
+        config = json.load(f)
+
+    feature_names = set()
+    features = config["features"]
+    for feature in features:
+        feature_name = feature['feature_name']
+        if '#' in feature_name:
+            feature_name = feature_name.split('#')[0]
+        feature_names.add(feature_name)
+    return feature_names
+
+# convert model's format from *.pb to *.pbtxt
+def parse_model_2_txt(saved_model_dir, output_file):
+    from tensorflow.python.saved_model import loader_impl
+    from google.protobuf import text_format
+    saved_model = loader_impl._parse_saved_model(saved_model_dir)
+    with open(output_file, 'w') as f:
+        f.write(text_format.MessageToString(saved_model))
+
+# parse_model_2_txt('/Users/machi/git/internal/starship_galaxy/model_zoo/scheduler/2022q2combo/old', '/Users/machi/git/internal/starship_galaxy/model_zoo/scheduler/2022q2combo/old/saved_model.pbtxt')
+
+import os
+def build_serving_input_new():
+    import pickle
+    with tf.gfile.Open('feature_desc.pkl', mode='rb') as f:
+        feature_dec = pickle.load(f)
+    sep_placeholder = {}
+    for name, desc in feature_dec.items():
+        if 'sg_poi_click_time_gap_seq_2d' in name:
+            print(desc)
+
+    # return sep_placeholder
+
+def read_schema(file):
+    d = {}
+    with open(file) as f:
+        for line in f:
+            line = line.strip()
+            fds = line.split(' ')
+            d[fds[0]] = fds[1]
+    return d
+
+
+
+def sparse_tensor():
+    indices_tf = tf.constant([[0, 0], [0, 1], [1, 1], [2, 2]], dtype=tf.int64)
+    values_tf = tf.constant([1, 2, 3, 4], dtype=tf.int32)
+    dense_shape_tf = tf.constant([3, 3], dtype=tf.int64)
+
+    sparse_tf = tf.SparseTensor(indices=indices_tf,
+                                values=values_tf,
+                                dense_shape=dense_shape_tf)
+    dense_tf = tf.sparse_tensor_to_dense(sparse_tf)
+
+    # print(dense_tf)
+
+
+    user_tf = tf.constant([1, 2, 3], dtype=tf.int32, shape=[3, 1])
+
+    # each row is one session and holds a varying number of samples; below there are 3 sessions containing 3, 2, and 1 samples respectively
+    # b holds the non-common features
+    b = tf.constant([[1, 2, 1], [0, 3, 2], [0, 0, 4]])
+
+    # a holds the common feature, one value per session
+    a = tf.constant([1, 2, 3], shape=[3, 1])
+
+    # tile a to the same shape as b
+    a = tf.tile(a, tf.constant([1, 3]))
+    print(a)
+
+    # indices of the non-zero elements of b
+    indices = tf.where(tf.not_equal(b, 0))
+    print(indices)
+
+    # pair up the a and b values at the non-zero indices, i.e. the samples after expansion
+    c = tf.concat(values=[tf.expand_dims(tf.gather_nd(a, indices), axis=1), tf.expand_dims(tf.gather_nd(b, indices), axis=1)], axis=1)
+    print(c)
+
+
+def kkv_attention(query, key, value, mask=None):
+    # Transpose key and value matrices
+    key_transpose = tf.transpose(key, perm=[0, 2, 1])
+    value_transpose = tf.transpose(value, perm=[0, 2, 1])
+
+    # Compute dot product between query and key
+    logits = tf.matmul(query, key_transpose)
+
+    # Apply mask (if provided) to logits
+    if mask is not None:
+        logits += mask
+
+    # Apply softmax activation to obtain attention scores
+    attention_scores = tf.nn.softmax(logits, axis=-1)
+
+    # Apply attention scores to value to obtain context vector
+    context_vector = tf.matmul(attention_scores, value_transpose)
+
+    # Transpose back the output
+    context_vector = tf.transpose(context_vector, perm=[0, 2, 1])
+
+    return context_vector, attention_scores
+
+# write kkv attention function
+def write_kkv_attention(query, key, value, mask=None):
+    # Transpose key and value matrices
+    # key_transpose = tf.transpose(key, perm=[0, 2, 1])
+    # value_transpose = tf.transpose(value, perm=[0, 2, 1])
+
+    # Compute dot product between query and key
+    logits = tf.matmul(query, key)
+
+    # Apply mask (if provided) to logits
+    if mask is not None:
+        logits += mask
+
+    # Apply softmax activation to obtain attention scores
+    attention_scores = tf.nn.softmax(logits, axis=-1)
+
+    # Apply attention scores to value to obtain context vector
+    context_vector = tf.matmul(attention_scores, value)
+
+    # Transpose back the output
+    # context_vector = tf.transpose(context_vector, perm=[0, 2, 1])
+
+    return context_vector, attention_scores
+
+# test write_kkv_attention
+def test_write_kkv_attention():
+    # define query and key matrices
+    query = tf.constant([[-0.1250, 0.0000, -0.5000, 0.5000, 0.0000]])
+
+    key = tf.constant([[-0.1250, 0.0000, -0.5000, 0.5000, 0.0000],
+                       [-0.5000, 0.0000, 0.5000, 0.5000, 0.0000],
+                       [-0.2500, -0.5000, 0.0000, 0.5000, 0.2500],
+                       [0.0000, 0.0000, 0.0000, 0.5000, 0.5000],
+                       [0.5000, 0.5000, 0.0000, -0.5000, 0.5000]])
+
+    value = tf.constant([[-0.5000, 0.0000, 0.5000, 0.5000, 0.0000],
+                         [-0.5000, 0.0000, 0.5000, 0.5000, 0.0000],
+                         [-0.5000, 0.0000, 0.5000, 0.5000, 0.0000],
+                         [0.0000, 0.0000, 0.5000, 0.5000, 0.5000],
+                         [0.5000, 0.5000, 0.0000, -0.5000, 0.5000]])
+
+
+    mask = None
+
+    # call write_kkv_attention and obtain context vector and attention scores
+    context_vector, attention_scores = write_kkv_attention(query, key, value, mask)
+
+    # print results
+    print(context_vector)
+    print(attention_scores)
+
+
+print('123', 1)
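For the session-expansion trick inside `sparse_tensor()` (the tf.tile / tf.where / tf.gather_nd sequence), the intermediate values can be made concrete with a NumPy re-statement of the same constants — a minimal sketch, assuming only that numpy is available:

import numpy as np

a = np.tile([[1], [2], [3]], (1, 3))             # common feature tiled: [[1 1 1] [2 2 2] [3 3 3]]
b = np.array([[1, 2, 1], [0, 3, 2], [0, 0, 4]])  # non-common features, zero-padded
idx = np.argwhere(b != 0)                        # [[0 0] [0 1] [0 2] [1 1] [1 2] [2 2]]
c = np.stack([a[idx[:, 0], idx[:, 1]], b[idx[:, 0], idx[:, 1]]], axis=1)
print(c)  # [[1 1] [1 2] [1 1] [2 3] [2 2] [3 4]] -- one (common, non-common) pair per expanded sample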
run.py
ADDED
@@ -0,0 +1,119 @@
+import numpy as np
+import torch
+import dgl
+import dgl.function as fn
+import dgl.nn as dglnn
+import torch.nn as nn
+import torch.nn.functional as F
+
+class RGCN(nn.Module):
+    def __init__(self, in_feats, hid_feats, out_feats, rel_names):
+        super().__init__()
+        # instantiate HeteroGraphConv: in_feats is the input feature size, out_feats the output feature size, aggregate the aggregation type
+        self.conv1 = dglnn.HeteroGraphConv({
+            rel: dglnn.GraphConv(in_feats[rel], hid_feats)
+            for rel in rel_names}, aggregate='sum')
+        self.conv2 = dglnn.HeteroGraphConv({
+            rel: dglnn.GraphConv(hid_feats, out_feats)
+            for rel in rel_names}, aggregate='sum')
+
+    def forward(self, graph, inputs):
+        # the input is a dict of node features
+        h = self.conv1(graph, inputs)
+        h = {k: F.relu(v) for k, v in h.items()}
+        h = self.conv2(graph, h)
+        return h
+
+class HeteroDotProductPredictor(nn.Module):
+    def forward(self, graph, h, etype):
+        # h holds the node representations computed for each node type of the heterogeneous graph (as in section 5.1 of the DGL user guide)
+        with graph.local_scope():
+            graph.ndata['h'] = h
+            graph.apply_edges(fn.u_dot_v('h', 'h', 'score'), etype=etype)
+            return graph.edges[etype].data['score']
+
+
+class Model(nn.Module):
+    def __init__(self, in_features, hidden_features, out_features, rel_names):
+        super().__init__()
+        self.sage = RGCN(in_features, hidden_features, out_features, rel_names)
+        self.pred = HeteroDotProductPredictor()
+    def forward(self, g, neg_g, x, etype):
+        h = self.sage(g, x)
+        return self.pred(g, h, etype), self.pred(neg_g, h, etype)
+
+
+def construct_negative_graph(graph, k, etype):
+    utype, _, vtype = etype
+    src, dst = graph.edges(etype=etype)
+    neg_src = src.repeat_interleave(k)
+    neg_dst = torch.randint(0, graph.num_nodes(vtype), (len(src) * k,))
+    return dgl.heterograph(
+        {etype: (neg_src, neg_dst)},
+        num_nodes_dict={ntype: graph.num_nodes(ntype) for ntype in graph.ntypes})
+
+
+def compute_loss(pos_score, neg_score):
+    # margin loss
+    n_edges = pos_score.shape[0]
+    return (1 - pos_score.unsqueeze(1) + neg_score.view(n_edges, -1)).clamp(min=0).mean()
+
+
+
+n_users = 1000
+n_items = 500
+n_follows = 3000
+n_clicks = 5000
+n_dislikes = 500
+n_hetero_features_user = 10
+n_hetero_features_item = 5
+n_user_classes = 5
+n_max_clicks = 10
+
+follow_src = np.random.randint(0, n_users, n_follows)
+follow_dst = np.random.randint(0, n_users, n_follows)
+click_src = np.random.randint(0, n_users, n_clicks)
+click_dst = np.random.randint(0, n_items, n_clicks)
+dislike_src = np.random.randint(0, n_users, n_dislikes)
+dislike_dst = np.random.randint(0, n_items, n_dislikes)
+
+hetero_graph = dgl.heterograph({
+    ('user', 'follow', 'user'): (follow_src, follow_dst),
+    ('user', 'followed-by', 'user'): (follow_dst, follow_src),
+    ('user', 'click', 'item'): (click_src, click_dst),
+    ('item', 'clicked-by', 'user'): (click_dst, click_src),
+    ('user', 'dislike', 'item'): (dislike_src, dislike_dst),
+    ('item', 'disliked-by', 'user'): (dislike_dst, dislike_src)})
+
+hetero_graph.nodes['user'].data['feature'] = torch.randn(n_users, n_hetero_features_user)
+hetero_graph.nodes['item'].data['feature'] = torch.randn(n_items, n_hetero_features_item)
+hetero_graph.nodes['user'].data['label'] = torch.randint(0, n_user_classes, (n_users,))
+hetero_graph.edges['click'].data['label'] = torch.randint(1, n_max_clicks, (n_clicks,)).float()
+# randomly generate training masks on the user nodes and the click edges
+hetero_graph.nodes['user'].data['train_mask'] = torch.zeros(n_users, dtype=torch.bool).bernoulli(0.6)
+hetero_graph.edges['click'].data['train_mask'] = torch.zeros(n_clicks, dtype=torch.bool).bernoulli(0.6)
+
+# print(hetero_graph)
+hetero_features_dims = {
+    'follow': n_hetero_features_user,
+    'followed-by': n_hetero_features_user,
+    'click': n_hetero_features_user,
+    'clicked-by': n_hetero_features_item,
+    'dislike': n_hetero_features_user,
+    'disliked-by': n_hetero_features_item
+}
+
+k = 5
+model = Model(hetero_features_dims, 20, 5, hetero_graph.etypes)
+user_feats = hetero_graph.nodes['user'].data['feature']
+item_feats = hetero_graph.nodes['item'].data['feature']
+node_features = {'user': user_feats, 'item': item_feats}
+opt = torch.optim.Adam(model.parameters())
+for epoch in range(10):
+    negative_graph = construct_negative_graph(hetero_graph, k, ('user', 'click', 'item'))
+    pos_score, neg_score = model(hetero_graph, negative_graph, node_features, ('user', 'click', 'item'))
+    loss = compute_loss(pos_score, neg_score)
+    opt.zero_grad()
+    loss.backward()
+    opt.step()
+    print(loss.item())
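`compute_loss` above is a pairwise margin (hinge) loss, max(0, 1 - s_pos + s_neg), averaged over the k negatives drawn for each positive edge. A minimal numeric check, assuming one scalar score per edge:

import torch

pos_score = torch.tensor([2.0, 0.2])            # scores for 2 positive edges
neg_score = torch.tensor([1.0, 0.5, 0.3, 0.9])  # k=2 negatives per positive edge
n_edges = pos_score.shape[0]
loss = (1 - pos_score.unsqueeze(1) + neg_score.view(n_edges, -1)).clamp(min=0).mean()
print(loss)  # per-row hinges [0, 0] and [1.1, 1.7] -> mean 0.7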
run.sh
ADDED
@@ -0,0 +1 @@
+python -m main
run_wm_rgcn-20220407.py
ADDED
@@ -0,0 +1,577 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Modeling Relational Data with Graph Convolutional Networks
|
3 |
+
Paper: https://arxiv.org/abs/1703.06103
|
4 |
+
Code: https://github.com/tkipf/relational-gcn
|
5 |
+
Difference compared to tkipf/relation-gcn
|
6 |
+
* l2norm applied to all weights
|
7 |
+
* remove nodes that won't be touched
|
8 |
+
"""
|
9 |
+
import argparse, gc
|
10 |
+
import numpy as np
|
11 |
+
import time
|
12 |
+
import torch as th
|
13 |
+
import torch.nn as nn
|
14 |
+
import dgl.function as fn
|
15 |
+
import torch.nn.functional as F
|
16 |
+
import dgl
|
17 |
+
import dgl.multiprocessing as mp
|
18 |
+
from torch.nn.parallel import DistributedDataParallel
|
19 |
+
from dgl import DGLGraph
|
20 |
+
from functools import partial
|
21 |
+
|
22 |
+
from dgl.data.rdf import AIFBDataset
|
23 |
+
from src.skeleton.graph_builder import StandaloneGraphBuilder
|
24 |
+
from src.skeleton.train_type import SamplingGraphTraining
|
25 |
+
from src.application.rgcn.rgcn import RelGraphEmbedLayer, EntityClassify
|
26 |
+
from dgl.contrib.hostmap_tensor import HostMapTensor
|
27 |
+
from src.skeleton.dataloader import Dataloader
|
28 |
+
import tqdm
|
29 |
+
|
30 |
+
from sklearn.metrics import roc_auc_score
|
31 |
+
# from torch.utils.tensorboard import SummaryWriter
|
32 |
+
|
33 |
+
'''
|
34 |
+
这是单机的异构图节点分类任务-Demo:
|
35 |
+
|
36 |
+
适用于:
|
37 |
+
-- 图的数据量较大,比如100万~1亿点, 1000万~10亿边。
|
38 |
+
|
39 |
+
class RgcnGraphBuilder 负责加载数据
|
40 |
+
class RgcnTrainer 负责训练和预测
|
41 |
+
class RgcnTrainingDataLoader 负责做训练采样和数据遍历
|
42 |
+
|
43 |
+
用户如果需要改动只需要:
|
44 |
+
|
45 |
+
1、改动RgcnGraphBuilder.build_dataset 此方法负责从DGL图中分离训练数据、预测数据、测试数据
|
46 |
+
2、改动RgcnTrainer.train 此方法负责训练逻辑
|
47 |
+
3、改动RgcnTrainer.evaluate 此方法负责离线预测逻辑
|
48 |
+
4、改动RgcnTrainingDataLoader.init 此方法负责输出返回一个迭代遍历器、用于遍历数据集
|
49 |
+
|
50 |
+
这里使用AIFB数据集做精度对齐(epoch=50, batch_size=128)
|
51 |
+
社区aifb数据集节点分类测试集精度: Final Test Accuracy: 0.9250 | Test loss: 0.3929
|
52 |
+
平台aifb数据集节点分类测试集精度: Final Test Accuracy: 0.9250 | Test loss: 0.2953
|
53 |
+
'''
|
54 |
+
class RgcnGraphBuilder(StandaloneGraphBuilder):
|
55 |
+
|
56 |
+
def build_dataset(self, g):
|
57 |
+
|
58 |
+
hg = g
|
59 |
+
# category = self.flags.category
|
60 |
+
num_classes = self.flags.num_classes
|
61 |
+
|
62 |
+
num_rels = len(hg.canonical_etypes)
|
63 |
+
num_of_ntype = len(hg.ntypes)
|
64 |
+
|
65 |
+
# train_mask = hg.nodes[category].data.pop(self.flags.train_mask)
|
66 |
+
# test_mask = hg.nodes[category].data.pop(self.flags.test_mask)
|
67 |
+
# labels = hg.nodes[category].data.pop(self.flags.label)
|
68 |
+
|
69 |
+
eids = th.arange(g.number_of_edges())
|
70 |
+
#eids = np.random.permutation(eids)
|
71 |
+
val_size = int(len(eids) * 0.1)
|
72 |
+
test_size = int(len(eids) * 0.2)
|
73 |
+
# train_size = g.number_of_edges() - val_size - test_size
|
74 |
+
# valid_eids = eids[:val_size]
|
75 |
+
# test_eids = eids[val_size: val_size + test_size]
|
76 |
+
# train_eids = eids[val_size + test_size:]
|
77 |
+
|
78 |
+
valid_eids = dgl.contrib.HostMapTensor('valid_eids', eids[:val_size])
|
79 |
+
test_eids = dgl.contrib.HostMapTensor('test_eids', eids[val_size: val_size + test_size])
|
80 |
+
train_eids = dgl.contrib.HostMapTensor('train_eids', eids[val_size + test_size:])
|
81 |
+
|
82 |
+
# train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
|
83 |
+
# test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
|
84 |
+
|
85 |
+
# val_idx = train_idx
|
86 |
+
|
87 |
+
node_feats = {}
|
88 |
+
for ntype in hg.ntypes:
|
89 |
+
if len(hg.nodes[ntype].data) == 0 or self.flags.node_feats is False:
|
90 |
+
node_feats[str(hg.get_ntype_id(ntype))] = hg.number_of_nodes(ntype)
|
91 |
+
else:
|
92 |
+
assert len(hg.nodes[ntype].data) == 1
|
93 |
+
feat = hg.nodes[ntype].data.pop(self.flags.feat)
|
94 |
+
if feat is not None:
|
95 |
+
feats = HostMapTensor(ntype + '__' + self.flags.feat, feat)
|
96 |
+
node_feats[str(hg.get_ntype_id(ntype))] = feats
|
97 |
+
|
98 |
+
# get target category id
|
99 |
+
# category_id = len(hg.ntypes)
|
100 |
+
# for i, ntype in enumerate(hg.ntypes):
|
101 |
+
# if ntype == category:
|
102 |
+
# category_id = i
|
103 |
+
# print('{}:{}'.format(i, ntype))
|
104 |
+
|
105 |
+
g = dgl.to_homogeneous(hg)
|
106 |
+
ntype_tensor = g.ndata[dgl.NTYPE]
|
107 |
+
ntype_tensor.share_memory_()
|
108 |
+
etype_tensor = g.edata[dgl.ETYPE]
|
109 |
+
etype_tensor = dgl.contrib.HostMapTensor('etype_tensor', etype_tensor)
|
110 |
+
|
111 |
+
typeid_tensor = g.ndata[dgl.NID]
|
112 |
+
typeid_tensor.share_memory_()
|
113 |
+
|
114 |
+
|
115 |
+
|
116 |
+
|
117 |
+
#ntype_tensor = dgl.contrib.HostMapTensor('ntype_tensor', g.ndata[dgl.NTYPE])
|
118 |
+
#etype_tensor = dgl.contrib.HostMapTensor('etype_tensor', g.edata[dgl.ETYPE])
|
119 |
+
#typeid_tensor = dgl.contrib.HostMapTensor('typeid_tensor', g.edata[dgl.NID])
|
120 |
+
|
121 |
+
# node_ids = th.arange(g.number_of_nodes())
|
122 |
+
|
123 |
+
# # find out the target node ids
|
124 |
+
# node_tids = g.ndata[dgl.NTYPE]
|
125 |
+
# loc = (node_tids == category_id)
|
126 |
+
# target_idx = node_ids[loc]
|
127 |
+
# target_idx.share_memory_()
|
128 |
+
# train_idx.share_memory_()
|
129 |
+
# val_idx.share_memory_()
|
130 |
+
# test_idx.share_memory_()
|
131 |
+
|
132 |
+
# # This is a graph with multiple node types, so we want a way to map
|
133 |
+
# # our target node from their global node numberings, back to their
|
134 |
+
# # numberings within their type. This is used when taking the nodes in a
|
135 |
+
# # mini-batch, and looking up their type-specific labels
|
136 |
+
# inv_target = th.empty(node_ids.shape,
|
137 |
+
# dtype=node_ids.dtype)
|
138 |
+
# inv_target.share_memory_()
|
139 |
+
# inv_target[target_idx] = th.arange(0, target_idx.shape[0],
|
140 |
+
# dtype=inv_target.dtype)
|
141 |
+
|
142 |
+
# Create csr/coo/csc formats before launching training processes with multi-gpu.
|
143 |
+
# This avoids creating certain formats in each sub-process, which saves momory and CPU.
|
144 |
+
g.create_formats_()
|
145 |
+
|
146 |
+
g = g.shared_memory('g')
|
147 |
+
|
148 |
+
return g, node_feats, num_of_ntype, num_classes, num_rels, train_eids, valid_eids, test_eids, ntype_tensor, etype_tensor, typeid_tensor
|
149 |
+
|
150 |
+
|
151 |
+
class RgcnTrainer(SamplingGraphTraining):
|
152 |
+
|
153 |
+
def train(self, g, dataset, device, n_gpus, proc_id, **kwargs):
|
154 |
+
|
155 |
+
dev_id = -1 if n_gpus == 0 else device.index
|
156 |
+
queue = kwargs['queue'] if n_gpus > 1 else None
|
157 |
+
|
158 |
+
g, node_feats, num_of_ntype, num_classes, num_rels, train_eids, valid_eids, test_eids, ntype_tensor, etype_tensor, typeid_tensor = dataset
|
159 |
+
|
160 |
+
node_tids = ntype_tensor
|
161 |
+
world_size = n_gpus
|
162 |
+
|
163 |
+
if n_gpus > 0:
|
164 |
+
|
165 |
+
etype_tensor.uva(device)
|
166 |
+
|
167 |
+
for key in node_feats:
|
168 |
+
if not isinstance(node_feats[key], int):
|
169 |
+
node_feats[key].uva(device)
|
170 |
+
|
171 |
+
if n_gpus == 1:
|
172 |
+
g = g.to(device)
|
173 |
+
|
174 |
+
if n_gpus > 1:
|
175 |
+
|
176 |
+
g = g.uva(device)
|
177 |
+
dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
|
178 |
+
master_ip='127.0.0.1', master_port=self.flags.master_port)
|
179 |
+
|
180 |
+
th.distributed.init_process_group(backend=self.flags.communication_backend,
|
181 |
+
init_method=dist_init_method,
|
182 |
+
world_size=world_size,
|
183 |
+
rank=proc_id)
|
184 |
+
|
185 |
+
# node features
|
186 |
+
# None for one-hot feature, if not none, it should be the feature tensor.
|
187 |
+
embed_layer = RelGraphEmbedLayer(dev_id if self.flags.embedding_gpu or not self.flags.dgl_sparse else -1,
|
188 |
+
dev_id,
|
189 |
+
g.number_of_nodes(),
|
190 |
+
node_tids,
|
191 |
+
num_of_ntype,
|
192 |
+
node_feats,
|
193 |
+
self.flags.num_hidden,
|
194 |
+
dgl_sparse=self.flags.dgl_sparse)
|
195 |
+
|
196 |
+
# 设置目标函数
|
197 |
+
loss_fcn = CrossEntropyLoss()
|
198 |
+
|
199 |
+
# create model
|
200 |
+
# all model params are in device.
|
201 |
+
model = EntityClassify(dev_id,
|
202 |
+
g.number_of_nodes(),
|
203 |
+
self.flags.num_hidden,
|
204 |
+
num_classes,
|
205 |
+
num_rels,
|
206 |
+
num_bases=self.flags.num_bases,
|
207 |
+
num_hidden_layers=self.flags.num_layers - 2,
|
208 |
+
dropout=self.flags.dropout,
|
209 |
+
use_self_loop=self.flags.use_self_loop,
|
210 |
+
low_mem=self.flags.low_mem,
|
211 |
+
layer_norm=self.flags.layer_norm)
|
212 |
+
|
213 |
+
if n_gpus == 1:
|
214 |
+
th.cuda.set_device(dev_id)
|
215 |
+
model.cuda(dev_id)
|
216 |
+
if self.flags.dgl_sparse:
|
217 |
+
embed_layer.cuda(dev_id)
|
218 |
+
|
219 |
+
elif n_gpus > 1:
|
220 |
+
if dev_id >= 0:
|
221 |
+
model.cuda(dev_id)
|
222 |
+
model = DistributedDataParallel(model, device_ids=[dev_id], output_device=dev_id)
|
223 |
+
if self.flags.dgl_sparse:
|
224 |
+
embed_layer.cuda(dev_id)
|
225 |
+
if len(list(embed_layer.parameters())) > 0:
|
226 |
+
embed_layer = DistributedDataParallel(embed_layer, device_ids=[dev_id], output_device=dev_id)
|
227 |
+
else:
|
228 |
+
if len(list(embed_layer.parameters())) > 0:
|
229 |
+
embed_layer = DistributedDataParallel(embed_layer, device_ids=None, output_device=None)
|
230 |
+
|
231 |
+
# optimizer
|
232 |
+
dense_params = list(model.parameters())
|
233 |
+
if self.flags.node_feats:
|
234 |
+
if n_gpus > 1:
|
235 |
+
dense_params += list(embed_layer.module.embeds.parameters())
|
236 |
+
else:
|
237 |
+
dense_params += list(embed_layer.embeds.parameters())
|
238 |
+
optimizer = th.optim.Adam(dense_params, lr=self.flags.lr, weight_decay=self.flags.l2norm)
|
239 |
+
|
240 |
+
if self.flags.dgl_sparse:
|
241 |
+
all_params = list(model.parameters()) + list(embed_layer.parameters())
|
242 |
+
optimizer = th.optim.Adam(all_params, lr=self.flags.lr, weight_decay=self.flags.l2norm)
|
243 |
+
if n_gpus > 1 and isinstance(embed_layer, DistributedDataParallel):
|
244 |
+
dgl_emb = embed_layer.module.dgl_emb
|
245 |
+
else:
|
246 |
+
dgl_emb = embed_layer.dgl_emb
|
247 |
+
emb_optimizer = dgl.optim.SparseAdam(params=dgl_emb, lr=self.flags.sparse_lr, eps=1e-8) if len(dgl_emb) > 0 else None
|
248 |
+
else:
|
249 |
+
if n_gpus > 1:
|
250 |
+
embs = list(embed_layer.module.node_embeds.parameters())
|
251 |
+
else:
|
252 |
+
embs = list(embed_layer.node_embeds.parameters())
|
253 |
+
emb_optimizer = th.optim.SparseAdam(embs, lr=self.flags.sparse_lr) if len(embs) > 0 else None
|
254 |
+
|
255 |
+
ntype_tensor = ntype_tensor.to(device)
|
256 |
+
# etype_tensor = etype_tensor.to(device)
|
257 |
+
typeid_tensor = typeid_tensor.to(device)
|
258 |
+
|
259 |
+
# train_eids = train_eids.to(device)
|
260 |
+
# valid_eids = valid_eids.to(device)
|
261 |
+
# test_eids = test_eids.to(device)
|
262 |
+
|
263 |
+
dataset = train_eids, valid_eids, test_eids, device
|
264 |
+
dataloader = RgcnTrainingDataLoader(self.flags).init(g, dataset)
|
265 |
+
loader, val_loader, test_loader = dataloader
|
266 |
+
|
267 |
+
# training loop
|
268 |
+
print("start training...")
|
269 |
+
forward_time = []
|
270 |
+
backward_time = []
|
271 |
+
|
272 |
+
train_time = 0
        validation_time = 0
        test_time = 0
        last_val_acc = 0.0
        do_test = False

        for epoch in range(self.flags.num_epochs):

            if n_gpus > 1:
                loader.set_epoch(epoch)

            tstart = time.time()
            model.train()
            embed_layer.train()

            # for i, sample_data in enumerate(loader):
            for i, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(loader):

                # input_nodes, seeds, blocks = sample_data
                # # map the seed nodes back to their type-specific ids, so that they
                # # can be used to look up their respective labels
                # seeds = inv_target[seeds]

                for block in blocks:
                    gen_norm(block, ntype_tensor, etype_tensor, typeid_tensor)

                t0 = time.time()
                feats = embed_layer(blocks[0].srcdata[dgl.NID],
                                    blocks[0].srcdata['ntype'],
                                    blocks[0].srcdata['type_id'],
                                    node_feats)
                blocks = [block.long().to(device) for block in blocks]
                # logits = model(blocks, feats)

                pos_graph = pos_graph.to(device)
                neg_graph = neg_graph.to(device)
                batch_pred = model(blocks, feats)

                f_step = time.time()
                loss = loss_fcn(batch_pred, pos_graph, neg_graph)

                # loss = F.cross_entropy(logits, labels[seeds])
                # writer.add_scalar('loss', loss, global_step)
                t1 = time.time()
                optimizer.zero_grad()
                if emb_optimizer is not None:
                    emb_optimizer.zero_grad()

                loss.backward()
                if emb_optimizer is not None:
                    emb_optimizer.step()
                optimizer.step()
                t2 = time.time()

                forward_time.append(t1 - t0)
                backward_time.append(t2 - t1)
                # train_acc = th.sum(logits.argmax(dim=1) == labels[seeds]).item() / len(seeds)
                if i % 100 == 0 and proc_id == 0:
                    print("Train Loss: {:.4f}".format(loss.item()))
                    # writer.add_scalar('train_acc', train_acc, global_step)
                    # global_step += 1

            print("Epoch {:05d}:{:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}".
                  format(epoch, self.flags.num_epochs, forward_time[-1], backward_time[-1]))
            tend = time.time()
            train_time += (tend - tstart)

            # val_acc, val_loss, validation_time = self._evaluate(n_gpus, labels, queue, proc_id, model, embed_layer,
            #                                                     val_loader, node_feats, inv_target, 'Validation')

            # do_test = val_acc > last_val_acc
            # last_val_acc = val_acc

            # if n_gpus > 1:
            #     th.distributed.barrier()
            #     if proc_id == 0:
            #         for i in range(1, n_gpus):
            #             queue.put(do_test)
            #     else:
            #         do_test = queue.get()

            # if epoch == self.flags.num_epochs - 1 or (epoch > 0 and do_test):
            #     test_acc, test_loss, test_time = self._evaluate(n_gpus, labels, queue, proc_id, model, embed_layer,
            #                                                     test_loader, node_feats, inv_target, 'Test')
            #     if n_gpus > 1:
            #         th.distributed.barrier()

        print("{}/{} Mean forward time: {:4f}".format(proc_id, n_gpus,
                                                      np.mean(forward_time[len(forward_time) // 4:])))
        print("{}/{} Mean backward time: {:4f}".format(proc_id, n_gpus,
                                                       np.mean(backward_time[len(backward_time) // 4:])))
        # if proc_id == 0:
        #     print("Final Test Accuracy: {:.4f} | Test loss: {:.4f}".format(test_acc, test_loss))
        #     print("Train {}s, valid {}s, test {}s".format(train_time, validation_time, test_time))

    def _evaluate(self, n_gpus, labels, queue, proc_id, model, embed_layer,
                  data_loader, node_feats, inv_target, mode):

        tstart = time.time()
        time_cost = 0
        acc = 0
        loss = 0
        # NOTE: evaluate() below also takes the shared ntype/etype/typeid
        # tensors; they must be threaded through here if the commented-out
        # validation/test calls in train() are re-enabled.
        logits, seeds = evaluate(model, embed_layer,
                                 data_loader, node_feats,
                                 inv_target)
        if queue is not None:
            queue.put((logits, seeds))

        if proc_id == 0:
            loss, acc = self._collect_eval(n_gpus, labels, queue) if queue is not None else \
                (F.cross_entropy(logits, labels[seeds].cpu()).item(),
                 th.sum(logits.argmax(dim=1) == labels[seeds].cpu()).item() / len(seeds))

            print("{} Accuracy: {:.4f} | {} loss: {:.4f}".format(mode, acc, mode, loss))

        tend = time.time()
        time_cost = (tend - tstart)
        return acc, loss, time_cost

    def _collect_eval(self, n_gpus, labels, queue):

        eval_logits = []
        eval_seeds = []
        for i in range(n_gpus):

            log = queue.get()
            eval_l, eval_s = log
            eval_logits.append(eval_l)
            eval_seeds.append(eval_s)

        eval_logits = th.cat(eval_logits)
        eval_seeds = th.cat(eval_seeds)
        eval_loss = F.cross_entropy(eval_logits, labels[eval_seeds].cpu()).item()
        eval_acc = th.sum(eval_logits.argmax(dim=1) == labels[eval_seeds].cpu()).item() / len(eval_seeds)
        return eval_loss, eval_acc

class RgcnTrainingDataLoader(Dataloader):

    def init(self, g, dataset):

        train_eids, valid_eids, test_eids, device = dataset

        # target_idx = target_idx.to(device)

        # Determine how many GPUs are in use
        n_gpus = len(list(map(int, self.flags.gpu.split(','))))

        # Number of neighbors sampled per layer
        fanouts = [int(fanout) for fanout in self.flags.fanout.split(',')]

        sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)

        loader = dgl.dataloading.EdgeDataLoader(
            g, train_eids, sampler,
            negative_sampler=dgl.dataloading.negative_sampler.Uniform(5),
            batch_size=self.flags.batch_size,
            device=device,
            use_ddp=n_gpus > 1,
            shuffle=True,
            drop_last=False,
            num_workers=self.flags.num_workers)

        val_loader = dgl.dataloading.EdgeDataLoader(
            g, valid_eids, sampler,
            negative_sampler=dgl.dataloading.negative_sampler.Uniform(5),
            batch_size=self.flags.batch_size,
            device=device,
            use_ddp=n_gpus > 1,
            shuffle=False,
            drop_last=False,
            num_workers=self.flags.num_workers)

        test_loader = dgl.dataloading.EdgeDataLoader(
            g, test_eids, sampler,
            negative_sampler=dgl.dataloading.negative_sampler.Uniform(5),
            batch_size=self.flags.batch_size,
            device=device,
            use_ddp=n_gpus > 1,
            shuffle=True,
            drop_last=False,
            num_workers=self.flags.num_workers)

        # loader = dgl.dataloading.NodeDataLoader(
        #     g,
        #     target_idx[train_idx],
        #     sampler,
        #     use_ddp=n_gpus > 1,
        #     device=device if self.flags.num_workers == 0 else None,
        #     batch_size=self.flags.batch_size,
        #     shuffle=True,
        #     drop_last=False,
        #     num_workers=self.flags.num_workers)

        # # validation sampler
        # val_loader = dgl.dataloading.NodeDataLoader(
        #     g,
        #     target_idx[val_idx],
        #     sampler,
        #     use_ddp=n_gpus > 1,
        #     device=device if self.flags.num_workers == 0 else None,
        #     batch_size=self.flags.batch_size,
        #     shuffle=False,
        #     drop_last=False,
        #     num_workers=self.flags.num_workers)

        # # test sampler
        # test_sampler = dgl.dataloading.MultiLayerNeighborSampler([-1] * self.flags.num_layers)
        # test_loader = dgl.dataloading.NodeDataLoader(
        #     g,
        #     target_idx[test_idx],
        #     test_sampler,
        #     use_ddp=n_gpus > 1,
        #     device=device if self.flags.num_workers == 0 else None,
        #     batch_size=self.flags.eval_batch_size,
        #     shuffle=False,
        #     drop_last=False,
        #     num_workers=self.flags.num_workers)

        return loader, val_loader, test_loader

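# Usage sketch: with a negative sampler attached, dgl.dataloading.EdgeDataLoader
# yields 4-tuples (input_nodes, pos_graph, neg_graph, blocks) -- pos_graph holds
# the sampled positive edges, neg_graph holds the uniformly drawn negatives
# (5 per positive edge with Uniform(5) above), and blocks are the per-layer
# message-passing subgraphs. Assuming a `loader` built by init():
#
#     for input_nodes, pos_graph, neg_graph, blocks in loader:
#         assert neg_graph.num_edges() == 5 * pos_graph.num_edges()
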
def gen_norm(g, ntype_tensor, etype_tensor, typeid_tensor):

    _, v, eid = g.all_edges(form='all')
    _, inverse_index, count = th.unique(v, return_inverse=True, return_counts=True)
    degrees = count[inverse_index]
    norm = th.ones(eid.shape[0], device=eid.device) / degrees
    norm = norm.unsqueeze(1)
    g.edata['norm'] = norm

    g.srcdata['ntype'] = ntype_tensor[g.srcdata[dgl.NID]]
    g.edata['etype'] = etype_tensor[eid]
    g.srcdata['type_id'] = typeid_tensor[g.srcdata[dgl.NID]]

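# Sanity check of the normalization above: each edge is weighted by
# 1 / in-degree of its destination node. A tiny self-contained example:
#
#     v = th.tensor([1, 1, 2])  # destination node of each edge
#     _, inv, cnt = th.unique(v, return_inverse=True, return_counts=True)
#     th.ones(3) / cnt[inv]     # -> tensor([0.5000, 0.5000, 1.0000])
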
def evaluate(model, embed_layer, eval_loader, node_feats, inv_target,
             ntype_tensor, etype_tensor, typeid_tensor):

    model.eval()
    embed_layer.eval()
    eval_logits = []
    eval_seeds = []

    with th.no_grad():
        th.cuda.empty_cache()
        for i, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(eval_loader):

            for block in blocks:
                gen_norm(block, ntype_tensor, etype_tensor, typeid_tensor)

            feats = embed_layer(blocks[0].srcdata[dgl.NID],
                                blocks[0].srcdata['ntype'],
                                blocks[0].srcdata['type_id'],
                                node_feats)
            logits = model(blocks, feats)

            loss_fcn = AUC()
            auc = loss_fcn(logits, pos_graph, neg_graph)
            print("valid auc: {:.4f}".format(auc.item()))

            # eval_logits.append(logits.cpu())

    # Nothing is accumulated above (the append is commented out), so guard the
    # concatenation: th.cat() raises on an empty list.
    eval_logits = th.cat(eval_logits) if eval_logits else th.empty(0)
    eval_seeds = th.cat(eval_seeds) if eval_seeds else th.empty(0, dtype=th.long)

    return eval_logits, eval_seeds

class CrossEntropyLoss(nn.Module):

    def forward(self, block_outputs, pos_graph, neg_graph):

        with pos_graph.local_scope():
            pos_graph.ndata['h'] = block_outputs
            pos_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            pos_score = pos_graph.edata['score']
        with neg_graph.local_scope():
            neg_graph.ndata['h'] = block_outputs
            neg_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            neg_score = neg_graph.edata['score']

        score = th.cat([pos_score, neg_score])
        label = th.cat([th.ones_like(pos_score), th.zeros_like(neg_score)]).long()
        loss = F.binary_cross_entropy_with_logits(score, label.float())
        return loss

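# This is the standard negative-sampling link-prediction objective: dot-product
# edge scores, label 1 for observed edges, label 0 for sampled negatives,
# trained with binary cross-entropy on the logits. Equivalent on toy tensors:
#
#     pos_score = th.tensor([[2.0], [1.5]])   # scores of 2 positive edges
#     neg_score = th.tensor([[-1.0], [0.2]])  # scores of 2 negative edges
#     score = th.cat([pos_score, neg_score])
#     label = th.cat([th.ones_like(pos_score), th.zeros_like(neg_score)])
#     F.binary_cross_entropy_with_logits(score, label)  # scalar loss
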
class AUC(nn.Module):

    def forward(self, block_outputs, pos_graph, neg_graph):

        with pos_graph.local_scope():
            pos_graph.ndata['h'] = block_outputs
            pos_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            pos_score = pos_graph.edata['score']
        with neg_graph.local_scope():
            neg_graph.ndata['h'] = block_outputs
            neg_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            neg_score = neg_graph.edata['score']

        # Move scores to host memory first: .numpy() raises on CUDA tensors.
        score = th.cat([pos_score, neg_score]).detach().cpu().numpy()
        label = th.cat([th.ones_like(pos_score), th.zeros_like(neg_score)]).cpu().numpy()

        return roc_auc_score(label, score)
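A quick CPU-only sanity check of the AUC module (a sketch; `emb` is a made-up 3-node embedding table, and the toy graphs stand in for the sampled positive/negative pair graphs):

    pos_g = dgl.graph(([0, 1], [1, 2]), num_nodes=3)
    neg_g = dgl.graph(([0, 1], [2, 0]), num_nodes=3)
    emb = th.randn(3, 16)
    print(AUC()(emb, pos_g, neg_g))  # ROC-AUC of the dot-product scores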
run_wm_rgcn.py
ADDED
@@ -0,0 +1,568 @@
"""
Modeling Relational Data with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1703.06103
Code: https://github.com/tkipf/relational-gcn
Difference compared to tkipf/relation-gcn
* l2norm applied to all weights
* remove nodes that won't be touched
"""
import argparse, gc
import numpy as np
import time
import torch as th
import torch.nn as nn
import dgl.function as fn
import torch.nn.functional as F
import dgl
import dgl.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel
from dgl import DGLGraph
from functools import partial

from dgl.data.rdf import AIFBDataset
from src.skeleton.graph_builder import StandaloneGraphBuilder
from src.skeleton.train_type import SamplingGraphTraining
from src.application.rgcn.rgcn import RelGraphEmbedLayer, EntityClassify
from dgl.contrib.hostmap_tensor import HostMapTensor
from src.skeleton.dataloader import Dataloader
import tqdm

from sklearn.metrics import roc_auc_score
# from torch.utils.tensorboard import SummaryWriter

'''
This is a single-machine heterogeneous-graph node classification demo:

Intended for:
-- Fairly large graphs, e.g. 1M-100M nodes and 10M-1B edges.

class RgcnGraphBuilder is responsible for loading the data
class RgcnTrainer is responsible for training and inference
class RgcnTrainingDataLoader is responsible for training-time sampling and iterating over the dataset

To adapt this script, a user only needs to:

1. Modify RgcnGraphBuilder.build_dataset, which splits the DGL graph into training, validation, and test data
2. Modify RgcnTrainer.train, which implements the training logic
3. Modify RgcnTrainer.evaluate, which implements the offline inference logic
4. Modify RgcnTrainingDataLoader.init, which returns an iterator used to traverse the dataset

The AIFB dataset is used here for accuracy alignment (epoch=50, batch_size=128)
Community AIFB node-classification test accuracy: Final Test Accuracy: 0.9250 | Test loss: 0.3929
Platform AIFB node-classification test accuracy: Final Test Accuracy: 0.9250 | Test loss: 0.2953
'''
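# The flags referenced throughout this script (all read as self.flags.<name>)
# are: gpu, fanout, batch_size, num_workers, num_epochs, num_classes,
# num_hidden, num_bases, num_layers, dropout, use_self_loop, low_mem,
# layer_norm, lr, sparse_lr, l2norm, dgl_sparse, embedding_gpu, node_feats,
# feat, master_port and communication_backend; they are expected to be
# supplied by the surrounding src.skeleton framework.
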
class RgcnGraphBuilder(StandaloneGraphBuilder):

    def build_dataset(self, g):

        hg = g
        # category = self.flags.category
        num_classes = self.flags.num_classes

        num_rels = len(hg.canonical_etypes)
        num_of_ntype = len(hg.ntypes)

        # train_mask = hg.nodes[category].data.pop(self.flags.train_mask)
        # test_mask = hg.nodes[category].data.pop(self.flags.test_mask)
        # labels = hg.nodes[category].data.pop(self.flags.label)

        eids = th.arange(g.number_of_edges())
        # eids = np.random.permutation(eids)
        val_size = int(len(eids) * 0.1)
        test_size = int(len(eids) * 0.2)
        # train_size = g.number_of_edges() - val_size - test_size
        valid_eids = eids[:val_size]
        test_eids = eids[val_size: val_size + test_size]
        train_eids = eids[val_size + test_size:]

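        # A 10% / 20% / 70% validation/test/train edge split. With 1,000 edges
        # this yields valid_eids = eids[:100], test_eids = eids[100:300] and
        # train_eids = eids[300:]. Note the permutation above is commented out,
        # so the split follows edge-id order rather than a random shuffle.
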
        # train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
        # test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()

        # val_idx = train_idx

        node_feats = {}
        for ntype in hg.ntypes:
            if len(hg.nodes[ntype].data) == 0 or self.flags.node_feats is False:
                node_feats[str(hg.get_ntype_id(ntype))] = hg.number_of_nodes(ntype)
            else:
                assert len(hg.nodes[ntype].data) == 1
                feat = hg.nodes[ntype].data.pop(self.flags.feat)
                if feat is not None:
                    feats = HostMapTensor(ntype + '__' + self.flags.feat, feat)
                    node_feats[str(hg.get_ntype_id(ntype))] = feats

        # get target category id
        # category_id = len(hg.ntypes)
        # for i, ntype in enumerate(hg.ntypes):
        #     if ntype == category:
        #         category_id = i
        #     print('{}:{}'.format(i, ntype))

        g = dgl.to_homogeneous(hg)
        ntype_tensor = g.ndata[dgl.NTYPE]
        ntype_tensor.share_memory_()
        etype_tensor = g.edata[dgl.ETYPE]
        etype_tensor.share_memory_()
        typeid_tensor = g.ndata[dgl.NID]
        typeid_tensor.share_memory_()

        # ntype_tensor = dgl.contrib.HostMapTensor('ntype_tensor', g.ndata[dgl.NTYPE])
        # etype_tensor = dgl.contrib.HostMapTensor('etype_tensor', g.edata[dgl.ETYPE])
        # typeid_tensor = dgl.contrib.HostMapTensor('typeid_tensor', g.edata[dgl.NID])

        # node_ids = th.arange(g.number_of_nodes())

        # # find out the target node ids
        # node_tids = g.ndata[dgl.NTYPE]
        # loc = (node_tids == category_id)
        # target_idx = node_ids[loc]
        # target_idx.share_memory_()
        # train_idx.share_memory_()
        # val_idx.share_memory_()
        # test_idx.share_memory_()

        # # This is a graph with multiple node types, so we want a way to map
        # # our target node from their global node numberings, back to their
        # # numberings within their type. This is used when taking the nodes in a
        # # mini-batch, and looking up their type-specific labels
        # inv_target = th.empty(node_ids.shape,
        #                       dtype=node_ids.dtype)
        # inv_target.share_memory_()
        # inv_target[target_idx] = th.arange(0, target_idx.shape[0],
        #                                    dtype=inv_target.dtype)

        # Create csr/coo/csc formats before launching training processes with multi-gpu.
        # This avoids creating certain formats in each sub-process, which saves memory and CPU.
        g.create_formats_()

        g = g.shared_memory('g')

        return g, node_feats, num_of_ntype, num_classes, num_rels, train_eids, valid_eids, test_eids, ntype_tensor, etype_tensor, typeid_tensor

class RgcnTrainer(SamplingGraphTraining):

    def train(self, g, dataset, device, n_gpus, proc_id, **kwargs):

        dev_id = -1 if n_gpus == 0 else device.index
        queue = kwargs['queue'] if n_gpus > 1 else None

        g, node_feats, num_of_ntype, num_classes, num_rels, train_eids, valid_eids, test_eids, ntype_tensor, etype_tensor, typeid_tensor = dataset

        node_tids = ntype_tensor
        world_size = n_gpus

        if n_gpus > 0:
            for key in node_feats:
                if not isinstance(node_feats[key], int):
                    node_feats[key].uva(device)

        if n_gpus == 1:
            g = g.to(device)

        if n_gpus > 1:

            g = g.uva(device)
            dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
                master_ip='127.0.0.1', master_port=self.flags.master_port)

            th.distributed.init_process_group(backend=self.flags.communication_backend,
                                              init_method=dist_init_method,
                                              world_size=world_size,
                                              rank=proc_id)

        # node features
        # None for one-hot feature, if not none, it should be the feature tensor.
        embed_layer = RelGraphEmbedLayer(dev_id if self.flags.embedding_gpu or not self.flags.dgl_sparse else -1,
                                         dev_id,
                                         g.number_of_nodes(),
                                         node_tids,
                                         num_of_ntype,
                                         node_feats,
                                         self.flags.num_hidden,
                                         dgl_sparse=self.flags.dgl_sparse)

        # Set up the training objective
        loss_fcn = CrossEntropyLoss()

        # create model
        # all model params are in device.
        model = EntityClassify(dev_id,
                               g.number_of_nodes(),
                               self.flags.num_hidden,
                               num_classes,
                               num_rels,
                               num_bases=self.flags.num_bases,
                               num_hidden_layers=self.flags.num_layers - 2,
                               dropout=self.flags.dropout,
                               use_self_loop=self.flags.use_self_loop,
                               low_mem=self.flags.low_mem,
                               layer_norm=self.flags.layer_norm)

        if n_gpus == 1:
            th.cuda.set_device(dev_id)
            # labels = labels.to(dev_id)
            model.cuda(dev_id)
            if self.flags.dgl_sparse:
                embed_layer.cuda(dev_id)

        elif n_gpus > 1:
            # labels = labels.to(dev_id)
            if dev_id >= 0:
                model.cuda(dev_id)
            model = DistributedDataParallel(model, device_ids=[dev_id], output_device=dev_id)
            if self.flags.dgl_sparse:
                embed_layer.cuda(dev_id)
                if len(list(embed_layer.parameters())) > 0:
                    embed_layer = DistributedDataParallel(embed_layer, device_ids=[dev_id], output_device=dev_id)
            else:
                if len(list(embed_layer.parameters())) > 0:
                    embed_layer = DistributedDataParallel(embed_layer, device_ids=None, output_device=None)

        # optimizer
        dense_params = list(model.parameters())
        if self.flags.node_feats:
            if n_gpus > 1:
                dense_params += list(embed_layer.module.embeds.parameters())
            else:
                dense_params += list(embed_layer.embeds.parameters())
        optimizer = th.optim.Adam(dense_params, lr=self.flags.lr, weight_decay=self.flags.l2norm)

        if self.flags.dgl_sparse:
            all_params = list(model.parameters()) + list(embed_layer.parameters())
            optimizer = th.optim.Adam(all_params, lr=self.flags.lr, weight_decay=self.flags.l2norm)
            if n_gpus > 1 and isinstance(embed_layer, DistributedDataParallel):
                dgl_emb = embed_layer.module.dgl_emb
            else:
                dgl_emb = embed_layer.dgl_emb
            emb_optimizer = dgl.optim.SparseAdam(params=dgl_emb, lr=self.flags.sparse_lr, eps=1e-8) if len(dgl_emb) > 0 else None
        else:
            if n_gpus > 1:
                embs = list(embed_layer.module.node_embeds.parameters())
            else:
                embs = list(embed_layer.node_embeds.parameters())
            emb_optimizer = th.optim.SparseAdam(embs, lr=self.flags.sparse_lr) if len(embs) > 0 else None

        ntype_tensor = ntype_tensor.to(device)
        etype_tensor = etype_tensor.to(device)
        typeid_tensor = typeid_tensor.to(device)
        train_eids = train_eids.to(device)
        valid_eids = valid_eids.to(device)
        test_eids = test_eids.to(device)

        dataset = train_eids, valid_eids, test_eids, device
        dataloader = RgcnTrainingDataLoader(self.flags).init(g, dataset)
        loader, val_loader, test_loader = dataloader

        # training loop
        print("start training...")
        forward_time = []
        backward_time = []

        train_time = 0
        validation_time = 0
        test_time = 0
        last_val_acc = 0.0
        do_test = False

        for epoch in range(self.flags.num_epochs):

            if n_gpus > 1:
                loader.set_epoch(epoch)

            tstart = time.time()
            model.train()
            embed_layer.train()

            # for i, sample_data in enumerate(loader):
            for i, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(loader):

                # input_nodes, seeds, blocks = sample_data
                # # map the seed nodes back to their type-specific ids, so that they
                # # can be used to look up their respective labels
                # seeds = inv_target[seeds]

                for block in blocks:
                    gen_norm(block, ntype_tensor, etype_tensor, typeid_tensor)

                t0 = time.time()
                feats = embed_layer(blocks[0].srcdata[dgl.NID],
                                    blocks[0].srcdata['ntype'],
                                    blocks[0].srcdata['type_id'],
                                    node_feats)
                blocks = [block.int().to(device) for block in blocks]
                # logits = model(blocks, feats)

                pos_graph = pos_graph.to(device)
                neg_graph = neg_graph.to(device)
                batch_pred = model(blocks, feats)

                f_step = time.time()
                loss = loss_fcn(batch_pred, pos_graph, neg_graph)

                # loss = F.cross_entropy(logits, labels[seeds])
                # writer.add_scalar('loss', loss, global_step)
                t1 = time.time()
                optimizer.zero_grad()
                if emb_optimizer is not None:
                    emb_optimizer.zero_grad()

                loss.backward()
                if emb_optimizer is not None:
                    emb_optimizer.step()
                optimizer.step()
                t2 = time.time()

                forward_time.append(t1 - t0)
                backward_time.append(t2 - t1)
                # train_acc = th.sum(logits.argmax(dim=1) == labels[seeds]).item() / len(seeds)
                if i % 100 == 0 and proc_id == 0:
                    print("Train Loss: {:.4f}".format(loss.item()))
                    # writer.add_scalar('train_acc', train_acc, global_step)
                    # global_step += 1

            print("Epoch {:05d}:{:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}".
                  format(epoch, self.flags.num_epochs, forward_time[-1], backward_time[-1]))
            tend = time.time()
            train_time += (tend - tstart)

            # val_acc, val_loss, validation_time = self._evaluate(n_gpus, labels, queue, proc_id, model, embed_layer,
            #                                                     val_loader, node_feats, inv_target, 'Validation')

            # do_test = val_acc > last_val_acc
            # last_val_acc = val_acc

            # if n_gpus > 1:
            #     th.distributed.barrier()
            #     if proc_id == 0:
            #         for i in range(1, n_gpus):
            #             queue.put(do_test)
            #     else:
            #         do_test = queue.get()

            # if epoch == self.flags.num_epochs - 1 or (epoch > 0 and do_test):
            #     test_acc, test_loss, test_time = self._evaluate(n_gpus, labels, queue, proc_id, model, embed_layer,
            #                                                     test_loader, node_feats, inv_target, 'Test')
            #     if n_gpus > 1:
            #         th.distributed.barrier()

        print("{}/{} Mean forward time: {:4f}".format(proc_id, n_gpus,
                                                      np.mean(forward_time[len(forward_time) // 4:])))
        print("{}/{} Mean backward time: {:4f}".format(proc_id, n_gpus,
                                                       np.mean(backward_time[len(backward_time) // 4:])))
        # if proc_id == 0:
        #     print("Final Test Accuracy: {:.4f} | Test loss: {:.4f}".format(test_acc, test_loss))
        #     print("Train {}s, valid {}s, test {}s".format(train_time, validation_time, test_time))

    def _evaluate(self, n_gpus, labels, queue, proc_id, model, embed_layer,
                  data_loader, node_feats, inv_target, mode):

        tstart = time.time()
        time_cost = 0
        acc = 0
        loss = 0
        # NOTE: evaluate() below also takes the shared ntype/etype/typeid
        # tensors; they must be threaded through here if the commented-out
        # validation/test calls in train() are re-enabled.
        logits, seeds = evaluate(model, embed_layer,
                                 data_loader, node_feats,
                                 inv_target)
        if queue is not None:
            queue.put((logits, seeds))

        if proc_id == 0:
            loss, acc = self._collect_eval(n_gpus, labels, queue) if queue is not None else \
                (F.cross_entropy(logits, labels[seeds].cpu()).item(),
                 th.sum(logits.argmax(dim=1) == labels[seeds].cpu()).item() / len(seeds))

            print("{} Accuracy: {:.4f} | {} loss: {:.4f}".format(mode, acc, mode, loss))

        tend = time.time()
        time_cost = (tend - tstart)
        return acc, loss, time_cost

    def _collect_eval(self, n_gpus, labels, queue):

        eval_logits = []
        eval_seeds = []
        for i in range(n_gpus):

            log = queue.get()
            eval_l, eval_s = log
            eval_logits.append(eval_l)
            eval_seeds.append(eval_s)

        eval_logits = th.cat(eval_logits)
        eval_seeds = th.cat(eval_seeds)
        eval_loss = F.cross_entropy(eval_logits, labels[eval_seeds].cpu()).item()
        eval_acc = th.sum(eval_logits.argmax(dim=1) == labels[eval_seeds].cpu()).item() / len(eval_seeds)
        return eval_loss, eval_acc

class RgcnTrainingDataLoader(Dataloader):

    def init(self, g, dataset):

        train_eids, valid_eids, test_eids, device = dataset

        # target_idx = target_idx.to(device)

        # Determine how many GPUs are in use
        n_gpus = len(list(map(int, self.flags.gpu.split(','))))

        # Number of neighbors sampled per layer
        fanouts = [int(fanout) for fanout in self.flags.fanout.split(',')]

        sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)

        loader = dgl.dataloading.EdgeDataLoader(
            g, train_eids, sampler,
            negative_sampler=dgl.dataloading.negative_sampler.Uniform(5),
            batch_size=self.flags.batch_size,
            device=device,
            use_ddp=n_gpus > 1,
            shuffle=True,
            drop_last=False,
            num_workers=self.flags.num_workers)

        val_loader = dgl.dataloading.EdgeDataLoader(
            g, valid_eids, sampler,
            negative_sampler=dgl.dataloading.negative_sampler.Uniform(5),
            batch_size=self.flags.batch_size,
            device=device,
            use_ddp=n_gpus > 1,
            shuffle=False,
            drop_last=False,
            num_workers=self.flags.num_workers)

        test_loader = dgl.dataloading.EdgeDataLoader(
            g, test_eids, sampler,
            negative_sampler=dgl.dataloading.negative_sampler.Uniform(5),
            batch_size=self.flags.batch_size,
            device=device,
            use_ddp=n_gpus > 1,
            shuffle=True,
            drop_last=False,
            num_workers=self.flags.num_workers)

        # loader = dgl.dataloading.NodeDataLoader(
        #     g,
        #     target_idx[train_idx],
        #     sampler,
        #     use_ddp=n_gpus > 1,
        #     device=device if self.flags.num_workers == 0 else None,
        #     batch_size=self.flags.batch_size,
        #     shuffle=True,
        #     drop_last=False,
        #     num_workers=self.flags.num_workers)

        # # validation sampler
        # val_loader = dgl.dataloading.NodeDataLoader(
        #     g,
        #     target_idx[val_idx],
        #     sampler,
        #     use_ddp=n_gpus > 1,
        #     device=device if self.flags.num_workers == 0 else None,
        #     batch_size=self.flags.batch_size,
        #     shuffle=False,
        #     drop_last=False,
        #     num_workers=self.flags.num_workers)

        # # test sampler
        # test_sampler = dgl.dataloading.MultiLayerNeighborSampler([-1] * self.flags.num_layers)
        # test_loader = dgl.dataloading.NodeDataLoader(
        #     g,
        #     target_idx[test_idx],
        #     test_sampler,
        #     use_ddp=n_gpus > 1,
        #     device=device if self.flags.num_workers == 0 else None,
        #     batch_size=self.flags.eval_batch_size,
        #     shuffle=False,
        #     drop_last=False,
        #     num_workers=self.flags.num_workers)

        return loader, val_loader, test_loader

def gen_norm(g, ntype_tensor, etype_tensor, typeid_tensor):

    _, v, eid = g.all_edges(form='all')
    _, inverse_index, count = th.unique(v, return_inverse=True, return_counts=True)
    degrees = count[inverse_index]
    norm = th.ones(eid.shape[0], device=eid.device) / degrees
    norm = norm.unsqueeze(1)
    g.edata['norm'] = norm

    g.srcdata['ntype'] = ntype_tensor[g.srcdata[dgl.NID]]
    g.edata['etype'] = etype_tensor[eid]
    g.srcdata['type_id'] = typeid_tensor[g.srcdata[dgl.NID]]


def evaluate(model, embed_layer, eval_loader, node_feats, inv_target,
             ntype_tensor, etype_tensor, typeid_tensor):

    model.eval()
    embed_layer.eval()
    eval_logits = []
    eval_seeds = []

    with th.no_grad():
        th.cuda.empty_cache()
        for i, (input_nodes, pos_graph, neg_graph, blocks) in enumerate(eval_loader):

            for block in blocks:
                # gen_norm requires the shared type tensors; the original call
                # passed no arguments here, which would raise a TypeError.
                gen_norm(block, ntype_tensor, etype_tensor, typeid_tensor)

            feats = embed_layer(blocks[0].srcdata[dgl.NID],
                                blocks[0].srcdata['ntype'],
                                blocks[0].srcdata['type_id'],
                                node_feats)
            logits = model(blocks, feats)

            loss_fcn = AUC()
            auc = loss_fcn(logits, pos_graph, neg_graph)
            print("valid auc: {:.4f}".format(auc.item()))

            # eval_logits.append(logits.cpu())

    # Nothing is accumulated above (the append is commented out), so guard the
    # concatenation: th.cat() raises on an empty list.
    eval_logits = th.cat(eval_logits) if eval_logits else th.empty(0)
    eval_seeds = th.cat(eval_seeds) if eval_seeds else th.empty(0, dtype=th.long)

    return eval_logits, eval_seeds

class CrossEntropyLoss(nn.Module):

    def forward(self, block_outputs, pos_graph, neg_graph):

        with pos_graph.local_scope():
            pos_graph.ndata['h'] = block_outputs
            pos_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            pos_score = pos_graph.edata['score']
        with neg_graph.local_scope():
            neg_graph.ndata['h'] = block_outputs
            neg_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            neg_score = neg_graph.edata['score']

        score = th.cat([pos_score, neg_score])
        label = th.cat([th.ones_like(pos_score), th.zeros_like(neg_score)]).long()
        loss = F.binary_cross_entropy_with_logits(score, label.float())
        return loss

class AUC(nn.Module):

    def forward(self, block_outputs, pos_graph, neg_graph):

        with pos_graph.local_scope():
            pos_graph.ndata['h'] = block_outputs
            pos_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            pos_score = pos_graph.edata['score']
        with neg_graph.local_scope():
            neg_graph.ndata['h'] = block_outputs
            neg_graph.apply_edges(fn.u_dot_v('h', 'h', 'score'))
            neg_score = neg_graph.edata['score']

        # Move scores to host memory first: .numpy() raises on CUDA tensors.
        score = th.cat([pos_score, neg_score]).detach().cpu().numpy()
        label = th.cat([th.ones_like(pos_score), th.zeros_like(neg_score)]).cpu().numpy()

        return roc_auc_score(label, score)
test1.py
ADDED
@@ -0,0 +1,27 @@
from typing import Union

from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class Item(BaseModel):
    name: str
    price: float
    is_offer: Union[bool, None] = None


@app.get("/")
def read_root():
    return {"Hello": "World"}


@app.get("/items/{item_id}")
def read_item(item_id: int, q: Union[str, None] = None):
    return {"item_id": item_id, "q": q}


@app.put("/items/{item_id}")
def update_item(item_id: int, item: Item):
    return {"item_name": item.name, "item_id": item_id}
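This is the stock FastAPI first-steps app. To serve it locally you would typically run it under an ASGI server such as uvicorn:

    uvicorn test1:app --reload

after which the interactive API docs are available at http://127.0.0.1:8000/docs.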
test2.py
ADDED
@@ -0,0 +1,8 @@
import gradio as gr

def greet(name):
    return "Hello " + name + "!"

demo = gr.Interface(fn=greet, inputs="text", outputs="text")

demo.launch()
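This is the minimal Gradio quickstart demo: `demo.launch()` starts a local server (http://127.0.0.1:7860 by default), and passing `demo.launch(share=True)` would additionally create a temporary public link.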
~/Desktop/roberta-base/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
~/Desktop/roberta-base/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21
ADDED
The diff for this file is too large to render.
See raw diff
~/Desktop/roberta-base/models--roberta-base/blobs/5606f48548d99a9829d10a96cd364b816b02cd21.lock
ADDED
File without changes
~/Desktop/roberta-base/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957
ADDED
@@ -0,0 +1,21 @@
{
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "type_vocab_size": 1,
  "vocab_size": 50265
}
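This blob is the standard roberta-base config.json. A minimal sketch of consuming the checkpoint with the `transformers` library (assuming it is installed and the cached Hub files above are where downloads resolve from):

    from transformers import RobertaTokenizer, RobertaForMaskedLM

    tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
    model = RobertaForMaskedLM.from_pretrained("roberta-base")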
~/Desktop/roberta-base/models--roberta-base/blobs/8db5e7ac5bfc9ec8b613b776009300fe3685d957.lock
ADDED
File without changes
~/Desktop/roberta-base/models--roberta-base/refs/main
ADDED
@@ -0,0 +1 @@
bc2764f8af2e92b6eb5679868df33e224075ca68
~/Desktop/roberta-base/special_tokens_map.json
ADDED
@@ -0,0 +1,15 @@
{
  "bos_token": "<s>",
  "cls_token": "<s>",
  "eos_token": "</s>",
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "unk_token": "<unk>"
}
~/Desktop/roberta-base/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
~/Desktop/roberta-base/tokenizer_config.json
ADDED
@@ -0,0 +1,15 @@
{
  "add_prefix_space": false,
  "bos_token": "<s>",
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "mask_token": "<mask>",
  "model_max_length": 512,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "special_tokens_map_file": null,
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "unk_token": "<unk>"
}
~/Desktop/roberta-base/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff