Spaces:
Running
Running
Joshua Lochner
committed on
Commit
·
e3d3d3f
1
Parent(s):
cfbd4d5
Use correct logger per script
Browse files
- src/evaluate.py +2 -0
- src/predict.py +1 -1
- src/preprocess.py +7 -5
src/evaluate.py
CHANGED
|
@@ -128,6 +128,8 @@ def calculate_metrics(labelled_words, predictions):
|
|
| 128 |
|
| 129 |
|
| 130 |
def main():
|
|
|
|
|
|
|
| 131 |
hf_parser = HfArgumentParser((
|
| 132 |
EvaluationArguments,
|
| 133 |
DatasetArguments,
|
|
|
|
| 128 |
|
| 129 |
|
| 130 |
def main():
|
| 131 |
+
logger.setLevel(logging.DEBUG)
|
| 132 |
+
|
| 133 |
hf_parser = HfArgumentParser((
|
| 134 |
EvaluationArguments,
|
| 135 |
DatasetArguments,
|
src/predict.py
CHANGED
|
@@ -393,7 +393,7 @@ def segments_to_predictions(segments, model, tokenizer):
|
|
| 393 |
|
| 394 |
def main():
|
| 395 |
# Test on unseen data
|
| 396 |
-
|
| 397 |
|
| 398 |
hf_parser = HfArgumentParser((
|
| 399 |
PredictArguments,
|
|
|
|
| 393 |
|
| 394 |
def main():
|
| 395 |
# Test on unseen data
|
| 396 |
+
logger.setLevel(logging.DEBUG)
|
| 397 |
|
| 398 |
hf_parser = HfArgumentParser((
|
| 399 |
PredictArguments,
|
src/preprocess.py
CHANGED
|
@@ -529,8 +529,7 @@ class DatasetArguments:
|
|
| 529 |
def main():
|
| 530 |
# Responsible for getting transcripts using youtube_transcript_api,
|
| 531 |
# then labelling it according to SponsorBlock's API
|
| 532 |
-
|
| 533 |
-
logging.getLogger().setLevel(logging.INFO) # TODO make param
|
| 534 |
|
| 535 |
# Generate final.json from sponsorTimes.csv
|
| 536 |
hf_parser = HfArgumentParser((
|
|
@@ -561,7 +560,8 @@ def main():
|
|
| 561 |
@lru_cache(maxsize=1)
|
| 562 |
def read_db():
|
| 563 |
if not preprocess_args.overwrite and os.path.exists(processed_db_path):
|
| 564 |
-
logger.info(
|
|
|
|
| 565 |
with open(processed_db_path) as fp:
|
| 566 |
return json.load(fp)
|
| 567 |
logger.info('Processing raw database')
|
|
@@ -700,7 +700,8 @@ def main():
|
|
| 700 |
progress.update()
|
| 701 |
|
| 702 |
except KeyboardInterrupt:
|
| 703 |
-
logger.info(
|
|
|
|
| 704 |
|
| 705 |
# only futures that are not done will prevent exiting
|
| 706 |
for future in to_process:
|
|
@@ -941,7 +942,8 @@ def main():
|
|
| 941 |
else:
|
| 942 |
logger.info(f'Skipping {dataset_args.excess_file}')
|
| 943 |
|
| 944 |
-
logger.info(
|
|
|
|
| 945 |
|
| 946 |
|
| 947 |
def split(arr, ratios):
|
|
|
|
| 529 |
def main():
|
| 530 |
# Responsible for getting transcripts using youtube_transcript_api,
|
| 531 |
# then labelling it according to SponsorBlock's API
|
| 532 |
+
logger.setLevel(logging.DEBUG)
|
|
|
|
| 533 |
|
| 534 |
# Generate final.json from sponsorTimes.csv
|
| 535 |
hf_parser = HfArgumentParser((
|
|
|
|
| 560 |
@lru_cache(maxsize=1)
|
| 561 |
def read_db():
|
| 562 |
if not preprocess_args.overwrite and os.path.exists(processed_db_path):
|
| 563 |
+
logger.info(
|
| 564 |
+
'Using cached processed database (use `--overwrite` to avoid this behaviour).')
|
| 565 |
with open(processed_db_path) as fp:
|
| 566 |
return json.load(fp)
|
| 567 |
logger.info('Processing raw database')
|
|
|
|
| 700 |
progress.update()
|
| 701 |
|
| 702 |
except KeyboardInterrupt:
|
| 703 |
+
logger.info(
|
| 704 |
+
'Gracefully shutting down: Cancelling unscheduled tasks')
|
| 705 |
|
| 706 |
# only futures that are not done will prevent exiting
|
| 707 |
for future in to_process:
|
|
|
|
| 942 |
else:
|
| 943 |
logger.info(f'Skipping {dataset_args.excess_file}')
|
| 944 |
|
| 945 |
+
logger.info(
|
| 946 |
+
f'Finished splitting: {len(sponsors)} sponsors, {len(non_sponsors)} non sponsors')
|
| 947 |
|
| 948 |
|
| 949 |
def split(arr, ratios):
|