sdap-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From eamonf...@apache.org
Subject [incubator-sdap-ingester] 01/01: Fix bug that prevented collection manager from seeing files in a directory when the path had no glob-style wildcard character. Also add back support for scanning dirs recursively.
Date Tue, 05 Jan 2021 08:24:16 GMT
This is an automated email from the ASF dual-hosted git repository.

eamonford pushed a commit to branch fix-collection-manager-bugs
in repository https://gitbox.apache.org/repos/asf/incubator-sdap-ingester.git

commit 12dd0af277a6e5fb288980c9db780be150dd98d5
Author: Eamon Ford <eamon.d.ford@jpl.nasa.gov>
AuthorDate: Tue Jan 5 00:24:01 2021 -0800

    Fix bug that prevented collection manager from seeing files in a directory when the path
had no glob-style wildcard character. Also add back support for scanning dirs recursively.
---
 .../collection_manager/entities/Collection.py              |  7 ++++---
 .../collection_manager/services/CollectionWatcher.py       | 14 +++++++++++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/collection_manager/collection_manager/entities/Collection.py b/collection_manager/collection_manager/entities/Collection.py
index 7a45b66..389e135 100644
--- a/collection_manager/collection_manager/entities/Collection.py
+++ b/collection_manager/collection_manager/entities/Collection.py
@@ -1,11 +1,12 @@
 import os
-from urllib.parse import urlparse
+import pathlib
 from dataclasses import dataclass
 from datetime import datetime
+from enum import Enum
 from fnmatch import fnmatch
 from glob import glob
 from typing import List, Optional
-from enum import Enum
+from urllib.parse import urlparse
 
 from collection_manager.entities.exceptions import MissingValueCollectionError
 
@@ -68,6 +69,6 @@ class Collection:
                 raise IsADirectoryError()
 
             if os.path.isdir(self.path):
-                return os.path.dirname(file_path) == self.path
+                return pathlib.Path(self.path) in pathlib.Path(file_path).parents
             else:
                 return fnmatch(file_path, self.path)
diff --git a/collection_manager/collection_manager/services/CollectionWatcher.py b/collection_manager/collection_manager/services/CollectionWatcher.py
index b1aaf4e..68b013a 100644
--- a/collection_manager/collection_manager/services/CollectionWatcher.py
+++ b/collection_manager/collection_manager/services/CollectionWatcher.py
@@ -116,11 +116,16 @@ class CollectionWatcher:
         logger.info(f"Scanning files for {len(collections)} collections...")
         start = time.perf_counter()
         for collection in collections:
-            for granule_path in glob(collection.path, recursive=True):
+            for granule_path in self._get_files_at_path(collection.path):
                 modified_time = int(os.path.getmtime(granule_path))
                 await self._granule_updated_callback(granule_path, modified_time, collection)
         logger.info(f"Finished scanning files in {time.perf_counter() - start} seconds.")
 
+    def _get_files_at_path(self, path: str) -> List[str]:
+        if os.path.isfile(path):
+            return [path]
+        return [f for f in glob(path + '/**', recursive=True) if os.path.isfile(f)]
+
     async def _reload_and_reschedule(self):
         try:
             updated_collections = self._get_updated_collections()
@@ -191,11 +196,14 @@ class _GranuleEventHandler(FileSystemEventHandler):
 
     def on_created(self, event):
         super().on_created(event)
-        self._handle_event(event)
+        if isinstance(event, S3Event) or not event.is_directory:
+            self._handle_event(event)
 
     def on_modified(self, event):
         super().on_modified(event)
-        self._handle_event(event)
+
+        if isinstance(event, S3Event) or not event.is_directory:
+            self._handle_event(event)
 
     def _handle_event(self, event):
         path = event.src_path


Mime
View raw message