Spaces:
Sleeping
Sleeping
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class Entity:
    """Simple record that bundles the attributes describing one entity.

    The constructor stores each argument unchanged on the instance under the
    same name without the leading underscore (``_id`` -> ``id`` and so on).
    No validation or conversion is performed.

    Args:
        _id: Identifier of the entity.
        _text: Text associated with the entity.
        _mask: Mask associated with the entity (format not visible here).
        _interactive: Interactivity flag/value for the entity.
        _type: Type/category of the entity.
        _start_idx: Start index of the entity (presumably an offset into the
            source text -- confirm against callers).
        _end_idx: End index of the entity (presumably an offset into the
            source text -- confirm against callers).
        _image: Optional image associated with the entity; defaults to
            ``None``.
    """

    def __init__(self, _id, _text, _mask, _interactive, _type, _start_idx, _end_idx, _image=None):
        # Identity and content.
        self.id = _id
        self.text = _text
        self.mask = _mask

        # Classification.
        self.interactive = _interactive
        self.type = _type

        # Span boundaries.
        self.start_idx = _start_idx
        self.end_idx = _end_idx

        # Optional payload.
        self.image = _image
def split_by_ordered_substrings(sentence, substrings):
    """Split ``sentence`` into segments around the given substrings, in order.

    The substrings are searched left to right, each one starting from the end
    of the previous match. A substring that does not occur in the remaining
    part of the sentence is skipped and does not advance the scan position.

    Args:
        sentence: The string to split.
        substrings: Iterable of strings expected to appear in ``sentence`` in
            the given order.

    Returns:
        A tuple ``(results, substring_indices)`` of two equal-length lists.
        ``results`` holds the consecutive segments of ``sentence`` (matched
        substrings and the text between/around them). ``substring_indices``
        holds, for each segment, the position of the matched entry in
        ``substrings`` or ``None`` for text that matched no entry.
    """
    results = []
    substring_indices = []
    start_index = 0

    for i, substring in enumerate(substrings):
        # Search in place from the current position instead of slicing the
        # tail of the sentence, which would copy it on every iteration.
        match_pos = sentence.find(substring, start_index)
        if match_pos == -1:
            # Not present in the remaining text: skip, keep scan position.
            continue

        # Text between the previous match and this one (spaces included).
        if match_pos > start_index:
            results.append(sentence[start_index:match_pos])
            substring_indices.append(None)

        results.append(substring)
        substring_indices.append(i)
        start_index = match_pos + len(substring)

    # Whatever remains after the last match is an unmatched trailing segment.
    if start_index < len(sentence):
        results.append(sentence[start_index:])
        substring_indices.append(None)

    return results, substring_indices