Spaces:

kernel-luso-comfort
/

BiomedParse

Sleeping

App Files Files Community

BiomedParse / utilities /dataset.py

kernel-luso-comfort

Add Apache License 2.0 header to multiple source files

202eff6 about 1 month ago

raw

history blame contribute delete

2.05 kB

	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.


	class Entity(object):
	    """Plain record holding the attributes of a single entity.

	    The constructor stores every argument unchanged on the instance under
	    the same name without the leading underscore (``_id`` -> ``id``,
	    ``_text`` -> ``text``, and so on). No validation or copying is done.

	    NOTE(review): presumably ``start_idx``/``end_idx`` are offsets of
	    ``text`` inside a longer prompt and ``mask``/``image`` are array-like
	    payloads -- confirm against the callers; this class does not rely on it.
	    """

	    def __init__(self, _id, _text, _mask, _interactive, _type, _start_idx, _end_idx, _image=None):
	        self.id = _id
	        self.text = _text
	        self.mask = _mask
	        self.interactive = _interactive
	        self.type = _type
	        self.start_idx = _start_idx
	        self.end_idx = _end_idx

	        # Optional; stays None when the caller does not supply an image.
	        self.image = _image

	    def __repr__(self):
	        # `mask` and `image` are left out on purpose: they may be large
	        # payloads and would drown the useful fields in debug output.
	        return (
	            "{}(id={!r}, text={!r}, type={!r}, interactive={!r}, "
	            "start_idx={!r}, end_idx={!r})"
	        ).format(
	            type(self).__name__,
	            self.id,
	            self.text,
	            self.type,
	            self.interactive,
	            self.start_idx,
	            self.end_idx,
	        )

	def split_by_ordered_substrings(sentence, substrings):
	    """Split ``sentence`` into segments around the given substrings, in order.

	    The substrings are searched for one after another, each search starting
	    where the previous match ended. A substring that does not occur in the
	    remaining part of the sentence is skipped and the scan position is left
	    untouched.

	    Args:
	        sentence: The string to split.
	        substrings: Strings to look for, in the order they are expected to
	            appear in ``sentence``.

	    Returns:
	        A tuple ``(results, substring_indices)`` of two equally long lists.
	        ``results`` holds the consecutive segments of ``sentence`` (matched
	        substrings plus the text between/around them).
	        ``substring_indices[k]`` is the position in ``substrings`` of the
	        match that produced ``results[k]``, or ``None`` when that segment is
	        filler text that matched nothing.
	    """
	    results = []
	    substring_indices = []

	    cursor = 0  # absolute position in `sentence` where the next search begins
	    for i, substring in enumerate(substrings):
	        # Search in place from `cursor` instead of slicing the tail of the
	        # sentence on every iteration; `found_at` is an absolute position.
	        found_at = sentence.find(substring, cursor)
	        if found_at == -1:
	            continue

	        # Keep any text between the previous match and this one, spaces included.
	        if found_at > cursor:
	            results.append(sentence[cursor:found_at])
	            substring_indices.append(None)  # filler segment, no entry in `substrings`

	        results.append(substring)
	        substring_indices.append(i)  # position of the match in `substrings`
	        cursor = found_at + len(substring)

	    # Whatever follows the last match is kept as a trailing filler segment.
	    if cursor < len(sentence):
	        results.append(sentence[cursor:])
	        substring_indices.append(None)

	    return results, substring_indices